diff --git a/src/Makefile.am b/src/Makefile.am index 4ab052fabb82f0a7466a564e4c06ffed9c6b6aba..f1376a6258180bd0a72306a60af0beece6e097d8 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -32,9 +32,9 @@ AM_LDFLAGS = $(LAPACK_LIBS) $(BLAS_LIBS) $(HDF5_LDFLAGS) -version-info 0:0:0 # Build the libswiftsim library lib_LTLIBRARIES = libswiftsim.la libswiftsim_la_SOURCES = space.c runner.c queue.c task.c cell.c engine.c \ - io.c timers.c debug.c + io.c timers.c debug.c scheduler.c # List required headers include_HEADERS = space.h runner.h queue.h task.h lock.h cell.h part.h const.h \ - engine.h swift.h io.h timers.h debug.h + engine.h swift.h io.h timers.h debug.h scheduler.h diff --git a/src/engine.c b/src/engine.c index cc93613fe32bb7edeef010446a5882410b7c9d9f..1e4afdb89bfa16b967b6d9fddf35ce8803f1cf0d 100644 --- a/src/engine.c +++ b/src/engine.c @@ -44,6 +44,7 @@ #include "cell.h" #include "space.h" #include "queue.h" +#include "scheduler.h" #include "engine.h" #include "runner.h" #include "runner_iact.h" @@ -54,67 +55,304 @@ /** - * @brief Prepare the #engine by re-building the cells and tasks. + * @brief Fill the #space's task list. * - * @param e The #engine to prepare. + * @param s The #space we are working in. */ -void engine_prepare ( struct engine *e ) { +void engine_maketasks ( struct engine *e ) { - int j, k, qid, rebuild; struct space *s = e->s; - struct queue *q; + struct scheduler *sched = &e->sched; + int i, j, k, ii, jj, kk, iii, jjj, kkk, cid, cjd, sid; + int *cdim = s->cdim; + struct task *t, *t2; + struct cell *ci, *cj; + + /* Re-set the scheduler. */ + scheduler_reset( sched , s->tot_cells * space_maxtaskspercell ); + + /* Run through the highest level of cells and add pairs. */ + for ( i = 0 ; i < cdim[0] ; i++ ) + for ( j = 0 ; j < cdim[1] ; j++ ) + for ( k = 0 ; k < cdim[2] ; k++ ) { + cid = cell_getid( cdim , i , j , k ); + if ( s->cells[cid].count == 0 ) + continue; + ci = &s->cells[cid]; + if ( ci->count == 0 ) + continue; + scheduler_addtask( sched , task_type_self , task_subtype_density , 0 , 0 , ci , NULL , 0 ); + for ( ii = -1 ; ii < 2 ; ii++ ) { + iii = i + ii; + if ( !s->periodic && ( iii < 0 || iii >= cdim[0] ) ) + continue; + iii = ( iii + cdim[0] ) % cdim[0]; + for ( jj = -1 ; jj < 2 ; jj++ ) { + jjj = j + jj; + if ( !s->periodic && ( jjj < 0 || jjj >= cdim[1] ) ) + continue; + jjj = ( jjj + cdim[1] ) % cdim[1]; + for ( kk = -1 ; kk < 2 ; kk++ ) { + kkk = k + kk; + if ( !s->periodic && ( kkk < 0 || kkk >= cdim[2] ) ) + continue; + kkk = ( kkk + cdim[2] ) % cdim[2]; + cjd = cell_getid( cdim , iii , jjj , kkk ); + cj = &s->cells[cjd]; + if ( cid >= cjd || cj->count == 0 ) + continue; + sid = sortlistID[ (kk+1) + 3*( (jj+1) + 3*(ii+1) ) ]; + t = scheduler_addtask( sched , task_type_pair , task_subtype_density , sid , 0 , ci , cj , 1 ); + } + } + } + } + + /* Split the tasks. */ + scheduler_splittasks( sched ); + + /* Count the number of tasks associated with each cell and + store the density tasks in each cell, and make each sort + depend on the sorts of its progeny. */ + // #pragma omp parallel for private(t,j) + for ( k = 0 ; k < sched->nr_tasks ; k++ ) { + t = &sched->tasks[k]; + if ( t->skip ) + continue; + if ( t->type == task_type_sort && t->ci->split ) + for ( j = 0 ; j < 8 ; j++ ) { + if ( t->ci->progeny[j] != NULL ) { + if ( t->ci->progeny[j]->sorts == NULL ) + t->ci->progeny[j]->sorts = scheduler_addtask( sched , task_type_sort , task_subtype_none , t->flags , 0 , t->ci->progeny[j] , NULL , 0 ); + t->ci->progeny[j]->sorts->skip = 0; + task_addunlock( t->ci->progeny[j]->sorts , t ); + } + } + if ( t->type == task_type_self ) { + atomic_inc( &t->ci->nr_tasks ); + if ( t->subtype == task_subtype_density ) { + t->ci->density[ atomic_inc( &t->ci->nr_density ) ] = t; + } + } + else if ( t->type == task_type_pair ) { + atomic_inc( &t->ci->nr_tasks ); + atomic_inc( &t->cj->nr_tasks ); + if ( t->subtype == task_subtype_density ) { + t->ci->density[ atomic_inc( &t->ci->nr_density ) ] = t; + t->cj->density[ atomic_inc( &t->cj->nr_density ) ] = t; + } + } + else if ( t->type == task_type_sub ) { + atomic_inc( &t->ci->nr_tasks ); + if ( t->cj != NULL ) + atomic_inc( &t->cj->nr_tasks ); + if ( t->subtype == task_subtype_density ) { + t->ci->density[ atomic_inc( &t->ci->nr_density ) ] = t; + if ( t->cj != NULL ) + t->cj->density[ atomic_inc( &t->cj->nr_density ) ] = t; + } + } + } + + /* Append a ghost task to each cell. */ + space_map_cells_pre( s , 1 , &scheduler_map_mkghosts , sched ); + + /* Run through the tasks and make force tasks for each density task. + Each force task depends on the cell ghosts and unlocks the kick2 task + of its super-cell. */ + kk = sched->nr_tasks; + // #pragma omp parallel for private(t,t2) + for ( k = 0 ; k < kk ; k++ ) { + + /* Get a pointer to the task. */ + t = &sched->tasks[k]; + + /* Skip? */ + if ( t->skip ) + continue; + + /* Self-interaction? */ + if ( t->type == task_type_self && t->subtype == task_subtype_density ) { + task_addunlock( t , t->ci->super->ghost ); + t2 = scheduler_addtask( sched , task_type_self , task_subtype_force , 0 , 0 , t->ci , NULL , 0 ); + task_addunlock( t->ci->ghost , t2 ); + task_addunlock( t2 , t->ci->super->kick2 ); + } + + /* Otherwise, pair interaction? */ + else if ( t->type == task_type_pair && t->subtype == task_subtype_density ) { + task_addunlock( t , t->ci->super->ghost ); + if ( t->ci->super != t->cj->super ) + task_addunlock( t , t->cj->super->ghost ); + t2 = scheduler_addtask( sched , task_type_pair , task_subtype_force , 0 , 0 , t->ci , t->cj , 0 ); + task_addunlock( t->ci->ghost , t2 ); + task_addunlock( t->cj->ghost , t2 ); + task_addunlock( t2 , t->ci->super->kick2 ); + if ( t->ci->super != t->cj->super ) + task_addunlock( t2 , t->cj->super->kick2 ); + } + + /* Otherwise, sub interaction? */ + else if ( t->type == task_type_sub && t->subtype == task_subtype_density ) { + task_addunlock( t , t->ci->super->ghost ); + if ( t->cj != NULL && t->ci->super != t->cj->super ) + task_addunlock( t , t->cj->super->ghost ); + t2 = scheduler_addtask( sched , task_type_sub , task_subtype_force , t->flags , 0 , t->ci , t->cj , 0 ); + task_addunlock( t->ci->ghost , t2 ); + if ( t->cj != NULL ) + task_addunlock( t->cj->ghost , t2 ); + task_addunlock( t2 , t->ci->super->kick2 ); + if ( t->cj != NULL && t->ci->super != t->cj->super ) + task_addunlock( t2 , t->cj->super->kick2 ); + } + + } + + /* Rank the tasks. */ + scheduler_ranktasks( sched ); + + /* Count the number of each task type. */ + int counts[ task_type_count+1 ]; + for ( k = 0 ; k <= task_type_count ; k++ ) + counts[k] = 0; + for ( k = 0 ; k < sched->nr_tasks ; k++ ) + if ( !sched->tasks[k].skip ) + counts[ (int)sched->tasks[k].type ] += 1; + else + counts[ task_type_count ] += 1; + printf( "engine_maketasks: task counts are [ %s=%i" , taskID_names[0] , counts[0] ); + for ( k = 1 ; k < task_type_count ; k++ ) + printf( " %s=%i" , taskID_names[k] , counts[k] ); + printf( " skipped=%i ]\n" , counts[ task_type_count ] ); fflush(stdout); + + } + - TIMER_TIC - /* Rebuild the space. */ - // tic = getticks(); - rebuild = ( space_prepare( e->s ) || e->step == 0 ); - // printf( "engine_prepare: space_prepare took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 ); +/** + * @brief Mark tasks to be skipped and set the sort flags accordingly. + * + * @return 1 if the space has to be rebuilt, 0 otherwise. + */ + +int engine_marktasks ( struct engine *e ) { + + struct scheduler *s = &e->sched; + int k, nr_tasks = s->nr_tasks, *ind = s->tasks_ind; + struct task *t, *tasks = s->tasks; + float dt_step = e->dt_step; + struct cell *ci, *cj; - /* The queues only need to be re-built if we have variable time-steps - or the space was rebuilt. */ - if ( !(e->policy & engine_policy_fixdt) || rebuild ) { + /* Run through the tasks and mark as skip or not. */ + for ( k = 0 ; k < nr_tasks ; k++ ) { - // tic = getticks(); - /* Init the queues (round-robin). */ - for ( qid = 0 ; qid < e->nr_queues ; qid++ ) - queue_init( &e->queues[qid] , s->nr_tasks , s->tasks ); - - /* Fill the queues (round-robin). */ - for ( qid = 0 , k = 0 ; k < s->nr_tasks ; k++ ) { - if ( s->tasks[ s->tasks_ind[k] ].skip ) - continue; - q = &e->queues[qid]; - qid = ( qid + 1 ) % e->nr_queues; - q->tid[ q->count ] = s->tasks_ind[k]; - q->count += 1; + /* Get a handle on the kth task. */ + t = &tasks[ ind[k] ]; + + /* Sort-task? Note that due to the task ranking, the sorts + will all come before the pairs and/or subs. */ + if ( t->type == task_type_sort ) { + + /* Re-set the flags. */ + t->flags = 0; + t->skip = 1; + } - // printf( "engine_prepare: re-filling queues took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 ); - } + /* Single-cell task? */ + else if ( t->type == task_type_self || + t->type == task_type_ghost || + ( t->type == task_type_sub && t->cj == NULL ) ) { + + /* Set this task's skip. */ + t->skip = ( t->ci->dt_min > dt_step ); + + } - /* Otherwise, just re-set them. */ - else { - for ( qid = 0 ; qid < e->nr_queues ; qid++ ) - e->queues[qid].next = 0; + /* Pair? */ + else if ( t->type == task_type_pair || ( t->type == task_type_sub && t->cj != NULL ) ) { + + /* Local pointers. */ + ci = t->ci; + cj = t->cj; + + /* Set this task's skip. */ + t->skip = ( ci->dt_min > dt_step && cj->dt_min > dt_step ); + + /* Too much particle movement? */ + if ( t->tight && + ( fmaxf( ci->h_max , cj->h_max ) + ci->dx_max + cj->dx_max > cj->dmin || + ci->dx_max > space_maxreldx*ci->h_max || cj->dx_max > space_maxreldx*cj->h_max ) ) + return 1; + + /* Set the sort flags. */ + if ( !t->skip && t->type == task_type_pair ) { + ci->sorts->flags |= (1 << t->flags); + ci->sorts->skip = 0; + cj->sorts->flags |= (1 << t->flags); + cj->sorts->skip = 0; + } + + } + + /* Kick2? */ + else if ( t->type == task_type_kick2 ) + t->skip = 0; + + /* None? */ + else if ( t->type == task_type_none ) + t->skip = 1; + } + + /* All is well... */ + return 0; + + } + + +/** + * @brief Prepare the #engine by re-building the cells and tasks. + * + * @param e The #engine to prepare. + */ + +void engine_prepare ( struct engine *e ) { + + int rebuild; + + TIMER_TIC - /* Run throught the tasks and get all the waits right. */ + /* Run through the tasks and mark as skip or not. */ // tic = getticks(); - #pragma omp parallel for schedule(static) private(j) - for ( k = 0 ; k < s->nr_tasks ; k++ ) { - if ( s->tasks[k].skip ) - continue; - for ( j = 0 ; j < s->tasks[k].nr_unlock_tasks ; j++ ) - atomic_inc( &s->tasks[k].unlock_tasks[j]->wait ); - } - // printf( "engine_prepare: preparing task dependencies took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 ); + rebuild = ( e->step == 0 || engine_marktasks( e ) ); + // printf( "space_prepare: space_marktasks took %.3f ms.\n" , (double)(getticks() - tic)/CPU_TPS*1000 ); + + /* Did this not go through? */ + if ( rebuild ) { - /* Re-set the queues.*/ - for ( k = 0 ; k < e->nr_queues ; k++ ) - e->queues[k].next = 0; + /* Re-build the space. */ + tic = getticks(); + space_rebuild( e->s , 0.0 ); + printf( "engine_prepare: space_rebuild took %.3f ms.\n" , (double)(getticks() - tic)/CPU_TPS*1000 ); + + /* Re-build the tasks. */ + tic = getticks(); + engine_maketasks( e ); + printf( "engine_prepare: engine_maketasks took %.3f ms.\n" , (double)(getticks() - tic)/CPU_TPS*1000 ); + + /* Run through the tasks and mark as skip or not. */ + // tic = getticks(); + if ( engine_marktasks( e ) ) + error( "engine_marktasks failed after space_rebuild." ); + // printf( "engine_prepare: engine_marktasks took %.3f ms.\n" , (double)(getticks() - tic)/CPU_TPS*1000 ); + } + + /* Start the scheduler. */ + scheduler_start( &e->sched ); + TIMER_TOC( timer_prepare ); } @@ -455,7 +693,7 @@ void engine_single_force ( double *dim , long long int pid , struct part *__rest * @param sort_queues Flag to try to sort the queues topologically. */ -void engine_step ( struct engine *e , int sort_queues ) { +void engine_step ( struct engine *e ) { int k; float dt = e->dt, dt_step, dt_max = 0.0f, dt_min = FLT_MAX; @@ -509,15 +747,6 @@ void engine_step ( struct engine *e , int sort_queues ) { /* Prepare the space. */ engine_prepare( e ); - /* Sort the queues?*/ - if ( sort_queues ) { - #pragma omp parallel for default(none), shared(e) - for ( k = 0 ; k < e->nr_queues ; k++ ) { - queue_sort( &e->queues[k] ); - e->queues[k].next = 0; - } - } - // engine_single_density( e->s->dim , 3392063069037 , e->s->parts , e->s->nr_parts , e->s->periodic ); /* Start the clock. */ @@ -634,7 +863,6 @@ void engine_init ( struct engine *e , struct space *s , float dt , int nr_thread /* Store the values. */ e->s = s; e->nr_threads = nr_threads; - e->nr_queues = nr_queues; e->policy = policy; e->step = 0; e->nullstep = 0; @@ -661,14 +889,8 @@ void engine_init ( struct engine *e , struct space *s , float dt , int nr_thread dt *= 0.5f; e->dt = dt; - /* Allocate the queues. */ - if ( posix_memalign( (void *)(&e->queues) , 64 , nr_queues * sizeof(struct queue) ) != 0 ) - error( "Failed to allocate queues." ); - bzero( e->queues , nr_queues * sizeof(struct queue) ); - - /* Sort the queues topologically. */ - // for ( k = 0 ; k < nr_queues ; k++ ) - // queue_sort( &e->queues[k] ); + /* Init the scheduler. */ + scheduler_init( &e->sched , e->s , nr_queues , scheduler_flag_steal ); /* Allocate and init the threads. */ if ( ( e->runners = (struct runner *)malloc( sizeof(struct runner) * nr_threads ) ) == NULL ) diff --git a/src/engine.h b/src/engine.h index 2d3c6a3575ad8d53fdbdf0e8f5768ad0e0128b76..8efae350763a2e5133aa2199ec52207ca2e6b981 100644 --- a/src/engine.h +++ b/src/engine.h @@ -46,11 +46,8 @@ struct engine { /* The running policy. */ int policy; - /* The number of queues. */ - int nr_queues; - - /* The queues. */ - struct queue *queues; + /* The task scheduler. */ + struct scheduler sched; /* The maximum dt to step (current). */ float dt_step; @@ -85,4 +82,5 @@ struct engine { void engine_barrier( struct engine *e ); void engine_init ( struct engine *e , struct space *s , float dt , int nr_threads , int nr_queues , int policy ); void engine_prepare ( struct engine *e ); -void engine_step ( struct engine *e , int sort_queues ); +void engine_step ( struct engine *e ); +void engine_maketasks ( struct engine *e ); diff --git a/src/io.c b/src/io.c index a229e028e80dce199bafe9ed2b415a37218650ea..c965cf925f1cdb16fe4f2cb9e67b239fcb522da5 100644 --- a/src/io.c +++ b/src/io.c @@ -37,6 +37,7 @@ #include "task.h" #include "part.h" #include "space.h" +#include "scheduler.h" #include "engine.h" #include "error.h" #include "kernel.h" diff --git a/src/queue.c b/src/queue.c index 95b1eac0d6b29c9645e4018d55451fe3016d4e73..ebf187915e8e6567343be531cfb472ca8c1119de 100644 --- a/src/queue.c +++ b/src/queue.c @@ -24,12 +24,6 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> -#include <pthread.h> -#include <math.h> -#include <float.h> -#include <limits.h> -#include <omp.h> -#include <sched.h> /* Local headers. */ #include "cycle.h" @@ -93,11 +87,30 @@ void queue_insert ( struct queue *q , struct task *t ) { if ( lock_lock( &q->lock ) != 0 ) error( "Failed to get queue lock." ); - /* Swap next task to end. */ - q->tid[ q->count ] = q->tid[ q->next ]; + /* Does the queue need to be grown? */ + if ( q->count == q->size ) { + int *temp; + q->size *= queue_sizegrow; + if ( ( temp = (int *)malloc( sizeof(int) * q->size ) ) == NULL ) + error( "Failed to allocate new indices." ); + memcpy( temp , q->tid , sizeof(int) * q->count ); + free( q->tid ); + q->tid = temp; + } + + /* Drop the task at the end of the queue. */ + q->tid[ q->count ] = ( t - q->tasks ); q->count += 1; - q->tid[ q->next ] = t - q->tasks; - q->next += 1; + + /* Shuffle up. */ + for ( int k = q->count - 1 ; k > 0 ; k /= 2 ) + if ( q->tasks[ q->tid[k] ].rank < q->tasks[ q->tid[k/2] ].rank ) { + int temp = q->tid[k]; + q->tid[k] = q->tid[k/2]; + q->tid[k/2] = temp; + } + else + break; /* Unlock the queue. */ if ( lock_unlock( &q->lock ) != 0 ) @@ -113,23 +126,18 @@ void queue_insert ( struct queue *q , struct task *t ) { * @param tasks List of tasks to which the queue indices refer to. */ -void queue_init ( struct queue *q , int size , struct task *tasks ) { +void queue_init ( struct queue *q , struct task *tasks ) { /* Allocate the task list if needed. */ - if ( q->tid == NULL || q->size < size ) { - if ( q->tid != NULL ) - free( q->tid ); - q->size = size; - if ( ( q->tid = (int *)malloc( sizeof(int) * size ) ) == NULL ) - error( "Failed to allocate queue tids." ); - } + q->size = queue_sizeinit; + if ( ( q->tid = (int *)malloc( sizeof(int) * q->size ) ) == NULL ) + error( "Failed to allocate queue tids." ); /* Set the tasks pointer. */ q->tasks = tasks; /* Init counters. */ q->count = 0; - q->next = 0; /* Init the queue lock. */ if ( lock_init( &q->lock ) != 0 ) @@ -146,157 +154,29 @@ void queue_init ( struct queue *q , int size , struct task *tasks ) { * @param keep Remove the returned task from this queue. */ -struct task *queue_gettask_old ( struct queue *q , int blocking , int keep ) { +struct task *queue_gettask ( struct queue *q , int qid , int blocking ) { - int k, tid = -1, qcount, *qtid = q->tid; + int k, qcount, *qtid = q->tid; lock_type *qlock = &q->lock; struct task *qtasks = q->tasks, *res = NULL; TIMER_TIC /* If there are no tasks, leave immediately. */ - if ( q->next >= q->count ) { + if ( q->count == 0 ) { TIMER_TOC(queue_timer_gettask); return NULL; } /* Main loop, while there are tasks... */ - while ( q->next < q->count ) { + while ( q->count > 0 ) { /* Grab the task lock. */ - // if ( blocking ) { - if ( lock_lock( qlock ) != 0 ) - error( "Locking the task_lock failed.\n" ); - // } - // else { - // if ( lock_trylock( qlock ) != 0 ) - // break; - // } + if ( lock_lock( qlock ) != 0 ) + error( "Locking the qlock failed.\n" ); /* Loop over the remaining task IDs. */ qcount = q->count; - for ( k = q->next ; k < qcount ; k++ ) { - - /* Put a finger on the task. */ - res = &qtasks[ qtid[k] ]; - - /* Is this task blocked? */ - if ( res->wait ) - continue; - - /* Different criteria for different types. */ - if ( res->type == task_type_self || res->type == task_type_sort || (res->type == task_type_sub && res->cj == NULL) ) { - if ( res->ci->hold || cell_locktree( res->ci ) != 0 ) - continue; - } - else if ( res->type == task_type_pair || (res->type == task_type_sub && res->cj != NULL) ) { - if ( res->ci->hold || res->cj->hold || res->ci->wait || res->cj->wait ) - continue; - if ( cell_locktree( res->ci ) != 0 ) - continue; - if ( cell_locktree( res->cj ) != 0 ) { - cell_unlocktree( res->ci ); - continue; - } - } - - /* If we made it this far, we're safe. */ - break; - - } /* loop over the task IDs. */ - - /* Did we get a task? */ - if ( k < qcount ) { - - /* Do we need to swap? */ - if ( k != q->next ) - COUNT(queue_counter_swap); - - /* get the task ID. */ - tid = qtid[k]; - - /* Remove the task? */ - if ( keep ) { - - /* Bubble-up. */ - q->count = qcount - 1; - for ( ; k < qcount - 1 ; k++ ) - qtid[k] = qtid[k+1]; - - } - - /* No, leave it in the queue. */ - else { - - TIMER_TIC2 - - /* Bubble-down the task. */ - while ( k > q->next ) { - qtid[ k ] = qtid[ k-1 ]; - k -= 1; - } - qtid[ q->next ] = tid; - - /* up the counter. */ - q->next += 1; - - TIMER_TOC2(queue_timer_bubble); - - } - - } - - /* Release the task lock. */ - if ( lock_unlock( qlock ) != 0 ) - error( "Unlocking the task_lock failed.\n" ); - - /* Leave? */ - if ( tid >= 0 ) { - TIMER_TOC(queue_timer_gettask); - return &qtasks[tid]; - } - else if ( !blocking ) - break; - - } /* while there are tasks. */ - - /* No beef. */ - TIMER_TOC(queue_timer_gettask); - return NULL; - - } - - -struct task *queue_gettask ( struct queue *q , int rid , int blocking , int keep ) { - - int k, tid = -1, qcount, *qtid = q->tid, hits; - lock_type *qlock = &q->lock; - struct task *qtasks = q->tasks, *res = NULL; - struct cell *ci_best = NULL, *cj_best = NULL; - int ind_best, score_best = -1, score; - TIMER_TIC - - /* If there are no tasks, leave immediately. */ - if ( q->next >= q->count ) { - TIMER_TOC(queue_timer_gettask); - return NULL; - } - - /* Main loop, while there are tasks... */ - while ( q->next < q->count ) { - - /* Grab the task lock. */ - // if ( blocking ) { - if ( lock_lock( qlock ) != 0 ) - error( "Locking the qlock failed.\n" ); - // } - // else { - // if ( lock_trylock( qlock ) != 0 ) - // break; - // } - - /* Loop over the remaining task IDs. */ - qcount = q->count; ind_best = -1; hits = 0; - for ( k = q->next ; k < qcount && hits < queue_maxhits ; k++ ) { + for ( k = 0 ; k < qcount ; k++ ) { /* Put a finger on the task. */ res = &qtasks[ qtid[k] ]; @@ -305,109 +185,60 @@ struct task *queue_gettask ( struct queue *q , int rid , int blocking , int keep if ( res->wait ) continue; - /* Get the score for this task. */ - if ( res->cj == NULL ) - score = 2 * ( res->ci->super->owner == rid ); - else - score = ( res->ci->super->owner == rid ) + ( res->cj->super->owner == rid ); - if ( score <= score_best ) - continue; - /* Try to lock ci. */ if ( res->type == task_type_self || res->type == task_type_sort || (res->type == task_type_sub && res->cj == NULL) ) { - if ( res->ci != ci_best && res->ci != cj_best && cell_locktree( res->ci ) != 0 ) + if ( cell_locktree( res->ci ) != 0 ) continue; } else if ( res->type == task_type_pair || (res->type == task_type_sub && res->cj != NULL) ) { if ( res->ci->hold || res->cj->hold || res->ci->wait || res->cj->wait ) continue; - if ( res->ci != ci_best && res->ci != cj_best && cell_locktree( res->ci ) != 0 ) + if ( cell_locktree( res->ci ) != 0 ) continue; - if ( res->cj != ci_best && res->cj != cj_best && cell_locktree( res->cj ) != 0 ) { - if ( res->ci != ci_best && res->ci != cj_best ) - cell_unlocktree( res->ci ); + if ( cell_locktree( res->cj ) != 0 ) { + cell_unlocktree( res->ci ); continue; } } - /* If we owned a previous task, unlock it. */ - if ( ind_best >= 0 ) { - res = &qtasks[ qtid[ ind_best ] ]; - if ( res->type == task_type_self || res->type == task_type_sort || res->type == task_type_pair || res->type == task_type_sub ) - if ( res->ci != ci_best && res->ci != cj_best ) - cell_unlocktree( res->ci ); - if ( res->type == task_type_pair || (res->type == task_type_sub && res->cj != NULL) ) - if ( res->cj != ci_best && res->cj != cj_best ) - cell_unlocktree( res->cj ); - } - /* If we made it this far, we're safe. */ - ind_best = k; - ci_best = qtasks[ qtid[ k ] ].ci; - cj_best = qtasks[ qtid[ k ] ].cj; - score_best = score; - hits += 1; - - /* Should we bother looking any farther? */ - if ( score_best == 2 ); - break; + break; } /* loop over the task IDs. */ /* Did we get a task? */ - if ( ind_best >= 0 ) { + if ( k < qcount ) { - /* Do we need to swap? */ - if ( ind_best != q->next ) - COUNT(queue_counter_swap); + /* Another one bites the dust. */ + q->count -= 1; - /* get the task ID. */ - tid = qtid[ ind_best ]; - /* Own the cells involved. */ - qtasks[ tid ].ci->super->owner = rid; - if ( qtasks[ tid ].cj != NULL ) - qtasks[ tid ].cj->super->owner = rid; - - /* Remove the task? */ - if ( keep ) { - - /* Bubble-up. */ - /* q->count = qcount - 1; - for ( k = ind_best ; k < qcount - 1 ; k++ ) - qtid[k] = qtid[k+1]; */ - - /* Swap with last task. */ - q->count = qcount - 1; - qtid[ ind_best ] = qtid[ q->count ]; - - } - - /* No, leave it in the queue. */ - else { - - TIMER_TIC2 - - /* Bubble-down the task. */ - /* for ( k = ind_best ; k > q->next ; k-- ) - qtid[ k ] = qtid[ k-1 ]; - qtid[ q->next ] = tid; */ + res->ci->super->owner = qid; + if ( res->cj != NULL ) + res->cj->super->owner = qid; - /* Swap with the first task. */ - if ( ind_best != q->next ) { - qtid[ ind_best ] = qtid[ q->next ]; - qtid[ q->next ] = tid; + /* Swap this task with the last task and re-heap. */ + if ( k < q->count ) { + qtid[ k ] = qtid[ q->count ]; + while ( 1 ) { + int i = 2*k; + if ( i >= q->count ) + break; + if ( i+1 < q->count && qtasks[ qtid[i+1] ].rank < qtasks[ qtid[i] ].rank ) + i += 1; + if ( qtasks[ qtid[i] ].rank < qtasks[ qtid[k] ].rank ) { + int temp = qtid[i]; + qtid[i] = qtid[k]; + qtid[k] = temp; + k = i; + } + else + break; } - - /* up the counter. */ - q->next += 1; - - TIMER_TOC2(queue_timer_bubble); - } - + } /* Release the task lock. */ @@ -415,136 +246,15 @@ struct task *queue_gettask ( struct queue *q , int rid , int blocking , int keep error( "Unlocking the qlock failed.\n" ); /* Leave? */ - if ( tid >= 0 ) { - TIMER_TOC(queue_timer_gettask); - return &qtasks[tid]; - } - else if ( !blocking ) + if ( res != NULL || !blocking ) break; } /* while there are tasks. */ /* No beef. */ TIMER_TOC(queue_timer_gettask); - return NULL; + return res; } -/** - * @brief Sort the tasks IDs according to their weight and constraints. - * - * @param q The #queue. - */ - -void queue_sort ( struct queue *q ) { - - struct { - short int lo, hi; - } qstack[20]; - int qpos, i, j, k, lo, hi, imin, temp; - int pivot_weight, pivot_wait; - int *weight, *wait; - int *data = q->tid; - struct task *t; - - printf( "queue_sort: sorting queue with %i tasks.\n" , q->count ); - - /* Allocate and pre-compute each task's weight. */ - if ( ( weight = (int *)alloca( sizeof(int) * q->count ) ) == NULL || - ( wait = (int *)alloca( sizeof(int) * q->count ) ) == NULL ) - error( "Failed to allocate weight buffer." ); - for ( k = 0 ; k < q->count ; k++ ) { - t = &q->tasks[ q->tid[k] ]; - switch ( t->type ) { - case task_type_self: - wait[k] = t->rank; - weight[k] = 0; // t->ci->count * t->ci->count; - break; - case task_type_pair: - wait[k] = t->rank; - weight[k] = 0; // t->ci->count * t->cj->count; - break; - case task_type_sub: - wait[k] = t->rank; - weight[k] = 0; // (t->cj == NULL) ? t->ci->count * t->ci->count : t->ci->count * t->cj->count; - break; - case task_type_sort: - wait[k] = t->rank; - weight[k] = 0; // t->ci->count; - break; - case task_type_ghost: - wait[k] = t->rank; - weight[k] = 0; // t->ci->count; - break; - } - } - - /* Sort tasks. */ - qstack[0].lo = 0; qstack[0].hi = q->count - 1; qpos = 0; - while ( qpos >= 0 ) { - lo = qstack[qpos].lo; hi = qstack[qpos].hi; - qpos -= 1; - if ( hi - lo < 15 ) { - for ( i = lo ; i < hi ; i++ ) { - imin = i; - for ( j = i+1 ; j <= hi ; j++ ) - if ( ( wait[ j ] < wait[ imin ] ) || - ( wait[ j ] == wait[ imin ] && weight[ j ] > weight[ imin ] ) ) - if ( imin != i ) { - temp = data[imin]; data[imin] = data[i]; data[i] = temp; - temp = wait[imin]; wait[imin] = wait[i]; wait[i] = temp; - temp = weight[imin]; weight[imin] = weight[i]; weight[i] = temp; - } - } - } - else { - pivot_weight = weight[ ( lo + hi ) / 2 ]; - pivot_wait = wait[ ( lo + hi ) / 2 ]; - i = lo; j = hi; - while ( i <= j ) { - while ( ( wait[ i ] < pivot_wait ) || - ( wait[ i ] == pivot_wait && weight[ i ] > pivot_weight ) ) - i++; - while ( ( wait[ j ] > pivot_wait ) || - ( wait[ j ] == pivot_wait && weight[ j ] < pivot_weight ) ) - j--; - if ( i <= j ) { - if ( i < j ) { - temp = data[i]; data[i] = data[j]; data[j] = temp; - temp = wait[i]; wait[i] = wait[j]; wait[j] = temp; - temp = weight[i]; weight[i] = weight[j]; weight[j] = temp; - } - i += 1; j -= 1; - } - } - if ( j > ( lo + hi ) / 2 ) { - if ( lo < j ) { - qpos += 1; - qstack[qpos].lo = lo; - qstack[qpos].hi = j; - } - if ( i < hi ) { - qpos += 1; - qstack[qpos].lo = i; - qstack[qpos].hi = hi; - } - } - else { - if ( i < hi ) { - qpos += 1; - qstack[qpos].lo = i; - qstack[qpos].hi = hi; - } - if ( lo < j ) { - qpos += 1; - qstack[qpos].lo = lo; - qstack[qpos].hi = j; - } - } - } - } - - } - - diff --git a/src/queue.h b/src/queue.h index 2db5b57c04f92ef5b3809ca77167d0fbce8ae780..f15664be93a4a00516614911b9826cfe41458220 100644 --- a/src/queue.h +++ b/src/queue.h @@ -20,6 +20,8 @@ /* Some constants. */ #define queue_maxhits 10 +#define queue_sizeinit 100 +#define queue_sizegrow 2 /* The queue timers themselves. */ @@ -47,7 +49,7 @@ struct queue { lock_type lock; /* Size, count and next element. */ - int size, count, next; + int size, count; /* The actual tasks to which the indices refer. */ struct task *tasks; @@ -59,8 +61,6 @@ struct queue { /* Function prototypes. */ -struct task *queue_gettask_old ( struct queue *q , int blocking , int keep ); -struct task *queue_gettask ( struct queue *q , int rid , int blocking , int keep ); -void queue_init ( struct queue *q , int size , struct task *tasks ); +struct task *queue_gettask ( struct queue *q , int qid , int blocking ); +void queue_init ( struct queue *q , struct task *tasks ); void queue_insert ( struct queue *q , struct task *t ); -void queue_sort ( struct queue *q ); diff --git a/src/runner.c b/src/runner.c index e55895ea739e0c118fe7610bb31948ae2d8fd0c7..90c34007d66183a77793157fa6bfbef52c6baa8d 100644 --- a/src/runner.c +++ b/src/runner.c @@ -29,7 +29,6 @@ #include <float.h> #include <limits.h> #include <omp.h> -#include <sched.h> /* Local headers. */ #include "cycle.h" @@ -42,6 +41,7 @@ #include "cell.h" #include "space.h" #include "queue.h" +#include "scheduler.h" #include "engine.h" #include "runner.h" #include "runner_iact.h" @@ -617,15 +617,10 @@ void *runner_main ( void *data ) { struct runner *r = (struct runner *)data; struct engine *e = r->e; + struct scheduler *sched = &e->sched; int threadID = r->id; - int k, qid, naq, keep, tpq; - struct queue *queues[ e->nr_queues ], *myq; struct task *t; struct cell *ci, *cj; - unsigned int myseed = rand() + r->id; - #ifdef TIMER - ticks stalled; - #endif /* Main loop. */ while ( 1 ) { @@ -633,88 +628,17 @@ void *runner_main ( void *data ) { /* Wait at the barrier. */ engine_barrier( e ); - /* Set some convenient local data. */ - keep = e->policy & engine_policy_keep; - myq = &e->queues[ threadID * e->nr_queues / e->nr_threads ]; - tpq = ceil( ((double)e->nr_threads) / e->nr_queues ); - #ifdef TIMER - stalled = 0; - #endif - - /* Set up the local list of active queues. */ - naq = e->nr_queues; - for ( k = 0 ; k < naq ; k++ ) - queues[k] = &e->queues[k]; - - /* Set up the local list of active queues. */ - naq = e->nr_queues; - for ( k = 0 ; k < naq ; k++ ) - queues[k] = &e->queues[k]; - /* Loop while there are tasks... */ while ( 1 ) { - /* Remove any inactive queues. */ - for ( k = 0 ; k < naq ; k++ ) - if ( queues[k]->next == queues[k]->count ) { - naq -= 1; - queues[k] = queues[naq]; - k -= 1; - } - if ( naq == 0 ) - break; - /* Get a task, how and from where depends on the policy. */ TIMER_TIC - t = NULL; - if ( e->nr_queues == 1 ) { - t = queue_gettask_old( &e->queues[0] , 1 , 0 ); - } - else if ( e->policy & engine_policy_steal ) { - if ( ( myq->next == myq->count ) || - ( t = queue_gettask( myq , r->id , 0 , 0 ) ) == NULL ) { - TIMER_TIC2 - qid = rand_r( &myseed ) % naq; - keep = ( e->policy & engine_policy_keep ) && - ( myq->count <= myq->size-tpq ); - if ( myq->next == myq->count ) - COUNT(runner_counter_steal_empty); - else - COUNT(runner_counter_steal_stall); - t = queue_gettask( queues[qid] , r->id , 0 , keep ); - if ( t != NULL && keep ) - queue_insert( myq , t ); - TIMER_TOC2(timer_steal); - } - } - else if ( e->policy & engine_policy_rand ) { - qid = rand_r( &myseed ) % naq; - t = queue_gettask( queues[qid] , r->id , e->policy & engine_policy_block , 0 ); - } - else { - t = queue_gettask( &e->queues[threadID] , r->id , e->policy & engine_policy_block , 0 ); - } + t = scheduler_gettask( sched , threadID ); TIMER_TOC(timer_getpair); /* Did I get anything? */ - if ( t == NULL ) { - COUNT(runner_counter_stall); - #ifdef TIMER - if ( !stalled ) - stalled = getticks(); - #endif - continue; - } - #ifdef TIMER - else if ( stalled ) { - timers_toc( timer_stalled , stalled ); - #ifdef TIMER_VERBOSE - printf( "runner_main[%02i]: stalled %.3f ms\n" , r->id , ((double)stalled) / CPU_TPS * 1000 ); - fflush(stdout); - #endif - stalled = 0; - } - #endif + if ( t == NULL ) + break; /* Get the cells. */ ci = t->ci; @@ -731,7 +655,6 @@ void *runner_main ( void *data ) { runner_doself2_force( r , ci ); else error( "Unknown task subtype." ); - cell_unlocktree( ci ); break; case task_type_pair: if ( t->subtype == task_subtype_density ) @@ -740,12 +663,9 @@ void *runner_main ( void *data ) { runner_dopair2_force( r , ci , cj ); else error( "Unknown task subtype." ); - cell_unlocktree( ci ); - cell_unlocktree( cj ); break; case task_type_sort: runner_dosort( r , ci , t->flags , 1 ); - cell_unlocktree( ci ); break; case task_type_sub: if ( t->subtype == task_subtype_density ) @@ -754,9 +674,6 @@ void *runner_main ( void *data ) { runner_dosub2_force( r , ci , cj , t->flags ); else error( "Unknown task subtype." ); - cell_unlocktree( ci ); - if ( cj != NULL ) - cell_unlocktree( cj ); break; case task_type_ghost: if ( ci->super == ci ) @@ -769,26 +686,12 @@ void *runner_main ( void *data ) { error( "Unknown task type." ); } t->toc = getticks(); + + /* We're done with this task. */ + scheduler_done( sched , t ); - /* Resolve any dependencies. */ - for ( k = 0 ; k < t->nr_unlock_tasks ; k++ ) - if ( atomic_dec( &t->unlock_tasks[k]->wait ) == 0 ) - error( "Task negative wait." ); - } /* main loop. */ - /* Any leftover stalls? */ - #ifdef TIMER - if ( stalled ) { - timers_toc( timer_stalled , stalled ); - #ifdef TIMER_VERBOSE - printf( "runner_main[%02i]: stalled %.3f ms\n" , r->id , ((double)stalled) / CPU_TPS * 1000 ); - fflush(stdout); - #endif - stalled = 0; - } - #endif - } /* Be kind, rewind. */ diff --git a/src/scheduler.c b/src/scheduler.c new file mode 100644 index 0000000000000000000000000000000000000000..8ffc16a48057e4042f5e6a967c2a3f88848905c2 --- /dev/null +++ b/src/scheduler.c @@ -0,0 +1,713 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Coypright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/* Config parameters. */ +#include "../config.h" + +/* Some standard headers. */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <math.h> + +/* Local headers. */ +#include "error.h" +#include "cycle.h" +#include "atomic.h" +#include "timers.h" +#include "const.h" +#include "vector.h" +#include "lock.h" +#include "task.h" +#include "part.h" +#include "debug.h" +#include "cell.h" +#include "space.h" +#include "queue.h" +#include "kernel.h" +#include "scheduler.h" + + +/** + * @brief Mapping function to append a ghost task to each cell. + * + * Looks for the super cell, e.g. the highest-level cell above each + * cell for which a pair is defined. All ghosts below this cell will + * depend on the ghost of their parents (sounds spooky, but it isn't). + * + * A kick2-task is appended to each super cell. + */ + +void scheduler_map_mkghosts ( struct cell *c , void *data ) { + + struct scheduler *s = (struct scheduler *)data; + struct cell *finger; + + /* Find the super cell, i.e. the highest cell hierarchically above + this one to still have at least one task associated with it. */ + c->super = c; + for ( finger = c->parent ; finger != NULL ; finger = finger->parent ) + if ( finger->nr_tasks > 0 ) + c->super = finger; + + /* Make the ghost task */ + if ( c->super != c || c->nr_tasks > 0 ) + c->ghost = scheduler_addtask( s , task_type_ghost , task_subtype_none , 0 , 0 , c , NULL , 0 ); + + /* Append a kick task if we are the active super cell. */ + if ( c->super == c && c->nr_tasks > 0 ) + c->kick2 = scheduler_addtask( s , task_type_kick2 , task_subtype_none , 0 , 0 , c , NULL , 0 ); + + /* If we are not the super cell ourselves, make our ghost depend + on our parent cell. */ + if ( c->super != c ) + task_addunlock( c->parent->ghost , c->ghost ); + + } + + +/** + * @brief Split tasks that may be too large. + * + * @param s The #scheduler we are working in. + */ + +void scheduler_splittasks ( struct scheduler *s ) { + + int j, k, ind, sid, tid = 0, redo; + struct cell *ci, *cj; + double hi, hj, shift[3]; + struct task *t, *t_old; + // float dt_step = s->dt_step; + int pts[7][8] = { { -1 , 12 , 10 , 9 , 4 , 3 , 1 , 0 } , + { -1 , -1 , 11 , 10 , 5 , 4 , 2 , 1 } , + { -1 , -1 , -1 , 12 , 7 , 6 , 4 , 3 } , + { -1 , -1 , -1 , -1 , 8 , 7 , 5 , 4 } , + { -1 , -1 , -1 , -1 , -1 , 12 , 10 , 9 } , + { -1 , -1 , -1 , -1 , -1 , -1 , 11 , 10 } , + { -1 , -1 , -1 , -1 , -1 , -1 , -1 , 12 } }; + + /* Loop through the tasks... */ + // #pragma omp parallel default(none) shared(s,tid,pts,space_subsize) private(ind,j,k,t,t_old,redo,ci,cj,hi,hj,sid,shift) + { + redo = 0; t_old = t = NULL; + while ( 1 ) { + + /* Get a pointer on the task. */ + if ( redo ) { + redo = 0; + t = t_old; + } + else { + if ( ( ind = atomic_inc( &tid ) ) < s->nr_tasks ) + t_old = t = &s->tasks[ s->tasks_ind[ ind ] ]; + else + break; + } + + /* Empty task? */ + if ( t->ci == NULL || ( t->type == task_type_pair && t->cj == NULL ) ) { + t->type = task_type_none; + t->skip = 1; + continue; + } + + /* Self-interaction? */ + if ( t->type == task_type_self ) { + + /* Get a handle on the cell involved. */ + ci = t->ci; + + /* Ingore this task? */ + /* if ( ci->dt_min > dt_step ) { + t->skip = 1; + continue; + } */ + + /* Is this cell even split? */ + if ( ci->split ) { + + /* Make a sub? */ + if ( scheduler_dosub && ci->count < space_subsize && ci->maxdepth - ci->depth < scheduler_maxsubdepth ) { + + /* convert to a self-subtask. */ + t->type = task_type_sub; + + } + + /* Otherwise, make tasks explicitly. */ + else { + + /* Take a step back (we're going to recycle the current task)... */ + redo = 1; + + /* Add the self taks. */ + for ( k = 0 ; ci->progeny[k] == NULL ; k++ ); + t->ci = ci->progeny[k]; + for ( k += 1 ; k < 8 ; k++ ) + if ( ci->progeny[k] != NULL ) + scheduler_addtask( s , task_type_self , task_subtype_density , 0 , 0 , ci->progeny[k] , NULL , 0 ); + + /* Make a task for each pair of progeny. */ + for ( j = 0 ; j < 8 ; j++ ) + if ( ci->progeny[j] != NULL ) + for ( k = j + 1 ; k < 8 ; k++ ) + if ( ci->progeny[k] != NULL ) + scheduler_addtask( s , task_type_pair , task_subtype_density , pts[j][k] , 0 , ci->progeny[j] , ci->progeny[k] , 0 ); + } + + } + + } + + /* Pair interaction? */ + else if ( t->type == task_type_pair ) { + + /* Get a handle on the cells involved. */ + ci = t->ci; + cj = t->cj; + hi = ci->dmin; + hj = cj->dmin; + + /* Ingore this task? */ + /* if ( ci->dt_min > dt_step && cj->dt_min > dt_step ) { + t->skip = 1; + continue; + } */ + + /* Get the sort ID, use space_getsid and not t->flags + to make sure we get ci and cj swapped if needed. */ + sid = space_getsid( s->space , &ci , &cj , shift ); + + /* Should this task be split-up? */ + if ( ci->split && cj->split && + ci->h_max*kernel_gamma*space_stretch < hi/2 && + cj->h_max*kernel_gamma*space_stretch < hj/2 ) { + + /* Replace by a single sub-task? */ + if ( scheduler_dosub && + ci->count < space_subsize && cj->count < space_subsize && + ci->maxdepth - ci->depth < scheduler_maxsubdepth && cj->maxdepth - cj->depth < scheduler_maxsubdepth && + sid != 0 && sid != 2 && sid != 6 && sid != 8 ) { + + /* Make this task a sub task. */ + t->type = task_type_sub; + + } + + /* Otherwise, split it. */ + else { + + /* Take a step back (we're going to recycle the current task)... */ + redo = 1; + + /* For each different sorting type... */ + switch ( sid ) { + + case 0: /* ( 1 , 1 , 1 ) */ + t->ci = ci->progeny[7]; t->cj = cj->progeny[0]; t->flags = 0; + break; + + case 1: /* ( 1 , 1 , 0 ) */ + t->ci = ci->progeny[6]; t->cj = cj->progeny[0]; t->flags = 1; t->tight = 1; + t = scheduler_addtask( s , task_type_pair , t->subtype , 1 , 0 , ci->progeny[7] , cj->progeny[1] , 1 ); + t = scheduler_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[6] , cj->progeny[1] , 1 ); + t = scheduler_addtask( s , task_type_pair , t->subtype , 2 , 0 , ci->progeny[7] , cj->progeny[0] , 1 ); + break; + + case 2: /* ( 1 , 1 , -1 ) */ + t->ci = ci->progeny[6]; t->cj = cj->progeny[1]; t->flags = 2; t->tight = 1; + break; + + case 3: /* ( 1 , 0 , 1 ) */ + t->ci = ci->progeny[5]; t->cj = cj->progeny[0]; t->flags = 3; t->tight = 1; + t = scheduler_addtask( s , task_type_pair , t->subtype , 3 , 0 , ci->progeny[7] , cj->progeny[2] , 1 ); + t = scheduler_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[5] , cj->progeny[2] , 1 ); + t = scheduler_addtask( s , task_type_pair , t->subtype , 6 , 0 , ci->progeny[7] , cj->progeny[0] , 1 ); + break; + + case 4: /* ( 1 , 0 , 0 ) */ + t->ci = ci->progeny[4]; t->cj = cj->progeny[0]; t->flags = 4; t->tight = 1; + t = scheduler_addtask( s , task_type_pair , t->subtype , 5 , 0 , ci->progeny[5] , cj->progeny[0] , 1 ); + t = scheduler_addtask( s , task_type_pair , t->subtype , 7 , 0 , ci->progeny[6] , cj->progeny[0] , 1 ); + t = scheduler_addtask( s , task_type_pair , t->subtype , 8 , 0 , ci->progeny[7] , cj->progeny[0] , 1 ); + t = scheduler_addtask( s , task_type_pair , t->subtype , 3 , 0 , ci->progeny[4] , cj->progeny[1] , 1 ); + t = scheduler_addtask( s , task_type_pair , t->subtype , 4 , 0 , ci->progeny[5] , cj->progeny[1] , 1 ); + t = scheduler_addtask( s , task_type_pair , t->subtype , 6 , 0 , ci->progeny[6] , cj->progeny[1] , 1 ); + t = scheduler_addtask( s , task_type_pair , t->subtype , 7 , 0 , ci->progeny[7] , cj->progeny[1] , 1 ); + t = scheduler_addtask( s , task_type_pair , t->subtype , 1 , 0 , ci->progeny[4] , cj->progeny[2] , 1 ); + t = scheduler_addtask( s , task_type_pair , t->subtype , 2 , 0 , ci->progeny[5] , cj->progeny[2] , 1 ); + t = scheduler_addtask( s , task_type_pair , t->subtype , 4 , 0 , ci->progeny[6] , cj->progeny[2] , 1 ); + t = scheduler_addtask( s , task_type_pair , t->subtype , 5 , 0 , ci->progeny[7] , cj->progeny[2] , 1 ); + t = scheduler_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[4] , cj->progeny[3] , 1 ); + t = scheduler_addtask( s , task_type_pair , t->subtype , 1 , 0 , ci->progeny[5] , cj->progeny[3] , 1 ); + t = scheduler_addtask( s , task_type_pair , t->subtype , 3 , 0 , ci->progeny[6] , cj->progeny[3] , 1 ); + t = scheduler_addtask( s , task_type_pair , t->subtype , 4 , 0 , ci->progeny[7] , cj->progeny[3] , 1 ); + break; + + case 5: /* ( 1 , 0 , -1 ) */ + t->ci = ci->progeny[4]; t->cj = cj->progeny[1]; t->flags = 5; t->tight = 1; + t = scheduler_addtask( s , task_type_pair , t->subtype , 5 , 0 , ci->progeny[6] , cj->progeny[3] , 1 ); + t = scheduler_addtask( s , task_type_pair , t->subtype , 2 , 0 , ci->progeny[4] , cj->progeny[3] , 1 ); + t = scheduler_addtask( s , task_type_pair , t->subtype , 8 , 0 , ci->progeny[6] , cj->progeny[1] , 1 ); + break; + + case 6: /* ( 1 , -1 , 1 ) */ + t->ci = ci->progeny[5]; t->cj = cj->progeny[2]; t->flags = 6; t->tight = 1; + break; + + case 7: /* ( 1 , -1 , 0 ) */ + t->ci = ci->progeny[4]; t->cj = cj->progeny[3]; t->flags = 6; t->tight = 1; + t = scheduler_addtask( s , task_type_pair , t->subtype , 8 , 0 , ci->progeny[5] , cj->progeny[2] , 1 ); + t = scheduler_addtask( s , task_type_pair , t->subtype , 7 , 0 , ci->progeny[4] , cj->progeny[2] , 1 ); + t = scheduler_addtask( s , task_type_pair , t->subtype , 7 , 0 , ci->progeny[5] , cj->progeny[3] , 1 ); + break; + + case 8: /* ( 1 , -1 , -1 ) */ + t->ci = ci->progeny[4]; t->cj = cj->progeny[3]; t->flags = 8; t->tight = 1; + break; + + case 9: /* ( 0 , 1 , 1 ) */ + t->ci = ci->progeny[3]; t->cj = cj->progeny[0]; t->flags = 9; t->tight = 1; + t = scheduler_addtask( s , task_type_pair , t->subtype , 9 , 0 , ci->progeny[7] , cj->progeny[4] , 1 ); + t = scheduler_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[3] , cj->progeny[4] , 1 ); + t = scheduler_addtask( s , task_type_pair , t->subtype , 8 , 0 , ci->progeny[7] , cj->progeny[0] , 1 ); + break; + + case 10: /* ( 0 , 1 , 0 ) */ + t->ci = ci->progeny[2]; t->cj = cj->progeny[0]; t->flags = 10; t->tight = 1; + t = scheduler_addtask( s , task_type_pair , t->subtype , 11 , 0 , ci->progeny[3] , cj->progeny[0] , 1 ); + t = scheduler_addtask( s , task_type_pair , t->subtype , 7 , 0 , ci->progeny[6] , cj->progeny[0] , 1 ); + t = scheduler_addtask( s , task_type_pair , t->subtype , 6 , 0 , ci->progeny[7] , cj->progeny[0] , 1 ); + t = scheduler_addtask( s , task_type_pair , t->subtype , 9 , 0 , ci->progeny[2] , cj->progeny[1] , 1 ); + t = scheduler_addtask( s , task_type_pair , t->subtype , 10 , 0 , ci->progeny[3] , cj->progeny[1] , 1 ); + t = scheduler_addtask( s , task_type_pair , t->subtype , 8 , 0 , ci->progeny[6] , cj->progeny[1] , 1 ); + t = scheduler_addtask( s , task_type_pair , t->subtype , 7 , 0 , ci->progeny[7] , cj->progeny[1] , 1 ); + t = scheduler_addtask( s , task_type_pair , t->subtype , 1 , 0 , ci->progeny[2] , cj->progeny[4] , 1 ); + t = scheduler_addtask( s , task_type_pair , t->subtype , 2 , 0 , ci->progeny[3] , cj->progeny[4] , 1 ); + t = scheduler_addtask( s , task_type_pair , t->subtype , 10 , 0 , ci->progeny[6] , cj->progeny[4] , 1 ); + t = scheduler_addtask( s , task_type_pair , t->subtype , 11 , 0 , ci->progeny[7] , cj->progeny[4] , 1 ); + t = scheduler_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[2] , cj->progeny[5] , 1 ); + t = scheduler_addtask( s , task_type_pair , t->subtype , 1 , 0 , ci->progeny[3] , cj->progeny[5] , 1 ); + t = scheduler_addtask( s , task_type_pair , t->subtype , 9 , 0 , ci->progeny[6] , cj->progeny[5] , 1 ); + t = scheduler_addtask( s , task_type_pair , t->subtype , 10 , 0 , ci->progeny[7] , cj->progeny[5] , 1 ); + break; + + case 11: /* ( 0 , 1 , -1 ) */ + t->ci = ci->progeny[2]; t->cj = cj->progeny[1]; t->flags = 11; t->tight = 1; + t = scheduler_addtask( s , task_type_pair , t->subtype , 11 , 0 , ci->progeny[6] , cj->progeny[5] , 1 ); + t = scheduler_addtask( s , task_type_pair , t->subtype , 2 , 0 , ci->progeny[2] , cj->progeny[5] , 1 ); + t = scheduler_addtask( s , task_type_pair , t->subtype , 6 , 0 , ci->progeny[6] , cj->progeny[1] , 1 ); + break; + + case 12: /* ( 0 , 0 , 1 ) */ + t->ci = ci->progeny[1]; t->cj = cj->progeny[0]; t->flags = 12; t->tight = 1; + t = scheduler_addtask( s , task_type_pair , t->subtype , 11 , 0 , ci->progeny[3] , cj->progeny[0] , 1 ); + t = scheduler_addtask( s , task_type_pair , t->subtype , 5 , 0 , ci->progeny[5] , cj->progeny[0] , 1 ); + t = scheduler_addtask( s , task_type_pair , t->subtype , 2 , 0 , ci->progeny[7] , cj->progeny[0] , 1 ); + t = scheduler_addtask( s , task_type_pair , t->subtype , 9 , 0 , ci->progeny[1] , cj->progeny[2] , 1 ); + t = scheduler_addtask( s , task_type_pair , t->subtype , 12 , 0 , ci->progeny[3] , cj->progeny[2] , 1 ); + t = scheduler_addtask( s , task_type_pair , t->subtype , 8 , 0 , ci->progeny[5] , cj->progeny[2] , 1 ); + t = scheduler_addtask( s , task_type_pair , t->subtype , 5 , 0 , ci->progeny[7] , cj->progeny[2] , 1 ); + t = scheduler_addtask( s , task_type_pair , t->subtype , 3 , 0 , ci->progeny[1] , cj->progeny[4] , 1 ); + t = scheduler_addtask( s , task_type_pair , t->subtype , 6 , 0 , ci->progeny[3] , cj->progeny[4] , 1 ); + t = scheduler_addtask( s , task_type_pair , t->subtype , 12 , 0 , ci->progeny[5] , cj->progeny[4] , 1 ); + t = scheduler_addtask( s , task_type_pair , t->subtype , 11 , 0 , ci->progeny[7] , cj->progeny[4] , 1 ); + t = scheduler_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[1] , cj->progeny[6] , 1 ); + t = scheduler_addtask( s , task_type_pair , t->subtype , 3 , 0 , ci->progeny[3] , cj->progeny[6] , 1 ); + t = scheduler_addtask( s , task_type_pair , t->subtype , 9 , 0 , ci->progeny[5] , cj->progeny[6] , 1 ); + t = scheduler_addtask( s , task_type_pair , t->subtype , 12 , 0 , ci->progeny[7] , cj->progeny[6] , 1 ); + break; + + } + + } + + } /* split this task? */ + + /* Otherwise, if not spilt, stitch-up the sorting. */ + else { + + /* Create the sort for ci. */ + // lock_lock( &ci->lock ); + if ( ci->sorts == NULL ) + ci->sorts = scheduler_addtask( s , task_type_sort , 0 , 1 << sid , 0 , ci , NULL , 0 ); + else + ci->sorts->flags |= (1 << sid); + // lock_unlock_blind( &ci->lock ); + task_addunlock( ci->sorts , t ); + + /* Create the sort for cj. */ + // lock_lock( &cj->lock ); + if ( cj->sorts == NULL ) + cj->sorts = scheduler_addtask( s , task_type_sort , 0 , 1 << sid , 0 , cj , NULL , 0 ); + else + cj->sorts->flags |= (1 << sid); + // lock_unlock_blind( &cj->lock ); + task_addunlock( cj->sorts , t ); + + } + + } /* pair interaction? */ + + } /* loop over all tasks. */ + + } + + } + + +/** + * @brief Add a #task to the #scheduler. + * + * @param s The #scheduler we are working in. + * @param type The type of the task. + * @param subtype The sub-type of the task. + * @param flags The flags of the task. + * @param wait + * @param ci The first cell to interact. + * @param cj The second cell to interact. + * @param tight + */ + +struct task *scheduler_addtask ( struct scheduler *s , int type , int subtype , int flags , int wait , struct cell *ci , struct cell *cj , int tight ) { + + int ind; + struct task *t; + + /* Get the next free task. */ + ind = atomic_inc( &s->tasks_next ); + t = &s->tasks[ ind ]; + + /* Copy the data. */ + t->type = type; + t->subtype = subtype; + t->flags = flags; + t->wait = wait; + t->ci = ci; + t->cj = cj; + t->skip = 0; + t->tight = tight; + t->nr_unlock_tasks = 0; + + /* Init the lock. */ + lock_init( &t->lock ); + + /* Add an index for it. */ + // lock_lock( &s->lock ); + s->tasks_ind[ atomic_inc( &s->nr_tasks ) ] = ind; + // lock_unlock_blind( &s->lock ); + + /* Return a pointer to the new task. */ + return t; + + } + + + +/** + * @brief Sort the tasks in topological order over all queues. + * + * @param s The #scheduler. + */ + +void scheduler_ranktasks ( struct scheduler *s ) { + + int i, j = 0, k, temp, left = 0, rank; + struct task *t, *tasks = s->tasks; + int *tid = s->tasks_ind, nr_tasks = s->nr_tasks; + + /* Run throught the tasks and get all the waits right. */ + for ( i = 0 , k = 0 ; k < nr_tasks ; k++ ) { + tid[k] = k; + for ( j = 0 ; j < tasks[k].nr_unlock_tasks ; j++ ) + tasks[k].unlock_tasks[j]->wait += 1; + } + + /* Main loop. */ + for ( j = 0 , rank = 0 ; left < nr_tasks ; rank++ ) { + + /* Load the tids of tasks with no waits. */ + for ( k = left ; k < nr_tasks ; k++ ) + if ( tasks[ tid[k] ].wait == 0 ) { + temp = tid[j]; tid[j] = tid[k]; tid[k] = temp; + j += 1; + } + + /* Did we get anything? */ + if ( j == left ) + error( "Unsatisfiable task dependencies detected." ); + + /* Unlock the next layer of tasks. */ + for ( i = left ; i < j ; i++ ) { + t = &tasks[ tid[i] ]; + t->rank = rank; + tid[i] = t - tasks; + if ( tid[i] >= nr_tasks ) + error( "Task index overshoot." ); + /* printf( "scheduler_ranktasks: task %i of type %s has rank %i.\n" , i , + (t->type == task_type_self) ? "self" : (t->type == task_type_pair) ? "pair" : "sort" , rank ); */ + for ( k = 0 ; k < t->nr_unlock_tasks ; k++ ) + t->unlock_tasks[k]->wait -= 1; + } + + /* The new left (no, not tony). */ + left = j; + + } + + } + + +/** + * @brief (Re)allocate the task arrays. + * + * @param s The #scheduler. + * @param size The maximum number of tasks in the #scheduler. + */ + +void scheduler_reset ( struct scheduler *s , int size ) { + + int k; + + /* Do we need to re-allocate? */ + if ( size > s->size ) { + + /* Free exising task lists if necessary. */ + if ( s->tasks != NULL ) + free( s->tasks ); + if ( s->tasks_ind != NULL ) + free( s->tasks_ind ); + + /* Allocate the new lists. */ + if ( ( s->tasks = (struct task *)malloc( sizeof(struct task) * size ) ) == NULL || + ( s->tasks_ind = (int *)malloc( sizeof(int) * size ) ) == NULL ) + error( "Failed to allocate task lists." ); + + } + + /* Reset the counters. */ + s->size = size; + s->nr_tasks = 0; + s->tasks_next = 0; + s->waiting = 0; + + /* Set the task pointers in the queues. */ + for ( k = 0 ; k < s->nr_queues ; k++ ) + s->queues[k].tasks = s->tasks; + + } + + +/** + * @brief Start the scheduler, i.e. fill the queues with ready tasks. + * + * @param s The #scheduler. + */ + +void scheduler_start ( struct scheduler *s ) { + + int k, j; + struct task *t; + + /* Run through the tasks and get all the waits right. */ + // #pragma omp parallel for schedule(static) private(t,j) + for ( k = 0 ; k < s->nr_tasks ; k++ ) { + t = &s->tasks[k]; + if ( !t->skip ) + for ( j = 0 ; j < t->nr_unlock_tasks ; j++ ) + atomic_inc( &t->unlock_tasks[j]->wait ); + } + + /* Loop over the tasks and enqueue whoever is ready. */ + for ( k = 0 ; k < s->nr_tasks ; k++ ) { + t = &s->tasks[k]; + if ( !t->skip && t->wait == 0 ) + scheduler_enqueue( s , t ); + } + + } + + +/** + * @brief Put a task on one of the queues. + * + * @param s The #scheduler. + * @param t The #task. + */ + +void scheduler_enqueue ( struct scheduler *s , struct task *t ) { + + int k, qid = -1; + + /* Ignore skipped tasks. */ + if ( t->skip ) + return; + + /* Find the previous owner for each task type. */ + switch ( t->type ) { + case task_type_self: + case task_type_sort: + case task_type_ghost: + case task_type_kick2: + qid = t->ci->super->owner; + break; + case task_type_pair: + case task_type_sub: + qid = t->ci->super->owner; + if ( t->cj != NULL && + ( qid < 0 || s->queues[qid].count > s->queues[t->cj->super->owner].count ) ) + qid = t->cj->super->owner; + break; + } + + /* If no previous owner, find the shortest queue. */ + if ( qid < 0 ) + for ( qid = 0 , k = 1 ; k < s->nr_queues ; k++ ) + if ( s->queues[k].count < s->queues[qid].count ) + qid = k; + + /* Increase the waiting counter. */ + atomic_inc( &s->waiting ); + + /* Insert the task into that queue. */ + queue_insert( &s->queues[qid] , t ); + + } + + +/** + * @brief Take care of a tasks dependencies. + * + * @param s The #scheduler. + * @param t The finished #task. + */ + +void scheduler_done ( struct scheduler *s , struct task *t ) { + + int k; + struct task *t2; + + /* Release whatever locks this task held. */ + switch ( t->type ) { + case task_type_self: + case task_type_sort: + cell_unlocktree( t->ci ); + break; + case task_type_pair: + case task_type_sub: + cell_unlocktree( t->ci ); + if ( t->cj != NULL ) + cell_unlocktree( t->cj ); + break; + } + + /* Loop through the dependencies and add them to a queue if + they are ready. */ + for ( k = 0 ; k < t->nr_unlock_tasks ; k++ ) { + t2 = t->unlock_tasks[k]; + if ( atomic_dec( &t2->wait ) == 1 && !t2->skip ) + scheduler_enqueue( s , t2 ); + } + + /* Task definitely done. */ + atomic_dec( &s->waiting ); + + } + + +/** + * @brief Get a task, preferably from the given queue. + * + * @param s The #scheduler. + * @param qid The ID of the prefered #queue. + * + * @return A pointer to a #task or @c NULL if there are no available tasks. + */ + +struct task *scheduler_gettask ( struct scheduler *s , int qid ) { + + struct task *res; + int k, max_count, max_ind; + + /* Loop as long as there are tasks... */ + while ( s->waiting > 0 ) { + + /* Try to get a task from the suggested queue. */ + if ( ( res = queue_gettask( &s->queues[qid] , qid , 0 ) ) != NULL ) + return res; + + /* If unsucessful, try stealing from the largest queue. */ + if ( s->flags & scheduler_flag_steal ) { + max_count = 0; max_ind = 0; + for ( k = 0 ; k < s->nr_queues ; k++ ) + if ( k != qid && s->queues[k].count > max_count ) { + max_ind = k; + max_count = s->queues[k].count; + } + if ( max_count > 0 && ( res = queue_gettask( &s->queues[ max_ind ] , qid , 0 ) ) != NULL ) + return res; + } + + } + + /* No milk today. */ + return NULL; + + } + + +/** + * @brief Initialize the #scheduler. + * + * @param s The #scheduler. + * @param nr_queues The number of queues in this scheduler. + * @param flags The #scheduler flags. + */ + +void scheduler_init ( struct scheduler *s , struct space *space , int nr_queues , unsigned int flags ) { + + int k; + + /* Init the lock. */ + lock_init( &s->lock ); + + /* Allocate the queues. */ + if ( ( s->queues = (struct queue *)malloc( sizeof(struct queue) * nr_queues ) ) == NULL ) + error( "Failed to allocate queues." ); + + /* Initialize each queue. */ + for ( k = 0 ; k < nr_queues ; k++ ) + queue_init( &s->queues[k] , NULL ); + + /* Set the scheduler variables. */ + s->nr_queues = nr_queues; + s->flags = flags; + s->space = space; + + /* Init other values. */ + s->tasks = NULL; + s->tasks_ind = NULL; + s->waiting = 0; + s->size = 0; + s->nr_tasks = 0; + s->tasks_next = 0; + + } + diff --git a/src/scheduler.h b/src/scheduler.h new file mode 100644 index 0000000000000000000000000000000000000000..485c9b101b25cb699b7f24e1589884bc94986b72 --- /dev/null +++ b/src/scheduler.h @@ -0,0 +1,75 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Coypright (c) 2013 Pedro Gonnet (pedro.gonnet@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + + +/* Some constants. */ +#define scheduler_maxwait 3 +#define scheduler_maxunlock 40 +#define scheduler_dosub 1 +#define scheduler_maxsubdepth 3 + +/* Flags . */ +#define scheduler_flag_none 0 +#define scheduler_flag_steal 1 + + +/* Data of a scheduler. */ +struct scheduler { + + /* Scheduler flags. */ + unsigned int flags; + + /* Number of queues in this scheduler. */ + int nr_queues; + + /* Array of queues. */ + struct queue *queues; + + /* Total number of tasks. */ + int nr_tasks, size, tasks_next; + + /* Total number of waiting tasks. */ + int waiting; + + /* The task array. */ + struct task *tasks; + + /* The task indices. */ + int *tasks_ind; + + /* Lock for this scheduler. */ + lock_type lock; + + /* The space associated with this scheduler. */ + struct space *space; + + }; + + +/* Function prototypes. */ +void scheduler_init ( struct scheduler *s , struct space *space , int nr_queues , unsigned int flags ); +struct task *scheduler_gettask ( struct scheduler *s , int qid ); +void scheduler_enqueue ( struct scheduler *s , struct task *t ); +void scheduler_start ( struct scheduler *s ); +void scheduler_reset ( struct scheduler *s , int nr_tasks ); +void scheduler_ranktasks ( struct scheduler *s ); +struct task *scheduler_addtask ( struct scheduler *s , int type , int subtype , int flags , int wait , struct cell *ci , struct cell *cj , int tight ); +void scheduler_splittasks ( struct scheduler *s ); +void scheduler_map_mkghosts ( struct cell *c , void *data ); +void scheduler_done ( struct scheduler *s , struct task *t ); diff --git a/src/space.c b/src/space.c index 97fad5127eb5f72a13bb6e29ba5eab0098876912..fe719a580754f6bb17fdef34a5b68f06afc6493e 100644 --- a/src/space.c +++ b/src/space.c @@ -77,193 +77,6 @@ const int sortlistID[27] = { }; -/** - * @brief Mark tasks to be skipped and set the sort flags accordingly. - * - * @return 1 if the space has to be rebuilt, 0 otherwise. - */ - -int space_marktasks ( struct space *s ) { - - int k, nr_tasks = s->nr_tasks, *ind = s->tasks_ind; - struct task *t, *tasks = s->tasks; - float dt_step = s->dt_step; - struct cell *ci, *cj; - - /* Run through the tasks and mark as skip or not. */ - for ( k = 0 ; k < nr_tasks ; k++ ) { - - /* Get a handle on the kth task. */ - t = &tasks[ ind[k] ]; - - /* Sort-task? Note that due to the task ranking, the sorts - will all come before the pairs and/or subs. */ - if ( t->type == task_type_sort ) { - - /* Re-set the flags. */ - t->flags = 0; - t->skip = 1; - - } - - /* Single-cell task? */ - else if ( t->type == task_type_self || - t->type == task_type_ghost || - ( t->type == task_type_sub && t->cj == NULL ) ) { - - /* Set this task's skip. */ - t->skip = ( t->ci->dt_min > dt_step ); - - } - - /* Pair? */ - else if ( t->type == task_type_pair || ( t->type == task_type_sub && t->cj != NULL ) ) { - - /* Local pointers. */ - ci = t->ci; - cj = t->cj; - - /* Set this task's skip. */ - t->skip = ( ci->dt_min > dt_step && cj->dt_min > dt_step ); - - /* Too much particle movement? */ - if ( t->tight && - ( fmaxf( ci->h_max , cj->h_max ) + ci->dx_max + cj->dx_max > cj->dmin || - ci->dx_max > space_maxreldx*ci->h_max || cj->dx_max > space_maxreldx*cj->h_max ) ) - return 1; - - /* Set the sort flags. */ - if ( !t->skip && t->type == task_type_pair ) { - ci->sorts->flags |= (1 << t->flags); - ci->sorts->skip = 0; - cj->sorts->flags |= (1 << t->flags); - cj->sorts->skip = 0; - } - - } - - /* Kick2? */ - else if ( t->type == task_type_kick2 ) - t->skip = 0; - - /* None? */ - else if ( t->type == task_type_none ) - t->skip = 1; - - } - - /* All is well... */ - return 0; - - } - - -/** - * @brief Check the integrity of the space and rebuild if necessary. - * - * @param s The #space. - * - * Runs through the tasks and marks those as "skip" which have no - * effect for the current @c dt_max. Verifies the integrity of the - * cell tree for those tasks and triggers a rebuild if necessary. - */ - -int space_prepare ( struct space *s ) { - - int k, rebuild; - // struct task *t; - // float dt_step = s->dt_step; - float dx_max = 0.0f; - // int counts[ task_type_count + 1 ]; - ticks tic; - - /* Get the maximum displacement in the whole system. */ - for ( k = 0 ; k < s->nr_cells ; k++ ) - dx_max = fmaxf( dx_max , s->cells[k].dx_max ); - // printf( "space_prepare: dx_max is %e.\n" , dx_max ); - - /* Run through the tasks and mark as skip or not. */ - // tic = getticks(); - rebuild = space_marktasks( s ); - // printf( "space_prepare: space_marktasks took %.3f ms.\n" , (double)(getticks() - tic)/CPU_TPS*1000 ); - - /* Did this not go through? */ - if ( rebuild ) { - - /* Re-build the space. */ - tic = getticks(); - space_rebuild( s , 0.0 ); - printf( "space_prepare: space_rebuild took %.3f ms.\n" , (double)(getticks() - tic)/CPU_TPS*1000 ); - - /* Run through the tasks and mark as skip or not. */ - // tic = getticks(); - if ( space_marktasks( s ) ) - error( "space_marktasks failed after space_rebuild." ); - // printf( "space_prepare: space_marktasks took %.3f ms.\n" , (double)(getticks() - tic)/CPU_TPS*1000 ); - - } - - - /* Let whoever cares know if we rebuilt. */ - return rebuild; - - } - - -/** - * @brief Sort the tasks in topological order over all queues. - * - * @param s The #space. - */ - -void space_ranktasks ( struct space *s ) { - - int i, j = 0, k, temp, left = 0, rank; - struct task *t, *tasks = s->tasks; - int *tid = s->tasks_ind, nr_tasks = s->nr_tasks; - - /* Run throught the tasks and get all the waits right. */ - for ( i = 0 , k = 0 ; k < nr_tasks ; k++ ) { - tid[k] = k; - for ( j = 0 ; j < tasks[k].nr_unlock_tasks ; j++ ) - tasks[k].unlock_tasks[j]->wait += 1; - } - - /* Main loop. */ - for ( j = 0 , rank = 0 ; left < nr_tasks ; rank++ ) { - - /* Load the tids of tasks with no waits. */ - for ( k = left ; k < nr_tasks ; k++ ) - if ( tasks[ tid[k] ].wait == 0 ) { - temp = tid[j]; tid[j] = tid[k]; tid[k] = temp; - j += 1; - } - - /* Did we get anything? */ - if ( j == left ) - error( "Unsatisfiable task dependencies detected." ); - - /* Unlock the next layer of tasks. */ - for ( i = left ; i < j ; i++ ) { - t = &tasks[ tid[i] ]; - t->rank = rank; - tid[i] = t - tasks; - if ( tid[i] >= nr_tasks ) - error( "Task index overshoot." ); - /* printf( "engine_ranktasks: task %i of type %s has rank %i.\n" , i , - (t->type == task_type_self) ? "self" : (t->type == task_type_pair) ? "pair" : "sort" , rank ); */ - for ( k = 0 ; k < t->nr_unlock_tasks ; k++ ) - t->unlock_tasks[k]->wait -= 1; - } - - /* The new left (no, not tony). */ - left = j; - - } - - } - - /** * @brief Get the shift-id of the given pair of cells, swapping them * if need be. @@ -509,11 +322,6 @@ void space_rebuild ( struct space *s , double cell_max ) { } // printf( "space_rebuild: space_rebuild_recurse took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 ); - /* Now that we have the cell structre, re-build the tasks. */ - // tic = getticks(); - space_maketasks( s , 1 ); - // printf( "space_rebuild: maketasks took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 ); - } @@ -645,44 +453,6 @@ void space_map_clearsort ( struct cell *c , void *data ) { } -/** - * @brief Mapping function to append a ghost task to each cell. - * - * Looks for the super cell, e.g. the highest-level cell above each - * cell for which a pair is defined. All ghosts below this cell will - * depend on the ghost of their parents (sounds spooky, but it isn't). - * - * A kick2-task is appended to each super cell. - */ - -void space_map_mkghosts ( struct cell *c , void *data ) { - - struct space *s = (struct space *)data; - struct cell *finger; - - /* Find the super cell, i.e. the highest cell hierarchically above - this one to still have at least one task associated with it. */ - c->super = c; - for ( finger = c->parent ; finger != NULL ; finger = finger->parent ) - if ( finger->nr_tasks > 0 ) - c->super = finger; - - /* Make the ghost task */ - if ( c->super != c || c->nr_tasks > 0 ) - c->ghost = space_addtask( s , task_type_ghost , task_subtype_none , 0 , 0 , c , NULL , 0 ); - - /* Append a kick task if we are the active super cell. */ - if ( c->super == c && c->nr_tasks > 0 ) - c->kick2 = space_addtask( s , task_type_kick2 , task_subtype_none , 0 , 0 , c , NULL , 0 ); - - /* If we are not the super cell ourselves, make our ghost depend - on our parent cell. */ - if ( c->super != c ) - task_addunlock( c->parent->ghost , c->ghost ); - - } - - /** * @brief Map a function to all particles in a aspace. * @@ -812,530 +582,6 @@ void space_map_cells_pre ( struct space *s , int full , void (*fun)( struct cell } -/** - * @brief Add a #task to the #space. - * - * @param s The #space we are working in. - * @param type The type of the task. - * @param subtype The sub-type of the task. - * @param flags The flags of the task. - * @param wait - * @param ci The first cell to interact. - * @param cj The second cell to interact. - * @param tight - */ - -struct task *space_addtask ( struct space *s , int type , int subtype , int flags , int wait , struct cell *ci , struct cell *cj , int tight ) { - - int ind; - struct task *t; - - /* Get the next free task. */ - ind = atomic_inc( &s->tasks_next ); - t = &s->tasks[ ind ]; - - /* Copy the data. */ - t->type = type; - t->subtype = subtype; - t->flags = flags; - t->wait = wait; - t->ci = ci; - t->cj = cj; - t->skip = 0; - t->tight = tight; - t->nr_unlock_tasks = 0; - - /* Init the lock. */ - lock_init( &t->lock ); - - /* Add an index for it. */ - // lock_lock( &s->lock ); - s->tasks_ind[ atomic_inc( &s->nr_tasks ) ] = ind; - // lock_unlock_blind( &s->lock ); - - /* Return a pointer to the new task. */ - return t; - - } - - - -/** - * @brief Split tasks that may be too large. - * - * @param s The #space we are working in. - */ - -void space_splittasks ( struct space *s ) { - - int j, k, ind, sid, tid = 0, redo; - struct cell *ci, *cj; - double hi, hj, shift[3]; - struct task *t, *t_old; - // float dt_step = s->dt_step; - int pts[7][8] = { { -1 , 12 , 10 , 9 , 4 , 3 , 1 , 0 } , - { -1 , -1 , 11 , 10 , 5 , 4 , 2 , 1 } , - { -1 , -1 , -1 , 12 , 7 , 6 , 4 , 3 } , - { -1 , -1 , -1 , -1 , 8 , 7 , 5 , 4 } , - { -1 , -1 , -1 , -1 , -1 , 12 , 10 , 9 } , - { -1 , -1 , -1 , -1 , -1 , -1 , 11 , 10 } , - { -1 , -1 , -1 , -1 , -1 , -1 , -1 , 12 } }; - - /* Loop through the tasks... */ - // #pragma omp parallel default(none) shared(s,tid,pts,space_subsize) private(ind,j,k,t,t_old,redo,ci,cj,hi,hj,sid,shift) - { - redo = 0; t_old = t = NULL; - while ( 1 ) { - - /* Get a pointer on the task. */ - if ( redo ) { - redo = 0; - t = t_old; - } - else { - if ( ( ind = atomic_inc( &tid ) ) < s->nr_tasks ) - t_old = t = &s->tasks[ s->tasks_ind[ ind ] ]; - else - break; - } - - /* Empty task? */ - if ( t->ci == NULL || ( t->type == task_type_pair && t->cj == NULL ) ) { - t->type = task_type_none; - t->skip = 1; - continue; - } - - /* Self-interaction? */ - if ( t->type == task_type_self ) { - - /* Get a handle on the cell involved. */ - ci = t->ci; - - /* Ingore this task? */ - /* if ( ci->dt_min > dt_step ) { - t->skip = 1; - continue; - } */ - - /* Is this cell even split? */ - if ( ci->split ) { - - /* Make a sub? */ - if ( space_dosub && ci->count < space_subsize && ci->maxdepth - ci->depth < space_maxsubdepth ) { - - /* convert to a self-subtask. */ - t->type = task_type_sub; - - } - - /* Otherwise, make tasks explicitly. */ - else { - - /* Take a step back (we're going to recycle the current task)... */ - redo = 1; - - /* Add the self taks. */ - for ( k = 0 ; ci->progeny[k] == NULL ; k++ ); - t->ci = ci->progeny[k]; - for ( k += 1 ; k < 8 ; k++ ) - if ( ci->progeny[k] != NULL ) - space_addtask( s , task_type_self , task_subtype_density , 0 , 0 , ci->progeny[k] , NULL , 0 ); - - /* Make a task for each pair of progeny. */ - for ( j = 0 ; j < 8 ; j++ ) - if ( ci->progeny[j] != NULL ) - for ( k = j + 1 ; k < 8 ; k++ ) - if ( ci->progeny[k] != NULL ) - space_addtask( s , task_type_pair , task_subtype_density , pts[j][k] , 0 , ci->progeny[j] , ci->progeny[k] , 0 ); - } - - } - - } - - /* Pair interaction? */ - else if ( t->type == task_type_pair ) { - - /* Get a handle on the cells involved. */ - ci = t->ci; - cj = t->cj; - hi = ci->dmin; - hj = cj->dmin; - - /* Ingore this task? */ - /* if ( ci->dt_min > dt_step && cj->dt_min > dt_step ) { - t->skip = 1; - continue; - } */ - - /* Get the sort ID, use space_getsid and not t->flags - to make sure we get ci and cj swapped if needed. */ - sid = space_getsid( s , &ci , &cj , shift ); - - /* Should this task be split-up? */ - if ( ci->split && cj->split && - ci->h_max*kernel_gamma*space_stretch < hi/2 && - cj->h_max*kernel_gamma*space_stretch < hj/2 ) { - - /* Replace by a single sub-task? */ - if ( space_dosub && - ci->count < space_subsize && cj->count < space_subsize && - ci->maxdepth - ci->depth < space_maxsubdepth && cj->maxdepth - cj->depth < space_maxsubdepth && - sid != 0 && sid != 2 && sid != 6 && sid != 8 ) { - - /* Make this task a sub task. */ - t->type = task_type_sub; - - } - - /* Otherwise, split it. */ - else { - - /* Take a step back (we're going to recycle the current task)... */ - redo = 1; - - /* For each different sorting type... */ - switch ( sid ) { - - case 0: /* ( 1 , 1 , 1 ) */ - t->ci = ci->progeny[7]; t->cj = cj->progeny[0]; t->flags = 0; - break; - - case 1: /* ( 1 , 1 , 0 ) */ - t->ci = ci->progeny[6]; t->cj = cj->progeny[0]; t->flags = 1; t->tight = 1; - t = space_addtask( s , task_type_pair , t->subtype , 1 , 0 , ci->progeny[7] , cj->progeny[1] , 1 ); - t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[6] , cj->progeny[1] , 1 ); - t = space_addtask( s , task_type_pair , t->subtype , 2 , 0 , ci->progeny[7] , cj->progeny[0] , 1 ); - break; - - case 2: /* ( 1 , 1 , -1 ) */ - t->ci = ci->progeny[6]; t->cj = cj->progeny[1]; t->flags = 2; t->tight = 1; - break; - - case 3: /* ( 1 , 0 , 1 ) */ - t->ci = ci->progeny[5]; t->cj = cj->progeny[0]; t->flags = 3; t->tight = 1; - t = space_addtask( s , task_type_pair , t->subtype , 3 , 0 , ci->progeny[7] , cj->progeny[2] , 1 ); - t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[5] , cj->progeny[2] , 1 ); - t = space_addtask( s , task_type_pair , t->subtype , 6 , 0 , ci->progeny[7] , cj->progeny[0] , 1 ); - break; - - case 4: /* ( 1 , 0 , 0 ) */ - t->ci = ci->progeny[4]; t->cj = cj->progeny[0]; t->flags = 4; t->tight = 1; - t = space_addtask( s , task_type_pair , t->subtype , 5 , 0 , ci->progeny[5] , cj->progeny[0] , 1 ); - t = space_addtask( s , task_type_pair , t->subtype , 7 , 0 , ci->progeny[6] , cj->progeny[0] , 1 ); - t = space_addtask( s , task_type_pair , t->subtype , 8 , 0 , ci->progeny[7] , cj->progeny[0] , 1 ); - t = space_addtask( s , task_type_pair , t->subtype , 3 , 0 , ci->progeny[4] , cj->progeny[1] , 1 ); - t = space_addtask( s , task_type_pair , t->subtype , 4 , 0 , ci->progeny[5] , cj->progeny[1] , 1 ); - t = space_addtask( s , task_type_pair , t->subtype , 6 , 0 , ci->progeny[6] , cj->progeny[1] , 1 ); - t = space_addtask( s , task_type_pair , t->subtype , 7 , 0 , ci->progeny[7] , cj->progeny[1] , 1 ); - t = space_addtask( s , task_type_pair , t->subtype , 1 , 0 , ci->progeny[4] , cj->progeny[2] , 1 ); - t = space_addtask( s , task_type_pair , t->subtype , 2 , 0 , ci->progeny[5] , cj->progeny[2] , 1 ); - t = space_addtask( s , task_type_pair , t->subtype , 4 , 0 , ci->progeny[6] , cj->progeny[2] , 1 ); - t = space_addtask( s , task_type_pair , t->subtype , 5 , 0 , ci->progeny[7] , cj->progeny[2] , 1 ); - t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[4] , cj->progeny[3] , 1 ); - t = space_addtask( s , task_type_pair , t->subtype , 1 , 0 , ci->progeny[5] , cj->progeny[3] , 1 ); - t = space_addtask( s , task_type_pair , t->subtype , 3 , 0 , ci->progeny[6] , cj->progeny[3] , 1 ); - t = space_addtask( s , task_type_pair , t->subtype , 4 , 0 , ci->progeny[7] , cj->progeny[3] , 1 ); - break; - - case 5: /* ( 1 , 0 , -1 ) */ - t->ci = ci->progeny[4]; t->cj = cj->progeny[1]; t->flags = 5; t->tight = 1; - t = space_addtask( s , task_type_pair , t->subtype , 5 , 0 , ci->progeny[6] , cj->progeny[3] , 1 ); - t = space_addtask( s , task_type_pair , t->subtype , 2 , 0 , ci->progeny[4] , cj->progeny[3] , 1 ); - t = space_addtask( s , task_type_pair , t->subtype , 8 , 0 , ci->progeny[6] , cj->progeny[1] , 1 ); - break; - - case 6: /* ( 1 , -1 , 1 ) */ - t->ci = ci->progeny[5]; t->cj = cj->progeny[2]; t->flags = 6; t->tight = 1; - break; - - case 7: /* ( 1 , -1 , 0 ) */ - t->ci = ci->progeny[4]; t->cj = cj->progeny[3]; t->flags = 6; t->tight = 1; - t = space_addtask( s , task_type_pair , t->subtype , 8 , 0 , ci->progeny[5] , cj->progeny[2] , 1 ); - t = space_addtask( s , task_type_pair , t->subtype , 7 , 0 , ci->progeny[4] , cj->progeny[2] , 1 ); - t = space_addtask( s , task_type_pair , t->subtype , 7 , 0 , ci->progeny[5] , cj->progeny[3] , 1 ); - break; - - case 8: /* ( 1 , -1 , -1 ) */ - t->ci = ci->progeny[4]; t->cj = cj->progeny[3]; t->flags = 8; t->tight = 1; - break; - - case 9: /* ( 0 , 1 , 1 ) */ - t->ci = ci->progeny[3]; t->cj = cj->progeny[0]; t->flags = 9; t->tight = 1; - t = space_addtask( s , task_type_pair , t->subtype , 9 , 0 , ci->progeny[7] , cj->progeny[4] , 1 ); - t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[3] , cj->progeny[4] , 1 ); - t = space_addtask( s , task_type_pair , t->subtype , 8 , 0 , ci->progeny[7] , cj->progeny[0] , 1 ); - break; - - case 10: /* ( 0 , 1 , 0 ) */ - t->ci = ci->progeny[2]; t->cj = cj->progeny[0]; t->flags = 10; t->tight = 1; - t = space_addtask( s , task_type_pair , t->subtype , 11 , 0 , ci->progeny[3] , cj->progeny[0] , 1 ); - t = space_addtask( s , task_type_pair , t->subtype , 7 , 0 , ci->progeny[6] , cj->progeny[0] , 1 ); - t = space_addtask( s , task_type_pair , t->subtype , 6 , 0 , ci->progeny[7] , cj->progeny[0] , 1 ); - t = space_addtask( s , task_type_pair , t->subtype , 9 , 0 , ci->progeny[2] , cj->progeny[1] , 1 ); - t = space_addtask( s , task_type_pair , t->subtype , 10 , 0 , ci->progeny[3] , cj->progeny[1] , 1 ); - t = space_addtask( s , task_type_pair , t->subtype , 8 , 0 , ci->progeny[6] , cj->progeny[1] , 1 ); - t = space_addtask( s , task_type_pair , t->subtype , 7 , 0 , ci->progeny[7] , cj->progeny[1] , 1 ); - t = space_addtask( s , task_type_pair , t->subtype , 1 , 0 , ci->progeny[2] , cj->progeny[4] , 1 ); - t = space_addtask( s , task_type_pair , t->subtype , 2 , 0 , ci->progeny[3] , cj->progeny[4] , 1 ); - t = space_addtask( s , task_type_pair , t->subtype , 10 , 0 , ci->progeny[6] , cj->progeny[4] , 1 ); - t = space_addtask( s , task_type_pair , t->subtype , 11 , 0 , ci->progeny[7] , cj->progeny[4] , 1 ); - t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[2] , cj->progeny[5] , 1 ); - t = space_addtask( s , task_type_pair , t->subtype , 1 , 0 , ci->progeny[3] , cj->progeny[5] , 1 ); - t = space_addtask( s , task_type_pair , t->subtype , 9 , 0 , ci->progeny[6] , cj->progeny[5] , 1 ); - t = space_addtask( s , task_type_pair , t->subtype , 10 , 0 , ci->progeny[7] , cj->progeny[5] , 1 ); - break; - - case 11: /* ( 0 , 1 , -1 ) */ - t->ci = ci->progeny[2]; t->cj = cj->progeny[1]; t->flags = 11; t->tight = 1; - t = space_addtask( s , task_type_pair , t->subtype , 11 , 0 , ci->progeny[6] , cj->progeny[5] , 1 ); - t = space_addtask( s , task_type_pair , t->subtype , 2 , 0 , ci->progeny[2] , cj->progeny[5] , 1 ); - t = space_addtask( s , task_type_pair , t->subtype , 6 , 0 , ci->progeny[6] , cj->progeny[1] , 1 ); - break; - - case 12: /* ( 0 , 0 , 1 ) */ - t->ci = ci->progeny[1]; t->cj = cj->progeny[0]; t->flags = 12; t->tight = 1; - t = space_addtask( s , task_type_pair , t->subtype , 11 , 0 , ci->progeny[3] , cj->progeny[0] , 1 ); - t = space_addtask( s , task_type_pair , t->subtype , 5 , 0 , ci->progeny[5] , cj->progeny[0] , 1 ); - t = space_addtask( s , task_type_pair , t->subtype , 2 , 0 , ci->progeny[7] , cj->progeny[0] , 1 ); - t = space_addtask( s , task_type_pair , t->subtype , 9 , 0 , ci->progeny[1] , cj->progeny[2] , 1 ); - t = space_addtask( s , task_type_pair , t->subtype , 12 , 0 , ci->progeny[3] , cj->progeny[2] , 1 ); - t = space_addtask( s , task_type_pair , t->subtype , 8 , 0 , ci->progeny[5] , cj->progeny[2] , 1 ); - t = space_addtask( s , task_type_pair , t->subtype , 5 , 0 , ci->progeny[7] , cj->progeny[2] , 1 ); - t = space_addtask( s , task_type_pair , t->subtype , 3 , 0 , ci->progeny[1] , cj->progeny[4] , 1 ); - t = space_addtask( s , task_type_pair , t->subtype , 6 , 0 , ci->progeny[3] , cj->progeny[4] , 1 ); - t = space_addtask( s , task_type_pair , t->subtype , 12 , 0 , ci->progeny[5] , cj->progeny[4] , 1 ); - t = space_addtask( s , task_type_pair , t->subtype , 11 , 0 , ci->progeny[7] , cj->progeny[4] , 1 ); - t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[1] , cj->progeny[6] , 1 ); - t = space_addtask( s , task_type_pair , t->subtype , 3 , 0 , ci->progeny[3] , cj->progeny[6] , 1 ); - t = space_addtask( s , task_type_pair , t->subtype , 9 , 0 , ci->progeny[5] , cj->progeny[6] , 1 ); - t = space_addtask( s , task_type_pair , t->subtype , 12 , 0 , ci->progeny[7] , cj->progeny[6] , 1 ); - break; - - } - - } - - } /* split this task? */ - - /* Otherwise, if not spilt, stitch-up the sorting. */ - else { - - /* Create the sort for ci. */ - // lock_lock( &ci->lock ); - if ( ci->sorts == NULL ) - ci->sorts = space_addtask( s , task_type_sort , 0 , 1 << sid , 0 , ci , NULL , 0 ); - ci->sorts->flags |= (1 << sid); - // lock_unlock_blind( &ci->lock ); - task_addunlock( ci->sorts , t ); - - /* Create the sort for cj. */ - // lock_lock( &cj->lock ); - if ( cj->sorts == NULL ) - cj->sorts = space_addtask( s , task_type_sort , 0 , 1 << sid , 0 , cj , NULL , 0 ); - cj->sorts->flags |= (1 << sid); - // lock_unlock_blind( &cj->lock ); - task_addunlock( cj->sorts , t ); - - } - - } /* pair interaction? */ - - } /* loop over all tasks. */ - - } - - } - - -/** - * @brief Fill the #space's task list. - * - * @param s The #space we are working in. - * @param do_sort Flag to add sorting tasks to the list. - */ - -void space_maketasks ( struct space *s , int do_sort ) { - - int i, j, k, ii, jj, kk, iii, jjj, kkk, cid, cjd, sid; - int *cdim = s->cdim; - struct task *t, *t2; - struct cell *ci, *cj; - - /* Allocate the task-list, if needed. */ - if ( s->tasks == NULL || s->tasks_size < s->tot_cells * space_maxtaskspercell ) { - if ( s->tasks != NULL ) - free( s->tasks ); - if ( s->tasks_ind != NULL ) - free( s->tasks_ind ); - s->tasks_size = s->tot_cells * space_maxtaskspercell; - if ( posix_memalign( (void *)&s->tasks , 64 , sizeof(struct task) * s->tasks_size ) != 0 ) - error( "Failed to allocate task list." ); - if ( ( s->tasks_ind = (int *)malloc( sizeof(int) * s->tasks_size ) ) == NULL ) - error( "Failed to allocate task indices." ); - } - s->nr_tasks = 0; - s->tasks_next = 0; - - /* Run through the highest level of cells and add pairs. */ - for ( i = 0 ; i < cdim[0] ; i++ ) - for ( j = 0 ; j < cdim[1] ; j++ ) - for ( k = 0 ; k < cdim[2] ; k++ ) { - cid = cell_getid( cdim , i , j , k ); - if ( s->cells[cid].count == 0 ) - continue; - ci = &s->cells[cid]; - if ( ci->count == 0 ) - continue; - space_addtask( s , task_type_self , task_subtype_density , 0 , 0 , ci , NULL , 0 ); - for ( ii = -1 ; ii < 2 ; ii++ ) { - iii = i + ii; - if ( !s->periodic && ( iii < 0 || iii >= cdim[0] ) ) - continue; - iii = ( iii + cdim[0] ) % cdim[0]; - for ( jj = -1 ; jj < 2 ; jj++ ) { - jjj = j + jj; - if ( !s->periodic && ( jjj < 0 || jjj >= cdim[1] ) ) - continue; - jjj = ( jjj + cdim[1] ) % cdim[1]; - for ( kk = -1 ; kk < 2 ; kk++ ) { - kkk = k + kk; - if ( !s->periodic && ( kkk < 0 || kkk >= cdim[2] ) ) - continue; - kkk = ( kkk + cdim[2] ) % cdim[2]; - cjd = cell_getid( cdim , iii , jjj , kkk ); - cj = &s->cells[cjd]; - if ( cid >= cjd || cj->count == 0 ) - continue; - sid = sortlistID[ (kk+1) + 3*( (jj+1) + 3*(ii+1) ) ]; - t = space_addtask( s , task_type_pair , task_subtype_density , sid , 0 , ci , cj , 1 ); - } - } - } - } - - /* Split the tasks. */ - space_splittasks( s ); - - /* Count the number of tasks associated with each cell and - store the density tasks in each cell, and make each sort - depend on the sorts of its progeny. */ - // #pragma omp parallel for private(t,j) - for ( k = 0 ; k < s->nr_tasks ; k++ ) { - t = &s->tasks[k]; - if ( t->skip ) - continue; - if ( t->type == task_type_sort && t->ci->split ) - for ( j = 0 ; j < 8 ; j++ ) { - if ( t->ci->progeny[j] == NULL ) - continue; - if ( t->ci->progeny[j]->sorts == NULL ) - t->ci->progeny[j]->sorts = space_addtask( s , task_type_sort , task_subtype_none , t->flags , 0 , t->ci->progeny[j] , NULL , 0 ); - t->ci->progeny[j]->sorts->skip = 0; - task_addunlock( t->ci->progeny[j]->sorts , t ); - } - if ( t->type == task_type_self ) { - atomic_inc( &t->ci->nr_tasks ); - if ( t->subtype == task_subtype_density ) { - t->ci->density[ atomic_inc( &t->ci->nr_density ) ] = t; - } - } - else if ( t->type == task_type_pair ) { - atomic_inc( &t->ci->nr_tasks ); - atomic_inc( &t->cj->nr_tasks ); - if ( t->subtype == task_subtype_density ) { - t->ci->density[ atomic_inc( &t->ci->nr_density ) ] = t; - t->cj->density[ atomic_inc( &t->cj->nr_density ) ] = t; - } - } - else if ( t->type == task_type_sub ) { - atomic_inc( &t->ci->nr_tasks ); - if ( t->cj != NULL ) - atomic_inc( &t->cj->nr_tasks ); - if ( t->subtype == task_subtype_density ) { - t->ci->density[ atomic_inc( &t->ci->nr_density ) ] = t; - if ( t->cj != NULL ) - t->cj->density[ atomic_inc( &t->cj->nr_density ) ] = t; - } - } - } - - /* Append a ghost task to each cell. */ - space_map_cells_pre( s , 1 , &space_map_mkghosts , s ); - - /* Run through the tasks and make force tasks for each density task. - Each force task depends on the cell ghosts and unlocks the kick2 task - of its super-cell. */ - kk = s->nr_tasks; - // #pragma omp parallel for private(t,t2) - for ( k = 0 ; k < kk ; k++ ) { - - /* Get a pointer to the task. */ - t = &s->tasks[k]; - - /* Skip? */ - if ( t->skip ) - continue; - - /* Self-interaction? */ - if ( t->type == task_type_self && t->subtype == task_subtype_density ) { - task_addunlock( t , t->ci->super->ghost ); - t2 = space_addtask( s , task_type_self , task_subtype_force , 0 , 0 , t->ci , NULL , 0 ); - task_addunlock( t->ci->ghost , t2 ); - task_addunlock( t2 , t->ci->super->kick2 ); - } - - /* Otherwise, pair interaction? */ - else if ( t->type == task_type_pair && t->subtype == task_subtype_density ) { - task_addunlock( t , t->ci->super->ghost ); - if ( t->ci->super != t->cj->super ) - task_addunlock( t , t->cj->super->ghost ); - t2 = space_addtask( s , task_type_pair , task_subtype_force , 0 , 0 , t->ci , t->cj , 0 ); - task_addunlock( t->ci->ghost , t2 ); - task_addunlock( t->cj->ghost , t2 ); - task_addunlock( t2 , t->ci->super->kick2 ); - if ( t->ci->super != t->cj->super ) - task_addunlock( t2 , t->cj->super->kick2 ); - } - - /* Otherwise, sub interaction? */ - else if ( t->type == task_type_sub && t->subtype == task_subtype_density ) { - task_addunlock( t , t->ci->super->ghost ); - if ( t->cj != NULL && t->ci->super != t->cj->super ) - task_addunlock( t , t->cj->super->ghost ); - t2 = space_addtask( s , task_type_sub , task_subtype_force , t->flags , 0 , t->ci , t->cj , 0 ); - task_addunlock( t->ci->ghost , t2 ); - if ( t->cj != NULL ) - task_addunlock( t->cj->ghost , t2 ); - task_addunlock( t2 , t->ci->super->kick2 ); - if ( t->cj != NULL && t->ci->super != t->cj->super ) - task_addunlock( t2 , t->cj->super->kick2 ); - } - - } - - /* Rank the tasks. */ - space_ranktasks( s ); - - /* Count the number of each task type. */ - int counts[ task_type_count+1 ]; - for ( k = 0 ; k <= task_type_count ; k++ ) - counts[k] = 0; - for ( k = 0 ; k < s->nr_tasks ; k++ ) - if ( !s->tasks[k].skip ) - counts[ (int)s->tasks[k].type ] += 1; - else - counts[ task_type_count ] += 1; - printf( "space_maketasks: task counts are [ %s=%i" , taskID_names[0] , counts[0] ); - for ( k = 1 ; k < task_type_count ; k++ ) - printf( " %s=%i" , taskID_names[k] , counts[k] ); - printf( " skipped=%i ]\n" , counts[ task_type_count ] ); fflush(stdout); - - } - - - /** * @brief Split cells that contain too many particles. * @@ -1428,6 +674,7 @@ void space_split ( struct space *s , struct cell *c ) { xp->x_old[1] = x[1] = p->x[1]; xp->x_old[2] = x[2] = p->x[2]; dt = p->dt; + h = p->h; if ( h > h_max ) h_max = h; if ( dt < dt_min ) diff --git a/src/space.h b/src/space.h index 0e1b504710c70a0e4796880ba89582991582ba17..6e31cd4397da07c01100e3e1766e33161b76bc4b 100644 --- a/src/space.h +++ b/src/space.h @@ -26,8 +26,6 @@ #define space_splitratio 0.875f #define space_splitsize_default 400 #define space_subsize_default 5000 -#define space_maxsubdepth 3 -#define space_dosub 1 #define space_stretch 1.05f #define space_maxtaskspercell 31 #define space_maxreldx 0.2f @@ -92,12 +90,6 @@ struct space { /* Is the space periodic? */ int periodic; - /* The list of tasks. */ - struct task *tasks; - int nr_tasks, tasks_next; - int tasks_size; - int *tasks_ind; - /* General-purpose lock for this space. */ lock_type lock; @@ -105,22 +97,14 @@ struct space { /* function prototypes. */ -void space_addsorts ( struct space *s , struct task *t , struct cell *ci , struct cell *cj , int sid ); void parts_sort ( struct part *parts , int *ind , int N , int min , int max ); struct cell *space_getcell ( struct space *s ); -struct task *space_gettask ( struct space *s ); -struct task *space_addtask ( struct space *s , int type , int subtype , int flags , int wait , struct cell *ci , struct cell *cj , int tight ); int space_getsid ( struct space *s , struct cell **ci , struct cell **cj , double *shift ); void space_init ( struct space *s , double dim[3] , struct part *parts , int N , int periodic , double h_max ); -void space_maketasks ( struct space *s , int do_sort ); void space_map_cells_pre ( struct space *s , int full , void (*fun)( struct cell *c , void *data ) , void *data ); void space_map_parts ( struct space *s , void (*fun)( struct part *p , struct cell *c , void *data ) , void *data ); void space_map_cells_post ( struct space *s , int full , void (*fun)( struct cell *c , void *data ) , void *data ); -int space_prepare ( struct space *s ); -void space_ranktasks ( struct space *s ); void space_rebuild ( struct space *s , double h_max ); void space_recycle ( struct space *s , struct cell *c ); void space_split ( struct space *s , struct cell *c ); - - diff --git a/src/swift.h b/src/swift.h index e30c1b151709480cefab175e48fa996c30bb1d4b..bb310cbd6924ed78b63c990fb7bf3907959d8354 100644 --- a/src/swift.h +++ b/src/swift.h @@ -27,6 +27,7 @@ #include "atomic.h" #include "lock.h" #include "task.h" +#include "scheduler.h" #include "part.h" #include "cell.h" #include "space.h"