diff --git a/src/cell.c b/src/cell.c index 356e4769e120bbf395964ad7a3d0ba8cea6a923e..86ff7b2ee78f001986b375dcccb5e4920a4e89cf 100644 --- a/src/cell.c +++ b/src/cell.c @@ -47,7 +47,9 @@ ticks cell_timer[ cell_timer_count ]; /* Define the timer macros. */ #ifdef TIMER_VERBOSE - #define TIMER + #ifndef TIMER + #define TIMER + #endif #endif #ifdef TIMER #define TIMER_TIC ticks tic = getticks(); @@ -61,7 +63,7 @@ ticks cell_timer[ cell_timer_count ]; # define INLINE inline # endif #endif - INLINE ticks timer_toc ( int t , ticks tic ) { + INLINE static ticks timer_toc ( int t , ticks tic ) { ticks d = (getticks() - tic); __sync_add_and_fetch( &cell_timer[t] , d ); return d; diff --git a/src/cell.h b/src/cell.h index 27c4242ffdeac5312863d561f58daa8ead16d93b..f9baf21d11d8dd8211af78fc66ec9c6f6ead2a9e 100644 --- a/src/cell.h +++ b/src/cell.h @@ -58,7 +58,7 @@ struct cell { struct entry *sort; /* Number of pairs associated with this cell. */ - int nr_pairs; + // int nr_pairs; /* Pointers to the next level of cells. */ struct cell *progeny[8]; diff --git a/src/cycle.h b/src/cycle.h index e357a017c79a6e9befb73e2b988bd23918b66f37..16f57e7e1ef942d2736f4328be9117b2deab6d6e 100644 --- a/src/cycle.h +++ b/src/cycle.h @@ -178,7 +178,7 @@ typedef unsigned long long ticks; # define INLINE inline # endif #endif -INLINE ticks getticks(void) +INLINE static ticks getticks(void) { ticks ret; @@ -234,7 +234,7 @@ typedef unsigned long long ticks; # define INLINE inline # endif #endif -INLINE ticks getticks(void) +INLINE static ticks getticks(void) { unsigned a, d; asm volatile("rdtsc" : "=a" (a), "=d" (d)); diff --git a/src/engine.c b/src/engine.c index 53b62c81e16d953f144e7806c72c8c625e776452..1f93b1ab9884fd6834cacb60f00fcdce04f5a422 100644 --- a/src/engine.c +++ b/src/engine.c @@ -129,15 +129,16 @@ void engine_ranktasks ( struct engine *e ) { struct task *t; struct space *s = e->s; int *tid = s->tasks_ind; - + /* Run throught the tasks and get all the waits right. 
*/ for ( k = 0 ; k < s->nr_tasks ; k++ ) { + tid[k] = k; for ( j = 0 ; j < s->tasks[k].nr_unlock_tasks ; j++ ) s->tasks[k].unlock_tasks[j]->wait += 1; } /* Main loop. */ - for ( rank = 0 ; left < s->nr_tasks ; rank++ ) { + for ( j = 0 , rank = 0 ; left < s->nr_tasks ; rank++ ) { /* Load the tids of tasks with no waits. */ for ( k = left ; k < s->nr_tasks ; k++ ) @@ -308,8 +309,8 @@ void engine_init ( struct engine *e , struct space *s , int nr_threads , int nr_ } /* Sort the queues topologically. */ - for ( k = 0 ; k < nr_queues ; k++ ) - queue_sort( &e->queues[k] ); + // for ( k = 0 ; k < nr_queues ; k++ ) + // queue_sort( &e->queues[k] ); /* Allocate and init the threads. */ if ( ( e->runners = (struct runner *)malloc( sizeof(struct runner) * nr_threads ) ) == NULL ) diff --git a/src/lock.h b/src/lock.h index dc364177e1057583724e2268ee6d8c1093b77ee2..9ff386119e720b95f1da4a36b7bab5ffc2eb909f 100644 --- a/src/lock.h +++ b/src/lock.h @@ -40,7 +40,7 @@ #define lock_type volatile int #define lock_init( l ) ( *l = 0 ) #define lock_destroy( l ) 0 - INLINE int lock_lock ( volatile int *l ) { + INLINE static int lock_lock ( volatile int *l ) { while ( __sync_val_compare_and_swap( l , 0 , 1 ) != 0 ) while( *l ); return 0; diff --git a/src/queue.c b/src/queue.c index 272582519a7e08dc21ba45887c25c8d98cca8bdd..ca248da6f74872f27905a2f01023c954c04241f6 100644 --- a/src/queue.c +++ b/src/queue.c @@ -43,7 +43,9 @@ /* Define the timer macros. 
*/ #ifdef TIMER_VERBOSE - #define TIMER + #ifndef TIMER + #define TIMER + #endif #endif #ifdef TIMER #define TIMER_TIC ticks tic = getticks(); @@ -57,7 +59,7 @@ # define INLINE inline # endif #endif - INLINE ticks timer_toc ( int t , ticks tic ) { + INLINE static ticks timer_toc ( int t , ticks tic ) { ticks d = (getticks() - tic); __sync_add_and_fetch( &queue_timer[t] , d ); return d; @@ -445,6 +447,8 @@ void queue_sort ( struct queue *q ) { int *weight, *wait; int *data = q->tid; struct task *t; + + printf( "queue_sort: sorting queue with %i tasks.\n" , q->count ); /* Allocate and pre-compute each task's weight. */ if ( ( weight = (int *)alloca( sizeof(int) * q->count ) ) == NULL || diff --git a/src/runner.c b/src/runner.c index 21c4135f44f2b394f248c8e5aae875c993cc33e7..f79ae2d8d408b9eee017f56763cc831291f35e77 100644 --- a/src/runner.c +++ b/src/runner.c @@ -177,14 +177,8 @@ void runner_dosort ( struct runner *r , struct cell *c , int flags ) { int i, ind, off[8], inds[8], temp_i; // float shift[3]; float buff[8], px[3]; - struct cell *temp_c; TIMER_TIC - /* Does this cell even need to be sorted? */ - for ( temp_c = c ; temp_c != NULL && temp_c->nr_pairs == 0 ; temp_c = temp_c->parent ); - if ( temp_c == NULL ) - return; - /* start by allocating the entry arrays. */ if ( lock_lock( &c->lock ) != 0 ) error( "Failed to lock cell." ); @@ -204,6 +198,11 @@ void runner_dosort ( struct runner *r , struct cell *c , int flags ) { if ( !( flags & (1 << j) ) ) continue; + /* Sort any un-sorted progeny. */ + for ( k = 0 ; k < 8 ; k++ ) + if ( c->progeny[k] != NULL && ( c->progeny[k]->sorts[0] == NULL || !(c->progeny[k]->sorts[0]->flags & (1 << j)) ) ) + runner_dosort( r , c->progeny[k] , 1 << j ); + /* Init the particle index offsets. */ for ( off[0] = 0 , k = 1 ; k < 8 ; k++ ) if ( c->progeny[k-1] != NULL ) @@ -298,7 +297,7 @@ void runner_dosort ( struct runner *r , struct cell *c , int flags ) { error( "Sorting failed, indices borked." 
); } } */ - + #ifdef TIMER_VERBOSE printf( "runner_dosort[%02i]: %i parts at depth %i (flags = %i%i%i%i%i%i%i%i%i%i%i%i%i) took %.3f ms.\n" , r->id , c->count , c->depth , @@ -335,7 +334,7 @@ void runner_doghost ( struct runner *r , struct cell *c ) { runner_doghost( r , c->progeny[k] ); return; } - + /* Init the IDs that have to be updated. */ if ( ( pid = (int *)alloca( sizeof(int) * count ) ) == NULL ) error( "Call to alloca failed." ); diff --git a/src/runner.h b/src/runner.h index 9763a2ae50351427aabe8c01c28ae37d47a6b074..4bb83a460fe58c1aed945f22758c30bd8e5a516c 100644 --- a/src/runner.h +++ b/src/runner.h @@ -39,9 +39,15 @@ enum { extern ticks runner_timer[ runner_timer_count ]; +/* SID stuff. */ +extern const char runner_flip[]; + + /* Define the timer macros. */ #ifdef TIMER_VERBOSE - #define TIMER + #ifndef TIMER + #define TIMER + #endif #endif #ifdef TIMER #define TIMER_TIC ticks tic = getticks(); @@ -55,7 +61,7 @@ extern ticks runner_timer[ runner_timer_count ]; # define INLINE inline # endif #endif - INLINE ticks timer_toc ( int t , ticks tic ) { + INLINE static ticks timer_toc ( int t , ticks tic ) { ticks d = (getticks() - tic); __sync_add_and_fetch( &runner_timer[t] , d ); return d; diff --git a/src/runner_doiact.h b/src/runner_doiact.h index fd4dd880116c06a225311df865775f4b765dcbc5..7372c23277c5df7d021dd11ee5c22592b139365d 100644 --- a/src/runner_doiact.h +++ b/src/runner_doiact.h @@ -498,7 +498,7 @@ void DOPAIR_SUBSET ( struct runner *r , struct cell *restrict ci , struct part * #endif #ifdef TIMER_VERBOSE - printf( "runner_dopair_subset[%02i]: %i/%i parts at depth %i (r_max=%.3f/%.3f) took %.3f ms.\n" , r->id , count_i , count_j , ci->depth , ci->h_max , cj->h_max , ((double)TIMER_TOC(TIMER_DOPAIR)) / CPU_TPS * 1000 ); + printf( "runner_dopair_subset[%02i]: %i/%i parts at depth %i (r_max=%.3f/%.3f) took %.3f ms.\n" , r->id , count , count_j , ci->depth , ci->h_max , cj->h_max , ((double)TIMER_TOC(TIMER_DOPAIR)) / CPU_TPS * 1000 ); #else 
TIMER_TOC(runner_timer_dopair_subset); #endif @@ -607,7 +607,7 @@ void DOSELF_SUBSET ( struct runner *r , struct cell *restrict ci , struct part * #endif #ifdef TIMER_VERBOSE - printf( "runner_doself_subset[%02i]: %i/%i parts at depth %i (r_max=%.3f/%.3f) took %.3f ms.\n" , r->id , count_i , count_j , ci->depth , ci->h_max , cj->h_max , ((double)TIMER_TOC(TIMER_DOPAIR)) / CPU_TPS * 1000 ); + printf( "runner_doself_subset[%02i]: %i/%i parts at depth %i took %.3f ms.\n" , r->id , count , ci->count , ci->depth , ((double)TIMER_TOC(TIMER_DOSELF)) / CPU_TPS * 1000 ); #else TIMER_TOC(runner_timer_dopair_subset); #endif @@ -624,12 +624,11 @@ void DOSELF_SUBSET ( struct runner *r , struct cell *restrict ci , struct part * * @param cj The second #cell. */ -void DOPAIR1 ( struct runner *r , struct cell *restrict ci , struct cell *restrict cj ) { +void DOPAIR1 ( struct runner *r , struct cell *ci , struct cell *cj ) { struct engine *restrict e = r->e; int pid, pjd, k, sid; double rshift, shift[3] = { 0.0 , 0.0 , 0.0 }; - struct cell *temp; struct entry *restrict sort_i, *restrict sort_j; struct part *restrict pi, *restrict pj, *restrict parts_i, *restrict parts_j; struct cpart *restrict cpi, *restrict cparts_i; @@ -653,26 +652,9 @@ void DOPAIR1 ( struct runner *r , struct cell *restrict ci , struct cell *restri /* Anything to do here? */ if ( ci->dt_min > dt_max && cj->dt_min > dt_max ) return; - - /* Get the relative distance between the pairs, wrapping. */ - for ( k = 0 ; k < 3 ; k++ ) { - if ( cj->loc[k] - ci->loc[k] < -e->s->dim[k]/2 ) - shift[k] = e->s->dim[k]; - else if ( cj->loc[k] - ci->loc[k] > e->s->dim[k]/2 ) - shift[k] = -e->s->dim[k]; - } - /* Get the sorting index. */ - for ( sid = 0 , k = 0 ; k < 3 ; k++ ) - sid = 3*sid + ( (cj->loc[k] - ci->loc[k] + shift[k] < 0) ? 0 : (cj->loc[k] - ci->loc[k] + shift[k] > 0) ? 2 : 1 ); - - /* Switch the cells around? 
*/ - if ( runner_flip[sid] ) { - temp = ci; ci = cj; cj = temp; - for ( k = 0 ; k < 3 ; k++ ) - shift[k] = -shift[k]; - } - sid = sortlistID[sid]; + /* Get the sort ID. */ + sid = space_getsid( e->s , &ci , &cj , shift ); /* Get the cutoff shift. */ for ( rshift = 0.0 , k = 0 ; k < 3 ; k++ ) @@ -709,7 +691,8 @@ void DOPAIR1 ( struct runner *r , struct cell *restrict ci , struct cell *restri /* if ( ci->split && cj->split && sid == 4 ) printf( "boing!\n" ); */ - + + /* Loop over the parts in ci. */ for ( pid = count_i-1 ; pid >= 0 && sort_i[pid].d + hi_max > dj_min ; pid-- ) { @@ -857,12 +840,11 @@ void DOPAIR1 ( struct runner *r , struct cell *restrict ci , struct cell *restri } -void DOPAIR2 ( struct runner *r , struct cell *restrict ci , struct cell *restrict cj ) { +void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) { struct engine *restrict e = r->e; int pid, pjd, k, sid; double rshift, shift[3] = { 0.0 , 0.0 , 0.0 }; - struct cell *temp; struct entry *restrict sort_i, *restrict sort_j; struct entry *restrict sortdt_i = NULL, *restrict sortdt_j = NULL; int countdt_i = 0, countdt_j = 0; @@ -888,26 +870,9 @@ void DOPAIR2 ( struct runner *r , struct cell *restrict ci , struct cell *restri /* Anything to do here? */ if ( ci->dt_min > dt_max && cj->dt_min > dt_max ) return; - - /* Get the relative distance between the pairs, wrapping. */ - for ( k = 0 ; k < 3 ; k++ ) { - if ( cj->loc[k] - ci->loc[k] < -e->s->dim[k]/2 ) - shift[k] = e->s->dim[k]; - else if ( cj->loc[k] - ci->loc[k] > e->s->dim[k]/2 ) - shift[k] = -e->s->dim[k]; - } - /* Get the sorting index. */ - for ( sid = 0 , k = 0 ; k < 3 ; k++ ) - sid = 3*sid + ( (cj->loc[k] - ci->loc[k] + shift[k] < 0) ? 0 : (cj->loc[k] - ci->loc[k] + shift[k] > 0) ? 2 : 1 ); - - /* Switch the cells around? */ - if ( runner_flip[sid] ) { - temp = ci; ci = cj; cj = temp; - for ( k = 0 ; k < 3 ; k++ ) - shift[k] = -shift[k]; - } - sid = sortlistID[sid]; + /* Get the shift ID. 
*/ + sid = space_getsid( e->s , &ci , &cj , shift ); /* Get the cutoff shift. */ for ( rshift = 0.0 , k = 0 ; k < 3 ; k++ ) @@ -1262,7 +1227,7 @@ void DOSELF1 ( struct runner *r , struct cell *restrict c ) { pi = &parts[pid]; if ( cpi->dt > dt_max ) continue; - + /* Get the particle position and radius. */ for ( k = 0 ; k < 3 ; k++ ) pix[k] = cpi->x[k]; @@ -1512,9 +1477,9 @@ void DOSELF2 ( struct runner *r , struct cell *restrict c ) { * redundant computations to find the sid on-the-fly. */ -void DOSUB1 ( struct runner *r , struct cell *restrict ci , struct cell *restrict cj , int sid ) { +void DOSUB1 ( struct runner *r , struct cell *ci , struct cell *cj , int sid ) { - int j, k; + int j = 0, k; double shift[3]; float h; struct space *s = r->e->s; @@ -1551,31 +1516,9 @@ void DOSUB1 ( struct runner *r , struct cell *restrict ci , struct cell *restric h = fmin( ci->h[0] , fmin( ci->h[1] , ci->h[2] ) ); /* Get the type of pair if not specified explicitly. */ - if ( sid < 0 ) { - - /* Get the relative distance between the pairs, wrapping. */ - for ( k = 0 ; k < 3 ; k++ ) { - if ( cj->loc[k] - ci->loc[k] < -s->dim[k]/2 ) - shift[k] = s->dim[k]; - else if ( cj->loc[k] - ci->loc[k] > s->dim[k]/2 ) - shift[k] = -s->dim[k]; - else - shift[k] = 0.0; - } - - /* Get the sorting index. */ - for ( sid = 0 , k = 0 ; k < 3 ; k++ ) - sid = 3*sid + ( (cj->loc[k] - ci->loc[k] + shift[k] < 0) ? 0 : (cj->loc[k] - ci->loc[k] + shift[k] > 0) ? 2 : 1 ); - - /* Flip? */ - if ( sid < 13 ) { - struct cell *temp = cj; cj = ci; ci = temp; - } - else - sid = 26 - sid; - - } - + // if ( sid < 0 ) + sid = space_getsid( s , &ci , &cj , shift ); + /* Recurse? 
*/ if ( ci->split && cj->split && ci->h_max*2 < h && cj->h_max*2 < h ) { @@ -1787,7 +1730,7 @@ void DOSUB1 ( struct runner *r , struct cell *restrict ci , struct cell *restric #ifdef TIMER_VERBOSE - printf( "runner_DOSUB[%02i]: flags=%i at depth %i took %.3f ms.\n" , r->id , flags , ci->depth , ((double)TIMER_TOC(TIMER_DOSUB)) / CPU_TPS * 1000 ); + printf( "runner_dosub1[%02i]: flags=%i at depth %i took %.3f ms.\n" , r->id , sid , ci->depth , ((double)TIMER_TOC(TIMER_DOSUB)) / CPU_TPS * 1000 ); #else TIMER_TOC(TIMER_DOSUB); #endif @@ -1795,7 +1738,7 @@ void DOSUB1 ( struct runner *r , struct cell *restrict ci , struct cell *restric } -void DOSUB2 ( struct runner *r , struct cell *restrict ci , struct cell *restrict cj , int sid ) { +void DOSUB2 ( struct runner *r , struct cell *ci , struct cell *cj , int sid ) { int j, k; double shift[3]; @@ -1834,30 +1777,8 @@ void DOSUB2 ( struct runner *r , struct cell *restrict ci , struct cell *restric h = fmin( ci->h[0] , fmin( ci->h[1] , ci->h[2] ) ); /* Get the type of pair if not specified explicitly. */ - if ( sid < 0 ) { - - /* Get the relative distance between the pairs, wrapping. */ - for ( k = 0 ; k < 3 ; k++ ) { - if ( cj->loc[k] - ci->loc[k] < -s->dim[k]/2 ) - shift[k] = s->dim[k]; - else if ( cj->loc[k] - ci->loc[k] > s->dim[k]/2 ) - shift[k] = -s->dim[k]; - else - shift[k] = 0.0; - } - - /* Get the sorting index. */ - for ( sid = 0 , k = 0 ; k < 3 ; k++ ) - sid = 3*sid + ( (cj->loc[k] - ci->loc[k] + shift[k] < 0) ? 0 : (cj->loc[k] - ci->loc[k] + shift[k] > 0) ? 2 : 1 ); - - /* Flip? */ - if ( sid < 13 ) { - struct cell *temp = cj; cj = ci; ci = temp; - } - else - sid = 26 - sid; - - } + if ( sid < 0 ) + sid = space_getsid( s , &ci , &cj , shift ); /* Recurse? 
*/ if ( ci->split && cj->split && @@ -2070,7 +1991,7 @@ void DOSUB2 ( struct runner *r , struct cell *restrict ci , struct cell *restric #ifdef TIMER_VERBOSE - printf( "runner_dosub[%02i]: flags=%i at depth %i took %.3f ms.\n" , r->id , flags , ci->depth , ((double)TIMER_TOC(TIMER_DOSUB)) / CPU_TPS * 1000 ); + printf( "runner_dosub2[%02i]: flags=%i at depth %i took %.3f ms.\n" , r->id , sid , ci->depth , ((double)TIMER_TOC(TIMER_DOSUB)) / CPU_TPS * 1000 ); #else TIMER_TOC(TIMER_DOSUB); #endif @@ -2518,7 +2439,7 @@ void DOSUB_SUBSET ( struct runner *r , struct cell *restrict ci , struct part *p #ifdef TIMER_VERBOSE - printf( "runner_dosub[%02i]: flags=%i at depth %i took %.3f ms.\n" , r->id , flags , ci->depth , ((double)TIMER_TOC(TIMER_DOSUB)) / CPU_TPS * 1000 ); + printf( "runner_dosub[%02i]: flags=%i at depth %i took %.3f ms.\n" , r->id , sid , ci->depth , ((double)TIMER_TOC(TIMER_DOSUB)) / CPU_TPS * 1000 ); #else TIMER_TOC(TIMER_DOSUB); #endif diff --git a/src/space.c b/src/space.c index 51eec54bf80d70041423e317ad6de5d2530e2968..858fc7313f4ce15189b6f49390b61949d4e261ac 100644 --- a/src/space.c +++ b/src/space.c @@ -77,6 +77,54 @@ const int sortlistID[27] = { }; +/** + * @brief Get the sort ID (shift-id) of the given pair of cells, swapping them + * if need be. + * + * @param s The #space; its dim[] is used to wrap the cell separation. + * @param ci Pointer to the first #cell pointer; may be exchanged with cj on return. + * @param cj Pointer to the second #cell pointer; may be exchanged with ci on return. + * @param shift Output, 3 doubles: per-axis wrap offset (dim, 0.0 or minus dim) applied to the separation of cj from ci; negated if the cells are swapped. + * + * @return The sort ID taken from sortlistID[] for the base-3 direction code of the wrapped separation; ci and cj are swapped when runner_flip[] is non-zero for that code. + */ + +int space_getsid ( struct space *s , struct cell **ci , struct cell **cj , double *shift ) { + + int k, sid = 0; + struct cell *temp; + double dx[3]; + + /* Get the relative distance between the pairs, wrapping. */ + for ( k = 0 ; k < 3 ; k++ ) { + dx[k] = (*cj)->loc[k] - (*ci)->loc[k]; + if ( dx[k] < -s->dim[k]/2 ) + shift[k] = s->dim[k]; + else if ( dx[k] > s->dim[k]/2 ) + shift[k] = -s->dim[k]; + else + shift[k] = 0.0; + dx[k] += shift[k]; + } + + /* Get the sorting index. 
*/ + for ( k = 0 ; k < 3 ; k++ ) + sid = 3*sid + ( (dx[k] < 0.0) ? 0 : ( (dx[k] > 0.0) ? 2 : 1 ) ); + + /* Switch the cells around? */ + if ( runner_flip[sid] ) { + temp = *ci; *ci = *cj; *cj = temp; + for ( k = 0 ; k < 3 ; k++ ) + shift[k] = -shift[k]; + } + sid = sortlistID[sid]; + + /* Return the sort ID. */ + return sid; + + } + + /** * @breif Recursively dismantle a cell tree. * @@ -613,6 +661,7 @@ void space_splittasks ( struct space *s ) { struct cell *ci, *cj; double hi, hj, shift[3]; struct task *t; + float dt_max = s->dt_max; int pts[7][8] = { { -1 , 12 , 10 , 9 , 4 , 3 , 1 , 0 } , { -1 , -1 , 11 , 10 , 5 , 4 , 2 , 1 } , { -1 , -1 , -1 , 12 , 7 , 6 , 4 , 3 } , @@ -633,6 +682,12 @@ void space_splittasks ( struct space *s ) { /* Get a handle on the cell involved. */ ci = t->ci; + /* Ignore this task? */ + if ( ci->dt_min > dt_max ) { + t->type = task_type_none; + continue; + } + /* Is this cell even split? */ if ( !ci->split ) continue; @@ -645,9 +700,7 @@ void space_splittasks ( struct space *s ) { /* Wait for this tasks sorts, as we will now have pairwise components in this sub. */ - for ( k = 0 ; k < 14 ; k++ ) - if ( k == 0 || ci->sorts[k] != ci->sorts[k-1] ) - task_addunlock( ci->sorts[k] , t ); + space_addsorts( s , t , ci , NULL , -1 ); } @@ -662,19 +715,14 @@ void space_splittasks ( struct space *s ) { t->ci = ci->progeny[k]; for ( k += 1 ; k < 8 ; k++ ) if ( ci->progeny[k] != NULL ) - t = space_addtask( s , task_type_self , task_subtype_density , 0 , 0 , ci->progeny[k] , NULL , NULL , 0 , NULL , 0 ); + space_addtask( s , task_type_self , task_subtype_density , 0 , 0 , ci->progeny[k] , NULL , NULL , 0 , NULL , 0 ); /* Make a task for each pair of progeny. 
*/ for ( j = 0 ; j < 8 ; j++ ) - if ( ci->progeny[j] != NULL && ci->progeny[j]->count > 0 ) + if ( ci->progeny[j] != NULL ) for ( k = j + 1 ; k < 8 ; k++ ) - if ( ci->progeny[k] != NULL && ci->progeny[k]->count > 0 ) { - t = space_addtask( s , task_type_pair , task_subtype_density , 0 , 0 , ci->progeny[j] , ci->progeny[k] , NULL , 0 , NULL , 0 ); - task_addunlock( ci->progeny[j]->sorts[ pts[j][k] ] , t ); - task_addunlock( ci->progeny[k]->sorts[ pts[j][k] ] , t ); - ci->progeny[k]->nr_pairs += 1; - ci->progeny[j]->nr_pairs += 1; - } + if ( ci->progeny[k] != NULL ) + space_addtask( s , task_type_pair , task_subtype_density , pts[j][k] , 0 , ci->progeny[j] , ci->progeny[k] , NULL , 0 , NULL , 0 ); } } @@ -688,33 +736,20 @@ void space_splittasks ( struct space *s ) { hi = fmax( ci->h[0] , fmax( ci->h[1] , ci->h[2] ) ); hj = fmax( cj->h[0] , fmax( cj->h[1] , cj->h[2] ) ); + /* Ignore this task? */ + if ( ci->dt_min > dt_max && cj->dt_min > dt_max ) { + t->type = task_type_none; + continue; + } + + /* Get the sort ID, use space_getsid and not t->flags + to make sure we get ci and cj swapped if needed. */ + sid = space_getsid( s , &ci , &cj , shift ); + /* Should this task be split-up? */ if ( ci->split && cj->split && ci->h_max*space_stretch < hi/2 && cj->h_max*space_stretch < hj/2 ) { - /* Get the relative distance between the pairs, wrapping. */ - for ( k = 0 ; k < 3 ; k++ ) { - if ( cj->loc[k] - ci->loc[k] < -s->dim[k]/2 ) - shift[k] = s->dim[k]; - else if ( cj->loc[k] - ci->loc[k] > s->dim[k]/2 ) - shift[k] = -s->dim[k]; - else - shift[k] = 0.0; - } - - /* Get the sorting index. */ - for ( sid = 0 , k = 0 ; k < 3 ; k++ ) - sid = 3*sid + ( (cj->loc[k] - ci->loc[k] + shift[k] < 0) ? 0 : (cj->loc[k] - ci->loc[k] + shift[k] > 0) ? 2 : 1 ); - - /* Flip? */ - if ( sid < 13 ) { - cj = t->ci; - ci = t->cj; - t->ci = ci; t->cj = cj; - } - else - sid = 26 - sid; - /* Replace by a single sub-task? 
*/ if ( space_dosub && ci->count < space_subsize && cj->count < space_subsize && @@ -723,12 +758,10 @@ void space_splittasks ( struct space *s ) { /* Make this task a sub task. */ t->type = task_type_sub; t->flags = sid; + t->ci = ci; t->cj = cj; - /* Make it depend on all the sorts of its two cells. */ - for ( k = 0 ; k < 14 ; k++ ) - task_addunlock( ci->sorts[k] , t ); - for ( k = 0 ; k < 14 ; k++ ) - task_addunlock( cj->sorts[k] , t ); + /* Create the sorts recursively. */ + space_addsorts( s , t , ci , cj , sid ); /* Don't go any further. */ continue; @@ -738,267 +771,145 @@ void space_splittasks ( struct space *s ) { /* Take a step back (we're going to recycle the current task)... */ tid -= 1; - /* Remove the dependency of this task on the sorts of ci and cj. */ - task_rmunlock( ci->sorts[sid] , t ); - task_rmunlock( cj->sorts[sid] , t ); - ci->nr_pairs -= 1; - cj->nr_pairs -= 1; - t->nr_unlock_cells = 0; - /* For each different sorting type... */ switch ( sid ) { case 0: /* ( 1 , 1 , 1 ) */ - t->ci = ci->progeny[7]; t->cj = cj->progeny[0]; - task_addunlock( ci->progeny[7]->sorts[0] , t ); task_addunlock( cj->progeny[0]->sorts[0] , t ); - ci->progeny[7]->nr_pairs += 1; - cj->progeny[0]->nr_pairs += 1; + t->ci = ci->progeny[7]; t->cj = cj->progeny[0]; t->flags = 0; break; case 1: /* ( 1 , 1 , 0 ) */ - t->ci = ci->progeny[6]; t->cj = cj->progeny[0]; - task_addunlock( ci->progeny[6]->sorts[1] , t ); task_addunlock( cj->progeny[0]->sorts[1] , t ); - t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[7] , cj->progeny[1] , NULL , 0 , NULL , 0 ); - task_addunlock( ci->progeny[7]->sorts[1] , t ); task_addunlock( cj->progeny[1]->sorts[1] , t ); + t->ci = ci->progeny[6]; t->cj = cj->progeny[0]; t->flags = 1; + t = space_addtask( s , task_type_pair , t->subtype , 1 , 0 , ci->progeny[7] , cj->progeny[1] , NULL , 0 , NULL , 0 ); t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[6] , cj->progeny[1] , NULL , 0 , NULL , 0 ); - 
task_addunlock( ci->progeny[6]->sorts[0] , t ); task_addunlock( cj->progeny[1]->sorts[0] , t ); - t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[7] , cj->progeny[0] , NULL , 0 , NULL , 0 ); - task_addunlock( ci->progeny[7]->sorts[2] , t ); task_addunlock( cj->progeny[0]->sorts[2] , t ); - ci->progeny[6]->nr_pairs += 2; - ci->progeny[7]->nr_pairs += 2; - cj->progeny[0]->nr_pairs += 2; - cj->progeny[1]->nr_pairs += 2; + t = space_addtask( s , task_type_pair , t->subtype , 2 , 0 , ci->progeny[7] , cj->progeny[0] , NULL , 0 , NULL , 0 ); break; case 2: /* ( 1 , 1 , -1 ) */ - t->ci = ci->progeny[6]; t->cj = cj->progeny[1]; - task_addunlock( ci->progeny[6]->sorts[2] , t ); task_addunlock( cj->progeny[1]->sorts[2] , t ); - ci->progeny[6]->nr_pairs += 1; - cj->progeny[1]->nr_pairs += 1; + t->ci = ci->progeny[6]; t->cj = cj->progeny[1]; t->flags = 2; break; case 3: /* ( 1 , 0 , 1 ) */ - t->ci = ci->progeny[5]; t->cj = cj->progeny[0]; - task_addunlock( ci->progeny[5]->sorts[3] , t ); task_addunlock( cj->progeny[0]->sorts[3] , t ); - t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[7] , cj->progeny[2] , NULL , 0 , NULL , 0 ); - task_addunlock( ci->progeny[7]->sorts[3] , t ); task_addunlock( cj->progeny[2]->sorts[3] , t ); + t->ci = ci->progeny[5]; t->cj = cj->progeny[0]; t->flags = 3; + t = space_addtask( s , task_type_pair , t->subtype , 3 , 0 , ci->progeny[7] , cj->progeny[2] , NULL , 0 , NULL , 0 ); t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[5] , cj->progeny[2] , NULL , 0 , NULL , 0 ); - task_addunlock( ci->progeny[5]->sorts[0] , t ); task_addunlock( cj->progeny[2]->sorts[0] , t ); - t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[7] , cj->progeny[0] , NULL , 0 , NULL , 0 ); - task_addunlock( ci->progeny[7]->sorts[6] , t ); task_addunlock( cj->progeny[0]->sorts[6] , t ); - ci->progeny[5]->nr_pairs += 2; - ci->progeny[7]->nr_pairs += 2; - cj->progeny[0]->nr_pairs += 2; - 
cj->progeny[2]->nr_pairs += 2; + t = space_addtask( s , task_type_pair , t->subtype , 6 , 0 , ci->progeny[7] , cj->progeny[0] , NULL , 0 , NULL , 0 ); break; case 4: /* ( 1 , 0 , 0 ) */ - t->ci = ci->progeny[4]; t->cj = cj->progeny[0]; - task_addunlock( ci->progeny[4]->sorts[4] , t ); task_addunlock( cj->progeny[0]->sorts[4] , t ); - t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[5] , cj->progeny[0] , NULL , 0 , NULL , 0 ); - task_addunlock( ci->progeny[5]->sorts[5] , t ); task_addunlock( cj->progeny[0]->sorts[5] , t ); - t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[6] , cj->progeny[0] , NULL , 0 , NULL , 0 ); - task_addunlock( ci->progeny[6]->sorts[7] , t ); task_addunlock( cj->progeny[0]->sorts[7] , t ); - t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[7] , cj->progeny[0] , NULL , 0 , NULL , 0 ); - task_addunlock( ci->progeny[7]->sorts[8] , t ); task_addunlock( cj->progeny[0]->sorts[8] , t ); - t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[4] , cj->progeny[1] , NULL , 0 , NULL , 0 ); - task_addunlock( ci->progeny[4]->sorts[3] , t ); task_addunlock( cj->progeny[1]->sorts[3] , t ); - t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[5] , cj->progeny[1] , NULL , 0 , NULL , 0 ); - task_addunlock( ci->progeny[5]->sorts[4] , t ); task_addunlock( cj->progeny[1]->sorts[4] , t ); - t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[6] , cj->progeny[1] , NULL , 0 , NULL , 0 ); - task_addunlock( ci->progeny[6]->sorts[6] , t ); task_addunlock( cj->progeny[1]->sorts[6] , t ); - t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[7] , cj->progeny[1] , NULL , 0 , NULL , 0 ); - task_addunlock( ci->progeny[7]->sorts[7] , t ); task_addunlock( cj->progeny[1]->sorts[7] , t ); - t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[4] , cj->progeny[2] , NULL , 0 , NULL , 0 ); - task_addunlock( 
ci->progeny[4]->sorts[1] , t ); task_addunlock( cj->progeny[2]->sorts[1] , t ); - t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[5] , cj->progeny[2] , NULL , 0 , NULL , 0 ); - task_addunlock( ci->progeny[5]->sorts[2] , t ); task_addunlock( cj->progeny[2]->sorts[2] , t ); - t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[6] , cj->progeny[2] , NULL , 0 , NULL , 0 ); - task_addunlock( ci->progeny[6]->sorts[4] , t ); task_addunlock( cj->progeny[2]->sorts[4] , t ); - t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[7] , cj->progeny[2] , NULL , 0 , NULL , 0 ); - task_addunlock( ci->progeny[7]->sorts[5] , t ); task_addunlock( cj->progeny[2]->sorts[5] , t ); + t->ci = ci->progeny[4]; t->cj = cj->progeny[0]; t->flags = 4; + t = space_addtask( s , task_type_pair , t->subtype , 5 , 0 , ci->progeny[5] , cj->progeny[0] , NULL , 0 , NULL , 0 ); + t = space_addtask( s , task_type_pair , t->subtype , 7 , 0 , ci->progeny[6] , cj->progeny[0] , NULL , 0 , NULL , 0 ); + t = space_addtask( s , task_type_pair , t->subtype , 8 , 0 , ci->progeny[7] , cj->progeny[0] , NULL , 0 , NULL , 0 ); + t = space_addtask( s , task_type_pair , t->subtype , 3 , 0 , ci->progeny[4] , cj->progeny[1] , NULL , 0 , NULL , 0 ); + t = space_addtask( s , task_type_pair , t->subtype , 4 , 0 , ci->progeny[5] , cj->progeny[1] , NULL , 0 , NULL , 0 ); + t = space_addtask( s , task_type_pair , t->subtype , 6 , 0 , ci->progeny[6] , cj->progeny[1] , NULL , 0 , NULL , 0 ); + t = space_addtask( s , task_type_pair , t->subtype , 7 , 0 , ci->progeny[7] , cj->progeny[1] , NULL , 0 , NULL , 0 ); + t = space_addtask( s , task_type_pair , t->subtype , 1 , 0 , ci->progeny[4] , cj->progeny[2] , NULL , 0 , NULL , 0 ); + t = space_addtask( s , task_type_pair , t->subtype , 2 , 0 , ci->progeny[5] , cj->progeny[2] , NULL , 0 , NULL , 0 ); + t = space_addtask( s , task_type_pair , t->subtype , 4 , 0 , ci->progeny[6] , cj->progeny[2] , NULL , 0 , NULL , 0 ); + 
t = space_addtask( s , task_type_pair , t->subtype , 5 , 0 , ci->progeny[7] , cj->progeny[2] , NULL , 0 , NULL , 0 ); t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[4] , cj->progeny[3] , NULL , 0 , NULL , 0 ); - task_addunlock( ci->progeny[4]->sorts[0] , t ); task_addunlock( cj->progeny[3]->sorts[0] , t ); - t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[5] , cj->progeny[3] , NULL , 0 , NULL , 0 ); - task_addunlock( ci->progeny[5]->sorts[1] , t ); task_addunlock( cj->progeny[3]->sorts[1] , t ); - t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[6] , cj->progeny[3] , NULL , 0 , NULL , 0 ); - task_addunlock( ci->progeny[6]->sorts[3] , t ); task_addunlock( cj->progeny[3]->sorts[3] , t ); - t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[7] , cj->progeny[3] , NULL , 0 , NULL , 0 ); - task_addunlock( ci->progeny[7]->sorts[4] , t ); task_addunlock( cj->progeny[3]->sorts[4] , t ); - ci->progeny[4]->nr_pairs += 4; - ci->progeny[5]->nr_pairs += 4; - ci->progeny[6]->nr_pairs += 4; - ci->progeny[7]->nr_pairs += 4; - cj->progeny[0]->nr_pairs += 4; - cj->progeny[1]->nr_pairs += 4; - cj->progeny[2]->nr_pairs += 4; - cj->progeny[3]->nr_pairs += 4; + t = space_addtask( s , task_type_pair , t->subtype , 1 , 0 , ci->progeny[5] , cj->progeny[3] , NULL , 0 , NULL , 0 ); + t = space_addtask( s , task_type_pair , t->subtype , 3 , 0 , ci->progeny[6] , cj->progeny[3] , NULL , 0 , NULL , 0 ); + t = space_addtask( s , task_type_pair , t->subtype , 4 , 0 , ci->progeny[7] , cj->progeny[3] , NULL , 0 , NULL , 0 ); break; case 5: /* ( 1 , 0 , -1 ) */ - t->ci = ci->progeny[4]; t->cj = cj->progeny[1]; - task_addunlock( ci->progeny[4]->sorts[5] , t ); task_addunlock( cj->progeny[1]->sorts[5] , t ); - t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[6] , cj->progeny[3] , NULL , 0 , NULL , 0 ); - task_addunlock( ci->progeny[6]->sorts[5] , t ); task_addunlock( 
cj->progeny[3]->sorts[5] , t ); - t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[4] , cj->progeny[3] , NULL , 0 , NULL , 0 ); - task_addunlock( ci->progeny[4]->sorts[2] , t ); task_addunlock( cj->progeny[3]->sorts[2] , t ); - t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[6] , cj->progeny[1] , NULL , 0 , NULL , 0 ); - task_addunlock( ci->progeny[6]->sorts[8] , t ); task_addunlock( cj->progeny[1]->sorts[8] , t ); - ci->progeny[4]->nr_pairs += 2; - ci->progeny[6]->nr_pairs += 2; - cj->progeny[1]->nr_pairs += 2; - cj->progeny[3]->nr_pairs += 2; + t->ci = ci->progeny[4]; t->cj = cj->progeny[1]; t->flags = 5; + t = space_addtask( s , task_type_pair , t->subtype , 5 , 0 , ci->progeny[6] , cj->progeny[3] , NULL , 0 , NULL , 0 ); + t = space_addtask( s , task_type_pair , t->subtype , 2 , 0 , ci->progeny[4] , cj->progeny[3] , NULL , 0 , NULL , 0 ); + t = space_addtask( s , task_type_pair , t->subtype , 8 , 0 , ci->progeny[6] , cj->progeny[1] , NULL , 0 , NULL , 0 ); break; case 6: /* ( 1 , -1 , 1 ) */ - t->ci = ci->progeny[5]; t->cj = cj->progeny[2]; - task_addunlock( ci->progeny[5]->sorts[6] , t ); task_addunlock( cj->progeny[2]->sorts[6] , t ); - ci->progeny[5]->nr_pairs += 1; - cj->progeny[2]->nr_pairs += 1; + t->ci = ci->progeny[5]; t->cj = cj->progeny[2]; t->flags = 6; break; case 7: /* ( 1 , -1 , 0 ) */ - t->ci = ci->progeny[4]; t->cj = cj->progeny[3]; - task_addunlock( ci->progeny[4]->sorts[6] , t ); task_addunlock( cj->progeny[3]->sorts[6] , t ); - t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[5] , cj->progeny[2] , NULL , 0 , NULL , 0 ); - task_addunlock( ci->progeny[5]->sorts[8] , t ); task_addunlock( cj->progeny[2]->sorts[8] , t ); - t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[4] , cj->progeny[2] , NULL , 0 , NULL , 0 ); - task_addunlock( ci->progeny[4]->sorts[7] , t ); task_addunlock( cj->progeny[2]->sorts[7] , t ); - t = space_addtask( s , task_type_pair , 
t->subtype , 0 , 0 , ci->progeny[5] , cj->progeny[3] , NULL , 0 , NULL , 0 ); - task_addunlock( ci->progeny[5]->sorts[7] , t ); task_addunlock( cj->progeny[3]->sorts[7] , t ); - ci->progeny[4]->nr_pairs += 2; - ci->progeny[5]->nr_pairs += 2; - cj->progeny[2]->nr_pairs += 2; - cj->progeny[3]->nr_pairs += 2; + t->ci = ci->progeny[4]; t->cj = cj->progeny[3]; t->flags = 6; + t = space_addtask( s , task_type_pair , t->subtype , 8 , 0 , ci->progeny[5] , cj->progeny[2] , NULL , 0 , NULL , 0 ); + t = space_addtask( s , task_type_pair , t->subtype , 7 , 0 , ci->progeny[4] , cj->progeny[2] , NULL , 0 , NULL , 0 ); + t = space_addtask( s , task_type_pair , t->subtype , 7 , 0 , ci->progeny[5] , cj->progeny[3] , NULL , 0 , NULL , 0 ); break; case 8: /* ( 1 , -1 , -1 ) */ - t->ci = ci->progeny[4]; t->cj = cj->progeny[3]; - task_addunlock( ci->progeny[4]->sorts[8] , t ); task_addunlock( cj->progeny[3]->sorts[8] , t ); - ci->progeny[4]->nr_pairs += 1; - cj->progeny[3]->nr_pairs += 1; + t->ci = ci->progeny[4]; t->cj = cj->progeny[3]; t->flags = 8; break; case 9: /* ( 0 , 1 , 1 ) */ - t->ci = ci->progeny[3]; t->cj = cj->progeny[0]; - task_addunlock( ci->progeny[3]->sorts[9] , t ); task_addunlock( cj->progeny[0]->sorts[9] , t ); - t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[7] , cj->progeny[4] , NULL , 0 , NULL , 0 ); - task_addunlock( ci->progeny[7]->sorts[9] , t ); task_addunlock( cj->progeny[4]->sorts[9] , t ); + t->ci = ci->progeny[3]; t->cj = cj->progeny[0]; t->flags = 9; + t = space_addtask( s , task_type_pair , t->subtype , 9 , 0 , ci->progeny[7] , cj->progeny[4] , NULL , 0 , NULL , 0 ); t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[3] , cj->progeny[4] , NULL , 0 , NULL , 0 ); - task_addunlock( ci->progeny[3]->sorts[0] , t ); task_addunlock( cj->progeny[4]->sorts[0] , t ); - t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[7] , cj->progeny[0] , NULL , 0 , NULL , 0 ); - task_addunlock( 
ci->progeny[7]->sorts[8] , t ); task_addunlock( cj->progeny[0]->sorts[8] , t ); - ci->progeny[3]->nr_pairs += 2; - ci->progeny[7]->nr_pairs += 2; - cj->progeny[0]->nr_pairs += 2; - cj->progeny[4]->nr_pairs += 2; + t = space_addtask( s , task_type_pair , t->subtype , 8 , 0 , ci->progeny[7] , cj->progeny[0] , NULL , 0 , NULL , 0 ); break; case 10: /* ( 0 , 1 , 0 ) */ - t->ci = ci->progeny[2]; t->cj = cj->progeny[0]; - task_addunlock( ci->progeny[2]->sorts[10] , t ); task_addunlock( cj->progeny[0]->sorts[10] , t ); - t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[3] , cj->progeny[0] , NULL , 0 , NULL , 0 ); - task_addunlock( ci->progeny[3]->sorts[11] , t ); task_addunlock( cj->progeny[0]->sorts[11] , t ); - t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[6] , cj->progeny[0] , NULL , 0 , NULL , 0 ); - task_addunlock( ci->progeny[6]->sorts[7] , t ); task_addunlock( cj->progeny[0]->sorts[7] , t ); - t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[7] , cj->progeny[0] , NULL , 0 , NULL , 0 ); - task_addunlock( ci->progeny[7]->sorts[6] , t ); task_addunlock( cj->progeny[0]->sorts[6] , t ); - t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[2] , cj->progeny[1] , NULL , 0 , NULL , 0 ); - task_addunlock( ci->progeny[2]->sorts[9] , t ); task_addunlock( cj->progeny[1]->sorts[9] , t ); - t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[3] , cj->progeny[1] , NULL , 0 , NULL , 0 ); - task_addunlock( ci->progeny[3]->sorts[10] , t ); task_addunlock( cj->progeny[1]->sorts[10] , t ); - t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[6] , cj->progeny[1] , NULL , 0 , NULL , 0 ); - task_addunlock( ci->progeny[6]->sorts[8] , t ); task_addunlock( cj->progeny[1]->sorts[8] , t ); - t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[7] , cj->progeny[1] , NULL , 0 , NULL , 0 ); - task_addunlock( ci->progeny[7]->sorts[7] , t ); 
task_addunlock( cj->progeny[1]->sorts[7] , t ); - t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[2] , cj->progeny[4] , NULL , 0 , NULL , 0 ); - task_addunlock( ci->progeny[2]->sorts[1] , t ); task_addunlock( cj->progeny[4]->sorts[1] , t ); - t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[3] , cj->progeny[4] , NULL , 0 , NULL , 0 ); - task_addunlock( ci->progeny[3]->sorts[2] , t ); task_addunlock( cj->progeny[4]->sorts[2] , t ); - t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[6] , cj->progeny[4] , NULL , 0 , NULL , 0 ); - task_addunlock( ci->progeny[6]->sorts[10] , t ); task_addunlock( cj->progeny[4]->sorts[10] , t ); - t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[7] , cj->progeny[4] , NULL , 0 , NULL , 0 ); - task_addunlock( ci->progeny[7]->sorts[11] , t ); task_addunlock( cj->progeny[4]->sorts[11] , t ); + t->ci = ci->progeny[2]; t->cj = cj->progeny[0]; t->flags = 10; + t = space_addtask( s , task_type_pair , t->subtype , 11 , 0 , ci->progeny[3] , cj->progeny[0] , NULL , 0 , NULL , 0 ); + t = space_addtask( s , task_type_pair , t->subtype , 7 , 0 , ci->progeny[6] , cj->progeny[0] , NULL , 0 , NULL , 0 ); + t = space_addtask( s , task_type_pair , t->subtype , 6 , 0 , ci->progeny[7] , cj->progeny[0] , NULL , 0 , NULL , 0 ); + t = space_addtask( s , task_type_pair , t->subtype , 9 , 0 , ci->progeny[2] , cj->progeny[1] , NULL , 0 , NULL , 0 ); + t = space_addtask( s , task_type_pair , t->subtype , 10 , 0 , ci->progeny[3] , cj->progeny[1] , NULL , 0 , NULL , 0 ); + t = space_addtask( s , task_type_pair , t->subtype , 8 , 0 , ci->progeny[6] , cj->progeny[1] , NULL , 0 , NULL , 0 ); + t = space_addtask( s , task_type_pair , t->subtype , 7 , 0 , ci->progeny[7] , cj->progeny[1] , NULL , 0 , NULL , 0 ); + t = space_addtask( s , task_type_pair , t->subtype , 1 , 0 , ci->progeny[2] , cj->progeny[4] , NULL , 0 , NULL , 0 ); + t = space_addtask( s , task_type_pair , 
t->subtype , 2 , 0 , ci->progeny[3] , cj->progeny[4] , NULL , 0 , NULL , 0 ); + t = space_addtask( s , task_type_pair , t->subtype , 10 , 0 , ci->progeny[6] , cj->progeny[4] , NULL , 0 , NULL , 0 ); + t = space_addtask( s , task_type_pair , t->subtype , 11 , 0 , ci->progeny[7] , cj->progeny[4] , NULL , 0 , NULL , 0 ); t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[2] , cj->progeny[5] , NULL , 0 , NULL , 0 ); - task_addunlock( ci->progeny[2]->sorts[0] , t ); task_addunlock( cj->progeny[5]->sorts[0] , t ); - t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[3] , cj->progeny[5] , NULL , 0 , NULL , 0 ); - task_addunlock( ci->progeny[3]->sorts[1] , t ); task_addunlock( cj->progeny[5]->sorts[1] , t ); - t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[6] , cj->progeny[5] , NULL , 0 , NULL , 0 ); - task_addunlock( ci->progeny[6]->sorts[9] , t ); task_addunlock( cj->progeny[5]->sorts[9] , t ); - t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[7] , cj->progeny[5] , NULL , 0 , NULL , 0 ); - task_addunlock( ci->progeny[7]->sorts[10] , t ); task_addunlock( cj->progeny[5]->sorts[10] , t ); - ci->progeny[2]->nr_pairs += 4; - ci->progeny[3]->nr_pairs += 4; - ci->progeny[6]->nr_pairs += 4; - ci->progeny[7]->nr_pairs += 4; - cj->progeny[0]->nr_pairs += 4; - cj->progeny[1]->nr_pairs += 4; - cj->progeny[4]->nr_pairs += 4; - cj->progeny[5]->nr_pairs += 4; + t = space_addtask( s , task_type_pair , t->subtype , 1 , 0 , ci->progeny[3] , cj->progeny[5] , NULL , 0 , NULL , 0 ); + t = space_addtask( s , task_type_pair , t->subtype , 9 , 0 , ci->progeny[6] , cj->progeny[5] , NULL , 0 , NULL , 0 ); + t = space_addtask( s , task_type_pair , t->subtype , 10 , 0 , ci->progeny[7] , cj->progeny[5] , NULL , 0 , NULL , 0 ); break; case 11: /* ( 0 , 1 , -1 ) */ - t->ci = ci->progeny[2]; t->cj = cj->progeny[1]; - task_addunlock( ci->progeny[2]->sorts[11] , t ); task_addunlock( cj->progeny[1]->sorts[11] , 
t ); - t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[6] , cj->progeny[5] , NULL , 0 , NULL , 0 ); - task_addunlock( ci->progeny[6]->sorts[11] , t ); task_addunlock( cj->progeny[5]->sorts[11] , t ); - t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[2] , cj->progeny[5] , NULL , 0 , NULL , 0 ); - task_addunlock( ci->progeny[2]->sorts[2] , t ); task_addunlock( cj->progeny[5]->sorts[2] , t ); - t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[6] , cj->progeny[1] , NULL , 0 , NULL , 0 ); - task_addunlock( ci->progeny[6]->sorts[6] , t ); task_addunlock( cj->progeny[1]->sorts[6] , t ); - ci->progeny[2]->nr_pairs += 2; - ci->progeny[6]->nr_pairs += 2; - cj->progeny[1]->nr_pairs += 2; - cj->progeny[5]->nr_pairs += 2; + t->ci = ci->progeny[2]; t->cj = cj->progeny[1]; t->flags = 11; + t = space_addtask( s , task_type_pair , t->subtype , 11 , 0 , ci->progeny[6] , cj->progeny[5] , NULL , 0 , NULL , 0 ); + t = space_addtask( s , task_type_pair , t->subtype , 2 , 0 , ci->progeny[2] , cj->progeny[5] , NULL , 0 , NULL , 0 ); + t = space_addtask( s , task_type_pair , t->subtype , 6 , 0 , ci->progeny[6] , cj->progeny[1] , NULL , 0 , NULL , 0 ); break; case 12: /* ( 0 , 0 , 1 ) */ - t->ci = ci->progeny[1]; t->cj = cj->progeny[0]; - task_addunlock( ci->progeny[1]->sorts[12] , t ); task_addunlock( cj->progeny[0]->sorts[12] , t ); - t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[3] , cj->progeny[0] , NULL , 0 , NULL , 0 ); - task_addunlock( ci->progeny[3]->sorts[11] , t ); task_addunlock( cj->progeny[0]->sorts[11] , t ); - t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[5] , cj->progeny[0] , NULL , 0 , NULL , 0 ); - task_addunlock( ci->progeny[5]->sorts[5] , t ); task_addunlock( cj->progeny[0]->sorts[5] , t ); - t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[7] , cj->progeny[0] , NULL , 0 , NULL , 0 ); - task_addunlock( 
ci->progeny[7]->sorts[2] , t ); task_addunlock( cj->progeny[0]->sorts[2] , t ); - t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[1] , cj->progeny[2] , NULL , 0 , NULL , 0 ); - task_addunlock( ci->progeny[1]->sorts[9] , t ); task_addunlock( cj->progeny[2]->sorts[9] , t ); - t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[3] , cj->progeny[2] , NULL , 0 , NULL , 0 ); - task_addunlock( ci->progeny[3]->sorts[12] , t ); task_addunlock( cj->progeny[2]->sorts[12] , t ); - t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[5] , cj->progeny[2] , NULL , 0 , NULL , 0 ); - task_addunlock( ci->progeny[5]->sorts[8] , t ); task_addunlock( cj->progeny[2]->sorts[8] , t ); - t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[7] , cj->progeny[2] , NULL , 0 , NULL , 0 ); - task_addunlock( ci->progeny[7]->sorts[5] , t ); task_addunlock( cj->progeny[2]->sorts[5] , t ); - t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[1] , cj->progeny[4] , NULL , 0 , NULL , 0 ); - task_addunlock( ci->progeny[1]->sorts[3] , t ); task_addunlock( cj->progeny[4]->sorts[3] , t ); - t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[3] , cj->progeny[4] , NULL , 0 , NULL , 0 ); - task_addunlock( ci->progeny[3]->sorts[6] , t ); task_addunlock( cj->progeny[4]->sorts[6] , t ); - t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[5] , cj->progeny[4] , NULL , 0 , NULL , 0 ); - task_addunlock( ci->progeny[5]->sorts[12] , t ); task_addunlock( cj->progeny[4]->sorts[12] , t ); - t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[7] , cj->progeny[4] , NULL , 0 , NULL , 0 ); - task_addunlock( ci->progeny[7]->sorts[11] , t ); task_addunlock( cj->progeny[4]->sorts[11] , t ); + t->ci = ci->progeny[1]; t->cj = cj->progeny[0]; t->flags = 12; + t = space_addtask( s , task_type_pair , t->subtype , 11 , 0 , ci->progeny[3] , cj->progeny[0] , NULL , 
0 , NULL , 0 ); + t = space_addtask( s , task_type_pair , t->subtype , 5 , 0 , ci->progeny[5] , cj->progeny[0] , NULL , 0 , NULL , 0 ); + t = space_addtask( s , task_type_pair , t->subtype , 2 , 0 , ci->progeny[7] , cj->progeny[0] , NULL , 0 , NULL , 0 ); + t = space_addtask( s , task_type_pair , t->subtype , 9 , 0 , ci->progeny[1] , cj->progeny[2] , NULL , 0 , NULL , 0 ); + t = space_addtask( s , task_type_pair , t->subtype , 12 , 0 , ci->progeny[3] , cj->progeny[2] , NULL , 0 , NULL , 0 ); + t = space_addtask( s , task_type_pair , t->subtype , 8 , 0 , ci->progeny[5] , cj->progeny[2] , NULL , 0 , NULL , 0 ); + t = space_addtask( s , task_type_pair , t->subtype , 5 , 0 , ci->progeny[7] , cj->progeny[2] , NULL , 0 , NULL , 0 ); + t = space_addtask( s , task_type_pair , t->subtype , 3 , 0 , ci->progeny[1] , cj->progeny[4] , NULL , 0 , NULL , 0 ); + t = space_addtask( s , task_type_pair , t->subtype , 6 , 0 , ci->progeny[3] , cj->progeny[4] , NULL , 0 , NULL , 0 ); + t = space_addtask( s , task_type_pair , t->subtype , 12 , 0 , ci->progeny[5] , cj->progeny[4] , NULL , 0 , NULL , 0 ); + t = space_addtask( s , task_type_pair , t->subtype , 11 , 0 , ci->progeny[7] , cj->progeny[4] , NULL , 0 , NULL , 0 ); t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[1] , cj->progeny[6] , NULL , 0 , NULL , 0 ); - task_addunlock( ci->progeny[1]->sorts[0] , t ); task_addunlock( cj->progeny[6]->sorts[0] , t ); - t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[3] , cj->progeny[6] , NULL , 0 , NULL , 0 ); - task_addunlock( ci->progeny[3]->sorts[3] , t ); task_addunlock( cj->progeny[6]->sorts[3] , t ); - t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[5] , cj->progeny[6] , NULL , 0 , NULL , 0 ); - task_addunlock( ci->progeny[5]->sorts[9] , t ); task_addunlock( cj->progeny[6]->sorts[9] , t ); - t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[7] , cj->progeny[6] , NULL , 0 , NULL , 0 ); - 
task_addunlock( ci->progeny[7]->sorts[12] , t ); task_addunlock( cj->progeny[6]->sorts[12] , t ); - ci->progeny[1]->nr_pairs += 4; - ci->progeny[3]->nr_pairs += 4; - ci->progeny[5]->nr_pairs += 4; - ci->progeny[7]->nr_pairs += 4; - cj->progeny[0]->nr_pairs += 4; - cj->progeny[2]->nr_pairs += 4; - cj->progeny[4]->nr_pairs += 4; - cj->progeny[6]->nr_pairs += 4; + t = space_addtask( s , task_type_pair , t->subtype , 3 , 0 , ci->progeny[3] , cj->progeny[6] , NULL , 0 , NULL , 0 ); + t = space_addtask( s , task_type_pair , t->subtype , 9 , 0 , ci->progeny[5] , cj->progeny[6] , NULL , 0 , NULL , 0 ); + t = space_addtask( s , task_type_pair , t->subtype , 12 , 0 , ci->progeny[7] , cj->progeny[6] , NULL , 0 , NULL , 0 ); break; } } /* split this task? */ + /* Otherwise, if not spilt, stitch-up the sorting. */ + else { + + /* Create the sort for ci. */ + if ( ci->sorts[0] == NULL ) + ci->sorts[0] = space_addtask( s , task_type_sort , 0 , 1 << sid , 0 , ci , NULL , NULL , 0 , NULL , 0 ); + ci->sorts[0]->flags |= (1 << sid); + task_addunlock( ci->sorts[0] , t ); + + /* Create the sort for cj. */ + if ( cj->sorts[0] == NULL ) + cj->sorts[0] = space_addtask( s , task_type_sort , 0 , 1 << sid , 0 , cj , NULL , NULL , 0 , NULL , 0 ); + cj->sorts[0]->flags |= (1 << sid); + task_addunlock( cj->sorts[0] , t ); + + } + } /* pair interaction? */ } /* loop over all tasks. */ @@ -1007,80 +918,213 @@ void space_splittasks ( struct space *s ) { /** - * @brief Fill the #space's task list. + * @brief Generate the sorts for a sub recursively. * * @param s The #space we are working in. - * @param do_sort Flag to add sorting tasks to the list. 
*/ -void space_maketasks ( struct space *s , int do_sort ) { +void space_addsorts ( struct space *s , struct task *t , struct cell *ci , struct cell *cj , int sid ) { - int i, j, k, ii, jj, kk, iii, jjj, kkk, cid, cjd; - int *cdim = s->cdim; - struct task *t , *t2; - int counts[task_type_count]; + float h; + double shift[3]; + int j, k; - /* Recursive function to generate sorting tasks in the cell tree. */ - void maketasks_sort_rec ( struct cell *c ) { + /* Get the cell dimensions. */ + h = fmin( ci->h[0] , fmin( ci->h[1] , ci->h[2] ) ); + + /* Single-cell sub? */ + if ( cj == NULL ) { + + /* If there is further splitting, add the pairs recursively. */ + if ( ci->split ) { + + /* Recurse for each progeny. */ + for ( j = 0 ; j < 8 ; j++ ) + if ( ci->progeny[j] != NULL ) + space_addsorts( s , t , ci->progeny[j] , NULL , -1 ); + + /* Recurse for each pair of progeny. */ + for ( j = 0 ; j < 8 ; j++ ) + if ( ci->progeny[j] != NULL ) + for ( k = j + 1 ; k < 8 ; k++ ) + if ( ci->progeny[k] != NULL ) + space_addsorts( s , t , ci->progeny[j] , ci->progeny[k] , -1 ); - int j, k; - struct task *t; + } - /* Clear the waits on this cell. */ - c->wait = 0; + } - /* Start by generating the sort task. */ - if ( c->count > 0 ) { + /* Otherwise, it's a pair. 
*/ + else { - if ( do_sort ) { - if ( c->count < 1000 ) { - t = space_addtask( s , task_type_sort , task_subtype_none , 0x1fff , 0 , c , NULL , NULL , 0 , NULL , 0 ); - for ( k = 0 ; k < 13 ; k++ ) - c->sorts[k] = t; - } - else if ( c->count < 5000 ) { - t = space_addtask( s , task_type_sort , task_subtype_none , 0x7f , 0 , c , NULL , NULL , 0 , NULL , 0 ); - for ( k = 0 ; k < 7 ; k++ ) - c->sorts[k] = t; - t = space_addtask( s , task_type_sort , task_subtype_none , 0x1f80 , 0 , c , NULL , NULL , 0 , NULL , 0 ); - for ( k = 7 ; k < 14 ; k++ ) - c->sorts[k] = t; - } - else { - c->sorts[0] = c->sorts[1] = space_addtask( s , task_type_sort , task_subtype_none , 0x1 + 0x2 , 0 , c , NULL , NULL , 0 , NULL , 0 ); - c->sorts[2] = c->sorts[3] = space_addtask( s , task_type_sort , task_subtype_none , 0x4 + 0x8 , 0 , c , NULL , NULL , 0 , NULL , 0 ); - c->sorts[4] = c->sorts[5] = space_addtask( s , task_type_sort , task_subtype_none , 0x10 + 0x20 , 0 , c , NULL , NULL , 0 , NULL , 0 ); - c->sorts[6] = c->sorts[7] = space_addtask( s , task_type_sort , task_subtype_none , 0x40 + 0x80 , 0 , c , NULL , NULL , 0 , NULL , 0 ); - c->sorts[8] = c->sorts[9] = space_addtask( s , task_type_sort , task_subtype_none , 0x100 + 0x200 , 0 , c , NULL , NULL , 0 , NULL , 0 ); - c->sorts[10] = c->sorts[11] = space_addtask( s , task_type_sort , task_subtype_none , 0x400 + 0x800 , 0 , c , NULL , NULL , 0 , NULL , 0 ); - c->sorts[12] = c->sorts[13] = space_addtask( s , task_type_sort , task_subtype_none , 0x1000 , 0 , c , NULL , NULL , 0 , NULL , 0 ); - } - } + /* Get the sort ID if not specified. */ + // if ( sid < 0 ) + sid = space_getsid( s , &ci , &cj , shift ); + + /* If there is no further splitting, add the sorts. */ + if ( !ci->split || !cj->split || + ci->h_max*2 >= h || cj->h_max*2 >= h ) { + + /* Create and add the sort for ci. 
*/ + if ( ci->sorts[0] == NULL ) + ci->sorts[0] = space_addtask( s , task_type_sort , 0 , 1 << sid , 0 , ci , NULL , NULL , 0 , NULL , 0 ); + ci->sorts[0]->flags |= (1 << sid); + task_addunlock( ci->sorts[0] , t ); + + /* Create and add the sort for cj. */ + if ( cj->sorts[0] == NULL ) + cj->sorts[0] = space_addtask( s , task_type_sort , 0 , 1 << sid , 0 , cj , NULL , NULL , 0 , NULL , 0 ); + cj->sorts[0]->flags |= (1 << sid); + task_addunlock( cj->sorts[0] , t ); } + + /* Otherwise, recurse. */ + else { + + /* For each different sorting type... */ + switch ( sid ) { + + case 0: /* ( 1 , 1 , 1 ) */ + space_addsorts( s , t , ci->progeny[7] , cj->progeny[0] , 0 ); + break; + + case 1: /* ( 1 , 1 , 0 ) */ + space_addsorts( s , t , ci->progeny[6] , cj->progeny[0] , 1 ); + space_addsorts( s , t , ci->progeny[7] , cj->progeny[1] , 1 ); + space_addsorts( s , t , ci->progeny[6] , cj->progeny[1] , 0 ); + space_addsorts( s , t , ci->progeny[7] , cj->progeny[0] , 2 ); + break; + + case 2: /* ( 1 , 1 , -1 ) */ + space_addsorts( s , t , ci->progeny[6] , cj->progeny[1] , 2 ); + break; + + case 3: /* ( 1 , 0 , 1 ) */ + space_addsorts( s , t , ci->progeny[5] , cj->progeny[0] , 3 ); + space_addsorts( s , t , ci->progeny[7] , cj->progeny[2] , 3 ); + space_addsorts( s , t , ci->progeny[5] , cj->progeny[2] , 0 ); + space_addsorts( s , t , ci->progeny[7] , cj->progeny[0] , 6 ); + break; + + case 4: /* ( 1 , 0 , 0 ) */ + space_addsorts( s , t , ci->progeny[4] , cj->progeny[0] , 4 ); + space_addsorts( s , t , ci->progeny[5] , cj->progeny[0] , 5 ); + space_addsorts( s , t , ci->progeny[6] , cj->progeny[0] , 7 ); + space_addsorts( s , t , ci->progeny[7] , cj->progeny[0] , 8 ); + space_addsorts( s , t , ci->progeny[4] , cj->progeny[1] , 3 ); + space_addsorts( s , t , ci->progeny[5] , cj->progeny[1] , 4 ); + space_addsorts( s , t , ci->progeny[6] , cj->progeny[1] , 6 ); + space_addsorts( s , t , ci->progeny[7] , cj->progeny[1] , 7 ); + space_addsorts( s , t , ci->progeny[4] , cj->progeny[2] 
, 1 ); + space_addsorts( s , t , ci->progeny[5] , cj->progeny[2] , 2 ); + space_addsorts( s , t , ci->progeny[6] , cj->progeny[2] , 4 ); + space_addsorts( s , t , ci->progeny[7] , cj->progeny[2] , 5 ); + space_addsorts( s , t , ci->progeny[4] , cj->progeny[3] , 0 ); + space_addsorts( s , t , ci->progeny[5] , cj->progeny[3] , 1 ); + space_addsorts( s , t , ci->progeny[6] , cj->progeny[3] , 3 ); + space_addsorts( s , t , ci->progeny[7] , cj->progeny[3] , 4 ); + break; + + case 5: /* ( 1 , 0 , -1 ) */ + space_addsorts( s , t , ci->progeny[4] , cj->progeny[1] , 5 ); + space_addsorts( s , t , ci->progeny[6] , cj->progeny[3] , 5 ); + space_addsorts( s , t , ci->progeny[4] , cj->progeny[3] , 2 ); + space_addsorts( s , t , ci->progeny[6] , cj->progeny[1] , 8 ); + break; + + case 6: /* ( 1 , -1 , 1 ) */ + space_addsorts( s , t , ci->progeny[5] , cj->progeny[2] , 6 ); + break; + + case 7: /* ( 1 , -1 , 0 ) */ + space_addsorts( s , t , ci->progeny[4] , cj->progeny[3] , 6 ); + space_addsorts( s , t , ci->progeny[5] , cj->progeny[2] , 8 ); + space_addsorts( s , t , ci->progeny[4] , cj->progeny[2] , 7 ); + space_addsorts( s , t , ci->progeny[5] , cj->progeny[3] , 7 ); + break; + + case 8: /* ( 1 , -1 , -1 ) */ + space_addsorts( s , t , ci->progeny[4] , cj->progeny[3] , 8 ); + break; + + case 9: /* ( 0 , 1 , 1 ) */ + space_addsorts( s , t , ci->progeny[3] , cj->progeny[0] , 9 ); + space_addsorts( s , t , ci->progeny[7] , cj->progeny[4] , 9 ); + space_addsorts( s , t , ci->progeny[3] , cj->progeny[4] , 0 ); + space_addsorts( s , t , ci->progeny[7] , cj->progeny[0] , 8 ); + break; + + case 10: /* ( 0 , 1 , 0 ) */ + space_addsorts( s , t , ci->progeny[2] , cj->progeny[0] , 10 ); + space_addsorts( s , t , ci->progeny[3] , cj->progeny[0] , 11 ); + space_addsorts( s , t , ci->progeny[6] , cj->progeny[0] , 7 ); + space_addsorts( s , t , ci->progeny[7] , cj->progeny[0] , 6 ); + space_addsorts( s , t , ci->progeny[2] , cj->progeny[1] , 9 ); + space_addsorts( s , t , ci->progeny[3] , 
cj->progeny[1] , 10 ); + space_addsorts( s , t , ci->progeny[6] , cj->progeny[1] , 8 ); + space_addsorts( s , t , ci->progeny[7] , cj->progeny[1] , 7 ); + space_addsorts( s , t , ci->progeny[2] , cj->progeny[4] , 1 ); + space_addsorts( s , t , ci->progeny[3] , cj->progeny[4] , 2 ); + space_addsorts( s , t , ci->progeny[6] , cj->progeny[4] , 10 ); + space_addsorts( s , t , ci->progeny[7] , cj->progeny[4] , 11 ); + space_addsorts( s , t , ci->progeny[2] , cj->progeny[5] , 0 ); + space_addsorts( s , t , ci->progeny[3] , cj->progeny[5] , 1 ); + space_addsorts( s , t , ci->progeny[6] , cj->progeny[5] , 9 ); + space_addsorts( s , t , ci->progeny[7] , cj->progeny[5] , 10 ); + break; + + case 11: /* ( 0 , 1 , -1 ) */ + space_addsorts( s , t , ci->progeny[2] , cj->progeny[1] , 11 ); + space_addsorts( s , t , ci->progeny[6] , cj->progeny[5] , 11 ); + space_addsorts( s , t , ci->progeny[2] , cj->progeny[5] , 2 ); + space_addsorts( s , t , ci->progeny[6] , cj->progeny[1] , 6 ); + break; + + case 12: /* ( 0 , 0 , 1 ) */ + space_addsorts( s , t , ci->progeny[1] , cj->progeny[0] , 12 ); + space_addsorts( s , t , ci->progeny[3] , cj->progeny[0] , 11 ); + space_addsorts( s , t , ci->progeny[5] , cj->progeny[0] , 5 ); + space_addsorts( s , t , ci->progeny[7] , cj->progeny[0] , 2 ); + space_addsorts( s , t , ci->progeny[1] , cj->progeny[2] , 9 ); + space_addsorts( s , t , ci->progeny[3] , cj->progeny[2] , 12 ); + space_addsorts( s , t , ci->progeny[5] , cj->progeny[2] , 8 ); + space_addsorts( s , t , ci->progeny[7] , cj->progeny[2] , 5 ); + space_addsorts( s , t , ci->progeny[1] , cj->progeny[4] , 3 ); + space_addsorts( s , t , ci->progeny[3] , cj->progeny[4] , 6 ); + space_addsorts( s , t , ci->progeny[5] , cj->progeny[4] , 12 ); + space_addsorts( s , t , ci->progeny[7] , cj->progeny[4] , 11 ); + space_addsorts( s , t , ci->progeny[1] , cj->progeny[6] , 0 ); + space_addsorts( s , t , ci->progeny[3] , cj->progeny[6] , 3 ); + space_addsorts( s , t , ci->progeny[5] , cj->progeny[6] , 9 
); + space_addsorts( s , t , ci->progeny[7] , cj->progeny[6] , 12 ); + break; + + } /* switch. */ + + } /* recurse. */ - /* Otherwise, add the interactions between progeny. */ - if ( c->split ) { - - /* Loop over the progeny. */ - for ( k = 0 ; k < 8 ; k++ ) - if ( c->progeny[k] != NULL ) { - - /* Recurse. */ - maketasks_sort_rec( c->progeny[k] ); - - /* Add dependencies between the sorts. */ - for ( j = 0 ; j < 14 ; j++ ) - if ( j == 0 || c->sorts[j] != c->sorts[j-1] ) - task_addunlock( c->progeny[k]->sorts[j] , c->sorts[j] ); - - } - - } + } /* it's a pair. */ + + } + + +/** + * @brief Fill the #space's task list. + * + * @param s The #space we are working in. + * @param do_sort Flag to add sorting tasks to the list. + */ + +void space_maketasks ( struct space *s , int do_sort ) { + + int i, j, k, ii, jj, kk, iii, jjj, kkk, cid, cjd, sid; + int *cdim = s->cdim; + struct task *t, *t2; + struct cell *ci, *cj; + // float dt_max = s->dt_max; + int counts[task_type_count]; - } /* void maketasks_sort_rec. */ - - /* Allocate the task-list, if needed. */ if ( s->tasks == NULL || s->tasks_size < s->tot_cells * space_maxtaskspercell ) { if ( s->tasks != NULL ) @@ -1095,10 +1139,6 @@ void space_maketasks ( struct space *s , int do_sort ) { } s->nr_tasks = 0; - /* Loop over the cells and generate their sorting tasks. */ - for ( k = 0 ; k < s->nr_cells ; k++ ) - maketasks_sort_rec( &s->cells[k] ); - /* Run through the highest level of cells and add pairs. 
*/ for ( i = 0 ; i < cdim[0] ; i++ ) for ( j = 0 ; j < cdim[1] ; j++ ) @@ -1106,7 +1146,11 @@ void space_maketasks ( struct space *s , int do_sort ) { cid = cell_getid( cdim , i , j , k ); if ( s->cells[cid].count == 0 ) continue; - space_addtask( s , task_type_self , task_subtype_density , 0 , 0 , &s->cells[cid] , NULL , NULL , 0 , NULL , 0 ); + ci = &s->cells[cid]; + if ( ci->count == 0 ) + continue; + // if ( ci->dt_min <= dt_max ) + space_addtask( s , task_type_self , task_subtype_density , 0 , 0 , ci , NULL , NULL , 0 , NULL , 0 ); for ( ii = -1 ; ii < 2 ; ii++ ) { iii = i + ii; if ( !s->periodic && ( iii < 0 || iii >= cdim[0] ) ) @@ -1123,15 +1167,12 @@ void space_maketasks ( struct space *s , int do_sort ) { continue; kkk = ( kkk + cdim[2] ) % cdim[2]; cjd = cell_getid( cdim , iii , jjj , kkk ); - if ( s->cells[cjd].count == 0 ) - continue; - if ( cid >= cjd ) + cj = &s->cells[cjd]; + if ( cid >= cjd || cj->count == 0 /* || + ( ci->dt_min > dt_max && cj->dt_min > dt_max ) */ ) continue; - t = space_addtask( s , task_type_pair , task_subtype_density , 0 , 0 , &s->cells[cid] , &s->cells[cjd] , NULL , 0 , NULL , 0 ); - task_addunlock( s->cells[cid].sorts[ sortlistID[ (kk+1) + 3*( (jj+1) + 3*(ii+1) ) ] ] , t ); - task_addunlock( s->cells[cjd].sorts[ sortlistID[ (kk+1) + 3*( (jj+1) + 3*(ii+1) ) ] ] , t ); - s->cells[cid].nr_pairs += 1; - s->cells[cjd].nr_pairs += 1; + sid = sortlistID[ (kk+1) + 3*( (jj+1) + 3*(ii+1) ) ]; + t = space_addtask( s , task_type_pair , task_subtype_density , sid , 0 , ci , cj , NULL , 0 , NULL , 0 ); } } } @@ -1140,50 +1181,15 @@ void space_maketasks ( struct space *s , int do_sort ) { /* Split the tasks. */ space_splittasks( s ); - /* Remove pairs and self-interactions for cells with no parts in dt. */ + /* Make each sort depend on the sorts of its progeny. 
*/ for ( k = 0 ; k < s->nr_tasks ; k++ ) { t = &s->tasks[k]; - if ( t->type == task_type_self ) { - if ( t->ci->dt_min > s->dt_max ) - t->type = task_type_none; - } - else if ( t->type == task_type_pair ) { - if ( t->ci->dt_min > s->dt_max && t->cj->dt_min > s->dt_max ) { - t->type = task_type_none; - for ( j = 0 ; j < 13 ; j++ ) - task_rmunlock_blind( t->ci->sorts[j] , t ); - for ( j = 0 ; j < 13 ; j++ ) - task_rmunlock_blind( t->cj->sorts[j] , t ); - } - } - else if ( t->type == task_type_sub ) { - if ( t->ci->dt_min > s->dt_max && ( t->cj == NULL || t->cj->dt_min > s->dt_max ) ) { - t->type = task_type_none; - for ( j = 0 ; j < 13 ; j++ ) - task_rmunlock_blind( t->ci->sorts[j] , t ); - if ( t->cj != NULL ) - for ( j = 0 ; j < 13 ; j++ ) - task_rmunlock_blind( t->cj->sorts[j] , t ); - } - } + if ( t->type == task_type_sort && t->ci->split ) + for ( j = 0 ; j < 8 ; j++ ) + if ( t->ci->progeny[j] != NULL && t->ci->progeny[j]->sorts[0] != NULL ) + task_addunlock( t->ci->progeny[j]->sorts[0] , t ); } - /* Remove sort tasks with no dependencies. */ - for ( k = 0 ; k < s->nr_tasks ; k++ ) { - t = &s->tasks[k]; - if ( t->type == task_type_sort && t->nr_unlock_tasks == 0 ) { - if ( t->ci->split ) - for ( i = 0 ; i < 13 ; i++ ) - if ( t->flags & ( 1 << i ) ) { - for ( j = 0 ; j < 8 ; j++ ) - if ( t->ci->progeny[j] != NULL ) - task_rmunlock_blind( t->ci->progeny[j]->sorts[i] , t ); - t->ci->sorts[i] = NULL; - } - t->type = task_type_none; - } - } - /* Count the number of tasks associated with each cell and store the density tasks in each cell. */ space_map_cells( s , 1 , &space_map_clearnrtasks , NULL ); @@ -1245,7 +1251,7 @@ void space_maketasks ( struct space *s , int do_sort ) { task_addunlock( t->ci->ghost , t2 ); task_addunlock( t->cj->ghost , t2 ); } - + /* Otherwise, sub interaction? 
*/ else if ( t->type == task_type_sub && t->subtype == task_subtype_density ) { task_addunlock( t , t->ci->super->ghost ); @@ -1272,7 +1278,7 @@ void space_maketasks ( struct space *s , int do_sort ) { for ( k = 1 ; k < task_type_count ; k++ ) printf( " %s=%i" , taskID_names[k] , counts[k] ); printf( " ]\n" ); - + } diff --git a/src/space.h b/src/space.h index 0c52849363cae498a2d58930d4906e013bb3dc28..2dc0f8878f7404942bf1bd975826183284893c58 100644 --- a/src/space.h +++ b/src/space.h @@ -26,7 +26,7 @@ #define space_splitratio 0.875 #define space_splitsize_default 400 #define space_subsize_default 1000 -#define space_dosub 0 +#define space_dosub 1 #define space_stretch 1.0 #define space_maxtaskspercell 43 @@ -98,10 +98,12 @@ struct space { /* function prototypes. */ +void space_addsorts ( struct space *s , struct task *t , struct cell *ci , struct cell *cj , int sid ); void parts_sort ( struct part *parts , int *ind , int N , int min , int max ); struct cell *space_getcell ( struct space *s ); struct task *space_gettask ( struct space *s ); struct task *space_addtask ( struct space *s , int type , int subtype , int flags , int wait , struct cell *ci , struct cell *cj , struct task *unlock_tasks[] , int nr_unlock_tasks , struct cell *unlock_cells[] , int nr_unlock_cells ); +int space_getsid ( struct space *s , struct cell **ci , struct cell **cj , double *shift ); void space_init ( struct space *s , double dim[3] , struct part *parts , int N , int periodic , double h_max ); void space_maketasks ( struct space *s , int do_sort ); void space_map_cells ( struct space *s , int full , void (*fun)( struct cell *c , void *data ) , void *data ); diff --git a/src/task.c b/src/task.c index 81b9babfcac4df407aa31756a2bc3526d93d8aca..a4c3eb5fba51f863c8614745a343c8c84f08fc14 100644 --- a/src/task.c +++ b/src/task.c @@ -44,6 +44,30 @@ const char *taskID_names[task_type_count] = { "none" , "sort" , "self" , "pair" #define error(s) { fprintf( stderr , "%s:%s:%i: %s\n" , __FILE__ , 
__FUNCTION__ , __LINE__ , s ); abort(); } +/** + * @breif Remove all unlocks to tasks that are of the given type. + * + * @param t The #task. + * @param type The task type ID to remove. + */ + +void task_cleanunlock ( struct task *t , int type ) { + + int k; + + lock_lock( &t->lock ); + + for ( k = 0 ; k < t->nr_unlock_tasks ; k++ ) + if ( t->unlock_tasks[k]->type == type ) { + t->nr_unlock_tasks -= 1; + t->unlock_tasks[k] = t->unlock_tasks[ t->nr_unlock_tasks ]; + } + + lock_unlock_blind( &t->lock ); + + } + + /** * @brief Remove an unlock_task from the given task. * @@ -108,10 +132,6 @@ void task_addunlock( struct task *ta , struct task *tb ) { int k; - /* Bogus? */ - if ( ta == NULL || tb == NULL ) - return; - lock_lock( &ta->lock ); /* Check if ta already unlocks tb. */ diff --git a/src/task.h b/src/task.h index cd1c692f8147e9c5a354c5da02addedf3eb85148..662d838decbb6d98e67cab4df418bf53acf6da50 100644 --- a/src/task.h +++ b/src/task.h @@ -65,4 +65,5 @@ struct task { /* Function prototypes. */ void task_rmunlock( struct task *ta , struct task *tb ); void task_rmunlock_blind( struct task *ta , struct task *tb ); +void task_cleanunlock ( struct task *t , int type ); void task_addunlock( struct task *ta , struct task *tb );