diff --git a/src/cell.h b/src/cell.h index 63ff82c1003404e8929023c79ad083e396c27268..96d39cb43d1c6e39a621df4766251944a5743f13 100644 --- a/src/cell.h +++ b/src/cell.h @@ -38,7 +38,7 @@ struct cell { double h[3]; /* Max radii in this cell. */ - double r_max; + double h_max; /* The depth of this cell in the tree. */ int depth, split; diff --git a/src/engine.c b/src/engine.c index 22fd0562c3fd0876288c9a29e4b852f32114c472..71004d6bfa8b30068f85b6c2ebeda1f698f29f2f 100644 --- a/src/engine.c +++ b/src/engine.c @@ -55,6 +55,10 @@ * @brief Sort the tasks in topological order over all queues. * * @param e The #engine. + * + * TODO: Return the indices tid as these are the tasks sorted according + * to their ranks. They can then be dropped into the queues in order + * of these indices. */ void engine_ranktasks ( struct engine *e ) { diff --git a/src/runner.c b/src/runner.c index 549e6e10d2ff2e31a164e94bd627d2a26085c259..2abcfd237b161a61f22e8e1db7b8757af6e4dfdd 100644 --- a/src/runner.c +++ b/src/runner.c @@ -485,6 +485,9 @@ void runner_doghost ( struct runner *r , struct cell *c ) { printf( "runner_doghost: particle %i has bad wcount=%f.\n" , p->id , p->wcount + kernel_root ); pid[redo] = pid[i]; redo += 1; + p->wcount = 0.0; + p->rho = 0.0; + p->rho_dh = 0.0; continue; } diff --git a/src/runner_doiact.h b/src/runner_doiact.h index 20e5798b3525b1551214e9573627f91143525a20..2ef802978894c040fdfa936778fd7e7f12293d18 100644 --- a/src/runner_doiact.h +++ b/src/runner_doiact.h @@ -99,7 +99,7 @@ void DOPAIR_NAIVE ( struct runner *r , struct cell *ci , struct cell *cj ) { pj = &parts_j[ pjd ]; /* Compute the pairwise distance. */ - r2 = 0.0; + r2 = 0.0f; for ( k = 0 ; k < 3 ; k++ ) { dx[k] = pix[k] - pj->x[k]; r2 += dx[k]*dx[k]; @@ -117,7 +117,7 @@ void DOPAIR_NAIVE ( struct runner *r , struct cell *ci , struct cell *cj ) { } /* loop over the parts in ci. */ #ifdef TIMER_VERBOSE - printf( "runner_dopair_naive[%02i]: %i/%i parts at depth %i (r_max=%.3f/%.3f) took %.3f ms.\n" , r->id , count_i , count_j , ci->depth , ci->r_max , cj->r_max , ((double)TIMER_TOC(TIMER_DOPAIR)) / CPU_TPS * 1000 ); + printf( "runner_dopair_naive[%02i]: %i/%i parts at depth %i (r_max=%.3f/%.3f) took %.3f ms.\n" , r->id , count_i , count_j , ci->depth , ci->h_max , cj->h_max , ((double)TIMER_TOC(TIMER_DOPAIR)) / CPU_TPS * 1000 ); #else TIMER_TOC(TIMER_DOPAIR); #endif @@ -144,7 +144,8 @@ void DOPAIR ( struct runner *r , struct cell *ci , struct cell *cj ) { struct part *pi, *pj, *parts_i, *parts_j; double pix[3], pjx[3], di, dj; float dx[3], hi, hi2, hj, hj2, r2; - double hi_max, hj_max, di_max, dj_min; + double hi_max, hj_max; + double di_max, dj_min; int count_i, count_j; TIMER_TIC @@ -172,9 +173,17 @@ void DOPAIR ( struct runner *r , struct cell *ci , struct cell *cj ) { for ( rshift = 0.0 , k = 0 ; k < 3 ; k++ ) rshift += shift[k]*runner_shift[ 3*sid + k ]; - /* printf( "runner_dopair: doing pair [ %g %g %g ]/[ %g %g %g ] with %i/%i parts and shift = [ %g %g %g ].\n" , - ci->loc[0] , ci->loc[1] , ci->loc[2] , cj->loc[0] , cj->loc[1] , cj->loc[2] , - ci->count , cj->count , shift[0] , shift[1] , shift[2] ); fflush(stdout); */ + /* for ( k = 0 ; k < ci->count ; k++ ) + if ( ci->parts[k].id == 561590 ) + break; + if ( k == ci->count ) + for ( k = 0 ; k < cj->count ; k++ ) + if ( cj->parts[k].id == 561590 ) + break; + if ( k < cj->count ) + printf( "runner_dopair: doing pair [ %g %g %g ]/[ %g %g %g ] with %i/%i parts, h_max=%g/%g, and shift = [ %g %g %g ] (rshift=%g).\n" , + ci->loc[0] , ci->loc[1] , ci->loc[2] , cj->loc[0] , cj->loc[1] , cj->loc[2] , + ci->count , cj->count , ci->h_max , cj->h_max , shift[0] , shift[1] , shift[2] , rshift ); fflush(stdout); */ /* for ( hi = 0 , k = 0 ; k < ci->count ; k++ ) hi += ci->parts[k].r; for ( hj = 0 , k = 0 ; k < cj->count ; k++ ) @@ -186,7 +195,7 @@ void DOPAIR ( struct runner *r , struct cell *ci , struct cell *cj ) { sort_j = &cj->sort[ sid*(cj->count + 1) ]; /* Get some other useful values. */ - hi_max = ci->r_max - rshift; hj_max = cj->r_max - rshift; + hi_max = ci->h_max - rshift; hj_max = cj->h_max; count_i = ci->count; count_j = cj->count; parts_i = ci->parts; parts_j = cj->parts; di_max = sort_i[count_i-1].d - rshift; @@ -216,7 +225,7 @@ void DOPAIR ( struct runner *r , struct cell *ci , struct cell *cj ) { pj = &parts_j[ sort_j[pjd].i ]; /* Compute the pairwise distance. */ - r2 = 0.0; + r2 = 0.0f; for ( k = 0 ; k < 3 ; k++ ) { dx[k] = pix[k] - pj->x[k]; r2 += dx[k]*dx[k]; @@ -237,7 +246,7 @@ void DOPAIR ( struct runner *r , struct cell *ci , struct cell *cj ) { tic = getticks(); */ /* Loop over the parts in cj. */ - for ( pjd = 0 ; pjd < count_j && sort_j[pjd].d - hj_max < di_max ; pjd++ ) { + for ( pjd = 0 ; pjd < count_j && ( 1 || sort_j[pjd].d - hj_max < di_max ) ; pjd++ ) { /* Get a hold of the jth part in cj. */ pj = &parts_j[ sort_j[ pjd ].i ]; @@ -257,7 +266,7 @@ void DOPAIR ( struct runner *r , struct cell *ci , struct cell *cj ) { pi = &parts_i[ sort_i[pid].i ]; /* Compute the pairwise distance. */ - r2 = 0.0; + r2 = 0.0f; for ( k = 0 ; k < 3 ; k++ ) { dx[k] = pi->x[k] - pjx[k]; r2 += dx[k]*dx[k]; @@ -275,7 +284,7 @@ void DOPAIR ( struct runner *r , struct cell *ci , struct cell *cj ) { } /* loop over the parts in ci. */ #ifdef TIMER_VERBOSE - printf( "runner_dopair[%02i]: %i/%i parts at depth %i (r_max=%.3f/%.3f, h=%.3f) took %.3f ms.\n" , r->id , count_i , count_j , ci->depth , ci->r_max , cj->r_max , fmax(ci->h[0],fmax(ci->h[1],ci->h[2])) , ((double)(TIMER_TOC(TIMER_DOPAIR))) / CPU_TPS * 1000 ); + printf( "runner_dopair[%02i]: %i/%i parts at depth %i (r_max=%.3f/%.3f, h=%.3f) took %.3f ms.\n" , r->id , count_i , count_j , ci->depth , ci->h_max , cj->h_max , fmax(ci->h[0],fmax(ci->h[1],ci->h[2])) , ((double)(TIMER_TOC(TIMER_DOPAIR))) / CPU_TPS * 1000 ); #else TIMER_TOC(TIMER_DOPAIR); #endif @@ -320,7 +329,7 @@ void DOSELF ( struct runner *r , struct cell *c ) { pj = &parts[pjd]; /* Compute the pairwise distance. */ - r2 = 0.0; + r2 = 0.0f; for ( k = 0 ; k < 3 ; k++ ) { dx[k] = pix[k] - pj->x[k]; r2 += dx[k]*dx[k]; @@ -330,7 +339,7 @@ void DOSELF ( struct runner *r , struct cell *c ) { if ( r2 < hi2 || r2 < pj->h*pj->h ) { IACT( r2 , dx , hi , pj->h , pi , pj ); - + } } /* loop over all other particles. */ diff --git a/src/space.c b/src/space.c index 17dfa659caec1114dba33095077f5dffd8888b70..1eaec456a06bec080b06bc00d634155815efe6e2 100644 --- a/src/space.c +++ b/src/space.c @@ -41,9 +41,6 @@ /* Error macro. */ #define error(s) { printf( "%s:%s:%i: %s\n" , __FILE__ , __FUNCTION__ , __LINE__ , s ); abort(); } -/* Convert cell location to ID. */ -#define cell_getid( cdim , i , j , k ) ( (int)(k) + (cdim)[2]*( (int)(j) + (cdim)[1]*(int)(i) ) ) - /* Split size. */ int space_splitsize = space_splitsize_default; @@ -95,6 +92,10 @@ void space_map_clearsort ( struct cell *c , void *data ) { /** * @brief Mapping function to append a ghost task to each cell. + * + * Looks for the super cell, e.g. the highest-level cell above each + * cell for which a pair is defined. All ghosts below this cell will + * depend on the ghost of their parents (sounds spooky, but it isn't). */ void space_map_mkghosts ( struct cell *c , void *data ) { @@ -109,7 +110,7 @@ void space_map_mkghosts ( struct cell *c , void *data ) { if ( finger->nr_tasks > 0 ) c->super = finger; - /* Make the ghost task, if needed. */ + /* Make the ghost task */ if ( c->super != c || c->nr_tasks > 0 ) c->ghost = space_addtask( s , task_type_ghost , task_subtype_none , 0 , 0 , c , NULL , NULL , 0 , NULL , 0 ); @@ -359,7 +360,7 @@ void space_splittasks ( struct space *s ) { hj = fmax( cj->h[0] , fmax( cj->h[1] , cj->h[2] ) ); /* Should this task be split-up? */ - if ( ci->split && cj->split && ci->r_max < hi/2 && cj->r_max < hj/2 ) { + if ( ci->split && cj->split && ci->h_max < hi/2 && cj->h_max < hj/2 ) { /* Get the relative distance between the pairs, wrapping. */ for ( k = 0 ; k < 3 ; k++ ) { @@ -800,7 +801,7 @@ void space_maketasks ( struct space *s , int do_sort ) { int i, j, k, ii, jj, kk, iii, jjj, kkk, cid, cjd; int *cdim = s->cdim; int nr_tasks_old = s->nr_tasks; - struct task *t, *t2; + struct task *t , *t2; int pts[7][8] = { { -1 , 12 , 10 , 9 , 4 , 3 , 1 , 0 } , { -1 , -1 , 11 , 10 , 5 , 4 , 2 , 1 } , { -1 , -1 , -1 , 12 , 7 , 6 , 4 , 3 } , @@ -968,7 +969,7 @@ void space_maketasks ( struct space *s , int do_sort ) { if ( t->flags & ( 1 << i ) ) { for ( j = 0 ; j < 8 ; j++ ) if ( t->ci->progeny[j] != NULL ) - task_rmunlock( t->ci->progeny[j]->sorts[i] , t ); + task_rmunlock_blind( t->ci->progeny[j]->sorts[i] , t ); t->ci->sorts[i] = NULL; } t->type = task_type_none; @@ -976,7 +977,7 @@ void space_maketasks ( struct space *s , int do_sort ) { } /* Count the number of tasks associated with each cell. */ - space_map_cells( s , 1 , &space_map_mkghosts , NULL ); + space_map_cells( s , 1 , &space_map_clearnrtasks , NULL ); for ( k = 0 ; k < s->nr_tasks ; k++ ) { t = &s->tasks[k]; if ( t->type == task_type_self ) @@ -1010,8 +1011,6 @@ void space_maketasks ( struct space *s , int do_sort ) { /* Otherwise, pair interaction? */ else if ( t->type == task_type_pair && t->subtype == task_subtype_density ) { - if ( t->ci->ghost == NULL ) - printf( "space_maketasks: cell at %lx has no ghost!\n" , (unsigned long int)t->ci ); task_addunlock( t , t->ci->super->ghost ); task_addunlock( t , t->cj->super->ghost ); t2 = space_addtask( s , task_type_pair , task_subtype_force , 0 , 0 , t->ci , t->cj , NULL , 0 , NULL , 0 ); @@ -1069,7 +1068,7 @@ void space_maketasks ( struct space *s , int do_sort ) { void space_split ( struct space *s , struct cell *c ) { int k, count; - double h, h_limit, r_max = 0.0; + double h, h_limit, h_max = 0.0; struct cell *temp; /* Check the depth. */ @@ -1084,10 +1083,10 @@ void space_split ( struct space *s , struct cell *c ) { h = c->parts[k].h; if ( h <= h_limit ) count += 1; - if ( h > r_max ) - r_max = h; + if ( h > h_max ) + h_max = h; } - c->r_max = r_max; + c->h_max = h_max; /* Split or let it be? */ if ( count > c->count*space_splitratio && c->count > space_splitsize ) { @@ -1113,7 +1112,7 @@ void space_split ( struct space *s , struct cell *c ) { temp->loc[2] += temp->h[2]; temp->depth = c->depth + 1; temp->split = 0; - temp->r_max = 0.0; + temp->h_max = 0.0; temp->parent = c; c->progeny[k] = temp; } @@ -1237,7 +1236,7 @@ void space_init ( struct space *s , double dim[3] , struct part *parts , int N , if ( h_cells < h_max ) h_cells = h_max; for ( k = 0 ; k < 3 ; k++ ) { - cdim[k] = ceil( dim[k] / h_cells ); + cdim[k] = floor( dim[k] / h_cells ); h[k] = dim[k] / cdim[k]; ih[k] = 1.0 / h[k]; } diff --git a/src/space.h b/src/space.h index 426d6ec04d0b592390f21f67190b09e2deaa26d5..e358744474a9650b07abbc6ef7ca5be0b873dd40 100644 --- a/src/space.h +++ b/src/space.h @@ -24,10 +24,13 @@ #define space_maxdepth 10 #define space_cellallocchunk 1000 #define space_splitratio 0.875 -#define space_splitsize_default 800 +#define space_splitsize_default 400 #define space_dosub 1 +/* Convert cell location to ID. */ +#define cell_getid( cdim , i , j , k ) ( (int)(k) + (cdim)[2]*( (int)(j) + (cdim)[1]*(int)(i) ) ) + /* Split size. */ extern int space_splitsize; diff --git a/src/task.c b/src/task.c index 6ba4f9b71db48ca495b79dbcc43ae3287a82ccfb..7a0b8a5b94da32bce914dea53139631b6b3d8236 100644 --- a/src/task.c +++ b/src/task.c @@ -66,6 +66,30 @@ void task_rmunlock( struct task *ta , struct task *tb ) { } +/** + * @brief Remove an unlock_task from the given task. + * + * @param ta The unlocking #task. + * @param tb The #task that will be unlocked. + * + * Differs from #task_rmunlock in that it will not fail if + * the task @c tb is not in the unlocks of @c ta. + */ + +void task_rmunlock_blind( struct task *ta , struct task *tb ) { + + int k; + + for ( k = 0 ; k < ta->nr_unlock_tasks ; k++ ) + if ( ta->unlock_tasks[k] == tb ) { + ta->nr_unlock_tasks -= 1; + ta->unlock_tasks[k] = ta->unlock_tasks[ ta->nr_unlock_tasks ]; + return; + } + + } + + /** * @brief Add an unlock_task to the given task. * diff --git a/src/task.h b/src/task.h index 6ae474e116cc42eed4d99cb9485ca176ab3884ea..2a4d20fd55b69f947d5850211085b1c23e4f5071 100644 --- a/src/task.h +++ b/src/task.h @@ -62,4 +62,5 @@ struct task { /* Function prototypes. */ void task_rmunlock( struct task *ta , struct task *tb ); +void task_rmunlock_blind( struct task *ta , struct task *tb ); void task_addunlock( struct task *ta , struct task *tb );