diff --git a/src/cell.h b/src/cell.h index 96d39cb43d1c6e39a621df4766251944a5743f13..920ae422e5eb5fca6baa4c78d90e5559ef34b54a 100644 --- a/src/cell.h +++ b/src/cell.h @@ -67,6 +67,10 @@ struct cell { /* The tasks computing this cell's sorts. */ struct task *sorts[14]; + /* The tasks computing this cell's density. */ + struct task *density[27]; + int nr_density; + /* The ghost task to link density to interactions. */ struct task *ghost; diff --git a/src/engine.c b/src/engine.c index 71004d6bfa8b30068f85b6c2ebeda1f698f29f2f..c9433f4d11b4c310c389873e3737b23a02873f22 100644 --- a/src/engine.c +++ b/src/engine.c @@ -169,6 +169,14 @@ void engine_run ( struct engine *e , int sort_queues ) { int j, k; struct space *s = e->s; + /* Re-set the particle data. */ + for ( k = 0 ; k < s->nr_parts ; k++ ) { + s->parts[k].wcount = 0.0f; + s->parts[k].wcount_dh = 0.0f; + s->parts[k].rho = 0.0f; + s->parts[k].rho_dh = 0.0f; + } + /* Run throught the tasks and get all the waits right. */ for ( k = 0 ; k < s->nr_tasks ; k++ ) { s->tasks[k].done = 0; diff --git a/src/part.h b/src/part.h index e369295ce8654b2c8cc8f2f8f845785bbb090b63..b7338ab4b198b9e61e8be4ff7c2cf756ead5761a 100644 --- a/src/part.h +++ b/src/part.h @@ -72,6 +72,7 @@ struct part { /* Particle number density. */ int icount; float wcount; + float wcount_dh; } __attribute__((aligned (32))); diff --git a/src/runner.c b/src/runner.c index 2abcfd237b161a61f22e8e1db7b8757af6e4dfdd..4ab5ac78b618f2878c93daaa00470cbf90e0a98e 100644 --- a/src/runner.c +++ b/src/runner.c @@ -442,6 +442,7 @@ void runner_dosort ( struct runner *r , struct cell *c , int flags ) { void runner_doghost ( struct runner *r , struct cell *c ) { struct part *p; + struct cell *finger; int i, k, redo, count = c->count; int *pid; float ihg, ihg2; @@ -478,14 +479,18 @@ void runner_doghost ( struct runner *r , struct cell *c ) { ihg2 = ihg * ihg; p->rho *= ihg * ihg2; p->rho_dh *= ihg2 * ihg2; + + /* Update the smoothing length. */ + p->h -= ( p->wcount + kernel_root - const_nwneigh ) / p->wcount_dh; /* Did we get the right number density? */ if ( p->wcount + kernel_root > const_nwneigh + 1 || p->wcount + kernel_root < const_nwneigh - 1 ) { - printf( "runner_doghost: particle %i has bad wcount=%f.\n" , p->id , p->wcount + kernel_root ); + printf( "runner_doghost: particle %i has bad wcount=%f.\n" , p->id , p->wcount + kernel_root ); fflush(stdout); pid[redo] = pid[i]; redo += 1; p->wcount = 0.0; + p->wcount_dh = 0.0; p->rho = 0.0; p->rho_dh = 0.0; continue; @@ -512,8 +517,40 @@ void runner_doghost ( struct runner *r , struct cell *c ) { /* Re-set the counter for the next loop (potentially). */ count = redo; - if ( count > 0 ) - error( "Bad smoothing length, fixing this isn't implemented yet." ); + if ( count > 0 ) { + + // error( "Bad smoothing length, fixing this isn't implemented yet." ); + + /* Climb up the cell hierarchy. */ + for ( finger = c ; finger != NULL ; finger = finger->parent ) { + + /* Run through this cell's density interactions. */ + for ( k = 0 ; k < finger->nr_density ; k++ ) { + + /* Self-interaction? */ + if ( finger->density[k]->type == task_type_self ) + runner_doself_subset_density( r , finger , c->parts , pid , count ); + + /* Otherwise, pair interaction? */ + else if ( finger->density[k]->type == task_type_pair ) { + + /* Left or right? */ + if ( finger->density[k]->ci == finger ) + runner_dopair_subset_density( r , finger , c->parts , pid , count , finger->density[k]->cj ); + else + runner_dopair_subset_density( r , finger , c->parts , pid , count , finger->density[k]->ci ); + + } + + /* Otherwise, sub interaction? */ + else if ( finger->density[k]->type == task_type_sub ) + runner_dosub_subset_density( r , finger->density[k]->ci , finger->density[k]->cj , c , c->parts , pid , count , finger->density[k]->flags ); + + } + + } + + } } diff --git a/src/runner.h b/src/runner.h index 4d1a6ec64c6bf77276c5eacf078d54454596bff6..548165edf8a8913065c4629e1de30de04ff128eb 100644 --- a/src/runner.h +++ b/src/runner.h @@ -29,6 +29,7 @@ enum { runner_timer_dopair_force, runner_timer_dosub_density, runner_timer_dosub_force, + runner_timer_dopair_subset, runner_timer_doghost, runner_timer_getpair, runner_timer_steal, diff --git a/src/runner_doiact.h b/src/runner_doiact.h index 2ef802978894c040fdfa936778fd7e7f12293d18..7ffa3fa14473e544de7668ec01e1476eab99f398 100644 --- a/src/runner_doiact.h +++ b/src/runner_doiact.h @@ -29,15 +29,24 @@ #define DOPAIR2(f) PASTE(runner_dopair,f) #define DOPAIR DOPAIR2(FUNCTION) +#define DOPAIR_SUBSET2(f) PASTE(runner_dopair_subset,f) +#define DOPAIR_SUBSET DOPAIR_SUBSET2(FUNCTION) + #define DOPAIR_NAIVE2(f) PASTE(runner_dopair_naive,f) #define DOPAIR_NAIVE DOPAIR_NAIVE2(FUNCTION) #define DOSELF2(f) PASTE(runner_doself,f) #define DOSELF DOSELF2(FUNCTION) +#define DOSELF_SUBSET2(f) PASTE(runner_doself_subset,f) +#define DOSELF_SUBSET DOSELF_SUBSET2(FUNCTION) + #define DOSUB2(f) PASTE(runner_dosub,f) #define DOSUB DOSUB2(FUNCTION) +#define DOSUB_SUBSET2(f) PASTE(runner_dosub_subset,f) +#define DOSUB_SUBSET DOSUB_SUBSET2(FUNCTION) + #define IACT2(f) PASTE(runner_iact,f) #define IACT IACT2(FUNCTION) @@ -50,6 +59,12 @@ #define TIMER_DOSUB2(f) PASTE(runner_timer_dosub,f) #define TIMER_DOSUB TIMER_DOSUB2(FUNCTION) +#define TIMER_DOSELF_SUBSET2(f) PASTE(runner_timer_doself_subset,f) +#define TIMER_DOSELF_SUBSET TIMER_DOSELF_SUBSET2(FUNCTION) + +#define TIMER_DOPAIR_SUBSET2(f) PASTE(runner_timer_dopair_subset,f) +#define TIMER_DOPAIR_SUBSET TIMER_DOPAIR_SUBSET2(FUNCTION) + /** * @brief Compute the interactions between a cell pair. @@ -126,6 +141,158 @@ void DOPAIR_NAIVE ( struct runner *r , struct cell *ci , struct cell *cj ) { } +/** + * @brief Compute the interactions between a cell pair, but only for the + * given indices in ci. + * + * @param r The #runner. + * @param ci The first #cell. + * @param parts_i The #parts to interact with @c cj. + * @param ind The list of indices of particles in @c ci to interact with. + * @param count The number of particles in @c ind. + * @param cj The second #cell. + */ + +void DOPAIR_SUBSET ( struct runner *r , struct cell *ci , struct part *parts_i , int *ind , int count , struct cell *cj ) { + + struct engine *e = r->e; + int pid, pjd, k, count_j = cj->count; + double shift[3] = { 0.0 , 0.0 , 0.0 }; + struct part *pi, *pj, *parts_j = cj->parts; + double pix[3]; + float dx[3], hi, hi2, r2; + TIMER_TIC + + /* Get the relative distance between the pairs, wrapping. */ + for ( k = 0 ; k < 3 ; k++ ) { + if ( cj->loc[k] - ci->loc[k] < -e->s->dim[k]/2 ) + shift[k] = e->s->dim[k]; + else if ( cj->loc[k] - ci->loc[k] > e->s->dim[k]/2 ) + shift[k] = -e->s->dim[k]; + } + + /* printf( "runner_dopair_naive: doing pair [ %g %g %g ]/[ %g %g %g ] with %i/%i parts and shift = [ %g %g %g ].\n" , + ci->loc[0] , ci->loc[1] , ci->loc[2] , cj->loc[0] , cj->loc[1] , cj->loc[2] , + ci->count , cj->count , shift[0] , shift[1] , shift[2] ); fflush(stdout); + tic = getticks(); */ + + /* Loop over the parts in ci. */ + for ( pid = 0 ; pid < count ; pid++ ) { + + /* Get a hold of the ith part in ci. */ + pi = &parts_i[ ind[ pid ] ]; + for ( k = 0 ; k < 3 ; k++ ) + pix[k] = pi->x[k] - shift[k]; + hi = pi->h; + hi2 = hi * hi; + + /* Loop over the parts in cj. */ + for ( pjd = 0 ; pjd < count_j ; pjd++ ) { + + /* Get a pointer to the jth particle. */ + pj = &parts_j[ pjd ]; + + /* Compute the pairwise distance. */ + r2 = 0.0f; + for ( k = 0 ; k < 3 ; k++ ) { + dx[k] = pix[k] - pj->x[k]; + r2 += dx[k]*dx[k]; + } + + /* Hit or miss? */ + if ( r2 < hi2 ) { + + IACT( r2 , dx , hi , 0.0f , pi , pj ); + + } + + } /* loop over the parts in cj. */ + + } /* loop over the parts in ci. */ + + #ifdef TIMER_VERBOSE + printf( "runner_dopair_subset[%02i]: %i/%i parts at depth %i (r_max=%.3f/%.3f) took %.3f ms.\n" , r->id , count_i , count_j , ci->depth , ci->h_max , cj->h_max , ((double)TIMER_TOC(TIMER_DOPAIR)) / CPU_TPS * 1000 ); + #else + TIMER_TOC(runner_timer_dopair_subset); + #endif + + + } + + +/** + * @brief Compute the interactions between a cell pair, but only for the + * given indices in ci. + * + * @param r The #runner. + * @param ci The first #cell. + * @param parts_i The #parts to interact with @c cj. + * @param ind The list of indices of particles in @c ci to interact with. + * @param count The number of particles in @c ind. + * @param cj The second #cell. + */ + +void DOSELF_SUBSET ( struct runner *r , struct cell *ci , struct part *parts , int *ind , int count ) { + + int pid, pjd, k, count_i = ci->count; + struct part *pi, *pj, *parts_i = ci->parts; + double pix[3]; + float dx[3], hi, hi2, r2; + TIMER_TIC + + /* printf( "runner_dopair_naive: doing pair [ %g %g %g ]/[ %g %g %g ] with %i/%i parts and shift = [ %g %g %g ].\n" , + ci->loc[0] , ci->loc[1] , ci->loc[2] , cj->loc[0] , cj->loc[1] , cj->loc[2] , + ci->count , cj->count , shift[0] , shift[1] , shift[2] ); fflush(stdout); + tic = getticks(); */ + + /* Loop over the parts in ci. */ + for ( pid = 0 ; pid < count ; pid++ ) { + + /* Get a hold of the ith part in ci. */ + pi = &parts[ ind[ pid ] ]; + for ( k = 0 ; k < 3 ; k++ ) + pix[k] = pi->x[k]; + hi = pi->h; + hi2 = hi * hi; + + /* Loop over the parts in cj. */ + for ( pjd = 0 ; pjd < count_i ; pjd++ ) { + + /* Get a pointer to the jth particle. */ + pj = &parts_i[ pjd ]; + + /* Skip the particle itself. */ + if ( pj == pi ) + continue; + + /* Compute the pairwise distance. */ + r2 = 0.0f; + for ( k = 0 ; k < 3 ; k++ ) { + dx[k] = pix[k] - pj->x[k]; + r2 += dx[k]*dx[k]; + } + + /* Hit or miss? */ + if ( r2 < hi2 ) { + + IACT( r2 , dx , hi , 0.0f , pi , pj ); + + } + + } /* loop over the parts in cj. */ + + } /* loop over the parts in ci. */ + + #ifdef TIMER_VERBOSE + printf( "runner_doself_subset[%02i]: %i/%i parts at depth %i (r_max=%.3f/%.3f) took %.3f ms.\n" , r->id , count_i , count_j , ci->depth , ci->h_max , cj->h_max , ((double)TIMER_TOC(TIMER_DOPAIR)) / CPU_TPS * 1000 ); + #else + TIMER_TOC(runner_timer_dopair_subset); + #endif + + + } + + /** * @brief Compute the interactions between a cell pair. * @@ -562,3 +729,349 @@ void DOSUB ( struct runner *r , struct cell *ci , struct cell *cj , int flags ) } +void DOSUB_SUBSET ( struct runner *r , struct cell *ci , struct cell *cj , struct cell *sub , struct part *parts_i , int *ind , int count , int flags ) { + + int j, k; + + // TIMER_TIC + + /* Different types of flags. */ + switch ( flags ) { + + /* Regular sub-cell interactions of a single cell. */ + case 0: + for ( j = 0 ; j < 7 ; j++ ) + for ( k = j + 1 ; k < 8 ; k++ ) + if ( ci->progeny[j] == sub && ci->progeny[k] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[j] , parts_i , ind , count , ci->progeny[k] ); + else if ( ci->progeny[k] == sub && ci->progeny[j] != NULL) + DOPAIR_SUBSET( r , ci->progeny[k] , parts_i , ind , count , ci->progeny[j] ); + break; + + case 1: /* ( 1 , 1 , 0 ) */ + if ( ci->progeny[6] == sub && cj->progeny[0] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[6] , parts_i , ind , count , cj->progeny[0] ); + if ( ci->progeny[6] == sub && cj->progeny[1] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[6] , parts_i , ind , count , cj->progeny[1] ); + if ( ci->progeny[7] == sub && cj->progeny[0] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[7] , parts_i , ind , count , cj->progeny[0] ); + if ( ci->progeny[7] == sub && cj->progeny[1] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[7] , parts_i , ind , count , cj->progeny[1] ); + if ( cj->progeny[0] == sub && ci->progeny[6] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[0] , parts_i , ind , count , ci->progeny[6] ); + if ( cj->progeny[1] == sub && ci->progeny[6] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[1] , parts_i , ind , count , ci->progeny[6] ); + if ( cj->progeny[0] == sub && ci->progeny[7] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[0] , parts_i , ind , count , ci->progeny[7] ); + if ( cj->progeny[1] == sub && ci->progeny[7] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[1] , parts_i , ind , count , ci->progeny[7] ); + break; + + case 3: /* ( 1 , 0 , 1 ) */ + if ( ci->progeny[5] == sub && cj->progeny[0] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[5] , parts_i , ind , count , cj->progeny[0] ); + if ( cj->progeny[0] == sub && ci->progeny[5] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[0] , parts_i , ind , count , ci->progeny[5] ); + if ( ci->progeny[5] == sub && cj->progeny[2] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[5] , parts_i , ind , count , cj->progeny[2] ); + if ( cj->progeny[2] == sub && ci->progeny[5] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[2] , parts_i , ind , count , ci->progeny[5] ); + if ( ci->progeny[7] == sub && cj->progeny[0] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[7] , parts_i , ind , count , cj->progeny[0] ); + if ( cj->progeny[0] == sub && ci->progeny[7] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[0] , parts_i , ind , count , ci->progeny[7] ); + if ( ci->progeny[7] == sub && cj->progeny[2] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[7] , parts_i , ind , count , cj->progeny[2] ); + if ( cj->progeny[2] == sub && ci->progeny[7] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[2] , parts_i , ind , count , ci->progeny[7] ); + break; + + case 4: /* ( 1 , 0 , 0 ) */ + if ( ci->progeny[4] == sub && cj->progeny[0] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[4] , parts_i , ind , count , cj->progeny[0] ); + if ( cj->progeny[0] == sub && ci->progeny[4] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[0] , parts_i , ind , count , ci->progeny[4] ); + if ( ci->progeny[4] == sub && cj->progeny[1] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[4] , parts_i , ind , count , cj->progeny[1] ); + if ( cj->progeny[1] == sub && ci->progeny[4] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[1] , parts_i , ind , count , ci->progeny[4] ); + if ( ci->progeny[4] == sub && cj->progeny[2] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[4] , parts_i , ind , count , cj->progeny[2] ); + if ( cj->progeny[2] == sub && ci->progeny[4] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[2] , parts_i , ind , count , ci->progeny[4] ); + if ( ci->progeny[4] == sub && cj->progeny[3] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[4] , parts_i , ind , count , cj->progeny[3] ); + if ( cj->progeny[3] == sub && ci->progeny[4] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[3] , parts_i , ind , count , ci->progeny[4] ); + if ( ci->progeny[5] == sub && cj->progeny[0] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[5] , parts_i , ind , count , cj->progeny[0] ); + if ( cj->progeny[0] == sub && ci->progeny[5] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[0] , parts_i , ind , count , ci->progeny[5] ); + if ( ci->progeny[5] == sub && cj->progeny[1] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[5] , parts_i , ind , count , cj->progeny[1] ); + if ( cj->progeny[1] == sub && ci->progeny[5] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[1] , parts_i , ind , count , ci->progeny[5] ); + if ( ci->progeny[5] == sub && cj->progeny[2] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[5] , parts_i , ind , count , cj->progeny[2] ); + if ( cj->progeny[2] == sub && ci->progeny[5] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[2] , parts_i , ind , count , ci->progeny[5] ); + if ( ci->progeny[5] == sub && cj->progeny[3] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[5] , parts_i , ind , count , cj->progeny[3] ); + if ( cj->progeny[3] == sub && ci->progeny[5] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[3] , parts_i , ind , count , ci->progeny[5] ); + if ( ci->progeny[6] == sub && cj->progeny[0] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[6] , parts_i , ind , count , cj->progeny[0] ); + if ( cj->progeny[0] == sub && ci->progeny[6] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[0] , parts_i , ind , count , ci->progeny[6] ); + if ( ci->progeny[6] == sub && cj->progeny[1] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[6] , parts_i , ind , count , cj->progeny[1] ); + if ( cj->progeny[1] == sub && ci->progeny[6] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[1] , parts_i , ind , count , ci->progeny[6] ); + if ( ci->progeny[6] == sub && cj->progeny[2] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[6] , parts_i , ind , count , cj->progeny[2] ); + if ( cj->progeny[2] == sub && ci->progeny[6] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[2] , parts_i , ind , count , ci->progeny[6] ); + if ( ci->progeny[6] == sub && cj->progeny[3] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[6] , parts_i , ind , count , cj->progeny[3] ); + if ( cj->progeny[3] == sub && ci->progeny[6] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[3] , parts_i , ind , count , ci->progeny[6] ); + if ( ci->progeny[7] == sub && cj->progeny[0] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[7] , parts_i , ind , count , cj->progeny[0] ); + if ( cj->progeny[0] == sub && ci->progeny[7] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[0] , parts_i , ind , count , ci->progeny[7] ); + if ( ci->progeny[7] == sub && cj->progeny[1] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[7] , parts_i , ind , count , cj->progeny[1] ); + if ( cj->progeny[1] == sub && ci->progeny[7] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[1] , parts_i , ind , count , ci->progeny[7] ); + if ( ci->progeny[7] == sub && cj->progeny[2] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[7] , parts_i , ind , count , cj->progeny[2] ); + if ( cj->progeny[2] == sub && ci->progeny[7] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[2] , parts_i , ind , count , ci->progeny[7] ); + if ( ci->progeny[7] == sub && cj->progeny[3] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[7] , parts_i , ind , count , cj->progeny[3] ); + if ( cj->progeny[3] == sub && ci->progeny[7] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[3] , parts_i , ind , count , ci->progeny[7] ); + break; + + case 5: /* ( 1 , 0 , -1 ) */ + if ( ci->progeny[4] == sub && cj->progeny[1] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[4] , parts_i , ind , count , cj->progeny[1] ); + if ( cj->progeny[1] == sub && ci->progeny[4] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[1] , parts_i , ind , count , ci->progeny[4] ); + if ( ci->progeny[4] == sub && cj->progeny[3] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[4] , parts_i , ind , count , cj->progeny[3] ); + if ( cj->progeny[3] == sub && ci->progeny[4] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[3] , parts_i , ind , count , ci->progeny[4] ); + if ( ci->progeny[6] == sub && cj->progeny[1] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[6] , parts_i , ind , count , cj->progeny[1] ); + if ( cj->progeny[1] == sub && ci->progeny[6] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[1] , parts_i , ind , count , ci->progeny[6] ); + if ( ci->progeny[6] == sub && cj->progeny[3] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[6] , parts_i , ind , count , cj->progeny[3] ); + if ( cj->progeny[3] == sub && ci->progeny[6] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[3] , parts_i , ind , count , ci->progeny[6] ); + break; + + case 7: /* ( 1 , -1 , 0 ) */ + if ( ci->progeny[4] == sub && cj->progeny[2] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[4] , parts_i , ind , count , cj->progeny[2] ); + if ( cj->progeny[2] == sub && ci->progeny[4] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[2] , parts_i , ind , count , ci->progeny[4] ); + if ( ci->progeny[4] == sub && cj->progeny[3] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[4] , parts_i , ind , count , cj->progeny[3] ); + if ( cj->progeny[3] == sub && ci->progeny[4] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[3] , parts_i , ind , count , ci->progeny[4] ); + if ( ci->progeny[5] == sub && cj->progeny[2] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[5] , parts_i , ind , count , cj->progeny[2] ); + if ( cj->progeny[2] == sub && ci->progeny[5] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[2] , parts_i , ind , count , ci->progeny[5] ); + if ( ci->progeny[5] == sub && cj->progeny[3] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[5] , parts_i , ind , count , cj->progeny[3] ); + if ( cj->progeny[3] == sub && ci->progeny[5] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[3] , parts_i , ind , count , ci->progeny[5] ); + break; + + case 9: /* ( 0 , 1 , 1 ) */ + if ( ci->progeny[3] == sub && cj->progeny[0] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[3] , parts_i , ind , count , cj->progeny[0] ); + if ( cj->progeny[0] == sub && ci->progeny[3] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[0] , parts_i , ind , count , ci->progeny[3] ); + if ( ci->progeny[3] == sub && cj->progeny[4] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[3] , parts_i , ind , count , cj->progeny[4] ); + if ( cj->progeny[4] == sub && ci->progeny[3] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[4] , parts_i , ind , count , ci->progeny[3] ); + if ( ci->progeny[7] == sub && cj->progeny[0] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[7] , parts_i , ind , count , cj->progeny[0] ); + if ( cj->progeny[0] == sub && ci->progeny[7] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[0] , parts_i , ind , count , ci->progeny[7] ); + if ( ci->progeny[7] == sub && cj->progeny[4] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[7] , parts_i , ind , count , cj->progeny[4] ); + if ( cj->progeny[4] == sub && ci->progeny[7] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[4] , parts_i , ind , count , ci->progeny[7] ); + break; + + case 10: /* ( 0 , 1 , 0 ) */ + if ( ci->progeny[2] == sub && cj->progeny[0] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[2] , parts_i , ind , count , cj->progeny[0] ); + if ( cj->progeny[0] == sub && ci->progeny[2] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[0] , parts_i , ind , count , ci->progeny[2] ); + if ( ci->progeny[2] == sub && cj->progeny[1] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[2] , parts_i , ind , count , cj->progeny[1] ); + if ( cj->progeny[1] == sub && ci->progeny[2] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[1] , parts_i , ind , count , ci->progeny[2] ); + if ( ci->progeny[2] == sub && cj->progeny[4] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[2] , parts_i , ind , count , cj->progeny[4] ); + if ( cj->progeny[4] == sub && ci->progeny[2] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[4] , parts_i , ind , count , ci->progeny[2] ); + if ( ci->progeny[2] == sub && cj->progeny[5] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[2] , parts_i , ind , count , cj->progeny[5] ); + if ( cj->progeny[5] == sub && ci->progeny[2] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[5] , parts_i , ind , count , ci->progeny[2] ); + if ( ci->progeny[3] == sub && cj->progeny[0] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[3] , parts_i , ind , count , cj->progeny[0] ); + if ( cj->progeny[0] == sub && ci->progeny[3] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[0] , parts_i , ind , count , ci->progeny[3] ); + if ( ci->progeny[3] == sub && cj->progeny[1] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[3] , parts_i , ind , count , cj->progeny[1] ); + if ( cj->progeny[1] == sub && ci->progeny[3] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[1] , parts_i , ind , count , ci->progeny[3] ); + if ( ci->progeny[3] == sub && cj->progeny[4] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[3] , parts_i , ind , count , cj->progeny[4] ); + if ( cj->progeny[4] == sub && ci->progeny[3] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[4] , parts_i , ind , count , ci->progeny[3] ); + if ( ci->progeny[3] == sub && cj->progeny[5] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[3] , parts_i , ind , count , cj->progeny[5] ); + if ( cj->progeny[5] == sub && ci->progeny[3] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[5] , parts_i , ind , count , ci->progeny[3] ); + if ( ci->progeny[6] == sub && cj->progeny[0] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[6] , parts_i , ind , count , cj->progeny[0] ); + if ( cj->progeny[0] == sub && ci->progeny[6] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[0] , parts_i , ind , count , ci->progeny[6] ); + if ( ci->progeny[6] == sub && cj->progeny[1] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[6] , parts_i , ind , count , cj->progeny[1] ); + if ( cj->progeny[1] == sub && ci->progeny[6] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[1] , parts_i , ind , count , ci->progeny[6] ); + if ( ci->progeny[6] == sub && cj->progeny[4] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[6] , parts_i , ind , count , cj->progeny[4] ); + if ( cj->progeny[4] == sub && ci->progeny[6] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[4] , parts_i , ind , count , ci->progeny[6] ); + if ( ci->progeny[6] == sub && cj->progeny[5] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[6] , parts_i , ind , count , cj->progeny[5] ); + if ( cj->progeny[5] == sub && ci->progeny[6] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[5] , parts_i , ind , count , ci->progeny[6] ); + if ( ci->progeny[7] == sub && cj->progeny[0] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[7] , parts_i , ind , count , cj->progeny[0] ); + if ( cj->progeny[0] == sub && ci->progeny[7] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[0] , parts_i , ind , count , ci->progeny[7] ); + if ( ci->progeny[7] == sub && cj->progeny[1] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[7] , parts_i , ind , count , cj->progeny[1] ); + if ( cj->progeny[1] == sub && ci->progeny[7] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[1] , parts_i , ind , count , ci->progeny[7] ); + if ( ci->progeny[7] == sub && cj->progeny[4] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[7] , parts_i , ind , count , cj->progeny[4] ); + if ( cj->progeny[4] == sub && ci->progeny[7] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[4] , parts_i , ind , count , ci->progeny[7] ); + if ( ci->progeny[7] == sub && cj->progeny[5] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[7] , parts_i , ind , count , cj->progeny[5] ); + if ( cj->progeny[5] == sub && ci->progeny[7] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[5] , parts_i , ind , count , ci->progeny[7] ); + break; + + case 11: /* ( 0 , 1 , -1 ) */ + if ( ci->progeny[2] == sub && cj->progeny[1] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[2] , parts_i , ind , count , cj->progeny[1] ); + if ( cj->progeny[1] == sub && ci->progeny[2] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[1] , parts_i , ind , count , ci->progeny[2] ); + if ( ci->progeny[2] == sub && cj->progeny[5] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[2] , parts_i , ind , count , cj->progeny[5] ); + if ( cj->progeny[5] == sub && ci->progeny[2] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[5] , parts_i , ind , count , ci->progeny[2] ); + if ( ci->progeny[6] == sub && cj->progeny[1] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[6] , parts_i , ind , count , cj->progeny[1] ); + if ( cj->progeny[1] == sub && ci->progeny[6] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[1] , parts_i , ind , count , ci->progeny[6] ); + if ( ci->progeny[6] == sub && cj->progeny[5] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[6] , parts_i , ind , count , cj->progeny[5] ); + if ( cj->progeny[5] == sub && ci->progeny[6] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[5] , parts_i , ind , count , ci->progeny[6] ); + break; + + case 12: /* ( 0 , 0 , 1 ) */ + if ( ci->progeny[1] == sub && cj->progeny[0] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[1] , parts_i , ind , count , cj->progeny[0] ); + if ( cj->progeny[0] == sub && ci->progeny[1] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[0] , parts_i , ind , count , ci->progeny[1] ); + if ( ci->progeny[1] == sub && cj->progeny[2] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[1] , parts_i , ind , count , cj->progeny[2] ); + if ( cj->progeny[2] == sub && ci->progeny[1] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[2] , parts_i , ind , count , ci->progeny[1] ); + if ( ci->progeny[1] == sub && cj->progeny[4] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[1] , parts_i , ind , count , cj->progeny[4] ); + if ( cj->progeny[4] == sub && ci->progeny[1] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[4] , parts_i , ind , count , ci->progeny[1] ); + if ( ci->progeny[1] == sub && cj->progeny[6] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[1] , parts_i , ind , count , cj->progeny[6] ); + if ( cj->progeny[6] == sub && ci->progeny[1] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[6] , parts_i , ind , count , ci->progeny[1] ); + if ( ci->progeny[3] == sub && cj->progeny[0] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[3] , parts_i , ind , count , cj->progeny[0] ); + if ( cj->progeny[0] == sub && ci->progeny[3] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[0] , parts_i , ind , count , ci->progeny[3] ); + if ( ci->progeny[3] == sub && cj->progeny[2] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[3] , parts_i , ind , count , cj->progeny[2] ); + if ( cj->progeny[2] == sub && ci->progeny[3] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[2] , parts_i , ind , count , ci->progeny[3] ); + if ( ci->progeny[3] == sub && cj->progeny[4] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[3] , parts_i , ind , count , cj->progeny[4] ); + if ( cj->progeny[4] == sub && ci->progeny[3] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[4] , parts_i , ind , count , ci->progeny[3] ); + if ( ci->progeny[3] == sub && cj->progeny[6] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[3] , parts_i , ind , count , cj->progeny[6] ); + if ( cj->progeny[6] == sub && ci->progeny[3] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[6] , parts_i , ind , count , ci->progeny[3] ); + if ( ci->progeny[5] == sub && cj->progeny[0] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[5] , parts_i , ind , count , cj->progeny[0] ); + if ( cj->progeny[0] == sub && ci->progeny[5] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[0] , parts_i , ind , count , ci->progeny[5] ); + if ( ci->progeny[5] == sub && cj->progeny[2] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[5] , parts_i , ind , count , cj->progeny[2] ); + if ( cj->progeny[2] == sub && ci->progeny[5] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[2] , parts_i , ind , count , ci->progeny[5] ); + if ( ci->progeny[5] == sub && cj->progeny[4] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[5] , parts_i , ind , count , cj->progeny[4] ); + if ( cj->progeny[4] == sub && ci->progeny[5] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[4] , parts_i , ind , count , ci->progeny[5] ); + if ( ci->progeny[5] == sub && cj->progeny[6] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[5] , parts_i , ind , count , cj->progeny[6] ); + if ( cj->progeny[6] == sub && ci->progeny[5] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[6] , parts_i , ind , count , ci->progeny[5] ); + if ( ci->progeny[7] == sub && cj->progeny[0] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[7] , parts_i , ind , count , cj->progeny[0] ); + if ( cj->progeny[0] == sub && ci->progeny[7] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[0] , parts_i , ind , count , ci->progeny[7] ); + if ( ci->progeny[7] == sub && cj->progeny[2] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[7] , parts_i , ind , count , cj->progeny[2] ); + if ( cj->progeny[2] == sub && ci->progeny[7] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[2] , parts_i , ind , count , ci->progeny[7] ); + if ( ci->progeny[7] == sub && cj->progeny[4] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[7] , parts_i , ind , count , cj->progeny[4] ); + if ( cj->progeny[4] == sub && ci->progeny[7] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[4] , parts_i , ind , count , ci->progeny[7] ); + if ( ci->progeny[7] == sub && cj->progeny[6] != NULL ) + DOPAIR_SUBSET( r , ci->progeny[7] , parts_i , ind , count , cj->progeny[6] ); + if ( cj->progeny[6] == sub && ci->progeny[7] != NULL ) + DOPAIR_SUBSET( r , cj->progeny[6] , parts_i , ind , count , ci->progeny[7] ); + break; + + } + + + // #ifdef TIMER_VERBOSE + // printf( "runner_dosub[%02i]: flags=%i at depth %i took %.3f ms.\n" , r->id , flags , ci->depth , ((double)TIMER_TOC(TIMER_DOSUB)) / CPU_TPS * 1000 ); + // #else + // TIMER_TOC(timer_dopair_subset); + // #endif + + } + + diff --git a/src/runner_iact.h b/src/runner_iact.h index 64ef8d07643503e1941e99d5023dcf22c520e105..1b5bb787fee6e7f6af505c31e0562c51b408b72f 100644 --- a/src/runner_iact.h +++ b/src/runner_iact.h @@ -69,31 +69,35 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_density ( float r float r = sqrtf( r2 ); float xi, xj; - float hg_inv; + float h_inv, hg_inv; float wi, wj, wi_dx, wj_dx; if ( r2 < hi*hi && pi != NULL ) { - hg_inv = kernel_igamma / hi; + h_inv = 1.0 / hi; + hg_inv = kernel_igamma * h_inv; xi = r * hg_inv; kernel_deval( xi , &wi , &wi_dx ); pi->rho += pj->mass * wi; pi->rho_dh += -pj->mass * ( 3.0*wi + xi*wi_dx ); pi->wcount += wi * ( 4.0f * M_PI / 3.0f * kernel_igamma3 ); + pi->wcount_dh -= xi * h_inv * wi_dx * ( 4.0f * M_PI / 3.0f * kernel_igamma3 ); pi->icount += 1; } if ( r2 < hj*hj && pj != NULL ) { - hg_inv = kernel_igamma / hj; + h_inv = 1.0 / hj; + hg_inv = kernel_igamma * h_inv; xj = r * hg_inv; kernel_deval( xj , &wj , &wj_dx ); pj->rho += pi->mass * wj; pj->rho_dh += -pi->mass * ( 3.0*wj + xj*wj_dx ); pj->wcount += wj * ( 4.0f * M_PI / 3.0f * kernel_igamma3 ); + pj->wcount_dh -= xj * h_inv * wj_dx * ( 4.0f * M_PI / 3.0f * kernel_igamma3 ); pj->icount += 1; } diff --git a/src/space.c b/src/space.c index 1eaec456a06bec080b06bc00d634155815efe6e2..52c582057086715cec098bf237e94d48008519a2 100644 --- a/src/space.c +++ b/src/space.c @@ -129,6 +129,7 @@ void space_map_mkghosts ( struct cell *c , void *data ) { void space_map_clearnrtasks ( struct cell *c , void *data ) { c->nr_tasks = 0; + c->nr_density = 0; } @@ -976,23 +977,43 @@ void space_maketasks ( struct space *s , int do_sort ) { } } - /* Count the number of tasks associated with each cell. */ + /* Count the number of tasks associated with each cell and + store the density tasks in each cell. */ space_map_cells( s , 1 , &space_map_clearnrtasks , NULL ); for ( k = 0 ; k < s->nr_tasks ; k++ ) { t = &s->tasks[k]; - if ( t->type == task_type_self ) + if ( t->type == task_type_self ) { t->ci->nr_tasks += 1; + if ( t->subtype == task_subtype_density ) { + t->ci->density[ t->ci->nr_density ] = t; + t->ci->nr_density += 1; + } + } else if ( t->type == task_type_pair ) { t->ci->nr_tasks += 1; t->cj->nr_tasks += 1; + if ( t->subtype == task_subtype_density ) { + t->ci->density[ t->ci->nr_density ] = t; + t->ci->nr_density += 1; + t->cj->density[ t->cj->nr_density ] = t; + t->cj->nr_density += 1; + } } else if ( t->type == task_type_sub ) { t->ci->nr_tasks += 1; if ( t->cj != NULL ) t->cj->nr_tasks += 1; + if ( t->subtype == task_subtype_density ) { + t->ci->density[ t->ci->nr_density ] = t; + t->ci->nr_density += 1; + if ( t->cj != NULL ) { + t->cj->density[ t->cj->nr_density ] = t; + t->cj->nr_density += 1; + } + } } } - + /* Append a ghost task to each cell. */ space_map_cells( s , 1 , &space_map_mkghosts , s );