diff --git a/src/cell.h b/src/cell.h index fc4954ea99d7e30421d15c5d077327b475cceffd..a560fcb1a54000188b454913b2776308c0fce24b 100644 --- a/src/cell.h +++ b/src/cell.h @@ -51,7 +51,7 @@ struct cell { float slack; /* Maximum particle movement in this cell. */ - float dx_max; + float dx_max, h2dx_max; /* The depth of this cell in the tree. */ int depth, split; diff --git a/src/debug.c b/src/debug.c index 2a32a443b569b90c2b77ac17edeac40463bd147f..5b3b1f7cad619257e5f738a129b3b4fa8d0acfd8 100644 --- a/src/debug.c +++ b/src/debug.c @@ -49,7 +49,7 @@ void printParticle ( struct part *parts , long long int id, int N ) { parts[i].a[0], parts[i].a[1], parts[i].a[2], parts[i].h, parts[i].force.h_dt, - parts[i].wcount, + parts[i].density.wcount, parts[i].mass, parts[i].rho, parts[i].rho_dh, parts[i].density.div_v, diff --git a/src/engine.c b/src/engine.c index 07a9d51e0099e3a34358b0879c70b611caae5728..5d23f431b9ee657cd857118b2cd59de6d842333f 100644 --- a/src/engine.c +++ b/src/engine.c @@ -164,7 +164,7 @@ void engine_map_kick_first ( struct cell *c , void *data ) { int j, k; struct engine *e = (struct engine *)data; float pdt, dt_step = e->dt_step, dt = e->dt, hdt = 0.5f*dt; - float dt_min, dt_max, h_max, dx_max, a[3], v[3], u, u_dt, h, h_dt, rho, v_old[3]; + float dt_min, dt_max, h_max, dx, h2dx_max, dx_max, a[3], v[3], u, u_dt, h, h_dt, rho, v_old[3]; double x[3], x_old[3]; struct part *restrict p; struct xpart *restrict xp; @@ -178,6 +178,7 @@ void engine_map_kick_first ( struct cell *c , void *data ) { dt_max = 0.0f; h_max = 0.0f; dx_max = 0.0f; + h2dx_max = 0.0f; /* Loop over parts. */ for ( k = 0 ; k < c->count ; k++ ) { @@ -212,9 +213,11 @@ void engine_map_kick_first ( struct cell *c , void *data ) { p->x[0] = x[0] += dt * v_old[0]; p->x[1] = x[1] += dt * v_old[1]; p->x[2] = x[2] += dt * v_old[2]; - dx_max = fmaxf( dx_max , sqrtf( (x[0] - x_old[0])*(x[0] - x_old[0]) + - (x[1] - x_old[1])*(x[1] - x_old[1]) + - (x[2] - x_old[2])*(x[2] - x_old[2]) )*2 + h ); + dx = sqrtf( (x[0] - x_old[0])*(x[0] - x_old[0]) + + (x[1] - x_old[1])*(x[1] - x_old[1]) + + (x[2] - x_old[2])*(x[2] - x_old[2]) ); + dx_max = fmaxf( dx_max , dx ); + h2dx_max = fmaxf( h2dx_max , dx*2 + h ); /* Update positions and energies at the half-step. */ p->v[0] = v[0] += dt * a[0]; @@ -236,11 +239,10 @@ void engine_map_kick_first ( struct cell *c , void *data ) { /* Init fields for density calculation. */ if ( pdt > dt_step ) { rho = p->rho *= expf( -3.0f * h_dt / h * dt ); - // rho = p->rho += cbrt( h_dt ) * dt; p->force.POrho2 = u * ( const_gamma - 1.0f ) / ( rho + h * p->rho_dh / 3.0f ); } else { - p->wcount = 0.0f; + p->density.wcount = 0.0f; p->density.wcount_dh = 0.0f; p->rho = 0.0f; p->rho_dh = 0.0f; @@ -262,6 +264,7 @@ void engine_map_kick_first ( struct cell *c , void *data ) { dt_max = c->progeny[k]->dt_max; h_max = c->progeny[k]->h_max; dx_max = c->progeny[k]->dx_max; + h2dx_max = c->progeny[k]->h2dx_max; /* Loop over the remaining progeny. */ for ( k += 1 ; k < 8 ; k++ ) @@ -270,6 +273,7 @@ void engine_map_kick_first ( struct cell *c , void *data ) { dt_max = fmaxf( dt_max , c->progeny[k]->dt_max ); h_max = fmaxf( h_max , c->progeny[k]->h_max ); dx_max = fmaxf( dx_max , c->progeny[k]->dx_max ); + h2dx_max = fmaxf( h2dx_max , c->progeny[k]->h2dx_max ); } } @@ -279,7 +283,7 @@ void engine_map_kick_first ( struct cell *c , void *data ) { c->dt_max = dt_max; c->h_max = h_max; c->dx_max = dx_max; - c->sorted = 0; + c->h2dx_max = h2dx_max; /* Clean out the task pointers. */ // c->sorts[0] = NULL; @@ -340,7 +344,7 @@ void engine_single ( double *dim , long long int pid , struct part *__restrict__ /* Dump the result. */ ihg = kernel_igamma / p.h; - printf( "pairs_single: part %lli (h=%e) has wcount=%f, rho=%f.\n" , p.id , p.h , p.wcount + 32.0/3 , ihg * ihg * ihg * ( p.rho + p.mass*kernel_root ) ); + printf( "pairs_single: part %lli (h=%e) has wcount=%f, rho=%f.\n" , p.id , p.h , p.density.wcount + 32.0/3 , ihg * ihg * ihg * ( p.rho + p.mass*kernel_root ) ); fflush(stdout); } @@ -365,6 +369,8 @@ void engine_step ( struct engine *e , int sort_queues ) { double lang[3], ang[3] = { 0.0 , 0.0 , 0.0 }; // double lent, ent = 0.0; int threadID, nthreads, count = 0, lcount; + + TIMER_TIC2 /* Get the maximum dt. */ dt_step = 2.0f*dt; @@ -375,7 +381,7 @@ void engine_step ( struct engine *e , int sort_queues ) { /* Set the maximum dt. */ e->dt_step = dt_step; e->s->dt_step = dt_step; - printf( "engine_step: dt_step set to %.3e.\n" , dt_step ); fflush(stdout); + printf( "engine_step: dt_step set to %.3e (dt=%.3e).\n" , dt_step , e->dt ); fflush(stdout); // printParticle( parts , 432626 ); @@ -415,7 +421,7 @@ void engine_step ( struct engine *e , int sort_queues ) { error( "Error while waiting for barrier." ); /* Stop the clock. */ - TIMER_TOC(timer_step); + TIMER_TOC(timer_runners); // for(k=0; k<10; ++k) // printParticle(parts, k); @@ -523,6 +529,8 @@ void engine_step ( struct engine *e , int sort_queues ) { e->step /= 2; printf( "engine_step: dt_min is larger than twice the time step, adjusting to dt=%e.\n" , e->dt ); } + + TIMER_TOC2(timer_step); } @@ -533,12 +541,13 @@ void engine_step ( struct engine *e , int sort_queues ) { * * @param e The #engine. * @param s The #space in which this #runner will run. + * @param dt The initial time step to use. * @param nr_threads The number of threads to spawn. * @param nr_queues The number of task queues to create. * @param policy The queueing policy to use. */ -void engine_init ( struct engine *e , struct space *s , int nr_threads , int nr_queues , int policy ) { +void engine_init ( struct engine *e , struct space *s , float dt , int nr_threads , int nr_queues , int policy ) { #if defined(HAVE_SETAFFINITY) cpu_set_t cpuset; @@ -550,7 +559,6 @@ void engine_init ( struct engine *e , struct space *s , int nr_threads , int nr_ e->nr_threads = nr_threads; e->nr_queues = nr_queues; e->policy = policy; - e->dt_min = 0.0f; e->step = 0; /* First of all, init the barrier and lock it. */ @@ -562,6 +570,13 @@ void engine_init ( struct engine *e , struct space *s , int nr_threads , int nr_ error( "Failed to lock barrier mutex." ); e->barrier_count = 0; + /* Run through the parts and get the minimum time step. */ + for ( k = 0 ; k < s->nr_parts ; k++ ) + if ( s->parts[k].dt > 0.0f ) + while ( s->parts[k].dt < dt ) + dt *= 0.5; + e->dt_min = dt; + /* Allocate the queues. */ if ( posix_memalign( (void *)(&e->queues) , 64 , nr_queues * sizeof(struct queue) ) != 0 ) error( "Failed to allocate queues." ); diff --git a/src/engine.h b/src/engine.h index f872f3cef703dfc58b4ed6dddfc365cc9268cfe3..cb96b81510da307fbe0b4c8a55aa5f058041a766 100644 --- a/src/engine.h +++ b/src/engine.h @@ -72,6 +72,6 @@ struct engine { /* Function prototypes. */ void engine_barrier( struct engine *e ); -void engine_init ( struct engine *e , struct space *s , int nr_threads , int nr_queues , int policy ); +void engine_init ( struct engine *e , struct space *s , float dt , int nr_threads , int nr_queues , int policy ); void engine_prepare ( struct engine *e ); void engine_step ( struct engine *e , int sort_queues ); diff --git a/src/part.h b/src/part.h index c90d0fe2717200f9290f34c5c1cf0479f7bf1dea..0341fec1e2db3399179f613fa16e3ba246147146 100644 --- a/src/part.h +++ b/src/part.h @@ -57,11 +57,23 @@ struct xpart { /* Data of a single particle. */ struct part { + /* Particle acceleration. */ + float a[3]; + /* Particle velocity. */ float v[3]; - /* Particle mass. */ - float mass; + /* Particle position. */ + double x[3]; + + /* Particle cutoff radius. */ + float h; + + /* Particle time-step. */ + float dt; + + /* Particle internal energy. */ + float u; /* Particle density. */ float rho; @@ -83,6 +95,9 @@ struct part { /* Particle velocity curl. */ float curl_v[3]; + /* Particle number density. */ + float wcount; + } density; struct { @@ -112,14 +127,8 @@ struct part { /* Particle pressure. */ // float P; - /* Particle acceleration. */ - float a[3]; - - /* Particle number density. */ - float wcount; - - /* Particle internal energy. */ - float u; + /* Particle mass. */ + float mass; /* Particle ID. */ unsigned long long id; @@ -127,15 +136,6 @@ struct part { /* Pointer to extra particle data. */ struct xpart *xtras; - /* Particle position. */ - double x[3]; - - /* Particle cutoff radius. */ - float h; - - /* Particle time-step. */ - float dt; - } __attribute__((aligned (32))); diff --git a/src/runner.c b/src/runner.c index 6a74534237ebd6d034a3a4707e84993975018b5c..8fa2d2fbede7e343b53e69e1f15f2966b09780b5 100644 --- a/src/runner.c +++ b/src/runner.c @@ -165,9 +165,11 @@ void runner_dosort_ascending ( struct entry *sort , int N ) { * @param r The #runner. * @param c The #cell. * @param flags Cell flag. + * @param clock Flag indicating whether to record the timing or not, needed + * for recursive calls. */ -void runner_dosort ( struct runner *r , struct cell *c , int flags ) { +void runner_dosort ( struct runner *r , struct cell *c , int flags , int clock ) { struct entry *finger; struct entry *fingers[8]; @@ -176,8 +178,14 @@ void runner_dosort ( struct runner *r , struct cell *c , int flags ) { int i, ind, off[8], inds[8], temp_i, missing; // float shift[3]; float buff[8], px[3]; + TIMER_TIC + /* Clean-up the flags, i.e. filter out what's already been sorted. */ + flags &= ~c->sorted; + if ( flags == 0 ) + return; + /* start by allocating the entry arrays. */ if ( lock_lock( &c->lock ) != 0 ) error( "Failed to lock cell." ); @@ -203,7 +211,7 @@ void runner_dosort ( struct runner *r , struct cell *c , int flags ) { else missing = ( c->progeny[k]->sorts->flags ^ flags ) & flags; if ( missing ) - runner_dosort( r , c->progeny[k] , missing ); + runner_dosort( r , c->progeny[k] , missing , 0 ); } /* Loop over the 13 different sort arrays. */ @@ -314,7 +322,8 @@ void runner_dosort ( struct runner *r , struct cell *c , int flags ) { (flags & 0x1000) >> 12 , (flags & 0x800) >> 11 , (flags & 0x400) >> 10 , (flags & 0x200) >> 9 , (flags & 0x100) >> 8 , (flags & 0x80) >> 7 , (flags & 0x40) >> 6 , (flags & 0x20) >> 5 , (flags & 0x10) >> 4 , (flags & 0x8) >> 3 , (flags & 0x4) >> 2 , (flags & 0x2) >> 1 , (flags & 0x1) >> 0 , ((double)TIMER_TOC(timer_dosort)) / CPU_TPS * 1000 ); fflush(stdout); #else - TIMER_TOC(timer_dosort); + if ( clock ) + TIMER_TOC(timer_dosort); #endif } @@ -377,31 +386,35 @@ void runner_doghost ( struct runner *r , struct cell *c ) { ihg4 = ihg2 * ihg2; /* Adjust the computed weighted number of neighbours. */ - p->wcount += kernel_wroot; + p->density.wcount += kernel_wroot; /* Final operation on the density. */ p->rho = ihg * ihg2 * ( p->rho + p->mass*kernel_root ); p->rho_dh *= ihg4; /* Compute the smoothing length update (Newton step). */ - h_corr = ( const_nwneigh - p->wcount ) / p->density.wcount_dh; - - /* Truncate to the range [ -p->h/2 , p->h ]. */ - h_corr = fminf( h_corr , h ); - h_corr = fmaxf( h_corr , -h/2 ); + if ( p->density.wcount_dh == 0.0f ) + h_corr = p->h; + else { + h_corr = ( const_nwneigh - p->density.wcount ) / p->density.wcount_dh; + + /* Truncate to the range [ -p->h/2 , p->h ]. */ + h_corr = fminf( h_corr , h ); + h_corr = fmaxf( h_corr , -h/2 ); + } /* Apply the correction to p->h. */ p->h += h_corr; cp->h = p->h; /* Did we get the right number density? */ - if ( p->wcount > const_nwneigh + const_delta_nwneigh || - p->wcount < const_nwneigh - const_delta_nwneigh ) { - // printf( "runner_doghost: particle %lli (h=%e,depth=%i) has bad wcount=%.3f.\n" , p->id , p->h , c->depth , p->wcount ); fflush(stdout); - // p->h += ( p->wcount + kernel_root - const_nwneigh ) / p->density.wcount_dh; + if ( p->density.wcount > const_nwneigh + const_delta_nwneigh || + p->density.wcount < const_nwneigh - const_delta_nwneigh ) { + // printf( "runner_doghost: particle %lli (h=%e,depth=%i) has bad wcount=%.3f.\n" , p->id , p->h , c->depth , p->density.wcount ); fflush(stdout); + // p->h += ( p->density.wcount + kernel_root - const_nwneigh ) / p->density.wcount_dh; pid[redo] = pid[i]; redo += 1; - p->wcount = 0.0; + p->density.wcount = 0.0; p->density.wcount_dh = 0.0; p->rho = 0.0; p->rho_dh = 0.0; @@ -632,7 +645,7 @@ void *runner_main ( void *data ) { cell_unlocktree( cj ); break; case task_type_sort: - runner_dosort( r , ci , t->flags ); + runner_dosort( r , ci , t->flags , 1 ); break; case task_type_sub: if ( t->subtype == task_subtype_density ) diff --git a/src/runner.h b/src/runner.h index cf904a8e1ab926f024192279adc58e31aa37dfb2..f9a8cfd6b07b5b6a454f4765bd922be315ea39a0 100644 --- a/src/runner.h +++ b/src/runner.h @@ -81,5 +81,5 @@ void runner_doghost ( struct runner *r , struct cell *c ); void runner_dopair_density ( struct runner *r , struct cell *ci , struct cell *cj ); void runner_doself_density ( struct runner *r , struct cell *c ); void runner_dosub_density ( struct runner *r , struct cell *ci , struct cell *cj , int flags ); -void runner_dosort ( struct runner *r , struct cell *c , int flag ); +void runner_dosort ( struct runner *r , struct cell *c , int flag , int clock ); void *runner_main ( void *data ); diff --git a/src/runner_doiact.h b/src/runner_doiact.h index 15e079f6001abb4ece915eed8e4a058740c50b0e..45c03f597ad8555f8c835156e1628db252601c11 100644 --- a/src/runner_doiact.h +++ b/src/runner_doiact.h @@ -328,7 +328,7 @@ void DOPAIR_SUBSET ( struct runner *r , struct cell *restrict ci , struct part * struct part *restrict pi, *restrict parts_j = cj->parts; struct cpart *restrict cpj, *restrict cparts_j = cj->cparts; double pix[3]; - float dx[3], hi, hi2, r2, di; + float dx[3], hi, hi2, r2, di, dxj; struct entry *sort_j; #ifdef VECTORIZE int icount = 0; @@ -363,6 +363,7 @@ void DOPAIR_SUBSET ( struct runner *r , struct cell *restrict ci , struct part * /* Pick-out the sorted lists. */ sort_j = &cj->sort[ sid*(cj->count + 1) ]; + dxj = cj->dx_max; /* Parts are on the left? */ if ( !flipped ) { @@ -376,7 +377,7 @@ void DOPAIR_SUBSET ( struct runner *r , struct cell *restrict ci , struct part * pix[k] = pi->x[k] - shift[k]; hi = pi->h; hi2 = hi * hi; - di = hi + pix[0]*runner_shift[ 3*sid + 0 ] + pix[1]*runner_shift[ 3*sid + 1 ] + pix[2]*runner_shift[ 3*sid + 2 ]; + di = hi + dxj + pix[0]*runner_shift[ 3*sid + 0 ] + pix[1]*runner_shift[ 3*sid + 1 ] + pix[2]*runner_shift[ 3*sid + 2 ]; /* Loop over the parts in cj. */ for ( pjd = 0 ; pjd < count_j && sort_j[ pjd ].d < di ; pjd++ ) { @@ -439,7 +440,7 @@ void DOPAIR_SUBSET ( struct runner *r , struct cell *restrict ci , struct part * pix[k] = pi->x[k] - shift[k]; hi = pi->h; hi2 = hi * hi; - di = -hi + pix[0]*runner_shift[ 3*sid + 0 ] + pix[1]*runner_shift[ 3*sid + 1 ] + pix[2]*runner_shift[ 3*sid + 2 ]; + di = -hi - dxj + pix[0]*runner_shift[ 3*sid + 0 ] + pix[1]*runner_shift[ 3*sid + 1 ] + pix[2]*runner_shift[ 3*sid + 2 ]; /* Loop over the parts in cj. */ for ( pjd = count_j-1 ; pjd >= 0 && di < sort_j[ pjd ].d ; pjd-- ) { @@ -633,7 +634,7 @@ void DOPAIR1 ( struct runner *r , struct cell *ci , struct cell *cj ) { struct cpart *restrict cpi, *restrict cparts_i; struct cpart *restrict cpj, *restrict cparts_j; double pix[3], pjx[3], di, dj; - float dx[3], hi, hi2, hj, hj2, r2; + float dx[3], hi, hi2, hj, hj2, r2, dx_max; double hi_max, hj_max; double di_max, dj_min; int count_i, count_j; @@ -674,13 +675,14 @@ void DOPAIR1 ( struct runner *r , struct cell *ci , struct cell *cj ) { cparts_i = ci->cparts; cparts_j = cj->cparts; di_max = sort_i[count_i-1].d - rshift; dj_min = sort_j[0].d; + dx_max = ( ci->dx_max + cj->dx_max ); /* if ( ci->split && cj->split && sid == 4 ) printf( "boing!\n" ); */ /* Loop over the parts in ci. */ - for ( pid = count_i-1 ; pid >= 0 && sort_i[pid].d + hi_max > dj_min ; pid-- ) { + for ( pid = count_i-1 ; pid >= 0 && sort_i[pid].d + hi_max + dx_max > dj_min ; pid-- ) { /* Get a hold of the ith part in ci. */ pi = &parts_i[ sort_i[ pid ].i ]; @@ -688,7 +690,7 @@ void DOPAIR1 ( struct runner *r , struct cell *ci , struct cell *cj ) { if ( cpi->dt > dt_step ) continue; hi = cpi->h; - di = sort_i[pid].d + hi - rshift; + di = sort_i[pid].d + hi + dx_max - rshift; if ( di < dj_min ) continue; @@ -747,7 +749,7 @@ void DOPAIR1 ( struct runner *r , struct cell *ci , struct cell *cj ) { tic = getticks(); */ /* Loop over the parts in cj. */ - for ( pjd = 0 ; pjd < count_j && sort_j[pjd].d - hj_max < di_max ; pjd++ ) { + for ( pjd = 0 ; pjd < count_j && sort_j[pjd].d - hj_max - dx_max < di_max ; pjd++ ) { /* Get a hold of the jth part in cj. */ pj = &parts_j[ sort_j[ pjd ].i ]; @@ -755,7 +757,7 @@ void DOPAIR1 ( struct runner *r , struct cell *ci , struct cell *cj ) { if ( cpj->dt > dt_step ) continue; hj = cpj->h; - dj = sort_j[pjd].d - hj - rshift; + dj = sort_j[pjd].d - hj - dx_max - rshift; if ( dj > di_max ) continue; @@ -838,7 +840,7 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) { struct cpart *restrict cpi, *restrict cparts_i; struct cpart *restrict cpj, *restrict cparts_j; double pix[3], pjx[3], di, dj; - float dx[3], hi, hi2, hj, hj2, r2; + float dx[3], hi, hi2, hj, hj2, r2, dx_max; double hi_max, hj_max; double di_max, dj_min; int count_i, count_j; @@ -885,6 +887,7 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) { cparts_i = ci->cparts; cparts_j = cj->cparts; di_max = sort_i[count_i-1].d - rshift; dj_min = sort_j[0].d; + dx_max = ( ci->dx_max + cj->dx_max ); /* Collect the number of parts left and right below dt. */ if ( ci->dt_max <= dt_step ) { @@ -915,13 +918,13 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) { } /* Loop over the parts in ci. */ - for ( pid = count_i-1 ; pid >= 0 && sort_i[pid].d + hi_max > dj_min ; pid-- ) { + for ( pid = count_i-1 ; pid >= 0 && sort_i[pid].d + hi_max + dx_max > dj_min ; pid-- ) { /* Get a hold of the ith part in ci. */ pi = &parts_i[ sort_i[ pid ].i ]; cpi = &cparts_i[ sort_i[ pid ].i ]; hi = cpi->h; - di = sort_i[pid].d + hi - rshift; + di = sort_i[pid].d + hi + dx_max - rshift; if ( di < dj_min ) continue; @@ -1065,13 +1068,13 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) { tic = getticks(); */ /* Loop over the parts in cj. */ - for ( pjd = 0 ; pjd < count_j && sort_j[pjd].d - hj_max < di_max ; pjd++ ) { + for ( pjd = 0 ; pjd < count_j && sort_j[pjd].d - hj_max - dx_max < di_max ; pjd++ ) { /* Get a hold of the jth part in cj. */ pj = &parts_j[ sort_j[ pjd ].i ]; cpj = &cparts_j[ sort_j[ pjd ].i ]; hj = cpj->h; - dj = sort_j[pjd].d - hj - rshift; + dj = sort_j[pjd].d - hj - dx_max - rshift; if ( dj > di_max ) continue; diff --git a/src/runner_iact.h b/src/runner_iact.h index 5ac5fd6c87d01ba3c07200e3a614d10a4444afd2..79708a311724959a9202ccd009a1b02f5cfc5659 100644 --- a/src/runner_iact.h +++ b/src/runner_iact.h @@ -68,7 +68,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_density ( float r pi->rho += pj->mass * wi; pi->rho_dh -= pj->mass * ( 3.0*wi + xi*wi_dx ); - pi->wcount += wi * ( 4.0f * M_PI / 3.0f * kernel_igamma3 ); + pi->density.wcount += wi * ( 4.0f * M_PI / 3.0f * kernel_igamma3 ); pi->density.wcount_dh -= xi * h_inv * wi_dx * ( 4.0f * M_PI / 3.0f * kernel_igamma3 ); // pi->icount += 1; @@ -87,7 +87,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_density ( float r pj->rho += pi->mass * wj; pj->rho_dh -= pi->mass * ( 3.0*wj + xj*wj_dx ); - pj->wcount += wj * ( 4.0f * M_PI / 3.0f * kernel_igamma3 ); + pj->density.wcount += wj * ( 4.0f * M_PI / 3.0f * kernel_igamma3 ); pj->density.wcount_dh -= xj * h_inv * wj_dx * ( 4.0f * M_PI / 3.0f * kernel_igamma3 ); // pj->icount += 1; @@ -192,14 +192,14 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_vec_density ( flo for ( k = 0 ; k < VEC_SIZE ; k++ ) { pi[k]->rho += rhoi.f[k]; pi[k]->rho_dh -= rhoi_dh.f[k]; - pi[k]->wcount += wcounti.f[k]; + pi[k]->density.wcount += wcounti.f[k]; pi[k]->density.wcount_dh -= wcounti_dh.f[k]; pi[k]->density.div_v += div_vi.f[k]; for( j = 0 ; j < 3 ; j++ ) pi[k]->density.curl_v[j] += curl_vi[j].f[k]; pj[k]->rho += rhoj.f[k]; pj[k]->rho_dh -= rhoj_dh.f[k]; - pj[k]->wcount += wcountj.f[k]; + pj[k]->density.wcount += wcountj.f[k]; pj[k]->density.wcount_dh -= wcountj_dh.f[k]; pj[k]->density.div_v += div_vj.f[k]; for( j = 0 ; j < 3 ; j++ ) @@ -251,7 +251,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_density ( pi->rho += pj->mass * wi; pi->rho_dh -= pj->mass * ( 3.0*wi + xi*wi_dx ); - pi->wcount += wi * ( 4.0f * M_PI / 3.0f * kernel_igamma3 ); + pi->density.wcount += wi * ( 4.0f * M_PI / 3.0f * kernel_igamma3 ); pi->density.wcount_dh -= xi * h_inv * wi_dx * ( 4.0f * M_PI / 3.0f * kernel_igamma3 ); // pi->icount += 1; @@ -329,7 +329,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_vec_densit for ( k = 0 ; k < VEC_SIZE ; k++ ) { pi[k]->rho += rhoi.f[k]; pi[k]->rho_dh -= rhoi_dh.f[k]; - pi[k]->wcount += wcounti.f[k]; + pi[k]->density.wcount += wcounti.f[k]; pi[k]->density.wcount_dh -= wcounti_dh.f[k]; pi[k]->density.div_v += div_vi.f[k]; for( j = 0 ; j < 3 ; j++ ) diff --git a/src/space.c b/src/space.c index 61d278a4c1274aa0bc07dbbde7db0f9a73ab3bae..f5ae3c8becd35dbbec5ee5c2946200683f505b18 100644 --- a/src/space.c +++ b/src/space.c @@ -133,7 +133,7 @@ int space_marktasks ( struct space *s ) { /* Too much particle movement? */ if ( !t->skip && t->tight && - ( t->ci->dx_max > t->ci->dmin || t->cj->dx_max > t->cj->dmin ) ) + ( t->ci->h2dx_max > t->ci->dmin || t->cj->h2dx_max > t->cj->dmin ) ) return 1; /* Set the sort flags. */ @@ -176,94 +176,6 @@ int space_marktasks ( struct space *s ) { } -/** - * @brief Mapping function to set dt_min and dt_max. - */ - -void space_map_prepare ( struct cell *c , void *data ) { - - int k; - float dt_min, dt_max, h_max, dx_max; - struct part *restrict p; - struct xpart *restrict xp; - struct cpart *restrict cp; - - /* No children? */ - if ( !c->split ) { - - /* Init with first part. */ - p = &c->parts[0]; - xp = p->xtras; - cp = &c->cparts[0]; - - dt_min = p->dt; - dt_max = p->dt; - h_max = p->h; - dx_max = sqrtf( (p->x[0] - xp->x_old[0])*(p->x[0] - xp->x_old[0]) + - (p->x[1] - xp->x_old[1])*(p->x[1] - xp->x_old[1]) + - (p->x[2] - xp->x_old[2])*(p->x[2] - xp->x_old[2]) )*2 + p->h; - cp->x[0] = p->x[0]; - cp->x[1] = p->x[1]; - cp->x[2] = p->x[2]; - cp->h = p->h; - cp->dt = p->dt; - - /* Loop over parts. */ - for ( k = 1 ; k < c->count ; k++ ) { - p = &c->parts[k]; - xp = p->xtras; - cp = &c->cparts[k]; - dt_min = fminf( dt_min , p->dt ); - dt_max = fmaxf( dt_max , p->dt ); - h_max = fmaxf( h_max , p->h ); - dx_max = fmaxf( dx_max , sqrtf( (p->x[0] - xp->x_old[0])*(p->x[0] - xp->x_old[0]) + - (p->x[1] - xp->x_old[1])*(p->x[1] - xp->x_old[1]) + - (p->x[2] - xp->x_old[2])*(p->x[2] - xp->x_old[2]) )*2 + p->h ); - cp->x[0] = p->x[0]; - cp->x[1] = p->x[1]; - cp->x[2] = p->x[2]; - cp->h = p->h; - cp->dt = p->dt; - } - - } - - /* Otherwise, agregate from children. */ - else { - - /* Init with the first non-null child. */ - for ( k = 0 ; c->progeny[k] == NULL ; k++ ); - dt_min = c->progeny[k]->dt_min; - dt_max = c->progeny[k]->dt_max; - h_max = c->progeny[k]->h_max; - dx_max = c->progeny[k]->dx_max; - - /* Loop over the remaining progeny. */ - for ( k += 1 ; k < 8 ; k++ ) - if ( c->progeny[k] != NULL ) { - dt_min = fminf( dt_min , c->progeny[k]->dt_min ); - dt_max = fmaxf( dt_max , c->progeny[k]->dt_max ); - h_max = fmaxf( h_max , c->progeny[k]->h_max ); - dx_max = fmaxf( dx_max , c->progeny[k]->dx_max ); - } - - } - - /* Store the values. */ - c->dt_min = dt_min; - c->dt_max = dt_max; - c->h_max = h_max; - c->dx_max = dx_max; - c->sorted = 0; - - /* Clean out the task pointers. */ - // c->sorts = NULL; - // c->nr_tasks = 0; - // c->nr_density = 0; - - } - - /** * @brief Check the integrity of the space and rebuild if necessary. * @@ -669,6 +581,8 @@ void space_rebuild ( struct space *s , double cell_max ) { s->cells[k].nr_tasks = 0; s->cells[k].nr_density = 0; s->cells[k].dx_max = 0.0f; + s->cells[k].h2dx_max = 0.0f; + s->cells[k].sorted = 0; } s->maxdepth = 0; diff --git a/src/space.h b/src/space.h index 8039411cafdd495b5efd96fe6e945703d7ff7e4c..c94bd86129a3ed425698c5b46893d2822cc54a45 100644 --- a/src/space.h +++ b/src/space.h @@ -26,7 +26,7 @@ #define space_splitratio 0.875 #define space_splitsize_default 400 #define space_subsize_default 1000 -#define space_dosub 1 +#define space_dosub 0 #define space_stretch 1.05 #define space_maxtaskspercell 30 @@ -64,6 +64,9 @@ struct space { /* Current time step for particles. */ float dt_step; + /* Current maximum displacement for particles. */ + float dx_max; + /* Number of cells. */ int nr_cells, tot_cells; diff --git a/src/timers.h b/src/timers.h index 3f109ae8077813631a0f8979c7cb5f3970ab8199..a2e50a07371f0cccedccd4086b5c06ea03c5a9e7 100644 --- a/src/timers.h +++ b/src/timers.h @@ -37,6 +37,7 @@ enum { timer_getpair, timer_steal, timer_stalled, + timer_runners, timer_step, timer_count, };