Commit d48d9285 authored by Pedro Gonnet's avatar Pedro Gonnet
Browse files

switch to using pseudo-verlet lists, i.e. sorts are only re-computed when the grid is rebuilt.


Former-commit-id: 07817db9de2a4f724322d80ffaae29480835f556
parent 2c057c42
......@@ -51,7 +51,7 @@ struct cell {
float slack;
/* Maximum particle movement in this cell. */
float dx_max;
float dx_max, h2dx_max;
/* The depth of this cell in the tree. */
int depth, split;
......
......@@ -49,7 +49,7 @@ void printParticle ( struct part *parts , long long int id, int N ) {
parts[i].a[0], parts[i].a[1], parts[i].a[2],
parts[i].h,
parts[i].force.h_dt,
parts[i].wcount,
parts[i].density.wcount,
parts[i].mass,
parts[i].rho, parts[i].rho_dh,
parts[i].density.div_v,
......
......@@ -164,7 +164,7 @@ void engine_map_kick_first ( struct cell *c , void *data ) {
int j, k;
struct engine *e = (struct engine *)data;
float pdt, dt_step = e->dt_step, dt = e->dt, hdt = 0.5f*dt;
float dt_min, dt_max, h_max, dx_max, a[3], v[3], u, u_dt, h, h_dt, rho, v_old[3];
float dt_min, dt_max, h_max, dx, h2dx_max, dx_max, a[3], v[3], u, u_dt, h, h_dt, rho, v_old[3];
double x[3], x_old[3];
struct part *restrict p;
struct xpart *restrict xp;
......@@ -178,6 +178,7 @@ void engine_map_kick_first ( struct cell *c , void *data ) {
dt_max = 0.0f;
h_max = 0.0f;
dx_max = 0.0f;
h2dx_max = 0.0f;
/* Loop over parts. */
for ( k = 0 ; k < c->count ; k++ ) {
......@@ -212,9 +213,11 @@ void engine_map_kick_first ( struct cell *c , void *data ) {
p->x[0] = x[0] += dt * v_old[0];
p->x[1] = x[1] += dt * v_old[1];
p->x[2] = x[2] += dt * v_old[2];
dx_max = fmaxf( dx_max , sqrtf( (x[0] - x_old[0])*(x[0] - x_old[0]) +
(x[1] - x_old[1])*(x[1] - x_old[1]) +
(x[2] - x_old[2])*(x[2] - x_old[2]) )*2 + h );
dx = sqrtf( (x[0] - x_old[0])*(x[0] - x_old[0]) +
(x[1] - x_old[1])*(x[1] - x_old[1]) +
(x[2] - x_old[2])*(x[2] - x_old[2]) );
dx_max = fmaxf( dx_max , dx );
h2dx_max = fmaxf( h2dx_max , dx*2 + h );
/* Update positions and energies at the half-step. */
p->v[0] = v[0] += dt * a[0];
......@@ -236,11 +239,10 @@ void engine_map_kick_first ( struct cell *c , void *data ) {
/* Init fields for density calculation. */
if ( pdt > dt_step ) {
rho = p->rho *= expf( -3.0f * h_dt / h * dt );
// rho = p->rho += cbrt( h_dt ) * dt;
p->force.POrho2 = u * ( const_gamma - 1.0f ) / ( rho + h * p->rho_dh / 3.0f );
}
else {
p->wcount = 0.0f;
p->density.wcount = 0.0f;
p->density.wcount_dh = 0.0f;
p->rho = 0.0f;
p->rho_dh = 0.0f;
......@@ -262,6 +264,7 @@ void engine_map_kick_first ( struct cell *c , void *data ) {
dt_max = c->progeny[k]->dt_max;
h_max = c->progeny[k]->h_max;
dx_max = c->progeny[k]->dx_max;
h2dx_max = c->progeny[k]->h2dx_max;
/* Loop over the remaining progeny. */
for ( k += 1 ; k < 8 ; k++ )
......@@ -270,6 +273,7 @@ void engine_map_kick_first ( struct cell *c , void *data ) {
dt_max = fmaxf( dt_max , c->progeny[k]->dt_max );
h_max = fmaxf( h_max , c->progeny[k]->h_max );
dx_max = fmaxf( dx_max , c->progeny[k]->dx_max );
h2dx_max = fmaxf( h2dx_max , c->progeny[k]->h2dx_max );
}
}
......@@ -279,7 +283,7 @@ void engine_map_kick_first ( struct cell *c , void *data ) {
c->dt_max = dt_max;
c->h_max = h_max;
c->dx_max = dx_max;
c->sorted = 0;
c->h2dx_max = h2dx_max;
/* Clean out the task pointers. */
// c->sorts[0] = NULL;
......@@ -340,7 +344,7 @@ void engine_single ( double *dim , long long int pid , struct part *__restrict__
/* Dump the result. */
ihg = kernel_igamma / p.h;
printf( "pairs_single: part %lli (h=%e) has wcount=%f, rho=%f.\n" , p.id , p.h , p.wcount + 32.0/3 , ihg * ihg * ihg * ( p.rho + p.mass*kernel_root ) );
printf( "pairs_single: part %lli (h=%e) has wcount=%f, rho=%f.\n" , p.id , p.h , p.density.wcount + 32.0/3 , ihg * ihg * ihg * ( p.rho + p.mass*kernel_root ) );
fflush(stdout);
}
......@@ -365,6 +369,8 @@ void engine_step ( struct engine *e , int sort_queues ) {
double lang[3], ang[3] = { 0.0 , 0.0 , 0.0 };
// double lent, ent = 0.0;
int threadID, nthreads, count = 0, lcount;
TIMER_TIC2
/* Get the maximum dt. */
dt_step = 2.0f*dt;
......@@ -375,7 +381,7 @@ void engine_step ( struct engine *e , int sort_queues ) {
/* Set the maximum dt. */
e->dt_step = dt_step;
e->s->dt_step = dt_step;
printf( "engine_step: dt_step set to %.3e.\n" , dt_step ); fflush(stdout);
printf( "engine_step: dt_step set to %.3e (dt=%.3e).\n" , dt_step , e->dt ); fflush(stdout);
// printParticle( parts , 432626 );
......@@ -415,7 +421,7 @@ void engine_step ( struct engine *e , int sort_queues ) {
error( "Error while waiting for barrier." );
/* Stop the clock. */
TIMER_TOC(timer_step);
TIMER_TOC(timer_runners);
// for(k=0; k<10; ++k)
// printParticle(parts, k);
......@@ -523,6 +529,8 @@ void engine_step ( struct engine *e , int sort_queues ) {
e->step /= 2;
printf( "engine_step: dt_min is larger than twice the time step, adjusting to dt=%e.\n" , e->dt );
}
TIMER_TOC2(timer_step);
}
......@@ -533,12 +541,13 @@ void engine_step ( struct engine *e , int sort_queues ) {
*
* @param e The #engine.
* @param s The #space in which this #runner will run.
* @param dt The initial time step to use.
* @param nr_threads The number of threads to spawn.
* @param nr_queues The number of task queues to create.
* @param policy The queueing policy to use.
*/
void engine_init ( struct engine *e , struct space *s , int nr_threads , int nr_queues , int policy ) {
void engine_init ( struct engine *e , struct space *s , float dt , int nr_threads , int nr_queues , int policy ) {
#if defined(HAVE_SETAFFINITY)
cpu_set_t cpuset;
......@@ -550,7 +559,6 @@ void engine_init ( struct engine *e , struct space *s , int nr_threads , int nr_
e->nr_threads = nr_threads;
e->nr_queues = nr_queues;
e->policy = policy;
e->dt_min = 0.0f;
e->step = 0;
/* First of all, init the barrier and lock it. */
......@@ -562,6 +570,13 @@ void engine_init ( struct engine *e , struct space *s , int nr_threads , int nr_
error( "Failed to lock barrier mutex." );
e->barrier_count = 0;
/* Run through the parts and get the minimum time step. */
for ( k = 0 ; k < s->nr_parts ; k++ )
if ( s->parts[k].dt > 0.0f )
while ( s->parts[k].dt < dt )
dt *= 0.5;
e->dt_min = dt;
/* Allocate the queues. */
if ( posix_memalign( (void *)(&e->queues) , 64 , nr_queues * sizeof(struct queue) ) != 0 )
error( "Failed to allocate queues." );
......
......@@ -72,6 +72,6 @@ struct engine {
/* Function prototypes. */
void engine_barrier( struct engine *e );
void engine_init ( struct engine *e , struct space *s , int nr_threads , int nr_queues , int policy );
void engine_init ( struct engine *e , struct space *s , float dt , int nr_threads , int nr_queues , int policy );
void engine_prepare ( struct engine *e );
void engine_step ( struct engine *e , int sort_queues );
......@@ -57,11 +57,23 @@ struct xpart {
/* Data of a single particle. */
struct part {
/* Particle acceleration. */
float a[3];
/* Particle velocity. */
float v[3];
/* Particle mass. */
float mass;
/* Particle position. */
double x[3];
/* Particle cutoff radius. */
float h;
/* Particle time-step. */
float dt;
/* Particle internal energy. */
float u;
/* Particle density. */
float rho;
......@@ -83,6 +95,9 @@ struct part {
/* Particle velocity curl. */
float curl_v[3];
/* Particle number density. */
float wcount;
} density;
struct {
......@@ -112,14 +127,8 @@ struct part {
/* Particle pressure. */
// float P;
/* Particle acceleration. */
float a[3];
/* Particle number density. */
float wcount;
/* Particle internal energy. */
float u;
/* Particle mass. */
float mass;
/* Particle ID. */
unsigned long long id;
......@@ -127,15 +136,6 @@ struct part {
/* Pointer to extra particle data. */
struct xpart *xtras;
/* Particle position. */
double x[3];
/* Particle cutoff radius. */
float h;
/* Particle time-step. */
float dt;
} __attribute__((aligned (32)));
......@@ -165,9 +165,11 @@ void runner_dosort_ascending ( struct entry *sort , int N ) {
* @param r The #runner.
* @param c The #cell.
* @param flags Cell flag.
* @param clock Flag indicating whether to record the timing or not, needed
* for recursive calls.
*/
void runner_dosort ( struct runner *r , struct cell *c , int flags ) {
void runner_dosort ( struct runner *r , struct cell *c , int flags , int clock ) {
struct entry *finger;
struct entry *fingers[8];
......@@ -176,8 +178,14 @@ void runner_dosort ( struct runner *r , struct cell *c , int flags ) {
int i, ind, off[8], inds[8], temp_i, missing;
// float shift[3];
float buff[8], px[3];
TIMER_TIC
/* Clean-up the flags, i.e. filter out what's already been sorted. */
flags &= ~c->sorted;
if ( flags == 0 )
return;
/* start by allocating the entry arrays. */
if ( lock_lock( &c->lock ) != 0 )
error( "Failed to lock cell." );
......@@ -203,7 +211,7 @@ void runner_dosort ( struct runner *r , struct cell *c , int flags ) {
else
missing = ( c->progeny[k]->sorts->flags ^ flags ) & flags;
if ( missing )
runner_dosort( r , c->progeny[k] , missing );
runner_dosort( r , c->progeny[k] , missing , 0 );
}
/* Loop over the 13 different sort arrays. */
......@@ -314,7 +322,8 @@ void runner_dosort ( struct runner *r , struct cell *c , int flags ) {
(flags & 0x1000) >> 12 , (flags & 0x800) >> 11 , (flags & 0x400) >> 10 , (flags & 0x200) >> 9 , (flags & 0x100) >> 8 , (flags & 0x80) >> 7 , (flags & 0x40) >> 6 , (flags & 0x20) >> 5 , (flags & 0x10) >> 4 , (flags & 0x8) >> 3 , (flags & 0x4) >> 2 , (flags & 0x2) >> 1 , (flags & 0x1) >> 0 ,
((double)TIMER_TOC(timer_dosort)) / CPU_TPS * 1000 ); fflush(stdout);
#else
TIMER_TOC(timer_dosort);
if ( clock )
TIMER_TOC(timer_dosort);
#endif
}
......@@ -377,31 +386,35 @@ void runner_doghost ( struct runner *r , struct cell *c ) {
ihg4 = ihg2 * ihg2;
/* Adjust the computed weighted number of neighbours. */
p->wcount += kernel_wroot;
p->density.wcount += kernel_wroot;
/* Final operation on the density. */
p->rho = ihg * ihg2 * ( p->rho + p->mass*kernel_root );
p->rho_dh *= ihg4;
/* Compute the smoothing length update (Newton step). */
h_corr = ( const_nwneigh - p->wcount ) / p->density.wcount_dh;
/* Truncate to the range [ -p->h/2 , p->h ]. */
h_corr = fminf( h_corr , h );
h_corr = fmaxf( h_corr , -h/2 );
if ( p->density.wcount_dh == 0.0f )
h_corr = p->h;
else {
h_corr = ( const_nwneigh - p->density.wcount ) / p->density.wcount_dh;
/* Truncate to the range [ -p->h/2 , p->h ]. */
h_corr = fminf( h_corr , h );
h_corr = fmaxf( h_corr , -h/2 );
}
/* Apply the correction to p->h. */
p->h += h_corr;
cp->h = p->h;
/* Did we get the right number density? */
if ( p->wcount > const_nwneigh + const_delta_nwneigh ||
p->wcount < const_nwneigh - const_delta_nwneigh ) {
// printf( "runner_doghost: particle %lli (h=%e,depth=%i) has bad wcount=%.3f.\n" , p->id , p->h , c->depth , p->wcount ); fflush(stdout);
// p->h += ( p->wcount + kernel_root - const_nwneigh ) / p->density.wcount_dh;
if ( p->density.wcount > const_nwneigh + const_delta_nwneigh ||
p->density.wcount < const_nwneigh - const_delta_nwneigh ) {
// printf( "runner_doghost: particle %lli (h=%e,depth=%i) has bad wcount=%.3f.\n" , p->id , p->h , c->depth , p->density.wcount ); fflush(stdout);
// p->h += ( p->density.wcount + kernel_root - const_nwneigh ) / p->density.wcount_dh;
pid[redo] = pid[i];
redo += 1;
p->wcount = 0.0;
p->density.wcount = 0.0;
p->density.wcount_dh = 0.0;
p->rho = 0.0;
p->rho_dh = 0.0;
......@@ -632,7 +645,7 @@ void *runner_main ( void *data ) {
cell_unlocktree( cj );
break;
case task_type_sort:
runner_dosort( r , ci , t->flags );
runner_dosort( r , ci , t->flags , 1 );
break;
case task_type_sub:
if ( t->subtype == task_subtype_density )
......
......@@ -81,5 +81,5 @@ void runner_doghost ( struct runner *r , struct cell *c );
void runner_dopair_density ( struct runner *r , struct cell *ci , struct cell *cj );
void runner_doself_density ( struct runner *r , struct cell *c );
void runner_dosub_density ( struct runner *r , struct cell *ci , struct cell *cj , int flags );
void runner_dosort ( struct runner *r , struct cell *c , int flag );
void runner_dosort ( struct runner *r , struct cell *c , int flag , int clock );
void *runner_main ( void *data );
......@@ -328,7 +328,7 @@ void DOPAIR_SUBSET ( struct runner *r , struct cell *restrict ci , struct part *
struct part *restrict pi, *restrict parts_j = cj->parts;
struct cpart *restrict cpj, *restrict cparts_j = cj->cparts;
double pix[3];
float dx[3], hi, hi2, r2, di;
float dx[3], hi, hi2, r2, di, dxj;
struct entry *sort_j;
#ifdef VECTORIZE
int icount = 0;
......@@ -363,6 +363,7 @@ void DOPAIR_SUBSET ( struct runner *r , struct cell *restrict ci , struct part *
/* Pick-out the sorted lists. */
sort_j = &cj->sort[ sid*(cj->count + 1) ];
dxj = cj->dx_max;
/* Parts are on the left? */
if ( !flipped ) {
......@@ -376,7 +377,7 @@ void DOPAIR_SUBSET ( struct runner *r , struct cell *restrict ci , struct part *
pix[k] = pi->x[k] - shift[k];
hi = pi->h;
hi2 = hi * hi;
di = hi + pix[0]*runner_shift[ 3*sid + 0 ] + pix[1]*runner_shift[ 3*sid + 1 ] + pix[2]*runner_shift[ 3*sid + 2 ];
di = hi + dxj + pix[0]*runner_shift[ 3*sid + 0 ] + pix[1]*runner_shift[ 3*sid + 1 ] + pix[2]*runner_shift[ 3*sid + 2 ];
/* Loop over the parts in cj. */
for ( pjd = 0 ; pjd < count_j && sort_j[ pjd ].d < di ; pjd++ ) {
......@@ -439,7 +440,7 @@ void DOPAIR_SUBSET ( struct runner *r , struct cell *restrict ci , struct part *
pix[k] = pi->x[k] - shift[k];
hi = pi->h;
hi2 = hi * hi;
di = -hi + pix[0]*runner_shift[ 3*sid + 0 ] + pix[1]*runner_shift[ 3*sid + 1 ] + pix[2]*runner_shift[ 3*sid + 2 ];
di = -hi - dxj + pix[0]*runner_shift[ 3*sid + 0 ] + pix[1]*runner_shift[ 3*sid + 1 ] + pix[2]*runner_shift[ 3*sid + 2 ];
/* Loop over the parts in cj. */
for ( pjd = count_j-1 ; pjd >= 0 && di < sort_j[ pjd ].d ; pjd-- ) {
......@@ -633,7 +634,7 @@ void DOPAIR1 ( struct runner *r , struct cell *ci , struct cell *cj ) {
struct cpart *restrict cpi, *restrict cparts_i;
struct cpart *restrict cpj, *restrict cparts_j;
double pix[3], pjx[3], di, dj;
float dx[3], hi, hi2, hj, hj2, r2;
float dx[3], hi, hi2, hj, hj2, r2, dx_max;
double hi_max, hj_max;
double di_max, dj_min;
int count_i, count_j;
......@@ -674,13 +675,14 @@ void DOPAIR1 ( struct runner *r , struct cell *ci , struct cell *cj ) {
cparts_i = ci->cparts; cparts_j = cj->cparts;
di_max = sort_i[count_i-1].d - rshift;
dj_min = sort_j[0].d;
dx_max = ( ci->dx_max + cj->dx_max );
/* if ( ci->split && cj->split && sid == 4 )
printf( "boing!\n" ); */
/* Loop over the parts in ci. */
for ( pid = count_i-1 ; pid >= 0 && sort_i[pid].d + hi_max > dj_min ; pid-- ) {
for ( pid = count_i-1 ; pid >= 0 && sort_i[pid].d + hi_max + dx_max > dj_min ; pid-- ) {
/* Get a hold of the ith part in ci. */
pi = &parts_i[ sort_i[ pid ].i ];
......@@ -688,7 +690,7 @@ void DOPAIR1 ( struct runner *r , struct cell *ci , struct cell *cj ) {
if ( cpi->dt > dt_step )
continue;
hi = cpi->h;
di = sort_i[pid].d + hi - rshift;
di = sort_i[pid].d + hi + dx_max - rshift;
if ( di < dj_min )
continue;
......@@ -747,7 +749,7 @@ void DOPAIR1 ( struct runner *r , struct cell *ci , struct cell *cj ) {
tic = getticks(); */
/* Loop over the parts in cj. */
for ( pjd = 0 ; pjd < count_j && sort_j[pjd].d - hj_max < di_max ; pjd++ ) {
for ( pjd = 0 ; pjd < count_j && sort_j[pjd].d - hj_max - dx_max < di_max ; pjd++ ) {
/* Get a hold of the jth part in cj. */
pj = &parts_j[ sort_j[ pjd ].i ];
......@@ -755,7 +757,7 @@ void DOPAIR1 ( struct runner *r , struct cell *ci , struct cell *cj ) {
if ( cpj->dt > dt_step )
continue;
hj = cpj->h;
dj = sort_j[pjd].d - hj - rshift;
dj = sort_j[pjd].d - hj - dx_max - rshift;
if ( dj > di_max )
continue;
......@@ -838,7 +840,7 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) {
struct cpart *restrict cpi, *restrict cparts_i;
struct cpart *restrict cpj, *restrict cparts_j;
double pix[3], pjx[3], di, dj;
float dx[3], hi, hi2, hj, hj2, r2;
float dx[3], hi, hi2, hj, hj2, r2, dx_max;
double hi_max, hj_max;
double di_max, dj_min;
int count_i, count_j;
......@@ -885,6 +887,7 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) {
cparts_i = ci->cparts; cparts_j = cj->cparts;
di_max = sort_i[count_i-1].d - rshift;
dj_min = sort_j[0].d;
dx_max = ( ci->dx_max + cj->dx_max );
/* Collect the number of parts left and right below dt. */
if ( ci->dt_max <= dt_step ) {
......@@ -915,13 +918,13 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) {
}
/* Loop over the parts in ci. */
for ( pid = count_i-1 ; pid >= 0 && sort_i[pid].d + hi_max > dj_min ; pid-- ) {
for ( pid = count_i-1 ; pid >= 0 && sort_i[pid].d + hi_max + dx_max > dj_min ; pid-- ) {
/* Get a hold of the ith part in ci. */
pi = &parts_i[ sort_i[ pid ].i ];
cpi = &cparts_i[ sort_i[ pid ].i ];
hi = cpi->h;
di = sort_i[pid].d + hi - rshift;
di = sort_i[pid].d + hi + dx_max - rshift;
if ( di < dj_min )
continue;
......@@ -1065,13 +1068,13 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) {
tic = getticks(); */
/* Loop over the parts in cj. */
for ( pjd = 0 ; pjd < count_j && sort_j[pjd].d - hj_max < di_max ; pjd++ ) {
for ( pjd = 0 ; pjd < count_j && sort_j[pjd].d - hj_max - dx_max < di_max ; pjd++ ) {
/* Get a hold of the jth part in cj. */
pj = &parts_j[ sort_j[ pjd ].i ];
cpj = &cparts_j[ sort_j[ pjd ].i ];
hj = cpj->h;
dj = sort_j[pjd].d - hj - rshift;
dj = sort_j[pjd].d - hj - dx_max - rshift;
if ( dj > di_max )
continue;
......
......@@ -68,7 +68,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_density ( float r
pi->rho += pj->mass * wi;
pi->rho_dh -= pj->mass * ( 3.0*wi + xi*wi_dx );
pi->wcount += wi * ( 4.0f * M_PI / 3.0f * kernel_igamma3 );
pi->density.wcount += wi * ( 4.0f * M_PI / 3.0f * kernel_igamma3 );
pi->density.wcount_dh -= xi * h_inv * wi_dx * ( 4.0f * M_PI / 3.0f * kernel_igamma3 );
// pi->icount += 1;
......@@ -87,7 +87,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_density ( float r
pj->rho += pi->mass * wj;
pj->rho_dh -= pi->mass * ( 3.0*wj + xj*wj_dx );
pj->wcount += wj * ( 4.0f * M_PI / 3.0f * kernel_igamma3 );
pj->density.wcount += wj * ( 4.0f * M_PI / 3.0f * kernel_igamma3 );
pj->density.wcount_dh -= xj * h_inv * wj_dx * ( 4.0f * M_PI / 3.0f * kernel_igamma3 );
// pj->icount += 1;
......@@ -192,14 +192,14 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_vec_density ( flo
for ( k = 0 ; k < VEC_SIZE ; k++ ) {
pi[k]->rho += rhoi.f[k];
pi[k]->rho_dh -= rhoi_dh.f[k];
pi[k]->wcount += wcounti.f[k];
pi[k]->density.wcount += wcounti.f[k];
pi[k]->density.wcount_dh -= wcounti_dh.f[k];
pi[k]->density.div_v += div_vi.f[k];
for( j = 0 ; j < 3 ; j++ )
pi[k]->density.curl_v[j] += curl_vi[j].f[k];
pj[k]->rho += rhoj.f[k];
pj[k]->rho_dh -= rhoj_dh.f[k];
pj[k]->wcount += wcountj.f[k];
pj[k]->density.wcount += wcountj.f[k];
pj[k]->density.wcount_dh -= wcountj_dh.f[k];
pj[k]->density.div_v += div_vj.f[k];
for( j = 0 ; j < 3 ; j++ )
......@@ -251,7 +251,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_density (
pi->rho += pj->mass * wi;
pi->rho_dh -= pj->mass * ( 3.0*wi + xi*wi_dx );
pi->wcount += wi * ( 4.0f * M_PI / 3.0f * kernel_igamma3 );
pi->density.wcount += wi * ( 4.0f * M_PI / 3.0f * kernel_igamma3 );
pi->density.wcount_dh -= xi * h_inv * wi_dx * ( 4.0f * M_PI / 3.0f * kernel_igamma3 );
// pi->icount += 1;
......@@ -329,7 +329,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_vec_densit
for ( k = 0 ; k < VEC_SIZE ; k++ ) {
pi[k]->rho += rhoi.f[k];
pi[k]->rho_dh -= rhoi_dh.f[k];
pi[k]->wcount += wcounti.f[k];
pi[k]->density.wcount += wcounti.f[k];
pi[k]->density.wcount_dh -= wcounti_dh.f[k];
pi[k]->density.div_v += div_vi.f[k];
for( j = 0 ; j < 3 ; j++ )
......
......@@ -133,7 +133,7 @@ int space_marktasks ( struct space *s ) {
/* Too much particle movement? */
if ( !t->skip && t->tight &&
( t->ci->dx_max > t->ci->dmin || t->cj->dx_max > t->cj->dmin ) )
( t->ci->h2dx_max > t->ci->dmin || t->cj->h2dx_max > t->cj->dmin ) )
return 1;
/* Set the sort flags. */
......@@ -176,94 +176,6 @@ int space_marktasks ( struct space *s ) {
}
/**
* @brief Mapping function to set dt_min and dt_max.
*/
void space_map_prepare ( struct cell *c , void *data ) {
int k;
float dt_min, dt_max, h_max, dx_max;
struct part *restrict p;
struct xpart *restrict xp;
struct cpart *restrict cp;
/* No children? */
if ( !c->split ) {
/* Init with first part. */
p = &c->parts[0];
xp = p->xtras;
cp = &c->cparts[0];
dt_min = p->dt;
dt_max = p->dt;
h_max = p->h;
dx_max = sqrtf( (p->x[0] - xp->x_old[0])*(p->x[0] - xp->x_old[0]) +
(p->x[1] - xp->x_old[1])*(p->x[1] - xp->x_old[1]) +
(p->x[2] - xp->x_old[2])*(p->x[2] - xp->x_old[2]) )*2 + p->h;
cp->x[0] = p->x[0];
cp->x[1] = p->x[1];
cp->x[2] = p->x[2];
cp->h = p->h;
cp->dt = p->dt;
/* Loop over parts. */
for ( k = 1 ; k < c->count ; k++ ) {
p = &c->parts[k];
xp = p->xtras;
cp = &c->cparts[k];
dt_min = fminf( dt_min , p->dt );
dt_max = fmaxf( dt_max , p->dt );
h_max = fmaxf( h_max , p->h );
dx_max = fmaxf( dx_max , sqrtf( (p->x[0] - xp->x_old[0])*(p->x[0] - xp->x_old[0]) +
(p->x[1] - xp->x_old[1])*(p->x[1] - xp->x_old[1]) +
(p->x[2] - xp->x_old[2])*(p->x[2] - xp->x_old[2]) )*2 + p->h );
cp->x[0] = p->x[0];
cp->x[1] = p->x[1];
cp->x[2] = p->x[2];
cp->h = p->h;
cp->dt = p->dt;
}
}
/* Otherwise, agregate from children. */
else {
/* Init with the first non-null child. */