Commit c207d539 authored by Pedro Gonnet's avatar Pedro Gonnet
Browse files

fix bug in runner_doghost.


Former-commit-id: cbd37d27f2028c64a464e5251fb0cffe6077ae0b
parent f17cb9fc
......@@ -30,7 +30,7 @@ void printParticle ( struct part *parts , long long int id ) {
/* Look for the particle. */
for ( i = 0 ; parts[i].id != id ; i++ );
printf("## Particle[%d]: id=%lld, x=[%f,%f,%f], v=[%f,%f,%f], a=[%f,%f,%f], h=%.3e, h_dt=%.3e, wcount=%f, m=%.3e, rho=%f, u=%f, dudt=%f, dt=%.3e\n",
printf("## Particle[%d]: id=%lld, x=[%.3e,%.3e,%.3e], v=[%.3e,%.3e,%.3e], a=[%.3e,%.3e,%.3e], h=%.3e, h_dt=%.3e, wcount=%.3e, m=%.3e, rho=%.3e, rho_dh=%.3e, div_v=%.3e, u=%.3e, dudt=%.3e, bals=%.3e, POrho2=%.3e, v_sig=%.3e, dt=%.3e\n",
i,
parts[i].id,
parts[i].x[0], parts[i].x[1], parts[i].x[2],
......@@ -40,9 +40,13 @@ void printParticle ( struct part *parts , long long int id ) {
parts[i].force.h_dt,
parts[i].wcount,
parts[i].mass,
parts[i].rho,
parts[i].rho, parts[i].rho_dh,
parts[i].density.div_v,
parts[i].u,
parts[i].force.u_dt,
parts[i].force.balsara,
parts[i].force.POrho2,
parts[i].force.v_sig,
parts[i].dt
);
}
......
......@@ -97,8 +97,6 @@ void engine_prepare ( struct engine *e ) {
continue;
for ( j = 0 ; j < s->tasks[k].nr_unlock_tasks ; j++ )
__sync_add_and_fetch( &s->tasks[k].unlock_tasks[j]->wait , 1 );
for ( j = 0 ; j < s->tasks[k].nr_unlock_cells ; j++ )
__sync_add_and_fetch( &s->tasks[k].unlock_cells[j]->wait , 1 );
}
// printf( "engine_prepare: preparing task dependencies took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 );
......@@ -278,6 +276,59 @@ void engine_map_kick_first ( struct cell *c , void *data ) {
}
void engine_single2 ( double *dim , long long int pid , struct part *__restrict__ parts , int N , int periodic ) {
int i, k;
double r2, dx[3];
float fdx[3], ihg;
struct part p, p2;
/* Find "our" part. */
for ( k = 0 ; k < N && parts[k].id != pid ; k++ );
if ( k == N )
error( "Part not found." );
p = parts[k];
/* Clear accumulators. */
ihg = kernel_igamma / p.h;
p.a[0] = 0.0f; p.a[1] = 0.0f; p.a[2] = 0.0f;
p.force.u_dt = 0.0f; p.force.h_dt = 0.0f; p.force.v_sig = 0.0f;
/* Loop over all particle pairs (force). */
for ( k = 0 ; k < N ; k++ ) {
if ( parts[k].id == p.id )
continue;
for ( i = 0 ; i < 3 ; i++ ) {
dx[i] = p.x[i] - parts[k].x[i];
if ( periodic ) {
if ( dx[i] < -dim[i]/2 )
dx[i] += dim[i];
else if ( dx[i] > dim[i]/2 )
dx[i] -= dim[i];
}
fdx[i] = dx[i];
}
r2 = fdx[0]*fdx[0] + fdx[1]*fdx[1] + fdx[2]*fdx[2];
if ( r2 < p.h*p.h || r2 < parts[k].h*parts[k].h ) {
p.a[0] = 0.0f; p.a[1] = 0.0f; p.a[2] = 0.0f;
p.force.u_dt = 0.0f; p.force.h_dt = 0.0f; p.force.v_sig = 0.0f;
p2 = parts[k];
runner_iact_nonsym_force( r2 , fdx , p.h , p2.h , &p , &p2 );
printf( "pairs_simple: interacting particles %lli and %lli (rho=%.3e,r=%e): a=[%.3e,%.3e,%.3e], u_dt=%.3e, h_dt=%.3e, v_sig=%.3e.\n" ,
pid , p2.id , p2.rho , sqrtf(r2) ,
p.a[0] , p.a[1] , p.a[2] ,
p.force.u_dt , p.force.h_dt , p.force.v_sig );
}
}
/* Dump the result. */
ihg = kernel_igamma / p.h;
printf( "pairs_single: part %lli (h=%e) has wcount=%f, rho=%f.\n" , p.id , p.h , p.wcount + 32.0/3 , ihg * ihg * ihg * ( p.rho + p.mass*kernel_root ) );
fflush(stdout);
}
/**
* @brief Let the #engine loose to compute the forces.
*
......@@ -307,6 +358,8 @@ void engine_step ( struct engine *e , int sort_queues ) {
e->s->dt_step = dt_step;
printf( "engine_step: dt_step set to %.3e.\n" , dt_step ); fflush(stdout);
// printParticle( parts , 432626 );
/* First kick. */
TIMER_TIC
space_map_cells_post( e->s , 1 , &engine_map_kick_first , e );
......@@ -314,7 +367,8 @@ void engine_step ( struct engine *e , int sort_queues ) {
// for(k=0; k<10; ++k)
// printParticle(parts, k);
// printParticle( parts , 494849 );
// printParticle( parts , 432626 );
// printParticle( parts , 432628 );
/* Prepare the space. */
engine_prepare( e );
......@@ -346,12 +400,14 @@ void engine_step ( struct engine *e , int sort_queues ) {
// for(k=0; k<10; ++k)
// printParticle(parts, k);
// printParticle( parts , 494849 );
// engine_single2( e->s->dim , 432626 , e->s->parts , e->s->nr_parts , e->s->periodic );
// printParticle( parts , 432626 );
// printParticle( parts , 432628 );
/* Second kick. */
TIMER_TIC_ND
dt_min = FLT_MAX; dt_max = 0.0f;
#pragma omp parallel private(p,xp,k,ldt_min,lcount,ldt_max,lmom,lang,lent,lekin,lepot,threadID,nthreads)
#pragma omp parallel private(p,xp,k,ldt_min,lcount,ldt_max,lmom,lang,lent,lekin,lepot,threadID,nthreads)
{
threadID = omp_get_thread_num();
nthreads = omp_get_num_threads();
......@@ -372,7 +428,7 @@ void engine_step ( struct engine *e , int sort_queues ) {
}
/* Update the particle's time step. */
p->dt = const_cfl * p->h / ( p->force.v_sig );
p->dt = const_cfl * p->h / ( p->force.v_sig );
/* Update positions and energies at the half-step. */
p->v[0] = xp->v_old[0] + hdt * p->a[0];
......@@ -393,13 +449,13 @@ void engine_step ( struct engine *e , int sort_queues ) {
lmom[1] += p->mass * p->v[1];
lmom[2] += p->mass * p->v[2];
/* Collect angular momentum */
lang[0] += p->mass * ( p->x[1]*p->v[2] - p->v[2]*p->x[1] );
lang[1] += p->mass * ( p->x[2]*p->v[0] - p->v[0]*p->x[2] );
lang[2] += p->mass * ( p->x[0]*p->v[1] - p->v[1]*p->x[0] );
/* Collect angular momentum */
lang[0] += p->mass * ( p->x[1]*p->v[2] - p->v[2]*p->x[1] );
lang[1] += p->mass * ( p->x[2]*p->v[0] - p->v[0]*p->x[2] );
lang[2] += p->mass * ( p->x[0]*p->v[1] - p->v[1]*p->x[0] );
/* Collect entropic function */
lent += p->u * pow( p->rho, 1.f-const_gamma );
/* Collect entropic function */
lent += p->u * pow( p->rho, 1.f-const_gamma );
}
#pragma omp critical
......@@ -412,7 +468,7 @@ void engine_step ( struct engine *e , int sort_queues ) {
ang[0] += lang[0];
ang[1] += lang[1];
ang[2] += lang[2];
ent += (const_gamma -1.) * lent;
ent += (const_gamma -1.) * lent;
epot += lepot;
ekin += lekin;
count += lcount;
......@@ -420,6 +476,7 @@ void engine_step ( struct engine *e , int sort_queues ) {
}
TIMER_TOC( timer_kick2 );
e->dt_min = dt_min;
// printParticle( parts , 432626 );
printf( "engine_step: dt_min/dt_max is %e/%e.\n" , dt_min , dt_max ); fflush(stdout);
printf( "engine_step: etot is %e (ekin=%e, epot=%e).\n" , ekin+epot , ekin , epot ); fflush(stdout);
printf( "engine_step: total momentum is [ %e , %e , %e ].\n" , mom[0] , mom[1] , mom[2] ); fflush(stdout);
......
......@@ -70,7 +70,7 @@ struct part {
float rho_dh;
/* Store density/force specific stuff. */
union {
// union {
struct {
......@@ -107,7 +107,7 @@ struct part {
} force;
};
// };
/* Particle pressure. */
// float P;
......
......@@ -334,7 +334,7 @@ void runner_doghost ( struct runner *r , struct cell *c ) {
struct cell *finger;
int i, k, redo, count = c->count;
int *pid;
float ihg, ihg2, ihg4, h_corr;
float h, hg, ihg, ihg2, ihg4, h_corr;
float normDiv_v, normCurl_v;
float dt_step = r->e->dt_step;
TIMER_TIC
......@@ -368,21 +368,27 @@ void runner_doghost ( struct runner *r , struct cell *c ) {
/* Is this part within the timestep? */
if ( cp->dt <= dt_step ) {
/* Some smoothing length multiples. */
ihg = kernel_igamma / p->h;
h = p->h;
hg = kernel_gamma * h;
ihg = 1.0f / hg;
ihg2 = ihg * ihg;
ihg4 = ihg2 * ihg2;
/* Adjust the computed weighted number of neighbours. */
p->wcount += kernel_wroot;
/* Final operation on the density. */
p->rho = ihg * ihg2 * ( p->rho + p->mass*kernel_root );
p->rho_dh *= ihg4;
/* Compute the smoothing length update (Newton step). */
h_corr = ( const_nwneigh - p->wcount ) / p->density.wcount_dh;
/* Truncate to the range [ -p->h/2 , p->h ]. */
h_corr = fminf( h_corr , p->h );
h_corr = fmaxf( h_corr , -p->h/2 );
h_corr = fminf( h_corr , h );
h_corr = fmaxf( h_corr , -h/2 );
/* Apply the correction to p->h. */
p->h += h_corr;
......@@ -391,7 +397,7 @@ void runner_doghost ( struct runner *r , struct cell *c ) {
/* Did we get the right number density? */
if ( p->wcount > const_nwneigh + const_delta_nwneigh ||
p->wcount < const_nwneigh - const_delta_nwneigh ) {
// printf( "runner_doghost: particle %lli (h=%e,h_dt=%e,depth=%i) has bad wcount=%.3f.\n" , p->id , p->h , p->h_dt , c->depth , p->wcount ); fflush(stdout);
// printf( "runner_doghost: particle %lli (h=%e,depth=%i) has bad wcount=%.3f.\n" , p->id , p->h , c->depth , p->wcount ); fflush(stdout);
// p->h += ( p->wcount + kernel_root - const_nwneigh ) / p->density.wcount_dh;
pid[redo] = pid[i];
redo += 1;
......@@ -405,22 +411,24 @@ void runner_doghost ( struct runner *r , struct cell *c ) {
continue;
}
/* Final operation on the density. */
p->rho = ihg * ihg2 * ( p->rho + p->mass*kernel_root );
p->rho_dh *= ihg4;
if ( p->id == 432626 )
printf( "runner_doghost: updating particle %lli.\n" , p->id );
/* Pre-compute some stuff for the balsara switch. */
normDiv_v = fabs( p->density.div_v / p->rho * ihg4 );
normCurl_v = sqrtf( p->density.curl_v[0] * p->density.curl_v[0] + p->density.curl_v[1] * p->density.curl_v[1] + p->density.curl_v[2] * p->density.curl_v[2] ) / p->rho * ihg4;
/* As of here, particle force variables will be set. Do _NOT_
try to read any particle density variables! */
/* Compute this particle's sound speed. */
p->force.c = sqrtf( const_gamma * ( const_gamma - 1.0f ) * p->u );
/* Compute the P/Omega/rho2. */
p->force.POrho2 = p->u * ( const_gamma - 1.0f ) / ( p->rho + p->h * p->rho_dh / 3.0f );
p->force.POrho2 = p->u * ( const_gamma - 1.0f ) / ( p->rho + hg * p->rho_dh / 3.0f );
/* Balsara switch */
p->force.balsara = normCurl_v / ( normDiv_v + normCurl_v + 0.0001f * p->force.c / p->h );
p->force.balsara = normCurl_v / ( normDiv_v + normCurl_v + 0.0001f * p->force.c * ihg );
/* Reset the acceleration. */
for ( k = 0 ; k < 3 ; k++ )
......@@ -652,8 +660,6 @@ void *runner_main ( void *data ) {
for ( k = 0 ; k < t->nr_unlock_tasks ; k++ )
if ( __sync_fetch_and_sub( &t->unlock_tasks[k]->wait , 1 ) == 0 )
abort();
for ( k = 0 ; k < t->nr_unlock_cells ; k++ )
__sync_fetch_and_sub( &t->unlock_cells[k]->wait , 1 );
} /* main loop. */
......
......@@ -663,46 +663,6 @@ void DOPAIR1 ( struct runner *r , struct cell *ci , struct cell *cj ) {
for ( rshift = 0.0 , k = 0 ; k < 3 ; k++ )
rshift += shift[k]*runner_shift[ 3*sid + k ];
/* for ( k = 0 ; k < ci->count ; k++ )
if ( ci->parts[k].id == 5245989477229 )
break;
if ( k < ci->count ) {
printf( "runner_dopair: doing pair [ %g %g %g ]/[ %g %g %g ] with %i/%i parts, h_max=%g/%g, and shift = [ %g %g %g ] (rshift=%g).\n" ,
ci->loc[0] , ci->loc[1] , ci->loc[2] , cj->loc[0] , cj->loc[1] , cj->loc[2] ,
ci->count , cj->count , ci->h_max , cj->h_max , shift[0] , shift[1] , shift[2] , rshift ); fflush(stdout);
for ( pjd = 0 ; pjd < cj->count ; pjd++ ) {
pi = &ci->parts[k];
pj = &cj->parts[pjd];
r2 = sqrtf( (pi->x[0] - pj->x[0])*(pi->x[0] - pj->x[0]) + (pi->x[1] - pj->x[1])*(pi->x[1] - pj->x[1]) + (pi->x[2] - pj->x[2])*(pi->x[2] - pj->x[2]) );
printf( "runner_dopair1: inspecting particles %lli [%i,%i,%i] and %lli [%i,%i,%i], r=%e.\n" ,
pi->id , (int)(ci->loc[0]/ci->h[0]) , (int)(ci->loc[1]/ci->h[1]) , (int)(ci->loc[2]/ci->h[2]) ,
pj->id , (int)(cj->loc[0]/cj->h[0]) , (int)(cj->loc[1]/cj->h[1]) , (int)(cj->loc[2]/cj->h[2]) ,
r2 );
}
} */
/* for ( k = 0 ; k < cj->count ; k++ )
if ( cj->parts[k].id == 5245989477229 )
break;
if ( k < cj->count ) {
printf( "runner_dopair: doing pair [ %g %g %g ]/[ %g %g %g ] with %i/%i parts, h_max=%g/%g, and shift = [ %g %g %g ] (rshift=%g).\n" ,
ci->loc[0] , ci->loc[1] , ci->loc[2] , cj->loc[0] , cj->loc[1] , cj->loc[2] ,
ci->count , cj->count , ci->h_max , cj->h_max , shift[0] , shift[1] , shift[2] , rshift ); fflush(stdout);
for ( pjd = 0 ; pjd < ci->count ; pjd++ ) {
pj = &cj->parts[k];
pi = &ci->parts[pjd];
r2 = sqrtf( (pi->x[0] - pj->x[0])*(pi->x[0] - pj->x[0]) + (pi->x[1] - pj->x[1])*(pi->x[1] - pj->x[1]) + (pi->x[2] - pj->x[2])*(pi->x[2] - pj->x[2]) );
printf( "runner_dopair1: inspecting particles %lli [%i,%i,%i] and %lli [%i,%i,%i], r=%e.\n" ,
pi->id , (int)(ci->loc[0]/ci->h[0]) , (int)(ci->loc[1]/ci->h[1]) , (int)(ci->loc[2]/ci->h[2]) ,
pj->id , (int)(cj->loc[0]/cj->h[0]) , (int)(cj->loc[1]/cj->h[1]) , (int)(cj->loc[2]/cj->h[2]) ,
r2 );
}
} */
/* for ( hi = 0 , k = 0 ; k < ci->count ; k++ )
hi += ci->parts[k].r;
for ( hj = 0 , k = 0 ; k < cj->count ; k++ )
hj += cj->parts[k].r;
printf( "runner_dopair: avg. radii %g/%g for h=%g at depth=%i.\n" , hi/ci->count , hj/cj->count , ci->h[0] , ci->depth ); fflush(stdout); */
/* Pick-out the sorted lists. */
sort_i = &ci->sort[ sid*(ci->count + 1) ];
sort_j = &cj->sort[ sid*(cj->count + 1) ];
......@@ -914,23 +874,6 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) {
for ( rshift = 0.0 , k = 0 ; k < 3 ; k++ )
rshift += shift[k]*runner_shift[ 3*sid + k ];
/* for ( k = 0 ; k < ci->count ; k++ )
if ( ci->parts[k].id == 561590 )
break;
if ( k == ci->count )
for ( k = 0 ; k < cj->count ; k++ )
if ( cj->parts[k].id == 561590 )
break;
if ( k < cj->count )
printf( "runner_dopair: doing pair [ %g %g %g ]/[ %g %g %g ] with %i/%i parts, h_max=%g/%g, and shift = [ %g %g %g ] (rshift=%g).\n" ,
ci->loc[0] , ci->loc[1] , ci->loc[2] , cj->loc[0] , cj->loc[1] , cj->loc[2] ,
ci->count , cj->count , ci->h_max , cj->h_max , shift[0] , shift[1] , shift[2] , rshift ); fflush(stdout); */
/* for ( hi = 0 , k = 0 ; k < ci->count ; k++ )
hi += ci->parts[k].r;
for ( hj = 0 , k = 0 ; k < cj->count ; k++ )
hj += cj->parts[k].r;
printf( "runner_dopair: avg. radii %g/%g for h=%g at depth=%i.\n" , hi/ci->count , hj/cj->count , ci->h[0] , ci->depth ); fflush(stdout); */
/* Pick-out the sorted lists. */
sort_i = &ci->sort[ sid*(ci->count + 1) ];
sort_j = &cj->sort[ sid*(cj->count + 1) ];
......@@ -971,9 +914,6 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) {
}
}
/* if ( ci->split && cj->split && sid == 4 )
printf( "boing!\n" ); */
/* Loop over the parts in ci. */
for ( pid = count_i-1 ; pid >= 0 && sort_i[pid].d + hi_max > dj_min ; pid-- ) {
......
......@@ -59,7 +59,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_density ( float r
for ( k = 0 ; k < 3 ; k++ )
curlvr[k] *= ri;
if ( r2 < hi*hi && pi != NULL ) {
if ( r2 < hi*hi ) {
h_inv = 1.0 / hi;
hg_inv = kernel_igamma * h_inv;
......@@ -78,7 +78,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_density ( float r
}
if ( r2 < hj*hj && pj != NULL ) {
if ( r2 < hj*hj ) {
h_inv = 1.0 / hj;
hg_inv = kernel_igamma * h_inv;
......@@ -242,7 +242,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_density (
for ( k = 0 ; k < 3 ; k++ )
curlvr[k] *= ri;
if ( r2 < hi*hi && pi != NULL ) {
if ( r2 < hi*hi ) {
h_inv = 1.0 / hi;
hg_inv = kernel_igamma * h_inv;
......@@ -255,9 +255,9 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_density (
pi->density.wcount_dh -= xi * h_inv * wi_dx * ( 4.0f * M_PI / 3.0f * kernel_igamma3 );
// pi->icount += 1;
pi->density.div_v += pj->mass * dvdr * wi_dx;
for ( k = 0 ; k < 3 ; k++ )
pi->density.curl_v[k] += pj->mass * curlvr[k] * wi_dx;
pi->density.div_v += pj->mass * dvdr * wi_dx;
for ( k = 0 ; k < 3 ; k++ )
pi->density.curl_v[k] += pj->mass * curlvr[k] * wi_dx;
}
}
......@@ -380,7 +380,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_force ( float r2
dvdr *= ri;
/* Compute the relative velocity. (This is 0 if the particles move away from each other and negative otherwise) */
omega_ij = fminf(dvdr, 0.f);
omega_ij = fminf( dvdr , 0.f );
/* Compute signal velocity */
v_sig = pi->force.c + pj->force.c - 3.*omega_ij;
......@@ -392,7 +392,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_force ( float r2
Pi_ij *= (pi->force.balsara + pj->force.balsara);
/* Get the common factor out. */
w = ri * ( ( pi->force.POrho2 * wi_dr + pj->force.POrho2 * wj_dr ) - 0.25f * Pi_ij * ( wi_dr + wj_dr ) );
w = ri * ( ( pi->force.POrho2 * wi_dr + pj->force.POrho2 * wj_dr ) + 0.25f * Pi_ij * ( wi_dr + wj_dr ) );
/* Use the force, Luke! */
for ( k = 0 ; k < 3 ; k++ ) {
......@@ -402,12 +402,12 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_force ( float r2
}
/* Get the time derivative for u. */
pi->force.u_dt += pi->force.POrho2 * pj->mass * dvdr * wi_dr + 0.125f * pj->mass * Pi_ij * dvdr * ( wi_dr + wj_dr );
pj->force.u_dt += pj->force.POrho2 * pi->mass * dvdr * wj_dr + 0.125f * pi->mass * Pi_ij * dvdr * ( wi_dr + wj_dr );
pi->force.u_dt += pj->mass * dvdr * ( pi->force.POrho2 * wi_dr + 0.125f * Pi_ij * ( wi_dr + wj_dr ) );
pj->force.u_dt += pi->mass * dvdr * ( pj->force.POrho2 * wj_dr + 0.125f * Pi_ij * ( wi_dr + wj_dr ) );
/* Get the time derivative for h. */
pi->force.h_dt -= pj->mass / pj->rho * dvdr * wi_dr;
pj->force.h_dt -= pi->mass / pi->rho * dvdr * wj_dr;
pi->force.h_dt -= pj->mass * dvdr / pj->rho * wi_dr;
pj->force.h_dt -= pi->mass * dvdr / pi->rho * wj_dr;
/* Update the signal velocity. */
pi->force.v_sig = fmaxf( pi->force.v_sig , v_sig );
......@@ -538,7 +538,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_vec_force ( float
Pi_ij.v *= ( wi_dr.v + wj_dr.v );
/* Get the common factor out. */
w.v = ri.v * ( ( piPOrho2.v * wi_dr.v + pjPOrho2.v * wj_dr.v ) - vec_set1( 0.25f ) * Pi_ij.v );
w.v = ri.v * ( ( piPOrho2.v * wi_dr.v + pjPOrho2.v * wj_dr.v ) + vec_set1( 0.25f ) * Pi_ij.v );
/* Use the force, Luke! */
for ( k = 0 ; k < 3 ; k++ ) {
......@@ -590,8 +590,8 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_force ( fl
float hi_inv, hi2_inv;
float hj_inv, hj2_inv;
float wi, wj, wi_dx, wj_dx, wi_dr, wj_dr, w, dvdr;
float v_sig, omega_ij, Pi_ij;
float f;
float omega_ij, Pi_ij, v_sig;
int k;
/* Get the kernel for hi. */
......@@ -607,13 +607,13 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_force ( fl
xj = r * hj_inv * kernel_igamma;
kernel_deval( xj , &wj , &wj_dx );
wj_dr = hj2_inv * hj2_inv * wj_dx;
/* Compute dv dot r. */
dvdr = ( pi->v[0] - pj->v[0] ) * dx[0] + ( pi->v[1] - pj->v[1] ) * dx[1] + ( pi->v[2] - pj->v[2] ) * dx[2];
dvdr *= ri;
/* Compute the relative velocity. (This is 0 if the particles move away from each other and negative otherwise) */
omega_ij = fminf(dvdr, 0.f);
omega_ij = fminf( dvdr , 0.f );
/* Compute signal velocity */
v_sig = pi->force.c + pj->force.c - 3.*omega_ij;
......@@ -625,25 +625,31 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_force ( fl
Pi_ij *= (pi->force.balsara + pj->force.balsara);
/* Get the common factor out. */
w = ri * ( ( pi->force.POrho2 * wi_dr + pj->force.POrho2 * wj_dr ) - 0.25f * Pi_ij * ( wi_dr + wj_dr ) );
w = ri * ( ( pi->force.POrho2 * wi_dr + pj->force.POrho2 * wj_dr ) + 0.25f * Pi_ij * ( wi_dr + wj_dr ) );
/* Use the force, Luke! */
for ( k = 0 ; k < 3 ; k++ ) {
f = dx[k] * w;
pi->a[k] -= pj->mass * f;
}
/* Get the time derivative for u. */
pi->force.u_dt += pi->force.POrho2 * pj->mass * dvdr * wi_dr + 0.125f * pj->mass * Pi_ij * dvdr * ( wi_dr + wj_dr );
pi->force.u_dt += pj->mass * dvdr * ( pi->force.POrho2 * wi_dr + 0.125f * Pi_ij * ( wi_dr + wj_dr ) );
/* Get the time derivative for h. */
pi->force.h_dt -= pj->mass / pj->rho * dvdr * wi_dr;
pi->force.h_dt -= pj->mass * dvdr / pj->rho * wi_dr;
/* Update the signal velocity. */
pi->force.v_sig = fmaxf( pi->force.v_sig , v_sig );
pj->force.v_sig = fmaxf( pj->force.v_sig , v_sig );
#ifdef HIST
if ( hi > hj )
runner_hist_hit( hi / hj );
else
runner_hist_hit( hj / hi );
#endif
}
......@@ -759,7 +765,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_vec_force
Pi_ij.v *= ( wi_dr.v + wj_dr.v );
/* Get the common factor out. */
w.v = ri.v * ( ( piPOrho2.v * wi_dr.v + pjPOrho2.v * wj_dr.v ) - vec_set1( 0.25f ) * Pi_ij.v );
w.v = ri.v * ( ( piPOrho2.v * wi_dr.v + pjPOrho2.v * wj_dr.v ) + vec_set1( 0.25f ) * Pi_ij.v );
/* Use the force, Luke! */
for ( k = 0 ; k < 3 ; k++ ) {
......
......@@ -134,7 +134,7 @@ int space_marktasks ( struct space *s ) {
/* Too much particle movement? */
if ( !t->skip && t->tight &&
( t->ci->dx_max > t->ci->dmin || t->cj->dx_max > t->cj->dmin ) )
break;
return 1;
/* Set the sort flags. */
if ( !t->skip ) {
......@@ -154,10 +154,6 @@ int space_marktasks ( struct space *s ) {
}
/* Did this not go through? */
if ( k < s->nr_tasks )
return 1;
/* Run through the tasks and mark as skip or not. */
for ( k = 0 ; k < s->nr_tasks ; k++ ) {
......@@ -295,7 +291,7 @@ void space_prepare ( struct space *s ) {
/* Run through the tasks and mark as skip or not. */
tic = getticks();
rebuild = space_marktasks( s );
printf( "space_prepare: space_marktasks tasks took %.3f ms.\n" , (double)(getticks() - tic)/CPU_TPS*1000 );
printf( "space_prepare: space_marktasks took %.3f ms.\n" , (double)(getticks() - tic)/CPU_TPS*1000 );
/* Did this not go through? */
if ( rebuild ) {
......@@ -305,15 +301,11 @@ void space_prepare ( struct space *s ) {
space_rebuild( s , 0.0 );
printf( "space_prepare: space_rebuild took %.3f ms.\n" , (double)(getticks() - tic)/CPU_TPS*1000 );
/* Traverse the cells and set their dt_min and dx_max. */
tic = getticks();
space_map_cells_post( s , 1 , &space_map_prepare , NULL );
printf( "space_prepare: space_map_prepare took %.3f ms.\n" , (double)(getticks() - tic)/CPU_TPS*1000 );
/* Run through the tasks and mark as skip or not. */
tic = getticks();
rebuild = space_marktasks( s );
printf( "space_prepare: space_marktasks tasks took %.3f ms.\n" , (double)(getticks() - tic)/CPU_TPS*1000 );
if ( space_marktasks( s ) )
error( "space_marktasks failed after space_rebuild." );
printf( "space_prepare: space_marktasks took %.3f ms.\n" , (double)(getticks() - tic)/CPU_TPS*1000 );
}
......@@ -503,6 +495,7 @@ int space_rebuild_recurse ( struct space *s , struct cell *c ) {
c->h_max = h_max;
c->dt_min = dt_min;
c->dt_max = dt_max;
c->dx_max = 0;
/* Un-split? */
if ( count < c->count*space_splitratio || c->count < space_splitsize ) {
......@@ -529,6 +522,7 @@ int space_rebuild_recurse ( struct space *s , struct cell *c ) {
temp->h[0] = c->h[0]/2;
temp->h[1] = c->h[1]/2;
temp->h[2] = c->h[2]/2;
temp->dmin = c->dmin/2;
if ( k & 4 )
temp->loc[0] += temp->h[0];
if ( k & 2 )
......@@ -538,6 +532,7 @@ int space_rebuild_recurse ( struct space *s , struct cell *c ) {
temp->depth = c->depth + 1;
temp->split = 0;
temp->h_max = 0.0;
temp->dx_max = 0.0;
temp->parent = c;
c->progeny[k] = temp;
}
......@@ -589,7 +584,7 @@ void space_rebuild ( struct space *s , double cell_max ) {
int i, j, k, cdim[3];
struct cell *restrict c;
struct part *restrict finger, *restrict p;
struct cpart *restrict cfinger;
struct cpart *restrict cp, *restrict cfinger;
int *ind;
ticks tic;
......@@ -664,6 +659,20 @@ void space_rebuild ( struct space *s , double cell_max ) {
} /* re-build upper-level cells? */
// printf( "space_rebuild: rebuilding upper-level cells took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 );
/* Otherwise, just clean up the cells. */
else {
/* Free the old cells, if they were allocated. */
for ( k = 0 ; k < s->nr_cells ; k++ ) {
space_rebuild_recycle( s , &s->cells[k] );
s->cells[k].sorts = NULL;
s->cells[k].nr_tasks = 0;
s->cells[k].nr_density = 0;