Commit 6f6f3f54 authored by Pedro Gonnet's avatar Pedro Gonnet
Browse files

several fixes to dopair2 and doself2 for multiple time-stepping.


Former-commit-id: fa77a1b6804434b679dc88ffed39f6b96ceaa5f4
parent 3f62f44a
......@@ -20,11 +20,11 @@
AUTOMAKE_OPTIONS=gnu
# Add the debug flag to the whole thing
# AM_CFLAGS = -g -O3 -Wall -Werror -ffast-math -fstrict-aliasing -ftree-vectorize \
# -funroll-loops $(SIMD_FLAGS) $(OPENMP_CFLAGS) \
# -DTIMER -DCOUNTER -DCPU_TPS=2.67e9
AM_CFLAGS = -Wall -Werror $(OPENMP_CFLAGS) \
AM_CFLAGS = -g -O3 -Wall -Werror -ffast-math -fstrict-aliasing -ftree-vectorize \
-funroll-loops $(SIMD_FLAGS) $(OPENMP_CFLAGS) \
-DTIMER -DCOUNTER -DCPU_TPS=2.67e9
# AM_CFLAGS = -Wall -Werror $(OPENMP_CFLAGS) \
# -DTIMER -DCOUNTER -DCPU_TPS=2.67e9
# Assign a "safe" version number
AM_LDFLAGS = $(LAPACK_LIBS) $(BLAS_LIBS) $(HDF5_LDFLAGS) -version-info 0:0:0
......
/*******************************************************************************
* This file is part of SWIFT.
* Coypright (c) 2012 Matthieu Schaller (matthieu.schaller@durham.ac.uk).
* Coypright (c) 2013 Matthieu Schaller (matthieu.schaller@durham.ac.uk),
* Pedro Gonnet (pedro.gonnet@durham.ac.uk).
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
......@@ -29,7 +30,7 @@ void printParticle ( struct part *parts , long long int id ) {
/* Look for the particle. */
for ( i = 0 ; parts[i].id != id ; i++ );
printf("## Particle[%d]: id=%lld, x=(%f,%f,%f), v=(%f,%f,%f), a=(%f,%f,%f), h=%f, h_dt=%f, wcount=%f, m=%f, rho=%f, u=%f, dudt=%f, dt=%.3e\n",
printf("## Particle[%d]: id=%lld, x=[%f,%f,%f], v=[%f,%f,%f], a=[%f,%f,%f], h=%.3e, h_dt=%.3e, wcount=%f, m=%.3e, rho=%f, u=%f, dudt=%f, dt=%.3e\n",
i,
parts[i].id,
parts[i].x[0], parts[i].x[1], parts[i].x[2],
......
......@@ -66,7 +66,7 @@ void engine_prepare ( struct engine *e ) {
int j, k, qid;
struct space *s = e->s;
struct queue *q;
float dt_max = e->dt_max;
float dt_step = e->dt_step;
TIMER_TIC
......@@ -95,7 +95,7 @@ void engine_prepare ( struct engine *e ) {
// tic = getticks();
#pragma omp parallel for schedule(static)
for ( k = 0 ; k < s->nr_parts ; k++ )
if ( s->parts[k].dt <= dt_max ) {
if ( s->parts[k].dt <= dt_step ) {
s->parts[k].wcount = 0.0f;
s->parts[k].wcount_dh = 0.0f;
s->parts[k].rho = 0.0f;
......@@ -179,71 +179,70 @@ void engine_step ( struct engine *e , int sort_queues ) {
int k, nr_parts = e->s->nr_parts;
struct part *restrict parts = e->s->parts, *restrict p;
float *restrict v_bar;
float dt = e->dt, hdt = 0.5*dt, dt_max, dt_min, ldt_min, ldt_max;
double etot = 0.0, letot, lmom[3], mom[3] = { 0.0 , 0.0 , 0.0 };
int threadID, nthreads;
struct xpart *restrict xp;
float dt = e->dt, hdt = 0.5*dt, dt_step, dt_max, dt_min, ldt_min, ldt_max;
double epot = 0.0, ekin = 0.0, lepot, lekin, lmom[3], mom[3] = { 0.0 , 0.0 , 0.0 };
int threadID, nthreads, count = 0, lcount;
// #ifdef __SSE2__
// VEC_MACRO(4,float) hdtv = _mm_set1_ps( hdt );
// #endif
/* Get the maximum dt. */
dt_max = 2.0f*dt;
dt_step = 2.0f*dt;
for ( k = 0 ; k < 32 && (e->step & (1 << k)) == 0 ; k++ )
dt_max *= 2;
dt_max = 1;
dt_step *= 2;
// dt_step = FLT_MAX;
/* Set the maximum dt. */
e->dt_max = dt_max;
e->s->dt_max = dt_max;
printf( "engine_step: dt_max set to %.3e.\n" , dt_max ); fflush(stdout);
e->dt_step = dt_step;
e->s->dt_step = dt_step;
printf( "engine_step: dt_step set to %.3e.\n" , dt_step ); fflush(stdout);
/* Allocate a buffer for the old velocities. */
if ( posix_memalign( (void **)&v_bar , 16 , sizeof(float) * nr_parts * 4 ) != 0 )
error( "Failed to allocate v_old buffer." );
/* First kick. */
TIMER_TIC
// #pragma omp parallel for schedule(static) private(p)
#pragma omp parallel for schedule(static) private(p,xp)
for ( k = 0 ; k < nr_parts ; k++ ) {
/* Get a handle on the part. */
p = &parts[k];
xp = p->xtras;
/* Step and store the velocity and internal energy. */
// #ifdef __SSE__
// _mm_store_ps( &v_bar[4*k] , _mm_add_ps( _mm_load_ps( &p->v[0] ) , _mm_mul_ps( hdtv , _mm_load_ps( &p->a[0] ) ) ) );
// _mm_store_ps( &v_bar[4*k] , _mm_load_ps( &p->v[0] ) + hdtv * _mm_load_ps( &p->a[0] ) );
// #else
v_bar[4*k+0] = p->v[0] + hdt * p->a[0];
v_bar[4*k+1] = p->v[1] + hdt * p->a[1];
v_bar[4*k+2] = p->v[2] + hdt * p->a[2];
xp->v_old[0] = p->v[0] + hdt * p->a[0];
xp->v_old[1] = p->v[1] + hdt * p->a[1];
xp->v_old[2] = p->v[2] + hdt * p->a[2];
// #endif
v_bar[4*k+3] = p->u + hdt * p->u_dt;
// xp->u_old = fmaxf( p->u + hdt * p->u_dt , FLT_EPSILON );
xp->u_old = p->u + hdt * p->u_dt;
/* Move the particles with the velocitie at the half-step. */
p->x[0] += dt * v_bar[4*k+0];
p->x[1] += dt * v_bar[4*k+1];
p->x[2] += dt * v_bar[4*k+2];
p->x[0] += dt * xp->v_old[0];
p->x[1] += dt * xp->v_old[1];
p->x[2] += dt * xp->v_old[2];
/* Update positions and energies at the half-step. */
p->v[0] += dt * p->a[0];
p->v[1] += dt * p->a[1];
p->v[2] += dt * p->a[2];
p->u *= expf( p->u_dt / p->u * dt );
// p->h *= expf( p->h_dt / p->h * dt );
p->h *= expf( p->h_dt / p->h * dt );
/* Integrate other values if this particle will not be updated. */
// if ( p->dt > dt_max ) {
// p->rho *= expf( -3.0f * p->h_dt / p->h * dt );
// p->POrho2 = p->u * ( const_gamma - 1.0f ) / ( p->rho + p->h * p->rho_dh / 3.0f );
// }
if ( p->dt > dt_step ) {
p->rho *= expf( -3.0f * p->h_dt / p->h * dt );
p->POrho2 = p->u * ( const_gamma - 1.0f ) / ( p->rho + p->h * p->rho_dh / 3.0f );
}
}
TIMER_TOC( timer_kick1 );
// for(k=0; k<10; ++k)
// printParticle(parts, k);
// printParticle( parts , 494849 );
/* Prepare the space. */
engine_prepare( e );
......@@ -274,44 +273,52 @@ void engine_step ( struct engine *e , int sort_queues ) {
// for(k=0; k<10; ++k)
// printParticle(parts, k);
// printParticle( parts , 494849 );
/* Second kick. */
TIMER_TIC_ND
dt_min = FLT_MAX; dt_max = 0.0f;
#pragma omp parallel private(p,k,ldt_min,ldt_max,lmom,letot,threadID,nthreads)
#pragma omp parallel private(p,xp,k,ldt_min,lcount,ldt_max,lmom,lekin,lepot,threadID,nthreads)
{
threadID = omp_get_thread_num();
nthreads = omp_get_num_threads();
ldt_min = FLT_MAX; ldt_max = 0.0f;
lmom[0] = 0.0; lmom[1] = 0.0; lmom[2] = 0.0;
letot = 0.0;
lekin = 0.0; lepot = 0.0; lcount = 0;
for ( k = nr_parts * threadID / nthreads ; k < nr_parts * (threadID + 1) / nthreads ; k++ ) {
/* Get a handle on the part. */
p = &parts[k];
xp = p->xtras;
/* Scale the derivatives if they're freshly computed. */
if ( p->dt <= dt_max ) {
if ( p->dt <= dt_step ) {
p->u_dt *= p->POrho2;
p->h_dt *= p->h * 0.333333333f;
lcount += 1;
}
/* Update the particle's time step. */
p->dt = const_cfl * p->h / ( p->c + p->v_sig );
/* Update positions and energies at the half-step. */
// #ifdef __SSE__
// _mm_store_ps( &p->v[0] , _mm_add_ps( _mm_load_ps( &v_bar[4*k] ) , _mm_mul_ps( hdtv , _mm_load_ps( &p->a[0] ) ) ) );
// _mm_store_ps( &p->v[0] , _mm_load_ps( &v_bar[4*k] ) + hdtv * _mm_load_ps( &p->a[0] ) );
// #else
p->v[0] = v_bar[4*k+0] + hdt * p->a[0];
p->v[1] = v_bar[4*k+1] + hdt * p->a[1];
p->v[2] = v_bar[4*k+2] + hdt * p->a[2];
p->v[0] = xp->v_old[0] + hdt * p->a[0];
p->v[1] = xp->v_old[1] + hdt * p->a[1];
p->v[2] = xp->v_old[2] + hdt * p->a[2];
// #endif
p->u = v_bar[4*k+3] + hdt * p->u_dt;
// p->u = fmaxf( xp->u_old + hdt * p->u_dt , FLT_EPSILON );
p->u = xp->u_old + hdt * p->u_dt;
/* Get the smallest/largest dt. */
ldt_min = fminf( ldt_min , p->dt );
ldt_max = fmaxf( ldt_max , p->dt );
/* Collect total energy. */
letot += 0.5 * p->mass * ( p->v[0]*p->v[0] + p->v[1]*p->v[1] + p->v[2]*p->v[2] ) + p->mass * p->u;
lekin += 0.5 * p->mass * ( p->v[0]*p->v[0] + p->v[1]*p->v[1] + p->v[2]*p->v[2] );
lepot += p->mass * p->u;
/* Collect momentum */
lmom[0] += p->mass * p->v[0];
......@@ -326,31 +333,33 @@ void engine_step ( struct engine *e , int sort_queues ) {
mom[0] += lmom[0];
mom[1] += lmom[1];
mom[2] += lmom[2];
etot += letot;
epot += lepot;
ekin += lekin;
count += lcount;
}
}
TIMER_TOC( timer_kick2 );
e->dt_min = dt_min;
printf( "engine_step: dt_min/dt_max is %e/%e.\n" , dt_min , dt_max ); fflush(stdout);
printf( "engine_step: etot is %e.\n" , etot ); fflush(stdout);
printf( "engine_step: etot is %e (ekin=%e, epot=%e).\n" , ekin+epot , ekin , epot ); fflush(stdout);
printf( "engine_step: total momentum is [ %e , %e , %e ].\n" , mom[0] , mom[1] , mom[2] ); fflush(stdout);
printf( "engine_step: updated %i parts (dt_step=%.3e).\n" , count , dt_step ); fflush(stdout);
/* Increase the step counter. */
e->step += 1;
/* Does the time step need adjusting? */
if ( dt_min < e->dt ) {
while ( dt_min < e->dt ) {
e->dt *= 0.5;
e->step *= 2;
printf( "engine_step: dt_min dropped below time step, adjusting to dt=%e.\n" , e->dt );
}
else if ( dt_min > 2*e->dt ) {
while ( dt_min > 2*e->dt && (e->step & 1) == 0 ) {
e->dt *= 2.0;
e->step /= 2;
printf( "engine_step: dt_min is larger than twice the time step, adjusting to dt=%e.\n" , e->dt );
}
/* Clean up. */
free( v_bar );
/* Increase the step counter. */
e->step += 1;
}
......
......@@ -50,8 +50,10 @@ struct engine {
/* The queues. */
struct queue *queues;
/* The maximum dt to step. */
float dt_max;
/* The maximum dt to step (current). */
float dt_step;
/* The minimum dt over all particles in the system. */
float dt_min;
/* The system time step. */
......
......@@ -38,6 +38,21 @@ struct cpart {
} __attribute__((aligned (32)));
/* Extra particle data not needed during the computation. */
struct xpart {
/* Old position, at last tree rebuild. */
double x_old[3];
/* Old velocity. */
float v_old[3];
/* Old entropy. */
float u_old;
} __attribute__((aligned (32)));
/* Data of a single particle. */
struct part {
......@@ -66,6 +81,9 @@ struct part {
/* Particle acceleration. */
float a[3] __attribute__((aligned (16)));
/* Maximum neighbouring u. */
float c, v_sig;
/* Derivative of the density with respect to this particle's smoothing length. */
float rho_dh;
......@@ -80,8 +98,8 @@ struct part {
/* Particle ID. */
unsigned long long id;
/* Old position, at last tree rebuild. */
double x_old[3];
/* Pointer to extra particle data. */
struct xpart *xtras;
/* Particle position. */
double x[3];
......
......@@ -329,8 +329,8 @@ void runner_doghost ( struct runner *r , struct cell *c ) {
struct cell *finger;
int i, k, redo, count = c->count;
int *pid;
float ihg, ihg2;
float dt_max = r->e->dt_max;
float ihg, ihg2, h_corr;
float dt_step = r->e->dt_step;
TIMER_TIC
/* Recurse? */
......@@ -361,7 +361,7 @@ void runner_doghost ( struct runner *r , struct cell *c ) {
cp = &c->cparts[ pid[i] ];
/* Is this part within the timestep? */
if ( cp->dt <= dt_max ) {
if ( cp->dt <= dt_step ) {
/* Adjust the computed rho. */
ihg = kernel_igamma / p->h;
......@@ -370,8 +370,15 @@ void runner_doghost ( struct runner *r , struct cell *c ) {
p->rho_dh *= ihg2 * ihg2;
p->wcount += kernel_wroot;
/* Update the smoothing length. */
p->h -= ( p->wcount - const_nwneigh ) / p->wcount_dh;
/* Compute the smoothing length update (Newton step). */
h_corr = ( const_nwneigh - p->wcount ) / p->wcount_dh;
/* Truncate to the range [ -p->h/2 , p->h ]. */
h_corr = fminf( h_corr , p->h );
h_corr = fmaxf( h_corr , -p->h/2 );
/* Apply the correction to p->h. */
p->h += h_corr;
cp->h = p->h;
/* Did we get the right number density? */
......@@ -389,8 +396,7 @@ void runner_doghost ( struct runner *r , struct cell *c ) {
}
/* Compute this particle's time step. */
p->dt = const_cfl * p->h / sqrtf( const_gamma * ( const_gamma - 1.0f ) * p->u );
cp->dt = p->dt;
p->c = sqrtf( const_gamma * ( const_gamma - 1.0f ) * p->u );
/* Compute the pressure. */
// p->P = p->rho * p->u * ( const_gamma - 1.0f );
......@@ -405,6 +411,7 @@ void runner_doghost ( struct runner *r , struct cell *c ) {
/* Reset the time derivatives. */
p->u_dt = 0.0f;
p->h_dt = 0.0f;
p->v_sig = 0.0f;
}
......
This diff is collapsed.
......@@ -329,26 +329,26 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_force ( float r2
float r = sqrtf( r2 ), ri = 1.0f / r;
float xi, xj;
float hig_inv, hig2_inv;
float hjg_inv, hjg2_inv;
float hi_inv, hi2_inv;
float hj_inv, hj2_inv;
float wi, wj, wi_dx, wj_dx, wi_dr, wj_dr, w, dvdr;
float f;
int k;
/* Get the kernel for hi. */
hig_inv = kernel_igamma / hi;
hig2_inv = hig_inv * hig_inv;
xi = r * hig_inv;
hi_inv = 1.0f / hi;
hi2_inv = hi_inv * hi_inv;
xi = r * hi_inv * kernel_igamma;
kernel_deval( xi , &wi , &wi_dx );
wi_dr = hig2_inv * hig2_inv * wi_dx;
wi_dr = hi2_inv * hi2_inv * wi_dx;
/* Get the kernel for hj. */
hjg_inv = kernel_igamma / hj;
hjg2_inv = hjg_inv * hjg_inv;
xj = r * hjg_inv;
hj_inv = 1.0f / hj;
hj2_inv = hj_inv * hj_inv;
xj = r * hj_inv * kernel_igamma;
kernel_deval( xj , &wj , &wj_dx );
wj_dr = hjg2_inv * hjg2_inv * wj_dx;
wj_dr = hj2_inv * hj2_inv * wj_dx;
/* Get the common factor out. */
w = ri * ( pi->POrho2 * wi_dr + pj->POrho2 * wj_dr );
......@@ -370,7 +370,11 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_force ( float r2
/* Get the time derivative for h. */
pi->h_dt -= pj->mass / pj->rho * dvdr * wi_dr;
pj->h_dt -= pi->mass / pi->rho * dvdr * wj_dr;
/* Update the signal velocity. */
pi->v_sig = fmaxf( pi->v_sig , pj->c - 3*dvdr );
pj->v_sig = fmaxf( pj->v_sig , pi->c - 3*dvdr );
#ifdef HIST
if ( hi > hj )
runner_hist_hit( hi / hj );
......@@ -388,8 +392,8 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_vec_force ( float
vector r, r2, ri;
vector xi, xj;
vector hi, hj, hi_inv, hj_inv;
vector hig_inv, hig2_inv;
vector hjg_inv, hjg2_inv;
vector hig_inv, hi2_inv;
vector hjg_inv, hj2_inv;
vector wi, wj, wi_dx, wj_dx, wi_dr, wj_dr, dvdr;
vector w;
vector piPOrho2, pjPOrho2, pirho, pjrho;
......@@ -444,20 +448,20 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_vec_force ( float
hi_inv.v = vec_rcp( hi.v );
hi_inv.v = hi_inv.v - hi_inv.v * ( hi.v * hi_inv.v - vec_set1( 1.0f ) );
hig_inv.v = vec_set1( kernel_igamma ) * hi_inv.v;
hig2_inv.v = hig_inv.v * hig_inv.v;
hi2_inv.v = hi_inv.v * hi_inv.v;
xi.v = r.v * hig_inv.v;
kernel_deval_vec( &xi , &wi , &wi_dx );
wi_dr.v = hig2_inv.v * hig2_inv.v * wi_dx.v;
wi_dr.v = hi2_inv.v * hi2_inv.v * wi_dx.v;
/* Get the kernel for hj. */
hj.v = vec_load( Hj );
hj_inv.v = vec_rcp( hj.v );
hj_inv.v = hj_inv.v - hj_inv.v * ( hj.v * hj_inv.v - vec_set1( 1.0f ) );
hjg_inv.v = vec_set1( kernel_igamma ) * hj_inv.v;
hjg2_inv.v = hjg_inv.v * hjg_inv.v;
hj2_inv.v = hj_inv.v * hj_inv.v;
xj.v = r.v * hjg_inv.v;
kernel_deval_vec( &xj , &wj , &wj_dx );
wj_dr.v = hjg2_inv.v * hjg2_inv.v * wj_dx.v;
wj_dr.v = hj2_inv.v * hj2_inv.v * wj_dx.v;
/* Get the common factor out. */
w.v = ri.v * ( piPOrho2.v * wi_dr.v + pjPOrho2.v * wj_dr.v );
......@@ -508,25 +512,25 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_force ( fl
float r = sqrtf( r2 ), ri = 1.0f / r;
float xi, xj;
float hig_inv, hig2_inv;
float hjg_inv, hjg2_inv;
float hi_inv, hi2_inv;
float hj_inv, hj2_inv;
float wi, wj, wi_dx, wj_dx, wi_dr, wj_dr, w, dvdr;
float f;
int k;
/* Get the kernel for hi. */
hig_inv = kernel_igamma / hi;
hig2_inv = hig_inv * hig_inv;
xi = r * hig_inv;
hi_inv = 1.0f / hi;
hi2_inv = hi_inv * hi_inv;
xi = r * hi_inv * kernel_igamma;
kernel_deval( xi , &wi , &wi_dx );
wi_dr = hig2_inv * hig2_inv * wi_dx;
wi_dr = hi2_inv * hi2_inv * wi_dx;
/* Get the kernel for hj. */
hjg_inv = kernel_igamma / hj;
hjg2_inv = hjg_inv * hjg_inv;
xj = r * hjg_inv;
hj_inv = 1.0f / hj;
hj2_inv = hj_inv * hj_inv;
xj = r * hj_inv * kernel_igamma;
kernel_deval( xj , &wj , &wj_dx );
wj_dr = hjg2_inv * hjg2_inv * wj_dx;
wj_dr = hj2_inv * hj2_inv * wj_dx;
/* Get the common factor out. */
w = ri * ( pi->POrho2 * wi_dr + pj->POrho2 * wj_dr );
......@@ -547,6 +551,10 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_force ( fl
/* Get the time derivative for h. */
pi->h_dt -= pj->mass / pj->rho * dvdr * wi_dr;
/* Update the signal velocity (this is always symmetrical). */
pi->v_sig = fmaxf( pi->v_sig , pj->c - 3*dvdr );
pj->v_sig = fmaxf( pj->v_sig , pi->c - 3*dvdr );
}
......@@ -557,8 +565,8 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_vec_force
vector r, r2, ri;
vector xi, xj;
vector hi, hj, hi_inv, hj_inv;
vector hig_inv, hig2_inv;
vector hjg_inv, hjg2_inv;
vector hig_inv, hi2_inv;
vector hjg_inv, hj2_inv;
vector wi, wj, wi_dx, wj_dx, wi_dr, wj_dr, dvdr;
vector w;
vector piPOrho2, pjPOrho2, pjrho;
......@@ -609,20 +617,20 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_vec_force
hi_inv.v = vec_rcp( hi.v );
hi_inv.v = hi_inv.v - hi_inv.v * ( hi.v * hi_inv.v - vec_set1( 1.0f ) );
hig_inv.v = vec_set1( kernel_igamma ) * hi_inv.v;
hig2_inv.v = hig_inv.v * hig_inv.v;
hi2_inv.v = hi_inv.v * hi_inv.v;
xi.v = r.v * hig_inv.v;
kernel_deval_vec( &xi , &wi , &wi_dx );
wi_dr.v = hig2_inv.v * hig2_inv.v * wi_dx.v;
wi_dr.v = hi2_inv.v * hi2_inv.v * wi_dx.v;
/* Get the kernel for hj. */
hj.v = vec_load( Hj );
hj_inv.v = vec_rcp( hj.v );
hj_inv.v = hj_inv.v - hj_inv.v * ( hj.v * hj_inv.v - vec_set1( 1.0f ) );
hjg_inv.v = vec_set1( kernel_igamma ) * hj_inv.v;
hjg2_inv.v = hjg_inv.v * hjg_inv.v;
hj2_inv.v = hj_inv.v * hj_inv.v;
xj.v = r.v * hjg_inv.v;
kernel_deval_vec( &xj , &wj , &wj_dx );
wj_dr.v = hjg2_inv.v * hjg2_inv.v * wj_dx.v;
wj_dr.v = hj2_inv.v * hj2_inv.v * wj_dx.v;
/* Get the common factor out. */
w.v = ri.v * ( piPOrho2.v * wi_dr.v + pjPOrho2.v * wj_dr.v );
......
......@@ -86,28 +86,31 @@ void space_map_prepare ( struct cell *c , void *data ) {
int k;
float dt_min, dt_max, h_max, dx_max;
struct part *p;
struct xpart *xp;
/* No children? */
if ( !c->split ) {
/* Init with first part. */
p = &c->parts[0];
xp = p->xtras;
dt_min = p->dt;
dt_max = p->dt;
h_max = p->h;
dx_max = sqrtf( (p->x[0] - p->x_old[0])*(p->x[0] - p->x_old[0]) +
(p->x[1] - p->x_old[1])*(p->x[1] - p->x_old[1]) +
(p->x[2] - p->x_old[2])*(p->x[2] - p->x_old[2]) )*2 + p->h;
dx_max = sqrtf( (p->x[0] - xp->x_old[0])*(p->x[0] - xp->x_old[0]) +
(p->x[1] - xp->x_old[1])*(p->x[1] - xp->x_old[1]) +
(p->x[2] - xp->x_old[2])*(p->x[2] - xp->x_old[2]) )*2 + p->h;
/* Loop over parts. */
for ( k = 1 ; k < c->count ; k++ ) {
p = &c->parts[k];
xp = p->xtras;
dt_min = fminf( dt_min , p->dt );
dt_max = fmaxf( dt_max , p->dt );
h_max = fmaxf( h_max , p->h );
dx_max = fmaxf( dx_max , sqrtf( (p->x[0] - p->x_old[0])*(p->x[0] - p->x_old[0]) +
(p->x[1] - p->x_old[1])*(p->x[1] - p->x_old[1]) +
(p->x[2] - p->x_old[2])*(p->x[2] - p->x_old[2]) )*2 + p->h );
dx_max = fmaxf( dx_max , sqrtf( (p->x[0] - xp->x_old[0])*(p->x[0] - xp->x_old[0]) +
(p->x[1] - xp->x_old[1])*(p->x[1] - xp->x_old[1]) +
(p->x[2] - xp->x_old[2])*(p->x[2] - xp->x_old[2]) )*2 + p->h );
}
}
......@@ -161,10 +164,10 @@ void space_prepare ( struct space *s ) {
int k;
struct task *t;
float dt_max = s->dt_max, dx_max = 0.0f;
float dt_step = s->dt_step, dx_max = 0.0f;
int counts[ task_type_count + 1 ];
/* Traverse the cells and set their dt_min and dt_max. */
/* Traverse the cells and set their dt_min and dx_max. */
space_map_cells_post( s , 1 , &space_map_prepare , NULL );
/* Get the maximum displacement in the whole system. */
......@@ -179,9 +182,9 @@ void space_prepare ( struct space *s ) {
t->type == task_type_self ||
t->type == task_type_ghost ||
( t->type == task_type_sub && t->cj == NULL ) )
t->skip = ( t->ci->dt_min > dt_max );
t->skip = ( t->ci->dt_min > dt_step );
else if ( t->type == task_type_pair