Commit 0a34510d authored by Pedro Gonnet's avatar Pedro Gonnet
Browse files

major re-vamp of the time stepper, cells are now re-constructed only when...

Major revamp of the time stepper: cells are now reconstructed only when needed. Tasks are still rebuilt every step.


Former-commit-id: 9ad93a4770e58ce6f8a20e7823e900f3fcbe3293
parent 914dd1c1
......@@ -515,13 +515,14 @@ void pairs_single ( double *dim , long long int pid , struct part *__restrict__
// double maxratio = 1.0;
double r2, dx[3];
struct part *p;
double ih = 15.0/6.25;
double ih = 12.0/6.25;
/* Find "our" part. */
for ( k = 0 ; k < N && parts[k].id != pid ; k++ );
if ( k == N )
error( "Part not found." );
p = &parts[k];
printf( "pairs_single: part[%i].id == %lli.\n" , k , pid );
/* Loop over all particle pairs. */
for ( k = 0 ; k < N ; k++ ) {
......@@ -868,7 +869,7 @@ int main ( int argc , char *argv[] ) {
/* Get the brute-force number of pairs. */
// pairs_n2( dim , parts , N , periodic );
// pairs_single( dim , 1168833436525 , parts , N , periodic );
// pairs_single( dim , 5245989477229 , parts , N , periodic );
// fflush( stdout );
/* Set default number of queues. */
......@@ -894,21 +895,27 @@ int main ( int argc , char *argv[] ) {
/* Verify that each particle is in its proper cell. */
icount = 0;
space_map_cells( &s , 0 , &map_cellcheck , &icount );
space_map_cells_pre( &s , 0 , &map_cellcheck , &icount );
printf( "main: map_cellcheck picked up %i parts.\n" , icount );
data[0] = s.maxdepth; data[1] = 0;
space_map_cells( &s , 0 , &map_maxdepth , data );
space_map_cells_pre( &s , 0 , &map_maxdepth , data );
printf( "main: nr of cells at depth %i is %i.\n" , data[0] , data[1] );
/* Dump the particle positions. */
// space_map_parts( &s , &map_dump , shift );
/* Dump the acceleration of the first particle. */
printf( "main: parts[%lli].a is [ %.16e %.16e %.16e ], wcount=%.3f.\n" , s.parts[103].id , s.parts[103].a[0] , s.parts[103].a[1] , s.parts[103].a[2] , s.parts[103].wcount + 32.0/3 );
/* Initialize the runner with this space. */
tic = getticks();
engine_init( &e , &s , nr_threads , nr_queues , engine_policy_steal | engine_policy_keep );
printf( "main: engine_init took %.3f ms.\n" , ((double)(getticks() - tic)) / CPU_TPS * 1000 ); fflush(stdout);
/* set the time step. */
e.dt = dt_max;
/* Init the runner history. */
#ifdef HIST
for ( k = 0 ; k < runner_hist_N ; k++ )
......@@ -917,29 +924,23 @@ int main ( int argc , char *argv[] ) {
/* Let loose a runner on the space. */
for ( j = 0 ; j < runs ; j++ ) {
printf( "main: starting run %i/%i with %i threads and %i queues...\n" , j+1 , runs , e.nr_threads , e.nr_queues ); fflush(stdout);
#ifdef TIMER
for ( k = 0 ; k < runner_timer_count ; k++ )
runner_timer[k] = 0;
for ( k = 0 ; k < queue_timer_count ; k++ )
queue_timer[k] = 0;
for ( k = 0 ; k < cell_timer_count ; k++ )
cell_timer[k] = 0;
#endif
timers_reset( timers_mask_all );
#ifdef COUNTER
for ( k = 0 ; k < runner_counter_count ; k++ )
runner_counter[k] = 0;
#endif
tic = getticks();
engine_prepare( &e , 1 );
printf( "main: engine_prepare took %.3f ms.\n" , ((double)(getticks() - tic)) / CPU_TPS * 1000 ); fflush(stdout);
tic = getticks();
engine_run( &e , 0 , dt_max );
/* Take a step. */
engine_step( &e , 0 );
/* Output. */
#ifdef TIMER
printf( "main: runner timers are [ %.3f" , runner_timer[0]/CPU_TPS*1000 );
for ( k = 1 ; k < runner_timer_count ; k++ )
printf( " %.3f" , ((double)runner_timer[k])/CPU_TPS*1000 );
printf( " %.3f ] ms.\n" , ((double)(getticks() - tic)) / CPU_TPS * 1000 );
printf( "main: runner timers are [ %.3f" , timers[0]/CPU_TPS*1000 );
for ( k = 1 ; k < timer_count ; k++ )
printf( " %.3f" , ((double)timers[k])/CPU_TPS*1000 );
printf( " ] ms.\n" );
printf( "main: queue timers are [ %.3f" , queue_timer[0]/CPU_TPS*1000 );
for ( k = 1 ; k < queue_timer_count ; k++ )
printf( " %.3f" , ((double)queue_timer[k])/CPU_TPS*1000 );
......@@ -962,6 +963,7 @@ int main ( int argc , char *argv[] ) {
printf( " %i" , e.queues[k].count );
printf( " ].\n" );
fflush(stdout);
}
/* Print the values of the runner histogram. */
......@@ -989,39 +991,6 @@ int main ( int argc , char *argv[] ) {
printf( "main: particle %lli/%i at [ %e %e %e ] (h=%e) has minimum wcount %.3f.\n" ,
p->id , (int)(p - s.parts) , p->x[0] , p->x[1] , p->x[2] , p->h , p->wcount + 32.0/3 );
/* Loop over all the tasks and dump the ones containing p. */
/* for ( k = 0 ; k < s.nr_tasks ; k++ ) {
if ( s.tasks[k].type == task_type_self ) {
struct cell *c = s.tasks[k].ci;
if ( c->loc[0] <= p->x[0] && c->loc[1] <= p->x[1] && c->loc[2] <= p->x[2] &&
c->loc[0]+c->h[0] >= p->x[0] && c->loc[1]+c->h[1] > p->x[1] && c->loc[2]+c->h[2] > p->x[2] ) {
printf( "main: found self-interaction for part %i!\n" , p->id );
// map_cells_plot( c , &c->depth );
}
}
else if ( s.tasks[k].type == task_type_pair ) {
struct cell *ci = s.tasks[k].ci;
struct cell *cj = s.tasks[k].cj;
if ( ( ci->loc[0] <= p->x[0] && ci->loc[1] <= p->x[1] && ci->loc[2] <= p->x[2] &&
ci->loc[0]+ci->h[0] >= p->x[0] && ci->loc[1]+ci->h[1] > p->x[1] && ci->loc[2]+ci->h[2] > p->x[2] ) ||
( cj->loc[0] <= p->x[0] && cj->loc[1] <= p->x[1] && cj->loc[2] <= p->x[2] &&
cj->loc[0]+cj->h[0] >= p->x[0] && cj->loc[1]+cj->h[1] > p->x[1] && cj->loc[2]+cj->h[2] > p->x[2] ) ) {
printf( "%e %e %e\n%e %e %e\n\n\n" ,
ci->loc[0]+ci->h[0]/2 , ci->loc[1]+ci->h[1]/2 , ci->loc[2]+ci->h[2]/2 ,
cj->loc[0]+cj->h[0]/2 , cj->loc[1]+cj->h[1]/2 , cj->loc[2]+cj->h[2]/2 );
// map_cells_plot( ci , &ci->depth );
// map_cells_plot( cj , &cj->depth );
}
}
}
for ( int ii = -1 ; ii <= 1 ; ii++ )
for ( int jj = -1 ; jj <= 1 ; jj++ )
for ( int kk = -1 ; kk <= 1 ; kk++ ) {
int cid = cell_getid( s.cdim , ((int)(p->x[0]*s.ih[0])+ii+s.cdim[0]) % s.cdim[0] , ((int)(p->x[1]*s.ih[1])+jj+s.cdim[1]) % s.cdim[1] , ((int)(p->x[2]*s.ih[2])+kk+s.cdim[2]) % s.cdim[2] );
map_cells_plot( &s.cells[cid] , &s.maxdepth );
} */
/* Get the particle with the highest wcount. */
p = &s.parts[0];
space_map_parts( &s , &map_wcount_max , &p );
......@@ -1034,11 +1003,11 @@ int main ( int argc , char *argv[] ) {
// printf( "main: average neighbours per particle is %.3f.\n" , (double)icount / s.nr_parts );
/* Dump the acceleration of the first particle. */
printf( "main: parts[%lli].a is [ %.16e %.16e %.16e ].\n" , s.parts[6178].id , s.parts[6178].a[0] , s.parts[6178].a[1] , s.parts[6178].a[2] );
printf( "main: parts[%lli].a is [ %.16e %.16e %.16e ], wcount=%.3f.\n" , s.parts[103].id , s.parts[103].a[0] , s.parts[103].a[1] , s.parts[103].a[2] , s.parts[103].wcount + 32.0/3 );
/* Get all the cells of a certain depth. */
// icount = 1;
// space_map_cells( &s , 0 , &map_cells_plot , &icount );
// space_map_cells_pre( &s , 0 , &map_cells_plot , &icount );
/* Check for outliers. */
// space_map_parts( &s , &map_check , NULL );
......
......@@ -21,17 +21,19 @@ AUTOMAKE_OPTIONS=gnu
# Add the debug flag to the whole thing
AM_CFLAGS = -g -O3 -Wall -Werror -ffast-math -fstrict-aliasing -ftree-vectorize \
-funroll-loops $(SIMD_FLAGS) $(CFLAGS) $(OPENMP_CFLAGS) \
-funroll-loops $(SIMD_FLAGS) $(OPENMP_CFLAGS) \
-DTIMER -DCOUNTER -DCPU_TPS=2.67e9
# AM_CFLAGS = -Wall -Werror $(OPENMP_CFLAGS) \
# -DTIMER -DCOUNTER -DCPU_TPS=2.67e9
# Assign a "safe" version number
AM_LDFLAGS = $(LAPACK_LIBS) $(BLAS_LIBS) $(HDF5_LDFLAGS) -version-info 0:0:0
# Build the libswiftsim library
lib_LTLIBRARIES = libswiftsim.la
libswiftsim_la_SOURCES = space.c runner.c queue.c task.c cell.c engine.c ic.c
libswiftsim_la_SOURCES = space.c runner.c queue.c task.c cell.c engine.c ic.c timers.c
# List required headers
include_HEADERS = space.h runner.h queue.h task.h lock.h cell.h part.h const.h \
engine.h swift.h ic.h
engine.h swift.h ic.h timers.h
......@@ -263,15 +263,6 @@ void cell_split ( struct cell *c ) {
c->progeny[k]->cparts = &c->cparts[ left[k] ];
}
/* Update the condensed particle data. */
for ( k = 0 ; k < c->count ; k++ ) {
c->cparts[k].x[0] = c->parts[k].x[0];
c->cparts[k].x[1] = c->parts[k].x[1];
c->cparts[k].x[2] = c->parts[k].x[2];
c->cparts[k].h = c->parts[k].h;
c->cparts[k].dt = c->parts[k].dt;
}
/* Verify a few sub-cells. */
/* for ( k = 0 ; k < c->progeny[0]->count ; k++ )
if ( c->progeny[0]->parts[k].x[0] > pivot[0] ||
......
......@@ -44,6 +44,15 @@ struct cell {
/* Minimum and maximum dt in this cell. */
double dt_min, dt_max;
/* Minimum dimension, i.e. smallest edge of this cell. */
float dmin;
/* Maximum slack allowed for particle movement. */
float slack;
/* Maximum particle movement in this cell. */
float dx_max;
/* The depth of this cell in the tree. */
int depth, split;
......
......@@ -33,6 +33,7 @@
/* Local headers. */
#include "cycle.h"
#include "timers.h"
#include "const.h"
#include "lock.h"
#include "task.h"
......@@ -58,38 +59,32 @@
* @param force Flag to force re-building the cell and task structure.
*/
void engine_prepare ( struct engine *e , int force ) {
void engine_prepare ( struct engine *e ) {
int j, k, qid, changes, count;
int j, k, qid;
struct space *s = e->s;
// ticks tic;
struct queue *q;
TIMER_TIC
/* Rebuild the space. */
// tic = getticks();
changes = space_rebuild( e->s , force , 0 );
// printf( "engine_prepare: space_rebuild with %i changes took %.3f ms.\n" , changes , (double)(getticks() - tic) / CPU_TPS * 1000 );
space_prepare( e->s );
// printf( "engine_prepare: space_prepare with %i changes took %.3f ms.\n" , changes , (double)(getticks() - tic) / CPU_TPS * 1000 );
/* Has anything changed? */
// tic = getticks();
if ( changes ) {
/* Rank the tasks in topological order. */
engine_ranktasks( e );
/* Fill the queues (round-robin). */
#pragma omp parallel for schedule(static) private(count,k)
for ( qid = 0 ; qid < e->nr_queues ; qid++ ) {
queue_init( &e->queues[qid] , s->nr_tasks , s->tasks );
for ( count = 0 , k = qid ; k < s->nr_tasks ; k += e->nr_queues ) {
if ( s->tasks[ s->tasks_ind[k] ].type == task_type_none )
continue;
e->queues[qid].tid[ count ] = s->tasks_ind[k];
count += 1;
}
e->queues[qid].count = count;
e->queues[qid].next = 0;
}
/* Init the queues (round-robin). */
for ( qid = 0 ; qid < e->nr_queues ; qid++ )
queue_init( &e->queues[qid] , s->nr_tasks , s->tasks );
/* Fill the queues (round-robin). */
for ( qid = 0 , k = 0 ; k < s->nr_tasks ; k++ ) {
if ( s->tasks[ s->tasks_ind[k] ].skip )
continue;
q = &e->queues[qid];
qid = ( qid + 1 ) % e->nr_queues;
q->tid[ q->count ] = s->tasks_ind[k];
q->count += 1;
}
// printf( "engine_prepare: re-filling queues took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 );
......@@ -118,56 +113,9 @@ void engine_prepare ( struct engine *e , int force ) {
/* Re-set the queues.*/
for ( k = 0 ; k < e->nr_queues ; k++ )
e->queues[k].next = 0;
}
/**
* @brief Sort the tasks in topological order over all queues.
*
* @param e The #engine.
*/
void engine_ranktasks ( struct engine *e ) {
int i, j = 0, k, temp, left = 0, rank;
struct task *t;
struct space *s = e->s;
int *tid = s->tasks_ind;
/* Run throught the tasks and get all the waits right. */
for ( k = 0 ; k < s->nr_tasks ; k++ ) {
tid[k] = k;
for ( j = 0 ; j < s->tasks[k].nr_unlock_tasks ; j++ )
s->tasks[k].unlock_tasks[j]->wait += 1;
}
/* Main loop. */
for ( j = 0 , rank = 0 ; left < s->nr_tasks ; rank++ ) {
/* Load the tids of tasks with no waits. */
for ( k = left ; k < s->nr_tasks ; k++ )
if ( s->tasks[ tid[k] ].wait == 0 ) {
temp = tid[j]; tid[j] = tid[k]; tid[k] = temp;
j += 1;
}
/* Traverse the task tree and add tasks with no weight. */
for ( i = left ; i < j ; i++ ) {
t = &s->tasks[ tid[i] ];
t->rank = rank;
s->tasks_ind[i] = t - s->tasks;
/* printf( "engine_ranktasks: task %i of type %s has rank %i.\n" , i ,
(t->type == task_type_self) ? "self" : (t->type == task_type_pair) ? "pair" : "sort" , rank ); */
for ( k = 0 ; k < t->nr_unlock_tasks ; k++ )
t->unlock_tasks[k]->wait -= 1;
}
/* The new left (no, not tony). */
left = j;
}
TIMER_TOC( timer_prepare );
}
......@@ -223,11 +171,66 @@ void engine_barrier( struct engine *e ) {
* @param sort_queues Flag to try to sort the queues topologically.
*/
void engine_run ( struct engine *e , int sort_queues , float dt_max ) {
void engine_step ( struct engine *e , int sort_queues ) {
int k;
int k, nr_parts = e->s->nr_parts;
struct part *restrict parts = e->s->parts, *restrict p;
float *v_bar, *u_bar;
float dt = e->dt, hdt = 0.5*dt, dt_max;
/* Re-set the queues.*/
/* Get the maximum dt: double the base step once for every trailing zero
   bit of the step counter (at most 32 times). The mask is built from an
   unsigned 1 because shifting a signed 1 into bit 31 is undefined. */
dt_max = dt;
for ( k = 0 ; k < 32 && ( (unsigned int)e->step & ( 1u << k ) ) == 0 ; k++ )
    dt_max *= 2;
/* Set the maximum dt. */
e->dt_max = dt_max;
e->s->dt_max = dt_max;
printf( "engine_step: dt_max set to %.3e.\n" , dt_max ); fflush(stdout);
/* Allocate scratch buffers for the half-step velocities (three floats per
   part) and the half-step internal energies (one float per part). */
if ( ( v_bar = (float *)malloc( sizeof(float) * nr_parts * 3 ) ) == NULL )
    error( "Failed to allocate v_old buffer." );
if ( ( u_bar = (float *)malloc( sizeof(float) * nr_parts ) ) == NULL )
    error( "Failed to allocate u_old buffer." );
/* First kick. */
#pragma omp parallel for schedule(static) private(p)
for ( k = 0 ; k < nr_parts ; k++ ) {
/* Get a handle on the part. */
p = &parts[k];
/* Step and store the velocity and internal energy. */
v_bar[3*k+0] = p->v[0] + hdt * p->a[0];
v_bar[3*k+1] = p->v[1] + hdt * p->a[1];
v_bar[3*k+2] = p->v[2] + hdt * p->a[2];
u_bar[k] = p->u + hdt * p->u_dt;
/* Move the particles with the velocities at the half-step. */
// p->x[0] += dt * v_bar[3*k+0];
// p->x[1] += dt * v_bar[3*k+1];
// p->x[2] += dt * v_bar[3*k+2];
/* Update positions and energies at the half-step. */
p->v[0] += dt * p->a[0];
p->v[1] += dt * p->a[1];
p->v[2] += dt * p->a[2];
// p->u *= expf( p->u_dt / p->u * dt );
// p->h *= expf( -1.0f * p->h_dt / p->h * dt );
/* Integrate other values if this particle will not be updated. */
if ( p->dt > dt_max ) {
p->rho *= expf( -3.0f * p->h_dt / p->h * dt );
p->POrho2 = p->u * ( const_gamma - 1.0f ) / ( p->rho + p->h * p->rho_dh / 3.0f );
}
}
/* Prepare the space. */
engine_prepare( e );
/* Sort the queues?*/
if ( sort_queues ) {
#pragma omp parallel for default(none), shared(e)
for ( k = 0 ; k < e->nr_queues ; k++ ) {
......@@ -236,9 +239,8 @@ void engine_run ( struct engine *e , int sort_queues , float dt_max ) {
}
}
/* Set the maximum dt. */
e->dt_max = dt_max;
e->s->dt_max = dt_max;
/* Start the clock. */
TIMER_TIC
/* Cry havoc and let loose the dogs of war. */
e->barrier_count = -e->barrier_count;
......@@ -249,6 +251,47 @@ void engine_run ( struct engine *e , int sort_queues , float dt_max ) {
while ( e->barrier_count < e->nr_threads )
if ( pthread_cond_wait( &e->barrier_cond , &e->barrier_mutex ) != 0 )
error( "Error while waiting for barrier." );
/* Stop the clock. */
TIMER_TOC(timer_step);
/* Second kick: finish the velocity update from the stored half-step values
   and find the smallest particle time step. */
e->dt_min = FLT_MAX;
#pragma omp parallel private(p,k)
{
    int threadID = omp_get_thread_num();
    int nthreads = omp_get_num_threads();
    /* Per-thread minimum, merged into e->dt_min below. */
    float dt_min = FLT_MAX;
    /* Each thread handles its own contiguous slice of the parts array. */
    for ( k = nr_parts * threadID / nthreads ; k < nr_parts * (threadID + 1) / nthreads ; k++ ) {
        /* Get a handle on the part. */
        p = &parts[k];
        /* Scale the derivatives. */
        p->u_dt *= p->POrho2;
        p->h_dt *= p->h * 0.333333333f;
        /* Complete the velocity step: each component starts from its own
           half-step value (x, y, z at offsets 0, 1, 2 in v_bar). */
        p->v[0] = v_bar[3*k+0] + hdt * p->a[0];
        p->v[1] = v_bar[3*k+1] + hdt * p->a[1];
        p->v[2] = v_bar[3*k+2] + hdt * p->a[2];
        // p->u = u_bar[k] + hdt * p->u_dt;
        /* Get the smallest dt. */
        dt_min = fminf( dt_min , p->dt );
    }
    /* Serialise the reduction into the shared engine value. */
    #pragma omp critical
    e->dt_min = fminf( e->dt_min , dt_min );
}
printf( "engine_step: dt_min is %e.\n" , e->dt_min ); fflush(stdout);
/* Clean up. */
free( v_bar );
free( u_bar );
/* Increase the step counter. */
e->step += 1;
}
......@@ -276,6 +319,8 @@ void engine_init ( struct engine *e , struct space *s , int nr_threads , int nr_
e->nr_threads = nr_threads;
e->nr_queues = nr_queues;
e->policy = policy;
e->dt_min = 0.0f;
e->step = 0;
/* First of all, init the barrier and lock it. */
if ( pthread_mutex_init( &e->barrier_mutex , NULL ) != 0 )
......@@ -295,9 +340,6 @@ void engine_init ( struct engine *e , struct space *s , int nr_threads , int nr_
for ( k = 0 ; k < nr_queues ; k++ )
queue_init( &e->queues[k] , s->nr_tasks , s->tasks );
/* Rank the tasks in topological order. */
engine_ranktasks( e );
/* How many queues to fill initially? */
for ( nrq = 0 , k = nr_queues ; k > 0 ; k = k / 2 )
nrq += 1;
......
......@@ -52,6 +52,13 @@ struct engine {
/* The maximum dt to step. */
float dt_max;
float dt_min;
/* The system time step. */
float dt;
/* The current step number. */
int step;
/* Data for the threads' barrier. */
pthread_mutex_t barrier_mutex;
......@@ -64,6 +71,5 @@ struct engine {
/* Function prototypes. */
void engine_barrier( struct engine *e );
void engine_init ( struct engine *e , struct space *s , int nr_threads , int nr_queues , int policy );
void engine_prepare ( struct engine *e , int force );
void engine_ranktasks ( struct engine *e );
void engine_run ( struct engine *e , int sort_queues , float dt_max );
void engine_prepare ( struct engine *e );
void engine_step ( struct engine *e , int sort_queues );
......@@ -332,6 +332,8 @@ void read_ic ( char* fileName, double dim[3], struct part **parts, int* N, int*
readArray(h_grp, "InternalEnergy", FLOAT, *N, 1, *parts, u, COMPULSORY);
readArray(h_grp, "TimeStep", FLOAT, *N, 1, *parts, dt, OPTIONAL);
readArray(h_grp, "ParticleIDs", ULONGLONG, *N, 1, *parts, id, COMPULSORY);
readArray(h_grp, "TimeStep", FLOAT, *N, 1, *parts, dt, OPTIONAL );
readArray(h_grp, "Acceleration", FLOAT, *N, 3, *parts, a, OPTIONAL );
/* Close particle group */
H5Gclose(h_grp);
......
......@@ -80,6 +80,9 @@ struct part {
/* Particle ID. */
unsigned long long id;
/* Old position, at last tree rebuild. */
double x_old[3];
/* Particle position. */
double x[3];
......
......@@ -33,6 +33,7 @@
/* Local headers. */
#include "cycle.h"
#include "timers.h"
#include "const.h"
#include "lock.h"
#include "task.h"
......@@ -50,9 +51,6 @@
/* Convert cell location to ID. */
#define cell_getid( cdim , i , j , k ) ( (int)(k) + (cdim)[2]*( (int)(j) + (cdim)[1]*(int)(i) ) )
/* The timers. */
ticks runner_timer[ runner_timer_count ];
/* The counters. */
int runner_counter[ runner_counter_count ];
......@@ -305,9 +303,9 @@ void runner_dosort ( struct runner *r , struct cell *c , int flags ) {
printf( "runner_dosort[%02i]: %i parts at depth %i (flags = %i%i%i%i%i%i%i%i%i%i%i%i%i) took %.3f ms.\n" ,
r->id , c->count , c->depth ,
(flags & 0x1000) >> 12 , (flags & 0x800) >> 11 , (flags & 0x400) >> 10 , (flags & 0x200) >> 9 , (flags & 0x100) >> 8 , (flags & 0x80) >> 7 , (flags & 0x40) >> 6 , (flags & 0x20) >> 5 , (flags & 0x10) >> 4 , (flags & 0x8) >> 3 , (flags & 0x4) >> 2 , (flags & 0x2) >> 1 , (flags & 0x1) >> 0 ,
((double)TIMER_TOC(runner_timer_dosort)) / CPU_TPS * 1000 ); fflush(stdout);
((double)TIMER_TOC(timer_dosort)) / CPU_TPS * 1000 ); fflush(stdout);
#else
TIMER_TOC(runner_timer_dosort);
TIMER_TOC(timer_dosort);
#endif
}
......@@ -371,7 +369,7 @@ void runner_doghost ( struct runner *r , struct cell *c ) {
/* Did we get the right number density? */
if ( p->wcount + kernel_root > const_nwneigh + 1 ||
p->wcount + kernel_root < const_nwneigh - 1 ) {
// printf( "runner_doghost: particle %lli (h=%e,depth=%i) has bad wcount=%f.\n" , p->id , p->h , c->depth , p->wcount + kernel_root ); fflush(stdout);
printf( "runner_doghost: particle %lli (h=%e,depth=%i) has bad wcount=%f.\n" , p->id , p->h , c->depth , p->wcount + kernel_root ); fflush(stdout);
// p->h += ( p->wcount + kernel_root - const_nwneigh ) / p->wcount_dh;
pid[redo] = pid[i];
redo += 1;
......@@ -452,9 +450,9 @@ void runner_doghost ( struct runner *r , struct cell *c ) {
#ifdef TIMER_VERBOSE
printf( "runner_doghost[%02i]: %i parts at depth %i took %.3f ms.\n" ,
r->id , c->count , c->depth ,
((double)TIMER_TOC(runner_timer_doghost)) / CPU_TPS * 1000 ); fflush(stdout);
((double)TIMER_TOC(timer_doghost)) / CPU_TPS * 1000 ); fflush(stdout);
#else
TIMER_TOC(runner_timer_doghost);
TIMER_TOC(timer_doghost);
#endif
}
......@@ -537,7 +535,7 @@ void *runner_main ( void *data ) {
t = queue_gettask( queues[qid] , r->id , 0 , keep );
if ( t != NULL && keep )
queue_insert( myq , t );
TIMER_TOC2(runner_timer_steal);
TIMER_TOC2(timer_steal);
}
}
else if ( e->policy & engine_policy_rand ) {
......@@ -547,7 +545,7 @@ void *runner_main ( void *data ) {
else {
t = queue_gettask( &e->queues[threadID] , r->id , e->policy & engine_policy_block , 0 );
}
TIMER_TOC(runner_timer_getpair);
TIMER_TOC(timer_getpair);
/* Did I get anything? */
if ( t == NULL ) {
......@@ -560,8 +558,7 @@ void *runner_main ( void *data ) {
}
#ifdef TIMER
else if ( stalled ) {
stalled = getticks() - stalled;
__sync_add_and_fetch( &runner_timer[runner_timer_stalled] , stalled );
timers_toc( timer_stalled , stalled );
#ifdef TIMER_VERBOSE
printf( "runner_main[%02i]: stalled %.3f ms\n" , r->id , ((double)stalled) / CPU_TPS * 1000 );
fflush(stdout);
......@@ -629,8 +626,7 @@ void *runner_main ( void *data ) {
/* Any leftover stalls? */
#ifdef TIMER
if ( stalled ) {
stalled = getticks() - stalled;
__sync_add_and_fetch( &runner_timer[runner_timer_stalled] , stalled );
timers_toc( timer_stalled , stalled );
#ifdef TIMER_VERBOSE
printf( "runner_main[%02i]: stalled %.3f ms\n" , r->id , ((double)stalled) / CPU_TPS * 1000 );
</