Commit f6e86c59 authored by Pedro Gonnet
Browse files

preliminary implementation of variable time stepping. still problems with...

Preliminary implementation of variable time stepping. There are still problems with sorting; the task tree should be built more carefully.


Former-commit-id: 903851927b5bf4b34cfda269d488386c2378e445
parent 189416cb
......@@ -41,6 +41,9 @@ struct cell {
/* Max radii in this cell. */
double h_max;
/* Minimum and maximum dt in this cell. */
double dt_min, dt_max;
/* The depth of this cell in the tree. */
int depth, split;
......
......@@ -77,6 +77,10 @@ void engine_prepare ( struct engine *e , int force ) {
for ( k = 0 ; k < e->nr_queues ; k++ )
e->queues[k].count = 0;
/* Re-allocate the queue buffers? */
for ( k = 0 ; k < e->nr_queues ; k++ )
queue_init( &e->queues[k] , s->nr_tasks , s->tasks );
/* Fill the queues (round-robin). */
for ( k = 0 ; k < s->nr_tasks ; k++ ) {
if ( s->tasks[ s->tasks_ind[k] ].type == task_type_none )
......@@ -213,7 +217,7 @@ void engine_barrier( struct engine *e ) {
* @param sort_queues Flag to try to sort the queues topologically.
*/
void engine_run ( struct engine *e , int sort_queues ) {
void engine_run ( struct engine *e , int sort_queues , float dt_max ) {
int k;
......@@ -226,6 +230,10 @@ void engine_run ( struct engine *e , int sort_queues ) {
}
}
/* Set the maximum dt. */
e->dt_max = dt_max;
e->s->dt_max = dt_max;
/* Cry havoc and let loose the dogs of war. */
e->barrier_count = -e->barrier_count;
if ( pthread_cond_broadcast( &e->barrier_cond ) != 0 )
......
......@@ -50,6 +50,9 @@ struct engine {
/* The queues. */
struct queue *queues;
/* The maximum dt to step. */
float dt_max;
/* Data for the threads' barrier. */
pthread_mutex_t barrier_mutex;
pthread_cond_t barrier_cond;
......@@ -63,4 +66,4 @@ void engine_barrier( struct engine *e );
void engine_init ( struct engine *e , struct space *s , int nr_threads , int nr_queues , int policy );
void engine_prepare ( struct engine *e , int force );
void engine_ranktasks ( struct engine *e );
void engine_run ( struct engine *e , int sort_queues );
void engine_run ( struct engine *e , int sort_queues , float dt_max );
......@@ -278,6 +278,7 @@ void read_ic ( char* fileName, double dim[3], struct part **parts, int* N, int*
/* Allocate memory to store particles */
if(posix_memalign( (void*)parts , 32 , *N * sizeof(struct part)) != 0)
error("Error while allocating memory for particles");
bzero( *parts , *N * sizeof(struct part) );
printf("read_ic: Allocated %8.2f MB for particles.\n", *N * sizeof(struct part) / (1024.*1024.));
......
......@@ -48,9 +48,6 @@ struct part {
/* Particle velocity. */
float v[3];
/* Particle acceleration. */
float a[3];
/* Particle density. */
float rho;
......@@ -69,6 +66,9 @@ struct part {
/* Derivative of the density with respect to this particle's smoothing length. */
float rho_dh;
/* Particle acceleration. */
float a[3];
/* Particle number density. */
// int icount;
float wcount;
......
......@@ -124,10 +124,16 @@ void queue_insert ( struct queue *q , struct task *t ) {
void queue_init ( struct queue *q , int size , struct task *tasks ) {
/* Allocate the task list. */
/* Allocate the task list if needed. */
if ( q->tid == NULL || q->size < size ) {
if ( q->tid != NULL )
free( q->tid );
q->size = size;
if ( ( q->tid = (int *)malloc( sizeof(int) * size ) ) == NULL )
error( "Failed to allocate queue tids." );
}
/* Set the tasks pointer. */
q->tasks = tasks;
/* Init counters. */
......
......@@ -325,6 +325,7 @@ void runner_doghost ( struct runner *r , struct cell *c ) {
int i, k, redo, count = c->count;
int *pid;
float ihg, ihg2;
float dt_max = r->e->dt_max;
TIMER_TIC
/* Recurse? */
......@@ -353,6 +354,9 @@ void runner_doghost ( struct runner *r , struct cell *c ) {
/* Get a direct pointer on the part. */
p = &c->parts[ pid[i] ];
/* Is this part within the timestep? */
if ( p->dt <= dt_max ) {
/* Adjust the computed rho. */
ihg = kernel_igamma / p->h;
ihg2 = ihg * ihg;
......@@ -376,14 +380,6 @@ void runner_doghost ( struct runner *r , struct cell *c ) {
continue;
}
/* Reset the acceleration. */
for ( k = 0 ; k < 3 ; k++ )
p->a[k] = 0.0f;
/* Reset the time derivatives. */
p->u_dt = 0.0f;
p->h_dt = 0.0f;
/* Compute this particle's time step. */
p->dt = const_cfl * p->h / sqrtf( const_gamma * ( const_gamma - 1.0f ) * p->u );
......@@ -395,6 +391,16 @@ void runner_doghost ( struct runner *r , struct cell *c ) {
}
/* Reset the acceleration. */
for ( k = 0 ; k < 3 ; k++ )
p->a[k] = 0.0f;
/* Reset the time derivatives. */
p->u_dt = 0.0f;
p->h_dt = 0.0f;
}
/* Re-set the counter for the next loop (potentially). */
count = redo;
if ( count > 0 ) {
......@@ -570,18 +576,18 @@ void *runner_main ( void *data ) {
switch ( t->type ) {
case task_type_self:
if ( t->subtype == task_subtype_density )
runner_doself_density( r , ci );
runner_doself1_density( r , ci );
else if ( t->subtype == task_subtype_force )
runner_doself_force( r , ci );
runner_doself2_force( r , ci );
else
error( "Unknown task subtype." );
cell_unlocktree( ci );
break;
case task_type_pair:
if ( t->subtype == task_subtype_density )
runner_dopair_density( r , ci , cj );
runner_dopair1_density( r , ci , cj );
else if ( t->subtype == task_subtype_force )
runner_dopair_force( r , ci , cj );
runner_dopair2_force( r , ci , cj );
else
error( "Unknown task subtype." );
cell_unlocktree( ci );
......@@ -592,9 +598,9 @@ void *runner_main ( void *data ) {
break;
case task_type_sub:
if ( t->subtype == task_subtype_density )
runner_dosub_density( r , ci , cj , t->flags );
runner_dosub1_density( r , ci , cj , t->flags );
else if ( t->subtype == task_subtype_force )
runner_dosub_force( r , ci , cj , t->flags );
runner_dosub2_force( r , ci , cj , t->flags );
else
error( "Unknown task subtype." );
cell_unlocktree( ci );
......
......@@ -75,6 +75,7 @@ enum {
runner_counter_steal_stall,
runner_counter_steal_empty,
runner_counter_keep,
runner_counter_iact,
runner_counter_count,
};
extern int runner_counter[ runner_counter_count ];
......
......@@ -26,53 +26,65 @@
#define PASTE(x,y) x ## _ ## y
#define DOPAIR2(f) PASTE(runner_dopair,f)
#define DOPAIR DOPAIR2(FUNCTION)
#define _DOPAIR1(f) PASTE(runner_dopair1,f)
#define DOPAIR1 _DOPAIR1(FUNCTION)
#define DOPAIR_SUBSET2(f) PASTE(runner_dopair_subset,f)
#define DOPAIR_SUBSET DOPAIR_SUBSET2(FUNCTION)
#define _DOPAIR2(f) PASTE(runner_dopair2,f)
#define DOPAIR2 _DOPAIR2(FUNCTION)
#define DOPAIR_NAIVE2(f) PASTE(runner_dopair_naive,f)
#define DOPAIR_NAIVE DOPAIR_NAIVE2(FUNCTION)
#define _DOPAIR_SUBSET(f) PASTE(runner_dopair_subset,f)
#define DOPAIR_SUBSET _DOPAIR_SUBSET(FUNCTION)
#define DOSELF2(f) PASTE(runner_doself,f)
#define DOSELF DOSELF2(FUNCTION)
#define _DOPAIR_NAIVE(f) PASTE(runner_dopair_naive,f)
#define DOPAIR_NAIVE _DOPAIR_NAIVE(FUNCTION)
#define DOSELF_SUBSET2(f) PASTE(runner_doself_subset,f)
#define DOSELF_SUBSET DOSELF_SUBSET2(FUNCTION)
#define _DOSELF_NAIVE(f) PASTE(runner_doself_naive,f)
#define DOSELF_NAIVE _DOSELF_NAIVE(FUNCTION)
#define DOSUB2(f) PASTE(runner_dosub,f)
#define DOSUB DOSUB2(FUNCTION)
#define _DOSELF1(f) PASTE(runner_doself1,f)
#define DOSELF1 _DOSELF1(FUNCTION)
#define DOSUB_SUBSET2(f) PASTE(runner_dosub_subset,f)
#define DOSUB_SUBSET DOSUB_SUBSET2(FUNCTION)
#define _DOSELF2(f) PASTE(runner_doself2,f)
#define DOSELF2 _DOSELF2(FUNCTION)
#define IACT_NONSYM2(f) PASTE(runner_iact_nonsym,f)
#define IACT_NONSYM IACT_NONSYM2(FUNCTION)
#define _DOSELF_SUBSET(f) PASTE(runner_doself_subset,f)
#define DOSELF_SUBSET _DOSELF_SUBSET(FUNCTION)
#define IACT2(f) PASTE(runner_iact,f)
#define IACT IACT2(FUNCTION)
#define _DOSUB1(f) PASTE(runner_dosub1,f)
#define DOSUB1 _DOSUB1(FUNCTION)
#define TIMER_DOSELF2(f) PASTE(runner_timer_doself,f)
#define TIMER_DOSELF TIMER_DOSELF2(FUNCTION)
#define _DOSUB2(f) PASTE(runner_dosub2,f)
#define DOSUB2 _DOSUB2(FUNCTION)
#define TIMER_DOPAIR2(f) PASTE(runner_timer_dopair,f)
#define TIMER_DOPAIR TIMER_DOPAIR2(FUNCTION)
#define _DOSUB_SUBSET(f) PASTE(runner_dosub_subset,f)
#define DOSUB_SUBSET _DOSUB_SUBSET(FUNCTION)
#define TIMER_DOSUB2(f) PASTE(runner_timer_dosub,f)
#define TIMER_DOSUB TIMER_DOSUB2(FUNCTION)
#define _IACT_NONSYM(f) PASTE(runner_iact_nonsym,f)
#define IACT_NONSYM _IACT_NONSYM(FUNCTION)
#define TIMER_DOSELF_SUBSET2(f) PASTE(runner_timer_doself_subset,f)
#define TIMER_DOSELF_SUBSET TIMER_DOSELF_SUBSET2(FUNCTION)
#define _IACT(f) PASTE(runner_iact,f)
#define IACT _IACT(FUNCTION)
#define TIMER_DOPAIR_SUBSET2(f) PASTE(runner_timer_dopair_subset,f)
#define TIMER_DOPAIR_SUBSET TIMER_DOPAIR_SUBSET2(FUNCTION)
#define _TIMER_DOSELF(f) PASTE(runner_timer_doself,f)
#define TIMER_DOSELF _TIMER_DOSELF(FUNCTION)
#define IACT_NONSYM_VEC2(f) PASTE(runner_iact_nonsym_vec,f)
#define IACT_NONSYM_VEC IACT_NONSYM_VEC2(FUNCTION)
#define _TIMER_DOPAIR(f) PASTE(runner_timer_dopair,f)
#define TIMER_DOPAIR _TIMER_DOPAIR(FUNCTION)
#define IACT_VEC2(f) PASTE(runner_iact_vec,f)
#define IACT_VEC IACT_VEC2(FUNCTION)
#define _TIMER_DOSUB(f) PASTE(runner_timer_dosub,f)
#define TIMER_DOSUB _TIMER_DOSUB(FUNCTION)
#define _TIMER_DOSELF_SUBSET(f) PASTE(runner_timer_doself_subset,f)
#define TIMER_DOSELF_SUBSET _TIMER_DOSELF_SUBSET(FUNCTION)
#define _TIMER_DOPAIR_SUBSET(f) PASTE(runner_timer_dopair_subset,f)
#define TIMER_DOPAIR_SUBSET _TIMER_DOPAIR_SUBSET(FUNCTION)
#define _IACT_NONSYM_VEC(f) PASTE(runner_iact_nonsym_vec,f)
#define IACT_NONSYM_VEC _IACT_NONSYM_VEC(FUNCTION)
#define _IACT_VEC(f) PASTE(runner_iact_vec,f)
#define IACT_VEC _IACT_VEC(FUNCTION)
......@@ -94,6 +106,7 @@ void DOPAIR_NAIVE ( struct runner *r , struct cell *restrict ci , struct cell *r
struct cpart *restrict cpi, *restrict cparts_i = ci->cparts;
double pix[3];
float dx[3], hi, hi2, r2;
float dt_max = e->dt_max;
#ifdef VECTORIZE
int icount = 0;
float r2q[VEC_SIZE] __attribute__ ((aligned (16)));
......@@ -104,6 +117,10 @@ void DOPAIR_NAIVE ( struct runner *r , struct cell *restrict ci , struct cell *r
#endif
TIMER_TIC
/* Anything to do here? */
if ( ci->dt_min > dt_max && cj->dt_min > dt_max )
return;
/* Get the relative distance between the pairs, wrapping. */
for ( k = 0 ; k < 3 ; k++ ) {
if ( cj->loc[k] - ci->loc[k] < -e->s->dim[k]/2 )
......@@ -191,6 +208,106 @@ void DOPAIR_NAIVE ( struct runner *r , struct cell *restrict ci , struct cell *r
}
/**
 * @brief Compute the self-interactions of a cell by brute force.
 *
 * Walks over every unordered pair (i,j), i < j, of particles in the cell
 * and applies the symmetric interaction IACT whenever the squared distance
 * is below the squared h of either particle. When VECTORIZE is defined the
 * hits are buffered and handed to IACT_VEC in batches of VEC_SIZE; whatever
 * is left in the buffer at the end is processed one by one with IACT.
 *
 * Nothing is done if the cell's smallest dt exceeds the engine's dt_max.
 *
 * @param r The #runner.
 * @param c The #cell.
 */
void DOSELF_NAIVE ( struct runner *r , struct cell *restrict c ) {

    const int count = c->count;
    const float dt_max = r->e->dt_max;
    struct part *restrict parts = c->parts;
    struct cpart *restrict cparts = c->cparts;
    struct cpart *restrict cpi, *restrict cpj;
    int i, j, d;
    double pix[3];
    float dx[3], hi, hi2, r2;
#ifdef VECTORIZE
    /* Buffers holding the interactions waiting for the next vector call. */
    int nq = 0;
    float r2q[VEC_SIZE] __attribute__ ((aligned (16)));
    float hiq[VEC_SIZE] __attribute__ ((aligned (16)));
    float hjq[VEC_SIZE] __attribute__ ((aligned (16)));
    float dxq[3*VEC_SIZE] __attribute__ ((aligned (16)));
    struct part *piq[VEC_SIZE], *pjq[VEC_SIZE];
#endif
    TIMER_TIC

    /* Skip the cell altogether if even its smallest dt is above dt_max. */
    if ( c->dt_min > dt_max )
        return;

    /* Outer loop: first particle of the pair. */
    for ( i = 0 ; i < count ; i++ ) {

        /* Cache position and h of particle i. */
        cpi = &cparts[ i ];
        for ( d = 0 ; d < 3 ; d++ )
            pix[d] = cpi->x[d];
        hi = cpi->h;
        hi2 = hi * hi;

        /* Inner loop: only partners after i, so each pair is seen once. */
        for ( j = i + 1 ; j < count ; j++ ) {

            cpj = &cparts[ j ];

            /* Separation vector and its squared length. */
            r2 = 0.0f;
            for ( d = 0 ; d < 3 ; d++ ) {
                dx[d] = pix[d] - cpj->x[d];
                r2 += dx[d]*dx[d];
            }

            /* Out of range of both particles: nothing to do for this pair. */
            if ( !( r2 < hi2 || r2 < cpj->h*cpj->h ) )
                continue;

#ifndef VECTORIZE

            IACT( r2 , dx , hi , cpj->h , &parts[ i ] , &parts[ j ] );

#else

            /* Store the pair in the next free slot of the buffers. */
            r2q[nq] = r2;
            dxq[3*nq+0] = dx[0];
            dxq[3*nq+1] = dx[1];
            dxq[3*nq+2] = dx[2];
            hiq[nq] = hi;
            hjq[nq] = cpj->h;
            piq[nq] = &parts[ i ];
            pjq[nq] = &parts[ j ];
            nq += 1;

            /* Buffers full: run the vectorised interaction and start over. */
            if ( nq == VEC_SIZE ) {
                IACT_VEC( r2q , dxq , hiq , hjq , piq , pjq );
                nq = 0;
            }

#endif

        } /* inner loop over partners j. */

    } /* outer loop over particles i. */

#ifdef VECTORIZE
    /* Process the buffered pairs that did not fill a whole vector. */
    for ( d = 0 ; d < nq ; d++ )
        IACT( r2q[d] , &dxq[3*d] , hiq[d] , hjq[d] , piq[d] , pjq[d] );
#endif

#ifdef TIMER_VERBOSE
    printf( "runner_doself[%02i]: %i parts at depth %i took %.3f ms.\n" , r->id , count , c->depth , ((double)TIMER_TOC(TIMER_DOSELF)) / CPU_TPS * 1000 );
#else
    TIMER_TOC(TIMER_DOSELF);
#endif

}
/**
* @brief Compute the interactions between a cell pair, but only for the
* given indices in ci.
......@@ -507,7 +624,7 @@ void DOSELF_SUBSET ( struct runner *r , struct cell *restrict ci , struct part *
* @param cj The second #cell.
*/
void DOPAIR ( struct runner *r , struct cell *restrict ci , struct cell *restrict cj ) {
void DOPAIR1 ( struct runner *r , struct cell *restrict ci , struct cell *restrict cj ) {
struct engine *restrict e = r->e;
int pid, pjd, k, sid;
......@@ -522,6 +639,7 @@ void DOPAIR ( struct runner *r , struct cell *restrict ci , struct cell *restric
double hi_max, hj_max;
double di_max, dj_min;
int count_i, count_j;
float dt_max = e->dt_max;
#ifdef VECTORIZE
int icount = 0;
float r2q[VEC_SIZE] __attribute__ ((aligned (16)));
......@@ -532,6 +650,10 @@ void DOPAIR ( struct runner *r , struct cell *restrict ci , struct cell *restric
#endif
TIMER_TIC
/* Anything to do here? */
if ( ci->dt_min > dt_max && cj->dt_min > dt_max )
return;
/* Get the relative distance between the pairs, wrapping. */
for ( k = 0 ; k < 3 ; k++ ) {
if ( cj->loc[k] - ci->loc[k] < -e->s->dim[k]/2 )
......@@ -594,6 +716,8 @@ void DOPAIR ( struct runner *r , struct cell *restrict ci , struct cell *restric
/* Get a hold of the ith part in ci. */
pi = &parts_i[ sort_i[ pid ].i ];
cpi = &cparts_i[ sort_i[ pid ].i ];
if ( cpi->dt > dt_max )
continue;
hi = cpi->h;
di = sort_i[pid].d + hi - rshift;
if ( di < dj_min )
......@@ -621,7 +745,7 @@ void DOPAIR ( struct runner *r , struct cell *restrict ci , struct cell *restric
#ifndef VECTORIZE
IACT( r2 , dx , hi , cpj->h , &parts_i[ sort_i[ pid ].i ] , &parts_j[ sort_j[pjd].i ] );
IACT_NONSYM( r2 , dx , hi , cpj->h , pi , &parts_j[ sort_j[pjd].i ] );
#else
......@@ -638,7 +762,7 @@ void DOPAIR ( struct runner *r , struct cell *restrict ci , struct cell *restric
/* Flush? */
if ( icount == VEC_SIZE ) {
IACT_VEC( r2q , dxq , hiq , hjq , piq , pjq );
IACT_NONSYM_VEC( r2q , dxq , hiq , hjq , piq , pjq );
icount = 0;
}
......@@ -659,6 +783,8 @@ void DOPAIR ( struct runner *r , struct cell *restrict ci , struct cell *restric
/* Get a hold of the jth part in cj. */
pj = &parts_j[ sort_j[ pjd ].i ];
cpj = &cparts_j[ sort_j[ pjd ].i ];
if ( cpj->dt > dt_max )
continue;
hj = cpj->h;
dj = sort_j[pjd].d - hj - rshift;
if ( dj > di_max )
......@@ -682,11 +808,11 @@ void DOPAIR ( struct runner *r , struct cell *restrict ci , struct cell *restric
}
/* Hit or miss? */
if ( r2 < hj2 && r2 > cpi->h*cpi->h ) {
if ( r2 < hj2 ) {
#ifndef VECTORIZE
IACT( r2 , dx , hj , cpi->h , &parts_j[ sort_j[ pjd ].i ] , &parts_i[ sort_i[pid].i ] );
IACT_NONSYM( r2 , dx , hj , cpi->h , pj , &parts_i[ sort_i[pid].i ] );
#else
......@@ -703,7 +829,7 @@ void DOPAIR ( struct runner *r , struct cell *restrict ci , struct cell *restric
/* Flush? */
if ( icount == VEC_SIZE ) {
IACT_VEC( r2q , dxq , hiq , hjq , piq , pjq );
IACT_NONSYM_VEC( r2q , dxq , hiq , hjq , piq , pjq );
icount = 0;
}
......@@ -719,7 +845,7 @@ void DOPAIR ( struct runner *r , struct cell *restrict ci , struct cell *restric
/* Pick up any leftovers. */
if ( icount > 0 )
for ( k = 0 ; k < icount ; k++ )
IACT( r2q[k] , &dxq[3*k] , hiq[k] , hjq[k] , piq[k] , pjq[k] );
IACT_NONSYM( r2q[k] , &dxq[3*k] , hiq[k] , hjq[k] , piq[k] , pjq[k] );
#endif
#ifdef TIMER_VERBOSE
......@@ -731,20 +857,24 @@ void DOPAIR ( struct runner *r , struct cell *restrict ci , struct cell *restric
}
/**
* @brief Compute the cell self-interaction.
*
* @param r The #runner.
* @param c The #cell.
*/
void DOPAIR2 ( struct runner *r , struct cell *restrict ci , struct cell *restrict cj ) {
void DOSELF ( struct runner *r , struct cell *restrict c ) {
int k, pid, pjd, count = c->count;
double pix[3];
float dx[3], hi, hi2, r2;
struct part *restrict parts = c->parts;
struct cpart *restrict cpi, *restrict cpj, *restrict cparts = c->cparts;
struct engine *restrict e = r->e;
int pid, pjd, k, sid;
double rshift, shift[3] = { 0.0 , 0.0 , 0.0 };
struct cell *temp;
struct entry *restrict sort_i, *restrict sort_j;
struct entry *restrict sortdt_i = NULL, *restrict sortdt_j = NULL;
int countdt_i = 0, countdt_j = 0;
struct part *restrict pi, *restrict pj, *restrict parts_i, *restrict parts_j;
struct cpart *restrict cpi, *restrict cparts_i;
struct cpart *restrict cpj, *restrict cparts_j;
double pix[3], pjx[3], di, dj;
float dx[3], hi, hi2, hj, hj2, r2;
double hi_max, hj_max;
double di_max, dj_min;
int count_i, count_j;
float dt_max = e->dt_max;
#ifdef VECTORIZE
int icount = 0;
float r2q[VEC_SIZE] __attribute__ ((aligned (16)));
......@@ -755,23 +885,117 @@ void DOSELF ( struct runner *r , struct cell *restrict c ) {
#endif
TIMER_TIC
/* Loop over the particles in the cell. */
for ( pid = 0 ; pid < count ; pid++ ) {
/* Anything to do here? */
if ( ci->dt_min > dt_max && cj->dt_min > dt_max )
return;
/* Get a pointer to the ith particle. */
cpi = &cparts[pid];
/* Get the relative distance between the pairs, wrapping. */
for ( k = 0 ; k < 3 ; k++ ) {
if ( cj->loc[k] - ci->loc[k] < -e->s->dim[k]/2 )
shift[k] = e->s->dim[k];
else if ( cj->loc[k] - ci->loc[k] > e->s->dim[k]/2 )
shift[k] = -e->s->dim[k];
}
/* Get the particle position and radius. */
/* Get the sorting index. */
for ( sid = 0 , k = 0 ; k < 3 ; k++ )
sid = 3*sid + ( (cj->loc[k] - ci->loc[k] + shift[k] < 0) ? 0 : (cj->loc[k] - ci->loc[k] + shift[k] > 0) ? 2 : 1 );
/* Switch the cells around? */
if ( runner_flip[sid] ) {
temp = ci; ci = cj; cj = temp;
for ( k = 0 ; k < 3 ; k++ )
pix[k] = cpi->x[k];
shift[k] = -shift[k];
}
sid = sortlistID[sid];
/* Get the cutoff shift. */
for ( rshift = 0.0 , k = 0 ; k < 3 ; k++ )
rshift += shift[k]*runner_shift[ 3*sid + k ];
/* for ( k = 0 ; k < ci->count ; k++ )
if ( ci->parts[k].id == 561590 )
break;
if ( k == ci->count )
for ( k = 0 ; k < cj->count ; k++ )
if ( cj->parts[k].id == 561590 )
break;
if ( k < cj->count )
printf( "runner_dopair: doing pair [ %g %g %g ]/[ %g %g %g ] with %i/%i parts, h_max=%g/%g, and shift = [ %g %g %g ] (rshift=%g).\n" ,
ci->loc[0] , ci->loc[1] , ci->loc[2] , cj->loc[0] , cj->loc[1] , cj->loc[2] ,
ci->count , cj->count , ci->h_max , cj->h_max , shift[0] , shift[1] , shift[2] , rshift ); fflush(stdout); */
/* for ( hi = 0 , k = 0 ; k < ci->count ; k++ )
hi += ci->parts[k].r;
for ( hj = 0 , k = 0 ; k < cj->count ; k++ )
hj += cj->parts[k].r;
printf( "runner_dopair: avg. radii %g/%g for h=%g at depth=%i.\n" , hi/ci->count , hj/cj->count , ci->h[0] , ci->depth ); fflush(stdout); */
/* Pick-out the sorted lists. */
sort_i = &ci->sort[ sid*(ci->count + 1) ];
sort_j = &cj->sort[ sid*(cj->count + 1) ];
/* Get some other useful values. */
hi_max = ci->h_max - rshift; hj_max = cj->h_max;
count_i = ci->count; count_j = cj->count;
parts_i = ci->parts; parts_j = cj->parts;
cparts_i = ci->cparts; cparts_j = cj->cparts;
di_max = sort_i[count_i-1].d - rshift;
dj_min = sort_j[0].d;
/* Collect the number of parts left and right below dt. */
if ( cj->dt_min > dt_max ) {
sortdt_i = sort_i;