Commit f6e86c59 authored by Pedro Gonnet's avatar Pedro Gonnet
Browse files

preliminary implementation of variable time stepping. still problems with...

Preliminary implementation of variable time stepping. There are still problems with sorting; the task tree should be built more carefully.


Former-commit-id: 903851927b5bf4b34cfda269d488386c2378e445
parent 189416cb
...@@ -41,6 +41,9 @@ struct cell { ...@@ -41,6 +41,9 @@ struct cell {
/* Max radii in this cell. */ /* Max radii in this cell. */
double h_max; double h_max;
/* Minimum and maximum dt in this cell. */
double dt_min, dt_max;
/* The depth of this cell in the tree. */ /* The depth of this cell in the tree. */
int depth, split; int depth, split;
......
...@@ -77,6 +77,10 @@ void engine_prepare ( struct engine *e , int force ) { ...@@ -77,6 +77,10 @@ void engine_prepare ( struct engine *e , int force ) {
for ( k = 0 ; k < e->nr_queues ; k++ ) for ( k = 0 ; k < e->nr_queues ; k++ )
e->queues[k].count = 0; e->queues[k].count = 0;
/* Re-allocate the queue buffers? */
for ( k = 0 ; k < e->nr_queues ; k++ )
queue_init( &e->queues[k] , s->nr_tasks , s->tasks );
/* Fill the queues (round-robin). */ /* Fill the queues (round-robin). */
for ( k = 0 ; k < s->nr_tasks ; k++ ) { for ( k = 0 ; k < s->nr_tasks ; k++ ) {
if ( s->tasks[ s->tasks_ind[k] ].type == task_type_none ) if ( s->tasks[ s->tasks_ind[k] ].type == task_type_none )
...@@ -213,7 +217,7 @@ void engine_barrier( struct engine *e ) { ...@@ -213,7 +217,7 @@ void engine_barrier( struct engine *e ) {
* @param sort_queues Flag to try to sort the queues topologically. * @param sort_queues Flag to try to sort the queues topologically.
*/ */
void engine_run ( struct engine *e , int sort_queues ) { void engine_run ( struct engine *e , int sort_queues , float dt_max ) {
int k; int k;
...@@ -226,6 +230,10 @@ void engine_run ( struct engine *e , int sort_queues ) { ...@@ -226,6 +230,10 @@ void engine_run ( struct engine *e , int sort_queues ) {
} }
} }
/* Set the maximum dt. */
e->dt_max = dt_max;
e->s->dt_max = dt_max;
/* Cry havoc and let loose the dogs of war. */ /* Cry havoc and let loose the dogs of war. */
e->barrier_count = -e->barrier_count; e->barrier_count = -e->barrier_count;
if ( pthread_cond_broadcast( &e->barrier_cond ) != 0 ) if ( pthread_cond_broadcast( &e->barrier_cond ) != 0 )
......
...@@ -50,6 +50,9 @@ struct engine { ...@@ -50,6 +50,9 @@ struct engine {
/* The queues. */ /* The queues. */
struct queue *queues; struct queue *queues;
/* The maximum dt to step. */
float dt_max;
/* Data for the threads' barrier. */ /* Data for the threads' barrier. */
pthread_mutex_t barrier_mutex; pthread_mutex_t barrier_mutex;
pthread_cond_t barrier_cond; pthread_cond_t barrier_cond;
...@@ -63,4 +66,4 @@ void engine_barrier( struct engine *e ); ...@@ -63,4 +66,4 @@ void engine_barrier( struct engine *e );
void engine_init ( struct engine *e , struct space *s , int nr_threads , int nr_queues , int policy ); void engine_init ( struct engine *e , struct space *s , int nr_threads , int nr_queues , int policy );
void engine_prepare ( struct engine *e , int force ); void engine_prepare ( struct engine *e , int force );
void engine_ranktasks ( struct engine *e ); void engine_ranktasks ( struct engine *e );
void engine_run ( struct engine *e , int sort_queues ); void engine_run ( struct engine *e , int sort_queues , float dt_max );
...@@ -278,6 +278,7 @@ void read_ic ( char* fileName, double dim[3], struct part **parts, int* N, int* ...@@ -278,6 +278,7 @@ void read_ic ( char* fileName, double dim[3], struct part **parts, int* N, int*
/* Allocate memory to store particles */ /* Allocate memory to store particles */
if(posix_memalign( (void*)parts , 32 , *N * sizeof(struct part)) != 0) if(posix_memalign( (void*)parts , 32 , *N * sizeof(struct part)) != 0)
error("Error while allocating memory for particles"); error("Error while allocating memory for particles");
bzero( *parts , *N * sizeof(struct part) );
printf("read_ic: Allocated %8.2f MB for particles.\n", *N * sizeof(struct part) / (1024.*1024.)); printf("read_ic: Allocated %8.2f MB for particles.\n", *N * sizeof(struct part) / (1024.*1024.));
......
...@@ -48,9 +48,6 @@ struct part { ...@@ -48,9 +48,6 @@ struct part {
/* Particle velocity. */ /* Particle velocity. */
float v[3]; float v[3];
/* Particle acceleration. */
float a[3];
/* Particle density. */ /* Particle density. */
float rho; float rho;
...@@ -69,6 +66,9 @@ struct part { ...@@ -69,6 +66,9 @@ struct part {
/* Derivative of the density with respect to this particle's smoothing length. */ /* Derivative of the density with respect to this particle's smoothing length. */
float rho_dh; float rho_dh;
/* Particle acceleration. */
float a[3];
/* Particle number density. */ /* Particle number density. */
// int icount; // int icount;
float wcount; float wcount;
......
...@@ -124,10 +124,16 @@ void queue_insert ( struct queue *q , struct task *t ) { ...@@ -124,10 +124,16 @@ void queue_insert ( struct queue *q , struct task *t ) {
void queue_init ( struct queue *q , int size , struct task *tasks ) { void queue_init ( struct queue *q , int size , struct task *tasks ) {
/* Allocate the task list. */ /* Allocate the task list if needed. */
if ( q->tid == NULL || q->size < size ) {
if ( q->tid != NULL )
free( q->tid );
q->size = size; q->size = size;
if ( ( q->tid = (int *)malloc( sizeof(int) * size ) ) == NULL ) if ( ( q->tid = (int *)malloc( sizeof(int) * size ) ) == NULL )
error( "Failed to allocate queue tids." ); error( "Failed to allocate queue tids." );
}
/* Set the tasks pointer. */
q->tasks = tasks; q->tasks = tasks;
/* Init counters. */ /* Init counters. */
......
...@@ -325,6 +325,7 @@ void runner_doghost ( struct runner *r , struct cell *c ) { ...@@ -325,6 +325,7 @@ void runner_doghost ( struct runner *r , struct cell *c ) {
int i, k, redo, count = c->count; int i, k, redo, count = c->count;
int *pid; int *pid;
float ihg, ihg2; float ihg, ihg2;
float dt_max = r->e->dt_max;
TIMER_TIC TIMER_TIC
/* Recurse? */ /* Recurse? */
...@@ -353,6 +354,9 @@ void runner_doghost ( struct runner *r , struct cell *c ) { ...@@ -353,6 +354,9 @@ void runner_doghost ( struct runner *r , struct cell *c ) {
/* Get a direct pointer on the part. */ /* Get a direct pointer on the part. */
p = &c->parts[ pid[i] ]; p = &c->parts[ pid[i] ];
/* Is this part within the timestep? */
if ( p->dt <= dt_max ) {
/* Adjust the computed rho. */ /* Adjust the computed rho. */
ihg = kernel_igamma / p->h; ihg = kernel_igamma / p->h;
ihg2 = ihg * ihg; ihg2 = ihg * ihg;
...@@ -376,14 +380,6 @@ void runner_doghost ( struct runner *r , struct cell *c ) { ...@@ -376,14 +380,6 @@ void runner_doghost ( struct runner *r , struct cell *c ) {
continue; continue;
} }
/* Reset the acceleration. */
for ( k = 0 ; k < 3 ; k++ )
p->a[k] = 0.0f;
/* Reset the time derivatives. */
p->u_dt = 0.0f;
p->h_dt = 0.0f;
/* Compute this particle's time step. */ /* Compute this particle's time step. */
p->dt = const_cfl * p->h / sqrtf( const_gamma * ( const_gamma - 1.0f ) * p->u ); p->dt = const_cfl * p->h / sqrtf( const_gamma * ( const_gamma - 1.0f ) * p->u );
...@@ -395,6 +391,16 @@ void runner_doghost ( struct runner *r , struct cell *c ) { ...@@ -395,6 +391,16 @@ void runner_doghost ( struct runner *r , struct cell *c ) {
} }
/* Reset the acceleration. */
for ( k = 0 ; k < 3 ; k++ )
p->a[k] = 0.0f;
/* Reset the time derivatives. */
p->u_dt = 0.0f;
p->h_dt = 0.0f;
}
/* Re-set the counter for the next loop (potentially). */ /* Re-set the counter for the next loop (potentially). */
count = redo; count = redo;
if ( count > 0 ) { if ( count > 0 ) {
...@@ -570,18 +576,18 @@ void *runner_main ( void *data ) { ...@@ -570,18 +576,18 @@ void *runner_main ( void *data ) {
switch ( t->type ) { switch ( t->type ) {
case task_type_self: case task_type_self:
if ( t->subtype == task_subtype_density ) if ( t->subtype == task_subtype_density )
runner_doself_density( r , ci ); runner_doself1_density( r , ci );
else if ( t->subtype == task_subtype_force ) else if ( t->subtype == task_subtype_force )
runner_doself_force( r , ci ); runner_doself2_force( r , ci );
else else
error( "Unknown task subtype." ); error( "Unknown task subtype." );
cell_unlocktree( ci ); cell_unlocktree( ci );
break; break;
case task_type_pair: case task_type_pair:
if ( t->subtype == task_subtype_density ) if ( t->subtype == task_subtype_density )
runner_dopair_density( r , ci , cj ); runner_dopair1_density( r , ci , cj );
else if ( t->subtype == task_subtype_force ) else if ( t->subtype == task_subtype_force )
runner_dopair_force( r , ci , cj ); runner_dopair2_force( r , ci , cj );
else else
error( "Unknown task subtype." ); error( "Unknown task subtype." );
cell_unlocktree( ci ); cell_unlocktree( ci );
...@@ -592,9 +598,9 @@ void *runner_main ( void *data ) { ...@@ -592,9 +598,9 @@ void *runner_main ( void *data ) {
break; break;
case task_type_sub: case task_type_sub:
if ( t->subtype == task_subtype_density ) if ( t->subtype == task_subtype_density )
runner_dosub_density( r , ci , cj , t->flags ); runner_dosub1_density( r , ci , cj , t->flags );
else if ( t->subtype == task_subtype_force ) else if ( t->subtype == task_subtype_force )
runner_dosub_force( r , ci , cj , t->flags ); runner_dosub2_force( r , ci , cj , t->flags );
else else
error( "Unknown task subtype." ); error( "Unknown task subtype." );
cell_unlocktree( ci ); cell_unlocktree( ci );
......
...@@ -75,6 +75,7 @@ enum { ...@@ -75,6 +75,7 @@ enum {
runner_counter_steal_stall, runner_counter_steal_stall,
runner_counter_steal_empty, runner_counter_steal_empty,
runner_counter_keep, runner_counter_keep,
runner_counter_iact,
runner_counter_count, runner_counter_count,
}; };
extern int runner_counter[ runner_counter_count ]; extern int runner_counter[ runner_counter_count ];
......
...@@ -26,53 +26,65 @@ ...@@ -26,53 +26,65 @@
#define PASTE(x,y) x ## _ ## y #define PASTE(x,y) x ## _ ## y
#define DOPAIR2(f) PASTE(runner_dopair,f) #define _DOPAIR1(f) PASTE(runner_dopair1,f)
#define DOPAIR DOPAIR2(FUNCTION) #define DOPAIR1 _DOPAIR1(FUNCTION)
#define DOPAIR_SUBSET2(f) PASTE(runner_dopair_subset,f) #define _DOPAIR2(f) PASTE(runner_dopair2,f)
#define DOPAIR_SUBSET DOPAIR_SUBSET2(FUNCTION) #define DOPAIR2 _DOPAIR2(FUNCTION)
#define DOPAIR_NAIVE2(f) PASTE(runner_dopair_naive,f) #define _DOPAIR_SUBSET(f) PASTE(runner_dopair_subset,f)
#define DOPAIR_NAIVE DOPAIR_NAIVE2(FUNCTION) #define DOPAIR_SUBSET _DOPAIR_SUBSET(FUNCTION)
#define DOSELF2(f) PASTE(runner_doself,f) #define _DOPAIR_NAIVE(f) PASTE(runner_dopair_naive,f)
#define DOSELF DOSELF2(FUNCTION) #define DOPAIR_NAIVE _DOPAIR_NAIVE(FUNCTION)
#define DOSELF_SUBSET2(f) PASTE(runner_doself_subset,f) #define _DOSELF_NAIVE(f) PASTE(runner_doself_naive,f)
#define DOSELF_SUBSET DOSELF_SUBSET2(FUNCTION) #define DOSELF_NAIVE _DOSELF_NAIVE(FUNCTION)
#define DOSUB2(f) PASTE(runner_dosub,f) #define _DOSELF1(f) PASTE(runner_doself1,f)
#define DOSUB DOSUB2(FUNCTION) #define DOSELF1 _DOSELF1(FUNCTION)
#define DOSUB_SUBSET2(f) PASTE(runner_dosub_subset,f) #define _DOSELF2(f) PASTE(runner_doself2,f)
#define DOSUB_SUBSET DOSUB_SUBSET2(FUNCTION) #define DOSELF2 _DOSELF2(FUNCTION)
#define IACT_NONSYM2(f) PASTE(runner_iact_nonsym,f) #define _DOSELF_SUBSET(f) PASTE(runner_doself_subset,f)
#define IACT_NONSYM IACT_NONSYM2(FUNCTION) #define DOSELF_SUBSET _DOSELF_SUBSET(FUNCTION)
#define IACT2(f) PASTE(runner_iact,f) #define _DOSUB1(f) PASTE(runner_dosub1,f)
#define IACT IACT2(FUNCTION) #define DOSUB1 _DOSUB1(FUNCTION)
#define TIMER_DOSELF2(f) PASTE(runner_timer_doself,f) #define _DOSUB2(f) PASTE(runner_dosub2,f)
#define TIMER_DOSELF TIMER_DOSELF2(FUNCTION) #define DOSUB2 _DOSUB2(FUNCTION)
#define TIMER_DOPAIR2(f) PASTE(runner_timer_dopair,f) #define _DOSUB_SUBSET(f) PASTE(runner_dosub_subset,f)
#define TIMER_DOPAIR TIMER_DOPAIR2(FUNCTION) #define DOSUB_SUBSET _DOSUB_SUBSET(FUNCTION)
#define TIMER_DOSUB2(f) PASTE(runner_timer_dosub,f) #define _IACT_NONSYM(f) PASTE(runner_iact_nonsym,f)
#define TIMER_DOSUB TIMER_DOSUB2(FUNCTION) #define IACT_NONSYM _IACT_NONSYM(FUNCTION)
#define TIMER_DOSELF_SUBSET2(f) PASTE(runner_timer_doself_subset,f) #define _IACT(f) PASTE(runner_iact,f)
#define TIMER_DOSELF_SUBSET TIMER_DOSELF_SUBSET2(FUNCTION) #define IACT _IACT(FUNCTION)
#define TIMER_DOPAIR_SUBSET2(f) PASTE(runner_timer_dopair_subset,f) #define _TIMER_DOSELF(f) PASTE(runner_timer_doself,f)
#define TIMER_DOPAIR_SUBSET TIMER_DOPAIR_SUBSET2(FUNCTION) #define TIMER_DOSELF _TIMER_DOSELF(FUNCTION)
#define IACT_NONSYM_VEC2(f) PASTE(runner_iact_nonsym_vec,f) #define _TIMER_DOPAIR(f) PASTE(runner_timer_dopair,f)
#define IACT_NONSYM_VEC IACT_NONSYM_VEC2(FUNCTION) #define TIMER_DOPAIR _TIMER_DOPAIR(FUNCTION)
#define IACT_VEC2(f) PASTE(runner_iact_vec,f) #define _TIMER_DOSUB(f) PASTE(runner_timer_dosub,f)
#define IACT_VEC IACT_VEC2(FUNCTION) #define TIMER_DOSUB _TIMER_DOSUB(FUNCTION)
#define _TIMER_DOSELF_SUBSET(f) PASTE(runner_timer_doself_subset,f)
#define TIMER_DOSELF_SUBSET _TIMER_DOSELF_SUBSET(FUNCTION)
#define _TIMER_DOPAIR_SUBSET(f) PASTE(runner_timer_dopair_subset,f)
#define TIMER_DOPAIR_SUBSET _TIMER_DOPAIR_SUBSET(FUNCTION)
#define _IACT_NONSYM_VEC(f) PASTE(runner_iact_nonsym_vec,f)
#define IACT_NONSYM_VEC _IACT_NONSYM_VEC(FUNCTION)
#define _IACT_VEC(f) PASTE(runner_iact_vec,f)
#define IACT_VEC _IACT_VEC(FUNCTION)
...@@ -94,6 +106,7 @@ void DOPAIR_NAIVE ( struct runner *r , struct cell *restrict ci , struct cell *r ...@@ -94,6 +106,7 @@ void DOPAIR_NAIVE ( struct runner *r , struct cell *restrict ci , struct cell *r
struct cpart *restrict cpi, *restrict cparts_i = ci->cparts; struct cpart *restrict cpi, *restrict cparts_i = ci->cparts;
double pix[3]; double pix[3];
float dx[3], hi, hi2, r2; float dx[3], hi, hi2, r2;
float dt_max = e->dt_max;
#ifdef VECTORIZE #ifdef VECTORIZE
int icount = 0; int icount = 0;
float r2q[VEC_SIZE] __attribute__ ((aligned (16))); float r2q[VEC_SIZE] __attribute__ ((aligned (16)));
...@@ -104,6 +117,10 @@ void DOPAIR_NAIVE ( struct runner *r , struct cell *restrict ci , struct cell *r ...@@ -104,6 +117,10 @@ void DOPAIR_NAIVE ( struct runner *r , struct cell *restrict ci , struct cell *r
#endif #endif
TIMER_TIC TIMER_TIC
/* Anything to do here? */
if ( ci->dt_min > dt_max && cj->dt_min > dt_max )
return;
/* Get the relative distance between the pairs, wrapping. */ /* Get the relative distance between the pairs, wrapping. */
for ( k = 0 ; k < 3 ; k++ ) { for ( k = 0 ; k < 3 ; k++ ) {
if ( cj->loc[k] - ci->loc[k] < -e->s->dim[k]/2 ) if ( cj->loc[k] - ci->loc[k] < -e->s->dim[k]/2 )
...@@ -191,6 +208,106 @@ void DOPAIR_NAIVE ( struct runner *r , struct cell *restrict ci , struct cell *r ...@@ -191,6 +208,106 @@ void DOPAIR_NAIVE ( struct runner *r , struct cell *restrict ci , struct cell *r
} }
/**
 * @brief Brute-force interactions between all particle pairs of one cell.
 *
 * Every unordered pair (i, j) with i < j is visited once. A pair interacts
 * when its squared distance is smaller than the squared h of either
 * particle. Without VECTORIZE each hit is passed straight to IACT; with
 * VECTORIZE hits are buffered and flushed through IACT_VEC whenever
 * VEC_SIZE of them have accumulated, and the remainder is handled by IACT.
 *
 * The cell is skipped entirely if its dt_min is larger than the engine's
 * dt_max. Note that this early exit happens after TIMER_TIC and without a
 * matching TIMER_TOC.
 *
 * @param r The #runner executing the task.
 * @param c The #cell whose particles are paired with each other.
 */
void DOSELF_NAIVE ( struct runner *r , struct cell *restrict c ) {

    const int count = c->count;
    struct part *restrict parts = c->parts;
    struct cpart *restrict cparts = c->cparts;
    const float dt_max = r->e->dt_max;
    double xi[3];
    float sep[3], h_i, h_i2, dist2;
#ifdef VECTORIZE
    int queued = 0;
    float r2q[VEC_SIZE] __attribute__ ((aligned (16)));
    float hiq[VEC_SIZE] __attribute__ ((aligned (16)));
    float hjq[VEC_SIZE] __attribute__ ((aligned (16)));
    float dxq[3*VEC_SIZE] __attribute__ ((aligned (16)));
    struct part *piq[VEC_SIZE], *pjq[VEC_SIZE];
#endif
    TIMER_TIC

    /* Bail out if no particle of this cell can be due in this step. */
    if ( c->dt_min > dt_max )
        return;

    /* Outer loop: the first particle of each pair. */
    for ( int i = 0 ; i < count ; i++ ) {

        /* Cache the position and h of particle i. */
        struct cpart *cp_i = &cparts[ i ];
        for ( int k = 0 ; k < 3 ; k++ )
            xi[k] = cp_i->x[k];
        h_i = cp_i->h;
        h_i2 = h_i * h_i;

        /* Inner loop: only partners with a higher index, so each pair is seen once. */
        for ( int j = i + 1 ; j < count ; j++ ) {

            struct cpart *cp_j = &cparts[ j ];

            /* Squared separation, accumulated component by component. */
            dist2 = 0.0f;
            for ( int k = 0 ; k < 3 ; k++ ) {
                sep[k] = xi[k] - cp_j->x[k];
                dist2 += sep[k]*sep[k];
                }

            /* Skip pairs that are out of range of both particles. */
            if ( !( dist2 < h_i2 || dist2 < cp_j->h*cp_j->h ) )
                continue;

#ifndef VECTORIZE

            IACT( dist2 , sep , h_i , cp_j->h , &parts[ i ] , &parts[ j ] );

#else

            /* Append the pair to the interaction buffers. */
            r2q[queued] = dist2;
            dxq[3*queued+0] = sep[0];
            dxq[3*queued+1] = sep[1];
            dxq[3*queued+2] = sep[2];
            hiq[queued] = h_i;
            hjq[queued] = cp_j->h;
            piq[queued] = &parts[ i ];
            pjq[queued] = &parts[ j ];
            queued += 1;

            /* A full buffer is processed in one vector call. */
            if ( queued == VEC_SIZE ) {
                IACT_VEC( r2q , dxq , hiq , hjq , piq , pjq );
                queued = 0;
                }

#endif

            } /* inner loop over partner particles. */

        } /* outer loop over particles. */

#ifdef VECTORIZE
    /* Whatever is still buffered is handled one pair at a time. */
    for ( int k = 0 ; k < queued ; k++ )
        IACT( r2q[k] , &dxq[3*k] , hiq[k] , hjq[k] , piq[k] , pjq[k] );
#endif

#ifdef TIMER_VERBOSE
    printf( "runner_doself[%02i]: %i parts at depth %i took %.3f ms.\n" , r->id , count , c->depth , ((double)TIMER_TOC(TIMER_DOSELF)) / CPU_TPS * 1000 );
#else
    TIMER_TOC(TIMER_DOSELF);
#endif

    }
/** /**
* @brief Compute the interactions between a cell pair, but only for the * @brief Compute the interactions between a cell pair, but only for the
* given indices in ci. * given indices in ci.
...@@ -507,7 +624,7 @@ void DOSELF_SUBSET ( struct runner *r , struct cell *restrict ci , struct part * ...@@ -507,7 +624,7 @@ void DOSELF_SUBSET ( struct runner *r , struct cell *restrict ci , struct part *
* @param cj The second #cell. * @param cj The second #cell.
*/ */
void DOPAIR ( struct runner *r , struct cell *restrict ci , struct cell *restrict cj ) { void DOPAIR1 ( struct runner *r , struct cell *restrict ci , struct cell *restrict cj ) {
struct engine *restrict e = r->e; struct engine *restrict e = r->e;
int pid, pjd, k, sid; int pid, pjd, k, sid;
...@@ -522,6 +639,7 @@ void DOPAIR ( struct runner *r , struct cell *restrict ci , struct cell *restric ...@@ -522,6 +639,7 @@ void DOPAIR ( struct runner *r , struct cell *restrict ci , struct cell *restric
double hi_max, hj_max; double hi_max, hj_max;
double di_max, dj_min; double di_max, dj_min;
int count_i, count_j; int count_i, count_j;
float dt_max = e->dt_max;
#ifdef VECTORIZE #ifdef VECTORIZE
int icount = 0; int icount = 0;
float r2q[VEC_SIZE] __attribute__ ((aligned (16))); float r2q[VEC_SIZE] __attribute__ ((aligned (16)));
...@@ -532,6 +650,10 @@ void DOPAIR ( struct runner *r , struct cell *restrict ci , struct cell *restric ...@@ -532,6 +650,10 @@ void DOPAIR ( struct runner *r , struct cell *restrict ci , struct cell *restric
#endif #endif
TIMER_TIC TIMER_TIC
/* Anything to do here? */
if ( ci->dt_min > dt_max && cj->dt_min > dt_max )
return;
/* Get the relative distance between the pairs, wrapping. */ /* Get the relative distance between the pairs, wrapping. */
for ( k = 0 ; k < 3 ; k++ ) { for ( k = 0 ; k < 3 ; k++ ) {
if ( cj->loc[k] - ci->loc[k] < -e->s->dim[k]/2 ) if ( cj->loc[k] - ci->loc[k] < -e->s->dim[k]/2 )
...@@ -594,6 +716,8 @@ void DOPAIR ( struct runner *r , struct cell *restrict ci , struct cell *restric ...@@ -594,6 +716,8 @@ void DOPAIR ( struct runner *r , struct cell *restrict ci , struct cell *restric
/* Get a hold of the ith part in ci. */ /* Get a hold of the ith part in ci. */
pi = &parts_i[ sort_i[ pid ].i ]; pi = &parts_i[ sort_i[ pid ].i ];
cpi = &cparts_i[ sort_i[ pid ].i ]; cpi = &cparts_i[ sort_i[ pid ].i ];
if ( cpi->dt > dt_max )
continue;
hi = cpi->h; hi = cpi->h;
di = sort_i[pid].d + hi - rshift; di = sort_i[pid].d + hi - rshift;
if ( di < dj_min ) if ( di < dj_min )
...@@ -621,7 +745,7 @@ void DOPAIR ( struct runner *r , struct cell *restrict ci , struct cell *restric ...@@ -621,7 +745,7 @@ void DOPAIR ( struct runner *r , struct cell *restrict ci , struct cell *restric
#ifndef VECTORIZE #ifndef VECTORIZE
IACT( r2 , dx , hi , cpj->h , &parts_i[ sort_i[ pid ].i ] , &parts_j[ sort_j[pjd].i ] ); IACT_NONSYM( r2 , dx , hi , cpj->h , pi , &parts_j[ sort_j[pjd].i ] );
#else #else
...@@ -638,7 +762,7 @@ void DOPAIR ( struct runner *r , struct cell *restrict ci , struct cell *restric ...@@ -638,7 +762,7 @@ void DOPAIR ( struct runner *r , struct cell *restrict ci , struct cell *restric
/* Flush? */ /* Flush? */
if ( icount == VEC_SIZE ) { if ( icount == VEC_SIZE ) {
IACT_VEC( r2q , dxq , hiq , hjq , piq , pjq ); IACT_NONSYM_VEC( r2q , dxq , hiq , hjq , piq , pjq );
icount = 0; icount = 0;
} }
...@@ -659,6 +783,8 @@ void DOPAIR ( struct runner *r , struct cell *restrict ci , struct cell *restric ...@@ -659,6 +783,8 @@ void DOPAIR ( struct runner *r , struct cell *restrict ci , struct cell *restric
/* Get a hold of the jth part in cj. */ /* Get a hold of the jth part in cj. */
pj = &parts_j[ sort_j[ pjd ].i ]; pj = &parts_j[ sort_j[ pjd ].i ];
cpj = &cparts_j[ sort_j[ pjd ].i ]; cpj = &cparts_j[ sort_j[ pjd ].i ];
if ( cpj->dt > dt_max )
continue;
hj = cpj->h;