printf("runner_dopair_naive[%02i]: %i/%i parts at depth %i (r_max=%.3f/%.3f) took %.3f ms.\n",r->id,count_i,count_j,ci->depth,ci->h_max,cj->h_max,((double)TIMER_TOC(TIMER_DOPAIR))/CPU_TPS*1000);
#else
...
...
@@ -168,6 +213,14 @@ void DOPAIR_SUBSET ( struct runner *r , struct cell *restrict ci , struct part *
/* Scalar working variables for the pair interaction loop. */
double pix[3];
float dx[3], hi, hi2, r2, di;
struct entry *sort_j;
#ifdef VECTORIZE
/* Buffers that exist only in the vectorized build. Each array holds VEC_SIZE
   entries (3 * VEC_SIZE for dxq) and the float arrays are 16-byte aligned.
   NOTE(review): presumably these queue per-pair data (r2, hi, hj, dx and the
   two particle pointers) so interactions can be evaluated VEC_SIZE at a time,
   with icount tracking how many slots are filled -- confirm against the loop
   body, which is not visible here. */
int icount = 0;
float r2q[VEC_SIZE] __attribute__((aligned(16)));
float hiq[VEC_SIZE] __attribute__((aligned(16)));
float hjq[VEC_SIZE] __attribute__((aligned(16)));
float dxq[3 * VEC_SIZE] __attribute__((aligned(16)));
struct part *piq[VEC_SIZE], *pjq[VEC_SIZE];
#endif
TIMER_TIC
/* Get the relative distance between the pairs, wrapping. */
...
...
@@ -224,7 +277,30 @@ void DOPAIR_SUBSET ( struct runner *r , struct cell *restrict ci , struct part *
printf("runner_dopair_subset[%02i]: %i/%i parts at depth %i (r_max=%.3f/%.3f) took %.3f ms.\n",r->id,count_i,count_j,ci->depth,ci->h_max,cj->h_max,((double)TIMER_TOC(TIMER_DOPAIR))/CPU_TPS*1000);
#else
...
...
@@ -304,6 +410,14 @@ void DOSELF_SUBSET ( struct runner *r , struct cell *restrict ci , struct part *
printf("runner_doself_subset[%02i]: %i/%i parts at depth %i (r_max=%.3f/%.3f) took %.3f ms.\n",r->id,count_i,count_j,ci->depth,ci->h_max,cj->h_max,((double)TIMER_TOC(TIMER_DOPAIR))/CPU_TPS*1000);
printf("runner_dopair[%02i]: %i/%i parts at depth %i (r_max=%.3f/%.3f, h=%.3f) took %.3f ms.\n",r->id,count_i,count_j,ci->depth,ci->h_max,cj->h_max,fmax(ci->h[0],fmax(ci->h[1],ci->h[2])),((double)(TIMER_TOC(TIMER_DOPAIR)))/CPU_TPS*1000);