diff --git a/src/runner_doiact.h b/src/runner_doiact.h index 7432f48d0087086212e4786386417f480c3b05f2..cabf2cc69a8c93a9f667126e5767c7150d3ac71e 100644 --- a/src/runner_doiact.h +++ b/src/runner_doiact.h @@ -35,6 +35,9 @@ #define _DOPAIR_SUBSET(f) PASTE(runner_dopair_subset,f) #define DOPAIR_SUBSET _DOPAIR_SUBSET(FUNCTION) +#define _DOPAIR_SUBSET_NAIVE(f) PASTE(runner_dopair_subset_naive,f) +#define DOPAIR_SUBSET_NAIVE _DOPAIR_SUBSET_NAIVE(FUNCTION) + #define _DOPAIR_NAIVE(f) PASTE(runner_dopair_naive,f) #define DOPAIR_NAIVE _DOPAIR_NAIVE(FUNCTION) @@ -511,6 +514,123 @@ void DOPAIR_SUBSET ( struct runner *r , struct cell *restrict ci , struct part * } +/** + * @brief Compute the interactions between a cell pair, but only for the + * given indices in ci. + * + * @param r The #runner. + * @param ci The first #cell. + * @param parts_i The #parts to interact with @c cj. + * @param ind The list of indices of particles in @c ci to interact with. + * @param count The number of particles in @c ind. + * @param cj The second #cell. + */ + +void DOPAIR_SUBSET_NAIVE ( struct runner *r , struct cell *restrict ci , struct part *restrict parts_i , int *restrict ind , int count , struct cell *restrict cj ) { + + struct engine *e = r->e; + int pid, pjd, k, count_j = cj->count; + double shift[3] = { 0.0 , 0.0 , 0.0 }; + struct part *restrict pi, *restrict pj, *restrict parts_j = cj->parts; + double pix[3]; + float dx[3], hi, hig2, r2; + #ifdef VECTORIZE + int icount = 0; + float r2q[VEC_SIZE] __attribute__ ((aligned (16))); + float hiq[VEC_SIZE] __attribute__ ((aligned (16))); + float hjq[VEC_SIZE] __attribute__ ((aligned (16))); + float dxq[3*VEC_SIZE] __attribute__ ((aligned (16))); + struct part *piq[VEC_SIZE], *pjq[VEC_SIZE]; + #endif + TIMER_TIC + + /* Get the relative distance between the pairs, wrapping. */ + for ( k = 0 ; k < 3 ; k++ ) { + if ( cj->loc[k] - ci->loc[k] < -e->s->dim[k]/2 ) + shift[k] = e->s->dim[k]; + else if ( cj->loc[k] - ci->loc[k] > e->s->dim[k]/2 ) + shift[k] = -e->s->dim[k]; + } + + /* printf( "runner_dopair_naive: doing pair [ %g %g %g ]/[ %g %g %g ] with %i/%i parts and shift = [ %g %g %g ].\n" , + ci->loc[0] , ci->loc[1] , ci->loc[2] , cj->loc[0] , cj->loc[1] , cj->loc[2] , + ci->count , cj->count , shift[0] , shift[1] , shift[2] ); fflush(stdout); + tic = getticks(); */ + + /* Loop over the parts_i. */ + for ( pid = 0 ; pid < count ; pid++ ) { + + /* Get a hold of the ith part in ci. */ + pi = &parts_i[ ind[ pid ] ]; + for ( k = 0 ; k < 3 ; k++ ) + pix[k] = pi->x[k] - shift[k]; + hi = pi->h; + hig2 = hi * hi * kernel_gamma2; + + /* Loop over the parts in cj. */ + for ( pjd = 0 ; pjd < count_j ; pjd++ ) { + + /* Get a pointer to the jth particle. */ + pj = &parts_j[ pjd ]; + + /* Compute the pairwise distance. */ + r2 = 0.0f; + for ( k = 0 ; k < 3 ; k++ ) { + dx[k] = pix[k] - pj->x[k]; + r2 += dx[k]*dx[k]; + } + + /* Hit or miss? */ + if ( r2 < hig2 ) { + + #ifndef VECTORIZE + + IACT_NONSYM( r2 , dx , hi , pj->h , pi , pj ); + + #else + + /* Add this interaction to the queue. */ + r2q[icount] = r2; + dxq[3*icount+0] = dx[0]; + dxq[3*icount+1] = dx[1]; + dxq[3*icount+2] = dx[2]; + hiq[icount] = hi; + hjq[icount] = pj->h; + piq[icount] = pi; + pjq[icount] = pj; + icount += 1; + + /* Flush? */ + if ( icount == VEC_SIZE ) { + IACT_NONSYM_VEC( r2q , dxq , hiq , hjq , piq , pjq ); + icount = 0; + } + + #endif + + } + + } /* loop over the parts in cj. */ + + } /* loop over the parts in ci. */ + + #ifdef VECTORIZE + /* Pick up any leftovers. */ + if ( icount > 0 ) + for ( k = 0 ; k < icount ; k++ ) + IACT_NONSYM( r2q[k] , &dxq[3*k] , hiq[k] , hjq[k] , piq[k] , pjq[k] ); + #endif + + #ifdef TIMER_VERBOSE + printf( "runner_dopair_subset[%02i]: %i/%i parts at depth %i (r_max=%.3f/%.3f) took %.3f ms.\n" , r->id , count , count_j , ci->depth , ci->h_max , cj->h_max , ((double)TIMER_TOC(TIMER_DOPAIR)) / CPU_TPS * 1000 ); + #else + TIMER_TOC(timer_dopair_subset); + #endif + + + } + + /** * @brief Compute the interactions between a cell pair, but only for the * given indices in ci.