diff --git a/src/runner.c b/src/runner.c index 9be0a9ee2ce23888d04346679ccb36fdc6f13a02..36074144e14ec31741ee5964d8e2b98c9673192b 100644 --- a/src/runner.c +++ b/src/runner.c @@ -1818,13 +1818,8 @@ void *runner_main(void *data) { break; case task_type_pair: - if (t->subtype == task_subtype_density) { -#if defined(WITH_VECTORIZATION) && defined(GADGET2_SPH) - runner_dopair1_density_vec(r, ci, cj); -#else - runner_dopair1_density(r, ci, cj); -#endif - } + if (t->subtype == task_subtype_density) + runner_dopair1_branch_density(r, ci, cj); #ifdef EXTRA_HYDRO_LOOP else if (t->subtype == task_subtype_gradient) runner_dopair1_gradient(r, ci, cj); diff --git a/src/runner_doiact.h b/src/runner_doiact.h index f4513ca75b994c51a74011f0da4ee6863a625968..03fb0c07b70a98deda37f4f94a37d468240c94b4 100644 --- a/src/runner_doiact.h +++ b/src/runner_doiact.h @@ -885,7 +885,7 @@ void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci, * @param ci The first #cell. * @param cj The second #cell. */ -void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) { +void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid, const double *shift) { const struct engine *restrict e = r->e; @@ -900,22 +900,6 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) { TIMER_TIC; - /* Anything to do here? */ - if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return; - - if (!cell_are_part_drifted(ci, e) || !cell_are_part_drifted(cj, e)) - error("Interacting undrifted cells."); - - /* Get the sort ID. */ - double shift[3] = {0.0, 0.0, 0.0}; - const int sid = space_getsid(e->s, &ci, &cj, shift); - - /* Have the cells been sorted? */ - if (!(ci->sorted & (1 << sid)) || ci->dx_max_sort > space_maxreldx * ci->dmin) - runner_do_sort(r, ci, (1 << sid), 1); - if (!(cj->sorted & (1 << sid)) || cj->dx_max_sort > space_maxreldx * cj->dmin) - runner_do_sort(r, cj, (1 << sid), 1); - /* Get the cutoff shift. 
*/ double rshift = 0.0; for (int k = 0; k < 3; k++) rshift += shift[k] * runner_shift[sid][k]; @@ -1116,6 +1100,49 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) { TIMER_TOC(TIMER_DOPAIR); } +/** + * @brief Front-end for DOPAIR1: decides whether the pair interaction has to be computed at all and, if so, which implementation is called depending on the orientation (sort ID) of the cells. + * + * Returns immediately if neither cell is active and raises an error if either cell fails cell_are_part_drifted(). The sort ID and shift come from space_getsid(). A cell is passed to runner_do_sort() when its sort flag for that ID is unset or its dx_max_sort exceeds space_maxreldx * dmin, and an error is raised if either cell still lacks the sort afterwards. When WITH_VECTORIZATION and GADGET2_SPH are defined and the equality test in the preprocessor conditional holds, pairs whose sort ID is not a corner (as reported by sort_is_corner()) go to runner_dopair1_density_vec(); every other case goes to DOPAIR1(). + * + * NOTE(review): the preprocessor conditional compares DOPAIR1_BRANCH with runner_dopair1_density_branch using ==. Unless both expand to integer constants, the preprocessor replaces the remaining identifiers with 0 and the comparison is always true -- please confirm. The name also differs from runner_dopair1_branch_density, which runner.c calls in this same patch. + * + * @param r The #runner. + * @param ci The first #cell. + * @param cj The second #cell. + */ +void DOPAIR1_BRANCH(struct runner *r, struct cell *ci, struct cell *cj) { + + const struct engine *restrict e = r->e; + + /* Nothing to do if neither cell is active. */ + if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return; + + /* Both cells must pass cell_are_part_drifted() before they interact. */ + if (!cell_are_part_drifted(ci, e) || !cell_are_part_drifted(cj, e)) + error("Interacting undrifted cells."); + + /* Get the sort ID and the shift. ci and cj are passed by address, so space_getsid() may presumably reorder them -- confirm. */ + double shift[3] = {0.0, 0.0, 0.0}; + const int sid = space_getsid(e->s, &ci, &cj, shift); + + /* Sort a cell along sid if its sort flag is unset or dx_max_sort exceeds space_maxreldx * dmin. */ + if (!(ci->sorted & (1 << sid)) || ci->dx_max_sort > space_maxreldx * ci->dmin) + runner_do_sort(r, ci, (1 << sid), 1); + if (!(cj->sorted & (1 << sid)) || cj->dx_max_sort > space_maxreldx * cj->dmin) + runner_do_sort(r, cj, (1 << sid), 1); + + /* Refuse to continue if either cell still lacks the sort for sid. */ + if (!(ci->sorted & (1 << sid)) || !(cj->sorted & (1 << sid))) + error("Trying to interact unsorted cells."); + +#if defined(WITH_VECTORIZATION) && defined(GADGET2_SPH) && (DOPAIR1_BRANCH == runner_dopair1_density_branch) + if(!sort_is_corner(sid)) + runner_dopair1_density_vec(r, ci, cj, sid, shift); + else + DOPAIR1(r, ci, cj, sid, shift); +#else + DOPAIR1(r, ci, cj, sid, shift); +#endif +} + /** * @brief Compute the interactions between a cell pair (symmetric) * @@ -2291,12 +2318,7 @@ void DOSUB_PAIR1(struct runner *r, struct cell *ci, struct cell *cj, int sid, runner_do_sort(r, cj, (1 << sid), 1); /* Compute the interactions. 
*/ -#if (DOPAIR1 == runner_dopair1_density) && defined(WITH_VECTORIZATION) && \ - defined(GADGET2_SPH) - runner_dopair1_density_vec(r, ci, cj); -#else - DOPAIR1(r, ci, cj); -#endif + DOPAIR1_BRANCH(r, ci, cj); } if (gettimer) TIMER_TOC(TIMER_DOSUB_PAIR); diff --git a/src/runner_doiact_vec.c b/src/runner_doiact_vec.c index a302817f7fec4089f1046eec5cf7ff09aadd25a6..84c9b3a4bae78d5b5592e54afd962d560bda7b0d 100644 --- a/src/runner_doiact_vec.c +++ b/src/runner_doiact_vec.c @@ -284,7 +284,8 @@ __attribute__((always_inline)) INLINE static void storeInteractions( __attribute__((always_inline)) INLINE static void populate_max_d_no_cache( const struct cell *ci, const struct cell *cj, const struct entry *restrict sort_i, const struct entry *restrict sort_j, - const float dx_max, const float rshift, float *max_di, float *max_dj, + const float dx_max, const float rshift, const double hi_max, const double hj_max, + const double di_max, const double dj_min, float *max_di, float *max_dj, int *init_pi, int *init_pj, const struct engine *e) { struct part *restrict parts_i = ci->parts; @@ -293,10 +294,6 @@ __attribute__((always_inline)) INLINE static void populate_max_d_no_cache( float h, d; - /* Get the distance of the last pi and the first pj on the sorted axis.*/ - const float di_max = sort_i[ci->count - 1].d - rshift; - const float dj_min = sort_j[0].d; - int first_pi = 0, last_pj = cj->count - 1; /* Find the first active particle in ci to interact with any particle in cj. @@ -306,13 +303,13 @@ __attribute__((always_inline)) INLINE static void populate_max_d_no_cache( for (int k = ci->count - 1; k >= 0; k--) { p = &parts_i[sort_i[k].i]; h = p->h; - d = sort_i[k].d + h * kernel_gamma + dx_max - rshift; + d = sort_i[k].d + dx_max; - max_di[k] = d; + max_di[k] = d + h * kernel_gamma - rshift; /* If the particle is out of range set the index to * the last active particle within range. 
*/ - if (d < dj_min) { + if (d + hi_max < dj_min) { first_pi = active_id; break; } else { @@ -331,13 +328,14 @@ __attribute__((always_inline)) INLINE static void populate_max_d_no_cache( for (int k = 0; k < cj->count; k++) { p = &parts_j[sort_j[k].i]; h = p->h; - d = sort_j[k].d - h * kernel_gamma - dx_max - rshift; + d = sort_j[k].d - dx_max; - max_dj[k] = d; + /*TODO: don't think rshift should be taken off here, waiting on Pedro. */ + max_dj[k] = d - h * kernel_gamma - rshift; /* If the particle is out of range set the index to * the last active particle within range. */ - if (d > di_max) { + if (d - hj_max > di_max) { last_pj = active_id; break; } else { @@ -613,7 +611,7 @@ __attribute__((always_inline)) INLINE void runner_doself1_density_vec( * @param cj The second #cell. */ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, - struct cell *cj) { + struct cell *cj, const int sid, const double *shift) { #ifdef WITH_VECTORIZATION const struct engine *restrict e = r->e; @@ -622,22 +620,6 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, TIMER_TIC; - /* Anything to do here? */ - if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return; - - if (!cell_are_part_drifted(ci, e) || !cell_are_part_drifted(cj, e)) - error("Interacting undrifted cells."); - - /* Get the sort ID. */ - double shift[3] = {0.0, 0.0, 0.0}; - const int sid = space_getsid(e->s, &ci, &cj, shift); - - /* Have the cells been sorted? */ - if (!(ci->sorted & (1 << sid)) || ci->dx_max_sort > space_maxreldx * ci->dmin) - runner_do_sort(r, ci, (1 << sid), 1); - if (!(cj->sorted & (1 << sid)) || cj->dx_max_sort > space_maxreldx * cj->dmin) - runner_do_sort(r, cj, (1 << sid), 1); - /* Get the cutoff shift. */ double rshift = 0.0; for (int k = 0; k < 3; k++) rshift += shift[k] * runner_shift[sid][k]; @@ -726,8 +708,8 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, /* Find particles maximum distance into cj, max_di[] and ci, max_dj[]. 
*/ /* Also find the first pi that interacts with any particle in cj and the last * pj that interacts with any particle in ci. */ - populate_max_d_no_cache(ci, cj, sort_i, sort_j, dx_max, rshift, max_di, - max_dj, &first_pi, &last_pj, e); + populate_max_d_no_cache(ci, cj, sort_i, sort_j, dx_max, rshift, hi_max, + hj_max, di_max, dj_min, max_di, max_dj, &first_pi, &last_pj, e); /* Find the maximum index into cj that is required by a particle in ci. */ /* Find the maximum index into ci that is required by a particle in cj. */ @@ -777,6 +759,13 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, struct part *restrict pi = &parts_i[sort_i[pid].i]; if (!part_is_active(pi, e)) continue; + /* Set the cache index. */ + int ci_cache_idx = pid - first_pi_align; + + const float hi = ci_cache->h[ci_cache_idx]; + const double di_test = sort_i[pid].d + hi * kernel_gamma + dx_max - rshift; + if (di_test < dj_min) continue; + /* Determine the exit iteration of the interaction loop. */ dj = sort_j[max_ind_j].d; while (max_ind_j > 0 && max_di[pid] < dj) { @@ -786,12 +775,8 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, } int exit_iteration = max_ind_j + 1; - /* Set the cache index. */ - int ci_cache_idx = pid - first_pi_align; - - const float hi = ci_cache->h[ci_cache_idx]; const float hig2 = hi * hi * kernel_gamma2; - + vector pix, piy, piz; /* Fill particle pi vectors. */ @@ -910,6 +895,14 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, struct part *restrict pj = &parts_j[sort_j[pjd].i]; if (!part_is_active(pj, e)) continue; + /* Set the cache index. */ + int cj_cache_idx = pjd; + + /*TODO: rshift term. */ + const float hj = cj_cache->h[cj_cache_idx]; + const double dj_test = sort_j[pjd].d - hj * kernel_gamma - dx_max - rshift; + if (dj_test > di_max) continue; + /* Determine the exit iteration of the interaction loop. 
*/ di = sort_i[max_ind_i].d; while (max_ind_i < count_i - 1 && max_dj[pjd] > di) { @@ -919,10 +912,6 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, } int exit_iteration = max_ind_i; - /* Set the cache index. */ - int cj_cache_idx = pjd; - - const float hj = cj_cache->h[cj_cache_idx]; const float hjg2 = hj * hj * kernel_gamma2; vector pjx, pjy, pjz; diff --git a/src/runner_doiact_vec.h b/src/runner_doiact_vec.h index e252083ae743248a5f23d1772c2e770c5e1c6c14..9e0ed83a167c1b78c5e51221ced20a7dea792e72 100644 --- a/src/runner_doiact_vec.h +++ b/src/runner_doiact_vec.h @@ -35,8 +35,7 @@ /* Function prototypes. */ void runner_doself1_density_vec(struct runner *r, struct cell *restrict c); -void runner_doself1_density_vec_2(struct runner *r, struct cell *restrict c); void runner_dopair1_density_vec(struct runner *r, struct cell *restrict ci, - struct cell *restrict cj); + struct cell *restrict cj, const int sid, const double *shift); #endif /* SWIFT_RUNNER_VEC_H */ diff --git a/tests/test27cells.c b/tests/test27cells.c index 2377cef7b36ca347e8e8729b9f1f3a690d92d164..5e133ab960de4dd25d2f83775a6ff8ceda6a706f 100644 --- a/tests/test27cells.c +++ b/tests/test27cells.c @@ -34,7 +34,8 @@ #if defined(WITH_VECTORIZATION) #define DOSELF1 runner_doself1_density_vec -#define DOPAIR1 runner_dopair1_density_vec +//#define DOPAIR1 runner_dopair1_density_vec +#define DOPAIR1 runner_dopair1_branch_density #define DOSELF1_NAME "runner_doself1_density_vec" #define DOPAIR1_NAME "runner_dopair1_density_vec" #endif @@ -45,7 +46,7 @@ #endif #ifndef DOPAIR1 -#define DOPAIR1 runner_dopair1_density +#define DOPAIR1 runner_dopair1_branch_density #define DOPAIR1_NAME "runner_dopair1_density" #endif @@ -312,9 +313,7 @@ int check_results(struct part *serial_parts, struct part *vec_parts, int count, /* Just a forward declaration... 
*/ void runner_doself1_density(struct runner *r, struct cell *ci); void runner_doself1_density_vec(struct runner *r, struct cell *ci); -void runner_dopair1_density(struct runner *r, struct cell *ci, struct cell *cj); -void runner_dopair1_density_vec(struct runner *r, struct cell *ci, - struct cell *cj); +void runner_dopair1_branch_density(struct runner *r, struct cell *ci, struct cell *cj); /* And go... */ int main(int argc, char *argv[]) {