From f54a189fa1e929ed7ba3fce19eedbcffa1d25c1d Mon Sep 17 00:00:00 2001 From: James Willis <james.s.willis@durham.ac.uk> Date: Fri, 13 Jan 2017 21:16:58 +0000 Subject: [PATCH] Fixed bug in the intrinsic dopair1: break out of the search loop as soon as the exit iteration is found. There is no need to pad the cache or to check for r2>0 in dopair, as particles past the threshold will return false in r2<hig2. --- src/runner_doiact_vec.c | 49 +++++++++++++++++------------------------ 1 file changed, 20 insertions(+), 29 deletions(-) diff --git a/src/runner_doiact_vec.c b/src/runner_doiact_vec.c index 4f5083da52..6f081f2624 100644 --- a/src/runner_doiact_vec.c +++ b/src/runner_doiact_vec.c @@ -895,6 +895,9 @@ __attribute__((always_inline)) INLINE void runner_doself1_density_vec_2( #endif /* WITH_VECTORIZATION */ } +float max_di[MAX_NO_OF_PARTS] __attribute__((aligned(sizeof(VEC_SIZE * sizeof(float))))); /* max distance into ci */ +float max_dj[MAX_NO_OF_PARTS] __attribute__((aligned(sizeof(VEC_SIZE * sizeof(float))))); /* max distance into cj */ + /** * @brief Compute the interactions between a cell pair (non-symmetric). * @@ -1013,9 +1016,12 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, struct cell * int exit_iteration = count_j; for (int pjd = 0; pjd < count_j ; pjd++) { - if(sort_j[pjd].d >= di) exit_iteration = pjd; + if(sort_j[pjd].d > di) { + exit_iteration = pjd; + break; + } } - + /* Pad cache if there is a serial remainder. 
*/ int exit_iteration_align = exit_iteration; int rem = exit_iteration % (num_vec_proc * VEC_SIZE); @@ -1023,13 +1029,6 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, struct cell * int pad = (num_vec_proc * VEC_SIZE) - rem; exit_iteration_align += pad; - /* Set positions to the same as particle pi so when the r2 > 0 mask is - * applied these extra contributions are masked out.*/ - for (int i = exit_iteration; i < exit_iteration_align; i++) { - cj_cache.x[i] = pix.f[0]; - cj_cache.y[i] = piy.f[0]; - cj_cache.z[i] = piz.f[0]; - } } vector pjx, pjy, pjz; @@ -1061,15 +1060,14 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, struct cell * v_r2.v = vec_fma(v_dy.v, v_dy.v, v_r2.v); v_r2.v = vec_fma(v_dz.v, v_dz.v, v_r2.v); - vector v_doi_mask, v_doi_mask_check; + vector v_doi_mask; int doi_mask; - /* Form r2 > 0 mask and r2 < hig2 mask. */ - v_doi_mask_check.v = vec_cmp_gt(v_r2.v, vec_setzero()); + /* Form r2 < hig2 mask. */ v_doi_mask.v = vec_cmp_lt(v_r2.v, v_hig2.v); - /* Combine two masks and form integer mask. */ - doi_mask = vec_cmp_result(vec_and(v_doi_mask.v, v_doi_mask_check.v)); + /* Form integer mask. */ + doi_mask = vec_cmp_result(v_doi_mask.v); /* If there are any interactions left pack interaction values into c2 * cache. */ @@ -1181,7 +1179,10 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, struct cell * int exit_iteration = 0; for (int pid = count_i - 1; pid >= 0; pid--) { - if(sort_i[pid].d <= dj) exit_iteration = pid; + if(sort_i[pid].d < dj) { + exit_iteration = pid; + break; + } } /* Pad cache if there is a serial remainder. 
*/ @@ -1191,13 +1192,6 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, struct cell * int pad = (num_vec_proc * VEC_SIZE) - rem; exit_iteration_align -= pad; - /* Set positions to the same as particle pi so when the r2 > 0 mask is - * applied these extra contributions are masked out.*/ - for (int i = exit_iteration; i >= exit_iteration_align; i--) { - ci_cache->x[i] = pjx.f[0]; - ci_cache->y[i] = pjy.f[0]; - ci_cache->z[i] = pjz.f[0]; - } } vector pix, piy, piz; @@ -1230,15 +1224,14 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, struct cell * v_r2.v = vec_fma(v_dy.v, v_dy.v, v_r2.v); v_r2.v = vec_fma(v_dz.v, v_dz.v, v_r2.v); - vector v_doj_mask, v_doj_mask_check; + vector v_doj_mask; int doj_mask; - /* Form r2 > 0 mask and r2 < hig2 mask. */ - v_doj_mask_check.v = vec_cmp_gt(v_r2.v, vec_setzero()); + /* Form r2 < hig2 mask. */ v_doj_mask.v = vec_cmp_lt(v_r2.v, v_hjg2.v); - /* Combine two masks and form integer mask. */ - doj_mask = vec_cmp_result(vec_and(v_doj_mask.v, v_doj_mask_check.v)); + /* Form integer mask. */ + doj_mask = vec_cmp_result(v_doj_mask.v); /* If there are any interactions left pack interaction values into c2 * cache. */ @@ -1305,8 +1298,6 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, struct cell * #endif /* WITH_VECTORIZATION */ } -float max_di[MAX_NO_OF_PARTS] __attribute__((aligned(sizeof(VEC_SIZE * sizeof(float))))); /* max distance into ci */ -float max_dj[MAX_NO_OF_PARTS] __attribute__((aligned(sizeof(VEC_SIZE * sizeof(float))))); /* max distance into cj */ /** * @brief Compute the interactions between a cell pair (non-symmetric). -- GitLab