From 33a74261f3aa5d19d38cfaf446a8c5be393968c6 Mon Sep 17 00:00:00 2001 From: Matthieu Schaller <matthieu.schaller@durham.ac.uk> Date: Thu, 7 Sep 2017 12:40:14 +0200 Subject: [PATCH] Also make the DOSELF2_NAIVE function care about active/inactive status. --- src/runner_doiact.h | 50 +++++++++++++-------------------------------- 1 file changed, 14 insertions(+), 36 deletions(-) diff --git a/src/runner_doiact.h b/src/runner_doiact.h index 541f1b0e70..b4eb2a13df 100644 --- a/src/runner_doiact.h +++ b/src/runner_doiact.h @@ -357,15 +357,6 @@ void DOSELF2_NAIVE(struct runner *r, struct cell *restrict c) { error("Don't use in actual runs ! Slow code !"); #endif -#ifdef WITH_OLD_VECTORIZATION - int icount = 0; - float r2q[VEC_SIZE] __attribute__((aligned(16))); - float hiq[VEC_SIZE] __attribute__((aligned(16))); - float hjq[VEC_SIZE] __attribute__((aligned(16))); - float dxq[3 * VEC_SIZE] __attribute__((aligned(16))); - struct part *piq[VEC_SIZE], *pjq[VEC_SIZE]; -#endif - TIMER_TIC; /* Anything to do here? */ @@ -382,12 +373,15 @@ void DOSELF2_NAIVE(struct runner *r, struct cell *restrict c) { const double pix[3] = {pi->x[0], pi->x[1], pi->x[2]}; const float hi = pi->h; const float hig2 = hi * hi * kernel_gamma2; + const int pi_active = part_is_active(pi, e); /* Loop over the parts in cj. */ for (int pjd = pid + 1; pjd < count; pjd++) { /* Get a pointer to the jth particle. */ struct part *restrict pj = &parts[pjd]; + const float hj = pj->h; + const int pj_active = part_is_active(pj, e); /* Compute the pairwise distance. */ float r2 = 0.0f; @@ -396,47 +390,31 @@ void DOSELF2_NAIVE(struct runner *r, struct cell *restrict c) { dx[k] = pix[k] - pj->x[k]; r2 += dx[k] * dx[k]; } + const float hjg2 = hj * hj * kernel_gamma2; /* Hit or miss? */ - if (r2 < hig2 || r2 < pj->h * pj->h * kernel_gamma2) { + if (r2 < hig2 || r2 < hjg2) { -#ifndef WITH_OLD_VECTORIZATION + if (pi_active && pj_active) { - IACT(r2, dx, hi, pj->h, pi, pj); + IACT(r2, dx, hi, hj, pi, pj); + } else if (pi_active) { -#else + IACT_NONSYM(r2, dx, hi, hj, pi, pj); + } else if (pj_active) { - /* Add this interaction to the queue. */ - r2q[icount] = r2; - dxq[3 * icount + 0] = dx[0]; - dxq[3 * icount + 1] = dx[1]; - dxq[3 * icount + 2] = dx[2]; - hiq[icount] = hi; - hjq[icount] = pj->h; - piq[icount] = pi; - pjq[icount] = pj; - icount += 1; + dx[0] = -dx[0]; + dx[1] = -dx[1]; + dx[2] = -dx[2]; - /* Flush? */ - if (icount == VEC_SIZE) { - IACT_VEC(r2q, dxq, hiq, hjq, piq, pjq); - icount = 0; + IACT_NONSYM(r2, dx, hj, hi, pj, pi); } - -#endif } } /* loop over the parts in cj. */ } /* loop over the parts in ci. */ -#ifdef WITH_OLD_VECTORIZATION - /* Pick up any leftovers. */ - if (icount > 0) - for (int k = 0; k < icount; k++) - IACT(r2q[k], &dxq[3 * k], hiq[k], hjq[k], piq[k], pjq[k]); -#endif - TIMER_TOC(TIMER_DOSELF); } -- GitLab