From d9ebdcfe7ea2d6293ff2c33990b83a3ecfc8c38c Mon Sep 17 00:00:00 2001 From: Matthieu Schaller <matthieu.schaller@durham.ac.uk> Date: Fri, 8 Sep 2017 22:58:42 +0200 Subject: [PATCH] DOPAIR1() should do its business in the frame of ci --- src/runner_doiact.h | 146 +++++++++++++------------------------------- 1 file changed, 44 insertions(+), 102 deletions(-) diff --git a/src/runner_doiact.h b/src/runner_doiact.h index 737418dabf..b48b84286c 100644 --- a/src/runner_doiact.h +++ b/src/runner_doiact.h @@ -500,8 +500,6 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci, struct part *restrict parts_i, int *restrict ind, int count, struct cell *restrict cj) { - error("FUCK"); - struct engine *e = r->e; #ifdef WITH_OLD_VECTORIZATION @@ -799,17 +797,8 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid, const struct engine *restrict e = r->e; -// DOPAIR1_NAIVE(r, ci, cj); -// return; - -#ifdef WITH_OLD_VECTORIZATION - int icount = 0; - float r2q[VEC_SIZE] __attribute__((aligned(16))); - float hiq[VEC_SIZE] __attribute__((aligned(16))); - float hjq[VEC_SIZE] __attribute__((aligned(16))); - float dxq[3 * VEC_SIZE] __attribute__((aligned(16))); - struct part *piq[VEC_SIZE], *pjq[VEC_SIZE]; -#endif + // DOPAIR1_NAIVE(r, ci, cj); + // return; TIMER_TIC; @@ -873,28 +862,34 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid, /* Get a hold of the ith part in ci. */ struct part *restrict pi = &parts_i[sort_i[pid].i]; - if (!part_is_active(pi, e)) continue; const float hi = pi->h; + + /* Skip inactive particles */ + if (!part_is_active(pi, e)) continue; + + /* Is there anything we need to interact with ? */ const double di = sort_i[pid].d + hi * kernel_gamma + dx_max - rshift; if (di < dj_min) continue; - double pix[3]; - for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k]; + /* Get some additional information about pi */ const float hig2 = hi * hi * kernel_gamma2; + const double pix = pi->x[0] - ci->loc[0] - shift[0]; + const double piy = pi->x[1] - ci->loc[1] - shift[1]; + const double piz = pi->x[2] - ci->loc[2] - shift[2]; /* Loop over the parts in cj. */ for (int pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) { - /* Get a pointer to the jth particle. */ - struct part *restrict pj = &parts_j[sort_j[pjd].i]; + /* Recover pj */ + struct part *pj = &parts_j[sort_j[pjd].i]; + const float hj = pj->h; + const float pjx = pj->x[0] - ci->loc[0]; + const float pjy = pj->x[1] - ci->loc[1]; + const float pjz = pj->x[2] - ci->loc[2]; /* Compute the pairwise distance. */ - float r2 = 0.0f; - float dx[3]; - for (int k = 0; k < 3; k++) { - dx[k] = pix[k] - pj->x[k]; - r2 += dx[k] * dx[k]; - } + float dx[3] = {pix - pjx, piy - pjy, piz - pjz}; + const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; #ifdef SWIFT_DEBUG_CHECKS /* Check that particles have been drifted to the current time */ @@ -907,37 +902,11 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid, /* Hit or miss? */ if (r2 < hig2) { -#ifndef WITH_OLD_VECTORIZATION - - IACT_NONSYM(r2, dx, hi, pj->h, pi, pj); - -#else - - /* Add this interaction to the queue. */ - r2q[icount] = r2; - dxq[3 * icount + 0] = dx[0]; - dxq[3 * icount + 1] = dx[1]; - dxq[3 * icount + 2] = dx[2]; - hiq[icount] = hi; - hjq[icount] = pj->h; - piq[icount] = pi; - pjq[icount] = pj; - icount += 1; - - /* Flush? */ - if (icount == VEC_SIZE) { - IACT_NONSYM_VEC(r2q, dxq, hiq, hjq, piq, pjq); - icount = 0; - } - -#endif + IACT_NONSYM(r2, dx, hi, hj, pi, pj); } - } /* loop over the parts in cj. */ - - } /* loop over the parts in ci. */ - - } /* Cell ci is active */ + } /* loop over the parts in ci. */ + } /* Cell ci is active */ if (cell_is_active(cj, e)) { @@ -946,29 +915,35 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid, pjd++) { /* Get a hold of the jth part in cj. */ - struct part *restrict pj = &parts_j[sort_j[pjd].i]; - if (!part_is_active(pj, e)) continue; + struct part *pj = &parts_j[sort_j[pjd].i]; const float hj = pj->h; + + /* Skip inactive particles */ + if (!part_is_active(pj, e)) continue; + + /* Is there anything we need to interact with ? */ const double dj = sort_j[pjd].d - hj * kernel_gamma - dx_max + rshift; if (dj - rshift > di_max) continue; - double pjx[3]; - for (int k = 0; k < 3; k++) pjx[k] = pj->x[k] + shift[k]; + /* Get some additional information about pj */ const float hjg2 = hj * hj * kernel_gamma2; + const float pjx = pj->x[0] - ci->loc[0]; + const float pjy = pj->x[1] - ci->loc[1]; + const float pjz = pj->x[2] - ci->loc[2]; /* Loop over the parts in ci. */ for (int pid = count_i - 1; pid >= 0 && sort_i[pid].d > dj; pid--) { - /* Get a pointer to the jth particle. */ - struct part *restrict pi = &parts_i[sort_i[pid].i]; + /* Recover pi */ + struct part *pi = &parts_i[sort_i[pid].i]; + const float hi = pi->h; + const float pix = pi->x[0] - ci->loc[0] - shift[0]; + const float piy = pi->x[1] - ci->loc[1] - shift[1]; + const float piz = pi->x[2] - ci->loc[2] - shift[2]; /* Compute the pairwise distance. */ - float r2 = 0.0f; - float dx[3]; - for (int k = 0; k < 3; k++) { - dx[k] = pjx[k] - pi->x[k]; - r2 += dx[k] * dx[k]; - } + float dx[3] = {pjx - pix, pjy - piy, pjz - piz}; + const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; #ifdef SWIFT_DEBUG_CHECKS /* Check that particles have been drifted to the current time */ @@ -981,44 +956,11 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid, /* Hit or miss? */ if (r2 < hjg2) { -#ifndef WITH_OLD_VECTORIZATION - - IACT_NONSYM(r2, dx, hj, pi->h, pj, pi); - -#else - - /* Add this interaction to the queue. */ - r2q[icount] = r2; - dxq[3 * icount + 0] = dx[0]; - dxq[3 * icount + 1] = dx[1]; - dxq[3 * icount + 2] = dx[2]; - hiq[icount] = hj; - hjq[icount] = pi->h; - piq[icount] = pj; - pjq[icount] = pi; - icount += 1; - - /* Flush? */ - if (icount == VEC_SIZE) { - IACT_NONSYM_VEC(r2q, dxq, hiq, hjq, piq, pjq); - icount = 0; - } - -#endif + IACT_NONSYM(r2, dx, hj, hi, pj, pi); } - } /* loop over the parts in ci. */ - - } /* loop over the parts in cj. */ - - } /* Cell cj is active */ - -#ifdef WITH_OLD_VECTORIZATION - /* Pick up any leftovers. */ - if (icount > 0) - for (int k = 0; k < icount; k++) - IACT_NONSYM(r2q[k], &dxq[3 * k], hiq[k], hjq[k], piq[k], pjq[k]); -#endif + } /* loop over the parts in cj. */ + } /* Cell cj is active */ TIMER_TOC(TIMER_DOPAIR); } @@ -1233,7 +1175,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { first_pi -= 1; if (first_pi < 0) first_pi = 0; - /* Get some additional information about pi */ + /* Get some additional information about pj */ const float hjg2 = hj * hj * kernel_gamma2; const float pjx = pj->x[0] - ci->loc[0]; const float pjy = pj->x[1] - ci->loc[1]; -- GitLab