From d9ebdcfe7ea2d6293ff2c33990b83a3ecfc8c38c Mon Sep 17 00:00:00 2001
From: Matthieu Schaller <matthieu.schaller@durham.ac.uk>
Date: Fri, 8 Sep 2017 22:58:42 +0200
Subject: [PATCH] DOPAIR1() should do its business in the frame of ci

---
 src/runner_doiact.h | 146 +++++++++++++-------------------------------
 1 file changed, 44 insertions(+), 102 deletions(-)

diff --git a/src/runner_doiact.h b/src/runner_doiact.h
index 737418dabf..b48b84286c 100644
--- a/src/runner_doiact.h
+++ b/src/runner_doiact.h
@@ -500,8 +500,6 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci,
                    struct part *restrict parts_i, int *restrict ind, int count,
                    struct cell *restrict cj) {
 
-  error("FUCK");
-
   struct engine *e = r->e;
 
 #ifdef WITH_OLD_VECTORIZATION
@@ -799,17 +797,8 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid,
 
   const struct engine *restrict e = r->e;
 
-// DOPAIR1_NAIVE(r, ci, cj);
-// return;
-
-#ifdef WITH_OLD_VECTORIZATION
-  int icount = 0;
-  float r2q[VEC_SIZE] __attribute__((aligned(16)));
-  float hiq[VEC_SIZE] __attribute__((aligned(16)));
-  float hjq[VEC_SIZE] __attribute__((aligned(16)));
-  float dxq[3 * VEC_SIZE] __attribute__((aligned(16)));
-  struct part *piq[VEC_SIZE], *pjq[VEC_SIZE];
-#endif
+  // DOPAIR1_NAIVE(r, ci, cj);
+  // return;
 
   TIMER_TIC;
 
@@ -873,28 +862,34 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid,
 
       /* Get a hold of the ith part in ci. */
       struct part *restrict pi = &parts_i[sort_i[pid].i];
-      if (!part_is_active(pi, e)) continue;
       const float hi = pi->h;
+
+      /* Skip inactive particles */
+      if (!part_is_active(pi, e)) continue;
+
+      /* Is there anything we need to interact with ? */
       const double di = sort_i[pid].d + hi * kernel_gamma + dx_max - rshift;
       if (di < dj_min) continue;
 
-      double pix[3];
-      for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k];
+      /* Get some additional information about pi */
       const float hig2 = hi * hi * kernel_gamma2;
+      const double pix = pi->x[0] - ci->loc[0] - shift[0];
+      const double piy = pi->x[1] - ci->loc[1] - shift[1];
+      const double piz = pi->x[2] - ci->loc[2] - shift[2];
 
       /* Loop over the parts in cj. */
       for (int pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) {
 
-        /* Get a pointer to the jth particle. */
-        struct part *restrict pj = &parts_j[sort_j[pjd].i];
+        /* Recover pj */
+        struct part *pj = &parts_j[sort_j[pjd].i];
+        const float hj = pj->h;
+        const float pjx = pj->x[0] - ci->loc[0];
+        const float pjy = pj->x[1] - ci->loc[1];
+        const float pjz = pj->x[2] - ci->loc[2];
 
         /* Compute the pairwise distance. */
-        float r2 = 0.0f;
-        float dx[3];
-        for (int k = 0; k < 3; k++) {
-          dx[k] = pix[k] - pj->x[k];
-          r2 += dx[k] * dx[k];
-        }
+        float dx[3] = {pix - pjx, piy - pjy, piz - pjz};
+        const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
 
 #ifdef SWIFT_DEBUG_CHECKS
         /* Check that particles have been drifted to the current time */
@@ -907,37 +902,11 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid,
         /* Hit or miss? */
         if (r2 < hig2) {
 
-#ifndef WITH_OLD_VECTORIZATION
-
-          IACT_NONSYM(r2, dx, hi, pj->h, pi, pj);
-
-#else
-
-          /* Add this interaction to the queue. */
-          r2q[icount] = r2;
-          dxq[3 * icount + 0] = dx[0];
-          dxq[3 * icount + 1] = dx[1];
-          dxq[3 * icount + 2] = dx[2];
-          hiq[icount] = hi;
-          hjq[icount] = pj->h;
-          piq[icount] = pi;
-          pjq[icount] = pj;
-          icount += 1;
-
-          /* Flush? */
-          if (icount == VEC_SIZE) {
-            IACT_NONSYM_VEC(r2q, dxq, hiq, hjq, piq, pjq);
-            icount = 0;
-          }
-
-#endif
+          IACT_NONSYM(r2, dx, hi, hj, pi, pj);
         }
-
       } /* loop over the parts in cj. */
-
-    } /* loop over the parts in ci. */
-
-  } /* Cell ci is active */
+    }   /* loop over the parts in ci. */
+  }     /* Cell ci is active */
 
   if (cell_is_active(cj, e)) {
 
@@ -946,29 +915,35 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid,
          pjd++) {
 
       /* Get a hold of the jth part in cj. */
-      struct part *restrict pj = &parts_j[sort_j[pjd].i];
-      if (!part_is_active(pj, e)) continue;
+      struct part *pj = &parts_j[sort_j[pjd].i];
       const float hj = pj->h;
+
+      /* Skip inactive particles */
+      if (!part_is_active(pj, e)) continue;
+
+      /* Is there anything we need to interact with ? */
       const double dj = sort_j[pjd].d - hj * kernel_gamma - dx_max + rshift;
       if (dj - rshift > di_max) continue;
 
-      double pjx[3];
-      for (int k = 0; k < 3; k++) pjx[k] = pj->x[k] + shift[k];
+      /* Get some additional information about pj */
       const float hjg2 = hj * hj * kernel_gamma2;
+      const float pjx = pj->x[0] - ci->loc[0];
+      const float pjy = pj->x[1] - ci->loc[1];
+      const float pjz = pj->x[2] - ci->loc[2];
 
       /* Loop over the parts in ci. */
       for (int pid = count_i - 1; pid >= 0 && sort_i[pid].d > dj; pid--) {
 
-        /* Get a pointer to the jth particle. */
-        struct part *restrict pi = &parts_i[sort_i[pid].i];
+        /* Recover pi */
+        struct part *pi = &parts_i[sort_i[pid].i];
+        const float hi = pi->h;
+        const float pix = pi->x[0] - ci->loc[0] - shift[0];
+        const float piy = pi->x[1] - ci->loc[1] - shift[1];
+        const float piz = pi->x[2] - ci->loc[2] - shift[2];
 
         /* Compute the pairwise distance. */
-        float r2 = 0.0f;
-        float dx[3];
-        for (int k = 0; k < 3; k++) {
-          dx[k] = pjx[k] - pi->x[k];
-          r2 += dx[k] * dx[k];
-        }
+        float dx[3] = {pjx - pix, pjy - piy, pjz - piz};
+        const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
 
 #ifdef SWIFT_DEBUG_CHECKS
         /* Check that particles have been drifted to the current time */
@@ -981,44 +956,11 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid,
         /* Hit or miss? */
         if (r2 < hjg2) {
 
-#ifndef WITH_OLD_VECTORIZATION
-
-          IACT_NONSYM(r2, dx, hj, pi->h, pj, pi);
-
-#else
-
-          /* Add this interaction to the queue. */
-          r2q[icount] = r2;
-          dxq[3 * icount + 0] = dx[0];
-          dxq[3 * icount + 1] = dx[1];
-          dxq[3 * icount + 2] = dx[2];
-          hiq[icount] = hj;
-          hjq[icount] = pi->h;
-          piq[icount] = pj;
-          pjq[icount] = pi;
-          icount += 1;
-
-          /* Flush? */
-          if (icount == VEC_SIZE) {
-            IACT_NONSYM_VEC(r2q, dxq, hiq, hjq, piq, pjq);
-            icount = 0;
-          }
-
-#endif
+          IACT_NONSYM(r2, dx, hj, hi, pj, pi);
         }
-
       } /* loop over the parts in ci. */
-
-    } /* loop over the parts in cj. */
-
-  } /* Cell cj is active */
-
-#ifdef WITH_OLD_VECTORIZATION
-  /* Pick up any leftovers. */
-  if (icount > 0)
-    for (int k = 0; k < icount; k++)
-      IACT_NONSYM(r2q[k], &dxq[3 * k], hiq[k], hjq[k], piq[k], pjq[k]);
-#endif
+    }   /* loop over the parts in cj. */
+  }     /* Cell cj is active */
 
   TIMER_TOC(TIMER_DOPAIR);
 }
@@ -1233,7 +1175,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) {
       first_pi -= 1;
       if (first_pi < 0) first_pi = 0;
 
-      /* Get some additional information about pi */
+      /* Get some additional information about pj */
       const float hjg2 = hj * hj * kernel_gamma2;
       const float pjx = pj->x[0] - ci->loc[0];
       const float pjy = pj->x[1] - ci->loc[1];
-- 
GitLab