From 2b50cf09ab8648d923c69a7704f0fe848afa8628 Mon Sep 17 00:00:00 2001
From: James Willis <james.s.willis@durham.ac.uk>
Date: Wed, 14 Dec 2016 11:23:12 +0000
Subject: [PATCH] Use unsorted cache for particle properties in first loop.

---
 src/runner_doiact_vec.c | 60 ++++++++++++++++++++++++++++++-----------
 1 file changed, 44 insertions(+), 16 deletions(-)

diff --git a/src/runner_doiact_vec.c b/src/runner_doiact_vec.c
index db4d3a32cf..9e9eaf0864 100644
--- a/src/runner_doiact_vec.c
+++ b/src/runner_doiact_vec.c
@@ -926,6 +926,19 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, struct cell *
   const double dj_min = sort_j[0].d;
   const float dx_max = (ci->dx_max + cj->dx_max);
 
+  /* Get the particle cache from the runner and re-allocate
+   * the cache if it is not big enough for the cell. */
+  struct cache *restrict ci_cache = &r->par_cache;
+
+  if (ci_cache->count < count_i) {
+    cache_init(ci_cache, count_i);
+  }
+  if (cj_cache.count < count_j) {
+    cache_init(&cj_cache, count_j);
+  }
+
+  cache_read_two_cells(ci, cj, ci_cache, &cj_cache, shift);
+
   /* Loop over the parts in ci. */
   for (int pid = count_i - 1;
        pid >= 0 && sort_i[pid].d + hi_max + dx_max > dj_min; pid--) {
@@ -933,12 +946,19 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, struct cell *
     /* Get a hold of the ith part in ci. */
     struct part *restrict pi = &parts_i[sort_i[pid].i];
     if (!part_is_active(pi, e)) continue;
-    const float hi = pi->h;
+
+    int ci_cache_idx = sort_i[pid].i;
+
+    //const float hi = pi->h;
+    const float hi = ci_cache->h[ci_cache_idx];
     const double di = sort_i[pid].d + hi * kernel_gamma + dx_max - rshift;
     if (di < dj_min) continue;
 
     double pix[3];
-    for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k];
+    //for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k];
+    pix[0] = ci_cache->x[ci_cache_idx];
+    pix[1] = ci_cache->y[ci_cache_idx];
+    pix[2] = ci_cache->z[ci_cache_idx];
     const float hig2 = hi * hi * kernel_gamma2;
 
     //vector pix, piy, piz;
@@ -975,19 +995,30 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, struct cell *
     curlvySum.v = vec_setzero();
     curlvzSum.v = vec_setzero();
 
+    //int exit_iteration = count_j;
+    //for (int pjd = 0; pjd < count_j ; pjd++) {
+    //  if(sort_j[pjd].d >= di) exit_iteration = pjd;
+    //}
+
     /* Loop over the parts in cj. */
     for (int pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) {
+    //for (int pjd = 0; pjd < exit_iteration; pjd++) {
 
+      int cj_cache_idx = sort_j[pjd].i;
       /* Get a pointer to the jth particle. */
-      struct part *restrict pj = &parts_j[sort_j[pjd].i];
+      //struct part *restrict pj = &parts_j[sort_j[pjd].i];
 
       /* Compute the pairwise distance. */
-      float r2 = 0.0f;
+      float r2;// = 0.0f;
       float dx[3];
-      for (int k = 0; k < 3; k++) {
-        dx[k] = pix[k] - pj->x[k];
-        r2 += dx[k] * dx[k];
-      }
+      //for (int k = 0; k < 3; k++) {
+      //  dx[k] = pix[k] - pj->x[k] + ci->loc[k];
+      //  r2 += dx[k] * dx[k];
+      //}
+      dx[0] = pix[0] - cj_cache.x[cj_cache_idx];
+      dx[1] = pix[1] - cj_cache.y[cj_cache_idx];
+      dx[2] = pix[2] - cj_cache.z[cj_cache_idx];
+      r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
 
       /* Hit or miss? */
       if (r2 < hig2) {
@@ -997,15 +1028,12 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, struct cell *
         int_cache.dxq[icount] = dx[0];
         int_cache.dyq[icount] = dx[1];
         int_cache.dzq[icount] = dx[2];
-        int_cache.mq[icount] = pj->mass;
-        int_cache.vxq[icount] = pj->v[0];
-        int_cache.vyq[icount] = pj->v[1];
-        int_cache.vzq[icount] = pj->v[2];
+        int_cache.mq[icount] = cj_cache.m[cj_cache_idx];//pj->mass;
+        int_cache.vxq[icount] = cj_cache.vx[cj_cache_idx];//pj->v[0];
+        int_cache.vyq[icount] = cj_cache.vy[cj_cache_idx];//pj->v[1];
+        int_cache.vzq[icount] = cj_cache.vz[cj_cache_idx];//pj->v[2];
         
-        hjq[icount] = pj->h;
-        pjq[icount] = pj;
-
-        icount += 1;
+        icount++;
       }
 
     } /* loop over the parts in cj. */
-- 
GitLab