From f159a52ff2c66fa1c2ed06b0a6de770dabbc0049 Mon Sep 17 00:00:00 2001
From: James Willis <james.s.willis@durham.ac.uk>
Date: Tue, 14 Mar 2017 21:03:08 +0000
Subject: [PATCH] Perform padding of the cache when populating it instead of on
 the fly. Create fake particles to be masked out in the inner interaction loop.

---
 src/cache.h             |  7 +++++++
 src/runner_doiact_vec.c | 21 +++------------------
 2 files changed, 10 insertions(+), 18 deletions(-)

diff --git a/src/cache.h b/src/cache.h
index 8f4cc3d987..7f5624a076 100644
--- a/src/cache.h
+++ b/src/cache.h
@@ -340,6 +340,9 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
     ci_cache->vy[ci_cache_idx] = ci->parts[idx].v[1];
     ci_cache->vz[ci_cache_idx] = ci->parts[idx].v[2];
   }
+  float fake_pix = 2.0f * ci_cache->x[ci->count - 1];
+  for(int i=ci->count - first_pi_align; i<ci->count - first_pi_align + VEC_SIZE; i++)
+    ci_cache->x[i] = fake_pix;
  
 #if defined(WITH_VECTORIZATION) && defined(__ICC)
 #pragma simd
@@ -356,6 +359,10 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
     cj_cache->vy[i] = cj->parts[idx].v[1];
     cj_cache->vz[i] = cj->parts[idx].v[2];
   }
+
+  float fake_pjx = 2.0f * cj_cache->x[last_pj_align];
+  for(int i=last_pj_align + 1; i<last_pj_align + 1 + VEC_SIZE; i++)
+    cj_cache->x[i] = fake_pjx;
 }
 
 /* @brief Clean the memory allocated by a #cache object.
diff --git a/src/runner_doiact_vec.c b/src/runner_doiact_vec.c
index ace89c2759..b2e90a652b 100644
--- a/src/runner_doiact_vec.c
+++ b/src/runner_doiact_vec.c
@@ -1137,16 +1137,8 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, struct cell *
     if (rem != 0) {
       int pad = VEC_SIZE - rem;
 
-      if (exit_iteration_align + pad <= last_pj_align + 1) {
-        exit_iteration_align += pad;
-      }
-      //else {
-      //  exit_iteration_align += pad;
-      //  for(int i=last_pj_align + 1; i<exit_iteration_align; i++) {
-      //    cj_cache->x[i] = pix.f[0] + 2.0f * hi * kernel_gamma;
-      //  }
-
-      //}
+      if (exit_iteration_align + pad <= last_pj_align + 1) exit_iteration_align += pad;
+   
     }
 
     vector pjx, pjy, pjz;
@@ -1272,14 +1264,7 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, struct cell *
     if (rem != 0) {
       int pad = VEC_SIZE - rem;
 
-      if (exit_iteration_align - pad >= first_pi_align) {
-        exit_iteration_align -= pad;
-      }
-      //else {
-      //  for(int i=count_i - first_pi_align; i<count_i - first_pi_align + pad; i++) {
-      //      ci_cache->x[i] = pjx.f[0] + 2.0f * hj * kernel_gamma;
-      //  }
-      //}
+      if (exit_iteration_align - pad >= first_pi_align) exit_iteration_align -= pad;     
     }
 
     vector pix, piy, piz;
-- 
GitLab