From c9b11f99b3ffc5bf7fed556e1a4419085c0784b6 Mon Sep 17 00:00:00 2001
From: James Willis <james.s.willis@durham.ac.uk>
Date: Mon, 30 Oct 2017 11:26:26 +0000
Subject: [PATCH] Revert "Fix the exit iteration padding."

This reverts commit ab51e5921894dd90ea94d9b389c0200d355adee9.
---
 src/runner_doiact_vec.c | 28 ++++++++++++++++++++++++----
 1 file changed, 24 insertions(+), 4 deletions(-)

diff --git a/src/runner_doiact_vec.c b/src/runner_doiact_vec.c
index 787ad5c21c..7f2ca89d53 100644
--- a/src/runner_doiact_vec.c
+++ b/src/runner_doiact_vec.c
@@ -1255,10 +1255,20 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
       vector v_curlvySum = vector_setzero();
       vector v_curlvzSum = vector_setzero();
 
+      /* Pad the exit iteration if there is a serial remainder. */
+      int exit_iteration_align = exit_iteration;
+      const int rem = exit_iteration % VEC_SIZE;
+      if (rem != 0) {
+        const int pad = VEC_SIZE - rem;
+
+        if (exit_iteration_align + pad <= last_pj + 1)
+          exit_iteration_align += pad;
+      }
+
       /* Loop over the parts in cj. Making sure to perform an iteration of the
        * loop even if exit_iteration_align is zero and there is only one
        * particle to interact with.*/
-      for (int pjd = 0; pjd <= exit_iteration; pjd += VEC_SIZE) {
+      for (int pjd = 0; pjd <= exit_iteration_align; pjd += VEC_SIZE) {
 
         /* Get the cache index to the jth particle. */
         const int cj_cache_idx = pjd;
@@ -1366,7 +1376,7 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
       int exit_iteration_align = exit_iteration - first_pi;
 
       /* Pad the exit iteration align so cache reads are aligned. */
-      const int rem = (ci_cache_count - exit_iteration_align) % VEC_SIZE;
+      const int rem = exit_iteration_align % VEC_SIZE;
       if (exit_iteration_align < VEC_SIZE) {
         exit_iteration_align = 0;
       } else
@@ -1604,10 +1614,20 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
       vector v_sigSum = vector_set1(pi->force.v_sig);
       vector v_entropy_dtSum = vector_setzero();
 
+      /* Pad the exit iteration if there is a serial remainder. */
+      int exit_iteration_align = exit_iteration;
+      const int rem = exit_iteration % VEC_SIZE;
+      if (rem != 0) {
+        int pad = VEC_SIZE - rem;
+
+        if (exit_iteration_align + pad <= last_pj + 1)
+          exit_iteration_align += pad;
+      }
+
       /* Loop over the parts in cj. Making sure to perform an iteration of the
        * loop even if exit_iteration_align is zero and there is only one
        * particle to interact with.*/
-      for (int pjd = 0; pjd <= exit_iteration; pjd += VEC_SIZE) {
+      for (int pjd = 0; pjd <= exit_iteration_align; pjd += VEC_SIZE) {
 
         /* Get the cache index to the jth particle. */
         const int cj_cache_idx = pjd;
@@ -1729,7 +1749,7 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
       int exit_iteration_align = exit_iteration - first_pi;
 
       /* Pad the exit iteration align so cache reads are aligned. */
-      const int rem = (ci_cache_count - exit_iteration_align) % VEC_SIZE;
+      const int rem = exit_iteration_align % VEC_SIZE;
       if (exit_iteration_align < VEC_SIZE) {
         exit_iteration_align = 0;
       } else
-- 
GitLab