Commit ab51e592 authored by James Willis
Browse files

Fix the exit iteration padding.

parent 214e577e
...@@ -1255,20 +1255,10 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, ...@@ -1255,20 +1255,10 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
vector v_curlvySum = vector_setzero(); vector v_curlvySum = vector_setzero();
vector v_curlvzSum = vector_setzero(); vector v_curlvzSum = vector_setzero();
/* Pad the exit iteration if there is a serial remainder. */
int exit_iteration_align = exit_iteration;
const int rem = exit_iteration % VEC_SIZE;
if (rem != 0) {
const int pad = VEC_SIZE - rem;
if (exit_iteration_align + pad <= last_pj + 1)
exit_iteration_align += pad;
}
/* Loop over the parts in cj. Making sure to perform an iteration of the /* Loop over the parts in cj. Making sure to perform an iteration of the
* loop even if exit_iteration_align is zero and there is only one * loop even if exit_iteration_align is zero and there is only one
* particle to interact with.*/ * particle to interact with.*/
for (int pjd = 0; pjd <= exit_iteration_align; pjd += VEC_SIZE) { for (int pjd = 0; pjd <= exit_iteration; pjd += VEC_SIZE) {
/* Get the cache index to the jth particle. */ /* Get the cache index to the jth particle. */
const int cj_cache_idx = pjd; const int cj_cache_idx = pjd;
...@@ -1376,7 +1366,7 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, ...@@ -1376,7 +1366,7 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
int exit_iteration_align = exit_iteration - first_pi; int exit_iteration_align = exit_iteration - first_pi;
/* Pad the exit iteration align so cache reads are aligned. */ /* Pad the exit iteration align so cache reads are aligned. */
const int rem = exit_iteration_align % VEC_SIZE; const int rem = (ci_cache_count - exit_iteration_align) % VEC_SIZE;
if (exit_iteration_align < VEC_SIZE) { if (exit_iteration_align < VEC_SIZE) {
exit_iteration_align = 0; exit_iteration_align = 0;
} else } else
...@@ -1614,20 +1604,10 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci, ...@@ -1614,20 +1604,10 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
vector v_sigSum = vector_set1(pi->force.v_sig); vector v_sigSum = vector_set1(pi->force.v_sig);
vector v_entropy_dtSum = vector_setzero(); vector v_entropy_dtSum = vector_setzero();
/* Pad the exit iteration if there is a serial remainder. */
int exit_iteration_align = exit_iteration;
const int rem = exit_iteration % VEC_SIZE;
if (rem != 0) {
int pad = VEC_SIZE - rem;
if (exit_iteration_align + pad <= last_pj + 1)
exit_iteration_align += pad;
}
/* Loop over the parts in cj. Making sure to perform an iteration of the /* Loop over the parts in cj. Making sure to perform an iteration of the
* loop even if exit_iteration_align is zero and there is only one * loop even if exit_iteration_align is zero and there is only one
* particle to interact with.*/ * particle to interact with.*/
for (int pjd = 0; pjd <= exit_iteration_align; pjd += VEC_SIZE) { for (int pjd = 0; pjd <= exit_iteration; pjd += VEC_SIZE) {
/* Get the cache index to the jth particle. */ /* Get the cache index to the jth particle. */
const int cj_cache_idx = pjd; const int cj_cache_idx = pjd;
...@@ -1749,7 +1729,7 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci, ...@@ -1749,7 +1729,7 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
int exit_iteration_align = exit_iteration - first_pi; int exit_iteration_align = exit_iteration - first_pi;
/* Pad the exit iteration align so cache reads are aligned. */ /* Pad the exit iteration align so cache reads are aligned. */
const int rem = exit_iteration_align % VEC_SIZE; const int rem = (ci_cache_count - exit_iteration_align) % VEC_SIZE;
if (exit_iteration_align < VEC_SIZE) { if (exit_iteration_align < VEC_SIZE) {
exit_iteration_align = 0; exit_iteration_align = 0;
} else } else
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment