Skip to content
Snippets Groups Projects
Commit ab51e592 authored by James Willis's avatar James Willis
Browse files

Fix the exit iteration padding.

parent 214e577e
No related branches found
No related tags found
1 merge request: !445 "Doself subset vec"
...@@ -1255,20 +1255,10 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, ...@@ -1255,20 +1255,10 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
vector v_curlvySum = vector_setzero(); vector v_curlvySum = vector_setzero();
vector v_curlvzSum = vector_setzero(); vector v_curlvzSum = vector_setzero();
/* Pad the exit iteration if there is a serial remainder. */
int exit_iteration_align = exit_iteration;
const int rem = exit_iteration % VEC_SIZE;
if (rem != 0) {
const int pad = VEC_SIZE - rem;
if (exit_iteration_align + pad <= last_pj + 1)
exit_iteration_align += pad;
}
/* Loop over the parts in cj. Making sure to perform an iteration of the /* Loop over the parts in cj. Making sure to perform an iteration of the
* loop even if exit_iteration_align is zero and there is only one * loop even if exit_iteration_align is zero and there is only one
* particle to interact with.*/ * particle to interact with.*/
for (int pjd = 0; pjd <= exit_iteration_align; pjd += VEC_SIZE) { for (int pjd = 0; pjd <= exit_iteration; pjd += VEC_SIZE) {
/* Get the cache index to the jth particle. */ /* Get the cache index to the jth particle. */
const int cj_cache_idx = pjd; const int cj_cache_idx = pjd;
...@@ -1376,7 +1366,7 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, ...@@ -1376,7 +1366,7 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
int exit_iteration_align = exit_iteration - first_pi; int exit_iteration_align = exit_iteration - first_pi;
/* Pad the exit iteration align so cache reads are aligned. */ /* Pad the exit iteration align so cache reads are aligned. */
const int rem = exit_iteration_align % VEC_SIZE; const int rem = (ci_cache_count - exit_iteration_align) % VEC_SIZE;
if (exit_iteration_align < VEC_SIZE) { if (exit_iteration_align < VEC_SIZE) {
exit_iteration_align = 0; exit_iteration_align = 0;
} else } else
...@@ -1614,20 +1604,10 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci, ...@@ -1614,20 +1604,10 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
vector v_sigSum = vector_set1(pi->force.v_sig); vector v_sigSum = vector_set1(pi->force.v_sig);
vector v_entropy_dtSum = vector_setzero(); vector v_entropy_dtSum = vector_setzero();
/* Pad the exit iteration if there is a serial remainder. */
int exit_iteration_align = exit_iteration;
const int rem = exit_iteration % VEC_SIZE;
if (rem != 0) {
int pad = VEC_SIZE - rem;
if (exit_iteration_align + pad <= last_pj + 1)
exit_iteration_align += pad;
}
/* Loop over the parts in cj. Making sure to perform an iteration of the /* Loop over the parts in cj. Making sure to perform an iteration of the
* loop even if exit_iteration_align is zero and there is only one * loop even if exit_iteration_align is zero and there is only one
* particle to interact with.*/ * particle to interact with.*/
for (int pjd = 0; pjd <= exit_iteration_align; pjd += VEC_SIZE) { for (int pjd = 0; pjd <= exit_iteration; pjd += VEC_SIZE) {
/* Get the cache index to the jth particle. */ /* Get the cache index to the jth particle. */
const int cj_cache_idx = pjd; const int cj_cache_idx = pjd;
...@@ -1749,7 +1729,7 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci, ...@@ -1749,7 +1729,7 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
int exit_iteration_align = exit_iteration - first_pi; int exit_iteration_align = exit_iteration - first_pi;
/* Pad the exit iteration align so cache reads are aligned. */ /* Pad the exit iteration align so cache reads are aligned. */
const int rem = exit_iteration_align % VEC_SIZE; const int rem = (ci_cache_count - exit_iteration_align) % VEC_SIZE;
if (exit_iteration_align < VEC_SIZE) { if (exit_iteration_align < VEC_SIZE) {
exit_iteration_align = 0; exit_iteration_align = 0;
} else } else
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment