Commit f159a52f authored by James Willis
Browse files

Perform padding of the cache when populating it instead of on the fly. Create...

Perform padding of the cache when populating it instead of on the fly. Create fake particles to be masked out in the inner interaction loop.
parent 3025b752
......@@ -340,6 +340,9 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
ci_cache->vy[ci_cache_idx] = ci->parts[idx].v[1];
ci_cache->vz[ci_cache_idx] = ci->parts[idx].v[2];
}
float fake_pix = 2.0f * ci_cache->x[ci->count - 1];
for(int i=ci->count - first_pi_align; i<ci->count - first_pi_align + VEC_SIZE; i++)
ci_cache->x[i] = fake_pix;
#if defined(WITH_VECTORIZATION) && defined(__ICC)
#pragma simd
......@@ -356,6 +359,10 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
cj_cache->vy[i] = cj->parts[idx].v[1];
cj_cache->vz[i] = cj->parts[idx].v[2];
}
float fake_pjx = 2.0f * cj_cache->x[last_pj_align];
for(int i=last_pj_align + 1; i<last_pj_align + 1 + VEC_SIZE; i++)
cj_cache->x[i] = fake_pjx;
}
/* @brief Clean the memory allocated by a #cache object.
......
......@@ -1137,16 +1137,8 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, struct cell *
if (rem != 0) {
int pad = VEC_SIZE - rem;
if (exit_iteration_align + pad <= last_pj_align + 1) {
exit_iteration_align += pad;
}
//else {
// exit_iteration_align += pad;
// for(int i=last_pj_align + 1; i<exit_iteration_align; i++) {
// cj_cache->x[i] = pix.f[0] + 2.0f * hi * kernel_gamma;
// }
//}
if (exit_iteration_align + pad <= last_pj_align + 1) exit_iteration_align += pad;
}
vector pjx, pjy, pjz;
......@@ -1272,14 +1264,7 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, struct cell *
if (rem != 0) {
int pad = VEC_SIZE - rem;
if (exit_iteration_align - pad >= first_pi_align) {
exit_iteration_align -= pad;
}
//else {
// for(int i=count_i - first_pi_align; i<count_i - first_pi_align + pad; i++) {
// ci_cache->x[i] = pjx.f[0] + 2.0f * hj * kernel_gamma;
// }
//}
if (exit_iteration_align - pad >= first_pi_align) exit_iteration_align -= pad;
}
vector pix, piy, piz;
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment