Commit 2aee3db7 authored by James Willis
Browse files

Pad cache for self-interactions when reading the cache instead of on the fly.

parent 7a77cfa4
......@@ -157,6 +157,9 @@ __attribute__((always_inline)) INLINE void cache_read_particles(
/* Shift the particle positions to a local frame so that single precision can
* be used instead of double precision. */
#if defined(WITH_VECTORIZATION) && defined(__ICC)
#pragma simd
#endif
for (int i = 0; i < ci->count; i++) {
ci_cache->x[i] = ci->parts[i].x[0] - ci->loc[0];
ci_cache->y[i] = ci->parts[i].x[1] - ci->loc[1];
......@@ -169,6 +172,11 @@ __attribute__((always_inline)) INLINE void cache_read_particles(
ci_cache->vz[i] = ci->parts[i].v[2];
}
/* Pad cache with fake particles that exist outside the cell so they will not interact. */
float fake_pix = 2.0f * ci->width[0] * ci->parts[ci->count - 1].x[0];
for (int i = ci->count; i < ci->count + (2 * VEC_SIZE); i++)
ci_cache->x[i] = fake_pix;
#endif
}
......@@ -364,6 +372,8 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
ci_cache->vy[ci_cache_idx] = ci->parts[idx].v[1];
ci_cache->vz[ci_cache_idx] = ci->parts[idx].v[2];
}
/* Pad cache with fake particles that exist outside the cell so they will not interact. */
float fake_pix = 2.0f * ci_cache->x[ci->count - 1];
for (int i = ci->count - first_pi_align;
i < ci->count - first_pi_align + VEC_SIZE; i++)
......@@ -385,6 +395,7 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
cj_cache->vz[i] = cj->parts[idx].v[2];
}
/* Pad cache with fake particles that exist outside the cell so they will not interact. */
float fake_pjx = 2.0f * cj_cache->x[last_pj_align];
for (int i = last_pj_align + 1; i < last_pj_align + 1 + VEC_SIZE; i++)
cj_cache->x[i] = fake_pjx;
......
......@@ -445,13 +445,6 @@ __attribute__((always_inline)) INLINE void runner_doself1_density_vec(
int pad = (num_vec_proc * VEC_SIZE) - rem;
count_align += pad;
/* Set positions to the same as particle pi so when the r2 > 0 mask is
* applied these extra contributions are masked out.*/
for (int i = count; i < count_align; i++) {
cell_cache->x[i] = pix.f[0];
cell_cache->y[i] = piy.f[0];
cell_cache->z[i] = piz.f[0];
}
}
vector pjx, pjy, pjz;
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment