From 2aee3db73cec01cbaa793b6b8b810de39c33e7c5 Mon Sep 17 00:00:00 2001 From: James Willis <james.s.willis@durham.ac.uk> Date: Wed, 15 Mar 2017 11:42:42 +0000 Subject: [PATCH] Pad cache for self-interactions when reading the cache instead of on the fly. --- src/cache.h | 11 +++++++++++ src/runner_doiact_vec.c | 7 ------- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/src/cache.h b/src/cache.h index facbb1c4e7..db85216b7d 100644 --- a/src/cache.h +++ b/src/cache.h @@ -157,6 +157,9 @@ __attribute__((always_inline)) INLINE void cache_read_particles( /* Shift the particles positions to a local frame so single precision can be * used instead of double precision. */ +#if defined(WITH_VECTORIZATION) && defined(__ICC) +#pragma simd +#endif for (int i = 0; i < ci->count; i++) { ci_cache->x[i] = ci->parts[i].x[0] - ci->loc[0]; ci_cache->y[i] = ci->parts[i].x[1] - ci->loc[1]; @@ -169,6 +172,11 @@ __attribute__((always_inline)) INLINE void cache_read_particles( ci_cache->vz[i] = ci->parts[i].v[2]; } + /* Pad cache with fake particles that exist outside the cell so they will not interact. */ + float fake_pix = 2.0f * ci->width[0] * ci->parts[ci->count - 1].x[0]; + for (int i = ci->count; i < ci->count + (2 * VEC_SIZE); i++) + ci_cache->x[i] = fake_pix; + #endif } @@ -364,6 +372,8 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted( ci_cache->vy[ci_cache_idx] = ci->parts[idx].v[1]; ci_cache->vz[ci_cache_idx] = ci->parts[idx].v[2]; } + + /* Pad cache with fake particles that exist outside the cell so they will not interact. */ float fake_pix = 2.0f * ci_cache->x[ci->count - 1]; for (int i = ci->count - first_pi_align; i < ci->count - first_pi_align + VEC_SIZE; i++) @@ -385,6 +395,7 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted( cj_cache->vz[i] = cj->parts[idx].v[2]; } + /* Pad cache with fake particles that exist outside the cell so they will not interact. */ float fake_pjx = 2.0f * cj_cache->x[last_pj_align]; for
(int i = last_pj_align + 1; i < last_pj_align + 1 + VEC_SIZE; i++) cj_cache->x[i] = fake_pjx; diff --git a/src/runner_doiact_vec.c b/src/runner_doiact_vec.c index df2a1074f8..c29de502c4 100644 --- a/src/runner_doiact_vec.c +++ b/src/runner_doiact_vec.c @@ -445,13 +445,6 @@ __attribute__((always_inline)) INLINE void runner_doself1_density_vec( int pad = (num_vec_proc * VEC_SIZE) - rem; count_align += pad; - /* Set positions to the same as particle pi so when the r2 > 0 mask is - * applied these extra contributions are masked out.*/ - for (int i = count; i < count_align; i++) { - cell_cache->x[i] = pix.f[0]; - cell_cache->y[i] = piy.f[0]; - cell_cache->z[i] = piz.f[0]; - } } vector pjx, pjy, pjz; -- GitLab