diff --git a/src/cache.h b/src/cache.h index 8f4cc3d98708b73cc9fe86a5139b11e584a822f4..7f5624a076583b95ac6d18d56cdf71928a4abccc 100644 --- a/src/cache.h +++ b/src/cache.h @@ -340,6 +340,9 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted( ci_cache->vy[ci_cache_idx] = ci->parts[idx].v[1]; ci_cache->vz[ci_cache_idx] = ci->parts[idx].v[2]; } + float fake_pix = 2.0f * ci_cache->x[ci->count - 1]; + for(int i=ci->count - first_pi_align; i<ci->count - first_pi_align + VEC_SIZE; i++) + ci_cache->x[i] = fake_pix; #if defined(WITH_VECTORIZATION) && defined(__ICC) #pragma simd @@ -356,6 +359,10 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted( cj_cache->vy[i] = cj->parts[idx].v[1]; cj_cache->vz[i] = cj->parts[idx].v[2]; } + + float fake_pjx = 2.0f * cj_cache->x[last_pj_align]; + for(int i=last_pj_align + 1; i<last_pj_align + 1 + VEC_SIZE; i++) + cj_cache->x[i] = fake_pjx; } /* @brief Clean the memory allocated by a #cache object. diff --git a/src/runner_doiact_vec.c b/src/runner_doiact_vec.c index ace89c2759010f7376873152edd2e8ad29967d10..b2e90a652b940e483e13cc0fe511415f055833ff 100644 --- a/src/runner_doiact_vec.c +++ b/src/runner_doiact_vec.c @@ -1137,16 +1137,8 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, struct cell * if (rem != 0) { int pad = VEC_SIZE - rem; - if (exit_iteration_align + pad <= last_pj_align + 1) { - exit_iteration_align += pad; - } - //else { - // exit_iteration_align += pad; - // for(int i=last_pj_align + 1; i<exit_iteration_align; i++) { - // cj_cache->x[i] = pix.f[0] + 2.0f * hi * kernel_gamma; - // } - - //} + if (exit_iteration_align + pad <= last_pj_align + 1) exit_iteration_align += pad; + } vector pjx, pjy, pjz; @@ -1272,14 +1264,7 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, struct cell * if (rem != 0) { int pad = VEC_SIZE - rem; - if (exit_iteration_align - pad >= first_pi_align) { - exit_iteration_align -= pad; - } - //else { - // for(int i=count_i - first_pi_align; i<count_i - first_pi_align + pad; i++) { - // ci_cache->x[i] = pjx.f[0] + 2.0f * hj * kernel_gamma; - // } - //} + if (exit_iteration_align - pad >= first_pi_align) exit_iteration_align -= pad; } vector pix, piy, piz;