Commit 4252fa32 authored by Matthieu Schaller
Browse files

When padding the gravity cache, use -2 * cell->width for the particle position...

When padding the gravity cache, use -2 * cell->width for the particle position to make sure this is an invalid (but reasonable) position.
parent ae03b870
...@@ -145,12 +145,13 @@ static INLINE void gravity_cache_init(struct gravity_cache *c, int count) { ...@@ -145,12 +145,13 @@ static INLINE void gravity_cache_init(struct gravity_cache *c, int count) {
* @param CoM The position of the multipole. * @param CoM The position of the multipole.
* @param r_max2 The square of the multipole radius. * @param r_max2 The square of the multipole radius.
* @param theta_crit2 The square of the opening angle. * @param theta_crit2 The square of the opening angle.
* @param cell The cell we play with (to get reasonable padding positions).
*/ */
__attribute__((always_inline)) INLINE static void gravity_cache_populate( __attribute__((always_inline)) INLINE static void gravity_cache_populate(
timebin_t max_active_bin, struct gravity_cache *c, timebin_t max_active_bin, struct gravity_cache *c,
const struct gpart *restrict gparts, int gcount, int gcount_padded, const struct gpart *restrict gparts, int gcount, int gcount_padded,
const double shift[3], const float CoM[3], float r_max2, const double shift[3], const float CoM[3], float r_max2, float theta_crit2,
float theta_crit2) { const struct cell *cell) {
/* Make the compiler understand we are in happy vectorization land */ /* Make the compiler understand we are in happy vectorization land */
swift_declare_aligned_ptr(float, x, c->x, SWIFT_CACHE_ALIGNMENT); swift_declare_aligned_ptr(float, x, c->x, SWIFT_CACHE_ALIGNMENT);
...@@ -184,11 +185,16 @@ __attribute__((always_inline)) INLINE static void gravity_cache_populate( ...@@ -184,11 +185,16 @@ __attribute__((always_inline)) INLINE static void gravity_cache_populate(
if (gcount_padded < gcount) error("Padded counter smaller than counter"); if (gcount_padded < gcount) error("Padded counter smaller than counter");
#endif #endif
/* Particles used for padding should get impossible positions
* that have a reasonable magnitude. We use the cell width for this */
const float pos_padded[3] = {-2. * cell->width[0], -2. * cell->width[1],
-2. * cell->width[2]};
/* Pad the caches */ /* Pad the caches */
for (int i = gcount; i < gcount_padded; ++i) { for (int i = gcount; i < gcount_padded; ++i) {
x[i] = 0.f; x[i] = pos_padded[0];
y[i] = 0.f; y[i] = pos_padded[1];
z[i] = 0.f; z[i] = pos_padded[2];
epsilon[i] = 0.f; epsilon[i] = 0.f;
m[i] = 0.f; m[i] = 0.f;
active[i] = 0; active[i] = 0;
...@@ -206,12 +212,14 @@ __attribute__((always_inline)) INLINE static void gravity_cache_populate( ...@@ -206,12 +212,14 @@ __attribute__((always_inline)) INLINE static void gravity_cache_populate(
* @param gcount_padded The number of particles to read padded to the next * @param gcount_padded The number of particles to read padded to the next
* multiple of the vector length. * multiple of the vector length.
* @param shift A shift to apply to all the particles. * @param shift A shift to apply to all the particles.
* @param cell The cell we play with (to get reasonable padding positions).
*/ */
__attribute__((always_inline)) INLINE static void __attribute__((always_inline)) INLINE static void
gravity_cache_populate_no_mpole(timebin_t max_active_bin, gravity_cache_populate_no_mpole(timebin_t max_active_bin,
struct gravity_cache *c, struct gravity_cache *c,
const struct gpart *restrict gparts, int gcount, const struct gpart *restrict gparts, int gcount,
int gcount_padded, const double shift[3]) { int gcount_padded, const double shift[3],
const struct cell *cell) {
/* Make the compiler understand we are in happy vectorization land */ /* Make the compiler understand we are in happy vectorization land */
swift_declare_aligned_ptr(float, x, c->x, SWIFT_CACHE_ALIGNMENT); swift_declare_aligned_ptr(float, x, c->x, SWIFT_CACHE_ALIGNMENT);
...@@ -236,11 +244,15 @@ gravity_cache_populate_no_mpole(timebin_t max_active_bin, ...@@ -236,11 +244,15 @@ gravity_cache_populate_no_mpole(timebin_t max_active_bin,
if (gcount_padded < gcount) error("Padded counter smaller than counter"); if (gcount_padded < gcount) error("Padded counter smaller than counter");
#endif #endif
/* Particles used for padding should get impossible positions
* that have a reasonable magnitude. We use the cell width for this */
const float pos_padded[3] = {-2. * cell->width[0], -2. * cell->width[1],
-2. * cell->width[2]};
/* Pad the caches */ /* Pad the caches */
for (int i = gcount; i < gcount_padded; ++i) { for (int i = gcount; i < gcount_padded; ++i) {
x[i] = 0.f; x[i] = pos_padded[0];
y[i] = 0.f; y[i] = pos_padded[1];
z[i] = 0.f; z[i] = pos_padded[2];
epsilon[i] = 0.f; epsilon[i] = 0.f;
m[i] = 0.f; m[i] = 0.f;
active[i] = 0; active[i] = 0;
......
...@@ -505,9 +505,11 @@ void runner_dopair_grav_pp(struct runner *r, struct cell *ci, struct cell *cj) { ...@@ -505,9 +505,11 @@ void runner_dopair_grav_pp(struct runner *r, struct cell *ci, struct cell *cj) {
/* Fill the caches */ /* Fill the caches */
gravity_cache_populate(e->max_active_bin, ci_cache, ci->gparts, gcount_i, gravity_cache_populate(e->max_active_bin, ci_cache, ci->gparts, gcount_i,
gcount_padded_i, shift_i, CoM_j, rmax2_j, theta_crit2); gcount_padded_i, shift_i, CoM_j, rmax2_j, theta_crit2,
ci);
gravity_cache_populate(e->max_active_bin, cj_cache, cj->gparts, gcount_j, gravity_cache_populate(e->max_active_bin, cj_cache, cj->gparts, gcount_j,
gcount_padded_j, shift_j, CoM_i, rmax2_i, theta_crit2); gcount_padded_j, shift_j, CoM_i, rmax2_i, theta_crit2,
cj);
/* Can we use the Newtonian version or do we need the truncated one ? */ /* Can we use the Newtonian version or do we need the truncated one ? */
if (!periodic) { if (!periodic) {
...@@ -645,7 +647,7 @@ void runner_doself_grav_pp_full(struct runner *r, struct cell *c) { ...@@ -645,7 +647,7 @@ void runner_doself_grav_pp_full(struct runner *r, struct cell *c) {
const int gcount_padded = gcount - (gcount % VEC_SIZE) + VEC_SIZE; const int gcount_padded = gcount - (gcount % VEC_SIZE) + VEC_SIZE;
gravity_cache_populate_no_mpole(e->max_active_bin, ci_cache, gparts, gcount, gravity_cache_populate_no_mpole(e->max_active_bin, ci_cache, gparts, gcount,
gcount_padded, loc); gcount_padded, loc, c);
/* Ok... Here we go ! */ /* Ok... Here we go ! */
...@@ -771,7 +773,7 @@ void runner_doself_grav_pp_truncated(struct runner *r, struct cell *c) { ...@@ -771,7 +773,7 @@ void runner_doself_grav_pp_truncated(struct runner *r, struct cell *c) {
const int gcount_padded = gcount - (gcount % VEC_SIZE) + VEC_SIZE; const int gcount_padded = gcount - (gcount % VEC_SIZE) + VEC_SIZE;
gravity_cache_populate_no_mpole(e->max_active_bin, ci_cache, gparts, gcount, gravity_cache_populate_no_mpole(e->max_active_bin, ci_cache, gparts, gcount,
gcount_padded, loc); gcount_padded, loc, c);
/* Ok... Here we go ! */ /* Ok... Here we go ! */
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment