Commit 4cb04d51 authored by Matthieu Schaller
Browse files

Split the P2P and M2P into two separate (vectorizable) loops in the leaf-leaf...

Split the P2P and M2P into two separate (vectorizable) loops in the leaf-leaf gravity P-P interactions.
parent c08fce03
......@@ -59,6 +59,12 @@ struct gravity_cache {
/*! #gpart z acceleration. */
float *restrict a_z SWIFT_CACHE_ALIGN;
/*! Is this #gpart active ? */
int *restrict active SWIFT_CACHE_ALIGN;
/*! Can this #gpart use a M2P interaction ? */
int *restrict use_mpole SWIFT_CACHE_ALIGN;
/*! Cache size */
int count;
};
......@@ -79,6 +85,8 @@ static INLINE void gravity_cache_clean(struct gravity_cache *c) {
free(c->a_x);
free(c->a_y);
free(c->a_z);
free(c->active);
free(c->use_mpole);
}
c->count = 0;
}
......@@ -97,24 +105,26 @@ static INLINE void gravity_cache_init(struct gravity_cache *c, int count) {
/* Size of the gravity cache */
const int padded_count = count - (count % VEC_SIZE) + VEC_SIZE;
const size_t sizeBytes = padded_count * sizeof(float);
const size_t sizeBytesF = padded_count * sizeof(float);
const size_t sizeBytesI = padded_count * sizeof(int);
/* Delete old stuff if any */
gravity_cache_clean(c);
int error = 0;
error += posix_memalign((void **)&c->x, SWIFT_CACHE_ALIGNMENT, sizeBytes);
error += posix_memalign((void **)&c->y, SWIFT_CACHE_ALIGNMENT, sizeBytes);
error += posix_memalign((void **)&c->z, SWIFT_CACHE_ALIGNMENT, sizeBytes);
error +=
posix_memalign((void **)&c->epsilon, SWIFT_CACHE_ALIGNMENT, sizeBytes);
error += posix_memalign((void **)&c->m, SWIFT_CACHE_ALIGNMENT, sizeBytes);
error += posix_memalign((void **)&c->a_x, SWIFT_CACHE_ALIGNMENT, sizeBytes);
error += posix_memalign((void **)&c->a_y, SWIFT_CACHE_ALIGNMENT, sizeBytes);
error += posix_memalign((void **)&c->a_z, SWIFT_CACHE_ALIGNMENT, sizeBytes);
if (error != 0)
error("Couldn't allocate gravity cache, size: %d", padded_count);
int e = 0;
e += posix_memalign((void **)&c->x, SWIFT_CACHE_ALIGNMENT, sizeBytesF);
e += posix_memalign((void **)&c->y, SWIFT_CACHE_ALIGNMENT, sizeBytesF);
e += posix_memalign((void **)&c->z, SWIFT_CACHE_ALIGNMENT, sizeBytesF);
e += posix_memalign((void **)&c->epsilon, SWIFT_CACHE_ALIGNMENT, sizeBytesF);
e += posix_memalign((void **)&c->m, SWIFT_CACHE_ALIGNMENT, sizeBytesF);
e += posix_memalign((void **)&c->a_x, SWIFT_CACHE_ALIGNMENT, sizeBytesF);
e += posix_memalign((void **)&c->a_y, SWIFT_CACHE_ALIGNMENT, sizeBytesF);
e += posix_memalign((void **)&c->a_z, SWIFT_CACHE_ALIGNMENT, sizeBytesF);
e += posix_memalign((void **)&c->active, SWIFT_CACHE_ALIGNMENT, sizeBytesI);
e +=
posix_memalign((void **)&c->use_mpole, SWIFT_CACHE_ALIGNMENT, sizeBytesI);
if (e != 0) error("Couldn't allocate gravity cache, size: %d", padded_count);
c->count = padded_count;
}
......@@ -129,9 +139,11 @@ static INLINE void gravity_cache_init(struct gravity_cache *c, int count) {
* multiple of the vector length.
* @param shift A shift to apply to all the particles.
*/
__attribute__((always_inline)) INLINE void gravity_cache_populate(
struct gravity_cache *c, const struct gpart *restrict gparts, int gcount,
int gcount_padded, const double shift[3]) {
__attribute__((always_inline)) INLINE static void gravity_cache_populate(
timebin_t max_active_bin, struct gravity_cache *c,
const struct gpart *restrict gparts, int gcount, int gcount_padded,
const double shift[3], const float CoM[3], float r_max2,
float theta_crit2) {
/* Make the compiler understand we are in happy vectorization land */
swift_declare_aligned_ptr(float, x, c->x, SWIFT_CACHE_ALIGNMENT);
......@@ -139,6 +151,9 @@ __attribute__((always_inline)) INLINE void gravity_cache_populate(
swift_declare_aligned_ptr(float, z, c->z, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(float, epsilon, c->epsilon, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(float, m, c->m, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(int, active, c->active, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(int, use_mpole, c->use_mpole,
SWIFT_CACHE_ALIGNMENT);
swift_assume_size(gcount_padded, VEC_SIZE);
/* Fill the input caches */
......@@ -148,6 +163,14 @@ __attribute__((always_inline)) INLINE void gravity_cache_populate(
z[i] = (float)(gparts[i].x[2] - shift[2]);
epsilon[i] = gparts[i].epsilon;
m[i] = gparts[i].mass;
active[i] = (int)(gparts[i].time_bin <= max_active_bin);
/* Check whether we can use the multipole instead of P-P */
const float dx = x[i] - CoM[0];
const float dy = y[i] - CoM[1];
const float dz = z[i] - CoM[2];
const float r2 = dx * dx + dy * dy + dz * dz;
use_mpole[i] = gravity_M2P_accept(r_max2, theta_crit2, r2);
}
#ifdef SWIFT_DEBUG_CHECKS
......@@ -161,21 +184,26 @@ __attribute__((always_inline)) INLINE void gravity_cache_populate(
z[i] = 0.f;
epsilon[i] = 0.f;
m[i] = 0.f;
active[i] = 0;
use_mpole[i] = 0;
}
}
/**
* @brief Fills a #gravity_cache structure with some #gpart.
* @brief Fills a #gravity_cache structure with some #gpart and shifts them.
*
* @param c The #gravity_cache to fill.
* @param gparts The #gpart array to read from.
* @param gcount The number of particles to read.
* @param gcount_padded The number of particles to read padded to the next
* multiple of the vector length.
* @param shift A shift to apply to all the particles.
*/
__attribute__((always_inline)) INLINE void gravity_cache_populate_no_shift(
struct gravity_cache *c, const struct gpart *restrict gparts, int gcount,
int gcount_padded) {
__attribute__((always_inline)) INLINE static void
gravity_cache_populate_no_mpole(timebin_t max_active_bin,
struct gravity_cache *c,
const struct gpart *restrict gparts, int gcount,
int gcount_padded, const double shift[3]) {
/* Make the compiler understand we are in happy vectorization land */
swift_declare_aligned_ptr(float, x, c->x, SWIFT_CACHE_ALIGNMENT);
......@@ -183,15 +211,17 @@ __attribute__((always_inline)) INLINE void gravity_cache_populate_no_shift(
swift_declare_aligned_ptr(float, z, c->z, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(float, epsilon, c->epsilon, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(float, m, c->m, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(int, active, c->active, SWIFT_CACHE_ALIGNMENT);
swift_assume_size(gcount_padded, VEC_SIZE);
/* Fill the input caches */
for (int i = 0; i < gcount; ++i) {
x[i] = (float)(gparts[i].x[0]);
y[i] = (float)(gparts[i].x[1]);
z[i] = (float)(gparts[i].x[2]);
x[i] = (float)(gparts[i].x[0] - shift[0]);
y[i] = (float)(gparts[i].x[1] - shift[1]);
z[i] = (float)(gparts[i].x[2] - shift[2]);
epsilon[i] = gparts[i].epsilon;
m[i] = gparts[i].mass;
active[i] = (int)(gparts[i].time_bin <= max_active_bin);
}
#ifdef SWIFT_DEBUG_CHECKS
......@@ -205,6 +235,7 @@ __attribute__((always_inline)) INLINE void gravity_cache_populate_no_shift(
z[i] = 0.f;
epsilon[i] = 0.f;
m[i] = 0.f;
active[i] = 0;
}
}
......@@ -219,19 +250,19 @@ __attribute__((always_inline)) INLINE void gravity_cache_write_back(
const struct gravity_cache *c, struct gpart *restrict gparts, int gcount) {
/* Make the compiler understand we are in happy vectorization land */
float *restrict a_x = c->a_x;
float *restrict a_y = c->a_y;
float *restrict a_z = c->a_z;
swift_align_information(a_x, SWIFT_CACHE_ALIGNMENT);
swift_align_information(a_y, SWIFT_CACHE_ALIGNMENT);
swift_align_information(a_z, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(float, a_x, c->a_x, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(float, a_y, c->a_y, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(float, a_z, c->a_z, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(int, active, c->active, SWIFT_CACHE_ALIGNMENT);
/* Write stuff back to the particles */
for (int i = 0; i < gcount; ++i) {
if (active[i]) {
gparts[i].a_grav[0] += a_x[i];
gparts[i].a_grav[1] += a_y[i];
gparts[i].a_grav[2] += a_z[i];
}
}
}
#endif /* SWIFT_GRAVITY_CACHE_H */
......@@ -2411,4 +2411,26 @@ __attribute__((always_inline)) INLINE static int gravity_multipole_accept(
return (r2 * theta_crit2 > size2);
}
/**
 * @brief Checks whether a particle-cell interaction can be approximated by a
 * M2P interaction using the distance and cell radius.
 *
 * We use the multipole acceptance criterion of Dehnen, 2002, JCoPh, Volume 179,
 * Issue 1, pp.27-42, equation 10.
 *
 * @param r_max2 The square of the size of the multipole.
 * @param theta_crit2 The square of the critical opening angle.
 * @param r2 Square of the distance (periodically wrapped) between the
 * multipoles.
 *
 * @return 1 if r2 * theta_crit2 is strictly larger than r_max2 (the M2P
 * interaction is accepted), 0 otherwise.
 */
__attribute__((always_inline)) INLINE static int gravity_M2P_accept(
    float r_max2, float theta_crit2, float r2) {

  // MATTHIEU: Make this mass-dependent ?

  /* Multipole acceptance criterion (Dehnen 2002, eq.10): the squared size of
   * the multipole must be strictly below the squared distance scaled by the
   * squared opening angle. */
  return (r_max2 < r2 * theta_crit2) ? 1 : 0;
}
#endif /* SWIFT_MULTIPOLE_H */
This diff is collapsed.
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment