Commit 4cb04d51 authored by Matthieu Schaller's avatar Matthieu Schaller
Browse files

Split the P2P and M2P into two separate (vectorizable) loops in the leaf-leaf...

Split the P2P and M2P into two separate (vectorizable) loops in the leaf-leaf gravity P-P interactions.
parent c08fce03
...@@ -59,6 +59,12 @@ struct gravity_cache { ...@@ -59,6 +59,12 @@ struct gravity_cache {
/*! #gpart z acceleration. */ /*! #gpart z acceleration. */
float *restrict a_z SWIFT_CACHE_ALIGN; float *restrict a_z SWIFT_CACHE_ALIGN;
/*! Is this #gpart active ? */
int *restrict active SWIFT_CACHE_ALIGN;
/*! Can this #gpart use a M2P interaction ? */
int *restrict use_mpole SWIFT_CACHE_ALIGN;
/*! Cache size */ /*! Cache size */
int count; int count;
}; };
...@@ -79,6 +85,8 @@ static INLINE void gravity_cache_clean(struct gravity_cache *c) { ...@@ -79,6 +85,8 @@ static INLINE void gravity_cache_clean(struct gravity_cache *c) {
free(c->a_x); free(c->a_x);
free(c->a_y); free(c->a_y);
free(c->a_z); free(c->a_z);
free(c->active);
free(c->use_mpole);
} }
c->count = 0; c->count = 0;
} }
...@@ -97,24 +105,26 @@ static INLINE void gravity_cache_init(struct gravity_cache *c, int count) { ...@@ -97,24 +105,26 @@ static INLINE void gravity_cache_init(struct gravity_cache *c, int count) {
/* Size of the gravity cache */ /* Size of the gravity cache */
const int padded_count = count - (count % VEC_SIZE) + VEC_SIZE; const int padded_count = count - (count % VEC_SIZE) + VEC_SIZE;
const size_t sizeBytes = padded_count * sizeof(float); const size_t sizeBytesF = padded_count * sizeof(float);
const size_t sizeBytesI = padded_count * sizeof(int);
/* Delete old stuff if any */ /* Delete old stuff if any */
gravity_cache_clean(c); gravity_cache_clean(c);
int error = 0; int e = 0;
error += posix_memalign((void **)&c->x, SWIFT_CACHE_ALIGNMENT, sizeBytes); e += posix_memalign((void **)&c->x, SWIFT_CACHE_ALIGNMENT, sizeBytesF);
error += posix_memalign((void **)&c->y, SWIFT_CACHE_ALIGNMENT, sizeBytes); e += posix_memalign((void **)&c->y, SWIFT_CACHE_ALIGNMENT, sizeBytesF);
error += posix_memalign((void **)&c->z, SWIFT_CACHE_ALIGNMENT, sizeBytes); e += posix_memalign((void **)&c->z, SWIFT_CACHE_ALIGNMENT, sizeBytesF);
error += e += posix_memalign((void **)&c->epsilon, SWIFT_CACHE_ALIGNMENT, sizeBytesF);
posix_memalign((void **)&c->epsilon, SWIFT_CACHE_ALIGNMENT, sizeBytes); e += posix_memalign((void **)&c->m, SWIFT_CACHE_ALIGNMENT, sizeBytesF);
error += posix_memalign((void **)&c->m, SWIFT_CACHE_ALIGNMENT, sizeBytes); e += posix_memalign((void **)&c->a_x, SWIFT_CACHE_ALIGNMENT, sizeBytesF);
error += posix_memalign((void **)&c->a_x, SWIFT_CACHE_ALIGNMENT, sizeBytes); e += posix_memalign((void **)&c->a_y, SWIFT_CACHE_ALIGNMENT, sizeBytesF);
error += posix_memalign((void **)&c->a_y, SWIFT_CACHE_ALIGNMENT, sizeBytes); e += posix_memalign((void **)&c->a_z, SWIFT_CACHE_ALIGNMENT, sizeBytesF);
error += posix_memalign((void **)&c->a_z, SWIFT_CACHE_ALIGNMENT, sizeBytes); e += posix_memalign((void **)&c->active, SWIFT_CACHE_ALIGNMENT, sizeBytesI);
e +=
if (error != 0) posix_memalign((void **)&c->use_mpole, SWIFT_CACHE_ALIGNMENT, sizeBytesI);
error("Couldn't allocate gravity cache, size: %d", padded_count);
if (e != 0) error("Couldn't allocate gravity cache, size: %d", padded_count);
c->count = padded_count; c->count = padded_count;
} }
...@@ -129,9 +139,11 @@ static INLINE void gravity_cache_init(struct gravity_cache *c, int count) { ...@@ -129,9 +139,11 @@ static INLINE void gravity_cache_init(struct gravity_cache *c, int count) {
* multiple of the vector length. * multiple of the vector length.
* @param shift A shift to apply to all the particles. * @param shift A shift to apply to all the particles.
*/ */
__attribute__((always_inline)) INLINE void gravity_cache_populate( __attribute__((always_inline)) INLINE static void gravity_cache_populate(
struct gravity_cache *c, const struct gpart *restrict gparts, int gcount, timebin_t max_active_bin, struct gravity_cache *c,
int gcount_padded, const double shift[3]) { const struct gpart *restrict gparts, int gcount, int gcount_padded,
const double shift[3], const float CoM[3], float r_max2,
float theta_crit2) {
/* Make the compiler understand we are in happy vectorization land */ /* Make the compiler understand we are in happy vectorization land */
swift_declare_aligned_ptr(float, x, c->x, SWIFT_CACHE_ALIGNMENT); swift_declare_aligned_ptr(float, x, c->x, SWIFT_CACHE_ALIGNMENT);
...@@ -139,6 +151,9 @@ __attribute__((always_inline)) INLINE void gravity_cache_populate( ...@@ -139,6 +151,9 @@ __attribute__((always_inline)) INLINE void gravity_cache_populate(
swift_declare_aligned_ptr(float, z, c->z, SWIFT_CACHE_ALIGNMENT); swift_declare_aligned_ptr(float, z, c->z, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(float, epsilon, c->epsilon, SWIFT_CACHE_ALIGNMENT); swift_declare_aligned_ptr(float, epsilon, c->epsilon, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(float, m, c->m, SWIFT_CACHE_ALIGNMENT); swift_declare_aligned_ptr(float, m, c->m, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(int, active, c->active, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(int, use_mpole, c->use_mpole,
SWIFT_CACHE_ALIGNMENT);
swift_assume_size(gcount_padded, VEC_SIZE); swift_assume_size(gcount_padded, VEC_SIZE);
/* Fill the input caches */ /* Fill the input caches */
...@@ -148,6 +163,14 @@ __attribute__((always_inline)) INLINE void gravity_cache_populate( ...@@ -148,6 +163,14 @@ __attribute__((always_inline)) INLINE void gravity_cache_populate(
z[i] = (float)(gparts[i].x[2] - shift[2]); z[i] = (float)(gparts[i].x[2] - shift[2]);
epsilon[i] = gparts[i].epsilon; epsilon[i] = gparts[i].epsilon;
m[i] = gparts[i].mass; m[i] = gparts[i].mass;
active[i] = (int)(gparts[i].time_bin <= max_active_bin);
/* Check whether we can use the multipole instead of P-P */
const float dx = x[i] - CoM[0];
const float dy = y[i] - CoM[1];
const float dz = z[i] - CoM[2];
const float r2 = dx * dx + dy * dy + dz * dz;
use_mpole[i] = gravity_M2P_accept(r_max2, theta_crit2, r2);
} }
#ifdef SWIFT_DEBUG_CHECKS #ifdef SWIFT_DEBUG_CHECKS
...@@ -161,21 +184,26 @@ __attribute__((always_inline)) INLINE void gravity_cache_populate( ...@@ -161,21 +184,26 @@ __attribute__((always_inline)) INLINE void gravity_cache_populate(
z[i] = 0.f; z[i] = 0.f;
epsilon[i] = 0.f; epsilon[i] = 0.f;
m[i] = 0.f; m[i] = 0.f;
active[i] = 0;
use_mpole[i] = 0;
} }
} }
/** /**
* @brief Fills a #gravity_cache structure with some #gpart. * @brief Fills a #gravity_cache structure with some #gpart and shift them.
* *
* @param c The #gravity_cache to fill. * @param c The #gravity_cache to fill.
* @param gparts The #gpart array to read from. * @param gparts The #gpart array to read from.
* @param gcount The number of particles to read. * @param gcount The number of particles to read.
* @param gcount_padded The number of particle to read padded to the next * @param gcount_padded The number of particle to read padded to the next
* multiple of the vector length. * multiple of the vector length.
* @param shift A shift to apply to all the particles.
*/ */
__attribute__((always_inline)) INLINE void gravity_cache_populate_no_shift( __attribute__((always_inline)) INLINE static void
struct gravity_cache *c, const struct gpart *restrict gparts, int gcount, gravity_cache_populate_no_mpole(timebin_t max_active_bin,
int gcount_padded) { struct gravity_cache *c,
const struct gpart *restrict gparts, int gcount,
int gcount_padded, const double shift[3]) {
/* Make the compiler understand we are in happy vectorization land */ /* Make the compiler understand we are in happy vectorization land */
swift_declare_aligned_ptr(float, x, c->x, SWIFT_CACHE_ALIGNMENT); swift_declare_aligned_ptr(float, x, c->x, SWIFT_CACHE_ALIGNMENT);
...@@ -183,15 +211,17 @@ __attribute__((always_inline)) INLINE void gravity_cache_populate_no_shift( ...@@ -183,15 +211,17 @@ __attribute__((always_inline)) INLINE void gravity_cache_populate_no_shift(
swift_declare_aligned_ptr(float, z, c->z, SWIFT_CACHE_ALIGNMENT); swift_declare_aligned_ptr(float, z, c->z, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(float, epsilon, c->epsilon, SWIFT_CACHE_ALIGNMENT); swift_declare_aligned_ptr(float, epsilon, c->epsilon, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(float, m, c->m, SWIFT_CACHE_ALIGNMENT); swift_declare_aligned_ptr(float, m, c->m, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(int, active, c->active, SWIFT_CACHE_ALIGNMENT);
swift_assume_size(gcount_padded, VEC_SIZE); swift_assume_size(gcount_padded, VEC_SIZE);
/* Fill the input caches */ /* Fill the input caches */
for (int i = 0; i < gcount; ++i) { for (int i = 0; i < gcount; ++i) {
x[i] = (float)(gparts[i].x[0]); x[i] = (float)(gparts[i].x[0] - shift[0]);
y[i] = (float)(gparts[i].x[1]); y[i] = (float)(gparts[i].x[1] - shift[1]);
z[i] = (float)(gparts[i].x[2]); z[i] = (float)(gparts[i].x[2] - shift[2]);
epsilon[i] = gparts[i].epsilon; epsilon[i] = gparts[i].epsilon;
m[i] = gparts[i].mass; m[i] = gparts[i].mass;
active[i] = (int)(gparts[i].time_bin <= max_active_bin);
} }
#ifdef SWIFT_DEBUG_CHECKS #ifdef SWIFT_DEBUG_CHECKS
...@@ -205,6 +235,7 @@ __attribute__((always_inline)) INLINE void gravity_cache_populate_no_shift( ...@@ -205,6 +235,7 @@ __attribute__((always_inline)) INLINE void gravity_cache_populate_no_shift(
z[i] = 0.f; z[i] = 0.f;
epsilon[i] = 0.f; epsilon[i] = 0.f;
m[i] = 0.f; m[i] = 0.f;
active[i] = 0;
} }
} }
...@@ -219,18 +250,18 @@ __attribute__((always_inline)) INLINE void gravity_cache_write_back( ...@@ -219,18 +250,18 @@ __attribute__((always_inline)) INLINE void gravity_cache_write_back(
const struct gravity_cache *c, struct gpart *restrict gparts, int gcount) { const struct gravity_cache *c, struct gpart *restrict gparts, int gcount) {
/* Make the compiler understand we are in happy vectorization land */ /* Make the compiler understand we are in happy vectorization land */
float *restrict a_x = c->a_x; swift_declare_aligned_ptr(float, a_x, c->a_x, SWIFT_CACHE_ALIGNMENT);
float *restrict a_y = c->a_y; swift_declare_aligned_ptr(float, a_y, c->a_y, SWIFT_CACHE_ALIGNMENT);
float *restrict a_z = c->a_z; swift_declare_aligned_ptr(float, a_z, c->a_z, SWIFT_CACHE_ALIGNMENT);
swift_align_information(a_x, SWIFT_CACHE_ALIGNMENT); swift_declare_aligned_ptr(int, active, c->active, SWIFT_CACHE_ALIGNMENT);
swift_align_information(a_y, SWIFT_CACHE_ALIGNMENT);
swift_align_information(a_z, SWIFT_CACHE_ALIGNMENT);
/* Write stuff back to the particles */ /* Write stuff back to the particles */
for (int i = 0; i < gcount; ++i) { for (int i = 0; i < gcount; ++i) {
gparts[i].a_grav[0] += a_x[i]; if (active[i]) {
gparts[i].a_grav[1] += a_y[i]; gparts[i].a_grav[0] += a_x[i];
gparts[i].a_grav[2] += a_z[i]; gparts[i].a_grav[1] += a_y[i];
gparts[i].a_grav[2] += a_z[i];
}
} }
} }
......
...@@ -2411,4 +2411,26 @@ __attribute__((always_inline)) INLINE static int gravity_multipole_accept( ...@@ -2411,4 +2411,26 @@ __attribute__((always_inline)) INLINE static int gravity_multipole_accept(
return (r2 * theta_crit2 > size2); return (r2 * theta_crit2 > size2);
} }
/**
* @brief Checks whether a particle-cell interaction can be appromixated by a
* M2P
* interaction using the distance and cell radius.
*
* We use the multipole acceptance criterion of Dehnen, 2002, JCoPh, Volume 179,
* Issue 1, pp.27-42, equation 10.
*
* @param r_max2 The square of the size of the multipole.
* @param theta_crit2 The square of the critical opening angle.
* @param r2 Square of the distance (periodically wrapped) between the
* multipoles.
*/
__attribute__((always_inline)) INLINE static int gravity_M2P_accept(
float r_max2, float theta_crit2, float r2) {
// MATTHIEU: Make this mass-dependent ?
/* Multipole acceptance criterion (Dehnen 2002, eq.10) */
return (r2 * theta_crit2 > r_max2);
}
#endif /* SWIFT_MULTIPOLE_H */ #endif /* SWIFT_MULTIPOLE_H */
...@@ -155,345 +155,313 @@ void runner_dopair_grav_mm(const struct runner *r, struct cell *restrict ci, ...@@ -155,345 +155,313 @@ void runner_dopair_grav_mm(const struct runner *r, struct cell *restrict ci,
TIMER_TOC(timer_dopair_grav_mm); TIMER_TOC(timer_dopair_grav_mm);
} }
/** static INLINE void runner_dopair_grav_pp_full(const struct engine *e,
* @brief Computes the interaction of all the particles in a cell with all the struct gravity_cache *ci_cache,
* particles of another cell using the full Newtonian potential struct gravity_cache *cj_cache,
* int gcount_i, int gcount_j,
* @param r The #runner. int gcount_padded_j,
* @param ci The first #cell. struct gpart *restrict gparts_i,
* @param cj The other #cell. struct gpart *restrict gparts_j) {
* @param shift The distance vector (periodically wrapped) between the cell
* centres.
*/
void runner_dopair_grav_pp_full(struct runner *r, struct cell *ci,
struct cell *cj, double shift[3]) {
/* Some constants */
struct gravity_cache *const ci_cache = &r->ci_gravity_cache;
struct gravity_cache *const cj_cache = &r->cj_gravity_cache;
const struct engine *const e = r->e;
const struct gravity_props *props = e->gravity_properties;
const float theta_crit2 = props->theta_crit2;
/* Cell properties */
const int gcount_i = ci->gcount;
const int gcount_j = cj->gcount;
struct gpart *restrict gparts_i = ci->gparts;
struct gpart *restrict gparts_j = cj->gparts;
const int ci_active = cell_is_active(ci, e);
const int cj_active = cell_is_active(cj, e);
const double loc_i[3] = {ci->loc[0], ci->loc[1], ci->loc[2]};
const double loc_j[3] = {cj->loc[0], cj->loc[1], cj->loc[2]};
const double loc_mean[3] = {0.5 * (loc_i[0] + loc_j[0]),
0.5 * (loc_i[1] + loc_j[1]),
0.5 * (loc_i[2] + loc_j[2])};
/* Anything to do here ?*/
if (!ci_active && !cj_active) return;
/* Check that we fit in cache */ /* Loop over all particles in ci... */
if (gcount_i > ci_cache->count || gcount_j > cj_cache->count) for (int pid = 0; pid < gcount_i; pid++) {
error("Not enough space in the caches! gcount_i=%d gcount_j=%d", gcount_i,
gcount_j);
/* Computed the padded counts */ /* Skip inactive particles */
const int gcount_padded_i = gcount_i - (gcount_i % VEC_SIZE) + VEC_SIZE; if (!ci_cache->active[pid]) continue;
const int gcount_padded_j = gcount_j - (gcount_j % VEC_SIZE) + VEC_SIZE;
/* Fill the caches */ /* Skip particle that can use the multipole */
gravity_cache_populate(ci_cache, gparts_i, gcount_i, gcount_padded_i, if (ci_cache->use_mpole[pid]) continue;
loc_mean);
gravity_cache_populate(cj_cache, gparts_j, gcount_j, gcount_padded_j,
loc_mean);
/* Recover the multipole info and shift the CoM locations */ #ifdef SWIFT_DEBUG_CHECKS
const float rmax_i = ci->multipole->r_max; if (!gpart_is_active(&gparts_i[pid], e))
const float rmax_j = cj->multipole->r_max; error("Active particle went through the cache");
const struct multipole *multi_i = &ci->multipole->m_pole; #endif
const struct multipole *multi_j = &cj->multipole->m_pole;
const float CoM_i[3] = {ci->multipole->CoM[0] - loc_mean[0],
ci->multipole->CoM[1] - loc_mean[1],
ci->multipole->CoM[2] - loc_mean[2]};
const float CoM_j[3] = {cj->multipole->CoM[0] - loc_mean[0],
cj->multipole->CoM[1] - loc_mean[1],
cj->multipole->CoM[2] - loc_mean[2]};
/* Ok... Here we go ! */ const float x_i = ci_cache->x[pid];
const float y_i = ci_cache->y[pid];
const float z_i = ci_cache->z[pid];
if (ci_active) { /* Some powers of the softening length */
const float h_i = ci_cache->epsilon[pid];
const float h2_i = h_i * h_i;
const float h_inv_i = 1.f / h_i;
const float h_inv3_i = h_inv_i * h_inv_i * h_inv_i;
/* Loop over all particles in ci... */ /* Local accumulators for the acceleration */
for (int pid = 0; pid < gcount_i; pid++) { float a_x = 0.f, a_y = 0.f, a_z = 0.f;
/* Skip inactive particles */ /* Make the compiler understand we are in happy vectorization land */
if (!gpart_is_active(&gparts_i[pid], e)) continue; swift_align_information(cj_cache->x, SWIFT_CACHE_ALIGNMENT);
swift_align_information(cj_cache->y, SWIFT_CACHE_ALIGNMENT);
swift_align_information(cj_cache->z, SWIFT_CACHE_ALIGNMENT);
swift_align_information(cj_cache->m, SWIFT_CACHE_ALIGNMENT);
swift_assume_size(gcount_padded_j, VEC_SIZE);
const float x_i = ci_cache->x[pid]; /* Loop over every particle in the other cell. */
const float y_i = ci_cache->y[pid]; for (int pjd = 0; pjd < gcount_padded_j; pjd++) {
const float z_i = ci_cache->z[pid];
/* Some powers of the softening length */ /* Get info about j */
const float h_i = ci_cache->epsilon[pid]; const float x_j = cj_cache->x[pjd];
const float h2_i = h_i * h_i; const float y_j = cj_cache->y[pjd];
const float h_inv_i = 1.f / h_i; const float z_j = cj_cache->z[pjd];
const float h_inv3_i = h_inv_i * h_inv_i * h_inv_i; const float mass_j = cj_cache->m[pjd];
/* Distance to the multipole in cj */ /* Compute the pairwise (square) distance. */
const float dx = x_i - CoM_j[0]; const float dx = x_i - x_j;
const float dy = y_i - CoM_j[1]; const float dy = y_i - y_j;
const float dz = z_i - CoM_j[2]; const float dz = z_i - z_j;
const float r2 = dx * dx + dy * dy + dz * dz; const float r2 = dx * dx + dy * dy + dz * dz;
/* Can we use the multipole in cj ? */ #ifdef SWIFT_DEBUG_CHECKS
if (gravity_multipole_accept(0., rmax_j, theta_crit2, r2)) { if (r2 == 0.f) error("Interacting particles with 0 distance");
/* Interact! */ /* Check that particles have been drifted to the current time */
float f_x, f_y, f_z; if (gparts_i[pid].ti_drift != e->ti_current)
runner_iact_grav_pm(dx, dy, dz, r2, h_i, h_inv_i, multi_j, &f_x, &f_y, error("gpi not drifted to current time");
&f_z); if (pjd < gcount_j && gparts_j[pjd].ti_drift != e->ti_current)
error("gpj not drifted to current time");
#endif
/* Interact! */
float f_ij;
runner_iact_grav_pp_full(r2, h2_i, h_inv_i, h_inv3_i, mass_j, &f_ij);
/* Store it back */ /* Store it back */
ci_cache->a_x[pid] = f_x; a_x -= f_ij * dx;
ci_cache->a_y[pid] = f_y; a_y -= f_ij * dy;
ci_cache->a_z[pid] = f_z; a_z -= f_ij * dz;
#ifdef SWIFT_DEBUG_CHECKS #ifdef SWIFT_DEBUG_CHECKS
/* Update the interaction counter */ /* Update the interaction counter if it's not a padded gpart */
gparts_i[pid].num_interacted += cj->multipole->m_pole.num_gpart; if (pjd < gcount_j) gparts_i[pid].num_interacted++;
#endif #endif
/* Done with this particle */ }
continue;
}
/* Ok, as of here we have no choice but directly interact everything */
/* Local accumulators for the acceleration */ /* Store everything back in cache */
float a_x = 0.f, a_y = 0.f, a_z = 0.f; ci_cache->a_x[pid] = a_x;
ci_cache->a_y[pid] = a_y;
ci_cache->a_z[pid] = a_z;
}
}
/* Make the compiler understand we are in happy vectorization land */ static INLINE void runner_dopair_grav_pp_truncated(
swift_align_information(cj_cache->x, SWIFT_CACHE_ALIGNMENT); const struct engine *e, const float rlr_inv, struct gravity_cache *ci_cache,
swift_align_information(cj_cache->y, SWIFT_CACHE_ALIGNMENT); struct gravity_cache *cj_cache, int gcount_i, int gcount_j,
swift_align_information(cj_cache->z, SWIFT_CACHE_ALIGNMENT); int gcount_padded_j, struct gpart *restrict gparts_i,
swift_align_information(cj_cache->m, SWIFT_CACHE_ALIGNMENT); struct gpart *restrict gparts_j) {
swift_assume_size(gcount_padded_j, VEC_SIZE);
/* Loop over every particle in the other cell. */ /* Loop over all particles in ci... */
for (int pjd = 0; pjd < gcount_padded_j; pjd++) { for (int pid = 0; pid < gcount_i; pid++) {
/* Get info about j */ /* Skip inactive particles */
const float x_j = cj_cache->x[pjd]; if (!ci_cache->active[pid]) continue;
const float y_j = cj_cache->y[pjd];
const float z_j = cj_cache->z[pjd];
const float mass_j = cj_cache->m[pjd];
/* Compute the pairwise (square) distance. */ /* Skip particle that can use the multipole */
const float dx = x_i - x_j; if (ci_cache->use_mpole[pid]) continue;
const float dy = y_i - y_j;
const float dz = z_i - z_j;
const float r2 = dx * dx + dy * dy + dz * dz;
#ifdef SWIFT_DEBUG_CHECKS #ifdef SWIFT_DEBUG_CHECKS
if (r2 == 0.f) error("Interacting particles with 0 distance"); if (!gpart_is_active(&gparts_i[pid], e))
error("Active particle went through the cache");
/* Check that particles have been drifted to the current time */
if (gparts_i[pid].ti_drift != e->ti_current)
error("gpi not drifted to current time");
if (pjd < gcount_j && gparts_j[pjd].ti_drift != e->ti_current)
error("gpj not drifted to current time");
#endif #endif
/* Interact! */ const float x_i = ci_cache->x[pid];
float f_ij; const float y_i = ci_cache->y[pid];
runner_iact_grav_pp_full(r2, h2_i, h_inv_i, h_inv3_i, mass_j, &f_ij); const float z_i = ci_cache->z[pid];
/* Store it back */
a_x -= f_ij * dx;
a_y -= f_ij * dy;
a_z -= f_ij * dz;
#ifdef SWIFT_DEBUG_CHECKS
/* Update the interaction counter if it's not a padded gpart */
if (pjd < gcount_j) gparts_i[pid].num_interacted++;
#endif
}
/* Store everything back in cache */ /* Some powers of the softening length */
ci_cache->a_x[pid] = a_x; const float h_i = ci_cache->epsilon[pid];
ci_cache->a_y[pid] = a_y; const float h2_i = h_i * h_i;
ci_cache->a_z[pid] = a_z; const float h_inv_i = 1.f / h_i;
} const float h_inv3_i = h_inv_i * h_inv_i * h_inv_i;
}
/* Now do the opposite loop */ /* Local accumulators for the acceleration */
if (cj_active) { float a_x = 0.f, a_y = 0.f, a_z = 0.f;
/* Loop over all particles in cj... */ /* Make the compiler understand we are in happy vectorization land */
for (int pjd = 0; pjd < gcount_j; pjd++) { swift_align_information(cj_cache->x, SWIFT_CACHE_ALIGNMENT);
swift_align_information(cj_cache->y, SWIFT_CACHE_ALIGNMENT);
swift_align_information(cj_cache->z, SWIFT_CACHE_ALIGNMENT);
swift_align_information(cj_cache->m, SWIFT_CACHE_ALIGNMENT);
swift_assume_size(gcount_padded_j, VEC_SIZE);
/* Skip inactive particles */ /* Loop over every particle in the other cell. */
if (!gpart_is_active(&gparts_j[pjd], e)) continue; for (int pjd = 0; pjd < gcount_padded_j; pjd++) {
/* Get info about j */
const float x_j = cj_cache->x[pjd]; const float x_j = cj_cache->x[pjd];
const float y_j = cj_cache->y[pjd]; const float y_j = cj_cache->y[pjd];
const float z_j = cj_cache->z[pjd]; const float z_j = cj_cache->z[pjd];
const float mass_j = cj_cache->m[pjd];
/* Some powers of the softening length */ /* Compute the pairwise (square) distance. */
const float h_j = cj_cache->epsilon[pjd]; const float dx = x_i - x_j;
const float h2_j = h_j * h_j; const float dy = y_i - y_j;
const float h_inv_j = 1.f / h_j; const float dz = z_i - z_j;
const float h_inv3_j = h_inv_j * h_inv_j * h_inv_j;
/* Distance to the multipole in ci */
const float dx = x_j - CoM_i[0];
const float dy = y_j - CoM_i[1];