diff --git a/src/gravity_cache.h b/src/gravity_cache.h index 259c5179f7179987fb3e4a071143d47556361f3a..1b10be283963b1a437926f08e0804435a6d9e18c 100644 --- a/src/gravity_cache.h +++ b/src/gravity_cache.h @@ -59,6 +59,12 @@ struct gravity_cache { /*! #gpart z acceleration. */ float *restrict a_z SWIFT_CACHE_ALIGN; + /*! Is this #gpart active ? */ + int *restrict active SWIFT_CACHE_ALIGN; + + /*! Can this #gpart use a M2P interaction ? */ + int *restrict use_mpole SWIFT_CACHE_ALIGN; + /*! Cache size */ int count; }; @@ -79,6 +85,8 @@ static INLINE void gravity_cache_clean(struct gravity_cache *c) { free(c->a_x); free(c->a_y); free(c->a_z); + free(c->active); + free(c->use_mpole); } c->count = 0; } @@ -97,24 +105,26 @@ static INLINE void gravity_cache_init(struct gravity_cache *c, int count) { /* Size of the gravity cache */ const int padded_count = count - (count % VEC_SIZE) + VEC_SIZE; - const size_t sizeBytes = padded_count * sizeof(float); + const size_t sizeBytesF = padded_count * sizeof(float); + const size_t sizeBytesI = padded_count * sizeof(int); /* Delete old stuff if any */ gravity_cache_clean(c); - int error = 0; - error += posix_memalign((void **)&c->x, SWIFT_CACHE_ALIGNMENT, sizeBytes); - error += posix_memalign((void **)&c->y, SWIFT_CACHE_ALIGNMENT, sizeBytes); - error += posix_memalign((void **)&c->z, SWIFT_CACHE_ALIGNMENT, sizeBytes); - error += - posix_memalign((void **)&c->epsilon, SWIFT_CACHE_ALIGNMENT, sizeBytes); - error += posix_memalign((void **)&c->m, SWIFT_CACHE_ALIGNMENT, sizeBytes); - error += posix_memalign((void **)&c->a_x, SWIFT_CACHE_ALIGNMENT, sizeBytes); - error += posix_memalign((void **)&c->a_y, SWIFT_CACHE_ALIGNMENT, sizeBytes); - error += posix_memalign((void **)&c->a_z, SWIFT_CACHE_ALIGNMENT, sizeBytes); - - if (error != 0) - error("Couldn't allocate gravity cache, size: %d", padded_count); + int e = 0; + e += posix_memalign((void **)&c->x, SWIFT_CACHE_ALIGNMENT, sizeBytesF); + e += posix_memalign((void **)&c->y, SWIFT_CACHE_ALIGNMENT, sizeBytesF); + e += posix_memalign((void **)&c->z, SWIFT_CACHE_ALIGNMENT, sizeBytesF); + e += posix_memalign((void **)&c->epsilon, SWIFT_CACHE_ALIGNMENT, sizeBytesF); + e += posix_memalign((void **)&c->m, SWIFT_CACHE_ALIGNMENT, sizeBytesF); + e += posix_memalign((void **)&c->a_x, SWIFT_CACHE_ALIGNMENT, sizeBytesF); + e += posix_memalign((void **)&c->a_y, SWIFT_CACHE_ALIGNMENT, sizeBytesF); + e += posix_memalign((void **)&c->a_z, SWIFT_CACHE_ALIGNMENT, sizeBytesF); + e += posix_memalign((void **)&c->active, SWIFT_CACHE_ALIGNMENT, sizeBytesI); + e += + posix_memalign((void **)&c->use_mpole, SWIFT_CACHE_ALIGNMENT, sizeBytesI); + + if (e != 0) error("Couldn't allocate gravity cache, size: %d", padded_count); c->count = padded_count; } @@ -129,9 +139,11 @@ static INLINE void gravity_cache_init(struct gravity_cache *c, int count) { * multiple of the vector length. * @param shift A shift to apply to all the particles. */ -__attribute__((always_inline)) INLINE void gravity_cache_populate( - struct gravity_cache *c, const struct gpart *restrict gparts, int gcount, - int gcount_padded, const double shift[3]) { +__attribute__((always_inline)) INLINE static void gravity_cache_populate( + timebin_t max_active_bin, struct gravity_cache *c, + const struct gpart *restrict gparts, int gcount, int gcount_padded, + const double shift[3], const float CoM[3], float r_max2, + float theta_crit2) { /* Make the compiler understand we are in happy vectorization land */ swift_declare_aligned_ptr(float, x, c->x, SWIFT_CACHE_ALIGNMENT); @@ -139,6 +151,9 @@ __attribute__((always_inline)) INLINE void gravity_cache_populate( swift_declare_aligned_ptr(float, z, c->z, SWIFT_CACHE_ALIGNMENT); swift_declare_aligned_ptr(float, epsilon, c->epsilon, SWIFT_CACHE_ALIGNMENT); swift_declare_aligned_ptr(float, m, c->m, SWIFT_CACHE_ALIGNMENT); + swift_declare_aligned_ptr(int, active, c->active, SWIFT_CACHE_ALIGNMENT); + swift_declare_aligned_ptr(int, use_mpole, c->use_mpole, + SWIFT_CACHE_ALIGNMENT); swift_assume_size(gcount_padded, VEC_SIZE); /* Fill the input caches */ @@ -148,6 +163,14 @@ __attribute__((always_inline)) INLINE void gravity_cache_populate( z[i] = (float)(gparts[i].x[2] - shift[2]); epsilon[i] = gparts[i].epsilon; m[i] = gparts[i].mass; + active[i] = (int)(gparts[i].time_bin <= max_active_bin); + + /* Check whether we can use the multipole instead of P-P */ + const float dx = x[i] - CoM[0]; + const float dy = y[i] - CoM[1]; + const float dz = z[i] - CoM[2]; + const float r2 = dx * dx + dy * dy + dz * dz; + use_mpole[i] = gravity_M2P_accept(r_max2, theta_crit2, r2); } #ifdef SWIFT_DEBUG_CHECKS @@ -161,21 +184,26 @@ __attribute__((always_inline)) INLINE void gravity_cache_populate( z[i] = 0.f; epsilon[i] = 0.f; m[i] = 0.f; + active[i] = 0; + use_mpole[i] = 0; } } /** - * @brief Fills a #gravity_cache structure with some #gpart. + * @brief Fills a #gravity_cache structure with some #gpart and shift them. * * @param c The #gravity_cache to fill. * @param gparts The #gpart array to read from. * @param gcount The number of particles to read. * @param gcount_padded The number of particle to read padded to the next * multiple of the vector length. + * @param shift A shift to apply to all the particles. */ -__attribute__((always_inline)) INLINE void gravity_cache_populate_no_shift( - struct gravity_cache *c, const struct gpart *restrict gparts, int gcount, - int gcount_padded) { +__attribute__((always_inline)) INLINE static void +gravity_cache_populate_no_mpole(timebin_t max_active_bin, + struct gravity_cache *c, + const struct gpart *restrict gparts, int gcount, + int gcount_padded, const double shift[3]) { /* Make the compiler understand we are in happy vectorization land */ swift_declare_aligned_ptr(float, x, c->x, SWIFT_CACHE_ALIGNMENT); @@ -183,15 +211,17 @@ __attribute__((always_inline)) INLINE void gravity_cache_populate_no_shift( swift_declare_aligned_ptr(float, z, c->z, SWIFT_CACHE_ALIGNMENT); swift_declare_aligned_ptr(float, epsilon, c->epsilon, SWIFT_CACHE_ALIGNMENT); swift_declare_aligned_ptr(float, m, c->m, SWIFT_CACHE_ALIGNMENT); + swift_declare_aligned_ptr(int, active, c->active, SWIFT_CACHE_ALIGNMENT); swift_assume_size(gcount_padded, VEC_SIZE); /* Fill the input caches */ for (int i = 0; i < gcount; ++i) { - x[i] = (float)(gparts[i].x[0]); - y[i] = (float)(gparts[i].x[1]); - z[i] = (float)(gparts[i].x[2]); + x[i] = (float)(gparts[i].x[0] - shift[0]); + y[i] = (float)(gparts[i].x[1] - shift[1]); + z[i] = (float)(gparts[i].x[2] - shift[2]); epsilon[i] = gparts[i].epsilon; m[i] = gparts[i].mass; + active[i] = (int)(gparts[i].time_bin <= max_active_bin); } #ifdef SWIFT_DEBUG_CHECKS @@ -205,6 +235,7 @@ __attribute__((always_inline)) INLINE void gravity_cache_populate_no_shift( z[i] = 0.f; epsilon[i] = 0.f; m[i] = 0.f; + active[i] = 0; } } @@ -219,18 +250,18 @@ __attribute__((always_inline)) INLINE void gravity_cache_write_back( const struct gravity_cache *c, struct gpart *restrict gparts, int gcount) { /* Make the compiler understand we are in happy vectorization land */ - float *restrict a_x = c->a_x; - float *restrict a_y = c->a_y; - float *restrict a_z = c->a_z; - swift_align_information(a_x, SWIFT_CACHE_ALIGNMENT); - swift_align_information(a_y, SWIFT_CACHE_ALIGNMENT); - swift_align_information(a_z, SWIFT_CACHE_ALIGNMENT); + swift_declare_aligned_ptr(float, a_x, c->a_x, SWIFT_CACHE_ALIGNMENT); + swift_declare_aligned_ptr(float, a_y, c->a_y, SWIFT_CACHE_ALIGNMENT); + swift_declare_aligned_ptr(float, a_z, c->a_z, SWIFT_CACHE_ALIGNMENT); + swift_declare_aligned_ptr(int, active, c->active, SWIFT_CACHE_ALIGNMENT); /* Write stuff back to the particles */ for (int i = 0; i < gcount; ++i) { - gparts[i].a_grav[0] += a_x[i]; - gparts[i].a_grav[1] += a_y[i]; - gparts[i].a_grav[2] += a_z[i]; + if (active[i]) { + gparts[i].a_grav[0] += a_x[i]; + gparts[i].a_grav[1] += a_y[i]; + gparts[i].a_grav[2] += a_z[i]; + } } } diff --git a/src/multipole.h b/src/multipole.h index 2e3bab65c314cdcf94e6eabdac097ca113150e3a..cc67504e97841c01187e8ba17f588ebac007f3a8 100644 --- a/src/multipole.h +++ b/src/multipole.h @@ -2411,4 +2411,26 @@ __attribute__((always_inline)) INLINE static int gravity_multipole_accept( return (r2 * theta_crit2 > size2); } +/** + * @brief Checks whether a particle-cell interaction can be appromixated by a + * M2P + * interaction using the distance and cell radius. + * + * We use the multipole acceptance criterion of Dehnen, 2002, JCoPh, Volume 179, + * Issue 1, pp.27-42, equation 10. + * + * @param r_max2 The square of the size of the multipole. + * @param theta_crit2 The square of the critical opening angle. + * @param r2 Square of the distance (periodically wrapped) between the + * multipoles. + */ +__attribute__((always_inline)) INLINE static int gravity_M2P_accept( + float r_max2, float theta_crit2, float r2) { + + // MATTHIEU: Make this mass-dependent ? + + /* Multipole acceptance criterion (Dehnen 2002, eq.10) */ + return (r2 * theta_crit2 > r_max2); +} + #endif /* SWIFT_MULTIPOLE_H */ diff --git a/src/runner_doiact_grav.h b/src/runner_doiact_grav.h index b1d7107bc46f76d5a96f7dc7b30f93e6f5f20dbc..8334e5b3c71063543c70537b9188ca78e48e470a 100644 --- a/src/runner_doiact_grav.h +++ b/src/runner_doiact_grav.h @@ -155,345 +155,313 @@ void runner_dopair_grav_mm(const struct runner *r, struct cell *restrict ci, TIMER_TOC(timer_dopair_grav_mm); } -/** - * @brief Computes the interaction of all the particles in a cell with all the - * particles of another cell using the full Newtonian potential - * - * @param r The #runner. - * @param ci The first #cell. - * @param cj The other #cell. - * @param shift The distance vector (periodically wrapped) between the cell - * centres. - */ -void runner_dopair_grav_pp_full(struct runner *r, struct cell *ci, - struct cell *cj, double shift[3]) { - - /* Some constants */ - struct gravity_cache *const ci_cache = &r->ci_gravity_cache; - struct gravity_cache *const cj_cache = &r->cj_gravity_cache; - const struct engine *const e = r->e; - const struct gravity_props *props = e->gravity_properties; - const float theta_crit2 = props->theta_crit2; - - /* Cell properties */ - const int gcount_i = ci->gcount; - const int gcount_j = cj->gcount; - struct gpart *restrict gparts_i = ci->gparts; - struct gpart *restrict gparts_j = cj->gparts; - const int ci_active = cell_is_active(ci, e); - const int cj_active = cell_is_active(cj, e); - const double loc_i[3] = {ci->loc[0], ci->loc[1], ci->loc[2]}; - const double loc_j[3] = {cj->loc[0], cj->loc[1], cj->loc[2]}; - const double loc_mean[3] = {0.5 * (loc_i[0] + loc_j[0]), - 0.5 * (loc_i[1] + loc_j[1]), - 0.5 * (loc_i[2] + loc_j[2])}; - - /* Anything to do here ?*/ - if (!ci_active && !cj_active) return; +static INLINE void runner_dopair_grav_pp_full(const struct engine *e, + struct gravity_cache *ci_cache, + struct gravity_cache *cj_cache, + int gcount_i, int gcount_j, + int gcount_padded_j, + struct gpart *restrict gparts_i, + struct gpart *restrict gparts_j) { - /* Check that we fit in cache */ - if (gcount_i > ci_cache->count || gcount_j > cj_cache->count) - error("Not enough space in the caches! gcount_i=%d gcount_j=%d", gcount_i, - gcount_j); + /* Loop over all particles in ci... */ + for (int pid = 0; pid < gcount_i; pid++) { - /* Computed the padded counts */ - const int gcount_padded_i = gcount_i - (gcount_i % VEC_SIZE) + VEC_SIZE; - const int gcount_padded_j = gcount_j - (gcount_j % VEC_SIZE) + VEC_SIZE; + /* Skip inactive particles */ + if (!ci_cache->active[pid]) continue; - /* Fill the caches */ - gravity_cache_populate(ci_cache, gparts_i, gcount_i, gcount_padded_i, - loc_mean); - gravity_cache_populate(cj_cache, gparts_j, gcount_j, gcount_padded_j, - loc_mean); + /* Skip particle that can use the multipole */ + if (ci_cache->use_mpole[pid]) continue; - /* Recover the multipole info and shift the CoM locations */ - const float rmax_i = ci->multipole->r_max; - const float rmax_j = cj->multipole->r_max; - const struct multipole *multi_i = &ci->multipole->m_pole; - const struct multipole *multi_j = &cj->multipole->m_pole; - const float CoM_i[3] = {ci->multipole->CoM[0] - loc_mean[0], - ci->multipole->CoM[1] - loc_mean[1], - ci->multipole->CoM[2] - loc_mean[2]}; - const float CoM_j[3] = {cj->multipole->CoM[0] - loc_mean[0], - cj->multipole->CoM[1] - loc_mean[1], - cj->multipole->CoM[2] - loc_mean[2]}; +#ifdef SWIFT_DEBUG_CHECKS + if (!gpart_is_active(&gparts_i[pid], e)) + error("Active particle went through the cache"); +#endif - /* Ok... Here we go ! */ + const float x_i = ci_cache->x[pid]; + const float y_i = ci_cache->y[pid]; + const float z_i = ci_cache->z[pid]; - if (ci_active) { + /* Some powers of the softening length */ + const float h_i = ci_cache->epsilon[pid]; + const float h2_i = h_i * h_i; + const float h_inv_i = 1.f / h_i; + const float h_inv3_i = h_inv_i * h_inv_i * h_inv_i; - /* Loop over all particles in ci... */ - for (int pid = 0; pid < gcount_i; pid++) { + /* Local accumulators for the acceleration */ + float a_x = 0.f, a_y = 0.f, a_z = 0.f; - /* Skip inactive particles */ - if (!gpart_is_active(&gparts_i[pid], e)) continue; + /* Make the compiler understand we are in happy vectorization land */ + swift_align_information(cj_cache->x, SWIFT_CACHE_ALIGNMENT); + swift_align_information(cj_cache->y, SWIFT_CACHE_ALIGNMENT); + swift_align_information(cj_cache->z, SWIFT_CACHE_ALIGNMENT); + swift_align_information(cj_cache->m, SWIFT_CACHE_ALIGNMENT); + swift_assume_size(gcount_padded_j, VEC_SIZE); - const float x_i = ci_cache->x[pid]; - const float y_i = ci_cache->y[pid]; - const float z_i = ci_cache->z[pid]; + /* Loop over every particle in the other cell. */ + for (int pjd = 0; pjd < gcount_padded_j; pjd++) { - /* Some powers of the softening length */ - const float h_i = ci_cache->epsilon[pid]; - const float h2_i = h_i * h_i; - const float h_inv_i = 1.f / h_i; - const float h_inv3_i = h_inv_i * h_inv_i * h_inv_i; + /* Get info about j */ + const float x_j = cj_cache->x[pjd]; + const float y_j = cj_cache->y[pjd]; + const float z_j = cj_cache->z[pjd]; + const float mass_j = cj_cache->m[pjd]; - /* Distance to the multipole in cj */ - const float dx = x_i - CoM_j[0]; - const float dy = y_i - CoM_j[1]; - const float dz = z_i - CoM_j[2]; + /* Compute the pairwise (square) distance. */ + const float dx = x_i - x_j; + const float dy = y_i - y_j; + const float dz = z_i - z_j; const float r2 = dx * dx + dy * dy + dz * dz; - /* Can we use the multipole in cj ? */ - if (gravity_multipole_accept(0., rmax_j, theta_crit2, r2)) { +#ifdef SWIFT_DEBUG_CHECKS + if (r2 == 0.f) error("Interacting particles with 0 distance"); - /* Interact! */ - float f_x, f_y, f_z; - runner_iact_grav_pm(dx, dy, dz, r2, h_i, h_inv_i, multi_j, &f_x, &f_y, - &f_z); + /* Check that particles have been drifted to the current time */ + if (gparts_i[pid].ti_drift != e->ti_current) + error("gpi not drifted to current time"); + if (pjd < gcount_j && gparts_j[pjd].ti_drift != e->ti_current) + error("gpj not drifted to current time"); +#endif + + /* Interact! */ + float f_ij; + runner_iact_grav_pp_full(r2, h2_i, h_inv_i, h_inv3_i, mass_j, &f_ij); - /* Store it back */ - ci_cache->a_x[pid] = f_x; - ci_cache->a_y[pid] = f_y; - ci_cache->a_z[pid] = f_z; + /* Store it back */ + a_x -= f_ij * dx; + a_y -= f_ij * dy; + a_z -= f_ij * dz; #ifdef SWIFT_DEBUG_CHECKS - /* Update the interaction counter */ - gparts_i[pid].num_interacted += cj->multipole->m_pole.num_gpart; + /* Update the interaction counter if it's not a padded gpart */ + if (pjd < gcount_j) gparts_i[pid].num_interacted++; #endif - /* Done with this particle */ - continue; - } - - /* Ok, as of here we have no choice but directly interact everything */ + } - /* Local accumulators for the acceleration */ - float a_x = 0.f, a_y = 0.f, a_z = 0.f; + /* Store everything back in cache */ + ci_cache->a_x[pid] = a_x; + ci_cache->a_y[pid] = a_y; + ci_cache->a_z[pid] = a_z; + } +} - /* Make the compiler understand we are in happy vectorization land */ - swift_align_information(cj_cache->x, SWIFT_CACHE_ALIGNMENT); - swift_align_information(cj_cache->y, SWIFT_CACHE_ALIGNMENT); - swift_align_information(cj_cache->z, SWIFT_CACHE_ALIGNMENT); - swift_align_information(cj_cache->m, SWIFT_CACHE_ALIGNMENT); - swift_assume_size(gcount_padded_j, VEC_SIZE); +static INLINE void runner_dopair_grav_pp_truncated( + const struct engine *e, const float rlr_inv, struct gravity_cache *ci_cache, + struct gravity_cache *cj_cache, int gcount_i, int gcount_j, + int gcount_padded_j, struct gpart *restrict gparts_i, + struct gpart *restrict gparts_j) { - /* Loop over every particle in the other cell. */ - for (int pjd = 0; pjd < gcount_padded_j; pjd++) { + /* Loop over all particles in ci... */ + for (int pid = 0; pid < gcount_i; pid++) { - /* Get info about j */ - const float x_j = cj_cache->x[pjd]; - const float y_j = cj_cache->y[pjd]; - const float z_j = cj_cache->z[pjd]; - const float mass_j = cj_cache->m[pjd]; + /* Skip inactive particles */ + if (!ci_cache->active[pid]) continue; - /* Compute the pairwise (square) distance. */ - const float dx = x_i - x_j; - const float dy = y_i - y_j; - const float dz = z_i - z_j; - const float r2 = dx * dx + dy * dy + dz * dz; + /* Skip particle that can use the multipole */ + if (ci_cache->use_mpole[pid]) continue; #ifdef SWIFT_DEBUG_CHECKS - if (r2 == 0.f) error("Interacting particles with 0 distance"); - - /* Check that particles have been drifted to the current time */ - if (gparts_i[pid].ti_drift != e->ti_current) - error("gpi not drifted to current time"); - if (pjd < gcount_j && gparts_j[pjd].ti_drift != e->ti_current) - error("gpj not drifted to current time"); + if (!gpart_is_active(&gparts_i[pid], e)) + error("Active particle went through the cache"); #endif - /* Interact! */ - float f_ij; - runner_iact_grav_pp_full(r2, h2_i, h_inv_i, h_inv3_i, mass_j, &f_ij); - - /* Store it back */ - a_x -= f_ij * dx; - a_y -= f_ij * dy; - a_z -= f_ij * dz; - -#ifdef SWIFT_DEBUG_CHECKS - /* Update the interaction counter if it's not a padded gpart */ - if (pjd < gcount_j) gparts_i[pid].num_interacted++; -#endif - } + const float x_i = ci_cache->x[pid]; + const float y_i = ci_cache->y[pid]; + const float z_i = ci_cache->z[pid]; - /* Store everything back in cache */ - ci_cache->a_x[pid] = a_x; - ci_cache->a_y[pid] = a_y; - ci_cache->a_z[pid] = a_z; - } - } + /* Some powers of the softening length */ + const float h_i = ci_cache->epsilon[pid]; + const float h2_i = h_i * h_i; + const float h_inv_i = 1.f / h_i; + const float h_inv3_i = h_inv_i * h_inv_i * h_inv_i; - /* Now do the opposite loop */ - if (cj_active) { + /* Local accumulators for the acceleration */ + float a_x = 0.f, a_y = 0.f, a_z = 0.f; - /* Loop over all particles in cj... */ - for (int pjd = 0; pjd < gcount_j; pjd++) { + /* Make the compiler understand we are in happy vectorization land */ + swift_align_information(cj_cache->x, SWIFT_CACHE_ALIGNMENT); + swift_align_information(cj_cache->y, SWIFT_CACHE_ALIGNMENT); + swift_align_information(cj_cache->z, SWIFT_CACHE_ALIGNMENT); + swift_align_information(cj_cache->m, SWIFT_CACHE_ALIGNMENT); + swift_assume_size(gcount_padded_j, VEC_SIZE); - /* Skip inactive particles */ - if (!gpart_is_active(&gparts_j[pjd], e)) continue; + /* Loop over every particle in the other cell. */ + for (int pjd = 0; pjd < gcount_padded_j; pjd++) { + /* Get info about j */ const float x_j = cj_cache->x[pjd]; const float y_j = cj_cache->y[pjd]; const float z_j = cj_cache->z[pjd]; + const float mass_j = cj_cache->m[pjd]; - /* Some powers of the softening length */ - const float h_j = cj_cache->epsilon[pjd]; - const float h2_j = h_j * h_j; - const float h_inv_j = 1.f / h_j; - const float h_inv3_j = h_inv_j * h_inv_j * h_inv_j; - - /* Distance to the multipole in ci */ - const float dx = x_j - CoM_i[0]; - const float dy = y_j - CoM_i[1]; - const float dz = z_j - CoM_i[2]; + /* Compute the pairwise (square) distance. */ + const float dx = x_i - x_j; + const float dy = y_i - y_j; + const float dz = z_i - z_j; const float r2 = dx * dx + dy * dy + dz * dz; - /* Can we use the multipole in cj ? */ - if (gravity_multipole_accept(0., rmax_i, theta_crit2, r2)) { +#ifdef SWIFT_DEBUG_CHECKS + if (r2 == 0.f) error("Interacting particles with 0 distance"); - /* Interact! */ - float f_x, f_y, f_z; - runner_iact_grav_pm(dx, dy, dz, r2, h_j, h_inv_j, multi_i, &f_x, &f_y, - &f_z); + /* Check that particles have been drifted to the current time */ + if (gparts_i[pid].ti_drift != e->ti_current) + error("gpi not drifted to current time"); + if (pjd < gcount_j && gparts_j[pjd].ti_drift != e->ti_current) + error("gpj not drifted to current time"); +#endif + + /* Interact! */ + float f_ij; + runner_iact_grav_pp_truncated(r2, h2_i, h_inv_i, h_inv3_i, mass_j, + rlr_inv, &f_ij); - /* Store it back */ - cj_cache->a_x[pjd] = f_x; - cj_cache->a_y[pjd] = f_y; - cj_cache->a_z[pjd] = f_z; + /* Store it back */ + a_x -= f_ij * dx; + a_y -= f_ij * dy; + a_z -= f_ij * dz; #ifdef SWIFT_DEBUG_CHECKS - /* Update the interaction counter */ - gparts_j[pjd].num_interacted += ci->multipole->m_pole.num_gpart; + /* Update the interaction counter if it's not a padded gpart */ + if (pjd < gcount_j) gparts_i[pid].num_interacted++; #endif - /* Done with this particle */ - continue; - } - - /* Ok, as of here we have no choice but directly interact everything */ + } - /* Local accumulators for the acceleration */ - float a_x = 0.f, a_y = 0.f, a_z = 0.f; + /* Store everything back in cache */ + ci_cache->a_x[pid] = a_x; + ci_cache->a_y[pid] = a_y; + ci_cache->a_z[pid] = a_z; + } +} - /* Make the compiler understand we are in happy vectorization land */ - swift_align_information(ci_cache->x, SWIFT_CACHE_ALIGNMENT); - swift_align_information(ci_cache->y, SWIFT_CACHE_ALIGNMENT); - swift_align_information(ci_cache->z, SWIFT_CACHE_ALIGNMENT); - swift_align_information(ci_cache->m, SWIFT_CACHE_ALIGNMENT); - swift_assume_size(gcount_padded_i, VEC_SIZE); +static INLINE void runner_dopair_grav_pm( + const struct engine *restrict e, struct gravity_cache *ci_cache, + int gcount_i, int gcount_padded_i, struct gpart *restrict gparts_i, + const float CoM_j[3], const struct multipole *restrict multi_j, + struct cell *restrict cj) { + + /* Make the compiler understand we are in happy vectorization land */ + swift_align_information(ci_cache->x, SWIFT_CACHE_ALIGNMENT); + swift_align_information(ci_cache->y, SWIFT_CACHE_ALIGNMENT); + swift_align_information(ci_cache->z, SWIFT_CACHE_ALIGNMENT); + swift_align_information(ci_cache->epsilon, SWIFT_CACHE_ALIGNMENT); + swift_align_information(ci_cache->a_x, SWIFT_CACHE_ALIGNMENT); + swift_align_information(ci_cache->a_y, SWIFT_CACHE_ALIGNMENT); + swift_align_information(ci_cache->a_z, SWIFT_CACHE_ALIGNMENT); + swift_align_information(ci_cache->active, SWIFT_CACHE_ALIGNMENT); + swift_align_information(ci_cache->use_mpole, SWIFT_CACHE_ALIGNMENT); + swift_assume_size(gcount_padded_i, VEC_SIZE); - /* Loop over every particle in the other cell. */ - for (int pid = 0; pid < gcount_padded_i; pid++) { + /* Loop over all particles in ci... */ + for (int pid = 0; pid < gcount_padded_i; pid++) { - /* Get info about j */ - const float x_i = ci_cache->x[pid]; - const float y_i = ci_cache->y[pid]; - const float z_i = ci_cache->z[pid]; - const float mass_i = ci_cache->m[pid]; + /* Skip inactive particles */ + if (!ci_cache->active[pid]) continue; - /* Compute the pairwise (square) distance. */ - const float dx = x_j - x_i; - const float dy = y_j - y_i; - const float dz = z_j - z_i; - const float r2 = dx * dx + dy * dy + dz * dz; + /* Skip particle that cannot use the multipole */ + if (!ci_cache->use_mpole[pid]) continue; #ifdef SWIFT_DEBUG_CHECKS - if (r2 == 0.f) error("Interacting particles with 0 distance"); - - /* Check that particles have been drifted to the current time */ - if (gparts_j[pjd].ti_drift != e->ti_current) - error("gpj not drifted to current time"); - if (pid < gcount_i && gparts_i[pid].ti_drift != e->ti_current) - error("gpi not drifted to current time"); + if (pid < gcount_i && !gpart_is_active(&gparts_i[pid], e)) + error("Active particle went through the cache"); #endif - /* Interact! */ - float f_ji; - runner_iact_grav_pp_full(r2, h2_j, h_inv_j, h_inv3_j, mass_i, &f_ji); + const float x_i = ci_cache->x[pid]; + const float y_i = ci_cache->y[pid]; + const float z_i = ci_cache->z[pid]; - /* Store it back */ - a_x -= f_ji * dx; - a_y -= f_ji * dy; - a_z -= f_ji * dz; + /* Some powers of the softening length */ + const float h_i = ci_cache->epsilon[pid]; + const float h_inv_i = 1.f / h_i; + + /* Distance to the Multipole */ + const float dx = x_i - CoM_j[0]; + const float dy = y_i - CoM_j[1]; + const float dz = z_i - CoM_j[2]; + const float r2 = dx * dx + dy * dy + dz * dz; + + /* Interact! */ + float f_x, f_y, f_z; + runner_iact_grav_pm(dx, dy, dz, r2, h_i, h_inv_i, multi_j, &f_x, &f_y, + &f_z); + + /* Store it back */ + ci_cache->a_x[pid] = f_x; + ci_cache->a_y[pid] = f_y; + ci_cache->a_z[pid] = f_z; #ifdef SWIFT_DEBUG_CHECKS - /* Update the interaction counter if it's not a padded gpart */ - if (pid < gcount_i) gparts_j[pjd].num_interacted++; + /* Update the interaction counter */ + if (pid < gcount_i) + gparts_i[pid].num_interacted += cj->multipole->m_pole.num_gpart; #endif - } - - /* Store everything back in cache */ - cj_cache->a_x[pjd] = a_x; - cj_cache->a_y[pjd] = a_y; - cj_cache->a_z[pjd] = a_z; - } } - - /* Write back to the particles */ - if (ci_active) gravity_cache_write_back(ci_cache, gparts_i, gcount_i); - if (cj_active) gravity_cache_write_back(cj_cache, gparts_j, gcount_j); } /** * @brief Computes the interaction of all the particles in a cell with all the - * particles of another cell using the truncated Newtonian potential + * particles of another cell (switching function between full and truncated). * * @param r The #runner. * @param ci The first #cell. * @param cj The other #cell. - * @param shift The distance vector (periodically wrapped) between the cell - * centres. */ -void runner_dopair_grav_pp_truncated(struct runner *r, struct cell *ci, - struct cell *cj, double shift[3]) { +void runner_dopair_grav_pp(struct runner *r, struct cell *ci, struct cell *cj) { - /* Some constants */ - const struct engine *const e = r->e; + const struct engine *e = r->e; + + TIMER_TIC; + + /* Anything to do here? */ + const int ci_active = cell_is_active(ci, e); + const int cj_active = cell_is_active(cj, e); + if (!ci_active && !cj_active) return; + + /* Check that we are not doing something stupid */ + if (ci->split || cj->split) error("Running P-P on splitable cells"); + + /* Let's start by drifting things */ + if (!cell_are_gpart_drifted(ci, e)) error("Un-drifted gparts"); + if (!cell_are_gpart_drifted(cj, e)) error("Un-drifted gparts"); + + /* Recover some useful constants */ const struct space *s = e->s; + const int periodic = s->periodic; const double cell_width = s->width[0]; - const double a_smooth = e->gravity_properties->a_smooth; + const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]}; const float theta_crit2 = e->gravity_properties->theta_crit2; + const double a_smooth = e->gravity_properties->a_smooth; + const double r_cut_min = e->gravity_properties->r_cut_min; const double rlr = cell_width * a_smooth; + const double min_trunc = rlr * r_cut_min; const float rlr_inv = 1. / rlr; /* Caches to play with */ struct gravity_cache *const ci_cache = &r->ci_gravity_cache; struct gravity_cache *const cj_cache = &r->cj_gravity_cache; - /* Cell properties */ + /* Centre of the cell pais */ + const double loc_mean[3] = {0.5 * (ci->loc[0] + cj->loc[0]), + 0.5 * (ci->loc[1] + cj->loc[1]), + 0.5 * (ci->loc[2] + cj->loc[2])}; + // MATTHIEU deal with periodicity + + /* Star by constructing particle caches */ + + /* Computed the padded counts */ const int gcount_i = ci->gcount; const int gcount_j = cj->gcount; - struct gpart *restrict gparts_i = ci->gparts; - struct gpart *restrict gparts_j = cj->gparts; - const int ci_active = cell_is_active(ci, e); - const int cj_active = cell_is_active(cj, e); - const double loc_i[3] = {ci->loc[0], ci->loc[1], ci->loc[2]}; - const double loc_j[3] = {cj->loc[0], cj->loc[1], cj->loc[2]}; - const double loc_mean[3] = {0.5 * (loc_i[0] + loc_j[0]), - 0.5 * (loc_i[1] + loc_j[1]), - 0.5 * (loc_i[2] + loc_j[2])}; - - /* Anything to do here ?*/ - if (!ci_active && !cj_active) return; + const int gcount_padded_i = gcount_i - (gcount_i % VEC_SIZE) + VEC_SIZE; + const int gcount_padded_j = gcount_j - (gcount_j % VEC_SIZE) + VEC_SIZE; /* Check that we fit in cache */ if (gcount_i > ci_cache->count || gcount_j > cj_cache->count) error("Not enough space in the caches! gcount_i=%d gcount_j=%d", gcount_i, gcount_j); - /* Computed the padded counts */ - const int gcount_padded_i = gcount_i - (gcount_i % VEC_SIZE) + VEC_SIZE; - const int gcount_padded_j = gcount_j - (gcount_j % VEC_SIZE) + VEC_SIZE; - - /* Fill the caches */ - gravity_cache_populate(ci_cache, gparts_i, gcount_i, gcount_padded_i, - loc_mean); - gravity_cache_populate(cj_cache, gparts_j, gcount_j, gcount_padded_j, - loc_mean); - /* Recover the multipole info and shift the CoM locations */ const float rmax_i = ci->multipole->r_max; const float rmax_j = cj->multipole->r_max; + const float rmax2_i = rmax_i * rmax_i; + const float rmax2_j = rmax_j * rmax_j; const struct multipole *multi_i = &ci->multipole->m_pole; const struct multipole *multi_j = &cj->multipole->m_pole; const float CoM_i[3] = {ci->multipole->CoM[0] - loc_mean[0], @@ -502,264 +470,48 @@ void runner_dopair_grav_pp_truncated(struct runner *r, struct cell *ci, const float CoM_j[3] = {cj->multipole->CoM[0] - loc_mean[0], cj->multipole->CoM[1] - loc_mean[1], cj->multipole->CoM[2] - loc_mean[2]}; + // MATTHIEU deal with periodicity - /* Ok... Here we go ! */ - - if (ci_active) { - - /* Loop over all particles in ci... */ - for (int pid = 0; pid < gcount_i; pid++) { - - /* Skip inactive particles */ - if (!gpart_is_active(&gparts_i[pid], e)) continue; - - const float x_i = ci_cache->x[pid]; - const float y_i = ci_cache->y[pid]; - const float z_i = ci_cache->z[pid]; - - /* Some powers of the softening length */ - const float h_i = ci_cache->epsilon[pid]; - const float h2_i = h_i * h_i; - const float h_inv_i = 1.f / h_i; - const float h_inv3_i = h_inv_i * h_inv_i * h_inv_i; - - /* Distance to the multipole in cj */ - const float dx = x_i - CoM_j[0]; - const float dy = y_i - CoM_j[1]; - const float dz = z_i - CoM_j[2]; - const float r2 = dx * dx + dy * dy + dz * dz; - - /* Can we use the multipole in cj ? */ - if (gravity_multipole_accept(0., rmax_j, theta_crit2, r2)) { - - /* Interact! */ - float f_x, f_y, f_z; - runner_iact_grav_pm(dx, dy, dz, r2, h_i, h_inv_i, multi_j, &f_x, &f_y, - &f_z); - - /* Store it back */ - ci_cache->a_x[pid] = f_x; - ci_cache->a_y[pid] = f_y; - ci_cache->a_z[pid] = f_z; - -#ifdef SWIFT_DEBUG_CHECKS - /* Update the interaction counter */ - gparts_i[pid].num_interacted += cj->multipole->m_pole.num_gpart; -#endif - /* Done with this particle */ - continue; - } - - /* Ok, as of here we have no choice but directly interact everything */ - - /* Local accumulators for the acceleration */ - float a_x = 0.f, a_y = 0.f, a_z = 0.f; - - /* Make the compiler understand we are in happy vectorization land */ - swift_align_information(cj_cache->x, SWIFT_CACHE_ALIGNMENT); - swift_align_information(cj_cache->y, SWIFT_CACHE_ALIGNMENT); - swift_align_information(cj_cache->z, SWIFT_CACHE_ALIGNMENT); - swift_align_information(cj_cache->m, SWIFT_CACHE_ALIGNMENT); - swift_assume_size(gcount_padded_j, VEC_SIZE); - - /* Loop over every particle in the other cell. */ - for (int pjd = 0; pjd < gcount_padded_j; pjd++) { - - /* Get info about j */ - const float x_j = cj_cache->x[pjd]; - const float y_j = cj_cache->y[pjd]; - const float z_j = cj_cache->z[pjd]; - const float mass_j = cj_cache->m[pjd]; - - /* Compute the pairwise (square) distance. */ - const float dx = x_i - x_j; - const float dy = y_i - y_j; - const float dz = z_i - z_j; - const float r2 = dx * dx + dy * dy + dz * dz; - -#ifdef SWIFT_DEBUG_CHECKS - if (r2 == 0.f) error("Interacting particles with 0 distance"); + /* Fill the caches */ + gravity_cache_populate(e->max_active_bin, ci_cache, ci->gparts, gcount_i, + gcount_padded_i, loc_mean, CoM_j, rmax2_j, + theta_crit2); + gravity_cache_populate(e->max_active_bin, cj_cache, cj->gparts, gcount_j, + gcount_padded_j, loc_mean, CoM_i, rmax2_i, + theta_crit2); - /* Check that particles have been drifted to the current time */ - if (gparts_i[pid].ti_drift != e->ti_current) - error("gpi not drifted to current time"); - if (pjd < gcount_j && gparts_j[pjd].ti_drift != e->ti_current) - error("gpj not drifted to current time"); -#endif + /* Can we use the Newtonian version or do we need the truncated one ? */ + if (!periodic) { - /* Interact! */ - float f_ij; - runner_iact_grav_pp_truncated(r2, h2_i, h_inv_i, h_inv3_i, mass_j, - rlr_inv, &f_ij); + /* Not periodic -> Can always use Newtonian potential */ - /* Store it back */ - a_x -= f_ij * dx; - a_y -= f_ij * dy; - a_z -= f_ij * dz; + /* Let's updated the active cell(s) only */ + if (ci_active) { -#ifdef SWIFT_DEBUG_CHECKS - /* Update the interaction counter if it's not a padded gpart */ - if (pjd < gcount_j) gparts_i[pid].num_interacted++; -#endif - } + /* First the P2P */ + runner_dopair_grav_pp_full(e, ci_cache, cj_cache, gcount_i, gcount_j, + gcount_padded_j, ci->gparts, cj->gparts); - /* Store everything back in cache */ - ci_cache->a_x[pid] = a_x; - ci_cache->a_y[pid] = a_y; - ci_cache->a_z[pid] = a_z; + /* Then the M2P */ + runner_dopair_grav_pm(e, ci_cache, gcount_i, gcount_padded_i, ci->gparts, + CoM_j, multi_j, cj); } - } - - /* Now do the opposite loop */ - if (cj_active) { - - /* Loop over all particles in cj... */ - for (int pjd = 0; pjd < gcount_j; pjd++) { - - /* Skip inactive particles */ - if (!gpart_is_active(&gparts_j[pjd], e)) continue; - - const float x_j = cj_cache->x[pjd]; - const float y_j = cj_cache->y[pjd]; - const float z_j = cj_cache->z[pjd]; - - /* Some powers of the softening length */ - const float h_j = cj_cache->epsilon[pjd]; - const float h2_j = h_j * h_j; - const float h_inv_j = 1.f / h_j; - const float h_inv3_j = h_inv_j * h_inv_j * h_inv_j; - - /* Distance to the multipole in ci */ - const float dx = x_j - CoM_i[0]; - const float dy = y_j - CoM_i[1]; - const float dz = z_j - CoM_i[2]; - const float r2 = dx * dx + dy * dy + dz * dz; - - /* Can we use the multipole in cj ? */ - if (gravity_multipole_accept(0., rmax_i, theta_crit2, r2)) { - - /* Interact! */ - float f_x, f_y, f_z; - runner_iact_grav_pm(dx, dy, dz, r2, h_j, h_inv_j, multi_i, &f_x, &f_y, - &f_z); - - /* Store it back */ - cj_cache->a_x[pjd] = f_x; - cj_cache->a_y[pjd] = f_y; - cj_cache->a_z[pjd] = f_z; - -#ifdef SWIFT_DEBUG_CHECKS - /* Update the interaction counter */ - gparts_j[pjd].num_interacted += ci->multipole->m_pole.num_gpart; -#endif - /* Done with this particle */ - continue; - } - - /* Ok, as of here we have no choice but directly interact everything */ - - /* Local accumulators for the acceleration */ - float a_x = 0.f, a_y = 0.f, a_z = 0.f; - - /* Make the compiler understand we are in happy vectorization land */ - swift_align_information(ci_cache->x, SWIFT_CACHE_ALIGNMENT); - swift_align_information(ci_cache->y, SWIFT_CACHE_ALIGNMENT); - swift_align_information(ci_cache->z, SWIFT_CACHE_ALIGNMENT); - swift_align_information(ci_cache->m, SWIFT_CACHE_ALIGNMENT); - swift_assume_size(gcount_padded_i, VEC_SIZE); - - /* Loop over every particle in the other cell. */ - for (int pid = 0; pid < gcount_padded_i; pid++) { - - /* Get info about j */ - const float x_i = ci_cache->x[pid]; - const float y_i = ci_cache->y[pid]; - const float z_i = ci_cache->z[pid]; - const float mass_i = ci_cache->m[pid]; - - /* Compute the pairwise (square) distance. */ - const float dx = x_j - x_i; - const float dy = y_j - y_i; - const float dz = z_j - z_i; - const float r2 = dx * dx + dy * dy + dz * dz; - -#ifdef SWIFT_DEBUG_CHECKS - if (r2 == 0.f) error("Interacting particles with 0 distance"); - - /* Check that particles have been drifted to the current time */ - if (gparts_j[pjd].ti_drift != e->ti_current) - error("gpj not drifted to current time"); - if (pid < gcount_i && gparts_i[pid].ti_drift != e->ti_current) - error("gpi not drifted to current time"); -#endif - - /* Interact! */ - float f_ji; - runner_iact_grav_pp_truncated(r2, h2_j, h_inv_j, h_inv3_j, mass_i, - rlr_inv, &f_ji); - - /* Store it back */ - a_x -= f_ji * dx; - a_y -= f_ji * dy; - a_z -= f_ji * dz; - -#ifdef SWIFT_DEBUG_CHECKS - /* Update the interaction counter if it's not a padded gpart */ - if (pid < gcount_i) gparts_j[pjd].num_interacted++; -#endif - } - - /* Store everything back in cache */ - cj_cache->a_x[pjd] = a_x; - cj_cache->a_y[pjd] = a_y; - cj_cache->a_z[pjd] = a_z; + if (cj_active) { + + /* First the P2P */ + runner_dopair_grav_pp_full(e, cj_cache, ci_cache, gcount_j, gcount_i, + gcount_padded_i, cj->gparts, ci->gparts); + /* Then the M2P */ + runner_dopair_grav_pm(e, cj_cache, gcount_j, gcount_padded_j, cj->gparts, + CoM_i, multi_i, ci); } - } - - /* Write back to the particles */ - if (ci_active) gravity_cache_write_back(ci_cache, gparts_i, gcount_i); - if (cj_active) gravity_cache_write_back(cj_cache, gparts_j, gcount_j); -} - -/** - * @brief Computes the interaction of all the particles in a cell with all the - * particles of another cell (switching function between full and truncated). - * - * @param r The #runner. - * @param ci The first #cell. - * @param cj The other #cell. - */ -void runner_dopair_grav_pp(struct runner *r, struct cell *ci, struct cell *cj) { - - /* Some properties of the space */ - const struct engine *e = r->e; - const struct space *s = e->s; - const int periodic = s->periodic; - const double cell_width = s->width[0]; - const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]}; - const double a_smooth = e->gravity_properties->a_smooth; - const double r_cut_min = e->gravity_properties->r_cut_min; - const double min_trunc = cell_width * r_cut_min * a_smooth; - double shift[3] = {0.0, 0.0, 0.0}; - TIMER_TIC; + } else { /* Periodic BC */ - /* Anything to do here? */ - if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return; - - /* Check that we are not doing something stupid */ - if (ci->split || cj->split) error("Running P-P on splitable cells"); - - /* Let's start by drifting things */ - if (!cell_are_gpart_drifted(ci, e)) error("Un-drifted gparts"); - if (!cell_are_gpart_drifted(cj, e)) error("Un-drifted gparts"); - - /* Can we use the Newtonian version or do we need the truncated one ? */ - if (!periodic) { - runner_dopair_grav_pp_full(r, ci, cj, shift); - } else { + // MATTHIEU deal with periodicity /* Get the relative distance between the pairs, wrapping. */ + double shift[3] = {0.0, 0.0, 0.0}; shift[0] = nearest(cj->loc[0] - ci->loc[0], dim[0]); shift[1] = nearest(cj->loc[1] - ci->loc[1], dim[1]); shift[2] = nearest(cj->loc[2] - ci->loc[2], dim[2]); @@ -767,15 +519,40 @@ void runner_dopair_grav_pp(struct runner *r, struct cell *ci, struct cell *cj) { shift[0] * shift[0] + shift[1] * shift[1] + shift[2] * shift[2]; /* Get the maximal distance between any two particles */ - const double max_r = sqrt(r2) + ci->multipole->r_max + cj->multipole->r_max; + const double max_r = sqrt(r2) + rmax_i + rmax_j; /* Do we need to use the truncated interactions ? */ - if (max_r > min_trunc) - runner_dopair_grav_pp_truncated(r, ci, cj, shift); - else - runner_dopair_grav_pp_full(r, ci, cj, shift); + if (max_r > min_trunc) { + + /* Periodic but far-away cells must use the truncated potential */ + + /* Let's updated the active cell(s) only */ + if (ci_active) + runner_dopair_grav_pp_truncated(e, rlr_inv, ci_cache, cj_cache, + gcount_i, gcount_j, gcount_padded_j, + ci->gparts, cj->gparts); + if (cj_active) + runner_dopair_grav_pp_truncated(e, rlr_inv, cj_cache, ci_cache, + gcount_j, gcount_i, gcount_padded_i, + cj->gparts, ci->gparts); + } else { + + /* Periodic but close-by cells can use the full Newtonian potential */ + + /* Let's updated the active cell(s) only */ + if (ci_active) + runner_dopair_grav_pp_full(e, ci_cache, cj_cache, gcount_i, gcount_j, + gcount_padded_j, ci->gparts, cj->gparts); + if (cj_active) + runner_dopair_grav_pp_full(e, cj_cache, ci_cache, gcount_j, gcount_i, + gcount_padded_i, cj->gparts, ci->gparts); + } } + /* Write back to the particles */ + if (ci_active) gravity_cache_write_back(ci_cache, ci->gparts, gcount_i); + if (cj_active) gravity_cache_write_back(cj_cache, cj->gparts, gcount_j); + TIMER_TOC(timer_dopair_grav_pp); } @@ -812,7 +589,8 @@ void runner_doself_grav_pp_full(struct runner *r, struct cell *c) { /* Computed the padded counts */ const int gcount_padded = gcount - (gcount % VEC_SIZE) + VEC_SIZE; - gravity_cache_populate(ci_cache, gparts, gcount, gcount_padded, loc); + gravity_cache_populate_no_mpole(e->max_active_bin, ci_cache, gparts, gcount, + gcount_padded, loc); /* Ok... Here we go ! */ @@ -820,7 +598,7 @@ void runner_doself_grav_pp_full(struct runner *r, struct cell *c) { for (int pid = 0; pid < gcount; pid++) { /* Skip inactive particles */ - if (!gpart_is_active(&gparts[pid], e)) continue; + if (!ci_cache->active[pid]) continue; const float x_i = ci_cache->x[pid]; const float y_i = ci_cache->y[pid]; @@ -935,7 +713,8 @@ void runner_doself_grav_pp_truncated(struct runner *r, struct cell *c) { /* Computed the padded counts */ const int gcount_padded = gcount - (gcount % VEC_SIZE) + VEC_SIZE; - gravity_cache_populate(ci_cache, gparts, gcount, gcount_padded, loc); + gravity_cache_populate_no_mpole(e->max_active_bin, ci_cache, gparts, gcount, + gcount_padded, loc); /* Ok... Here we go ! */ @@ -943,7 +722,7 @@ void runner_doself_grav_pp_truncated(struct runner *r, struct cell *c) { for (int pid = 0; pid < gcount; pid++) { /* Skip inactive particles */ - if (!gpart_is_active(&gparts[pid], e)) continue; + if (!ci_cache->active[pid]) continue; const float x_i = ci_cache->x[pid]; const float y_i = ci_cache->y[pid];