diff --git a/src/gravity/Default/gravity_iact.h b/src/gravity/Default/gravity_iact.h index 4499ae335848b1b4b3f0cae7daead414a2e771ee..167b4fe3deb0569aeae42d2376aa6beff41dd312 100644 --- a/src/gravity/Default/gravity_iact.h +++ b/src/gravity/Default/gravity_iact.h @@ -214,23 +214,17 @@ runner_iact_grav_pp_truncated_debug(float r2, float h2, float h_inv, * @param f_z (return) The z-component of the acceleration. * @param pot (return) The potential. */ -__attribute__((always_inline)) INLINE static void runner_iact_grav_pm( +__attribute__((always_inline)) INLINE static void runner_iact_grav_pm_full( float r_x, float r_y, float r_z, float r2, float h, float h_inv, const struct multipole *m, float *f_x, float *f_y, float *f_z, float *pot) { - *f_x = 0.f; - *f_y = 0.f; - *f_z = 0.f; - *pot = 0.f; - return; - #if SELF_GRAVITY_MULTIPOLE_ORDER < 3 float f_ij; runner_iact_grav_pp_full(r2, h * h, h_inv, h_inv * h_inv * h_inv, m->M_000, &f_ij, pot); - *f_x = -f_ij * r_x; - *f_y = -f_ij * r_y; - *f_z = -f_ij * r_z; + *f_x = f_ij * r_x; + *f_y = f_ij * r_y; + *f_z = f_ij * r_z; #else /* Get the inverse distance */ @@ -259,4 +253,17 @@ __attribute__((always_inline)) INLINE static void runner_iact_grav_pm( #endif } +__attribute__((always_inline)) INLINE static void runner_iact_grav_pm_truncated( + float r_x, float r_y, float r_z, float r2, float h, float h_inv, + float rlr_inv, const struct multipole *m, float *f_x, float *f_y, + float *f_z, float *pot) { + + float f_ij; + runner_iact_grav_pp_truncated(r2, h * h, h_inv, h_inv * h_inv * h_inv, + m->M_000, rlr_inv, &f_ij, pot); + *f_x = f_ij * r_x; + *f_y = f_ij * r_y; + *f_z = f_ij * r_z; +} + #endif /* SWIFT_DEFAULT_GRAVITY_IACT_H */ diff --git a/src/gravity_cache.h b/src/gravity_cache.h index b8d0b74d7623cc84c1933cfd4030a75e45b66998..3ac0c85948c747f7e47724190515721f9142da39 100644 --- a/src/gravity_cache.h +++ b/src/gravity_cache.h @@ -105,7 +105,8 @@ static INLINE void gravity_cache_clean(struct gravity_cache *c) { * @param count The number of #gpart to allocated for (space_splitsize is a good * choice). */ -static INLINE void gravity_cache_init(struct gravity_cache *c, int count) { +static INLINE void gravity_cache_init(struct gravity_cache *c, + const int count) { /* Size of the gravity cache */ const int padded_count = count - (count % VEC_SIZE) + VEC_SIZE; @@ -153,10 +154,11 @@ static INLINE void gravity_cache_init(struct gravity_cache *c, int count) { * @param grav_props The global gravity properties. */ __attribute__((always_inline)) INLINE static void gravity_cache_populate( - timebin_t max_active_bin, struct gravity_cache *c, - const struct gpart *restrict gparts, int gcount, int gcount_padded, - const double shift[3], const float CoM[3], float r_max2, - const struct cell *cell, const struct gravity_props *grav_props) { + const timebin_t max_active_bin, struct gravity_cache *c, + const struct gpart *restrict gparts, const int gcount, + const int gcount_padded, const double shift[3], const float CoM[3], + const float r_max2, const struct cell *cell, + const struct gravity_props *grav_props) { const float theta_crit2 = grav_props->theta_crit2; @@ -187,7 +189,7 @@ __attribute__((always_inline)) INLINE static void gravity_cache_populate( const float dy = y[i] - CoM[1]; const float dz = z[i] - CoM[2]; const float r2 = dx * dx + dy * dy + dz * dz; - use_mpole[i] = 0 * gravity_M2P_accept(r_max2, theta_crit2, r2); + use_mpole[i] = gravity_M2P_accept(r_max2, theta_crit2, r2); } #ifdef SWIFT_DEBUG_CHECKS @@ -227,11 +229,11 @@ __attribute__((always_inline)) INLINE static void gravity_cache_populate( * @param grav_props The global gravity properties. */ __attribute__((always_inline)) INLINE static void -gravity_cache_populate_no_mpole(timebin_t max_active_bin, +gravity_cache_populate_no_mpole(const timebin_t max_active_bin, struct gravity_cache *c, - const struct gpart *restrict gparts, int gcount, - int gcount_padded, const double shift[3], - const struct cell *cell, + const struct gpart *restrict gparts, + const int gcount, const int gcount_padded, + const double shift[3], const struct cell *cell, const struct gravity_props *grav_props) { /* Make the compiler understand we are in happy vectorization land */ diff --git a/src/multipole.h b/src/multipole.h index 26857c3b877ffec1c3070e707d938d682f5c0069..472516f6932cc9023b0e48cefb4066de6f419e94 100644 --- a/src/multipole.h +++ b/src/multipole.h @@ -2429,7 +2429,8 @@ __attribute__((always_inline)) INLINE static int gravity_M2P_accept( // MATTHIEU: Make this mass-dependent ? /* Multipole acceptance criterion (Dehnen 2002, eq.10) */ - return (r2 * theta_crit2 > r_max2); + return (r2 * theta_crit2 * 0.0001 > r_max2); + // return 0; } #endif /* SWIFT_MULTIPOLE_H */ diff --git a/src/runner_doiact_grav.h b/src/runner_doiact_grav.h index fe573bcad375c86a302a9327f63b6935b4cc025c..84cff52cefd297a12063ffcc1f2cf3dfd86f2588 100644 --- a/src/runner_doiact_grav.h +++ b/src/runner_doiact_grav.h @@ -24,6 +24,7 @@ #include "active.h" #include "cell.h" #include "gravity.h" +#include "gravity_cache.h" #include "inline.h" #include "part.h" #include "space_getsid.h" @@ -440,12 +441,14 @@ static INLINE void runner_dopair_grav_pp_truncated( TIMER_TOC(timer_dopair_grav_pp); } -static INLINE void runner_dopair_grav_pm( +static INLINE void runner_dopair_grav_pm_full( const struct engine *restrict e, struct gravity_cache *ci_cache, int gcount_i, int gcount_padded_i, struct gpart *restrict gparts_i, const float CoM_j[3], const struct multipole *restrict multi_j, struct cell *restrict cj) { + const float dim[3] = {e->s->dim[0], e->s->dim[1], e->s->dim[2]}; + TIMER_TIC; /* Make the compiler understand we are in happy vectorization land */ @@ -478,7 +481,86 @@ static INLINE void runner_dopair_grav_pm( error("Active particle went through the cache"); #endif - error("aa"); + const float x_i = x[pid]; + const float y_i = y[pid]; + const float z_i = z[pid]; + + /* Some powers of the softening length */ + const float h_i = epsilon[pid]; + const float h_inv_i = 1.f / h_i; + + /* Distance to the Multipole */ + float dx = CoM_j[0] - x_i; + float dy = CoM_j[1] - y_i; + float dz = CoM_j[2] - z_i; + + dx = nearestf(dx, dim[0]); + dy = nearestf(dy, dim[1]); + dz = nearestf(dz, dim[2]); + + const float r2 = dx * dx + dy * dy + dz * dz; + + /* Interact! */ + float f_x, f_y, f_z, pot_ij; + runner_iact_grav_pm_full(dx, dy, dz, r2, h_i, h_inv_i, multi_j, &f_x, &f_y, + &f_z, &pot_ij); + + /* Store it back */ + a_x[pid] = f_x; + a_y[pid] = f_y; + a_z[pid] = f_z; + pot[pid] = pot_ij; + +#ifdef SWIFT_DEBUG_CHECKS + /* Update the interaction counter */ + if (pid < gcount_i) + gparts_i[pid].num_interacted += cj->multipole->m_pole.num_gpart; +#endif + } + + TIMER_TOC(timer_dopair_grav_pm); +} + +static INLINE void runner_dopair_grav_pm_truncated( + const struct engine *restrict e, struct gravity_cache *ci_cache, + int gcount_i, int gcount_padded_i, struct gpart *restrict gparts_i, + const float CoM_j[3], const struct multipole *restrict multi_j, + struct cell *restrict cj) { + + const float dim[3] = {e->s->dim[0], e->s->dim[1], e->s->dim[2]}; + const float rlr_inv = 1. / e->mesh->a_smooth; + + TIMER_TIC; + + /* Make the compiler understand we are in happy vectorization land */ + swift_declare_aligned_ptr(float, x, ci_cache->x, SWIFT_CACHE_ALIGNMENT); + swift_declare_aligned_ptr(float, y, ci_cache->y, SWIFT_CACHE_ALIGNMENT); + swift_declare_aligned_ptr(float, z, ci_cache->z, SWIFT_CACHE_ALIGNMENT); + swift_declare_aligned_ptr(float, epsilon, ci_cache->epsilon, + SWIFT_CACHE_ALIGNMENT); + swift_declare_aligned_ptr(float, a_x, ci_cache->a_x, SWIFT_CACHE_ALIGNMENT); + swift_declare_aligned_ptr(float, a_y, ci_cache->a_y, SWIFT_CACHE_ALIGNMENT); + swift_declare_aligned_ptr(float, a_z, ci_cache->a_z, SWIFT_CACHE_ALIGNMENT); + swift_declare_aligned_ptr(float, pot, ci_cache->pot, SWIFT_CACHE_ALIGNMENT); + swift_declare_aligned_ptr(int, active, ci_cache->active, + SWIFT_CACHE_ALIGNMENT); + swift_declare_aligned_ptr(int, use_mpole, ci_cache->use_mpole, + SWIFT_CACHE_ALIGNMENT); + swift_assume_size(gcount_padded_i, VEC_SIZE); + + /* Loop over all particles in ci... */ + for (int pid = 0; pid < gcount_padded_i; pid++) { + + /* Skip inactive particles */ + if (!active[pid]) continue; + + /* Skip particle that cannot use the multipole */ + if (!use_mpole[pid]) continue; + +#ifdef SWIFT_DEBUG_CHECKS + if (pid < gcount_i && !gpart_is_active(&gparts_i[pid], e)) + error("Active particle went through the cache"); +#endif const float x_i = x[pid]; const float y_i = y[pid]; @@ -489,15 +571,20 @@ static INLINE void runner_dopair_grav_pm( const float h_inv_i = 1.f / h_i; /* Distance to the Multipole */ - const float dx = x_i - CoM_j[0]; - const float dy = y_i - CoM_j[1]; - const float dz = z_i - CoM_j[2]; + float dx = CoM_j[0] - x_i; + float dy = CoM_j[1] - y_i; + float dz = CoM_j[2] - z_i; + + dx = nearestf(dx, dim[0]); + dy = nearestf(dy, dim[1]); + dz = nearestf(dz, dim[2]); + const float r2 = dx * dx + dy * dy + dz * dz; /* Interact! */ float f_x, f_y, f_z, pot_ij; - runner_iact_grav_pm(dx, dy, dz, r2, h_i, h_inv_i, multi_j, &f_x, &f_y, &f_z, - &pot_ij); + runner_iact_grav_pm_truncated(dx, dy, dz, r2, h_i, h_inv_i, rlr_inv, + multi_j, &f_x, &f_y, &f_z, &pot_ij); /* Store it back */ a_x[pid] = f_x; @@ -628,8 +715,8 @@ static INLINE void runner_dopair_grav_pp(struct runner *r, struct cell *ci, gcount_padded_j, ci->gparts, cj->gparts); /* Then the M2P */ - runner_dopair_grav_pm(e, ci_cache, gcount_i, gcount_padded_i, ci->gparts, - CoM_j, multi_j, cj); + runner_dopair_grav_pm_full(e, ci_cache, gcount_i, gcount_padded_i, + ci->gparts, CoM_j, multi_j, cj); } if (cj_active && symmetric) { @@ -637,8 +724,8 @@ static INLINE void runner_dopair_grav_pp(struct runner *r, struct cell *ci, runner_dopair_grav_pp_full(e, cj_cache, ci_cache, gcount_j, gcount_i, gcount_padded_i, cj->gparts, ci->gparts); /* Then the M2P */ - runner_dopair_grav_pm(e, cj_cache, gcount_j, gcount_padded_j, cj->gparts, - CoM_i, multi_i, ci); + runner_dopair_grav_pm_full(e, cj_cache, gcount_j, gcount_padded_j, + cj->gparts, CoM_i, multi_i, ci); } } else { /* Periodic BC */ @@ -665,8 +752,8 @@ static INLINE void runner_dopair_grav_pp(struct runner *r, struct cell *ci, ci->gparts, cj->gparts); /* Then the M2P */ - runner_dopair_grav_pm(e, ci_cache, gcount_i, gcount_padded_i, - ci->gparts, CoM_j, multi_j, cj); + runner_dopair_grav_pm_truncated(e, ci_cache, gcount_i, gcount_padded_i, + ci->gparts, CoM_j, multi_j, cj); } if (cj_active && symmetric) { @@ -676,8 +763,8 @@ static INLINE void runner_dopair_grav_pp(struct runner *r, struct cell *ci, cj->gparts, ci->gparts); /* Then the M2P */ - runner_dopair_grav_pm(e, cj_cache, gcount_j, gcount_padded_j, - cj->gparts, CoM_i, multi_i, ci); + runner_dopair_grav_pm_truncated(e, cj_cache, gcount_j, gcount_padded_j, + cj->gparts, CoM_i, multi_i, ci); } } else { @@ -694,8 +781,8 @@ static INLINE void runner_dopair_grav_pp(struct runner *r, struct cell *ci, gcount_padded_j, ci->gparts, cj->gparts); /* Then the M2P */ - runner_dopair_grav_pm(e, ci_cache, gcount_i, gcount_padded_i, - ci->gparts, CoM_j, multi_j, cj); + runner_dopair_grav_pm_full(e, ci_cache, gcount_i, gcount_padded_i, + ci->gparts, CoM_j, multi_j, cj); } if (cj_active && symmetric) { @@ -704,8 +791,8 @@ static INLINE void runner_dopair_grav_pp(struct runner *r, struct cell *ci, gcount_padded_i, cj->gparts, ci->gparts); /* Then the M2P */ - runner_dopair_grav_pm(e, cj_cache, gcount_j, gcount_padded_j, - cj->gparts, CoM_i, multi_i, ci); + runner_dopair_grav_pm_full(e, cj_cache, gcount_j, gcount_padded_j, + cj->gparts, CoM_i, multi_i, ci); } } }