diff --git a/src/gravity/Default/gravity_iact.h b/src/gravity/Default/gravity_iact.h index 1f4310eea49a1b1b298fe00f00e1ae0802d9b688..d0003da9a8a311d6f413440f50fb5a00a7b68eeb 100644 --- a/src/gravity/Default/gravity_iact.h +++ b/src/gravity/Default/gravity_iact.h @@ -40,7 +40,8 @@ * @param f_ij (return) The force intensity. */ __attribute__((always_inline)) INLINE static void runner_iact_grav_pp_full( - float r2, float h2, float h_inv, float h_inv3, float mass, float *f_ij) { + float r2, float h2, float h_inv, float h_inv3, float mass, float *f_ij, + float *pot_ij) { /* Get the inverse distance */ const float r_inv = 1.f / sqrtf(r2); @@ -50,17 +51,20 @@ __attribute__((always_inline)) INLINE static void runner_iact_grav_pp_full( /* Get Newtonian gravity */ *f_ij = mass * r_inv * r_inv * r_inv; + *pot_ij = -mass * r_inv; } else { const float r = r2 * r_inv; const float ui = r * h_inv; - float W_ij; - kernel_grav_eval(ui, &W_ij); + float W_f_ij, W_pot_ij; + kernel_grav_force_eval(ui, &W_f_ij); + kernel_grav_pot_eval(ui, &W_pot_ij); /* Get softened gravity */ - *f_ij = mass * h_inv3 * W_ij; + *f_ij = mass * h_inv3 * W_f_ij; + *pot_ij = mass * h_inv * W_pot_ij; } } @@ -81,7 +85,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_grav_pp_full( */ __attribute__((always_inline)) INLINE static void runner_iact_grav_pp_truncated( float r2, float h2, float h_inv, float h_inv3, float mass, float rlr_inv, - float *f_ij) { + float *f_ij, float *pot_ij) { /* Get the inverse distance */ const float r_inv = 1.f / sqrtf(r2); @@ -92,23 +96,28 @@ __attribute__((always_inline)) INLINE static void runner_iact_grav_pp_truncated( /* Get Newtonian gravity */ *f_ij = mass * r_inv * r_inv * r_inv; + *pot_ij = -mass * r_inv; } else { const float ui = r * h_inv; - float W_ij; + float W_f_ij, W_pot_ij; - kernel_grav_eval(ui, &W_ij); + kernel_grav_force_eval(ui, &W_f_ij); + kernel_grav_pot_eval(ui, &W_pot_ij); /* Get softened gravity */ - *f_ij = mass * h_inv3 * W_ij; + *f_ij = mass * h_inv3 * W_f_ij; + *pot_ij = mass * h_inv * W_pot_ij; } /* Get long-range correction */ const float u_lr = r * rlr_inv; - float corr_lr; - kernel_long_grav_eval(u_lr, &corr_lr); - *f_ij *= corr_lr; + float corr_f_lr, corr_pot_lr; + kernel_long_grav_force_eval(u_lr, &corr_f_lr); + kernel_long_grav_pot_eval(u_lr, &corr_pot_lr); + *f_ij *= corr_f_lr; + *pot_ij *= corr_pot_lr; } /** diff --git a/src/kernel_gravity.h b/src/kernel_gravity.h index 799bda85b0c69dd2757f47fb0225006adb6d1432..aae694b8b2cd80573b39d475af979675295afee0 100644 --- a/src/kernel_gravity.h +++ b/src/kernel_gravity.h @@ -27,14 +27,35 @@ #include "minmax.h" /** - * @brief Computes the gravity softening function. + * @brief Computes the gravity softening function for potential. * * This functions assumes 0 < u < 1. * * @param u The ratio of the distance to the softening length $u = x/h$. * @param W (return) The value of the kernel function $W(x,h)$. */ -__attribute__((always_inline)) INLINE static void kernel_grav_eval( +__attribute__((always_inline)) INLINE static void kernel_grav_pot_eval( + float u, float *const W) { + + /* W(u) = 3u^7 - 15u^6 + 28u^5 - 21u^4 + 7u^2 - 3 */ + *W = 3.f * u - 15.f; + *W = *W * u + 28.f; + *W = *W * u - 21.f; + *W = *W * u; + *W = *W * u + 7.f; + *W = *W * u; + *W = *W * u - 3; +} + +/** + * @brief Computes the gravity softening function for forces. + * + * This functions assumes 0 < u < 1. + * + * @param u The ratio of the distance to the softening length $u = x/h$. + * @param W (return) The value of the kernel function $W(x,h)$. + */ +__attribute__((always_inline)) INLINE static void kernel_grav_force_eval( float u, float *const W) { /* W(u) = 21u^5 - 90u^4 + 140u^3 - 84u^2 + 14 */ diff --git a/src/kernel_long_gravity.h b/src/kernel_long_gravity.h index 2e0097ded217769af832af049b6c00b1305ac339..fab633053a468d6e5ba291b2b782d4766a912f14 100644 --- a/src/kernel_long_gravity.h +++ b/src/kernel_long_gravity.h @@ -31,12 +31,41 @@ #include <math.h> /** - * @brief Computes the long-range correction term for the FFT calculation. + * @brief Computes the long-range correction term for the potential calculation + * coming from FFT. * * @param u The ratio of the distance to the FFT cell scale \f$u = r/r_s\f$. * @param W (return) The value of the kernel function. */ -__attribute__((always_inline)) INLINE static void kernel_long_grav_eval( +__attribute__((always_inline)) INLINE static void kernel_long_grav_pot_eval( + const float u, float *const W) { + +#ifdef GADGET2_LONG_RANGE_CORRECTION + + const float arg1 = u * 0.5f; + const float term1 = erfcf(arg1); + + *W = term1; +#else + + const float x = 2.f * u; + const float exp_x = good_approx_expf(x); + const float alpha = 1.f / (1.f + exp_x); + + /* We want 2 - 2 exp(x) * alpha */ + *W = 1.f - alpha * exp_x; + *W *= 2.f; +#endif +} + +/** + * @brief Computes the long-range correction term for the force calculation + * coming from FFT. + * + * @param u The ratio of the distance to the FFT cell scale \f$u = r/r_s\f$. + * @param W (return) The value of the kernel function. + */ +__attribute__((always_inline)) INLINE static void kernel_long_grav_force_eval( float u, float *const W) { #ifdef GADGET2_LONG_RANGE_CORRECTION diff --git a/src/multipole.h b/src/multipole.h index d842081814b6ef7b6f490a1ba47a0152dd84d5a1..f580ef35871e94f5d4ef374f03b51901ff180091 100644 --- a/src/multipole.h +++ b/src/multipole.h @@ -2384,7 +2384,7 @@ INLINE static void gravity_M2P(const struct multipole *ma, const float r = r2 * r_inv; const float u = r * eps_inv; - kernel_grav_eval(u, &W); + kernel_grav_force_eval(u, &W); /* Get softened gravity */ f = ma->M_000 * eps_inv3 * W; diff --git a/src/runner_doiact_grav.h b/src/runner_doiact_grav.h index 0bc7b851c05e94974ef6337ba3d8df5e5420c9a1..0af87c94b6fc0d6650d7e24c922fe188b260ec61 100644 --- a/src/runner_doiact_grav.h +++ b/src/runner_doiact_grav.h @@ -204,8 +204,8 @@ static INLINE void runner_dopair_grav_pp_full(const struct engine *e, const float h_inv_i = 1.f / h_i; const float h_inv3_i = h_inv_i * h_inv_i * h_inv_i; - /* Local accumulators for the acceleration */ - float a_x = 0.f, a_y = 0.f, a_z = 0.f; + /* Local accumulators for the acceleration and potential */ + float a_x = 0.f, a_y = 0.f, a_z = 0.f, pot = 0.f; /* Make the compiler understand we are in happy vectorization land */ swift_align_information(float, cj_cache->x, SWIFT_CACHE_ALIGNMENT); @@ -240,13 +240,15 @@ static INLINE void runner_dopair_grav_pp_full(const struct engine *e, #endif /* Interact! */ - float f_ij; - runner_iact_grav_pp_full(r2, h2_i, h_inv_i, h_inv3_i, mass_j, &f_ij); + float f_ij, pot_ij; + runner_iact_grav_pp_full(r2, h2_i, h_inv_i, h_inv3_i, mass_j, &f_ij, + &pot_ij); /* Store it back */ a_x -= f_ij * dx; a_y -= f_ij * dy; a_z -= f_ij * dz; + pot += pot_ij; #ifdef SWIFT_DEBUG_CHECKS /* Update the interaction counter if it's not a padded gpart */ @@ -258,6 +260,7 @@ static INLINE void runner_dopair_grav_pp_full(const struct engine *e, ci_cache->a_x[pid] = a_x; ci_cache->a_y[pid] = a_y; ci_cache->a_z[pid] = a_z; + ci_cache->pot[pid] = pot; } TIMER_TOC(timer_dopair_grav_pp); @@ -295,8 +298,8 @@ static INLINE void runner_dopair_grav_pp_truncated( const float h_inv_i = 1.f / h_i; const float h_inv3_i = h_inv_i * h_inv_i * h_inv_i; - /* Local accumulators for the acceleration */ - float a_x = 0.f, a_y = 0.f, a_z = 0.f; + /* Local accumulators for the acceleration and potential */ + float a_x = 0.f, a_y = 0.f, a_z = 0.f, pot = 0.f; /* Make the compiler understand we are in happy vectorization land */ swift_align_information(float, cj_cache->x, SWIFT_CACHE_ALIGNMENT); @@ -331,14 +334,15 @@ static INLINE void runner_dopair_grav_pp_truncated( #endif /* Interact! */ - float f_ij; + float f_ij, pot_ij; runner_iact_grav_pp_truncated(r2, h2_i, h_inv_i, h_inv3_i, mass_j, - rlr_inv, &f_ij); + rlr_inv, &f_ij, &pot_ij); /* Store it back */ a_x -= f_ij * dx; a_y -= f_ij * dy; a_z -= f_ij * dz; + pot += pot_ij; #ifdef SWIFT_DEBUG_CHECKS /* Update the interaction counter if it's not a padded gpart */ @@ -350,6 +354,7 @@ static INLINE void runner_dopair_grav_pp_truncated( ci_cache->a_x[pid] = a_x; ci_cache->a_y[pid] = a_y; ci_cache->a_z[pid] = a_z; + ci_cache->pot[pid] = pot; } TIMER_TOC(timer_dopair_grav_pp); @@ -683,7 +688,7 @@ void runner_doself_grav_pp_full(struct runner *r, struct cell *c) { const float h_inv3_i = h_inv_i * h_inv_i * h_inv_i; /* Local accumulators for the acceleration */ - float a_x = 0.f, a_y = 0.f, a_z = 0.f; + float a_x = 0.f, a_y = 0.f, a_z = 0.f, pot = 0.f; /* Make the compiler understand we are in happy vectorization land */ swift_align_information(float, ci_cache->x, SWIFT_CACHE_ALIGNMENT); @@ -721,13 +726,15 @@ void runner_doself_grav_pp_full(struct runner *r, struct cell *c) { #endif /* Interact! */ - float f_ij; - runner_iact_grav_pp_full(r2, h2_i, h_inv_i, h_inv3_i, mass_j, &f_ij); + float f_ij, pot_ij; + runner_iact_grav_pp_full(r2, h2_i, h_inv_i, h_inv3_i, mass_j, &f_ij, + &pot_ij); /* Store it back */ a_x -= f_ij * dx; a_y -= f_ij * dy; a_z -= f_ij * dz; + pot += pot_ij; #ifdef SWIFT_DEBUG_CHECKS /* Update the interaction counter if it's not a padded gpart */ @@ -739,6 +746,7 @@ void runner_doself_grav_pp_full(struct runner *r, struct cell *c) { ci_cache->a_x[pid] = a_x; ci_cache->a_y[pid] = a_y; ci_cache->a_z[pid] = a_z; + ci_cache->pot[pid] = pot; } /* Write back to the particles */ @@ -808,8 +816,8 @@ void runner_doself_grav_pp_truncated(struct runner *r, struct cell *c) { const float h_inv_i = 1.f / h_i; const float h_inv3_i = h_inv_i * h_inv_i * h_inv_i; - /* Local accumulators for the acceleration */ - float a_x = 0.f, a_y = 0.f, a_z = 0.f; + /* Local accumulators for the acceleration and potential */ + float a_x = 0.f, a_y = 0.f, a_z = 0.f, pot = 0.f; /* Make the compiler understand we are in happy vectorization land */ swift_align_information(float, ci_cache->x, SWIFT_CACHE_ALIGNMENT); @@ -847,14 +855,15 @@ void runner_doself_grav_pp_truncated(struct runner *r, struct cell *c) { #endif /* Interact! */ - float f_ij; + float f_ij, pot_ij; runner_iact_grav_pp_truncated(r2, h2_i, h_inv_i, h_inv3_i, mass_j, - rlr_inv, &f_ij); + rlr_inv, &f_ij, &pot_ij); /* Store it back */ a_x -= f_ij * dx; a_y -= f_ij * dy; a_z -= f_ij * dz; + pot += pot_ij; #ifdef SWIFT_DEBUG_CHECKS /* Update the interaction counter if it's not a padded gpart */ @@ -866,6 +875,7 @@ void runner_doself_grav_pp_truncated(struct runner *r, struct cell *c) { ci_cache->a_x[pid] = a_x; ci_cache->a_y[pid] = a_y; ci_cache->a_z[pid] = a_z; + ci_cache->pot[pid] = pot; } /* Write back to the particles */