From a9d58e809bb23d88923e534d199de7ca0fdfa3f1 Mon Sep 17 00:00:00 2001 From: Matthieu Schaller <schaller@strw.leidenuniv.nl> Date: Tue, 12 May 2020 12:22:03 +0200 Subject: [PATCH] Apply the same improvement to the functions called in the direct interaction --- src/approx_math.h | 25 ------- src/gravity/MultiSoftening/gravity_iact.h | 4 +- src/kernel_long_gravity.h | 89 ++++++++++------------- tests/testKernelLongGrav.c | 11 +++ 4 files changed, 52 insertions(+), 77 deletions(-) diff --git a/src/approx_math.h b/src/approx_math.h index 4015e6040b..70cb3e203c 100644 --- a/src/approx_math.h +++ b/src/approx_math.h @@ -21,31 +21,6 @@ #include "inline.h" -/** - * @brief Approximate version of the complementay error function erfcf(x). - * - * This is based on eq. 7.1.27 of Abramowitz & Stegun, 1972. - * The absolute error is < 4.7*10^-4 over the range 0 < x < infinity. - * - * Returns garbage for x < 0. - * @param x The number to compute erfc for. - */ -__attribute__((always_inline, const)) INLINE static float approx_erfcf( - float x) { - - /* 1 + 0.278393*x + 0.230389*x^2 + 0.000972*x^3 + 0.078108*x^4 */ - float arg = 0.078108f; - arg = x * arg + 0.000972f; - arg = x * arg + 0.230389f; - arg = x * arg + 0.278393f; - arg = x * arg + 1.f; - - /* 1 / arg^4 */ - const float arg2 = arg * arg; - const float arg4 = arg2 * arg2; - return 1.f / arg4; -} - /** * @brief Approximate version of expf(x) using a 4th order Taylor expansion * diff --git a/src/gravity/MultiSoftening/gravity_iact.h b/src/gravity/MultiSoftening/gravity_iact.h index 944aab348c..1fb3954c56 100644 --- a/src/gravity/MultiSoftening/gravity_iact.h +++ b/src/gravity/MultiSoftening/gravity_iact.h @@ -117,8 +117,8 @@ runner_iact_grav_pp_truncated(const float r2, const float h2, const float h_inv, /* Get long-range correction */ const float u_lr = r * r_s_inv; - const float corr_f_lr = kernel_long_grav_force_eval(u_lr); - const float corr_pot_lr = kernel_long_grav_pot_eval(u_lr); + float corr_f_lr, corr_pot_lr; + kernel_long_grav_eval(u_lr, &corr_f_lr, &corr_pot_lr); *f_ij *= corr_f_lr; *pot_ij *= corr_pot_lr; } diff --git a/src/kernel_long_gravity.h b/src/kernel_long_gravity.h index f525d89f8c..5cc8f75532 100644 --- a/src/kernel_long_gravity.h +++ b/src/kernel_long_gravity.h @@ -23,7 +23,6 @@ #include "../config.h" /* Local headers. */ -#include "approx_math.h" #include "const.h" #include "inline.h" @@ -83,7 +82,7 @@ kernel_long_grav_derivatives(const float r, const float r_s_inv, const float u2 = u * u; const float u4 = u2 * u2; - const float e = expf(-u2); + const float exp_u2 = expf(-u2); /* Compute erfcf(u) using eq. 7.1.25 of * Abramowitz & Stegun, 1972. @@ -99,11 +98,11 @@ kernel_long_grav_derivatives(const float r, const float r_s_inv, a = a * t + 0.3480242f; a = a * t; - const float erfc_u = a * e; + const float erfc_u = a * exp_u2; /* C = (1/sqrt(pi)) * expf(-u^2) */ const float one_over_sqrt_pi = ((float)(M_2_SQRTPI * 0.5)); - const float common_factor = one_over_sqrt_pi * e; + const float common_factor = one_over_sqrt_pi * exp_u2; /* (1/r_s)^n * C */ const float r_s_inv_times_C = r_s_inv * common_factor; @@ -177,74 +176,64 @@ kernel_long_grav_derivatives(const float r, const float r_s_inv, } /** - * @brief Computes the long-range correction term for the potential calculation - * coming from FFT. + * @brief Computes the long-range correction terms for the potential and + * force calculations due to the mesh truncation. + * + * We use an approximation to the erfc() that gives a *relative* accuracy + * for the potential tem of 3.4e-3 and 2.4e-4 for the force term over the + * range [0, 5] of r_over_r_s. + * The accuracy is much better in the range [0, 2] (6e-5 and 2e-5 respectively). * * @param u The ratio of the distance to the FFT cell scale \f$u = r/r_s\f$. */ -__attribute__((const)) INLINE static float kernel_long_grav_pot_eval( - const float u) { +__attribute__((nonnull)) INLINE static void kernel_long_grav_eval( + const float r_over_r_s, float *restrict corr_f, float *restrict corr_pot) { #ifdef GADGET2_LONG_RANGE_CORRECTION - const float arg1 = u * 0.5f; -#ifdef GRAVITY_USE_EXACT_LONG_RANGE_MATH - return erfcf(arg1); -#else - return approx_erfcf(arg1); -#endif - -#else + const float two_over_sqrt_pi = ((float)M_2_SQRTPI); - const float x = 2.f * u; - const float exp_x = expf(x); // good_approx_expf(x); - const float alpha = 1.f / (1.f + exp_x); - - /* We want 2 - 2 exp(x) * alpha */ - float W = 1.f - alpha * exp_x; - W = W * 2.f; - - return W; -#endif -} + const float u = 0.5f * r_over_r_s; + const float u2 = u * u; + const float exp_u2 = expf(-u2); -/** - * @brief Computes the long-range correction term for the force calculation - * coming from FFT. - * - * @param u The ratio of the distance to the FFT cell scale \f$u = r/r_s\f$. - */ -__attribute__((const)) INLINE static float kernel_long_grav_force_eval( - const float u) { + /* Compute erfcf(u) using eq. 7.1.25 of + * Abramowitz & Stegun, 1972. + * + * This has a *relative* error of less than 4e-3 over + * the range of interest (0 < u < 5) */ -#ifdef GADGET2_LONG_RANGE_CORRECTION + const float t = 1.f / (1.f + 0.47047f * u); - static const float one_over_sqrt_pi = ((float)(M_2_SQRTPI * 0.5)); + /* 0.3480242 * t - 0.0958798 * t^2 + 0.7478556 * t^3 */ + float a = 0.7478556f; + a = a * t - 0.0958798f; + a = a * t + 0.3480242f; + a = a * t; - const float arg1 = u * 0.5f; - const float arg2 = -arg1 * arg1; + const float erfc_u = a * exp_u2; -#ifdef GRAVITY_USE_EXACT_LONG_RANGE_MATH - const float term1 = erfcf(arg1); -#else - const float term1 = approx_erfcf(arg1); -#endif - const float term2 = u * one_over_sqrt_pi * expf(arg2); + *corr_pot = erfc_u; + *corr_f = erfc_u + two_over_sqrt_pi * u * exp_u2; - return term1 + term2; #else - - const float x = 2.f * u; + const float x = 2.f * r_over_r_s; const float exp_x = expf(x); // good_approx_expf(x); const float alpha = 1.f / (1.f + exp_x); + /* We want 2 - 2 exp(x) * alpha */ + float W = 1.f - alpha * exp_x; + W = W * 2.f; + + *corr_pot = W; + /* We want 2*(x*alpha - x*alpha^2 - exp(x)*alpha + 1) */ - float W = 1.f - alpha; + W = 1.f - alpha; W = W * x - exp_x; W = W * alpha + 1.f; W = W * 2.f; - return W; + *corr_f = W; #endif } diff --git a/tests/testKernelLongGrav.c b/tests/testKernelLongGrav.c index e305e1944e..cda2df9c2a 100644 --- a/tests/testKernelLongGrav.c +++ b/tests/testKernelLongGrav.c @@ -100,6 +100,17 @@ int main(int argc, char* argv[]) { check_value(chi_swift.chi_3, chi_3, "chi_3", 1e-4, r, r_s); check_value(chi_swift.chi_4, chi_4, "chi_4", 4e-4, r, r_s); check_value(chi_swift.chi_5, chi_5, "chi_5", 4e-4, r, r_s); + + /* Compute the expression for individual particles */ + float swift_corr_f_lr, swift_corr_pot_lr; + kernel_long_grav_eval(r / r_s, &swift_corr_f_lr, &swift_corr_pot_lr); + + /* And the exact ones */ + const double corr_pot = erfc(u); + const double corr_f = erfc(u) + M_2_SQRTPI * u * exp(-u * u); + + check_value(swift_corr_pot_lr, corr_pot, "corr_pot", 3.4e-3, r, r_s); + check_value(swift_corr_f_lr, corr_f, "corr_f", 2.4e-4, r, r_s); } } -- GitLab