From a9d58e809bb23d88923e534d199de7ca0fdfa3f1 Mon Sep 17 00:00:00 2001
From: Matthieu Schaller <schaller@strw.leidenuniv.nl>
Date: Tue, 12 May 2020 12:22:03 +0200
Subject: [PATCH] Apply the same improvement to the functions called in the
 direct interaction

---
 src/approx_math.h                         | 25 -------
 src/gravity/MultiSoftening/gravity_iact.h |  4 +-
 src/kernel_long_gravity.h                 | 89 ++++++++++-------------
 tests/testKernelLongGrav.c                | 11 +++
 4 files changed, 52 insertions(+), 77 deletions(-)

diff --git a/src/approx_math.h b/src/approx_math.h
index 4015e6040b..70cb3e203c 100644
--- a/src/approx_math.h
+++ b/src/approx_math.h
@@ -21,31 +21,6 @@
 
 #include "inline.h"
 
-/**
- * @brief Approximate version of the complementay error function erfcf(x).
- *
- * This is based on eq. 7.1.27 of Abramowitz & Stegun, 1972.
- * The absolute error is < 4.7*10^-4 over the range 0 < x < infinity.
- *
- * Returns garbage for x < 0.
- * @param x The number to compute erfc for.
- */
-__attribute__((always_inline, const)) INLINE static float approx_erfcf(
-    float x) {
-
-  /* 1 + 0.278393*x + 0.230389*x^2 + 0.000972*x^3 + 0.078108*x^4 */
-  float arg = 0.078108f;
-  arg = x * arg + 0.000972f;
-  arg = x * arg + 0.230389f;
-  arg = x * arg + 0.278393f;
-  arg = x * arg + 1.f;
-
-  /* 1 / arg^4 */
-  const float arg2 = arg * arg;
-  const float arg4 = arg2 * arg2;
-  return 1.f / arg4;
-}
-
 /**
  * @brief Approximate version of expf(x) using a 4th order Taylor expansion
  *
diff --git a/src/gravity/MultiSoftening/gravity_iact.h b/src/gravity/MultiSoftening/gravity_iact.h
index 944aab348c..1fb3954c56 100644
--- a/src/gravity/MultiSoftening/gravity_iact.h
+++ b/src/gravity/MultiSoftening/gravity_iact.h
@@ -117,8 +117,8 @@ runner_iact_grav_pp_truncated(const float r2, const float h2, const float h_inv,
 
   /* Get long-range correction */
   const float u_lr = r * r_s_inv;
-  const float corr_f_lr = kernel_long_grav_force_eval(u_lr);
-  const float corr_pot_lr = kernel_long_grav_pot_eval(u_lr);
+  float corr_f_lr, corr_pot_lr;
+  kernel_long_grav_eval(u_lr, &corr_f_lr, &corr_pot_lr);
   *f_ij *= corr_f_lr;
   *pot_ij *= corr_pot_lr;
 }
diff --git a/src/kernel_long_gravity.h b/src/kernel_long_gravity.h
index f525d89f8c..5cc8f75532 100644
--- a/src/kernel_long_gravity.h
+++ b/src/kernel_long_gravity.h
@@ -23,7 +23,6 @@
 #include "../config.h"
 
 /* Local headers. */
-#include "approx_math.h"
 #include "const.h"
 #include "inline.h"
 
@@ -83,7 +82,7 @@ kernel_long_grav_derivatives(const float r, const float r_s_inv,
   const float u2 = u * u;
   const float u4 = u2 * u2;
 
-  const float e = expf(-u2);
+  const float exp_u2 = expf(-u2);
 
   /* Compute erfcf(u) using eq. 7.1.25 of
    * Abramowitz & Stegun, 1972.
@@ -99,11 +98,11 @@ kernel_long_grav_derivatives(const float r, const float r_s_inv,
   a = a * t + 0.3480242f;
   a = a * t;
 
-  const float erfc_u = a * e;
+  const float erfc_u = a * exp_u2;
 
   /* C = (1/sqrt(pi)) * expf(-u^2) */
   const float one_over_sqrt_pi = ((float)(M_2_SQRTPI * 0.5));
-  const float common_factor = one_over_sqrt_pi * e;
+  const float common_factor = one_over_sqrt_pi * exp_u2;
 
   /* (1/r_s)^n * C */
   const float r_s_inv_times_C = r_s_inv * common_factor;
@@ -177,74 +176,64 @@ kernel_long_grav_derivatives(const float r, const float r_s_inv,
 }
 
 /**
- * @brief Computes the long-range correction term for the potential calculation
- * coming from FFT.
+ * @brief Computes the long-range correction terms for the potential and
+ * force calculations due to the mesh truncation.
+ *
+ * We use an approximation to the erfc() that gives a *relative* accuracy
+ * for the potential tem of 3.4e-3 and 2.4e-4 for the force term over the
+ * range [0, 5] of r_over_r_s.
+ * The accuracy is much better in the range [0, 2] (6e-5 and 2e-5 respectively).
  *
  * @param u The ratio of the distance to the FFT cell scale \f$u = r/r_s\f$.
  */
-__attribute__((const)) INLINE static float kernel_long_grav_pot_eval(
-    const float u) {
+__attribute__((nonnull)) INLINE static void kernel_long_grav_eval(
+    const float r_over_r_s, float *restrict corr_f, float *restrict corr_pot) {
 
 #ifdef GADGET2_LONG_RANGE_CORRECTION
 
-  const float arg1 = u * 0.5f;
-#ifdef GRAVITY_USE_EXACT_LONG_RANGE_MATH
-  return erfcf(arg1);
-#else
-  return approx_erfcf(arg1);
-#endif
-
-#else
+  const float two_over_sqrt_pi = ((float)M_2_SQRTPI);
 
-  const float x = 2.f * u;
-  const float exp_x = expf(x);  // good_approx_expf(x);
-  const float alpha = 1.f / (1.f + exp_x);
-
-  /* We want 2 - 2 exp(x) * alpha */
-  float W = 1.f - alpha * exp_x;
-  W = W * 2.f;
-
-  return W;
-#endif
-}
+  const float u = 0.5f * r_over_r_s;
+  const float u2 = u * u;
+  const float exp_u2 = expf(-u2);
 
-/**
- * @brief Computes the long-range correction term for the force calculation
- * coming from FFT.
- *
- * @param u The ratio of the distance to the FFT cell scale \f$u = r/r_s\f$.
- */
-__attribute__((const)) INLINE static float kernel_long_grav_force_eval(
-    const float u) {
+  /* Compute erfcf(u) using eq. 7.1.25 of
+   * Abramowitz & Stegun, 1972.
+   *
+   * This has a *relative* error of less than 4e-3 over
+   * the range of interest (0 < u <  5) */
 
-#ifdef GADGET2_LONG_RANGE_CORRECTION
+  const float t = 1.f / (1.f + 0.47047f * u);
 
-  static const float one_over_sqrt_pi = ((float)(M_2_SQRTPI * 0.5));
+  /* 0.3480242 * t - 0.0958798 * t^2 + 0.7478556 * t^3 */
+  float a = 0.7478556f;
+  a = a * t - 0.0958798f;
+  a = a * t + 0.3480242f;
+  a = a * t;
 
-  const float arg1 = u * 0.5f;
-  const float arg2 = -arg1 * arg1;
+  const float erfc_u = a * exp_u2;
 
-#ifdef GRAVITY_USE_EXACT_LONG_RANGE_MATH
-  const float term1 = erfcf(arg1);
-#else
-  const float term1 = approx_erfcf(arg1);
-#endif
-  const float term2 = u * one_over_sqrt_pi * expf(arg2);
+  *corr_pot = erfc_u;
+  *corr_f = erfc_u + two_over_sqrt_pi * u * exp_u2;
 
-  return term1 + term2;
 #else
-
-  const float x = 2.f * u;
+  const float x = 2.f * r_over_r_s;
   const float exp_x = expf(x);  // good_approx_expf(x);
   const float alpha = 1.f / (1.f + exp_x);
 
+  /* We want 2 - 2 exp(x) * alpha */
+  float W = 1.f - alpha * exp_x;
+  W = W * 2.f;
+
+  *corr_pot = W;
+
   /* We want 2*(x*alpha - x*alpha^2 - exp(x)*alpha + 1) */
-  float W = 1.f - alpha;
+  W = 1.f - alpha;
   W = W * x - exp_x;
   W = W * alpha + 1.f;
   W = W * 2.f;
 
-  return W;
+  *corr_f = W;
 #endif
 }
 
diff --git a/tests/testKernelLongGrav.c b/tests/testKernelLongGrav.c
index e305e1944e..cda2df9c2a 100644
--- a/tests/testKernelLongGrav.c
+++ b/tests/testKernelLongGrav.c
@@ -100,6 +100,17 @@ int main(int argc, char* argv[]) {
       check_value(chi_swift.chi_3, chi_3, "chi_3", 1e-4, r, r_s);
       check_value(chi_swift.chi_4, chi_4, "chi_4", 4e-4, r, r_s);
       check_value(chi_swift.chi_5, chi_5, "chi_5", 4e-4, r, r_s);
+
+      /* Compute the expression for individual particles */
+      float swift_corr_f_lr, swift_corr_pot_lr;
+      kernel_long_grav_eval(r / r_s, &swift_corr_f_lr, &swift_corr_pot_lr);
+
+      /* And the exact ones */
+      const double corr_pot = erfc(u);
+      const double corr_f = erfc(u) + M_2_SQRTPI * u * exp(-u * u);
+
+      check_value(swift_corr_pot_lr, corr_pot, "corr_pot", 3.4e-3, r, r_s);
+      check_value(swift_corr_f_lr, corr_f, "corr_f", 2.4e-4, r, r_s);
     }
   }
 
-- 
GitLab