Apply the same improvement to the functions called in the direct interaction

a9d58e80 · Matthieu Schaller · 898bafbb · a9d58e80 · a9d58e80 · a9d58e80
Commit a9d58e80 authored 5 years ago by Matthieu Schaller
--- a/src/approx_math.h
+++ b/src/approx_math.h
@@ -21,31 +21,6 @@
 #include "inline.h"
-/**
- * @brief Approximate version of the complementay error function erfcf(x).
- *
- * This is based on eq. 7.1.27 of Abramowitz & Stegun, 1972.
- * The absolute error is < 4.7*10^-4 over the range 0 < x < infinity.
- *
- * Returns garbage for x < 0.
- * @param x The number to compute erfc for.
- */
-__attribute__((always_inline, const)) INLINE static float approx_erfcf(
-    float x) {
-  /* 1 + 0.278393*x + 0.230389*x^2 + 0.000972*x^3 + 0.078108*x^4 */
-  float arg = 0.078108f;
-  arg = x * arg + 0.000972f;
-  arg = x * arg + 0.230389f;
-  arg = x * arg + 0.278393f;
-  arg = x * arg + 1.f;
-  /* 1 / arg^4 */
-  const float arg2 = arg * arg;
-  const float arg4 = arg2 * arg2;
-  return 1.f / arg4;
-}
 /**
 * @brief Approximate version of expf(x) using a 4th order Taylor expansion
 *

--- a/src/gravity/MultiSoftening/gravity_iact.h
+++ b/src/gravity/MultiSoftening/gravity_iact.h
@@ -117,8 +117,8 @@ runner_iact_grav_pp_truncated(const float r2, const float h2, const float h_inv,
  /* Get long-range correction */
  const float u_lr = r * r_s_inv;
-  const float corr_f_lr = kernel_long_grav_force_eval(u_lr);
+  float corr_f_lr, corr_pot_lr;
-  const float corr_pot_lr = kernel_long_grav_pot_eval(u_lr);
+  kernel_long_grav_eval(u_lr, &corr_f_lr, &corr_pot_lr);
  *f_ij *= corr_f_lr;
  *pot_ij *= corr_pot_lr;
 }

--- a/src/kernel_long_gravity.h
+++ b/src/kernel_long_gravity.h
@@ -23,7 +23,6 @@
 #include "../config.h"
 /* Local headers. */
-#include "approx_math.h"
 #include "const.h"
 #include "inline.h"
@@ -83,7 +82,7 @@ kernel_long_grav_derivatives(const float r, const float r_s_inv,
  const float u2 = u * u;
  const float u4 = u2 * u2;
-  const float e = expf(-u2);
+  const float exp_u2 = expf(-u2);
  /* Compute erfcf(u) using eq. 7.1.25 of
   * Abramowitz & Stegun, 1972.
@@ -99,11 +98,11 @@ kernel_long_grav_derivatives(const float r, const float r_s_inv,
  a = a * t + 0.3480242f;
  a = a * t;
-  const float erfc_u = a * e;
+  const float erfc_u = a * exp_u2;
  /* C = (1/sqrt(pi)) * expf(-u^2) */
  const float one_over_sqrt_pi = ((float)(M_2_SQRTPI * 0.5));
-  const float common_factor = one_over_sqrt_pi * e;
+  const float common_factor = one_over_sqrt_pi * exp_u2;
  /* (1/r_s)^n * C */
  const float r_s_inv_times_C = r_s_inv * common_factor;
@@ -177,74 +176,64 @@ kernel_long_grav_derivatives(const float r, const float r_s_inv,
 }
 /**
- * @brief Computes the long-range correction term for the potential calculation
+ * @brief Computes the long-range correction terms for the potential and
- * coming from FFT.
+ * force calculations due to the mesh truncation.
+ *
+ * We use an approximation to the erfc() that gives a *relative* accuracy
+ * for the potential tem of 3.4e-3 and 2.4e-4 for the force term over the
+ * range [0, 5] of r_over_r_s.
+ * The accuracy is much better in the range [0, 2] (6e-5 and 2e-5 respectively).
 *
 * @param u The ratio of the distance to the FFT cell scale \f$u = r/r_s\f$.
 */
-__attribute__((const)) INLINE static float kernel_long_grav_pot_eval(
+__attribute__((nonnull)) INLINE static void kernel_long_grav_eval(
-    const float u) {
+    const float r_over_r_s, float *restrict corr_f, float *restrict corr_pot) {
 #ifdef GADGET2_LONG_RANGE_CORRECTION
-  const float arg1 = u * 0.5f;
+  const float two_over_sqrt_pi = ((float)M_2_SQRTPI);
-#ifdef GRAVITY_USE_EXACT_LONG_RANGE_MATH
-  return erfcf(arg1);
-#else
-  return approx_erfcf(arg1);
-#endif
-#else
-  const float x = 2.f * u;
+  const float u = 0.5f * r_over_r_s;
-  const float exp_x = expf(x);  // good_approx_expf(x);
+  const float u2 = u * u;
-  const float alpha = 1.f / (1.f + exp_x);
+  const float exp_u2 = expf(-u2);
-  /* We want 2 - 2 exp(x) * alpha */
-  float W = 1.f - alpha * exp_x;
-  W = W * 2.f;
-  return W;
-#endif
-}
-/**
+  /* Compute erfcf(u) using eq. 7.1.25 of
- * @brief Computes the long-range correction term for the force calculation
+   * Abramowitz & Stegun, 1972.
- * coming from FFT.
+   *
- *
+   * This has a *relative* error of less than 4e-3 over
- * @param u The ratio of the distance to the FFT cell scale \f$u = r/r_s\f$.
+   * the range of interest (0 < u <  5) */
- */
-__attribute__((const)) INLINE static float kernel_long_grav_force_eval(
-    const float u) {
-#ifdef GADGET2_LONG_RANGE_CORRECTION
+  const float t = 1.f / (1.f + 0.47047f * u);
-  static const float one_over_sqrt_pi = ((float)(M_2_SQRTPI * 0.5));
+  /* 0.3480242 * t - 0.0958798 * t^2 + 0.7478556 * t^3 */
+  float a = 0.7478556f;
+  a = a * t - 0.0958798f;
+  a = a * t + 0.3480242f;
+  a = a * t;
-  const float arg1 = u * 0.5f;
+  const float erfc_u = a * exp_u2;
-  const float arg2 = -arg1 * arg1;
-#ifdef GRAVITY_USE_EXACT_LONG_RANGE_MATH
+  *corr_pot = erfc_u;
-  const float term1 = erfcf(arg1);
+  *corr_f = erfc_u + two_over_sqrt_pi * u * exp_u2;
-#else
-  const float term1 = approx_erfcf(arg1);
-#endif
-  const float term2 = u * one_over_sqrt_pi * expf(arg2);
-  return term1 + term2;
 #else
+  const float x = 2.f * r_over_r_s;
-  const float x = 2.f * u;
  const float exp_x = expf(x);  // good_approx_expf(x);
  const float alpha = 1.f / (1.f + exp_x);
+  /* We want 2 - 2 exp(x) * alpha */
+  float W = 1.f - alpha * exp_x;
+  W = W * 2.f;
+  *corr_pot = W;
  /* We want 2*(x*alpha - x*alpha^2 - exp(x)*alpha + 1) */
-  float W = 1.f - alpha;
+  W = 1.f - alpha;
  W = W * x - exp_x;
  W = W * alpha + 1.f;
  W = W * 2.f;
-  return W;
+  *corr_f = W;
 #endif
 }

--- a/tests/testKernelLongGrav.c
+++ b/tests/testKernelLongGrav.c
@@ -100,6 +100,17 @@ int main(int argc, char* argv[]) {
      check_value(chi_swift.chi_3, chi_3, "chi_3", 1e-4, r, r_s);
      check_value(chi_swift.chi_4, chi_4, "chi_4", 4e-4, r, r_s);
      check_value(chi_swift.chi_5, chi_5, "chi_5", 4e-4, r, r_s);
+      /* Compute the expression for individual particles */
+      float swift_corr_f_lr, swift_corr_pot_lr;
+      kernel_long_grav_eval(r / r_s, &swift_corr_f_lr, &swift_corr_pot_lr);
+      /* And the exact ones */
+      const double corr_pot = erfc(u);
+      const double corr_f = erfc(u) + M_2_SQRTPI * u * exp(-u * u);
+      check_value(swift_corr_pot_lr, corr_pot, "corr_pot", 3.4e-3, r, r_s);
+      check_value(swift_corr_f_lr, corr_f, "corr_f", 2.4e-4, r, r_s);
    }
  }