Use an approximation to erfc() instead of an external function call to the math library.

9e7231b4 · Matthieu Schaller · a3f7f044 · 9e7231b4 · 9e7231b4
Commit 9e7231b4 authored 7 years ago by Matthieu Schaller
--- a/src/approx_math.h
+++ b/src/approx_math.h
@@ -21,6 +21,31 @@
 #include "inline.h"
+/**
+ * @brief Approximate version of the complementay error function erfcf(x).
+ *
+ * This is based on eq. 7.1.27 of Abramowitz & Stegun, 1972.
+ * The absolute error is < 4.7*10^-4 over the range 0 < x < infinity.
+ *
+ * Returns garbage for x < 0.
+ * @param x The number to compute erfc for.
+ */
+__attribute__((always_inline, const)) INLINE static float approx_erfcf(float x) {
+  /* 1 + 0.278393*x + 0.230389*x^2 + 0.000972*x^3 + 0.078108*x^4 */
+  float arg = 0.078108f;
+  arg = x * arg + 0.000972f;
+  arg = x * arg + 0.230389f;
+  arg = x * arg + 0.278393f;
+  arg = x * arg + 1.f;
+  /* 1 / arg^4 */
+  const float arg2 = arg * arg;
+  const float arg4 = arg2 * arg2;  
+  return 1.f / arg4;
+}
 /**
 * @brief Approximate version of expf(x) using a 4th order Taylor expansion
 *

--- a/src/kernel_long_gravity.h
+++ b/src/kernel_long_gravity.h
@@ -90,7 +90,7 @@ __attribute__((always_inline)) INLINE static void kernel_long_grav_derivatives(
  const float r_s_inv5 = r_s_inv4 * r_s_inv;
  /* Derivatives of \chi */
-  derivs->chi_0 = erfcf(u);
+  derivs->chi_0 = approx_erfcf(u);
  derivs->chi_1 = -r_s_inv;
  derivs->chi_2 = r_s_inv2 * u;
  derivs->chi_3 = -r_s_inv3 * (u2 - 0.5f);
@@ -158,7 +158,7 @@ __attribute__((always_inline)) INLINE static void kernel_long_grav_pot_eval(
 #ifdef GADGET2_LONG_RANGE_CORRECTION
  const float arg1 = u * 0.5f;
-  const float term1 = erfcf(arg1);
+  const float term1 = approx_erfcf(arg1);
  *W = term1;
 #else
@@ -190,7 +190,7 @@ __attribute__((always_inline)) INLINE static void kernel_long_grav_force_eval(
  const float arg1 = u * 0.5f;
  const float arg2 = -arg1 * arg1;
-  const float term1 = erfcf(arg1);
+  const float term1 = approx_erfcf(arg1);
  const float term2 = u * one_over_sqrt_pi * expf(arg2);
  *W = term1 + term2;