Added a fully inline approximation to exp() for use at the heart of the gravity code

2bdafa04 · Matthieu Schaller · 076d7229 · 2bdafa04 · 2bdafa04 · 2bdafa04
Commit 2bdafa04 authored 5 years ago by Matthieu Schaller
--- a/.gitignore
+++ b/.gitignore
@@ -114,6 +114,7 @@ tests/testReading
 tests/testSingle
 tests/testTimeIntegration
 tests/testSPHStep
+tests/testExp
 tests/testKernel
 tests/testKernelGrav
 tests/testKernelLongGrav

--- a/src/exp.h
+++ b/src/exp.h
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2020 Matthieu Schaller (schaller@strw.leidenuniv.nl)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+#ifndef SWIFT_OPTIMIZED_EXP_H
+#define SWIFT_OPTIMIZED_EXP_H
+
+/* Config parameters. */
+#include "../config.h"
+
+/* Local headers. */
+#include "inline.h"
+
+/**
+ * @brief Compute the exponential of a number.
+ *
+ * This function has a relative accuracy of 1.618e-6 over the input
+ * range [-32., 32.].
+ *
+ * No range check is performed on the input: the result is built by
+ * adding directly to the exponent bits of a float, so values of x far
+ * outside the range above are not guaranteed to give a meaningful answer.
+ *
+ * The bit manipulation assumes that float is an IEEE-754 single-precision
+ * number (23 mantissa bits) and that int has the same width as float.
+ *
+ * @param x The number to take the exponential of.
+ * @return An approximation of e^x.
+ */
+__attribute__((always_inline, const)) INLINE static float optimized_expf(
+    const float x) {
+
+  /* Let's first express e^x as 2^i * e^f with
+   * f in the range [-ln(2)/2, ln(2)/2] */
+  const float i = rintf(x * ((float)M_LOG2E));
+  const float f = x - ((float)M_LN2) * i;
+
+  /* We can now compute exp(f) using a polynomial
+   * approximation valid over the range [-ln(2)/2, ln(2)/2].
+   * The coefficients come from the Cephes library and
+   * have been obtained using a minmax algorithm.
+   * The polynomial is evaluated with Horner's scheme. */
+  float exp_f = 0.041944388f;
+  exp_f = exp_f * f + 0.168006673f;
+  exp_f = exp_f * f + 0.499999940f;
+  exp_f = exp_f * f + 0.999956906f;
+  exp_f = exp_f * f + 0.999999642f;
+
+  /* Gives access to the bit pattern of the float as an integer */
+  union {
+    int i;
+    float f;
+  } e;
+
+  /* We can now construct the result by taking exp_f
+   * as the mantissa of the answer and adding i to the
+   * exponent part of the floating-point number, i.e.
+   * adding i * 2^23 to its integer representation.
+   *
+   * i is negative whenever x is sufficiently negative and
+   * left-shifting a negative signed integer is undefined
+   * behaviour in C, so we multiply by 2^23 rather than
+   * shifting i by 23 bits. */
+  e.f = exp_f;
+  e.i += ((int)i) * (1 << 23);
+
+  return e.f;
+}
+
+#endif /* SWIFT_OPTIMIZED_EXP_H */
--- a/src/kernel_long_gravity.h
+++ b/src/kernel_long_gravity.h
@@ -24,6 +24,7 @@

 /* Local headers. */
 #include "const.h"
+#include "exp.h"
 #include "inline.h"

 /* Standard headers */
@@ -82,7 +83,7 @@ kernel_long_grav_derivatives(const float r, const float r_s_inv,
  const float u2 = u * u;
  const float u4 = u2 * u2;

-  const float exp_u2 = expf(-u2);
+  const float exp_u2 = optimized_expf(-u2);

  /* Compute erfcf(u) using eq. 7.1.25 of
   * Abramowitz & Stegun, 1972.
@@ -195,7 +196,7 @@ __attribute__((nonnull)) INLINE static void kernel_long_grav_eval(

  const float u = 0.5f * r_over_r_s;
  const float u2 = u * u;
-  const float exp_u2 = expf(-u2);
+  const float exp_u2 = optimized_expf(-u2);

  /* Compute erfcf(u) using eq. 7.1.25 of
   * Abramowitz & Stegun, 1972.

--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -21,7 +21,7 @@ AM_LDFLAGS = ../src/.libs/libswiftsim.a $(HDF5_LDFLAGS) $(HDF5_LIBS) $(FFTW_LIBS

 # List of programs and scripts to run in the test suite
 TESTS = testGreetings testMaths testReading.sh testKernel testKernelLongGrav \
-        testActivePair.sh test27cells.sh test27cellsPerturbed.sh  \
+        testActivePair.sh test27cells.sh test27cellsPerturbed.sh testExp \
        testParser.sh test125cells.sh test125cellsPerturbed.sh testFFT \
        testAdiabaticIndex testRandom testRandomSpacing \
        testMatrixInversion testThreadpool testDump testLogger testInteractions.sh \
@@ -35,7 +35,7 @@ TESTS = testGreetings testMaths testReading.sh testKernel testKernelLongGrav \
 # List of test programs to compile
 check_PROGRAMS = testGreetings testReading testTimeIntegration testKernelLongGrav \
 		 testActivePair test27cells test27cells_subset test125cells testParser \
-                 testKernel testFFT testInteractions testMaths testRandom \
+                 testKernel testFFT testInteractions testMaths testRandom testExp \
                 testSymmetry testThreadpool testRandomSpacing \
                 testAdiabaticIndex testRiemannExact testRiemannTRRS \
                 testRiemannHLLC testMatrixInversion testDump testLogger \
@@ -124,6 +124,8 @@ testDump_SOURCES = testDump.c

 testLogger_SOURCES = testLogger.c

+testExp_SOURCES = testExp.c
+
 testGravityDerivatives_SOURCES = testGravityDerivatives.c

 testGravitySpeed_SOURCES = testGravitySpeed.c

--- a/tests/testExp.c
+++ b/tests/testExp.c
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (C) 2020 Matthieu Schaller (schaller@strw.leidenuniv.nl)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+#include "../config.h"
+
+#include "swift.h"
+
+/* Standard includes */
+#include <fenv.h>
+#include <math.h>
+
+/**
+ * @brief Check that a and b are consistent (up to some relative error)
+ *
+ * The discrepancy is measured as |a - b| / |a + b| and an error is
+ * raised through the error() macro if it exceeds the tolerance.
+ *
+ * @param a First value
+ * @param b Second value
+ * @param tol Largest accepted value of |a - b| / |a + b|
+ * @param x Input value the comparison was made for (only used in the
+ * error message)
+ */
+void check_value(double a, double b, const double tol, const double x) {
+
+  /* Symmetric relative difference between the two values */
+  const double rel_diff = fabs(a - b) / fabs(a + b);
+
+  if (rel_diff > tol) {
+    error("Values are inconsistent: %12.15e %12.15e rel=%e (for x=%e).", a, b,
+          rel_diff, x);
+  }
+}
+
+/**
+ * Test driver comparing optimized_expf() against the double-precision
+ * exp() for positive and negative arguments with magnitude in [0, 32).
+ *
+ * Neither argc nor argv is used. The function returns 0 once the loop
+ * has completed; mismatches are reported by check_value().
+ */
+int main(int argc, char* argv[]) {
+
+  /* Initialize CPU frequency, this also starts time. */
+  unsigned long long cpufreq = 0;
+  clocks_set_cpufreq(cpufreq);
+
+/* Choke on FPEs: when feenableexcept() is available, trap on division by
+ * zero, invalid operations and overflows. */
+#ifdef HAVE_FE_ENABLE_EXCEPT
+  feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW);
+#endif
+
+  /* Get some randomness going: seed rand() with the current time and
+   * print the seed.
+   * NOTE(review): no rand() call follows in this function, so the seed
+   * looks unused by the checks below - confirm whether it is needed. */
+  const int seed = time(NULL);
+  message("Seed = %d", seed);
+  srand(seed);
+
+  /* Loop over some values, testing both x and -x at every step.
+   * x is a float whereas the increment 0.000001 is a double constant, so
+   * each update is rounded back to float.
+   * NOTE(review): the spacing between successive samples hence follows
+   * the float resolution at x and is not exactly 1e-6 - confirm this
+   * sampling is what is intended. */
+  for (float x = 0.; x < 32.; x += 0.000001) {
+    
+    const double exact_p = exp(x);
+    const double exact_n = exp(-x);
+    const double swift_exp_p = optimized_expf(x);
+    const double swift_exp_n = optimized_expf(-x);
+
+    check_value(exact_p, swift_exp_p, 1.618e-6, x);
+    check_value(exact_n, swift_exp_n, 1.618e-6, x);
+  }
+
+  return 0;
+}
+