Commit 2bdafa04 authored by Matthieu Schaller's avatar Matthieu Schaller

Added a fully inline approximation to exp() for use at the heart of the gravity code

parent 076d7229
......@@ -114,6 +114,7 @@ tests/testReading
tests/testSingle
tests/testTimeIntegration
tests/testSPHStep
tests/testExp
tests/testKernel
tests/testKernelGrav
tests/testKernelLongGrav
......
/*******************************************************************************
* This file is part of SWIFT.
* Copyright (c) 2020 Matthieu Schaller (schaller@strw.leidenuniv.nl)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
******************************************************************************/
#ifndef SWIFT_OPTIMIZED_EXP_H
#define SWIFT_OPTIMIZED_EXP_H
/* Config parameters. */
#include "../config.h"
/* Local headers. */
#include "inline.h"
/**
* @brief Compute the exponential of a number.
*
* This function has a relative accuracy of 1.618e-6 over the input
* range [-32., 32.].
*
* @param x The number to take the exponential of.
*/
__attribute__((always_inline, const)) INLINE static float optimized_expf(
const float x) {
/* Let's first express e^x as 2^i * e^f with
* f in the range [-ln(2)/2, ln(2)/2] */
const float i = rintf(x * ((float)M_LOG2E));
const float f = x - ((float)M_LN2) * i;
/* We can now compute exp(f) using a polynomial
* approximation valid over the range [-ln(2)/2, ln(2)/2].
* The coefficients come from the Cephes library and
* have been obtained using a minmax algorithm */
float exp_f = 0.041944388f;
exp_f = exp_f * f + 0.168006673f;
exp_f = exp_f * f + 0.499999940f;
exp_f = exp_f * f + 0.999956906f;
exp_f = exp_f * f + 0.999999642f;
union {
int i;
float f;
} e;
/* We can now construct the result by taking exp_f
* as the mantissa of the answer and bit-shifting i
* into the exponent part of the floating-point
* number */
e.f = exp_f;
e.i += ((int)i) << 23;
return e.f;
}
#endif /* SWIFT_OPTIMIZED_EXP_H */
......@@ -24,6 +24,7 @@
/* Local headers. */
#include "const.h"
#include "exp.h"
#include "inline.h"
/* Standard headers */
......@@ -82,7 +83,7 @@ kernel_long_grav_derivatives(const float r, const float r_s_inv,
const float u2 = u * u;
const float u4 = u2 * u2;
const float exp_u2 = expf(-u2);
const float exp_u2 = optimized_expf(-u2);
/* Compute erfcf(u) using eq. 7.1.25 of
* Abramowitz & Stegun, 1972.
......@@ -195,7 +196,7 @@ __attribute__((nonnull)) INLINE static void kernel_long_grav_eval(
const float u = 0.5f * r_over_r_s;
const float u2 = u * u;
const float exp_u2 = expf(-u2);
const float exp_u2 = optimized_expf(-u2);
/* Compute erfcf(u) using eq. 7.1.25 of
* Abramowitz & Stegun, 1972.
......
......@@ -21,7 +21,7 @@ AM_LDFLAGS = ../src/.libs/libswiftsim.a $(HDF5_LDFLAGS) $(HDF5_LIBS) $(FFTW_LIBS
# List of programs and scripts to run in the test suite
TESTS = testGreetings testMaths testReading.sh testKernel testKernelLongGrav \
testActivePair.sh test27cells.sh test27cellsPerturbed.sh \
testActivePair.sh test27cells.sh test27cellsPerturbed.sh testExp \
testParser.sh test125cells.sh test125cellsPerturbed.sh testFFT \
testAdiabaticIndex testRandom testRandomSpacing \
testMatrixInversion testThreadpool testDump testLogger testInteractions.sh \
......@@ -35,7 +35,7 @@ TESTS = testGreetings testMaths testReading.sh testKernel testKernelLongGrav \
# List of test programs to compile
check_PROGRAMS = testGreetings testReading testTimeIntegration testKernelLongGrav \
testActivePair test27cells test27cells_subset test125cells testParser \
testKernel testFFT testInteractions testMaths testRandom \
testKernel testFFT testInteractions testMaths testRandom testExp \
testSymmetry testThreadpool testRandomSpacing \
testAdiabaticIndex testRiemannExact testRiemannTRRS \
testRiemannHLLC testMatrixInversion testDump testLogger \
......@@ -124,6 +124,8 @@ testDump_SOURCES = testDump.c
testLogger_SOURCES = testLogger.c
testExp_SOURCES = testExp.c
testGravityDerivatives_SOURCES = testGravityDerivatives.c
testGravitySpeed_SOURCES = testGravitySpeed.c
......
/*******************************************************************************
* This file is part of SWIFT.
* Copyright (C) 2020 Matthieu Schaller (schaller@strw.leidenuniv.nl)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
******************************************************************************/
#include "../config.h"
#include "swift.h"
/* Standard includes */
#include <fenv.h>
#include <math.h>
/**
* @brief Check that a and b are consistent (up to some relative error)
*
* @param a First value
* @param b Second value
* @param s String used to identify this check in messages
*/
void check_value(double a, double b, const double tol, const double x) {
if (fabs(a - b) / fabs(a + b) > tol)
error(
"Values are inconsistent: %12.15e %12.15e rel=%e (for x=%e).",
a, b, fabs(a - b) / fabs(a + b), x);
}
int main(int argc, char* argv[]) {
/* Initialize CPU frequency, this also starts time. */
unsigned long long cpufreq = 0;
clocks_set_cpufreq(cpufreq);
/* Choke on FPEs */
#ifdef HAVE_FE_ENABLE_EXCEPT
feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW);
#endif
/* Get some randomness going */
const int seed = time(NULL);
message("Seed = %d", seed);
srand(seed);
/* Loop over some values */
for (float x = 0.; x < 32.; x += 0.000001) {
const double exact_p = exp(x);
const double exact_n = exp(-x);
const double swift_exp_p = optimized_expf(x);
const double swift_exp_n = optimized_expf(-x);
check_value(exact_p, swift_exp_p, 1.618e-6, x);
check_value(exact_n, swift_exp_n, 1.618e-6, x);
}
return 0;
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment