Commit 2bdafa04 by Matthieu Schaller

Added a fully inline approximation to exp() for use at the heart of the gravity code

parent 076d7229
 ... ... @@ -114,6 +114,7 @@ tests/testReading tests/testSingle tests/testTimeIntegration tests/testSPHStep tests/testExp tests/testKernel tests/testKernelGrav tests/testKernelLongGrav ... ...
src/exp.h 0 → 100644
 /******************************************************************************* * This file is part of SWIFT. * Copyright (c) 2020 Matthieu Schaller (schaller@strw.leidenuniv.nl) * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published * by the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program. If not, see . * ******************************************************************************/ #ifndef SWIFT_OPTIMIZED_EXP_H #define SWIFT_OPTIMIZED_EXP_H /* Config parameters. */ #include "../config.h" /* Local headers. */ #include "inline.h" /** * @brief Compute the exponential of a number. * * This function has a relative accuracy of 1.618e-6 over the input * range [-32., 32.]. * * @param x The number to take the exponential of. */ __attribute__((always_inline, const)) INLINE static float optimized_expf( const float x) { /* Let's first express e^x as 2^i * e^f with * f in the range [-ln(2)/2, ln(2)/2] */ const float i = rintf(x * ((float)M_LOG2E)); const float f = x - ((float)M_LN2) * i; /* We can now compute exp(f) using a polynomial * approximation valid over the range [-ln(2)/2, ln(2)/2]. * The coefficients come from the Cephes library and * have been obtained using a minmax algorithm */ float exp_f = 0.041944388f; exp_f = exp_f * f + 0.168006673f; exp_f = exp_f * f + 0.499999940f; exp_f = exp_f * f + 0.999956906f; exp_f = exp_f * f + 0.999999642f; union { int i; float f; } e; /* We can now construct the result by taking exp_f * as the mantissa of the answer and bit-shifting i * into the exponent part of the floating-point * number */ e.f = exp_f; e.i += ((int)i) << 23; return e.f; } #endif /* SWIFT_OPTIMIZED_EXP_H */
 ... ... @@ -24,6 +24,7 @@ /* Local headers. */ #include "const.h" #include "exp.h" #include "inline.h" /* Standard headers */ ... ... @@ -82,7 +83,7 @@ kernel_long_grav_derivatives(const float r, const float r_s_inv, const float u2 = u * u; const float u4 = u2 * u2; const float exp_u2 = expf(-u2); const float exp_u2 = optimized_expf(-u2); /* Compute erfcf(u) using eq. 7.1.25 of * Abramowitz & Stegun, 1972. ... ... @@ -195,7 +196,7 @@ __attribute__((nonnull)) INLINE static void kernel_long_grav_eval( const float u = 0.5f * r_over_r_s; const float u2 = u * u; const float exp_u2 = expf(-u2); const float exp_u2 = optimized_expf(-u2); /* Compute erfcf(u) using eq. 7.1.25 of * Abramowitz & Stegun, 1972. ... ...
 ... ... @@ -21,7 +21,7 @@ AM_LDFLAGS = ../src/.libs/libswiftsim.a \$(HDF5_LDFLAGS) \$(HDF5_LIBS) \$(FFTW_LIBS # List of programs and scripts to run in the test suite TESTS = testGreetings testMaths testReading.sh testKernel testKernelLongGrav \ testActivePair.sh test27cells.sh test27cellsPerturbed.sh \ testActivePair.sh test27cells.sh test27cellsPerturbed.sh testExp \ testParser.sh test125cells.sh test125cellsPerturbed.sh testFFT \ testAdiabaticIndex testRandom testRandomSpacing \ testMatrixInversion testThreadpool testDump testLogger testInteractions.sh \ ... ... @@ -35,7 +35,7 @@ TESTS = testGreetings testMaths testReading.sh testKernel testKernelLongGrav \ # List of test programs to compile check_PROGRAMS = testGreetings testReading testTimeIntegration testKernelLongGrav \ testActivePair test27cells test27cells_subset test125cells testParser \ testKernel testFFT testInteractions testMaths testRandom \ testKernel testFFT testInteractions testMaths testRandom testExp \ testSymmetry testThreadpool testRandomSpacing \ testAdiabaticIndex testRiemannExact testRiemannTRRS \ testRiemannHLLC testMatrixInversion testDump testLogger \ ... ... @@ -124,6 +124,8 @@ testDump_SOURCES = testDump.c testLogger_SOURCES = testLogger.c testExp_SOURCES = testExp.c testGravityDerivatives_SOURCES = testGravityDerivatives.c testGravitySpeed_SOURCES = testGravitySpeed.c ... ...
tests/testExp.c 0 → 100644
 /******************************************************************************* * This file is part of SWIFT. * Copyright (C) 2020 Matthieu Schaller (schaller@strw.leidenuniv.nl) * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published * by the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program. If not, see . * ******************************************************************************/ #include "../config.h" #include "swift.h" /* Standard includes */ #include #include /** * @brief Check that a and b are consistent (up to some relative error) * * @param a First value * @param b Second value * @param s String used to identify this check in messages */ void check_value(double a, double b, const double tol, const double x) { if (fabs(a - b) / fabs(a + b) > tol) error( "Values are inconsistent: %12.15e %12.15e rel=%e (for x=%e).", a, b, fabs(a - b) / fabs(a + b), x); } int main(int argc, char* argv[]) { /* Initialize CPU frequency, this also starts time. */ unsigned long long cpufreq = 0; clocks_set_cpufreq(cpufreq); /* Choke on FPEs */ #ifdef HAVE_FE_ENABLE_EXCEPT feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW); #endif /* Get some randomness going */ const int seed = time(NULL); message("Seed = %d", seed); srand(seed); /* Loop over some values */ for (float x = 0.; x < 32.; x += 0.000001) { const double exact_p = exp(x); const double exact_n = exp(-x); const double swift_exp_p = optimized_expf(x); const double swift_exp_n = optimized_expf(-x); check_value(exact_p, swift_exp_p, 1.618e-6, x); check_value(exact_n, swift_exp_n, 1.618e-6, x); } return 0; }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!