Skip to content
Snippets Groups Projects
Commit a9d58e80 authored by Matthieu Schaller's avatar Matthieu Schaller
Browse files

Apply the same improvement to the functions called in the direct interaction

parent 898bafbb
Branches
Tags
1 merge request!1077Improved multipole acceptance criterion (MAC)
...@@ -21,31 +21,6 @@ ...@@ -21,31 +21,6 @@
#include "inline.h" #include "inline.h"
/**
* @brief Approximate version of the complementay error function erfcf(x).
*
* This is based on eq. 7.1.27 of Abramowitz & Stegun, 1972.
* The absolute error is < 4.7*10^-4 over the range 0 < x < infinity.
*
* Returns garbage for x < 0.
* @param x The number to compute erfc for.
*/
__attribute__((always_inline, const)) INLINE static float approx_erfcf(
float x) {
/* 1 + 0.278393*x + 0.230389*x^2 + 0.000972*x^3 + 0.078108*x^4 */
float arg = 0.078108f;
arg = x * arg + 0.000972f;
arg = x * arg + 0.230389f;
arg = x * arg + 0.278393f;
arg = x * arg + 1.f;
/* 1 / arg^4 */
const float arg2 = arg * arg;
const float arg4 = arg2 * arg2;
return 1.f / arg4;
}
/** /**
* @brief Approximate version of expf(x) using a 4th order Taylor expansion * @brief Approximate version of expf(x) using a 4th order Taylor expansion
* *
......
...@@ -117,8 +117,8 @@ runner_iact_grav_pp_truncated(const float r2, const float h2, const float h_inv, ...@@ -117,8 +117,8 @@ runner_iact_grav_pp_truncated(const float r2, const float h2, const float h_inv,
/* Get long-range correction */ /* Get long-range correction */
const float u_lr = r * r_s_inv; const float u_lr = r * r_s_inv;
const float corr_f_lr = kernel_long_grav_force_eval(u_lr); float corr_f_lr, corr_pot_lr;
const float corr_pot_lr = kernel_long_grav_pot_eval(u_lr); kernel_long_grav_eval(u_lr, &corr_f_lr, &corr_pot_lr);
*f_ij *= corr_f_lr; *f_ij *= corr_f_lr;
*pot_ij *= corr_pot_lr; *pot_ij *= corr_pot_lr;
} }
......
...@@ -23,7 +23,6 @@ ...@@ -23,7 +23,6 @@
#include "../config.h" #include "../config.h"
/* Local headers. */ /* Local headers. */
#include "approx_math.h"
#include "const.h" #include "const.h"
#include "inline.h" #include "inline.h"
...@@ -83,7 +82,7 @@ kernel_long_grav_derivatives(const float r, const float r_s_inv, ...@@ -83,7 +82,7 @@ kernel_long_grav_derivatives(const float r, const float r_s_inv,
const float u2 = u * u; const float u2 = u * u;
const float u4 = u2 * u2; const float u4 = u2 * u2;
const float e = expf(-u2); const float exp_u2 = expf(-u2);
/* Compute erfcf(u) using eq. 7.1.25 of /* Compute erfcf(u) using eq. 7.1.25 of
* Abramowitz & Stegun, 1972. * Abramowitz & Stegun, 1972.
...@@ -99,11 +98,11 @@ kernel_long_grav_derivatives(const float r, const float r_s_inv, ...@@ -99,11 +98,11 @@ kernel_long_grav_derivatives(const float r, const float r_s_inv,
a = a * t + 0.3480242f; a = a * t + 0.3480242f;
a = a * t; a = a * t;
const float erfc_u = a * e; const float erfc_u = a * exp_u2;
/* C = (1/sqrt(pi)) * expf(-u^2) */ /* C = (1/sqrt(pi)) * expf(-u^2) */
const float one_over_sqrt_pi = ((float)(M_2_SQRTPI * 0.5)); const float one_over_sqrt_pi = ((float)(M_2_SQRTPI * 0.5));
const float common_factor = one_over_sqrt_pi * e; const float common_factor = one_over_sqrt_pi * exp_u2;
/* (1/r_s)^n * C */ /* (1/r_s)^n * C */
const float r_s_inv_times_C = r_s_inv * common_factor; const float r_s_inv_times_C = r_s_inv * common_factor;
...@@ -177,74 +176,64 @@ kernel_long_grav_derivatives(const float r, const float r_s_inv, ...@@ -177,74 +176,64 @@ kernel_long_grav_derivatives(const float r, const float r_s_inv,
} }
/** /**
* @brief Computes the long-range correction term for the potential calculation * @brief Computes the long-range correction terms for the potential and
* coming from FFT. * force calculations due to the mesh truncation.
*
* We use an approximation to the erfc() that gives a *relative* accuracy
* for the potential tem of 3.4e-3 and 2.4e-4 for the force term over the
* range [0, 5] of r_over_r_s.
* The accuracy is much better in the range [0, 2] (6e-5 and 2e-5 respectively).
* *
* @param u The ratio of the distance to the FFT cell scale \f$u = r/r_s\f$. * @param u The ratio of the distance to the FFT cell scale \f$u = r/r_s\f$.
*/ */
__attribute__((const)) INLINE static float kernel_long_grav_pot_eval( __attribute__((nonnull)) INLINE static void kernel_long_grav_eval(
const float u) { const float r_over_r_s, float *restrict corr_f, float *restrict corr_pot) {
#ifdef GADGET2_LONG_RANGE_CORRECTION #ifdef GADGET2_LONG_RANGE_CORRECTION
const float arg1 = u * 0.5f; const float two_over_sqrt_pi = ((float)M_2_SQRTPI);
#ifdef GRAVITY_USE_EXACT_LONG_RANGE_MATH
return erfcf(arg1);
#else
return approx_erfcf(arg1);
#endif
#else
const float x = 2.f * u; const float u = 0.5f * r_over_r_s;
const float exp_x = expf(x); // good_approx_expf(x); const float u2 = u * u;
const float alpha = 1.f / (1.f + exp_x); const float exp_u2 = expf(-u2);
/* We want 2 - 2 exp(x) * alpha */
float W = 1.f - alpha * exp_x;
W = W * 2.f;
return W;
#endif
}
/** /* Compute erfcf(u) using eq. 7.1.25 of
* @brief Computes the long-range correction term for the force calculation * Abramowitz & Stegun, 1972.
* coming from FFT. *
* * This has a *relative* error of less than 4e-3 over
* @param u The ratio of the distance to the FFT cell scale \f$u = r/r_s\f$. * the range of interest (0 < u < 5) */
*/
__attribute__((const)) INLINE static float kernel_long_grav_force_eval(
const float u) {
#ifdef GADGET2_LONG_RANGE_CORRECTION const float t = 1.f / (1.f + 0.47047f * u);
static const float one_over_sqrt_pi = ((float)(M_2_SQRTPI * 0.5)); /* 0.3480242 * t - 0.0958798 * t^2 + 0.7478556 * t^3 */
float a = 0.7478556f;
a = a * t - 0.0958798f;
a = a * t + 0.3480242f;
a = a * t;
const float arg1 = u * 0.5f; const float erfc_u = a * exp_u2;
const float arg2 = -arg1 * arg1;
#ifdef GRAVITY_USE_EXACT_LONG_RANGE_MATH *corr_pot = erfc_u;
const float term1 = erfcf(arg1); *corr_f = erfc_u + two_over_sqrt_pi * u * exp_u2;
#else
const float term1 = approx_erfcf(arg1);
#endif
const float term2 = u * one_over_sqrt_pi * expf(arg2);
return term1 + term2;
#else #else
const float x = 2.f * r_over_r_s;
const float x = 2.f * u;
const float exp_x = expf(x); // good_approx_expf(x); const float exp_x = expf(x); // good_approx_expf(x);
const float alpha = 1.f / (1.f + exp_x); const float alpha = 1.f / (1.f + exp_x);
/* We want 2 - 2 exp(x) * alpha */
float W = 1.f - alpha * exp_x;
W = W * 2.f;
*corr_pot = W;
/* We want 2*(x*alpha - x*alpha^2 - exp(x)*alpha + 1) */ /* We want 2*(x*alpha - x*alpha^2 - exp(x)*alpha + 1) */
float W = 1.f - alpha; W = 1.f - alpha;
W = W * x - exp_x; W = W * x - exp_x;
W = W * alpha + 1.f; W = W * alpha + 1.f;
W = W * 2.f; W = W * 2.f;
return W; *corr_f = W;
#endif #endif
} }
......
...@@ -100,6 +100,17 @@ int main(int argc, char* argv[]) { ...@@ -100,6 +100,17 @@ int main(int argc, char* argv[]) {
check_value(chi_swift.chi_3, chi_3, "chi_3", 1e-4, r, r_s); check_value(chi_swift.chi_3, chi_3, "chi_3", 1e-4, r, r_s);
check_value(chi_swift.chi_4, chi_4, "chi_4", 4e-4, r, r_s); check_value(chi_swift.chi_4, chi_4, "chi_4", 4e-4, r, r_s);
check_value(chi_swift.chi_5, chi_5, "chi_5", 4e-4, r, r_s); check_value(chi_swift.chi_5, chi_5, "chi_5", 4e-4, r, r_s);
/* Compute the expression for individual particles */
float swift_corr_f_lr, swift_corr_pot_lr;
kernel_long_grav_eval(r / r_s, &swift_corr_f_lr, &swift_corr_pot_lr);
/* And the exact ones */
const double corr_pot = erfc(u);
const double corr_f = erfc(u) + M_2_SQRTPI * u * exp(-u * u);
check_value(swift_corr_pot_lr, corr_pot, "corr_pot", 3.4e-3, r, r_s);
check_value(swift_corr_f_lr, corr_f, "corr_f", 2.4e-4, r, r_s);
} }
} }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment