Upated the kernel functions for gravity. Added a unit test to test it against the Gadget-2 kernel

9e28ec1e · Matthieu Schaller · 4f4227c4 · 9e28ec1e · 9e28ec1e · 9e28ec1e
Commit 9e28ec1e authored 9 years ago by Matthieu Schaller
--- a/.gitignore
+++ b/.gitignore
@@ -48,6 +48,7 @@ tests/testSingle
 tests/testTimeIntegration
 tests/testSPHStep
 tests/testKernel
+tests/testKernelGrav
 tests/testParser
 tests/parser_output.yml

--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -42,7 +42,7 @@ include_HEADERS = space.h runner.h queue.h task.h lock.h cell.h part.h const.h \
 AM_SOURCES = space.c runner.c queue.c task.c cell.c engine.c \
    serial_io.c timers.c debug.c scheduler.c proxy.c parallel_io.c \
    units.c common_io.c single_io.c multipole.c version.c map.c \
-    kernel_hydro.c kernel_gravity.c tools.c part.c partition.c clocks.c parser.c \
+    kernel_hydro.c tools.c part.c partition.c clocks.c parser.c \
    physical_constants.c potentials.c hydro_properties.c
 # Include files for distribution, not installation.

--- a/src/kernel_gravity.h
+++ b/src/kernel_gravity.h
@@ -20,18 +20,13 @@
 #ifndef SWIFT_KERNEL_GRAVITY_H
 #define SWIFT_KERNEL_GRAVITY_H
+#include <math.h>
 /* Includes. */
 #include "const.h"
 #include "inline.h"
 #include "vector.h"
-/* #define const_iepsilon (1. / const_epsilon) */
-/* #define const_iepsilon2 (const_iepsilon *const_iepsilon) */
-/* #define const_iepsilon3 (const_iepsilon2 *const_iepsilon) */
-/* #define const_iepsilon4 (const_iepsilon2 *const_iepsilon2) */
-/* #define const_iepsilon5 (const_iepsilon3 *const_iepsilon2) */
-/* #define const_iepsilon6 (const_iepsilon3 *const_iepsilon3) */
 /* The gravity kernel is defined as a degree 6 polynomial in the distance
   r. The resulting value should be post-multiplied with r^-3, resulting
   in a polynomial with terms ranging from r^-3 to r^3, which are
@@ -44,16 +39,27 @@
 #define kernel_grav_ivals 2  /* Number of branches */
 static const float
    kernel_grav_coeffs[(kernel_grav_degree + 1) * (kernel_grav_ivals + 1)]
-    __attribute__((aligned(16))) = {
+    __attribute__((aligned(16))) = {32.f,
-        32.f,          -38.4f, 0.f,         10.66666667f,
+                                    -38.4f,
-        0.f,           0.f,    0.f, /* 0 < u < 0.5 */
+                                    0.f,
-        -10.66666667f, 38.4f,  -48.f,       21.3333333,
+                                    10.66666667f,
-        0.f,           0.f,    0.66666667f, /* 0.5 < u < 1 */
+                                    0.f,
-        0.f,           0.f,    0.f,         0.f,
+                                    0.f,
-        0.f,           0.f,    0.f}; /* 1 < u */
+                                    0.f, /* 0 < u < 0.5 */
+                                    -10.66666667f,
-#define kernel_grav_igamma 1.
+                                    38.4f,
-#define kernel_grav_igamma3 1.
+                                    -48.f,
+                                    21.3333333,
+                                    0.f,
+                                    0.f,
+                                    -0.066666667f, /* 0.5 < u < 1 */
+                                    0.f,
+                                    0.f,
+                                    0.f,
+                                    0.f,
+                                    0.f,
+                                    0.f,
+                                    0.f}; /* 1 < u */
 /**
 * @brief Computes the kernel function.
@@ -63,197 +69,20 @@ static const float
 */
 __attribute__((always_inline)) INLINE static void kernel_grav_eval(
    float u, float *const W) {
-  /* Go to the range [0,1[ from [0,H[ */
-  const float x = u * (float)kernel_grav_igamma;
  /* Pick the correct branch of the kernel */
-  const int ind = (int)fminf(x * (float)kernel_grav_ivals, kernel_grav_ivals);
+  const int ind = (int)fminf(u * (float)kernel_grav_ivals, kernel_grav_ivals);
  const float *const coeffs =
      &kernel_grav_coeffs[ind * (kernel_grav_degree + 1)];
  /* First two terms of the polynomial ... */
-  float w = coeffs[0] * x + coeffs[1];
+  float w = coeffs[0] * u + coeffs[1];
  /* ... and the rest of them */
-  for (int k = 2; k <= kernel_grav_degree; k++) w = x * w + coeffs[k];
+  for (int k = 2; k <= kernel_grav_degree; k++) w = u * w + coeffs[k];
  /* Return everything */
-  *W = w * (float)kernel_grav_igamma3;
+  *W = w / (u * u * u);
-}
-#if 0
-/* Coefficients for the gravity kernel. */
-#define kernel_grav_degree 6
-#define kernel_grav_ivals 2
-#define kernel_grav_scale (2 * const_iepsilon)
-static float kernel_grav_coeffs
-    [(kernel_grav_degree + 1) * (kernel_grav_ivals + 1)] = {
-        32.0f * const_iepsilon6,         -192.0f / 5.0f * const_iepsilon5,
-        0.0f,                            32.0f / 3.0f * const_iepsilon3,
-        0.0f,                            0.0f,
-        0.0f,                            -32.0f / 3.0f * const_iepsilon6,
-        192.0f / 5.0f * const_iepsilon5, -48.0f * const_iepsilon4,
-        64.0f / 3.0f * const_iepsilon3,  0.0f,
-        0.0f,                            -1.0f / 15.0f,
-        0.0f,                            0.0f,
-        0.0f,                            0.0f,
-        0.0f,                            0.0f,
-        1.0f};
-/**
- * @brief Computes the gravity cubic spline for a given distance x.
- */
-__attribute__((always_inline)) INLINE static void kernel_grav_eval(float x,
-                                                                   float *W) {
-  int ind = fmin(x * kernel_grav_scale, kernel_grav_ivals);
-  float *coeffs = &kernel_grav_coeffs[ind * (kernel_grav_degree + 1)];
-  float w = coeffs[0] * x + coeffs[1];
-  for (int k = 2; k <= kernel_grav_degree; k++) w = x * w + coeffs[k];
-  *W = w;
-}
-#ifdef VECTORIZE
-/**
- * @brief Computes the gravity cubic spline for a given distance x (Vectorized
- * version).
- */
-__attribute__((always_inline))
-    INLINE static void kernel_grav_eval_vec(vector *x, vector *w) {
-  vector ind, c[kernel_grav_degree + 1];
-  int j, k;
-  /* Load x and get the interval id. */
-  ind.m = vec_ftoi(vec_fmin(x->v * vec_set1(kernel_grav_scale),
-                            vec_set1((float)kernel_grav_ivals)));
-  /* load the coefficients. */
-  for (k = 0; k < VEC_SIZE; k++)
-    for (j = 0; j < kernel_grav_degree + 1; j++)
-      c[j].f[k] = kernel_grav_coeffs[ind.i[k] * (kernel_grav_degree + 1) + j];
-  /* Init the iteration for Horner's scheme. */
-  w->v = (c[0].v * x->v) + c[1].v;
-  /* And we're off! */
-  for (int k = 2; k <= kernel_grav_degree; k++) w->v = (x->v * w->v) + c[k].v;
 }
-#endif
-/* Blending function stuff
- * --------------------------------------------------------------------------------------------
- */
-/* Coefficients for the blending function. */
-#define blender_degree 3
-#define blender_ivals 3
-#define blender_scale 4.0f
-static float blender_coeffs[(blender_degree + 1) * (blender_ivals + 1)] = {
-    0.0f,   0.0f,  0.0f,   1.0f,  -32.0f, 24.0f, -6.0f, 1.5f,
-    -32.0f, 72.0f, -54.0f, 13.5f, 0.0f,   0.0f,  0.0f,  0.0f};
-/**
- * @brief Computes the cubic spline blender for a given distance x.
- */
-__attribute__((always_inline)) INLINE static void blender_eval(float x,
-                                                               float *W) {
-  int ind = fmin(x * blender_scale, blender_ivals);
-  float *coeffs = &blender_coeffs[ind * (blender_degree + 1)];
-  float w = coeffs[0] * x + coeffs[1];
-  for (int k = 2; k <= blender_degree; k++) w = x * w + coeffs[k];
-  *W = w;
-}
-/**
- * @brief Computes the cubic spline blender and its derivative for a given
- * distance x.
- */
-__attribute__((always_inline)) INLINE static void blender_deval(float x,
-                                                                float *W,
-                                                                float *dW_dx) {
-  int ind = fminf(x * blender_scale, blender_ivals);
-  float *coeffs = &blender_coeffs[ind * (blender_degree + 1)];
-  float w = coeffs[0] * x + coeffs[1];
-  float dw_dx = coeffs[0];
-  for (int k = 2; k <= blender_degree; k++) {
-    dw_dx = dw_dx * x + w;
-    w = x * w + coeffs[k];
-  }
-  *W = w;
-  *dW_dx = dw_dx;
-}
-#ifdef VECTORIZE
-/**
- * @brief Computes the cubic spline blender and its derivative for a given
- * distance x (Vectorized version). Gives a sensible answer only if x<2.
- */
-__attribute__((always_inline)) INLINE static void blender_eval_vec(vector *x,
-                                                                   vector *w) {
-  vector ind, c[blender_degree + 1];
-  int j, k;
-  /* Load x and get the interval id. */
-  ind.m = vec_ftoi(
-      vec_fmin(x->v * vec_set1(blender_scale), vec_set1((float)blender_ivals)));
-  /* load the coefficients. */
-  for (k = 0; k < VEC_SIZE; k++)
-    for (j = 0; j < blender_degree + 1; j++)
-      c[j].f[k] = blender_coeffs[ind.i[k] * (blender_degree + 1) + j];
-  /* Init the iteration for Horner's scheme. */
-  w->v = (c[0].v * x->v) + c[1].v;
-  /* And we're off! */
-  for (int k = 2; k <= blender_degree; k++) w->v = (x->v * w->v) + c[k].v;
-}
-/**
- * @brief Computes the cubic spline blender and its derivative for a given
- * distance x (Vectorized version). Gives a sensible answer only if x<2.
- */
-__attribute__((always_inline))
-    INLINE static void blender_deval_vec(vector *x, vector *w, vector *dw_dx) {
-  vector ind, c[blender_degree + 1];
-  int j, k;
-  /* Load x and get the interval id. */
-  ind.m = vec_ftoi(
-      vec_fmin(x->v * vec_set1(blender_scale), vec_set1((float)blender_ivals)));
-  /* load the coefficients. */
-  for (k = 0; k < VEC_SIZE; k++)
-    for (j = 0; j < blender_degree + 1; j++)
-      c[j].f[k] = blender_coeffs[ind.i[k] * (blender_degree + 1) + j];
-  /* Init the iteration for Horner's scheme. */
-  w->v = (c[0].v * x->v) + c[1].v;
-  dw_dx->v = c[0].v;
-  /* And we're off! */
-  for (int k = 2; k <= blender_degree; k++) {
-    dw_dx->v = (dw_dx->v * x->v) + w->v;
-    w->v = (x->v * w->v) + c[k].v;
-  }
-}
-#endif
-#endif
-void gravity_kernel_dump(float r_max, int N);
 #endif  // SWIFT_KERNEL_GRAVITY_H
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -22,11 +22,11 @@ AM_LDFLAGS = ../src/.libs/libswiftsim.a $(HDF5_LDFLAGS) $(HDF5_LIBS)
 # List of programs and scripts to run in the test suite
 TESTS = testGreetings testReading.sh testSingle testPair.sh testPairPerturbed.sh \
-	test27cells.sh test27cellsPerturbed.sh testParser.sh testKernel
+	test27cells.sh test27cellsPerturbed.sh testParser.sh testKernel testKernelGrav
 # List of test programs to compile
 check_PROGRAMS = testGreetings testReading testSingle testTimeIntegration \
-		 testSPHStep testPair test27cells testParser testKernel
+		 testSPHStep testPair test27cells testParser testKernel testKernelGrav
 # Sources for the individual programs
 testGreetings_SOURCES = testGreetings.c
@@ -47,6 +47,8 @@ testParser_SOURCES = testParser.c
 testKernel_SOURCES = testKernel.c
+testKernelGrav_SOURCES = testKernelGrav.c
 # Files necessary for distribution
 EXTRA_DIST = testReading.sh makeInput.py testPair.sh testPairPerturbed.sh \
 	     test27cells.sh test27cellsPerturbed.sh tolerance.dat testParser.sh \

--- a/src/kernel_gravity.c
+++ b/src/kernel_gravity.c
 /*******************************************************************************
 * This file is part of SWIFT.
- * Copyright (c) 2015 Pedro Gonnet (pedro.gonnet@durham.ac.uk),
+ * Copyright (C) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk)
- *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *                    James Willis (james.s.willis@durham.ac.uk)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
@@ -17,61 +17,69 @@
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 ******************************************************************************/
+#include "kernel_gravity.h"
 #include <math.h>
 #include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
-#include "kernel_gravity.h"
+#define numPoints (1 << 6)
-#define const_epsilon 1.
-#define const_iepsilon3 1.
 /**
 * @brief The Gadget-2 gravity kernel function
 *
 * @param r The distance between particles
 */
-float gadget(float r) {
+float gadget(float r, float h) {
-  float fac, h_inv, u, r2 = r * r;
+  float fac;
-  if (r >= const_epsilon)
+  const float r2 = r * r;
+  if (r >= h)
    fac = 1.0f / (r2 * r);
  else {
-    h_inv = 1. / const_epsilon;
+    const float h_inv = 1. / h;
-    u = r * h_inv;
+    const float h_inv3 = h_inv * h_inv * h_inv;
+    const float u = r * h_inv;
    if (u < 0.5)
-      fac = const_iepsilon3 * (10.666666666667 + u * u * (32.0 * u - 38.4));
+      fac = h_inv3 * (10.666666666667 + u * u * (32.0 * u - 38.4));
    else
-      fac = const_iepsilon3 *
+      fac =
-            (21.333333333333 - 48.0 * u + 38.4 * u * u -
+          h_inv3 * (21.333333333333 - 48.0 * u + 38.4 * u * u -
-             10.666666666667 * u * u * u - 0.066666666667 / (u * u * u));
+                    10.666666666667 * u * u * u - 0.066666666667 / (u * u * u));
  }
  return fac;
 }
-/**
+int main() {
- * @brief Test the gravity kernel function by dumping it in the interval [0,1].
- *
+  const float h = 3.f;
- * @param r_max The radius up to which the kernel is dumped.
+  const float r_max = 5.f;
- * @param N number of intervals in [0,1].
- */
-void gravity_kernel_dump(float r_max, int N) {
-  int k;
+  for (int k = 1; k < numPoints; ++k) {
-  float x, w;
-  float x4[4] = {0.0f, 0.0f, 0.0f, 0.0f};
-  float w4[4] = {0.0f, 0.0f, 0.0f, 0.0f};
-  // float dw_dx4[4] __attribute__ ((aligned (16)));
-  for (k = 1; k <= N; k++) {
+    const float r = (r_max * k) / numPoints;
-    x = (r_max * k) / N;
-    x4[3] = x4[2];
+    const float u = r / h;
-    x4[2] = x4[1];
-    x4[1] = x4[0];
+    const float gadget_w = gadget(r, h);
-    x4[0] = x;
-    kernel_grav_eval(x, &w);
+    float swift_w;
-    w *= 1.f / (x * x * x);
+    if (u < 1.) {
-    // blender_deval_vec( (vector *)x4 , (vector *)w4 , (vector *)dw_dx4 );
+      kernel_grav_eval(u, &swift_w);
-    printf(" %.16e %.16e %.16e %.16e %.16e %.16e %.16e\n", x, w * x, w4[0],
+      swift_w *= (1 / (h * h * h));
-           w4[1], w4[2], w4[3], gadget(x) * x);
+    } else {
+      swift_w = 1 / (r * r * r);
+    }
+    printf("%2d: r= %f h= %f u= %f Wg(r,h)= %f Ws(r,h)= %f\n", k, r, h, u,
+           gadget_w, swift_w);
+    if (fabsf(gadget_w - swift_w) > 2e-7) {
+      printf("Invalid value ! Gadget= %e, SWIFT= %e\n", gadget_w, swift_w);
+      return 1;
+    }
  }
+  printf("\nAll values are consistent\n");
+  return 0;
 }