Commit 1cb97461 authored by Matthieu Schaller's avatar Matthieu Schaller
Browse files

Merge branch 'updated_vectorisation_tests' into 'master'

More physical definition of SPH kernel functions

This "solves" #97. 

 - I have split the SPH and gravity kernels into two separate files for ease of use. That explains a lot of the file changes below.
 - Corrected a small typo in the hydro.h files. The mistake was not leading to wrong physics results but would slow down convergence towards the correct h in the ghost task.
 - When switching kernel function, the resolution is not modified any more. This was the main point of #97.
 - The documentation of the kernels has increased and I have updated the .tex documents and figures to be completely consistent with the code.

In normal mode (i.e. without editing const.h) the results are identical. 

See merge request !138
parents 05ac290c bcb63b9c
......@@ -41,9 +41,13 @@ tests/input.hdf5
tests/testSingle
tests/testTimeIntegration
tests/testSPHStep
tests/testKernel
tests/testParser
theory/latex/swift.pdf
theory/kernel/kernels.pdf
theory/kernel/kernel_derivatives.pdf
theory/kernel/kernel_definitions.pdf
m4/libtool.m4
m4/ltoptions.m4
......
......@@ -759,7 +759,7 @@ WARN_LOGFILE =
# spaces.
# Note: If this tag is empty the current directory is searched.
INPUT = @top_srcdir@ @top_srcdir@/src @top_srcdir@/src/hydro/Minimal @top_srcdir@/src/gravity/Default
INPUT = @top_srcdir@ @top_srcdir@/src @top_srcdir@/src/hydro/Minimal @top_srcdir@/src/gravity/Default @top_srcdir@/riemann
# This tag can be used to specify the character encoding of the source files
# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses
......
......@@ -41,11 +41,11 @@ include_HEADERS = space.h runner.h queue.h task.h lock.h cell.h part.h const.h \
AM_SOURCES = space.c runner.c queue.c task.c cell.c engine.c \
serial_io.c timers.c debug.c scheduler.c proxy.c parallel_io.c \
units.c common_io.c single_io.c multipole.c version.c map.c \
kernel.c tools.c part.c partition.c clocks.c parser.c
kernel_hydro.c kernel_gravity.c tools.c part.c partition.c clocks.c parser.c
# Include files for distribution, not installation.
nobase_noinst_HEADERS = approx_math.h atomic.h cycle.h error.h inline.h kernel.h vector.h \
runner_doiact.h runner_doiact_grav.h units.h intrinsics.h minmax.h \
nobase_noinst_HEADERS = approx_math.h atomic.h cycle.h error.h inline.h kernel_hydro.h kernel_gravity.h \
vector.h runner_doiact.h runner_doiact_grav.h units.h intrinsics.h minmax.h \
gravity.h gravity_io.h \
gravity/Default/gravity.h gravity/Default/gravity_iact.h gravity/Default/gravity_io.h \
gravity/Default/gravity_debug.h gravity/Default/gravity_part.h \
......
......@@ -42,7 +42,7 @@
/* Local includes. */
#include "const.h"
#include "error.h"
#include "kernel.h"
#include "kernel_hydro.h"
#include "version.h"
const char* particle_type_names[NUM_PARTICLE_TYPES] = {
......
......@@ -24,6 +24,7 @@
#include "../config.h"
/* Includes. */
#include "kernel_hydro.h"
#include "part.h"
#include "units.h"
......
......@@ -22,7 +22,7 @@
/* Includes. */
#include "const.h"
#include "kernel.h"
#include "kernel_gravity.h"
#include "vector.h"
/**
......
......@@ -91,13 +91,16 @@ __attribute__((always_inline))
const float ih2 = ih * ih;
const float ih4 = ih2 * ih2;
/* Final operation on the density. */
p->rho = ih * ih2 * (p->rho + p->mass * kernel_root);
p->rho_dh = (p->rho_dh - 3.0f * p->mass * kernel_root) * ih4;
p->density.wcount =
(p->density.wcount + kernel_root) * (4.0f / 3.0 * M_PI * kernel_gamma3);
p->density.wcount_dh =
p->density.wcount_dh * ih * (4.0f / 3.0 * M_PI * kernel_gamma3);
/* Final operation on the density (add self-contribution). */
p->rho += p->mass * kernel_root;
p->rho_dh -= 3.0f * p->mass * kernel_root * kernel_igamma;
p->density.wcount += kernel_root;
/* Finish the calculation by inserting the missing h-factors */
p->rho *= ih * ih2;
p->rho_dh *= ih4;
p->density.wcount *= (4.0f / 3.0f * M_PI * kernel_gamma3);
p->density.wcount_dh *= ih * (4.0f / 3.0f * M_PI * kernel_gamma4);
}
/**
......
......@@ -22,7 +22,7 @@
/* Includes. */
#include "const.h"
#include "kernel.h"
#include "kernel_hydro.h"
#include "part.h"
#include "vector.h"
......
......@@ -101,7 +101,7 @@ __attribute__((always_inline))
p->rho *= ih * ih2;
p->rho_dh *= ih4;
p->density.wcount *= (4.0f / 3.0f * M_PI * kernel_gamma3);
p->density.wcount_dh *= ih * (4.0f / 3.0f * M_PI * kernel_gamma3);
p->density.wcount_dh *= ih * (4.0f / 3.0f * M_PI * kernel_gamma4);
const float irho = 1.f / p->rho;
......
......@@ -22,7 +22,7 @@
/* Includes. */
#include "const.h"
#include "kernel.h"
#include "kernel_hydro.h"
#include "part.h"
#include "vector.h"
......
......@@ -101,7 +101,12 @@ __attribute__((always_inline))
p->rho *= ih * ih2;
p->rho_dh *= ih4;
p->density.wcount *= (4.0f / 3.0f * M_PI * kernel_gamma3);
p->density.wcount_dh *= ih * (4.0f / 3.0f * M_PI * kernel_gamma3);
p->density.wcount_dh *= ih * (4.0f / 3.0f * M_PI * kernel_gamma4);
const float irho = 1.f / p->rho;
/* Compute the derivative term */
p->rho_dh = 1.f / (1.f + 0.33333333f * p->h * p->rho_dh * irho);
}
/**
......
......@@ -21,7 +21,7 @@
/* Includes. */
#include "const.h"
#include "kernel.h"
#include "kernel_hydro.h"
#include "part.h"
#include "vector.h"
......
......@@ -21,32 +21,7 @@
#include <math.h>
#include <stdio.h>
#include "kernel.h"
/**
* @brief Test the SPH kernel function by dumping it in the interval [0,1].
*
* @param N number of intervals in [0,1].
*/
void SPH_kernel_dump(int N) {
int k;
float x, w, dw_dx;
float x4[4] = {0.0f, 0.0f, 0.0f, 0.0f};
float w4[4] = {0.0f, 0.0f, 0.0f, 0.0f};
// float dw_dx4[4] __attribute__ ((aligned (16)));
for (k = 0; k <= N; k++) {
x = ((float)k) / N;
x4[3] = x4[2];
x4[2] = x4[1];
x4[1] = x4[0];
x4[0] = x;
kernel_deval(x, &w, &dw_dx);
// kernel_deval_vec( (vector *)x4 , (vector *)w4 , (vector *)dw_dx4 );
printf(" %e %e %e %e %e %e %e\n", x, w, dw_dx, w4[0], w4[1], w4[2], w4[3]);
}
}
#include "kernel_gravity.h"
/**
* @brief The Gadget-2 gravity kernel function
......
......@@ -17,20 +17,14 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
******************************************************************************/
#ifndef SWIFT_KERNEL_H
#define SWIFT_KERNEL_H
#ifndef SWIFT_KERNEL_GRAVITY_H
#define SWIFT_KERNEL_GRAVITY_H
/* Includes. */
#include "const.h"
#include "inline.h"
#include "vector.h"
/**
* @file kernel.h
* @brief SPH kernel functions. Compute W(x,h) and the gradient of W(x,h),
* as well as the blending function used for gravity.
*/
/* Gravity kernel stuff
* -----------------------------------------------------------------------------------------------
*/
......@@ -210,408 +204,6 @@ __attribute__((always_inline))
#endif
/* --------------------------------------------------------------------------------------------------------------------
*/
#if defined(CUBIC_SPLINE_KERNEL)
/* --------------------------------------------------------------------------------------------------------------------
*/
/* Coefficients for the kernel. */
#define kernel_name "Cubic spline"
#define kernel_degree 3
#define kernel_ivals 2
#define kernel_gamma 2.0f
#define kernel_gamma2 4.0f
#define kernel_gamma3 8.0f
#define kernel_igamma 0.5f
#define kernel_nwneigh \
(4.0 / 3.0 * M_PI *const_eta_kernel *const_eta_kernel *const_eta_kernel * \
6.0858f)
static float kernel_coeffs[(kernel_degree + 1) * (kernel_ivals + 1)]
__attribute__((aligned(16))) = {
3.0 / 4.0 * M_1_PI, -3.0 / 2.0 * M_1_PI, 0.0, M_1_PI,
-0.25 * M_1_PI, 3.0 / 2.0 * M_1_PI, -3.0 * M_1_PI, M_2_PI,
0.0, 0.0, 0.0, 0.0};
#define kernel_root (kernel_coeffs[kernel_degree])
#define kernel_wroot (4.0 / 3.0 * M_PI *kernel_coeffs[kernel_degree])
/**
* @brief Computes the cubic spline kernel and its derivative for a given
* distance x. Gives a sensible answer only if x<2.
*/
__attribute__((always_inline)) INLINE static void kernel_deval(float x,
float *W,
float *dW_dx) {
int ind = fminf(x, kernel_ivals);
float *coeffs = &kernel_coeffs[ind * (kernel_degree + 1)];
float w = coeffs[0] * x + coeffs[1];
float dw_dx = coeffs[0];
for (int k = 2; k <= kernel_degree; k++) {
dw_dx = dw_dx * x + w;
w = x * w + coeffs[k];
}
*W = w;
*dW_dx = dw_dx;
}
#ifdef VECTORIZE
/**
* @brief Computes the cubic spline kernel and its derivative for a given
* distance x (Vectorized version). Gives a sensible answer only if x<2.
*/
__attribute__((always_inline))
INLINE static void kernel_deval_vec(vector *x, vector *w, vector *dw_dx) {
vector ind, c[kernel_degree + 1];
int j, k;
/* Load x and get the interval id. */
ind.m = vec_ftoi(vec_fmin(x->v, vec_set1((float)kernel_ivals)));
/* load the coefficients. */
for (k = 0; k < VEC_SIZE; k++)
for (j = 0; j < kernel_degree + 1; j++)
c[j].f[k] = kernel_coeffs[ind.i[k] * (kernel_degree + 1) + j];
/* Init the iteration for Horner's scheme. */
w->v = (c[0].v * x->v) + c[1].v;
dw_dx->v = c[0].v;
/* And we're off! */
for (int k = 2; k <= kernel_degree; k++) {
dw_dx->v = (dw_dx->v * x->v) + w->v;
w->v = (x->v * w->v) + c[k].v;
}
}
#endif
/**
* @brief Computes the cubic spline kernel for a given distance x. Gives a
* sensible answer only if x<2.
*/
__attribute__((always_inline)) INLINE static void kernel_eval(float x,
float *W) {
int ind = fmin(x, kernel_ivals);
float *coeffs = &kernel_coeffs[ind * (kernel_degree + 1)];
float w = coeffs[0] * x + coeffs[1];
for (int k = 2; k <= kernel_degree; k++) w = x * w + coeffs[k];
*W = w;
}
/* --------------------------------------------------------------------------------------------------------------------
*/
#elif defined(QUARTIC_SPLINE_KERNEL)
/* --------------------------------------------------------------------------------------------------------------------
*/
/* Coefficients for the kernel. */
#define kernel_name "Quartic spline"
#define kernel_degree 4
#define kernel_ivals 3
#define kernel_gamma 2.5f
#define kernel_gamma2 6.25f
#define kernel_gamma3 15.625f
#define kernel_igamma 0.4f
#define kernel_nwneigh \
(4.0 / 3.0 * M_PI *const_eta_kernel *const_eta_kernel *const_eta_kernel * \
8.2293f)
static float kernel_coeffs[(kernel_degree + 1) * (kernel_ivals + 1)]
__attribute__((aligned(16))) = {
3.0 / 10.0 * M_1_PI, 0.0, -3.0 / 4.0 * M_1_PI,
0.0, 23.0 / 32.0 * M_1_PI, -1.0 / 5.0 * M_1_PI,
M_1_PI, -3.0 / 2.0 * M_1_PI, 0.25 * M_1_PI,
11.0 / 16.0 * M_1_PI, 1.0 / 20.0 * M_1_PI, -0.5 * M_1_PI,
15.0 / 8.0 * M_1_PI, -25.0 / 8.0 * M_1_PI, 125.0 / 64.0 * M_1_PI,
0.0, 0.0, 0.0,
0.0, 0.0};
#define kernel_root (kernel_coeffs[kernel_degree])
#define kernel_wroot (4.0 / 3.0 * M_PI *kernel_coeffs[kernel_degree])
/**
* @brief Computes the quartic spline kernel and its derivative for a given
* distance x. Gives a sensible answer only if x<2.5
*/
__attribute__((always_inline)) INLINE static void kernel_deval(float x,
float *W,
float *dW_dx) {
int ind = fminf(x + 0.5, kernel_ivals);
float *coeffs = &kernel_coeffs[ind * (kernel_degree + 1)];
float w = coeffs[0] * x + coeffs[1];
float dw_dx = coeffs[0];
for (int k = 2; k <= kernel_degree; k++) {
dw_dx = dw_dx * x + w;
w = x * w + coeffs[k];
}
*W = w;
*dW_dx = dw_dx;
}
#ifdef VECTORIZE
/**
* @brief Computes the quartic spline kernel and its derivative for a given
* distance x (Vectorized version). Gives a sensible answer only if x<2.5
*/
__attribute__((always_inline))
INLINE static void kernel_deval_vec(vector *x, vector *w, vector *dw_dx) {
vector ind, c[kernel_degree + 1];
int j, k;
/* Load x and get the interval id. */
ind.m = vec_ftoi(vec_fmin(x->v + 0.5f, vec_set1((float)kernel_ivals)));
/* load the coefficients. */
for (k = 0; k < VEC_SIZE; k++)
for (j = 0; j < kernel_degree + 1; j++)
c[j].f[k] = kernel_coeffs[ind.i[k] * (kernel_degree + 1) + j];
/* Init the iteration for Horner's scheme. */
w->v = (c[0].v * x->v) + c[1].v;
dw_dx->v = c[0].v;
/* And we're off! */
for (int k = 2; k <= kernel_degree; k++) {
dw_dx->v = (dw_dx->v * x->v) + w->v;
w->v = (x->v * w->v) + c[k].v;
}
}
#endif
/**
* @brief Computes the quartic spline kernel for a given distance x. Gives a
* sensible answer only if x<2.5
*/
__attribute__((always_inline)) INLINE static void kernel_eval(float x,
float *W) {
int ind = fmin(x + 0.5f, kernel_ivals);
float *coeffs = &kernel_coeffs[ind * (kernel_degree + 1)];
float w = coeffs[0] * x + coeffs[1];
for (int k = 2; k <= kernel_degree; k++) w = x * w + coeffs[k];
*W = w;
}
/* --------------------------------------------------------------------------------------------------------------------
*/
#elif defined(QUINTIC_SPLINE_KERNEL)
/* --------------------------------------------------------------------------------------------------------------------
*/
/* Coefficients for the kernel. */
#define kernel_name "Quintic spline"
#define kernel_degree 5
#define kernel_ivals 3
#define kernel_gamma 3.f
#define kernel_gamma2 9.f
#define kernel_gamma3 27.f
#define kernel_igamma 1.0f / 3.0f
#define kernel_nwneigh \
(4.0 / 3.0 * M_PI *const_eta_kernel *const_eta_kernel *const_eta_kernel * \
10.5868f)
static float kernel_coeffs[(kernel_degree + 1) * (kernel_ivals + 1)]
__attribute__((aligned(16))) = {
-1.0 / 12.0 * M_1_PI, 1.0 / 4.0 * M_1_PI, 0.0,
-1.0 / 2.0 * M_1_PI, 0.0, 11.0 / 20.0 * M_1_PI,
1.0 / 24.0 * M_1_PI, -3.0 / 8.0 * M_1_PI, 5.0 / 4.0 * M_1_PI,
-7.0 / 4.0 * M_1_PI, 5.0 / 8.0 * M_1_PI, 17.0 / 40.0 * M_1_PI,
-1.0 / 120.0 * M_1_PI, 1.0 / 8.0 * M_1_PI, -3.0 / 4.0 * M_1_PI,
9.0 / 4.0 * M_1_PI, -27.0 / 8.0 * M_1_PI, 81.0 / 40.0 * M_1_PI,
0.0, 0.0, 0.0,
0.0, 0.0, 0.0};
#define kernel_root (kernel_coeffs[kernel_degree])
#define kernel_wroot (4.0 / 3.0 * M_PI *kernel_coeffs[kernel_degree])
/**
* @brief Computes the quintic spline kernel and its derivative for a given
* distance x. Gives a sensible answer only if x<3.
*/
__attribute__((always_inline)) INLINE static void kernel_deval(float x,
float *W,
float *dW_dx) {
int ind = fminf(x, kernel_ivals);
float *coeffs = &kernel_coeffs[ind * (kernel_degree + 1)];
float w = coeffs[0] * x + coeffs[1];
float dw_dx = coeffs[0];
for (int k = 2; k <= kernel_degree; k++) {
dw_dx = dw_dx * x + w;
w = x * w + coeffs[k];
}
*W = w;
*dW_dx = dw_dx;
}
#ifdef VECTORIZE
/**
* @brief Computes the quintic spline kernel and its derivative for a given
* distance x (Vectorized version). Gives a sensible answer only if x<3.
*/
__attribute__((always_inline))
INLINE static void kernel_deval_vec(vector *x, vector *w, vector *dw_dx) {
vector ind, c[kernel_degree + 1];
int j, k;
/* Load x and get the interval id. */
ind.m = vec_ftoi(vec_fmin(x->v, vec_set1((float)kernel_ivals)));
/* load the coefficients. */
for (k = 0; k < VEC_SIZE; k++)
for (j = 0; j < kernel_degree + 1; j++)
c[j].f[k] = kernel_coeffs[ind.i[k] * (kernel_degree + 1) + j];
/* Init the iteration for Horner's scheme. */
w->v = (c[0].v * x->v) + c[1].v;
dw_dx->v = c[0].v;
/* And we're off! */
for (int k = 2; k <= kernel_degree; k++) {
dw_dx->v = (dw_dx->v * x->v) + w->v;
w->v = (x->v * w->v) + c[k].v;
}
}
#endif
/**
* @brief Computes the quintic spline kernel for a given distance x. Gives a
* sensible answer only if x<3.
*/
__attribute__((always_inline)) INLINE static void kernel_eval(float x,
float *W) {
int ind = fmin(x, kernel_ivals);
float *coeffs = &kernel_coeffs[ind * (kernel_degree + 1)];
float w = coeffs[0] * x + coeffs[1];
for (int k = 2; k <= kernel_degree; k++) w = x * w + coeffs[k];
*W = w;
}
/* --------------------------------------------------------------------------------------------------------------------
*/
#elif defined(WENDLAND_C2_KERNEL)
/* --------------------------------------------------------------------------------------------------------------------
*/
/* Coefficients for the kernel. */
#define kernel_name "Wendland C2"
#define kernel_degree 5
#define kernel_ivals 1
#define kernel_gamma 2.f
#define kernel_gamma2 4.f
#define kernel_gamma3 8.f
#define kernel_igamma 0.5f
#define kernel_nwneigh \
(4.0 / 3.0 * M_PI *const_eta_kernel *const_eta_kernel *const_eta_kernel * \
7.261825f)
static float kernel_coeffs[(kernel_degree + 1) * (kernel_ivals + 1)]
__attribute__((aligned(16))) = {
0.05222272f, -0.39167037f, 1.04445431f, -1.04445431f, 0.f, 0.41778173f,
0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f};
#define kernel_root (kernel_coeffs[kernel_degree])
#define kernel_wroot (4.0 / 3.0 * M_PI *kernel_coeffs[kernel_degree])
/**
* @brief Computes the quintic spline kernel and its derivative for a given
* distance x. Gives a sensible answer only if x<1.
*/
__attribute__((always_inline)) INLINE static void kernel_deval(float x,
float *W,
float *dW_dx) {
int ind = fminf(0.5f * x, kernel_ivals);
float *coeffs = &kernel_coeffs[ind * (kernel_degree + 1)];
float w = coeffs[0] * x + coeffs[1];
float dw_dx = coeffs[0];
for (int k = 2; k <= kernel_degree; k++) {
dw_dx = dw_dx * x + w;
w = x * w + coeffs[k];
}
*W = w;
*dW_dx = dw_dx;
}
#ifdef VECTORIZE
/**
* @brief Computes the Wendland C2 kernel and its derivative for a given
* distance x (Vectorized version). Gives a sensible answer only if x<1.
*/
__attribute__((always_inline))
INLINE static void kernel_deval_vec(vector *x, vector *w, vector *dw_dx) {
vector ind, c[kernel_degree + 1];
int j, k;
/* Load x and get the interval id. */
ind.m = vec_ftoi(vec_fmin(0.5f * x->v, vec_set1((float)kernel_ivals)));
/* load the coefficients. */
for (k = 0; k < VEC_SIZE; k++)
for (j = 0; j < kernel_degree + 1; j++)
c[j].f[k] = kernel_coeffs[ind.i[k] * (kernel_degree + 1) + j];
/* Init the iteration for Horner's scheme. */
w->v = (c[0].v * x->v) + c[1].v;
dw_dx->v = c[0].v;
/* And we're off! */
for (int k = 2; k <= kernel_degree; k++) {
dw_dx->v = (dw_dx->v * x->v) + w->v;
w->v = (x->v * w->v) + c[k].v;
}
}
#endif
/**
* @brief Computes the Wendland C2 kernel for a given distance x. Gives a
* sensible answer only if x<1.
*/
__attribute__((always_inline)) INLINE static void kernel_eval(float x,
float *W) {
int ind = fmin(0.5f * x, kernel_ivals);
float *coeffs = &kernel_coeffs[ind * (kernel_degree + 1)];
float w = coeffs[0] * x + coeffs[1];
for (int k = 2; k <= kernel_degree; k++) w = x * w + coeffs[k];
*W = w;
}
/* --------------------------------------------------------------------------------------------------------------------
*/
#else
/* --------------------------------------------------------------------------------------------------------------------
*/
#error "A kernel function must be chosen in const.h !!"