diff --git a/src/kernel_hydro.h b/src/kernel_hydro.h index a9e979151fd466ce4538895f52da24af1e48c81b..06f950b0e2f3aec68055ba7976b32dc00980589e 100644 --- a/src/kernel_hydro.h +++ b/src/kernel_hydro.h @@ -35,8 +35,8 @@ #define kernel_name "Cubic spline (M4)" #define kernel_degree 3 /* Degree of the polynomial */ #define kernel_ivals 2 /* Number of branches */ -#define kernel_gamma 1.825742 -#define kernel_constant 16. * M_1_PI +#define kernel_gamma ((float)(1.825742)) +#define kernel_constant ((float)(16. * M_1_PI)) static const float kernel_coeffs[(kernel_degree + 1) * (kernel_ivals + 1)] __attribute__((aligned(16))) = {3.f, -3.f, 0.f, 0.5f, /* 0 < u < 0.5 */ -1.f, 3.f, -3.f, 1.f, /* 0.5 < u < 1 */ @@ -49,8 +49,8 @@ static const float kernel_coeffs[(kernel_degree + 1) * (kernel_ivals + 1)] #define kernel_name "Quartic spline (M5)" #define kernel_degree 4 #define kernel_ivals 5 -#define kernel_gamma 2.018932 -#define kernel_constant 15625. * M_1_PI / 512. +#define kernel_gamma ((float)(2.018932)) +#define kernel_constant ((float)(15625. * M_1_PI / 512.)) static const float kernel_coeffs[(kernel_degree + 1) * (kernel_ivals + 1)] __attribute__((aligned(16))) = { 6.f, 0.f, -2.4f, 0.f, 0.368f, /* 0 < u < 0.2 */ @@ -67,8 +67,8 @@ static const float kernel_coeffs[(kernel_degree + 1) * (kernel_ivals + 1)] #define kernel_name "Quintic spline (M6)" #define kernel_degree 5 #define kernel_ivals 3 -#define kernel_gamma 2.195775 -#define kernel_constant 2187. * M_1_PI / 40. +#define kernel_gamma ((float)(2.195775)) +#define kernel_constant ((float)(2187. * M_1_PI / 40.)) static const float kernel_coeffs[(kernel_degree + 1) * (kernel_ivals + 1)] __attribute__((aligned(16))) = { -10.f, 10.f, 0.f, @@ -87,8 +87,8 @@ static const float kernel_coeffs[(kernel_degree + 1) * (kernel_ivals + 1)] #define kernel_name "Wendland C2" #define kernel_degree 5 #define kernel_ivals 1 -#define kernel_gamma 1.936492 -#define kernel_constant 21. * M_1_PI / 2. +#define kernel_gamma ((float)(1.936492)) +#define kernel_constant ((float)(21. * M_1_PI / 2.)) static const float kernel_coeffs[(kernel_degree + 1) * (kernel_ivals + 1)] __attribute__((aligned(16))) = { 4.f, -15.f, 20.f, -10.f, 0.f, 1.f, /* 0 < u < 1 */ @@ -101,8 +101,8 @@ static const float kernel_coeffs[(kernel_degree + 1) * (kernel_ivals + 1)] #define kernel_name "Wendland C4" #define kernel_degree 8 #define kernel_ivals 1 -#define kernel_gamma 2.207940 -#define kernel_constant 495. * M_1_PI / 32. +#define kernel_gamma ((float)(2.207940)) +#define kernel_constant ((float)(495. * M_1_PI / 32.)) static const float kernel_coeffs[(kernel_degree + 1) * (kernel_ivals + 1)] __attribute__((aligned(16))) = { 11.666667f, -64.f, 140.f, -149.333333f, 70.f, @@ -117,8 +117,8 @@ static const float kernel_coeffs[(kernel_degree + 1) * (kernel_ivals + 1)] #define kernel_name "Wendland C6" #define kernel_degree 11 #define kernel_ivals 1 -#define kernel_gamma 2.449490 -#define kernel_constant 1365. * M_1_PI / 64. +#define kernel_gamma ((float)(2.449490)) +#define kernel_constant ((float)(1365. * M_1_PI / 64.)) static const float kernel_coeffs[(kernel_degree + 1) * (kernel_ivals + 1)] __attribute__((aligned(16))) = { 32.f, -231.f, 704.f, -1155.f, 1056.f, -462.f, @@ -137,17 +137,22 @@ static const float kernel_coeffs[(kernel_degree + 1) * (kernel_ivals + 1)] /* Ok, now comes the real deal. */ /* First some powers of gamma = H/h */ -#define kernel_gamma2 kernel_gamma *kernel_gamma -#define kernel_gamma3 kernel_gamma2 *kernel_gamma -#define kernel_gamma4 kernel_gamma3 *kernel_gamma -#define kernel_igamma 1. / kernel_gamma -#define kernel_igamma2 kernel_igamma *kernel_igamma -#define kernel_igamma3 kernel_igamma2 *kernel_igamma -#define kernel_igamma4 kernel_igamma3 *kernel_igamma +#define kernel_gamma2 ((float)(kernel_gamma *kernel_gamma)) +#define kernel_gamma3 ((float)(kernel_gamma *kernel_gamma *kernel_gamma)) +#define kernel_gamma4 \ + ((float)(kernel_gamma *kernel_gamma *kernel_gamma *kernel_gamma)) +#define kernel_igamma ((float)(1. / kernel_gamma)) +#define kernel_igamma2 ((float)(kernel_igamma *kernel_igamma)) +#define kernel_igamma3 ((float)(kernel_igamma *kernel_igamma *kernel_igamma)) +#define kernel_igamma4 \ + ((float)(kernel_igamma *kernel_igamma *kernel_igamma *kernel_igamma)) + +/* The number of branches */ +#define kernel_ivals_f ((float)(kernel_ivals)) /* Kernel self contribution (i.e. W(0,h)) */ #define kernel_root \ - (kernel_coeffs[kernel_degree]) * kernel_constant *kernel_igamma3 + ((float)(kernel_coeffs[kernel_degree]) * kernel_constant *kernel_igamma3) /** * @brief Computes the kernel function and its derivative. @@ -159,14 +164,20 @@ static const float kernel_coeffs[(kernel_degree + 1) * (kernel_ivals + 1)] * @param dW_dx (return) The norm of the gradient of $|\\nabla W(x,h)|$. */ __attribute__((always_inline)) INLINE static void kernel_deval( - float u, float *const W, float *const dW_dx) { + float u, float *restrict W, float *restrict dW_dx) { /* Go to the range [0,1[ from [0,H[ */ - const float x = u * (float)kernel_igamma; + const float x = u * kernel_igamma; +#if kernel_ivals == 1 + /* Only one branch in this case */ + const float *const coeffs = &kernel_coeffs[0]; +#else /* Pick the correct branch of the kernel */ - const int ind = (int)fminf(x * (float)kernel_ivals, kernel_ivals); + const int temp = (int)(x * kernel_ivals_f); + const int ind = temp > kernel_ivals ? kernel_ivals : temp; const float *const coeffs = &kernel_coeffs[ind * (kernel_degree + 1)]; +#endif /* First two terms of the polynomial ... */ float w = coeffs[0] * x + coeffs[1]; @@ -179,8 +190,8 @@ __attribute__((always_inline)) INLINE static void kernel_deval( } /* Return everything */ - *W = w * (float)kernel_constant * (float)kernel_igamma3; - *dW_dx = dw_dx * (float)kernel_constant * (float)kernel_igamma4; + *W = w * kernel_constant * kernel_igamma3; + *dW_dx = dw_dx * kernel_constant * kernel_igamma4; } /** @@ -189,14 +200,20 @@ __attribute__((always_inline)) INLINE static void kernel_deval( * @param u The ratio of the distance to the smoothing length $u = x/h$. * @param W (return) The value of the kernel function $W(x,h)$. */ -__attribute__((always_inline)) INLINE static void kernel_eval(float u, - float *const W) { +__attribute__((always_inline)) + INLINE static void kernel_eval(float u, float *restrict W) { /* Go to the range [0,1[ from [0,H[ */ - const float x = u * (float)kernel_igamma; + const float x = u * kernel_igamma; +#if kernel_ivals == 1 + /* Only one branch in this case */ + const float *const coeffs = &kernel_coeffs[0]; +#else /* Pick the correct branch of the kernel */ - const int ind = (int)fminf(x * (float)kernel_ivals, kernel_ivals); + const int temp = (int)(x * kernel_ivals_f); + const int ind = temp > kernel_ivals ? kernel_ivals : temp; const float *const coeffs = &kernel_coeffs[ind * (kernel_degree + 1)]; +#endif /* First two terms of the polynomial ... */ float w = coeffs[0] * x + coeffs[1]; @@ -205,7 +222,7 @@ __attribute__((always_inline)) INLINE static void kernel_eval(float u, for (int k = 2; k <= kernel_degree; k++) w = x * w + coeffs[k]; /* Return everything */ - *W = w * (float)kernel_constant * (float)kernel_igamma3; + *W = w * kernel_constant * kernel_igamma3; } #ifdef VECTORIZE diff --git a/src/parser.c b/src/parser.c index 7329d15e872ec766556e493063bb4f410e99dcdf..49b035db05722d13d1ce48129b8fae8bfc1ad86b 100644 --- a/src/parser.c +++ b/src/parser.c @@ -70,6 +70,7 @@ void parser_read_file(const char *file_name, struct swift_params *params) { /* Initialise parameter count. */ params->paramCount = 0; params->sectionCount = 0; + strcpy(params->fileName, file_name); /* Check if parameter file exits. */ if (file == NULL) { @@ -375,7 +376,7 @@ int parser_get_param_int(const struct swift_params *params, const char *name) { } } - error("Cannot find '%s' in the structure.", name); + error("Cannot find '%s' in the structure, in file '%s'.", name,params->fileName); return 0; } @@ -407,7 +408,7 @@ char parser_get_param_char(const struct swift_params *params, } } - error("Cannot find '%s' in the structure.", name); + error("Cannot find '%s' in the structure, in file '%s'.", name,params->fileName); return 0; } @@ -439,7 +440,7 @@ float parser_get_param_float(const struct swift_params *params, } } - error("Cannot find '%s' in the structure.", name); + error("Cannot find '%s' in the structure, in file '%s'.", name,params->fileName); return 0.f; } @@ -470,7 +471,7 @@ double parser_get_param_double(const struct swift_params *params, } } - error("Cannot find '%s' in the structure.", name); + error("Cannot find '%s' in the structure, in file '%s'.", name,params->fileName); return 0.; } diff --git a/src/parser.h b/src/parser.h index cdbdc477ae8c90a504919618c84d03622099df01..2d50ddc49bf2231235bbb73040b067f77462d33e 100644 --- a/src/parser.h +++ b/src/parser.h @@ -42,6 +42,7 @@ struct swift_params { struct parameter data[PARSER_MAX_NO_OF_PARAMS]; int sectionCount; int paramCount; + char fileName[PARSER_MAX_LINE_SIZE]; }; /* Public API. */ diff --git a/tests/testKernel.c b/tests/testKernel.c index 9ba50ee049201cede5166528fa67153838a707da..27d45375b91e4143036c32b71a217e80ad630222 100644 --- a/tests/testKernel.c +++ b/tests/testKernel.c @@ -20,28 +20,35 @@ #define NO__SSE2__ #include "vector.h" - -#include "swift.h" #include "kernel_hydro.h" -#define numPoints 64 +#include <stdlib.h> +#include <strings.h> + +#define numPoints (1 << 4) int main() { - const float h = 1.2348; + const float h = 1.2348f; + + float u[numPoints] = {0.f}; float W[numPoints] = {0.f}; float dW[numPoints] = {0.f}; printf("\nSerial Output\n"); printf("-------------\n"); + const float numPoints_inv = 1. / numPoints; + + for (int i = 0; i < numPoints; ++i) { + u[i] = i * 2.5f * numPoints_inv / h; + } for (int i = 0; i < numPoints; ++i) { - const float x = i * 2.5f / numPoints; - kernel_deval(x / h, &W[i], &dW[i]); + kernel_deval(u[i], &W[i], &dW[i]); printf("%2d: h= %f H= %f x=%f W(x,h)=%f dW(x,h)=%f\n", i, h, - h * kernel_gamma, x, W[i], dW[i]); + h * kernel_gamma, u[i] * h, W[i], dW[i]); } printf("\nVector Output for VEC_SIZE=%d\n", VEC_SIZE);