Commit 12070bbc authored by James Willis's avatar James Willis
Browse files

Added intrinsic version of kernel_deval.

parent e6d5931b
...@@ -212,6 +212,50 @@ __attribute__((always_inline)) INLINE static void kernel_eval(float u, ...@@ -212,6 +212,50 @@ __attribute__((always_inline)) INLINE static void kernel_eval(float u,
*W = w * (float)kernel_constant * (float)kernel_igamma3; *W = w * (float)kernel_constant * (float)kernel_igamma3;
} }
#define VECTORIZE
#ifdef VECTORIZE
/**
 * @brief Computes the kernel polynomial and its derivative (Vectorised
 * version).
 *
 * Every lane of @c u is handled independently: the argument is rescaled by
 * kernel_igamma, the matching row of kernel_coeffs is gathered, and the
 * polynomial and its derivative with respect to the rescaled argument are
 * evaluated together with Horner's scheme.
 *
 * Lanes whose rescaled argument exceeds the tabulated range are clamped to
 * interval index kernel_ivals; the original header states the result is 0
 * for $u > \\gamma = H/h$, which presumably relies on that last row of
 * kernel_coeffs being all zeros -- TODO confirm.
 *
 * NOTE(review): kernel_eval() multiplies its result by
 * kernel_constant * kernel_igamma3 before returning it. No such
 * normalisation is applied here, so the outputs are the raw polynomial
 * values. Confirm whether callers are expected to scale them or whether the
 * scaling is missing.
 *
 * @param u The ratio of the distance to the smoothing length $u = x/h$.
 * @param w (return) The kernel polynomial evaluated at the rescaled argument
 *        (un-normalised, see note above).
 * @param dw_dx (return) The derivative of that polynomial with respect to the
 *        rescaled argument (un-normalised, see note above).
 */
__attribute__((always_inline))
INLINE static void kernel_deval_vec(vector *u, vector *w, vector *dw_dx) {

  vector ind, c[kernel_degree + 1], x;

  /* Go to the range [0,1[ from [0,H[ */
  x.v = u->v * vec_set1((float)kernel_igamma);

  /* Get the interval id of every lane, capped at kernel_ivals. */
  ind.m = vec_ftoi(vec_fmin(x.v * vec_set1((float)kernel_ivals),
                            vec_set1((float)kernel_ivals)));

  /* Gather the coefficients: c[j] holds the j-th coefficient of each lane's
     interval, so the Horner steps below can run on whole vectors. */
  for (int lane = 0; lane < VEC_SIZE; lane++) {
    const int row = ind.i[lane] * (kernel_degree + 1);
    for (int j = 0; j < kernel_degree + 1; j++) {
      c[j].f[lane] = kernel_coeffs[row + j];
    }
  }

  /* Init the iteration for Horner's scheme. */
  w->v = (c[0].v * x.v) + c[1].v;
  dw_dx->v = c[0].v;

  /* Remaining terms. The derivative must be updated first because it needs
     the value of w from the previous step. */
  for (int k = 2; k <= kernel_degree; k++) {
    dw_dx->v = (dw_dx->v * x.v) + w->v;
    w->v = (x.v * w->v) + c[k].v;
  }
}
#endif
/* Some cross-check functions */ /* Some cross-check functions */
void hydro_kernel_dump(int N); void hydro_kernel_dump(int N);
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment