diff --git a/src/kernel_hydro.h b/src/kernel_hydro.h index 7bf2e01a719a29b731bb437096093b13ca086e37..6992f314e7824df3ba05a257d7eb0c4fdec08580 100644 --- a/src/kernel_hydro.h +++ b/src/kernel_hydro.h @@ -372,6 +372,82 @@ static const vector wendland_const_c4 = FILL_VEC(0.f); static const vector wendland_const_c5 = FILL_VEC(1.f); #endif +/** + * @brief Computes the kernel function and its derivative for two particles + * using interleaved vectors. + * + * Return 0 if $u > \\gamma = H/h$ + * + * @param u The ratio of the distance to the smoothing length $u = x/h$. + * @param w (return) The value of the kernel function $W(x,h)$. + * @param dw_dx (return) The norm of the gradient of $|\\nabla W(x,h)|$. + * @param u2 The ratio of the distance to the smoothing length $u = x/h$ for + * second particle. + * @param w2 (return) The value of the kernel function $W(x,h)$ for second + * particle. + * @param dw_dx2 (return) The norm of the gradient of $|\\nabla W(x,h)|$ for + * second particle. + */ +__attribute__((always_inline)) INLINE static void kernel_deval_1_vec( + vector *u, vector *w, vector *dw_dx) { + + /* Go to the range [0,1[ from [0,H[ */ + vector x; + x.v = vec_mul(u->v, kernel_gamma_inv_vec.v); + +#ifdef WENDLAND_C2_KERNEL + /* Init the iteration for Horner's scheme. */ + w->v = vec_fma(wendland_const_c0.v, x.v, wendland_const_c1.v); + dw_dx->v = wendland_const_c0.v; + + /* Calculate the polynomial interleaving vector operations */ + dw_dx->v = vec_fma(dw_dx->v, x.v, w->v); + w->v = vec_fma(x.v, w->v, wendland_const_c2.v); + + dw_dx->v = vec_fma(dw_dx->v, x.v, w->v); + w->v = vec_fma(x.v, w->v, wendland_const_c3.v); + + dw_dx->v = vec_fma(dw_dx->v, x.v, w->v); + w->v = vec_fma(x.v, w->v, wendland_const_c4.v); + + dw_dx->v = vec_fma(dw_dx->v, x.v, w->v); + w->v = vec_fma(x.v, w->v, wendland_const_c5.v); + + /* Return everything */ + w->v = + vec_mul(w->v, vec_mul(kernel_constant_vec.v, kernel_gamma_inv_dim_vec.v)); + dw_dx->v = vec_mul(dw_dx->v, vec_mul(kernel_constant_vec.v, + kernel_gamma_inv_dim_plus_one_vec.v)); +#else + + /* Load x and get the interval id. */ + vector ind; + ind.m = vec_ftoi(vec_fmin(x.v * kernel_ivals_vec.v, kernel_ivals_vec.v)); + + /* load the coefficients. */ + vector c[kernel_degree + 1]; + for (int k = 0; k < VEC_SIZE; k++) + for (int j = 0; j < kernel_degree + 1; j++) { + c[j].f[k] = kernel_coeffs[ind.i[k] * (kernel_degree + 1) + j]; + } + + /* Init the iteration for Horner's scheme. */ + w->v = (c[0].v * x.v) + c[1].v; + dw_dx->v = c[0].v; + + /* And we're off! */ + for (int k = 2; k <= kernel_degree; k++) { + dw_dx->v = (dw_dx->v * x.v) + w->v; + w->v = (x.v * w->v) + c[k].v; + } + /* Return everything */ + w->v = w->v * kernel_constant_vec.v * kernel_gamma_inv_dim_vec.v; + dw_dx->v = + dw_dx->v * kernel_constant_vec.v * kernel_gamma_inv_dim_plus_one_vec.v; + +#endif +} + /** * @brief Computes the kernel function and its derivative for two particles * using interleaved vectors.