Skip to content
Snippets Groups Projects
Commit 660bef5f authored by James Willis's avatar James Willis
Browse files

Added macro FILL_VEC to set up constants as vectors, depending on the vector size.

parent bd3e3f2e
No related branches found
No related tags found
1 merge request!150Vectorise kernel
......@@ -214,6 +214,39 @@ __attribute__((always_inline)) INLINE static void kernel_eval(float u,
#ifdef VECTORIZE
/*
 * Cumulative lane lists: each helper expands to one designated initializer
 * per float lane of member `f`, and the wider helpers extend the narrower
 * ones with the next group of lanes.
 */
#define FILL_VEC_LANES_4(value) \
  .f[0] = value, .f[1] = value, .f[2] = value, .f[3] = value
#define FILL_VEC_LANES_8(value)                                        \
  FILL_VEC_LANES_4(value), .f[4] = value, .f[5] = value, .f[6] = value, \
      .f[7] = value
#define FILL_VEC_LANES_16(value)                                       \
  FILL_VEC_LANES_8(value), .f[8] = value, .f[9] = value, .f[10] = value, \
      .f[11] = value, .f[12] = value, .f[13] = value, .f[14] = value,    \
      .f[15] = value

/**
 * @brief Expands to designated initializers storing the same value in
 * consecutive float lanes `.f[0]`, `.f[1]`, ... of an initializer list.
 *
 * The number of lanes written is chosen by the first matching test:
 * 16 when __MIC__ is defined, 8 when __AVX__ or __AVX2__ is defined,
 * 4 when __SSE2__ is defined. If none of them is defined the macro is
 * left undefined.
 *
 * @param constant Expression written into every lane; it is repeated once
 * per lane in the expansion.
 */
#if defined(__MIC__)
#define FILL_VEC(constant) FILL_VEC_LANES_16(constant)
#elif defined(__AVX__) || defined(__AVX2__)
#define FILL_VEC(constant) FILL_VEC_LANES_8(constant)
#elif defined(__SSE2__)
#define FILL_VEC(constant) FILL_VEC_LANES_4(constant)
#endif
/**
* @brief Computes the kernel function and its derivative (Vectorised version).
*
......@@ -224,50 +257,16 @@ __attribute__((always_inline)) INLINE static void kernel_eval(float u,
* @param dw_dx (return) The norm of the gradient of $|\\nabla W(x,h)|$.
*/
/*
 * Hand-expanded vector copies of the scalar kernel constants: every
 * definition casts its constant to float and assigns it to lanes f[0]
 * through f[7] one designator at a time.
 *
 * NOTE(review): exactly eight lanes are written here regardless of how wide
 * `vector` is. This presumably assumes an 8-float vector; confirm against
 * the declaration of `vector`, which is not visible from this section.
 */
/* kernel_igamma copied into lanes 0-7. */
static const vector kernel_igamma_vec = {.f[0] = (float)kernel_igamma,
.f[1] = (float)kernel_igamma,
.f[2] = (float)kernel_igamma,
.f[3] = (float)kernel_igamma,
.f[4] = (float)kernel_igamma,
.f[5] = (float)kernel_igamma,
.f[6] = (float)kernel_igamma,
.f[7] = (float)kernel_igamma};
/* kernel_ivals copied into lanes 0-7. */
static const vector kernel_ivals_vec = {.f[0] = (float)kernel_ivals,
.f[1] = (float)kernel_ivals,
.f[2] = (float)kernel_ivals,
.f[3] = (float)kernel_ivals,
.f[4] = (float)kernel_ivals,
.f[5] = (float)kernel_ivals,
.f[6] = (float)kernel_ivals,
.f[7] = (float)kernel_ivals};
/* kernel_constant copied into lanes 0-7. */
static const vector kernel_constant_vec = {.f[0] = (float)kernel_constant,
.f[1] = (float)kernel_constant,
.f[2] = (float)kernel_constant,
.f[3] = (float)kernel_constant,
.f[4] = (float)kernel_constant,
.f[5] = (float)kernel_constant,
.f[6] = (float)kernel_constant,
.f[7] = (float)kernel_constant};
/* kernel_igamma3 copied into lanes 0-7. */
static const vector kernel_igamma3_vec = {.f[0] = (float)kernel_igamma3,
.f[1] = (float)kernel_igamma3,
.f[2] = (float)kernel_igamma3,
.f[3] = (float)kernel_igamma3,
.f[4] = (float)kernel_igamma3,
.f[5] = (float)kernel_igamma3,
.f[6] = (float)kernel_igamma3,
.f[7] = (float)kernel_igamma3};
/* kernel_igamma4 copied into lanes 0-7. */
static const vector kernel_igamma4_vec = {.f[0] = (float)kernel_igamma4,
.f[1] = (float)kernel_igamma4,
.f[2] = (float)kernel_igamma4,
.f[3] = (float)kernel_igamma4,
.f[4] = (float)kernel_igamma4,
.f[5] = (float)kernel_igamma4,
.f[6] = (float)kernel_igamma4,
.f[7] = (float)kernel_igamma4};
/*
 * Vector copies of the scalar kernel constants. Each constant is cast to
 * float and handed to FILL_VEC, which expands to one `.f[i] = value`
 * designated initializer per lane it covers, so the lane count follows the
 * FILL_VEC definition selected at preprocessing time.
 */
static const vector kernel_igamma_vec = {FILL_VEC((float)kernel_igamma)};
static const vector kernel_ivals_vec = {FILL_VEC((float)kernel_ivals)};
static const vector kernel_constant_vec = {FILL_VEC((float)kernel_constant)};
static const vector kernel_igamma3_vec = {FILL_VEC((float)kernel_igamma3)};
static const vector kernel_igamma4_vec = {FILL_VEC((float)kernel_igamma4)};
__attribute__((always_inline))
INLINE static void kernel_deval_vec(vector *u, vector *w, vector *dw_dx) {
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment