Commit 660bef5f by James Willis

### Added macro FILL_VEC to set up constants as vectors, depending on the vector size.

parent bd3e3f2e
 ... ... @@ -214,6 +214,39 @@ __attribute__((always_inline)) INLINE static void kernel_eval(float u, #ifdef VECTORIZE #ifdef __MIC__ #define FILL_VEC(constant) .f[0] = constant, \ .f[1] = constant, \ .f[2] = constant, \ .f[3] = constant, \ .f[4] = constant, \ .f[5] = constant, \ .f[6] = constant, \ .f[7] = constant, \ .f[8] = constant, \ .f[9] = constant, \ .f[10] = constant, \ .f[11] = constant, \ .f[12] = constant, \ .f[13] = constant, \ .f[14] = constant, \ .f[15] = constant #elif defined(__AVX__) || defined (__AVX2__) #define FILL_VEC(constant) .f[0] = constant, \ .f[1] = constant, \ .f[2] = constant, \ .f[3] = constant, \ .f[4] = constant, \ .f[5] = constant, \ .f[6] = constant, \ .f[7] = constant #elif defined(__SSE2__) #define FILL_VEC(constant) .f[0] = constant, \ .f[1] = constant, \ .f[2] = constant, \ .f[3] = constant #endif /** * @brief Computes the kernel function and its derivative (Vectorised version). * ... ... @@ -224,50 +257,16 @@ __attribute__((always_inline)) INLINE static void kernel_eval(float u, * @param dw_dx (return) The norm of the gradient of \$|\\nabla W(x,h)|\$. 
*/ static const vector kernel_igamma_vec = {.f[0] = (float)kernel_igamma, .f[1] = (float)kernel_igamma, .f[2] = (float)kernel_igamma, .f[3] = (float)kernel_igamma, .f[4] = (float)kernel_igamma, .f[5] = (float)kernel_igamma, .f[6] = (float)kernel_igamma, .f[7] = (float)kernel_igamma}; static const vector kernel_ivals_vec = {.f[0] = (float)kernel_ivals, .f[1] = (float)kernel_ivals, .f[2] = (float)kernel_ivals, .f[3] = (float)kernel_ivals, .f[4] = (float)kernel_ivals, .f[5] = (float)kernel_ivals, .f[6] = (float)kernel_ivals, .f[7] = (float)kernel_ivals}; static const vector kernel_constant_vec = {.f[0] = (float)kernel_constant, .f[1] = (float)kernel_constant, .f[2] = (float)kernel_constant, .f[3] = (float)kernel_constant, .f[4] = (float)kernel_constant, .f[5] = (float)kernel_constant, .f[6] = (float)kernel_constant, .f[7] = (float)kernel_constant}; static const vector kernel_igamma3_vec = {.f[0] = (float)kernel_igamma3, .f[1] = (float)kernel_igamma3, .f[2] = (float)kernel_igamma3, .f[3] = (float)kernel_igamma3, .f[4] = (float)kernel_igamma3, .f[5] = (float)kernel_igamma3, .f[6] = (float)kernel_igamma3, .f[7] = (float)kernel_igamma3}; static const vector kernel_igamma4_vec = {.f[0] = (float)kernel_igamma4, .f[1] = (float)kernel_igamma4, .f[2] = (float)kernel_igamma4, .f[3] = (float)kernel_igamma4, .f[4] = (float)kernel_igamma4, .f[5] = (float)kernel_igamma4, .f[6] = (float)kernel_igamma4, .f[7] = (float)kernel_igamma4}; static const vector kernel_igamma_vec = {FILL_VEC((float)kernel_igamma)}; static const vector kernel_ivals_vec = {FILL_VEC((float)kernel_ivals)}; static const vector kernel_constant_vec = {FILL_VEC((float)kernel_constant)}; static const vector kernel_igamma3_vec = {FILL_VEC((float)kernel_igamma3)}; static const vector kernel_igamma4_vec = {FILL_VEC((float)kernel_igamma4)}; __attribute__((always_inline)) INLINE static void kernel_deval_vec(vector *u, vector *w, vector *dw_dx) { ... ...
