Skip to content
Snippets Groups Projects
Commit fde569a8 authored by Matthieu Schaller
Browse files

Code formatting

parent 1475cd05
No related branches found
No related tags found
1 merge request: !150 "Vectorise kernel"
...@@ -236,18 +236,18 @@ static const vector kernel_igamma4_vec = FILL_VEC((float)kernel_igamma4); ...@@ -236,18 +236,18 @@ static const vector kernel_igamma4_vec = FILL_VEC((float)kernel_igamma4);
__attribute__((always_inline)) __attribute__((always_inline))
INLINE static void kernel_deval_vec(vector *u, vector *w, vector *dw_dx) { INLINE static void kernel_deval_vec(vector *u, vector *w, vector *dw_dx) {
vector ind, c[kernel_degree + 1], x;
int j, k;
/* Go to the range [0,1[ from [0,H[ */ /* Go to the range [0,1[ from [0,H[ */
vector x;
x.v = u->v * kernel_igamma_vec.v; x.v = u->v * kernel_igamma_vec.v;
/* Load x and get the interval id. */ /* Load x and get the interval id. */
vector ind;
ind.m = vec_ftoi(vec_fmin(x.v * kernel_ivals_vec.v, kernel_ivals_vec.v)); ind.m = vec_ftoi(vec_fmin(x.v * kernel_ivals_vec.v, kernel_ivals_vec.v));
/* load the coefficients. */ /* load the coefficients. */
for (k = 0; k < VEC_SIZE; k++) vector c[kernel_degree + 1];
for (j = 0; j < kernel_degree + 1; j++) for (int k = 0; k < VEC_SIZE; k++)
for (int j = 0; j < kernel_degree + 1; j++)
c[j].f[k] = kernel_coeffs[ind.i[k] * (kernel_degree + 1) + j]; c[j].f[k] = kernel_coeffs[ind.i[k] * (kernel_degree + 1) + j];
/* Init the iteration for Horner's scheme. */ /* Init the iteration for Horner's scheme. */
......
...@@ -79,22 +79,12 @@ ...@@ -79,22 +79,12 @@
_mm512_set1_epi64(ptrs[0])), \ _mm512_set1_epi64(ptrs[0])), \
1) 1)
#define vec_gather(base, offsets) _mm512_i32gather_ps(offsets.m, base, 1) #define vec_gather(base, offsets) _mm512_i32gather_ps(offsets.m, base, 1)
#define FILL_VEC(constant) {.f[0] = constant, \ #define FILL_VEC(a) \
.f[1] = constant, \ { \
.f[2] = constant, \ .f[0] = a, .f[1] = a, .f[2] = a, .f[3] = a, .f[4] = a, .f[5] = a, \
.f[3] = constant, \ .f[6] = a, .f[7] = a, .f[8] = a, .f[9] = a, .f[10] = a, .f[11] = a, \
.f[4] = constant, \ .f[12] = a, .f[13] = a, .f[14] = a, .f[15] = a \
.f[5] = constant, \ }
.f[6] = constant, \
.f[7] = constant, \
.f[8] = constant, \
.f[9] = constant, \
.f[10] = constant, \
.f[11] = constant, \
.f[12] = constant, \
.f[13] = constant, \
.f[14] = constant, \
.f[15] = constant}
#elif defined(NO__AVX__) #elif defined(NO__AVX__)
#define VECTORIZE #define VECTORIZE
#define VEC_SIZE 8 #define VEC_SIZE 8
...@@ -123,14 +113,11 @@ ...@@ -123,14 +113,11 @@
#define vec_dbl_ftoi(a) _mm256_cvttpd_epi32(a) #define vec_dbl_ftoi(a) _mm256_cvttpd_epi32(a)
#define vec_dbl_fmin(a, b) _mm256_min_pd(a, b) #define vec_dbl_fmin(a, b) _mm256_min_pd(a, b)
#define vec_dbl_fmax(a, b) _mm256_max_pd(a, b) #define vec_dbl_fmax(a, b) _mm256_max_pd(a, b)
#define FILL_VEC(constant) {.f[0] = constant, \ #define FILL_VEC(a) \
.f[1] = constant, \ { \
.f[2] = constant, \ .f[0] = a, .f[1] = a, .f[2] = a, .f[3] = a, .f[4] = a, .f[5] = a, \
.f[3] = constant, \ .f[6] = a, .f[7] = a \
.f[4] = constant, \ }
.f[5] = constant, \
.f[6] = constant, \
.f[7] = constant}
#ifdef __AVX2__ #ifdef __AVX2__
#define VEC_HAVE_GATHER #define VEC_HAVE_GATHER
#define vec_gather(base, offsets) _mm256_i32gather_ps(base, offsets.m, 1) #define vec_gather(base, offsets) _mm256_i32gather_ps(base, offsets.m, 1)
...@@ -163,10 +150,8 @@ ...@@ -163,10 +150,8 @@
#define vec_dbl_ftoi(a) _mm_cvttpd_epi32(a) #define vec_dbl_ftoi(a) _mm_cvttpd_epi32(a)
#define vec_dbl_fmin(a, b) _mm_min_pd(a, b) #define vec_dbl_fmin(a, b) _mm_min_pd(a, b)
#define vec_dbl_fmax(a, b) _mm_max_pd(a, b) #define vec_dbl_fmax(a, b) _mm_max_pd(a, b)
#define FILL_VEC(constant) {.f[0] = constant, \ #define FILL_VEC(a) \
.f[1] = constant, \ { .f[0] = a, .f[1] = a, .f[2] = a, .f[3] = a }
.f[2] = constant, \
.f[3] = constant}
#else #else
#define VEC_SIZE 4 #define VEC_SIZE 4
#endif #endif
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment