Skip to content
Snippets Groups Projects
Commit fde569a8 authored by Matthieu Schaller
Browse files

Code formatting

parent 1475cd05
Branches
Tags
1 merge request: !150 "Vectorise kernel"
......@@ -236,18 +236,18 @@ static const vector kernel_igamma4_vec = FILL_VEC((float)kernel_igamma4);
__attribute__((always_inline))
INLINE static void kernel_deval_vec(vector *u, vector *w, vector *dw_dx) {
vector ind, c[kernel_degree + 1], x;
int j, k;
/* Go to the range [0,1[ from [0,H[ */
vector x;
x.v = u->v * kernel_igamma_vec.v;
/* Load x and get the interval id. */
vector ind;
ind.m = vec_ftoi(vec_fmin(x.v * kernel_ivals_vec.v, kernel_ivals_vec.v));
/* load the coefficients. */
for (k = 0; k < VEC_SIZE; k++)
for (j = 0; j < kernel_degree + 1; j++)
vector c[kernel_degree + 1];
for (int k = 0; k < VEC_SIZE; k++)
for (int j = 0; j < kernel_degree + 1; j++)
c[j].f[k] = kernel_coeffs[ind.i[k] * (kernel_degree + 1) + j];
/* Init the iteration for Horner's scheme. */
......
......@@ -79,22 +79,12 @@
_mm512_set1_epi64(ptrs[0])), \
1)
#define vec_gather(base, offsets) _mm512_i32gather_ps(offsets.m, base, 1)
#define FILL_VEC(constant) {.f[0] = constant, \
.f[1] = constant, \
.f[2] = constant, \
.f[3] = constant, \
.f[4] = constant, \
.f[5] = constant, \
.f[6] = constant, \
.f[7] = constant, \
.f[8] = constant, \
.f[9] = constant, \
.f[10] = constant, \
.f[11] = constant, \
.f[12] = constant, \
.f[13] = constant, \
.f[14] = constant, \
.f[15] = constant}
/* FILL_VEC(a): brace-enclosed designated initializer that assigns `a` to
 * each of the sixteen float lanes .f[0] .. .f[15] of the object being
 * initialized. The argument is pasted sixteen times and is not
 * parenthesized, so pass a single side-effect-free expression, e.g.
 * FILL_VEC((float)some_constant).
 * NOTE(review): the lane count presumably equals VEC_SIZE for this
 * branch of the conditional -- confirm against the enclosing #if. */
#define FILL_VEC(a) \
{ \
.f[0] = a, .f[1] = a, .f[2] = a, .f[3] = a, .f[4] = a, .f[5] = a, \
.f[6] = a, .f[7] = a, .f[8] = a, .f[9] = a, .f[10] = a, .f[11] = a, \
.f[12] = a, .f[13] = a, .f[14] = a, .f[15] = a \
}
#elif defined(NO__AVX__)
#define VECTORIZE
#define VEC_SIZE 8
......@@ -123,14 +113,11 @@
#define vec_dbl_ftoi(a) _mm256_cvttpd_epi32(a)
#define vec_dbl_fmin(a, b) _mm256_min_pd(a, b)
#define vec_dbl_fmax(a, b) _mm256_max_pd(a, b)
#define FILL_VEC(constant) {.f[0] = constant, \
.f[1] = constant, \
.f[2] = constant, \
.f[3] = constant, \
.f[4] = constant, \
.f[5] = constant, \
.f[6] = constant, \
.f[7] = constant}
/* FILL_VEC(a): brace-enclosed designated initializer that assigns `a` to
 * each of the eight float lanes .f[0] .. .f[7] of the object being
 * initialized. The argument is pasted eight times and is not
 * parenthesized, so pass a single side-effect-free expression, e.g.
 * FILL_VEC((float)some_constant).
 * NOTE(review): the lane count presumably equals VEC_SIZE for this
 * branch of the conditional -- confirm against the enclosing #if. */
#define FILL_VEC(a) \
{ \
.f[0] = a, .f[1] = a, .f[2] = a, .f[3] = a, .f[4] = a, .f[5] = a, \
.f[6] = a, .f[7] = a \
}
#ifdef __AVX2__
#define VEC_HAVE_GATHER
#define vec_gather(base, offsets) _mm256_i32gather_ps(base, offsets.m, 1)
......@@ -163,10 +150,8 @@
#define vec_dbl_ftoi(a) _mm_cvttpd_epi32(a)
#define vec_dbl_fmin(a, b) _mm_min_pd(a, b)
#define vec_dbl_fmax(a, b) _mm_max_pd(a, b)
#define FILL_VEC(constant) {.f[0] = constant, \
.f[1] = constant, \
.f[2] = constant, \
.f[3] = constant}
/* FILL_VEC(a): brace-enclosed designated initializer that assigns `a` to
 * each of the four float lanes .f[0] .. .f[3] of the object being
 * initialized. The argument is pasted four times and is not
 * parenthesized, so pass a single side-effect-free expression, e.g.
 * FILL_VEC((float)some_constant).
 * NOTE(review): the lane count presumably equals VEC_SIZE for this
 * branch of the conditional -- confirm against the enclosing #if. */
#define FILL_VEC(a) \
{ .f[0] = a, .f[1] = a, .f[2] = a, .f[3] = a }
#else
#define VEC_SIZE 4
#endif
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment