Commit fde569a8 by Matthieu Schaller

### Code formatting

parent 1475cd05
 ... ... @@ -236,18 +236,18 @@ static const vector kernel_igamma4_vec = FILL_VEC((float)kernel_igamma4); __attribute__((always_inline)) INLINE static void kernel_deval_vec(vector *u, vector *w, vector *dw_dx) { vector ind, c[kernel_degree + 1], x; int j, k; /* Go to the range [0,1[ from [0,H[ */ vector x; x.v = u->v * kernel_igamma_vec.v; /* Load x and get the interval id. */ vector ind; ind.m = vec_ftoi(vec_fmin(x.v * kernel_ivals_vec.v, kernel_ivals_vec.v)); /* load the coefficients. */ for (k = 0; k < VEC_SIZE; k++) for (j = 0; j < kernel_degree + 1; j++) vector c[kernel_degree + 1]; for (int k = 0; k < VEC_SIZE; k++) for (int j = 0; j < kernel_degree + 1; j++) c[j].f[k] = kernel_coeffs[ind.i[k] * (kernel_degree + 1) + j]; /* Init the iteration for Horner's scheme. */ ... ...
 ... ... @@ -79,22 +79,12 @@ _mm512_set1_epi64(ptrs[0])), \ 1) #define vec_gather(base, offsets) _mm512_i32gather_ps(offsets.m, base, 1) #define FILL_VEC(constant) {.f[0] = constant, \ .f[1] = constant, \ .f[2] = constant, \ .f[3] = constant, \ .f[4] = constant, \ .f[5] = constant, \ .f[6] = constant, \ .f[7] = constant, \ .f[8] = constant, \ .f[9] = constant, \ .f[10] = constant, \ .f[11] = constant, \ .f[12] = constant, \ .f[13] = constant, \ .f[14] = constant, \ .f[15] = constant} #define FILL_VEC(a) \ { \ .f[0] = a, .f[1] = a, .f[2] = a, .f[3] = a, .f[4] = a, .f[5] = a, \ .f[6] = a, .f[7] = a, .f[8] = a, .f[9] = a, .f[10] = a, .f[11] = a, \ .f[12] = a, .f[13] = a, .f[14] = a, .f[15] = a \ } #elif defined(NO__AVX__) #define VECTORIZE #define VEC_SIZE 8 ... ... @@ -123,14 +113,11 @@ #define vec_dbl_ftoi(a) _mm256_cvttpd_epi32(a) #define vec_dbl_fmin(a, b) _mm256_min_pd(a, b) #define vec_dbl_fmax(a, b) _mm256_max_pd(a, b) #define FILL_VEC(constant) {.f[0] = constant, \ .f[1] = constant, \ .f[2] = constant, \ .f[3] = constant, \ .f[4] = constant, \ .f[5] = constant, \ .f[6] = constant, \ .f[7] = constant} #define FILL_VEC(a) \ { \ .f[0] = a, .f[1] = a, .f[2] = a, .f[3] = a, .f[4] = a, .f[5] = a, \ .f[6] = a, .f[7] = a \ } #ifdef __AVX2__ #define VEC_HAVE_GATHER #define vec_gather(base, offsets) _mm256_i32gather_ps(base, offsets.m, 1) ... ... @@ -163,10 +150,8 @@ #define vec_dbl_ftoi(a) _mm_cvttpd_epi32(a) #define vec_dbl_fmin(a, b) _mm_min_pd(a, b) #define vec_dbl_fmax(a, b) _mm_max_pd(a, b) #define FILL_VEC(constant) {.f[0] = constant, \ .f[1] = constant, \ .f[2] = constant, \ .f[3] = constant} #define FILL_VEC(a) \ { .f[0] = a, .f[1] = a, .f[2] = a, .f[3] = a } #else #define VEC_SIZE 4 #endif ... ...
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment