diff --git a/src/kernel_hydro.h b/src/kernel_hydro.h index f634a59d7ee769951e6560d46a92053c144cc766..862e2b0ac2cf59e59f5530ea0598999937bcd653 100644 --- a/src/kernel_hydro.h +++ b/src/kernel_hydro.h @@ -468,12 +468,11 @@ __attribute__((always_inline)) INLINE static void kernel_deval_1_vec( w->v = vec_fma(x.v, w->v, wendland_const_c5.v); #elif defined(CUBIC_SPLINE_KERNEL) vector w2, dw_dx2; - vector mask_reg1, mask_reg2; + mask_t mask_reg1, mask_reg2; /* Form a mask for each part of the kernel. */ - mask_reg1.v = vec_cmp_lt(x.v, cond.v); /* 0 < x < 0.5 */ - mask_reg2.v = vec_cmp_gte(x.v, cond.v); /* 0.5 < x < 1 */ - ; + vec_create_mask(mask_reg1, vec_cmp_lt(x.v, cond.v)); /* 0 < x < 0.5 */ + vec_create_mask(mask_reg2, vec_cmp_gte(x.v, cond.v)); /* 0.5 < x < 1 */ /* Work out w for both regions of the kernel and combine the results together * using masks. */ @@ -496,10 +495,10 @@ __attribute__((always_inline)) INLINE static void kernel_deval_1_vec( w2.v = vec_fma(x.v, w2.v, cubic_2_const_c3.v); /* Mask out unneeded values. */ - w->v = vec_and(w->v, mask_reg1.v); - w2.v = vec_and(w2.v, mask_reg2.v); - dw_dx->v = vec_and(dw_dx->v, mask_reg1.v); - dw_dx2.v = vec_and(dw_dx2.v, mask_reg2.v); + w->v = vec_and_mask(w->v, mask_reg1); + w2.v = vec_and_mask(w2.v, mask_reg2); + dw_dx->v = vec_and_mask(dw_dx->v, mask_reg1); + dw_dx2.v = vec_and_mask(dw_dx2.v, mask_reg2); /* Added both w and w2 together to form complete result. */ w->v = vec_add(w->v, w2.v); @@ -580,15 +579,13 @@ __attribute__((always_inline)) INLINE static void kernel_deval_2_vec( #elif defined(CUBIC_SPLINE_KERNEL) vector w_2, dw_dx_2; vector w2_2, dw_dx2_2; - vector mask_reg1, mask_reg2, mask_reg1_v2, mask_reg2_v2; + mask_t mask_reg1, mask_reg2, mask_reg1_v2, mask_reg2_v2; /* Form a mask for each part of the kernel. */ - mask_reg1.v = vec_cmp_lt(x.v, cond.v); /* 0 < x < 0.5 */ - mask_reg1_v2.v = vec_cmp_lt(x2.v, cond.v); /* 0 < x < 0.5 */ - mask_reg2.v = vec_cmp_gte(x.v, cond.v); /* 0.5 < x < 1 */ - ; - mask_reg2_v2.v = vec_cmp_gte(x2.v, cond.v); /* 0.5 < x < 1 */ - ; + vec_create_mask(mask_reg1, vec_cmp_lt(x.v, cond.v)); /* 0 < x < 0.5 */ + vec_create_mask(mask_reg1_v2, vec_cmp_lt(x2.v, cond.v)); /* 0 < x < 0.5 */ + vec_create_mask(mask_reg2, vec_cmp_gte(x.v, cond.v)); /* 0.5 < x < 1 */ + vec_create_mask(mask_reg2_v2, vec_cmp_gte(x2.v, cond.v)); /* 0.5 < x < 1 */ /* Work out w for both regions of the kernel and combine the results together * using masks. */ @@ -623,14 +620,14 @@ __attribute__((always_inline)) INLINE static void kernel_deval_2_vec( w2_2.v = vec_fma(x2.v, w2_2.v, cubic_2_const_c3.v); /* Mask out unneeded values. */ - w->v = vec_and(w->v, mask_reg1.v); - w2->v = vec_and(w2->v, mask_reg1_v2.v); - w_2.v = vec_and(w_2.v, mask_reg2.v); - w2_2.v = vec_and(w2_2.v, mask_reg2_v2.v); - dw_dx->v = vec_and(dw_dx->v, mask_reg1.v); - dw_dx2->v = vec_and(dw_dx2->v, mask_reg1_v2.v); - dw_dx_2.v = vec_and(dw_dx_2.v, mask_reg2.v); - dw_dx2_2.v = vec_and(dw_dx2_2.v, mask_reg2_v2.v); + w->v = vec_and_mask(w->v, mask_reg1); + w2->v = vec_and_mask(w2->v, mask_reg1_v2); + w_2.v = vec_and_mask(w_2.v, mask_reg2); + w2_2.v = vec_and_mask(w2_2.v, mask_reg2_v2); + dw_dx->v = vec_and_mask(dw_dx->v, mask_reg1); + dw_dx2->v = vec_and_mask(dw_dx2->v, mask_reg1_v2); + dw_dx_2.v = vec_and_mask(dw_dx_2.v, mask_reg2); + dw_dx2_2.v = vec_and_mask(dw_dx2_2.v, mask_reg2_v2); /* Added both w and w2 together to form complete result. */ w->v = vec_add(w->v, w_2.v); @@ -639,12 +636,12 @@ __attribute__((always_inline)) INLINE static void kernel_deval_2_vec( dw_dx2->v = vec_add(dw_dx2->v, dw_dx2_2.v); /* Return everything */ - w->v = w->v * kernel_constant_vec.v * kernel_gamma_inv_dim_vec.v; - w2->v = w2->v * kernel_constant_vec.v * kernel_gamma_inv_dim_vec.v; + w->v = vec_mul(w->v, vec_mul(kernel_constant_vec.v, kernel_gamma_inv_dim_vec.v)); + w2->v = vec_mul(w2->v, vec_mul(kernel_constant_vec.v, kernel_gamma_inv_dim_vec.v)); dw_dx->v = - dw_dx->v * kernel_constant_vec.v * kernel_gamma_inv_dim_plus_one_vec.v; + vec_mul(dw_dx->v, vec_mul(kernel_constant_vec.v, kernel_gamma_inv_dim_plus_one_vec.v)); dw_dx2->v = - dw_dx2->v * kernel_constant_vec.v * kernel_gamma_inv_dim_plus_one_vec.v; + vec_mul(dw_dx2->v, vec_mul(kernel_constant_vec.v, kernel_gamma_inv_dim_plus_one_vec.v)); #endif }