From 8cf40a42ff27186b2e4b0e2a4fc0b7b5fc83a7eb Mon Sep 17 00:00:00 2001 From: James Willis <james.s.willis@durham.ac.uk> Date: Wed, 19 Jul 2017 15:05:08 +0100 Subject: [PATCH] Don't require a mask for the first region of the Cubic Spline kernel. --- src/kernel_hydro.h | 32 ++++++++++---------------------- 1 file changed, 10 insertions(+), 22 deletions(-) diff --git a/src/kernel_hydro.h b/src/kernel_hydro.h index ba6f387a9a..e97f11eca5 100644 --- a/src/kernel_hydro.h +++ b/src/kernel_hydro.h @@ -467,10 +467,9 @@ __attribute__((always_inline)) INLINE static void kernel_deval_1_vec( w->v = vec_fma(x.v, w->v, wendland_const_c5.v); #elif defined(CUBIC_SPLINE_KERNEL) vector w2, dw_dx2; - mask_t mask_reg1, mask_reg2; + mask_t mask_reg2; /* Form a mask for each part of the kernel. */ - vec_create_mask(mask_reg1, vec_cmp_lt(x.v, cond.v)); /* 0 < x < 0.5 */ vec_create_mask(mask_reg2, vec_cmp_gte(x.v, cond.v)); /* 0.5 < x < 1 */ /* Work out w for both regions of the kernel and combine the results together @@ -573,11 +572,9 @@ __attribute__((always_inline)) INLINE static void kernel_deval_2_vec( #elif defined(CUBIC_SPLINE_KERNEL) vector w_2, dw_dx_2; vector w2_2, dw_dx2_2; - mask_t mask_reg1, mask_reg2, mask_reg1_v2, mask_reg2_v2; + mask_t mask_reg2, mask_reg2_v2; /* Form a mask for each part of the kernel. */ - vec_create_mask(mask_reg1, vec_cmp_lt(x.v, cond.v)); /* 0 < x < 0.5 */ - vec_create_mask(mask_reg1_v2, vec_cmp_lt(x2.v, cond.v)); /* 0 < x < 0.5 */ vec_create_mask(mask_reg2, vec_cmp_gte(x.v, cond.v)); /* 0.5 < x < 1 */ vec_create_mask(mask_reg2_v2, vec_cmp_gte(x2.v, cond.v)); /* 0.5 < x < 1 */ @@ -657,10 +654,9 @@ __attribute__((always_inline)) INLINE static void kernel_eval_W_vec(vector *u, w->v = vec_fma(x.v, w->v, wendland_const_c5.v); #elif defined(CUBIC_SPLINE_KERNEL) vector w2; - mask_t mask_reg1, mask_reg2; + mask_t mask_reg2; /* Form a mask for each part of the kernel. */ - vec_create_mask(mask_reg1, vec_cmp_lt(x.v, cond.v)); /* 0 < x < 0.5 */ vec_create_mask(mask_reg2, vec_cmp_gte(x.v, cond.v)); /* 0.5 < x < 1 */ /* Work out w for both regions of the kernel and combine the results together @@ -718,10 +714,9 @@ __attribute__((always_inline)) INLINE static void kernel_eval_dWdx_vec( #elif defined(CUBIC_SPLINE_KERNEL) vector dw_dx2; - mask_t mask_reg1, mask_reg2; + mask_t mask_reg2; /* Form a mask for each part of the kernel. */ - vec_create_mask(mask_reg1, vec_cmp_lt(x.v, cond.v)); /* 0 < x < 0.5 */ vec_create_mask(mask_reg2, vec_cmp_gte(x.v, cond.v)); /* 0.5 < x < 1 */ /* Work out w for both regions of the kernel and combine the results together @@ -788,14 +783,12 @@ __attribute__((always_inline)) INLINE static void kernel_eval_dWdx_force_2_vec( #elif defined(CUBIC_SPLINE_KERNEL) vector dw_dx2, dw_dx2_2; - mask_t mask_reg1, mask_reg2; - mask_t mask_reg1_2, mask_reg2_2; + mask_t mask_reg2; + mask_t mask_reg2_v2; /* Form a mask for each part of the kernel. */ - vec_create_mask(mask_reg1, vec_cmp_lt(x.v, cond.v)); /* 0 < x < 0.5 */ - vec_create_mask(mask_reg1_2, vec_cmp_lt(x_2.v, cond.v)); /* 0 < x < 0.5 */ vec_create_mask(mask_reg2, vec_cmp_gte(x.v, cond.v)); /* 0.5 < x < 1 */ - vec_create_mask(mask_reg2_2, vec_cmp_gte(x_2.v, cond.v)); /* 0.5 < x < 1 */ + vec_create_mask(mask_reg2_v2, vec_cmp_gte(x_2.v, cond.v)); /* 0.5 < x < 1 */ /* Work out w for both regions of the kernel and combine the results together * using masks. */ @@ -813,14 +806,9 @@ __attribute__((always_inline)) INLINE static void kernel_eval_dWdx_force_2_vec( dw_dx2_2.v = vec_fma(dw_dx2_2.v, x_2.v, cubic_2_dwdx_const_c2.v); /* Mask out unneeded values. */ - dw_dx->v = vec_and_mask(dw_dx->v, mask_reg1); - dw_dx_2->v = vec_and_mask(dw_dx_2->v, mask_reg1_2); - dw_dx2.v = vec_and_mask(dw_dx2.v, mask_reg2); - dw_dx2_2.v = vec_and_mask(dw_dx2_2.v, mask_reg2_2); - - /* Added both dwdx and dwdx2 together to form complete result. */ - dw_dx->v = vec_add(dw_dx->v, dw_dx2.v); - dw_dx_2->v = vec_add(dw_dx_2->v, dw_dx2_2.v); + dw_dx->v = vec_blend(mask_reg2, dw_dx->v, dw_dx2.v); + dw_dx_2->v = vec_blend(mask_reg2_v2, dw_dx_2->v, dw_dx2_2.v); + #else #error "Vectorisation not supported for this kernel!!!" #endif -- GitLab