From aa6bc3bfdaa006df726a7ba34a12d500e6792adc Mon Sep 17 00:00:00 2001 From: James Willis <james.s.willis@durham.ac.uk> Date: Tue, 13 Dec 2016 10:57:40 +0000 Subject: [PATCH] Replaced all calls to VEC_RECIPROCAL and VEC_RECIPROCAL_SQRT with vec_reciprocal and vec_reciprocal_sqrt. --- src/hydro/Gadget2/hydro_iact.h | 35 +++++++++++++--------------------- src/runner_doiact_vec.c | 6 +++--- 2 files changed, 16 insertions(+), 25 deletions(-) diff --git a/src/hydro/Gadget2/hydro_iact.h b/src/hydro/Gadget2/hydro_iact.h index 927dd049f4..3fef18b4f4 100644 --- a/src/hydro/Gadget2/hydro_iact.h +++ b/src/hydro/Gadget2/hydro_iact.h @@ -155,20 +155,15 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_density( /* Get the radius and inverse radius. */ r2.v = vec_load(R2); - ri.v = vec_rsqrt(r2.v); - /*vec_rsqrt does not have the level of accuracy we need, so an extra term is - * added below.*/ - ri.v = ri.v - vec_set1(0.5f) * ri.v * (r2.v * ri.v * ri.v - vec_set1(1.0f)); + ri = vec_reciprocal_sqrt(r2); r.v = r2.v * ri.v; hi.v = vec_load(Hi); - hi_inv.v = vec_rcp(hi.v); - hi_inv.v = hi_inv.v - hi_inv.v * (hi_inv.v * hi.v - vec_set1(1.0f)); + hi_inv = vec_reciprocal(hi); xi.v = r.v * hi_inv.v; hj.v = vec_load(Hj); - hj_inv.v = vec_rcp(hj.v); - hj_inv.v = hj_inv.v - hj_inv.v * (hj_inv.v * hj.v - vec_set1(1.0f)); + hj_inv = vec_reciprocal(hj); xj.v = r.v * hj_inv.v; /* Compute the kernel function. */ @@ -327,15 +322,11 @@ runner_iact_nonsym_vec_density(float *R2, float *Dx, float *Hi, float *Hj, /* Get the radius and inverse radius. */ r2.v = vec_load(R2); - ri.v = vec_rsqrt(r2.v); - /*vec_rsqrt does not have the level of accuracy we need, so an extra term is - * added below.*/ - ri.v = ri.v - vec_set1(0.5f) * ri.v * (r2.v * ri.v * ri.v - vec_set1(1.0f)); + ri = vec_reciprocal_sqrt(r2); r.v = r2.v * ri.v; hi.v = vec_load(Hi); - hi_inv.v = vec_rcp(hi.v); - hi_inv.v = hi_inv.v - hi_inv.v * (hi_inv.v * hi.v - vec_set1(1.0f)); + hi_inv = vec_reciprocal(hi); xi.v = r.v * hi_inv.v; kernel_deval_vec(&xi, &wi, &wi_dx); @@ -427,8 +418,8 @@ runner_iact_nonsym_2_vec_density( /* Get the radius and inverse radius. */ r2.v = vec_load(R2); r2_2.v = vec_load(&R2[VEC_SIZE]); - VEC_RECIPROCAL_SQRT(r2.v, ri.v); - VEC_RECIPROCAL_SQRT(r2_2.v, ri2.v); + ri = vec_reciprocal_sqrt(r2); + ri2 = vec_reciprocal_sqrt(r2_2); r.v = vec_mul(r2.v, ri.v); r_2.v = vec_mul(r2_2.v, ri2.v); @@ -758,12 +749,12 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force( /* Get the radius and inverse radius. */ r2.v = vec_load(R2); - VEC_RECIPROCAL_SQRT(r2.v, ri.v); + ri = vec_reciprocal_sqrt(r2); r.v = r2.v * ri.v; /* Get the kernel for hi. */ hi.v = vec_load(Hi); - VEC_RECIPROCAL(hi.v, hi_inv.v); + hi_inv = vec_reciprocal(hi); hid_inv = pow_dimension_plus_one_vec(hi_inv); /* 1/h^(d+1) */ xi.v = r.v * hi_inv.v; kernel_deval_vec(&xi, &wi, &wi_dx); @@ -771,7 +762,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force( /* Get the kernel for hj. */ hj.v = vec_load(Hj); - VEC_RECIPROCAL(hj.v, hj_inv.v); + hj_inv = vec_reciprocal(hj); hjd_inv = pow_dimension_plus_one_vec(hj_inv); /* 1/h^(d+1) */ xj.v = r.v * hj_inv.v; kernel_deval_vec(&xj, &wj, &wj_dx); @@ -1037,12 +1028,12 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force( /* Get the radius and inverse radius. */ r2.v = vec_load(R2); - VEC_RECIPROCAL_SQRT(r2.v, ri.v); + ri = vec_reciprocal_sqrt(r2); r.v = r2.v * ri.v; /* Get the kernel for hi. */ hi.v = vec_load(Hi); - VEC_RECIPROCAL(hi.v, hi_inv.v); + hi_inv = vec_reciprocal(hi); hid_inv = pow_dimension_plus_one_vec(hi_inv); xi.v = r.v * hi_inv.v; kernel_deval_vec(&xi, &wi, &wi_dx); @@ -1050,7 +1041,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force( /* Get the kernel for hj. */ hj.v = vec_load(Hj); - VEC_RECIPROCAL(hj.v, hj_inv.v); + hj_inv = vec_reciprocal(hj); hjd_inv = pow_dimension_plus_one_vec(hj_inv); xj.v = r.v * hj_inv.v; kernel_deval_vec(&xj, &wj, &wj_dx); diff --git a/src/runner_doiact_vec.c b/src/runner_doiact_vec.c index 49de264e79..085f005b67 100644 --- a/src/runner_doiact_vec.c +++ b/src/runner_doiact_vec.c @@ -332,7 +332,7 @@ __attribute__((always_inline)) INLINE void runner_doself1_density_vec( /* Get the inverse of hi. */ vector v_hi_inv; - VEC_RECIPROCAL(v_hi.v, v_hi_inv.v); + v_hi_inv = vec_reciprocal(v_hi); rhoSum.v = vec_setzero(); rho_dhSum.v = vec_setzero(); @@ -599,8 +599,8 @@ __attribute__((always_inline)) INLINE void runner_doself1_density_vec_2( vector v_hi_inv, v_hi_inv2; - VEC_RECIPROCAL(v_hi.v, v_hi_inv.v); - VEC_RECIPROCAL(v_hi2.v, v_hi_inv2.v); + v_hi_inv = vec_reciprocal(v_hi); + v_hi_inv2 = vec_reciprocal(v_hi2); rhoSum.v = vec_setzero(); rho_dhSum.v = vec_setzero(); -- GitLab