Commit aa6bc3bf authored by James Willis
Browse files

Replaced all calls to VEC_RECIPROCAL and VEC_RECIPROCAL_SQRT with...

Replaced all calls to VEC_RECIPROCAL and VEC_RECIPROCAL_SQRT with vec_reciprocal and vec_reciprocal_sqrt.
parent 8cd6adc7
...@@ -155,20 +155,15 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_density( ...@@ -155,20 +155,15 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_density(
/* Get the radius and inverse radius. */ /* Get the radius and inverse radius. */
r2.v = vec_load(R2); r2.v = vec_load(R2);
ri.v = vec_rsqrt(r2.v); ri = vec_reciprocal_sqrt(r2);
/*vec_rsqrt does not have the level of accuracy we need, so an extra term is
* added below.*/
ri.v = ri.v - vec_set1(0.5f) * ri.v * (r2.v * ri.v * ri.v - vec_set1(1.0f));
r.v = r2.v * ri.v; r.v = r2.v * ri.v;
hi.v = vec_load(Hi); hi.v = vec_load(Hi);
hi_inv.v = vec_rcp(hi.v); hi_inv = vec_reciprocal(hi);
hi_inv.v = hi_inv.v - hi_inv.v * (hi_inv.v * hi.v - vec_set1(1.0f));
xi.v = r.v * hi_inv.v; xi.v = r.v * hi_inv.v;
hj.v = vec_load(Hj); hj.v = vec_load(Hj);
hj_inv.v = vec_rcp(hj.v); hj_inv = vec_reciprocal(hj);
hj_inv.v = hj_inv.v - hj_inv.v * (hj_inv.v * hj.v - vec_set1(1.0f));
xj.v = r.v * hj_inv.v; xj.v = r.v * hj_inv.v;
/* Compute the kernel function. */ /* Compute the kernel function. */
...@@ -327,15 +322,11 @@ runner_iact_nonsym_vec_density(float *R2, float *Dx, float *Hi, float *Hj, ...@@ -327,15 +322,11 @@ runner_iact_nonsym_vec_density(float *R2, float *Dx, float *Hi, float *Hj,
/* Get the radius and inverse radius. */ /* Get the radius and inverse radius. */
r2.v = vec_load(R2); r2.v = vec_load(R2);
ri.v = vec_rsqrt(r2.v); ri = vec_reciprocal_sqrt(r2);
/*vec_rsqrt does not have the level of accuracy we need, so an extra term is
* added below.*/
ri.v = ri.v - vec_set1(0.5f) * ri.v * (r2.v * ri.v * ri.v - vec_set1(1.0f));
r.v = r2.v * ri.v; r.v = r2.v * ri.v;
hi.v = vec_load(Hi); hi.v = vec_load(Hi);
hi_inv.v = vec_rcp(hi.v); hi_inv = vec_reciprocal(hi);
hi_inv.v = hi_inv.v - hi_inv.v * (hi_inv.v * hi.v - vec_set1(1.0f));
xi.v = r.v * hi_inv.v; xi.v = r.v * hi_inv.v;
kernel_deval_vec(&xi, &wi, &wi_dx); kernel_deval_vec(&xi, &wi, &wi_dx);
...@@ -427,8 +418,8 @@ runner_iact_nonsym_2_vec_density( ...@@ -427,8 +418,8 @@ runner_iact_nonsym_2_vec_density(
/* Get the radius and inverse radius. */ /* Get the radius and inverse radius. */
r2.v = vec_load(R2); r2.v = vec_load(R2);
r2_2.v = vec_load(&R2[VEC_SIZE]); r2_2.v = vec_load(&R2[VEC_SIZE]);
VEC_RECIPROCAL_SQRT(r2.v, ri.v); ri = vec_reciprocal_sqrt(r2);
VEC_RECIPROCAL_SQRT(r2_2.v, ri2.v); ri2 = vec_reciprocal_sqrt(r2_2);
r.v = vec_mul(r2.v, ri.v); r.v = vec_mul(r2.v, ri.v);
r_2.v = vec_mul(r2_2.v, ri2.v); r_2.v = vec_mul(r2_2.v, ri2.v);
...@@ -758,12 +749,12 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force( ...@@ -758,12 +749,12 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force(
/* Get the radius and inverse radius. */ /* Get the radius and inverse radius. */
r2.v = vec_load(R2); r2.v = vec_load(R2);
VEC_RECIPROCAL_SQRT(r2.v, ri.v); ri = vec_reciprocal_sqrt(r2);
r.v = r2.v * ri.v; r.v = r2.v * ri.v;
/* Get the kernel for hi. */ /* Get the kernel for hi. */
hi.v = vec_load(Hi); hi.v = vec_load(Hi);
VEC_RECIPROCAL(hi.v, hi_inv.v); hi_inv = vec_reciprocal(hi);
hid_inv = pow_dimension_plus_one_vec(hi_inv); /* 1/h^(d+1) */ hid_inv = pow_dimension_plus_one_vec(hi_inv); /* 1/h^(d+1) */
xi.v = r.v * hi_inv.v; xi.v = r.v * hi_inv.v;
kernel_deval_vec(&xi, &wi, &wi_dx); kernel_deval_vec(&xi, &wi, &wi_dx);
...@@ -771,7 +762,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force( ...@@ -771,7 +762,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force(
/* Get the kernel for hj. */ /* Get the kernel for hj. */
hj.v = vec_load(Hj); hj.v = vec_load(Hj);
VEC_RECIPROCAL(hj.v, hj_inv.v); hj_inv = vec_reciprocal(hj);
hjd_inv = pow_dimension_plus_one_vec(hj_inv); /* 1/h^(d+1) */ hjd_inv = pow_dimension_plus_one_vec(hj_inv); /* 1/h^(d+1) */
xj.v = r.v * hj_inv.v; xj.v = r.v * hj_inv.v;
kernel_deval_vec(&xj, &wj, &wj_dx); kernel_deval_vec(&xj, &wj, &wj_dx);
...@@ -1037,12 +1028,12 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force( ...@@ -1037,12 +1028,12 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force(
/* Get the radius and inverse radius. */ /* Get the radius and inverse radius. */
r2.v = vec_load(R2); r2.v = vec_load(R2);
VEC_RECIPROCAL_SQRT(r2.v, ri.v); ri = vec_reciprocal_sqrt(r2);
r.v = r2.v * ri.v; r.v = r2.v * ri.v;
/* Get the kernel for hi. */ /* Get the kernel for hi. */
hi.v = vec_load(Hi); hi.v = vec_load(Hi);
VEC_RECIPROCAL(hi.v, hi_inv.v); hi_inv = vec_reciprocal(hi);
hid_inv = pow_dimension_plus_one_vec(hi_inv); hid_inv = pow_dimension_plus_one_vec(hi_inv);
xi.v = r.v * hi_inv.v; xi.v = r.v * hi_inv.v;
kernel_deval_vec(&xi, &wi, &wi_dx); kernel_deval_vec(&xi, &wi, &wi_dx);
...@@ -1050,7 +1041,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force( ...@@ -1050,7 +1041,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force(
/* Get the kernel for hj. */ /* Get the kernel for hj. */
hj.v = vec_load(Hj); hj.v = vec_load(Hj);
VEC_RECIPROCAL(hj.v, hj_inv.v); hj_inv = vec_reciprocal(hj);
hjd_inv = pow_dimension_plus_one_vec(hj_inv); hjd_inv = pow_dimension_plus_one_vec(hj_inv);
xj.v = r.v * hj_inv.v; xj.v = r.v * hj_inv.v;
kernel_deval_vec(&xj, &wj, &wj_dx); kernel_deval_vec(&xj, &wj, &wj_dx);
......
...@@ -332,7 +332,7 @@ __attribute__((always_inline)) INLINE void runner_doself1_density_vec( ...@@ -332,7 +332,7 @@ __attribute__((always_inline)) INLINE void runner_doself1_density_vec(
/* Get the inverse of hi. */ /* Get the inverse of hi. */
vector v_hi_inv; vector v_hi_inv;
VEC_RECIPROCAL(v_hi.v, v_hi_inv.v); v_hi_inv = vec_reciprocal(v_hi);
rhoSum.v = vec_setzero(); rhoSum.v = vec_setzero();
rho_dhSum.v = vec_setzero(); rho_dhSum.v = vec_setzero();
...@@ -599,8 +599,8 @@ __attribute__((always_inline)) INLINE void runner_doself1_density_vec_2( ...@@ -599,8 +599,8 @@ __attribute__((always_inline)) INLINE void runner_doself1_density_vec_2(
vector v_hi_inv, v_hi_inv2; vector v_hi_inv, v_hi_inv2;
VEC_RECIPROCAL(v_hi.v, v_hi_inv.v); v_hi_inv = vec_reciprocal(v_hi);
VEC_RECIPROCAL(v_hi2.v, v_hi_inv2.v); v_hi_inv2 = vec_reciprocal(v_hi2);
rhoSum.v = vec_setzero(); rhoSum.v = vec_setzero();
rho_dhSum.v = vec_setzero(); rho_dhSum.v = vec_setzero();
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment