Commit 74ef633e authored by James Willis's avatar James Willis
Browse files

Added argument to interaction so that masking only occurs when needed.

parent 73090f83
......@@ -484,7 +484,7 @@ runner_iact_nonsym_2_vec_density(
vector viy, vector viz, float *Vjx, float *Vjy, float *Vjz, float *Mj,
vector *rhoSum, vector *rho_dhSum, vector *wcountSum, vector *wcount_dhSum,
vector *div_vSum, vector *curlvxSum, vector *curlvySum, vector *curlvzSum,
mask_t mask, mask_t mask2) {
mask_t mask, mask_t mask2, short mask_cond) {
vector r, ri, r2, xi, wi, wi_dx;
vector mj;
......@@ -615,24 +615,47 @@ runner_iact_nonsym_2_vec_density(
curlvzSum->v, knlMask2, vec_mul(mj2.v, vec_mul(curlvrz2.v, wi_dx2.v)),
curlvzSum->v);
#else
rhoSum->v = vec_mask_add(rhoSum->v, vec_mul(mj.v, wi.v), mask);
rhoSum->v = vec_mask_add(rhoSum->v, vec_mul(mj2.v, wi2.v), mask2);
rho_dhSum->v = vec_mask_sub(rho_dhSum->v, vec_mul(mj.v, vec_fma(vec_set1(hydro_dimension), wi.v,
vec_mul(xi.v, wi_dx.v))), mask);
rho_dhSum->v = vec_mask_sub(rho_dhSum->v, vec_mul(mj2.v, vec_fma(vec_set1(hydro_dimension), wi2.v,
vec_mul(xi2.v, wi_dx2.v))), mask2);
wcountSum->v = vec_mask_add(wcountSum->v, wi.v, mask);
wcountSum->v = vec_mask_add(wcountSum->v, wi2.v, mask2);
wcount_dhSum->v = vec_mask_sub(wcount_dhSum->v, vec_mul(xi.v, wi_dx.v), mask);
wcount_dhSum->v = vec_mask_sub(wcount_dhSum->v, vec_mul(xi2.v, wi_dx2.v), mask2);
div_vSum->v = vec_mask_sub(div_vSum->v, vec_mul(mj.v, vec_mul(dvdr.v, wi_dx.v)), mask);
div_vSum->v = vec_mask_sub(div_vSum->v, vec_mul(mj2.v, vec_mul(dvdr2.v, wi_dx2.v)), mask2);
curlvxSum->v = vec_mask_add(curlvxSum->v,vec_mul(mj.v, vec_mul(curlvrx.v, wi_dx.v)), mask);
curlvxSum->v = vec_mask_add(curlvxSum->v,vec_mul(mj2.v, vec_mul(curlvrx2.v, wi_dx2.v)), mask2);
curlvySum->v = vec_mask_add(curlvySum->v,vec_mul(mj.v, vec_mul(curlvry.v, wi_dx.v)), mask);
curlvySum->v = vec_mask_add(curlvySum->v,vec_mul(mj2.v, vec_mul(curlvry2.v, wi_dx2.v)), mask2);
curlvzSum->v = vec_mask_add(curlvzSum->v,vec_mul(mj.v, vec_mul(curlvrz.v, wi_dx.v)), mask);
curlvzSum->v = vec_mask_add(curlvzSum->v,vec_mul(mj2.v, vec_mul(curlvrz2.v, wi_dx2.v)), mask2);
/* Mask only when needed. */
if(mask_cond) {
rhoSum->v = vec_mask_add(rhoSum->v, vec_mul(mj.v, wi.v), mask);
rhoSum->v = vec_mask_add(rhoSum->v, vec_mul(mj2.v, wi2.v), mask2);
rho_dhSum->v = vec_mask_sub(rho_dhSum->v, vec_mul(mj.v, vec_fma(vec_set1(hydro_dimension), wi.v,
vec_mul(xi.v, wi_dx.v))), mask);
rho_dhSum->v = vec_mask_sub(rho_dhSum->v, vec_mul(mj2.v, vec_fma(vec_set1(hydro_dimension), wi2.v,
vec_mul(xi2.v, wi_dx2.v))), mask2);
wcountSum->v = vec_mask_add(wcountSum->v, wi.v, mask);
wcountSum->v = vec_mask_add(wcountSum->v, wi2.v, mask2);
wcount_dhSum->v = vec_mask_sub(wcount_dhSum->v, vec_mul(xi.v, wi_dx.v), mask);
wcount_dhSum->v = vec_mask_sub(wcount_dhSum->v, vec_mul(xi2.v, wi_dx2.v), mask2);
div_vSum->v = vec_mask_sub(div_vSum->v, vec_mul(mj.v, vec_mul(dvdr.v, wi_dx.v)), mask);
div_vSum->v = vec_mask_sub(div_vSum->v, vec_mul(mj2.v, vec_mul(dvdr2.v, wi_dx2.v)), mask2);
curlvxSum->v = vec_mask_add(curlvxSum->v,vec_mul(mj.v, vec_mul(curlvrx.v, wi_dx.v)), mask);
curlvxSum->v = vec_mask_add(curlvxSum->v,vec_mul(mj2.v, vec_mul(curlvrx2.v, wi_dx2.v)), mask2);
curlvySum->v = vec_mask_add(curlvySum->v,vec_mul(mj.v, vec_mul(curlvry.v, wi_dx.v)), mask);
curlvySum->v = vec_mask_add(curlvySum->v,vec_mul(mj2.v, vec_mul(curlvry2.v, wi_dx2.v)), mask2);
curlvzSum->v = vec_mask_add(curlvzSum->v,vec_mul(mj.v, vec_mul(curlvrz.v, wi_dx.v)), mask);
curlvzSum->v = vec_mask_add(curlvzSum->v,vec_mul(mj2.v, vec_mul(curlvrz2.v, wi_dx2.v)), mask2);
}
else {
rhoSum->v += vec_mul(mj.v, wi.v);
rhoSum->v += vec_mul(mj2.v, wi2.v);
rho_dhSum->v -= vec_mul(mj.v, vec_fma(vec_set1(hydro_dimension), wi.v,
vec_mul(xi.v, wi_dx.v)));
rho_dhSum->v -= vec_mul(mj2.v, vec_fma(vec_set1(hydro_dimension), wi2.v,
vec_mul(xi2.v, wi_dx2.v)));
wcountSum->v += wi.v;
wcountSum->v += wi2.v;
wcount_dhSum->v -= vec_mul(xi.v, wi_dx.v);
wcount_dhSum->v -= vec_mul(xi2.v, wi_dx2.v);
div_vSum->v -= vec_mul(mj.v, vec_mul(dvdr.v, wi_dx.v));
div_vSum->v -= vec_mul(mj2.v, vec_mul(dvdr2.v, wi_dx2.v));
curlvxSum->v += vec_mul(mj.v, vec_mul(curlvrx.v, wi_dx.v));
curlvxSum->v += vec_mul(mj2.v, vec_mul(curlvrx2.v, wi_dx2.v));
curlvySum->v += vec_mul(mj.v, vec_mul(curlvry.v, wi_dx.v));
curlvySum->v += vec_mul(mj2.v, vec_mul(curlvry2.v, wi_dx2.v));
curlvzSum->v += vec_mul(mj.v, vec_mul(curlvrz.v, wi_dx.v));
curlvzSum->v += vec_mul(mj2.v, vec_mul(curlvrz2.v, wi_dx2.v));
}
#endif
}
#endif
......
......@@ -109,7 +109,7 @@ __attribute__((always_inline)) INLINE static void calcRemInteractions(
v_hi_inv, v_vix, v_viy, v_viz, &int_cache->vxq[*icount_align],
&int_cache->vyq[*icount_align], &int_cache->vzq[*icount_align],
&int_cache->mq[*icount_align], rhoSum, rho_dhSum, wcountSum,
wcount_dhSum, div_vSum, curlvxSum, curlvySum, curlvzSum, int_mask, int_mask2);
wcount_dhSum, div_vSum, curlvxSum, curlvySum, curlvzSum, int_mask, int_mask2, 1);
}
}
......@@ -231,7 +231,7 @@ __attribute__((always_inline)) INLINE static void storeInteractions(
&int_cache->dzq[pjd], v_hi_inv, v_vix, v_viy, v_viz,
&int_cache->vxq[pjd], &int_cache->vyq[pjd], &int_cache->vzq[pjd],
&int_cache->mq[pjd], rhoSum, rho_dhSum, wcountSum, wcount_dhSum,
div_vSum, curlvxSum, curlvySum, curlvzSum, int_mask, int_mask2);
div_vSum, curlvxSum, curlvySum, curlvzSum, int_mask, int_mask2, 0);
}
/* Reset interaction count. */
......@@ -777,7 +777,7 @@ __attribute__((always_inline)) INLINE void runner_doself1_density_vec(
&int_cache.dzq[pjd], v_hi_inv, v_vix, v_viy, v_viz,
&int_cache.vxq[pjd], &int_cache.vyq[pjd], &int_cache.vzq[pjd],
&int_cache.mq[pjd], &rhoSum, &rho_dhSum, &wcountSum, &wcount_dhSum,
&div_vSum, &curlvxSum, &curlvySum, &curlvzSum, int_mask, int_mask2);
&div_vSum, &curlvxSum, &curlvySum, &curlvzSum, int_mask, int_mask2, 0);
}
/* Perform horizontal adds on vector sums and store result in particle pi.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment