Commit 62aecd22 authored by James Willis
Browse files

Use intrinsics for vector operations as arithmetic overloading is not...

Use intrinsics for vector operations, because the Intel compiler does not yet support arithmetic operator overloading for AVX-512 vectors.
parent 086e4982
...@@ -578,24 +578,24 @@ runner_iact_nonsym_2_vec_density(float *R2, float *Dx, float *Dy, float *Dz, ...@@ -578,24 +578,24 @@ runner_iact_nonsym_2_vec_density(float *R2, float *Dx, float *Dy, float *Dz,
curlvzSum->v = vec_mask_add( curlvzSum->v = vec_mask_add(
curlvzSum->v, vec_mul(mj2.v, vec_mul(curlvrz2.v, wi_dx2.v)), mask2); curlvzSum->v, vec_mul(mj2.v, vec_mul(curlvrz2.v, wi_dx2.v)), mask2);
} else { } else {
rhoSum->v += vec_mul(mj.v, wi.v); rhoSum->v = vec_add(rhoSum->v, vec_mul(mj.v, wi.v));
rhoSum->v += vec_mul(mj2.v, wi2.v); rhoSum->v = vec_add(rhoSum->v, vec_mul(mj2.v, wi2.v));
rho_dhSum->v -= vec_mul( rho_dhSum->v = vec_sub(rho_dhSum->v, vec_mul(
mj.v, vec_fma(vec_set1(hydro_dimension), wi.v, vec_mul(xi.v, wi_dx.v))); mj.v, vec_fma(vec_set1(hydro_dimension), wi.v, vec_mul(xi.v, wi_dx.v))));
rho_dhSum->v -= vec_mul(mj2.v, vec_fma(vec_set1(hydro_dimension), wi2.v, rho_dhSum->v = vec_sub(rho_dhSum->v, vec_mul(mj2.v, vec_fma(vec_set1(hydro_dimension), wi2.v,
vec_mul(xi2.v, wi_dx2.v))); vec_mul(xi2.v, wi_dx2.v))));
wcountSum->v += wi.v; wcountSum->v = vec_add(wcountSum->v, wi.v);
wcountSum->v += wi2.v; wcountSum->v = vec_add(wcountSum->v, wi2.v);
wcount_dhSum->v -= vec_mul(xi.v, wi_dx.v); wcount_dhSum->v = vec_sub(wcount_dhSum->v, vec_mul(xi.v, wi_dx.v));
wcount_dhSum->v -= vec_mul(xi2.v, wi_dx2.v); wcount_dhSum->v = vec_sub(wcount_dhSum->v, vec_mul(xi2.v, wi_dx2.v));
div_vSum->v -= vec_mul(mj.v, vec_mul(dvdr.v, wi_dx.v)); div_vSum->v = vec_sub(div_vSum->v, vec_mul(mj.v, vec_mul(dvdr.v, wi_dx.v)));
div_vSum->v -= vec_mul(mj2.v, vec_mul(dvdr2.v, wi_dx2.v)); div_vSum->v = vec_sub(div_vSum->v, vec_mul(mj2.v, vec_mul(dvdr2.v, wi_dx2.v)));
curlvxSum->v += vec_mul(mj.v, vec_mul(curlvrx.v, wi_dx.v)); curlvxSum->v = vec_add(curlvxSum->v, vec_mul(mj.v, vec_mul(curlvrx.v, wi_dx.v)));
curlvxSum->v += vec_mul(mj2.v, vec_mul(curlvrx2.v, wi_dx2.v)); curlvxSum->v = vec_add(curlvxSum->v, vec_mul(mj2.v, vec_mul(curlvrx2.v, wi_dx2.v)));
curlvySum->v += vec_mul(mj.v, vec_mul(curlvry.v, wi_dx.v)); curlvySum->v = vec_add(curlvySum->v, vec_mul(mj.v, vec_mul(curlvry.v, wi_dx.v)));
curlvySum->v += vec_mul(mj2.v, vec_mul(curlvry2.v, wi_dx2.v)); curlvySum->v = vec_add(curlvySum->v, vec_mul(mj2.v, vec_mul(curlvry2.v, wi_dx2.v)));
curlvzSum->v += vec_mul(mj.v, vec_mul(curlvrz.v, wi_dx.v)); curlvzSum->v = vec_add(curlvzSum->v, vec_mul(mj.v, vec_mul(curlvrz.v, wi_dx.v)));
curlvzSum->v += vec_mul(mj2.v, vec_mul(curlvrz2.v, wi_dx2.v)); curlvzSum->v = vec_add(curlvzSum->v, vec_mul(mj2.v, vec_mul(curlvrz2.v, wi_dx2.v)));
} }
} }
#endif #endif
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment