Commit 87e7ef21 authored by James Willis
Browse files

Correct bug with AVX-512 masking.

parent 0f4de63e
......@@ -1270,7 +1270,7 @@ runner_iact_nonsym_1_vec_force(
a_hydro_ySum->v = vec_mask_sub(a_hydro_ySum->v, piay.v, mask);
a_hydro_zSum->v = vec_mask_sub(a_hydro_zSum->v, piaz.v, mask);
h_dtSum->v = vec_mask_sub(h_dtSum->v, pih_dt.v, mask);
v_sigSum->v = vec_fmax(v_sigSum->v, vec_and_mask(v_sig, mask));
v_sigSum->v = vec_fmax(v_sigSum->v, vec_and_mask(v_sig.v, mask));
entropy_dtSum->v = vec_mask_add(entropy_dtSum->v, entropy_dt.v, mask);
#else
......@@ -1455,8 +1455,8 @@ runner_iact_nonsym_2_vec_force(
a_hydro_zSum->v = vec_mask_sub(a_hydro_zSum->v, piaz_2.v, mask_2);
h_dtSum->v = vec_mask_sub(h_dtSum->v, pih_dt.v, mask);
h_dtSum->v = vec_mask_sub(h_dtSum->v, pih_dt_2.v, mask_2);
v_sigSum->v = vec_fmax(v_sigSum->v, vec_and_mask(v_sig, mask));
v_sigSum->v = vec_fmax(v_sigSum->v, vec_and_mask(v_sig_2, mask_2));
v_sigSum->v = vec_fmax(v_sigSum->v, vec_and_mask(v_sig.v, mask));
v_sigSum->v = vec_fmax(v_sigSum->v, vec_and_mask(v_sig_2.v, mask_2));
entropy_dtSum->v = vec_mask_add(entropy_dtSum->v, entropy_dt.v, mask);
entropy_dtSum->v = vec_mask_add(entropy_dtSum->v, entropy_dt_2.v, mask_2);
} else {
......
......@@ -175,7 +175,7 @@
#define vec_form_int_mask(a) _mm256_movemask_ps(a.v)
#define vec_and(a, b) _mm256_and_ps(a, b)
#define vec_mask_and(a, b) _mm256_and_ps(a.v, b.v)
#define vec_and_mask(a, mask) vec_mask_and(a, mask)
#define vec_and_mask(a, mask) _mm256_and_ps(a, mask.v)
#define vec_init_mask(mask) mask.m = vec_setint1(0xFFFFFFFF)
#define vec_create_mask(mask, cond) mask.v = cond
#define vec_zero_mask(mask) mask.v = vec_setzero()
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment