diff --git a/src/runner_doiact_vec.c b/src/runner_doiact_vec.c
index 79df48a2378fff3e784fe9652397be4903d295d2..e0c64072eaa6bd7c30c83a00654301288857416d 100644
--- a/src/runner_doiact_vec.c
+++ b/src/runner_doiact_vec.c
@@ -745,7 +745,7 @@ __attribute__((always_inline)) INLINE void runner_doself2_force_vec(
       vec_create_mask(v_doi_mask, vec_cmp_lt(v_r2.v, v_h2.v));
 
       /* Combine all 3 masks and form integer mask. */
-      v_doi_mask.v = vec_and(v_doi_mask.v, v_doi_mask_self_check.v);
+      vec_combine_masks(v_doi_mask, v_doi_mask_self_check);
       doi_mask = vec_form_int_mask(v_doi_mask);
 
       /* If there are any interactions perform them. */
diff --git a/src/vector.h b/src/vector.h
index 6a7c6837989025785c1f9134004f2ebcc226a205..70dbd16710837831230567f6eb0fcaeef453cd28 100644
--- a/src/vector.h
+++ b/src/vector.h
@@ -91,6 +91,7 @@
 #define vec_init_mask_true(mask) ({ mask = 0xFFFF; })
 #define vec_zero_mask(mask) ({ mask = 0; })
 #define vec_create_mask(mask, cond) ({ mask = cond; })
+#define vec_combine_masks(mask1, mask2) ({ mask1 = vec_mask_and(mask1,mask2); })
 #define vec_pad_mask(mask, pad) ({ mask = mask >> (pad); })
 #define vec_blend(mask, a, b) _mm512_mask_blend_ps(mask, a, b)
 #define vec_todbl_lo(a) _mm512_cvtps_pd(_mm512_extract128_ps(a, 0))
@@ -186,6 +187,7 @@
 #define vec_and_mask(a, mask) _mm256_and_ps(a, mask.v)
 #define vec_init_mask_true(mask) mask.m = vec_setint1(0xFFFFFFFF)
 #define vec_create_mask(mask, cond) mask.v = cond
+#define vec_combine_masks(mask1, mask2) ({ mask1.v = vec_mask_and(mask1,mask2); })
 #define vec_zero_mask(mask) mask.v = vec_setzero()
 #define vec_pad_mask(mask, pad) \
   for (int i = VEC_SIZE - (pad); i < VEC_SIZE; i++) mask.i[i] = 0