Commit 67df6740 authored by James Willis
Browse files

Combine masks with a macro to be compatible with AVX-512.

parent 364afb97
...@@ -745,7 +745,7 @@ __attribute__((always_inline)) INLINE void runner_doself2_force_vec( ...@@ -745,7 +745,7 @@ __attribute__((always_inline)) INLINE void runner_doself2_force_vec(
vec_create_mask(v_doi_mask, vec_cmp_lt(v_r2.v, v_h2.v)); vec_create_mask(v_doi_mask, vec_cmp_lt(v_r2.v, v_h2.v));
/* Combine all 3 masks and form integer mask. */ /* Combine all 3 masks and form integer mask. */
v_doi_mask.v = vec_and(v_doi_mask.v, v_doi_mask_self_check.v); vec_combine_masks(v_doi_mask, v_doi_mask_self_check);
doi_mask = vec_form_int_mask(v_doi_mask); doi_mask = vec_form_int_mask(v_doi_mask);
/* If there are any interactions perform them. */ /* If there are any interactions perform them. */
......
...@@ -91,6 +91,7 @@ ...@@ -91,6 +91,7 @@
#define vec_init_mask_true(mask) ({ mask = 0xFFFF; }) #define vec_init_mask_true(mask) ({ mask = 0xFFFF; })
#define vec_zero_mask(mask) ({ mask = 0; }) #define vec_zero_mask(mask) ({ mask = 0; })
#define vec_create_mask(mask, cond) ({ mask = cond; }) #define vec_create_mask(mask, cond) ({ mask = cond; })
#define vec_combine_masks(mask1, mask2) ({ mask1 = vec_mask_and(mask1,mask2); })
#define vec_pad_mask(mask, pad) ({ mask = mask >> (pad); }) #define vec_pad_mask(mask, pad) ({ mask = mask >> (pad); })
#define vec_blend(mask, a, b) _mm512_mask_blend_ps(mask, a, b) #define vec_blend(mask, a, b) _mm512_mask_blend_ps(mask, a, b)
#define vec_todbl_lo(a) _mm512_cvtps_pd(_mm512_extract128_ps(a, 0)) #define vec_todbl_lo(a) _mm512_cvtps_pd(_mm512_extract128_ps(a, 0))
...@@ -186,6 +187,7 @@ ...@@ -186,6 +187,7 @@
#define vec_and_mask(a, mask) _mm256_and_ps(a, mask.v) #define vec_and_mask(a, mask) _mm256_and_ps(a, mask.v)
#define vec_init_mask_true(mask) mask.m = vec_setint1(0xFFFFFFFF) #define vec_init_mask_true(mask) mask.m = vec_setint1(0xFFFFFFFF)
#define vec_create_mask(mask, cond) mask.v = cond #define vec_create_mask(mask, cond) mask.v = cond
#define vec_combine_masks(mask1, mask2) ({ mask1.v = vec_mask_and(mask1,mask2); })
#define vec_zero_mask(mask) mask.v = vec_setzero() #define vec_zero_mask(mask) mask.v = vec_setzero()
#define vec_pad_mask(mask, pad) \ #define vec_pad_mask(mask, pad) \
for (int i = VEC_SIZE - (pad); i < VEC_SIZE; i++) mask.i[i] = 0 for (int i = VEC_SIZE - (pad); i < VEC_SIZE; i++) mask.i[i] = 0
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment