Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
SWIFT
SWIFTsim
Commits
b2a5ba65
Commit
b2a5ba65
authored
May 18, 2017
by
James Willis
Browse files
Tidy masking up so that AVX-512 logic is hidden from user and occurs in vector.h.
parent
aa9064bb
Changes
2
Hide whitespace changes
Inline
Side-by-side
src/runner_doiact_vec.c
View file @
b2a5ba65
...
...
@@ -176,7 +176,7 @@ __attribute__((always_inline)) INLINE static void calcRemInteractions(
* @param v_viz #vector of z velocity of pi.
*/
__attribute__
((
always_inline
))
INLINE
static
void
storeInteractions
(
const
in
t
mask
,
const
int
pjd
,
vector
*
v_r2
,
vector
*
v_dx
,
vector
*
v_dy
,
const
shor
t
mask
,
const
int
pjd
,
vector
*
v_r2
,
vector
*
v_dx
,
vector
*
v_dy
,
vector
*
v_dz
,
const
struct
cache
*
const
cell_cache
,
struct
c2_cache
*
const
int_cache
,
int
*
icount
,
vector
*
rhoSum
,
vector
*
rho_dhSum
,
vector
*
wcountSum
,
vector
*
wcount_dhSum
,
vector
*
div_vSum
,
vector
*
curlvxSum
,
...
...
@@ -624,7 +624,6 @@ __attribute__((always_inline)) INLINE void runner_doself1_density_vec(
#ifdef WITH_VECTORIZATION
const
struct
engine
*
e
=
r
->
e
;
int
doi_mask
;
struct
part
*
restrict
pi
;
int
count_align
;
int
num_vec_proc
=
NUM_VEC_PROC
;
...
...
@@ -749,36 +748,26 @@ __attribute__((always_inline)) INLINE void runner_doself1_density_vec(
v_r2
.
v
=
vec_fma
(
v_dz_tmp
.
v
,
v_dz_tmp
.
v
,
v_r2
.
v
);
v_r2_2
.
v
=
vec_fma
(
v_dz_tmp2
.
v
,
v_dz_tmp2
.
v
,
v_r2_2
.
v
);
/* Form a mask from r2 < hig2 and r2 > 0.*/
#ifdef HAVE_AVX512_F
// KNL_MASK_16 doi_mask, doi_mask_check, doi_mask2, doi_mask2_check;
KNL_MASK_16
doi_mask_check
,
doi_mask2
,
doi_mask2_check
;
doi_mask_check
=
vec_cmp_gt
(
v_r2
.
v
,
vec_setzero
());
doi_mask
=
vec_cmp_lt
(
v_r2
.
v
,
v_hig2
.
v
);
doi_mask2_check
=
vec_cmp_gt
(
v_r2_2
.
v
,
vec_setzero
());
doi_mask2
=
vec_cmp_lt
(
v_r2_2
.
v
,
v_hig2
.
v
);
doi_mask
=
doi_mask
&
doi_mask_check
;
doi_mask2
=
doi_mask2
&
doi_mask2_check
;
#else
vector
v_doi_mask
,
v_doi_mask_check
,
v_doi_mask2
,
v_doi_mask2_check
;
int
doi_mask2
;
/* Form a mask from r2 < hig2 and r2 > 0.*/
mask_t
v_doi_mask
,
v_doi_mask_check
,
v_doi_mask2
,
v_doi_mask2_check
;
short
doi_mask
,
doi_mask2
;
/* Form r2 > 0 mask and r2 < hig2 mask. */
v_doi_mask_check
.
v
=
vec_cmp_gt
(
v_r2
.
v
,
vec_setzero
());
v_doi_mask
.
v
=
vec_cmp_lt
(
v_r2
.
v
,
v_hig2
.
v
);
v_doi_mask_check
=
vec_cmp_gt
(
v_r2
.
v
,
vec_setzero
());
v_doi_mask
=
vec_cmp_lt
(
v_r2
.
v
,
v_hig2
.
v
);
/* Form r2 > 0 mask and r2 < hig2 mask. */
v_doi_mask2_check
.
v
=
vec_cmp_gt
(
v_r2_2
.
v
,
vec_setzero
());
v_doi_mask2
.
v
=
vec_cmp_lt
(
v_r2_2
.
v
,
v_hig2
.
v
);
v_doi_mask2_check
=
vec_cmp_gt
(
v_r2_2
.
v
,
vec_setzero
());
v_doi_mask2
=
vec_cmp_lt
(
v_r2_2
.
v
,
v_hig2
.
v
);
/* Combine two masks and form integer mask. */
doi_mask
=
vec_cmp_result
(
vec_and
(
v_doi_mask
.
v
,
v_doi_mask_check
.
v
));
doi_mask2
=
vec_cmp_result
(
vec_and
(
v_doi_mask2
.
v
,
v_doi_mask2_check
.
v
));
#endif
/* HAVE_AVX512_F */
/* Combine the two masks. */
mask_t
doi_mask_combi
,
doi_mask2_combi
;
doi_mask_combi
=
vec_mask_and
(
v_doi_mask
,
v_doi_mask_check
);
doi_mask2_combi
=
vec_mask_and
(
v_doi_mask2
,
v_doi_mask2_check
);
/* Form integer mask. */
doi_mask
=
vec_cmp_result
(
doi_mask_combi
);
doi_mask2
=
vec_cmp_result
(
doi_mask2_combi
);
/* If there are any interactions left pack interaction values into c2
* cache. */
...
...
src/vector.h
View file @
b2a5ba65
...
...
@@ -60,7 +60,9 @@
#define vec_dbl_set(a, b, c, d, e, f, g, h) \
_mm512_set_pd(h, g, f, e, d, c, b, a)
#define vec_add(a, b) _mm512_add_ps(a, b)
#define vec_mask_add(a, b, mask) _mm512_mask_add_ps(a, mask, b, a)
#define vec_sub(a, b) _mm512_sub_ps(a, b)
#define vec_mask_sub(a, b, mask) _mm512_mask_sub_ps(a, mask, a, b)
#define vec_mul(a, b) _mm512_mul_ps(a, b)
#define vec_fma(a, b, c) _mm512_fmadd_ps(a, b, c)
#define vec_sqrt(a) _mm512_sqrt_ps(a)
...
...
@@ -75,7 +77,9 @@
#define vec_cmp_lt(a, b) _mm512_cmp_ps_mask(a, b, _CMP_LT_OQ)
#define vec_cmp_lte(a, b) _mm512_cmp_ps_mask(a, b, _CMP_LE_OQ)
#define vec_cmp_gte(a, b) _mm512_cmp_ps_mask(a, b, _CMP_GE_OQ)
/* Returns the comparison mask unchanged: on AVX-512 the vec_cmp_* macros
 * already yield an integer __mmask16, so no movemask conversion is needed.
 * Parenthesize the expansion so a complex argument expression (e.g. a
 * conditional) keeps its meaning when the result feeds an operator with
 * higher precedence at the call site. */
#define vec_cmp_result(a) (a)
#define vec_and(a, b) _mm512_and_ps(a, b)
/* Bitwise AND of two AVX-512 __mmask16 masks. Arguments and the whole
 * expansion are parenthesized: `&` binds tighter than `|` and `?:`, so the
 * unparenthesized form `a & b` mis-associates when a caller passes a
 * compound expression (e.g. vec_mask_and(x, y | z) would expand to
 * `x & y | z`). */
#define vec_mask_and(a, b) ((a) & (b))
#define vec_todbl_lo(a) _mm512_cvtps_pd(_mm512_extract128_ps(a, 0))
#define vec_todbl_hi(a) _mm512_cvtps_pd(_mm512_extract128_ps(a, 1))
#define vec_dbl_tofloat(a, b) _mm512_insertf128(_mm512_castps128_ps512(a), b, 1)
...
...
@@ -142,7 +146,9 @@
#define vec_set(a, b, c, d, e, f, g, h) _mm256_set_ps(h, g, f, e, d, c, b, a)
#define vec_dbl_set(a, b, c, d) _mm256_set_pd(d, c, b, a)
#define vec_add(a, b) _mm256_add_ps(a, b)
#define vec_mask_add(a, b, mask) vec_add(a, vec_and(b,mask))
#define vec_sub(a, b) _mm256_sub_ps(a, b)
#define vec_mask_sub(a, b, mask) vec_sub(a, vec_and(b,mask))
#define vec_mul(a, b) _mm256_mul_ps(a, b)
#define vec_sqrt(a) _mm256_sqrt_ps(a)
#define vec_rcp(a) _mm256_rcp_ps(a)
...
...
@@ -158,6 +164,7 @@
#define vec_cmp_gte(a, b) _mm256_cmp_ps(a, b, _CMP_GE_OQ)
#define vec_cmp_result(a) _mm256_movemask_ps(a)
#define vec_and(a, b) _mm256_and_ps(a, b)
#define vec_mask_and(a, b) _mm256_and_ps(a, b)
#define vec_todbl_lo(a) _mm256_cvtps_pd(_mm256_extract128_ps(a, 0))
#define vec_todbl_hi(a) _mm256_cvtps_pd(_mm256_extract128_ps(a, 1))
#define vec_dbl_tofloat(a, b) _mm256_insertf128(_mm256_castps128_ps256(a), b, 1)
...
...
@@ -346,6 +353,13 @@ typedef union {
int
i
[
VEC_SIZE
];
}
vector
;
/* Define the mask type depending on the instruction set used. */
#ifdef HAVE_AVX512_F
typedef
__mmask16
mask_t
;
#else
typedef
VEC_FLOAT
mask_t
;
#endif
/**
* @brief Calculates the inverse ($1/x$) of a vector using intrinsics and a
* Newton iteration to obtain the correct level of accuracy.
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment