Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
SWIFTsim
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Deploy
Releases
Model registry
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
SWIFT
SWIFTsim
Commits
b2a5ba65
Commit
b2a5ba65
authored
8 years ago
by
James Willis
Browse files
Options
Downloads
Patches
Plain Diff
Tidy masking up so that AVX-512 logic is hidden from user and occurs in vector.h.
parent
aa9064bb
No related branches found
No related tags found
1 merge request
!406
Doself2 vectorisation
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
src/runner_doiact_vec.c
+16
-27
16 additions, 27 deletions
src/runner_doiact_vec.c
src/vector.h
+14
-0
14 additions, 0 deletions
src/vector.h
with
30 additions
and
27 deletions
src/runner_doiact_vec.c
+
16
−
27
View file @
b2a5ba65
...
...
@@ -176,7 +176,7 @@ __attribute__((always_inline)) INLINE static void calcRemInteractions(
* @param v_viz #vector of z velocity of pi.
*/
__attribute__
((
always_inline
))
INLINE
static
void
storeInteractions
(
const
in
t
mask
,
const
int
pjd
,
vector
*
v_r2
,
vector
*
v_dx
,
vector
*
v_dy
,
const
shor
t
mask
,
const
int
pjd
,
vector
*
v_r2
,
vector
*
v_dx
,
vector
*
v_dy
,
vector
*
v_dz
,
const
struct
cache
*
const
cell_cache
,
struct
c2_cache
*
const
int_cache
,
int
*
icount
,
vector
*
rhoSum
,
vector
*
rho_dhSum
,
vector
*
wcountSum
,
vector
*
wcount_dhSum
,
vector
*
div_vSum
,
vector
*
curlvxSum
,
...
...
@@ -624,7 +624,6 @@ __attribute__((always_inline)) INLINE void runner_doself1_density_vec(
#ifdef WITH_VECTORIZATION
const
struct
engine
*
e
=
r
->
e
;
int
doi_mask
;
struct
part
*
restrict
pi
;
int
count_align
;
int
num_vec_proc
=
NUM_VEC_PROC
;
...
...
@@ -749,36 +748,26 @@ __attribute__((always_inline)) INLINE void runner_doself1_density_vec(
v_r2
.
v
=
vec_fma
(
v_dz_tmp
.
v
,
v_dz_tmp
.
v
,
v_r2
.
v
);
v_r2_2
.
v
=
vec_fma
(
v_dz_tmp2
.
v
,
v_dz_tmp2
.
v
,
v_r2_2
.
v
);
/* Form a mask from r2 < hig2 and r2 > 0.*/
#ifdef HAVE_AVX512_F
// KNL_MASK_16 doi_mask, doi_mask_check, doi_mask2, doi_mask2_check;
KNL_MASK_16
doi_mask_check
,
doi_mask2
,
doi_mask2_check
;
doi_mask_check
=
vec_cmp_gt
(
v_r2
.
v
,
vec_setzero
());
doi_mask
=
vec_cmp_lt
(
v_r2
.
v
,
v_hig2
.
v
);
doi_mask2_check
=
vec_cmp_gt
(
v_r2_2
.
v
,
vec_setzero
());
doi_mask2
=
vec_cmp_lt
(
v_r2_2
.
v
,
v_hig2
.
v
);
doi_mask
=
doi_mask
&
doi_mask_check
;
doi_mask2
=
doi_mask2
&
doi_mask2_check
;
#else
vector
v_doi_mask
,
v_doi_mask_check
,
v_doi_mask2
,
v_doi_mask2_check
;
int
doi_mask2
;
/* Form a mask from r2 < hig2 and r2 > 0.*/
mask_t
v_doi_mask
,
v_doi_mask_check
,
v_doi_mask2
,
v_doi_mask2_check
;
short
doi_mask
,
doi_mask2
;
/* Form r2 > 0 mask and r2 < hig2 mask. */
v_doi_mask_check
.
v
=
vec_cmp_gt
(
v_r2
.
v
,
vec_setzero
());
v_doi_mask
.
v
=
vec_cmp_lt
(
v_r2
.
v
,
v_hig2
.
v
);
v_doi_mask_check
=
vec_cmp_gt
(
v_r2
.
v
,
vec_setzero
());
v_doi_mask
=
vec_cmp_lt
(
v_r2
.
v
,
v_hig2
.
v
);
/* Form r2 > 0 mask and r2 < hig2 mask. */
v_doi_mask2_check
.
v
=
vec_cmp_gt
(
v_r2_2
.
v
,
vec_setzero
());
v_doi_mask2
.
v
=
vec_cmp_lt
(
v_r2_2
.
v
,
v_hig2
.
v
);
v_doi_mask2_check
=
vec_cmp_gt
(
v_r2_2
.
v
,
vec_setzero
());
v_doi_mask2
=
vec_cmp_lt
(
v_r2_2
.
v
,
v_hig2
.
v
);
/* Combine two masks and form integer mask. */
doi_mask
=
vec_cmp_result
(
vec_and
(
v_doi_mask
.
v
,
v_doi_mask_check
.
v
));
doi_mask2
=
vec_cmp_result
(
vec_and
(
v_doi_mask2
.
v
,
v_doi_mask2_check
.
v
));
#endif
/* HAVE_AVX512_F */
/* Combine the two masks. */
mask_t
doi_mask_combi
,
doi_mask2_combi
;
doi_mask_combi
=
vec_mask_and
(
v_doi_mask
,
v_doi_mask_check
);
doi_mask2_combi
=
vec_mask_and
(
v_doi_mask2
,
v_doi_mask2_check
);
/* Form integer mask. */
doi_mask
=
vec_cmp_result
(
doi_mask_combi
);
doi_mask2
=
vec_cmp_result
(
doi_mask2_combi
);
/* If there are any interactions left pack interaction values into c2
* cache. */
...
...
This diff is collapsed.
Click to expand it.
src/vector.h
+
14
−
0
View file @
b2a5ba65
...
...
@@ -60,7 +60,9 @@
#define vec_dbl_set(a, b, c, d, e, f, g, h) \
_mm512_set_pd(h, g, f, e, d, c, b, a)
#define vec_add(a, b) _mm512_add_ps(a, b)
#define vec_mask_add(a, b, mask) _mm512_mask_add_ps(a, mask, b, a)
#define vec_sub(a, b) _mm512_sub_ps(a, b)
#define vec_mask_sub(a, b, mask) _mm512_mask_sub_ps(a, mask, a, b)
#define vec_mul(a, b) _mm512_mul_ps(a, b)
#define vec_fma(a, b, c) _mm512_fmadd_ps(a, b, c)
#define vec_sqrt(a) _mm512_sqrt_ps(a)
...
...
@@ -75,7 +77,9 @@
#define vec_cmp_lt(a, b) _mm512_cmp_ps_mask(a, b, _CMP_LT_OQ)
#define vec_cmp_lte(a, b) _mm512_cmp_ps_mask(a, b, _CMP_LE_OQ)
#define vec_cmp_gte(a, b) _mm512_cmp_ps_mask(a, b, _CMP_GE_OQ)
#define vec_cmp_result(a) a
#define vec_and(a, b) _mm512_and_ps(a, b)
#define vec_mask_and(a, b) a & b
#define vec_todbl_lo(a) _mm512_cvtps_pd(_mm512_extract128_ps(a, 0))
#define vec_todbl_hi(a) _mm512_cvtps_pd(_mm512_extract128_ps(a, 1))
#define vec_dbl_tofloat(a, b) _mm512_insertf128(_mm512_castps128_ps512(a), b, 1)
...
...
@@ -142,7 +146,9 @@
#define vec_set(a, b, c, d, e, f, g, h) _mm256_set_ps(h, g, f, e, d, c, b, a)
#define vec_dbl_set(a, b, c, d) _mm256_set_pd(d, c, b, a)
#define vec_add(a, b) _mm256_add_ps(a, b)
#define vec_mask_add(a, b, mask) vec_add(a, vec_and(b,mask))
#define vec_sub(a, b) _mm256_sub_ps(a, b)
#define vec_mask_sub(a, b, mask) vec_sub(a, vec_and(b,mask))
#define vec_mul(a, b) _mm256_mul_ps(a, b)
#define vec_sqrt(a) _mm256_sqrt_ps(a)
#define vec_rcp(a) _mm256_rcp_ps(a)
...
...
@@ -158,6 +164,7 @@
#define vec_cmp_gte(a, b) _mm256_cmp_ps(a, b, _CMP_GE_OQ)
#define vec_cmp_result(a) _mm256_movemask_ps(a)
#define vec_and(a, b) _mm256_and_ps(a, b)
#define vec_mask_and(a, b) _mm256_and_ps(a, b)
#define vec_todbl_lo(a) _mm256_cvtps_pd(_mm256_extract128_ps(a, 0))
#define vec_todbl_hi(a) _mm256_cvtps_pd(_mm256_extract128_ps(a, 1))
#define vec_dbl_tofloat(a, b) _mm256_insertf128(_mm256_castps128_ps256(a), b, 1)
...
...
@@ -346,6 +353,13 @@ typedef union {
int
i
[
VEC_SIZE
];
}
vector
;
/* Define the mask type depending on the instruction set used. */
#ifdef HAVE_AVX512_F
typedef
__mmask16
mask_t
;
#else
typedef
VEC_FLOAT
mask_t
;
#endif
/**
* @brief Calculates the inverse ($1/x$) of a vector using intrinsics and a
* Newton iteration to obtain the correct level of accuracy.
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment