Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
SWIFTsim
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Deploy
Releases
Model registry
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
SWIFT
SWIFTsim
Commits
f194e08b
Commit
f194e08b
authored
8 years ago
by
James Willis
Browse files
Options
Downloads
Patches
Plain Diff
Use mask_t in runner_doself2_force_vec.
parent
caea34d3
Branches
Branches containing commit
Tags
Tags containing commit
1 merge request
!406
Doself2 vectorisation
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
src/hydro/Gadget2/hydro_iact.h
+14
-14
14 additions, 14 deletions
src/hydro/Gadget2/hydro_iact.h
src/runner_doiact_vec.c
+10
-34
10 additions, 34 deletions
src/runner_doiact_vec.c
src/vector.h
+9
-7
9 additions, 7 deletions
src/vector.h
with
33 additions
and
55 deletions
src/hydro/Gadget2/hydro_iact.h
+
14
−
14
View file @
f194e08b
...
...
@@ -1142,7 +1142,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force(
#ifdef WITH_VECTORIZATION
__attribute__
((
always_inline
))
INLINE
static
void
runner_iact_nonsym_1_vec_force
(
float
*
R2
,
float
*
Dx
,
float
*
Dy
,
float
*
Dz
,
vector
*
vix
,
vector
*
viy
,
vector
*
viz
,
vector
*
pirho
,
vector
*
grad_hi
,
vector
*
piPOrho2
,
vector
*
balsara_i
,
vector
*
ci
,
float
*
Vjx
,
float
*
Vjy
,
float
*
Vjz
,
float
*
Pjrho
,
float
*
Grad_hj
,
float
*
PjPOrho2
,
float
*
Balsara_j
,
float
*
Cj
,
float
*
Mj
,
vector
*
hi_inv
,
float
*
Hj_inv
,
vector
*
a_hydro_xSum
,
vector
*
a_hydro_ySum
,
vector
*
a_hydro_zSum
,
vector
*
h_dtSum
,
vector
*
v_sigSum
,
vector
*
entropy_dtSum
,
vector
mask
)
{
vector
*
a_hydro_xSum
,
vector
*
a_hydro_ySum
,
vector
*
a_hydro_zSum
,
vector
*
h_dtSum
,
vector
*
v_sigSum
,
vector
*
entropy_dtSum
,
mask_t
mask
)
{
#ifdef WITH_VECTORIZATION
...
...
@@ -1372,7 +1372,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_1_vec_force
__attribute__
((
always_inline
))
INLINE
static
void
runner_iact_nonsym_2_vec_force
(
float
*
R2
,
float
*
Dx
,
float
*
Dy
,
float
*
Dz
,
vector
*
vix
,
vector
*
viy
,
vector
*
viz
,
vector
*
pirho
,
vector
*
grad_hi
,
vector
*
piPOrho2
,
vector
*
balsara_i
,
vector
*
ci
,
float
*
Vjx
,
float
*
Vjy
,
float
*
Vjz
,
float
*
Pjrho
,
float
*
Grad_hj
,
float
*
PjPOrho2
,
float
*
Balsara_j
,
float
*
Cj
,
float
*
Mj
,
vector
*
hi_inv
,
float
*
Hj_inv
,
vector
*
a_hydro_xSum
,
vector
*
a_hydro_ySum
,
vector
*
a_hydro_zSum
,
vector
*
h_dtSum
,
vector
*
v_sigSum
,
vector
*
entropy_dtSum
,
vector
mask
,
vector
mask_2
)
{
vector
*
a_hydro_xSum
,
vector
*
a_hydro_ySum
,
vector
*
a_hydro_zSum
,
vector
*
h_dtSum
,
vector
*
v_sigSum
,
vector
*
entropy_dtSum
,
mask_t
mask
,
mask_t
mask_2
)
{
#ifdef WITH_VECTORIZATION
...
...
@@ -1527,18 +1527,18 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_2_vec_force
entropy_dt_2
.
v
=
mj_2
.
v
*
visc_term_2
.
v
*
dvdr_2
.
v
;
/* Store the forces back on the particles. */
a_hydro_xSum
->
v
-
=
vec_
and
(
piax
.
v
,
mask
.
v
);
a_hydro_xSum
->
v
-
=
vec_
and
(
piax_2
.
v
,
mask_2
.
v
);
a_hydro_ySum
->
v
-
=
vec_
and
(
piay
.
v
,
mask
.
v
);
a_hydro_ySum
->
v
-
=
vec_
and
(
piay_2
.
v
,
mask_2
.
v
);
a_hydro_zSum
->
v
-
=
vec_
and
(
piaz
.
v
,
mask
.
v
);
a_hydro_zSum
->
v
-
=
vec_
and
(
piaz_2
.
v
,
mask_2
.
v
);
h_dtSum
->
v
-
=
vec_
and
(
pih_dt
.
v
,
mask
.
v
);
h_dtSum
->
v
-
=
vec_
and
(
pih_dt_2
.
v
,
mask_2
.
v
);
v_sigSum
->
v
=
vec_fmax
(
v_sigSum
->
v
,
vec_and
(
v_sig
.
v
,
mask
.
v
));
v_sigSum
->
v
=
vec_fmax
(
v_sigSum
->
v
,
vec_and
(
v_sig_2
.
v
,
mask_2
.
v
));
entropy_dtSum
->
v
+
=
vec_
and
(
entropy_dt
.
v
,
mask
.
v
);
entropy_dtSum
->
v
+
=
vec_
and
(
entropy_dt_2
.
v
,
mask_2
.
v
);
a_hydro_xSum
->
v
=
vec_
mask_sub
(
a_hydro_xSum
->
v
,
piax
.
v
,
mask
);
a_hydro_xSum
->
v
=
vec_
mask_sub
(
a_hydro_xSum
->
v
,
piax_2
.
v
,
mask_2
);
a_hydro_ySum
->
v
=
vec_
mask_sub
(
a_hydro_ySum
->
v
,
piay
.
v
,
mask
);
a_hydro_ySum
->
v
=
vec_
mask_sub
(
a_hydro_ySum
->
v
,
piay_2
.
v
,
mask_2
);
a_hydro_zSum
->
v
=
vec_
mask_sub
(
a_hydro_zSum
->
v
,
piaz
.
v
,
mask
);
a_hydro_zSum
->
v
=
vec_
mask_sub
(
a_hydro_zSum
->
v
,
piaz_2
.
v
,
mask_2
);
h_dtSum
->
v
=
vec_
mask_sub
(
h_dtSum
->
v
,
pih_dt
.
v
,
mask
);
h_dtSum
->
v
=
vec_
mask_sub
(
h_dtSum
->
v
,
pih_dt_2
.
v
,
mask_2
);
v_sigSum
->
v
=
vec_fmax
(
v_sigSum
->
v
,
vec_and
_mask
(
v_sig
,
mask
));
v_sigSum
->
v
=
vec_fmax
(
v_sigSum
->
v
,
vec_and
_mask
(
v_sig_2
,
mask_2
));
entropy_dtSum
->
v
=
vec_
mask_add
(
entropy_dtSum
->
v
,
entropy_dt
.
v
,
mask
);
entropy_dtSum
->
v
=
vec_
mask_add
(
entropy_dtSum
->
v
,
entropy_dt_2
.
v
,
mask_2
);
#else
...
...
This diff is collapsed.
Click to expand it.
src/runner_doiact_vec.c
+
10
−
34
View file @
f194e08b
...
...
@@ -276,10 +276,7 @@ __attribute__((always_inline)) INLINE static void calcRemForceInteractions(
vector
*
v_rhoi
,
vector
*
v_grad_hi
,
vector
*
v_pOrhoi2
,
vector
*
v_balsara_i
,
vector
*
v_ci
,
int
*
icount_align
,
int
num_vec_proc
)
{
#ifdef HAVE_AVX512_F
KNL_MASK_16
knl_mask
,
knl_mask2
;
#endif
vector
int_mask
,
int_mask2
;
mask_t
int_mask
,
int_mask2
;
/* Work out the number of remainder interactions and pad secondary cache. */
*
icount_align
=
icount
;
...
...
@@ -288,16 +285,10 @@ __attribute__((always_inline)) INLINE static void calcRemForceInteractions(
int
pad
=
(
num_vec_proc
*
VEC_SIZE
)
-
rem
;
*
icount_align
+=
pad
;
/* Initialise masks to true. */
#ifdef HAVE_AVX512_F
knl_mask
=
0xFFFF
;
knl_mask2
=
0xFFFF
;
int_mask
.
m
=
vec_setint1
(
0xFFFFFFFF
);
int_mask2
.
m
=
vec_setint1
(
0xFFFFFFFF
);
#else
int_mask
.
m
=
vec_setint1
(
0xFFFFFFFF
);
int_mask2
.
m
=
vec_setint1
(
0xFFFFFFFF
);
#endif
/* Initialise masks to true. */
vec_init_mask
(
int_mask
);
vec_init_mask
(
int_mask2
);
/* Pad secondary cache so that there are no contributions in the interaction
* function. */
for
(
int
i
=
icount
;
i
<
*
icount_align
;
i
++
)
{
...
...
@@ -319,19 +310,10 @@ __attribute__((always_inline)) INLINE static void calcRemForceInteractions(
/* Zero parts of mask that represent the padded values.*/
if
(
pad
<
VEC_SIZE
)
{
#ifdef HAVE_AVX512_F
knl_mask2
=
knl_mask2
>>
pad
;
#else
for
(
int
i
=
VEC_SIZE
-
pad
;
i
<
VEC_SIZE
;
i
++
)
int_mask2
.
i
[
i
]
=
0
;
#endif
vec_pad_mask
(
int_mask2
,
pad
);
}
else
{
#ifdef HAVE_AVX512_F
knl_mask
=
knl_mask
>>
(
VEC_SIZE
-
rem
);
knl_mask2
=
0
;
#else
for
(
int
i
=
rem
;
i
<
VEC_SIZE
;
i
++
)
int_mask
.
i
[
i
]
=
0
;
int_mask2
.
v
=
vec_setzero
();
#endif
vec_pad_mask
(
int_mask
,
VEC_SIZE
-
rem
);
vec_zero_mask
(
int_mask2
);
}
/* Perform remainder interaction and remove remainder from aligned
...
...
@@ -341,12 +323,7 @@ __attribute__((always_inline)) INLINE static void calcRemForceInteractions(
runner_iact_nonsym_2_vec_force
(
&
int_cache
->
r2q
[
*
icount_align
],
&
int_cache
->
dxq
[
*
icount_align
],
&
int_cache
->
dyq
[
*
icount_align
],
&
int_cache
->
dzq
[
*
icount_align
],
v_vix
,
v_viy
,
v_viz
,
v_rhoi
,
v_grad_hi
,
v_pOrhoi2
,
v_balsara_i
,
v_ci
,
&
int_cache
->
vxq
[
*
icount_align
],
&
int_cache
->
vyq
[
*
icount_align
],
&
int_cache
->
vzq
[
*
icount_align
],
&
int_cache
->
rhoq
[
*
icount_align
],
&
int_cache
->
grad_hq
[
*
icount_align
],
&
int_cache
->
pOrho2q
[
*
icount_align
],
&
int_cache
->
balsaraq
[
*
icount_align
],
&
int_cache
->
soundspeedq
[
*
icount_align
],
&
int_cache
->
mq
[
*
icount_align
],
v_hi_inv
,
&
int_cache
->
h_invq
[
*
icount_align
],
a_hydro_xSum
,
a_hydro_ySum
,
a_hydro_zSum
,
h_dtSum
,
v_sigSum
,
entropy_dtSum
,
int_mask
,
int_mask2
#ifdef HAVE_AVX512_F
,
knl_mask
,
knl_mask2
);
#else
);
#endif
a_hydro_xSum
,
a_hydro_ySum
,
a_hydro_zSum
,
h_dtSum
,
v_sigSum
,
entropy_dtSum
,
int_mask
,
int_mask2
);
}
}
...
...
@@ -992,8 +969,7 @@ __attribute__((always_inline)) INLINE void runner_doself2_force_vec(
VEC_HADD
(
a_hydro_ySum
,
pi
->
a_hydro
[
1
]);
VEC_HADD
(
a_hydro_zSum
,
pi
->
a_hydro
[
2
]);
VEC_HADD
(
h_dtSum
,
pi
->
force
.
h_dt
);
for
(
int
k
=
0
;
k
<
VEC_SIZE
;
k
++
)
pi
->
force
.
v_sig
=
max
(
pi
->
force
.
v_sig
,
v_sigSum
.
f
[
k
]);
VEC_HMAX
(
v_sigSum
,
pi
->
force
.
v_sig
);
VEC_HADD
(
entropy_dtSum
,
pi
->
entropy_dt
);
/* Reset interaction count. */
...
...
This diff is collapsed.
Click to expand it.
src/vector.h
+
9
−
7
View file @
f194e08b
...
...
@@ -81,6 +81,7 @@
#define vec_form_int_mask(a) a
#define vec_and(a, b) _mm512_and_ps(a, b)
#define vec_mask_and(a, b) a & b
#define vec_and_mask(a, mask) _mm512_maskz_expand_ps(mask, a)
#define vec_init_mask(mask) mask = 0xFFFF
#define vec_zero_mask(mask) mask = 0
#define vec_create_mask(mask, cond) mask = cond
...
...
@@ -130,6 +131,9 @@
#define VEC_FORM_PACKED_MASK(mask, v_mask, pack) \
pack += __builtin_popcount(mask);
/* Finds the horizontal maximum of vector b and returns a float. */
#define VEC_HMAX(a, b) a = _mm512_reduce_max_ps(b.v)
/* Performs a left-pack on a vector based upon a mask and returns the result. */
#define VEC_LEFT_PACK(a, mask, result) \
_mm512_mask_compressstoreu_ps(result, mask, a)
...
...
@@ -171,6 +175,7 @@
#define vec_form_int_mask(a) _mm256_movemask_ps(a.v)
#define vec_and(a, b) _mm256_and_ps(a, b)
#define vec_mask_and(a, b) _mm256_and_ps(a.v, b.v)
#define vec_and_mask(a, mask) vec_mask_and(a, mask)
#define vec_init_mask(mask) mask.m = vec_setint1(0xFFFFFFFF)
#define vec_create_mask(mask, cond) mask.v = cond
#define vec_zero_mask(mask) mask.v = vec_setzero()
...
...
@@ -201,13 +206,10 @@
b += a.f[0] + a.f[4];
/* Performs a horizontal maximum on the vector and takes the maximum of the result with a float, b. */
#define VEC_HMAX(a, b) \
{ \
__m256 y = _mm256_permute2f128_ps(a.v, a.v, 1);
/* Permute 128-bit values, y = [a.high, a.low] */
\
__m256 m1 = _mm256_max_ps(a.v, y);
/* m1[0] = max(x[0], x[3]), m1[1] = max(x[1], x[4]), etc. */
\
__m256 m2 = _mm256_permute_ps(m1, 177);
/* Set m2[0] = m1[1], m2[1] = m1[0], m2[2] = m1[3] etc. */
\
__m256 m = _mm256_max_ps(m1, m2);
/* m[0] and m[7] contain maximums of each part of vector. */
\
b = fmaxf(fmaxf(b,m[0]),m[7]); \
#define VEC_HMAX(a, b) \
{ \
for(int k=0; k<VEC_SIZE; k++) \
b = max(b, a.f[k]); \
}
/* Returns the lower 128-bits of the 256-bit vector. */
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment