Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
SWIFT
SWIFTsim
Commits
f194e08b
Commit
f194e08b
authored
May 21, 2017
by
James Willis
Browse files
Use mask_t in runner_doself2_force_vec.
parent
caea34d3
Changes
3
Hide whitespace changes
Inline
Side-by-side
src/hydro/Gadget2/hydro_iact.h
View file @
f194e08b
...
...
@@ -1142,7 +1142,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force(
#ifdef WITH_VECTORIZATION
__attribute__
((
always_inline
))
INLINE
static
void
runner_iact_nonsym_1_vec_force
(
float
*
R2
,
float
*
Dx
,
float
*
Dy
,
float
*
Dz
,
vector
*
vix
,
vector
*
viy
,
vector
*
viz
,
vector
*
pirho
,
vector
*
grad_hi
,
vector
*
piPOrho2
,
vector
*
balsara_i
,
vector
*
ci
,
float
*
Vjx
,
float
*
Vjy
,
float
*
Vjz
,
float
*
Pjrho
,
float
*
Grad_hj
,
float
*
PjPOrho2
,
float
*
Balsara_j
,
float
*
Cj
,
float
*
Mj
,
vector
*
hi_inv
,
float
*
Hj_inv
,
vector
*
a_hydro_xSum
,
vector
*
a_hydro_ySum
,
vector
*
a_hydro_zSum
,
vector
*
h_dtSum
,
vector
*
v_sigSum
,
vector
*
entropy_dtSum
,
vector
mask
)
{
vector
*
a_hydro_xSum
,
vector
*
a_hydro_ySum
,
vector
*
a_hydro_zSum
,
vector
*
h_dtSum
,
vector
*
v_sigSum
,
vector
*
entropy_dtSum
,
mask_t
mask
)
{
#ifdef WITH_VECTORIZATION
...
...
@@ -1372,7 +1372,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_1_vec_force
__attribute__
((
always_inline
))
INLINE
static
void
runner_iact_nonsym_2_vec_force
(
float
*
R2
,
float
*
Dx
,
float
*
Dy
,
float
*
Dz
,
vector
*
vix
,
vector
*
viy
,
vector
*
viz
,
vector
*
pirho
,
vector
*
grad_hi
,
vector
*
piPOrho2
,
vector
*
balsara_i
,
vector
*
ci
,
float
*
Vjx
,
float
*
Vjy
,
float
*
Vjz
,
float
*
Pjrho
,
float
*
Grad_hj
,
float
*
PjPOrho2
,
float
*
Balsara_j
,
float
*
Cj
,
float
*
Mj
,
vector
*
hi_inv
,
float
*
Hj_inv
,
vector
*
a_hydro_xSum
,
vector
*
a_hydro_ySum
,
vector
*
a_hydro_zSum
,
vector
*
h_dtSum
,
vector
*
v_sigSum
,
vector
*
entropy_dtSum
,
vector
mask
,
vector
mask_2
)
{
vector
*
a_hydro_xSum
,
vector
*
a_hydro_ySum
,
vector
*
a_hydro_zSum
,
vector
*
h_dtSum
,
vector
*
v_sigSum
,
vector
*
entropy_dtSum
,
mask_t
mask
,
mask_t
mask_2
)
{
#ifdef WITH_VECTORIZATION
...
...
@@ -1527,18 +1527,18 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_2_vec_force
entropy_dt_2
.
v
=
mj_2
.
v
*
visc_term_2
.
v
*
dvdr_2
.
v
;
/* Store the forces back on the particles. */
a_hydro_xSum
->
v
-
=
vec_
and
(
piax
.
v
,
mask
.
v
);
a_hydro_xSum
->
v
-
=
vec_
and
(
piax_2
.
v
,
mask_2
.
v
);
a_hydro_ySum
->
v
-
=
vec_
and
(
piay
.
v
,
mask
.
v
);
a_hydro_ySum
->
v
-
=
vec_
and
(
piay_2
.
v
,
mask_2
.
v
);
a_hydro_zSum
->
v
-
=
vec_
and
(
piaz
.
v
,
mask
.
v
);
a_hydro_zSum
->
v
-
=
vec_
and
(
piaz_2
.
v
,
mask_2
.
v
);
h_dtSum
->
v
-
=
vec_
and
(
pih_dt
.
v
,
mask
.
v
);
h_dtSum
->
v
-
=
vec_
and
(
pih_dt_2
.
v
,
mask_2
.
v
);
v_sigSum
->
v
=
vec_fmax
(
v_sigSum
->
v
,
vec_and
(
v_sig
.
v
,
mask
.
v
));
v_sigSum
->
v
=
vec_fmax
(
v_sigSum
->
v
,
vec_and
(
v_sig_2
.
v
,
mask_2
.
v
));
entropy_dtSum
->
v
+
=
vec_
and
(
entropy_dt
.
v
,
mask
.
v
);
entropy_dtSum
->
v
+
=
vec_
and
(
entropy_dt_2
.
v
,
mask_2
.
v
);
a_hydro_xSum
->
v
=
vec_
mask_sub
(
a_hydro_xSum
->
v
,
piax
.
v
,
mask
);
a_hydro_xSum
->
v
=
vec_
mask_sub
(
a_hydro_xSum
->
v
,
piax_2
.
v
,
mask_2
);
a_hydro_ySum
->
v
=
vec_
mask_sub
(
a_hydro_ySum
->
v
,
piay
.
v
,
mask
);
a_hydro_ySum
->
v
=
vec_
mask_sub
(
a_hydro_ySum
->
v
,
piay_2
.
v
,
mask_2
);
a_hydro_zSum
->
v
=
vec_
mask_sub
(
a_hydro_zSum
->
v
,
piaz
.
v
,
mask
);
a_hydro_zSum
->
v
=
vec_
mask_sub
(
a_hydro_zSum
->
v
,
piaz_2
.
v
,
mask_2
);
h_dtSum
->
v
=
vec_
mask_sub
(
h_dtSum
->
v
,
pih_dt
.
v
,
mask
);
h_dtSum
->
v
=
vec_
mask_sub
(
h_dtSum
->
v
,
pih_dt_2
.
v
,
mask_2
);
v_sigSum
->
v
=
vec_fmax
(
v_sigSum
->
v
,
vec_and
_mask
(
v_sig
,
mask
));
v_sigSum
->
v
=
vec_fmax
(
v_sigSum
->
v
,
vec_and
_mask
(
v_sig_2
,
mask_2
));
entropy_dtSum
->
v
=
vec_
mask_add
(
entropy_dtSum
->
v
,
entropy_dt
.
v
,
mask
);
entropy_dtSum
->
v
=
vec_
mask_add
(
entropy_dtSum
->
v
,
entropy_dt_2
.
v
,
mask_2
);
#else
...
...
src/runner_doiact_vec.c
View file @
f194e08b
...
...
@@ -276,10 +276,7 @@ __attribute__((always_inline)) INLINE static void calcRemForceInteractions(
vector
*
v_rhoi
,
vector
*
v_grad_hi
,
vector
*
v_pOrhoi2
,
vector
*
v_balsara_i
,
vector
*
v_ci
,
int
*
icount_align
,
int
num_vec_proc
)
{
#ifdef HAVE_AVX512_F
KNL_MASK_16
knl_mask
,
knl_mask2
;
#endif
vector
int_mask
,
int_mask2
;
mask_t
int_mask
,
int_mask2
;
/* Work out the number of remainder interactions and pad secondary cache. */
*
icount_align
=
icount
;
...
...
@@ -288,16 +285,10 @@ __attribute__((always_inline)) INLINE static void calcRemForceInteractions(
int
pad
=
(
num_vec_proc
*
VEC_SIZE
)
-
rem
;
*
icount_align
+=
pad
;
/* Initialise masks to true. */
#ifdef HAVE_AVX512_F
knl_mask
=
0xFFFF
;
knl_mask2
=
0xFFFF
;
int_mask
.
m
=
vec_setint1
(
0xFFFFFFFF
);
int_mask2
.
m
=
vec_setint1
(
0xFFFFFFFF
);
#else
int_mask
.
m
=
vec_setint1
(
0xFFFFFFFF
);
int_mask2
.
m
=
vec_setint1
(
0xFFFFFFFF
);
#endif
/* Initialise masks to true. */
vec_init_mask
(
int_mask
);
vec_init_mask
(
int_mask2
);
/* Pad secondary cache so that there are no contributions in the interaction
* function. */
for
(
int
i
=
icount
;
i
<
*
icount_align
;
i
++
)
{
...
...
@@ -319,19 +310,10 @@ __attribute__((always_inline)) INLINE static void calcRemForceInteractions(
/* Zero parts of mask that represent the padded values.*/
if
(
pad
<
VEC_SIZE
)
{
#ifdef HAVE_AVX512_F
knl_mask2
=
knl_mask2
>>
pad
;
#else
for
(
int
i
=
VEC_SIZE
-
pad
;
i
<
VEC_SIZE
;
i
++
)
int_mask2
.
i
[
i
]
=
0
;
#endif
vec_pad_mask
(
int_mask2
,
pad
);
}
else
{
#ifdef HAVE_AVX512_F
knl_mask
=
knl_mask
>>
(
VEC_SIZE
-
rem
);
knl_mask2
=
0
;
#else
for
(
int
i
=
rem
;
i
<
VEC_SIZE
;
i
++
)
int_mask
.
i
[
i
]
=
0
;
int_mask2
.
v
=
vec_setzero
();
#endif
vec_pad_mask
(
int_mask
,
VEC_SIZE
-
rem
);
vec_zero_mask
(
int_mask2
);
}
/* Perform remainder interaction and remove remainder from aligned
...
...
@@ -341,12 +323,7 @@ __attribute__((always_inline)) INLINE static void calcRemForceInteractions(
runner_iact_nonsym_2_vec_force
(
&
int_cache
->
r2q
[
*
icount_align
],
&
int_cache
->
dxq
[
*
icount_align
],
&
int_cache
->
dyq
[
*
icount_align
],
&
int_cache
->
dzq
[
*
icount_align
],
v_vix
,
v_viy
,
v_viz
,
v_rhoi
,
v_grad_hi
,
v_pOrhoi2
,
v_balsara_i
,
v_ci
,
&
int_cache
->
vxq
[
*
icount_align
],
&
int_cache
->
vyq
[
*
icount_align
],
&
int_cache
->
vzq
[
*
icount_align
],
&
int_cache
->
rhoq
[
*
icount_align
],
&
int_cache
->
grad_hq
[
*
icount_align
],
&
int_cache
->
pOrho2q
[
*
icount_align
],
&
int_cache
->
balsaraq
[
*
icount_align
],
&
int_cache
->
soundspeedq
[
*
icount_align
],
&
int_cache
->
mq
[
*
icount_align
],
v_hi_inv
,
&
int_cache
->
h_invq
[
*
icount_align
],
a_hydro_xSum
,
a_hydro_ySum
,
a_hydro_zSum
,
h_dtSum
,
v_sigSum
,
entropy_dtSum
,
int_mask
,
int_mask2
#ifdef HAVE_AVX512_F
,
knl_mask
,
knl_mask2
);
#else
);
#endif
a_hydro_xSum
,
a_hydro_ySum
,
a_hydro_zSum
,
h_dtSum
,
v_sigSum
,
entropy_dtSum
,
int_mask
,
int_mask2
);
}
}
...
...
@@ -992,8 +969,7 @@ __attribute__((always_inline)) INLINE void runner_doself2_force_vec(
VEC_HADD
(
a_hydro_ySum
,
pi
->
a_hydro
[
1
]);
VEC_HADD
(
a_hydro_zSum
,
pi
->
a_hydro
[
2
]);
VEC_HADD
(
h_dtSum
,
pi
->
force
.
h_dt
);
for
(
int
k
=
0
;
k
<
VEC_SIZE
;
k
++
)
pi
->
force
.
v_sig
=
max
(
pi
->
force
.
v_sig
,
v_sigSum
.
f
[
k
]);
VEC_HMAX
(
v_sigSum
,
pi
->
force
.
v_sig
);
VEC_HADD
(
entropy_dtSum
,
pi
->
entropy_dt
);
/* Reset interaction count. */
...
...
src/vector.h
View file @
f194e08b
...
...
@@ -81,6 +81,7 @@
#define vec_form_int_mask(a) a
#define vec_and(a, b) _mm512_and_ps(a, b)
#define vec_mask_and(a, b) a & b
#define vec_and_mask(a, mask) _mm512_maskz_expand_ps(mask, a)
#define vec_init_mask(mask) mask = 0xFFFF
#define vec_zero_mask(mask) mask = 0
#define vec_create_mask(mask, cond) mask = cond
...
...
@@ -130,6 +131,9 @@
#define VEC_FORM_PACKED_MASK(mask, v_mask, pack) \
pack += __builtin_popcount(mask);
/* Finds the horizontal maximum of vector b and returns a float. */
#define VEC_HMAX(a, b) a = _mm512_reduce_max_ps(b.v)
/* Performs a left-pack on a vector based upon a mask and returns the result. */
#define VEC_LEFT_PACK(a, mask, result) \
_mm512_mask_compressstoreu_ps(result, mask, a)
...
...
@@ -171,6 +175,7 @@
#define vec_form_int_mask(a) _mm256_movemask_ps(a.v)
#define vec_and(a, b) _mm256_and_ps(a, b)
#define vec_mask_and(a, b) _mm256_and_ps(a.v, b.v)
#define vec_and_mask(a, mask) vec_mask_and(a, mask)
#define vec_init_mask(mask) mask.m = vec_setint1(0xFFFFFFFF)
#define vec_create_mask(mask, cond) mask.v = cond
#define vec_zero_mask(mask) mask.v = vec_setzero()
...
...
@@ -201,13 +206,10 @@
b += a.f[0] + a.f[4];
/* Performs a horizontal maximum on the vector and takes the maximum of the result with a float, b. */
#define VEC_HMAX(a, b) \
{ \
__m256 y = _mm256_permute2f128_ps(a.v, a.v, 1);
/* Permute 128-bit values, y = [a.high, a.low] */
\
__m256 m1 = _mm256_max_ps(a.v, y);
/* m1[0] = max(x[0], x[3]), m1[1] = max(x[1], x[4]), etc. */
\
__m256 m2 = _mm256_permute_ps(m1, 177);
/* Set m2[0] = m1[1], m2[1] = m1[0], m2[2] = m1[3] etc. */
\
__m256 m = _mm256_max_ps(m1, m2);
/* m[0] and m[7] contain maximums of each part of vector. */
\
b = fmaxf(fmaxf(b,m[0]),m[7]); \
#define VEC_HMAX(a, b) \
{ \
for(int k=0; k<VEC_SIZE; k++) \
b = max(b, a.f[k]); \
}
/* Returns the lower 128-bits of the 256-bit vector. */
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment