Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
SWIFT
SWIFTsim
Commits
447bc7a1
Commit
447bc7a1
authored
May 21, 2017
by
James Willis
Browse files
Added mask_t to left-packing with AVX2 and AVX-512 instruction sets.
parent
58e78fbd
Changes
2
Hide whitespace changes
Inline
Side-by-side
src/runner_doiact_vec.c
View file @
447bc7a1
...
...
@@ -152,7 +152,7 @@ __attribute__((always_inline)) INLINE static void calcRemInteractions(
* @param v_viz #vector of z velocity of pi.
*/
__attribute__
((
always_inline
))
INLINE
static
void
storeInteractions
(
const
shor
t
mask
,
const
int
pjd
,
vector
*
v_r2
,
vector
*
v_dx
,
vector
*
v_dy
,
const
in
t
mask
,
const
int
pjd
,
vector
*
v_r2
,
vector
*
v_dx
,
vector
*
v_dy
,
vector
*
v_dz
,
const
struct
cache
*
const
cell_cache
,
struct
c2_cache
*
const
int_cache
,
int
*
icount
,
vector
*
rhoSum
,
vector
*
rho_dhSum
,
vector
*
wcountSum
,
vector
*
wcount_dhSum
,
vector
*
div_vSum
,
vector
*
curlvxSum
,
...
...
@@ -162,34 +162,20 @@ __attribute__((always_inline)) INLINE static void storeInteractions(
/* Left-pack values needed into the secondary cache using the interaction mask.
*/
#if defined(HAVE_AVX2) || defined(HAVE_AVX512_F)
int
pack
=
0
;
#ifdef HAVE_AVX512_F
pack
+=
__builtin_popcount
(
mask
);
VEC_LEFT_PACK
(
v_r2
->
v
,
mask
,
&
int_cache
->
r2q
[
*
icount
]);
VEC_LEFT_PACK
(
v_dx
->
v
,
mask
,
&
int_cache
->
dxq
[
*
icount
]);
VEC_LEFT_PACK
(
v_dy
->
v
,
mask
,
&
int_cache
->
dyq
[
*
icount
]);
VEC_LEFT_PACK
(
v_dz
->
v
,
mask
,
&
int_cache
->
dzq
[
*
icount
]);
VEC_LEFT_PACK
(
vec_load
(
&
cell_cache
->
m
[
pjd
]),
mask
,
&
int_cache
->
mq
[
*
icount
]);
VEC_LEFT_PACK
(
vec_load
(
&
cell_cache
->
vx
[
pjd
]),
mask
,
&
int_cache
->
vxq
[
*
icount
]);
VEC_LEFT_PACK
(
vec_load
(
&
cell_cache
->
vy
[
pjd
]),
mask
,
&
int_cache
->
vyq
[
*
icount
]);
VEC_LEFT_PACK
(
vec_load
(
&
cell_cache
->
vz
[
pjd
]),
mask
,
&
int_cache
->
vzq
[
*
icount
]);
#else
vector
v_mask
;
VEC_FORM_PACKED_MASK
(
mask
,
v_mask
.
m
,
pack
);
VEC_LEFT_PACK
(
v_r2
->
v
,
v_mask
.
m
,
&
int_cache
->
r2q
[
*
icount
]);
VEC_LEFT_PACK
(
v_dx
->
v
,
v_mask
.
m
,
&
int_cache
->
dxq
[
*
icount
]);
VEC_LEFT_PACK
(
v_dy
->
v
,
v_mask
.
m
,
&
int_cache
->
dyq
[
*
icount
]);
VEC_LEFT_PACK
(
v_dz
->
v
,
v_mask
.
m
,
&
int_cache
->
dzq
[
*
icount
]);
VEC_LEFT_PACK
(
vec_load
(
&
cell_cache
->
m
[
pjd
]),
v_mask
.
m
,
&
int_cache
->
mq
[
*
icount
]);
VEC_LEFT_PACK
(
vec_load
(
&
cell_cache
->
vx
[
pjd
]),
v_mask
.
m
,
&
int_cache
->
vxq
[
*
icount
]);
VEC_LEFT_PACK
(
vec_load
(
&
cell_cache
->
vy
[
pjd
]),
v_mask
.
m
,
&
int_cache
->
vyq
[
*
icount
]);
VEC_LEFT_PACK
(
vec_load
(
&
cell_cache
->
vz
[
pjd
]),
v_mask
.
m
,
&
int_cache
->
vzq
[
*
icount
]);
#endif
/* HAVE_AVX512_F */
(
*
icount
)
+=
pack
;
mask_t
packed_mask
;
VEC_FORM_PACKED_MASK
(
mask
,
packed_mask
);
VEC_LEFT_PACK
(
v_r2
->
v
,
packed_mask
,
&
int_cache
->
r2q
[
*
icount
]);
VEC_LEFT_PACK
(
v_dx
->
v
,
packed_mask
,
&
int_cache
->
dxq
[
*
icount
]);
VEC_LEFT_PACK
(
v_dy
->
v
,
packed_mask
,
&
int_cache
->
dyq
[
*
icount
]);
VEC_LEFT_PACK
(
v_dz
->
v
,
packed_mask
,
&
int_cache
->
dzq
[
*
icount
]);
VEC_LEFT_PACK
(
vec_load
(
&
cell_cache
->
m
[
pjd
]),
packed_mask
,
&
int_cache
->
mq
[
*
icount
]);
VEC_LEFT_PACK
(
vec_load
(
&
cell_cache
->
vx
[
pjd
]),
packed_mask
,
&
int_cache
->
vxq
[
*
icount
]);
VEC_LEFT_PACK
(
vec_load
(
&
cell_cache
->
vy
[
pjd
]),
packed_mask
,
&
int_cache
->
vyq
[
*
icount
]);
VEC_LEFT_PACK
(
vec_load
(
&
cell_cache
->
vz
[
pjd
]),
packed_mask
,
&
int_cache
->
vzq
[
*
icount
]);
/* Increment interaction count by number of bits set in mask. */
(
*
icount
)
+=
__builtin_popcount
(
mask
);
#else
/* Quicker to do it serially in AVX rather than use intrinsics. */
for
(
int
bit_index
=
0
;
bit_index
<
VEC_SIZE
;
bit_index
++
)
{
...
...
@@ -375,51 +361,31 @@ __attribute__((always_inline)) INLINE static void storeForceInteractions(
/* Left-pack values needed into the secondary cache using the interaction mask.
*/
#if defined(HAVE_AVX2) || defined(HAVE_AVX512_F)
int
pack
=
0
;
/* Invert hj. */
vector
v_hj
,
v_hj_inv
;
v_hj
=
vec_load
(
&
cell_cache
->
h
[
pjd
]);
v_hj_inv
=
vec_reciprocal
(
v_hj
);
#ifdef HAVE_AVX512_F
pack
+=
__builtin_popcount
(
mask
);
VEC_LEFT_PACK
(
v_r2
->
v
,
mask
,
&
int_cache
->
r2q
[
*
icount
]);
VEC_LEFT_PACK
(
v_dx
->
v
,
mask
,
&
int_cache
->
dxq
[
*
icount
]);
VEC_LEFT_PACK
(
v_dy
->
v
,
mask
,
&
int_cache
->
dyq
[
*
icount
]);
VEC_LEFT_PACK
(
v_dz
->
v
,
mask
,
&
int_cache
->
dzq
[
*
icount
]);
VEC_LEFT_PACK
(
vec_load
(
&
cell_cache
->
m
[
pjd
]),
mask
,
&
int_cache
->
mq
[
*
icount
]);
VEC_LEFT_PACK
(
vec_load
(
&
cell_cache
->
vx
[
pjd
]),
mask
,
&
int_cache
->
vxq
[
*
icount
]);
VEC_LEFT_PACK
(
vec_load
(
&
cell_cache
->
vy
[
pjd
]),
mask
,
&
int_cache
->
vyq
[
*
icount
]);
VEC_LEFT_PACK
(
vec_load
(
&
cell_cache
->
vz
[
pjd
]),
mask
,
&
int_cache
->
vzq
[
*
icount
]);
VEC_LEFT_PACK
(
vec_load
(
&
cell_cache
->
rho
[
pjd
]),
mask
,
&
int_cache
->
rhoq
[
*
icount
]);
VEC_LEFT_PACK
(
vec_load
(
&
cell_cache
->
grad_h
[
pjd
]),
mask
,
&
int_cache
->
grad_hq
[
*
icount
]);
VEC_LEFT_PACK
(
vec_load
(
&
cell_cache
->
pOrho2
[
pjd
]),
mask
,
&
int_cache
->
pOrho2q
[
*
icount
]);
VEC_LEFT_PACK
(
vec_load
(
&
cell_cache
->
balsara
[
pjd
]),
mask
,
&
int_cache
->
balsaraq
[
*
icount
]);
VEC_LEFT_PACK
(
vec_load
(
&
cell_cache
->
soundspeed
[
pjd
]),
mask
,
&
int_cache
->
soundspeedq
[
*
icount
]);
VEC_LEFT_PACK
(
v_hj_inv
->
v
,
mask
,
&
int_cache
->
h_invq
[
*
icount
]);
#else
vector
v_mask
;
VEC_FORM_PACKED_MASK
(
mask
,
v_mask
.
m
,
pack
);
VEC_LEFT_PACK
(
v_r2
->
v
,
v_mask
.
m
,
&
int_cache
->
r2q
[
*
icount
]);
VEC_LEFT_PACK
(
v_dx
->
v
,
v_mask
.
m
,
&
int_cache
->
dxq
[
*
icount
]);
VEC_LEFT_PACK
(
v_dy
->
v
,
v_mask
.
m
,
&
int_cache
->
dyq
[
*
icount
]);
VEC_LEFT_PACK
(
v_dz
->
v
,
v_mask
.
m
,
&
int_cache
->
dzq
[
*
icount
]);
VEC_LEFT_PACK
(
vec_load
(
&
cell_cache
->
m
[
pjd
]),
v_mask
.
m
,
&
int_cache
->
mq
[
*
icount
]);
VEC_LEFT_PACK
(
vec_load
(
&
cell_cache
->
vx
[
pjd
]),
v_mask
.
m
,
&
int_cache
->
vxq
[
*
icount
]);
VEC_LEFT_PACK
(
vec_load
(
&
cell_cache
->
vy
[
pjd
]),
v_mask
.
m
,
&
int_cache
->
vyq
[
*
icount
]);
VEC_LEFT_PACK
(
vec_load
(
&
cell_cache
->
vz
[
pjd
]),
v_mask
.
m
,
&
int_cache
->
vzq
[
*
icount
]);
VEC_LEFT_PACK
(
v_rhoj
->
v
,
v_mask
.
m
,
&
int_cache
->
rhoq
[
*
icount
]);
VEC_LEFT_PACK
(
v_grad_hj
->
v
,
v_mask
.
m
,
&
int_cache
->
grad_hq
[
*
icount
]);
VEC_LEFT_PACK
(
v_pOrhoj2
->
v
,
v_mask
.
m
,
&
int_cache
->
pOrho2q
[
*
icount
]);
VEC_LEFT_PACK
(
v_balsara_j
->
v
,
v_mask
.
m
,
&
int_cache
->
balsaraq
[
*
icount
]);
VEC_LEFT_PACK
(
v_cj
->
v
,
v_mask
.
m
,
&
int_cache
->
soundspeedq
[
*
icount
]);
VEC_LEFT_PACK
(
v_hj_inv
->
v
,
v_mask
.
m
,
&
int_cache
->
h_invq
[
*
icount
]);
#endif
/* HAVE_AVX512_F */
(
*
icount
)
+=
pack
;
mask_t
packed_mask
;
VEC_FORM_PACKED_MASK
(
mask
,
packed_mask
);
VEC_LEFT_PACK
(
v_r2
->
v
,
packed_mask
,
&
int_cache
->
r2q
[
*
icount
]);
VEC_LEFT_PACK
(
v_dx
->
v
,
packed_mask
,
&
int_cache
->
dxq
[
*
icount
]);
VEC_LEFT_PACK
(
v_dy
->
v
,
packed_mask
,
&
int_cache
->
dyq
[
*
icount
]);
VEC_LEFT_PACK
(
v_dz
->
v
,
packed_mask
,
&
int_cache
->
dzq
[
*
icount
]);
VEC_LEFT_PACK
(
vec_load
(
&
cell_cache
->
m
[
pjd
]),
packed_mask
,
&
int_cache
->
mq
[
*
icount
]);
VEC_LEFT_PACK
(
vec_load
(
&
cell_cache
->
vx
[
pjd
]),
packed_mask
,
&
int_cache
->
vxq
[
*
icount
]);
VEC_LEFT_PACK
(
vec_load
(
&
cell_cache
->
vy
[
pjd
]),
packed_mask
,
&
int_cache
->
vyq
[
*
icount
]);
VEC_LEFT_PACK
(
vec_load
(
&
cell_cache
->
vz
[
pjd
]),
packed_mask
,
&
int_cache
->
vzq
[
*
icount
]);
VEC_LEFT_PACK
(
vec_load
(
&
cell_cache
->
rho
[
pjd
]),
packed_mask
,
&
int_cache
->
rhoq
[
*
icount
]);
VEC_LEFT_PACK
(
vec_load
(
&
cell_cache
->
grad_h
[
pjd
]),
packed_mask
,
&
int_cache
->
grad_hq
[
*
icount
]);
VEC_LEFT_PACK
(
vec_load
(
&
cell_cache
->
pOrho2
[
pjd
]),
packed_mask
,
&
int_cache
->
pOrho2q
[
*
icount
]);
VEC_LEFT_PACK
(
vec_load
(
&
cell_cache
->
balsara
[
pjd
]),
packed_mask
,
&
int_cache
->
balsaraq
[
*
icount
]);
VEC_LEFT_PACK
(
vec_load
(
&
cell_cache
->
soundspeed
[
pjd
]),
packed_mask
,
&
int_cache
->
soundspeedq
[
*
icount
]);
VEC_LEFT_PACK
(
v_hj_inv
->
v
,
packed_mask
,
&
int_cache
->
h_invq
[
*
icount
]);
/* Increment interaction count by number of bits set in mask. */
(
*
icount
)
+=
__builtin_popcount
(
mask
);
#else
/* Quicker to do it serially in AVX rather than use intrinsics. */
for
(
int
bit_index
=
0
;
bit_index
<
VEC_SIZE
;
bit_index
++
)
{
...
...
src/vector.h
View file @
447bc7a1
...
...
@@ -126,10 +126,10 @@
for (int i = 0; i < VEC_SIZE; i++) b += a.f[i]; \
}
#endif
/* Calculates the number of set bits in the mask and adds the result to an int.
*/
#define VEC_FORM_PACKED_MASK(mask, v_mask, pack) \
pack += __builtin_popcount(
mask)
;
/* Do nothing in the case of AVX-512 as there are already
* instructions for left-packing.*/
#define VEC_FORM_PACKED_MASK(mask, packed_
mask)
/* Finds the horizontal maximum of vector b and returns a float. */
#define VEC_HMAX(a, b) a = _mm512_reduce_max_ps(b.v)
...
...
@@ -230,14 +230,13 @@
/* Takes an integer mask and forms a left-packed integer vector
* containing indices of the set bits in the integer mask.
* Also returns the total number of bits set in the mask. */
#define VEC_FORM_PACKED_MASK(mask,
v_mask, pack)
\
#define VEC_FORM_PACKED_MASK(mask,
packed_mask)
\
{ \
unsigned long expanded_mask = _pdep_u64(mask, 0x0101010101010101); \
expanded_mask *= 0xFF; \
unsigned long wanted_indices = _pext_u64(identity_indices, expanded_mask); \
__m128i bytevec = _mm_cvtsi64_si128(wanted_indices); \
v_mask = _mm256_cvtepu8_epi32(bytevec); \
pack += __builtin_popcount(mask); \
packed_mask.m = _mm256_cvtepu8_epi32(bytevec); \
}
/* Performs a left-pack on a vector based upon a mask and returns the result. */
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment