Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
SWIFTsim
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Deploy
Releases
Model registry
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
SWIFT
SWIFTsim
Commits
686314cf
Commit
686314cf
authored
8 years ago
by
James Willis
Browse files
Options
Downloads
Patches
Plain Diff
Perform interaction with interleaved vectors.
parent
a1b7d8e6
No related branches found
No related tags found
1 merge request
!406
Doself2 vectorisation
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
src/runner_doiact_vec.c
+16
-35
16 additions, 35 deletions
src/runner_doiact_vec.c
with
16 additions
and
35 deletions
src/runner_doiact_vec.c
+
16
−
35
View file @
686314cf
...
...
@@ -322,18 +322,18 @@ __attribute__((always_inline)) INLINE static void calcRemForceInteractions(
vector
*
v_sigSum
,
vector
*
entropy_dtSum
,
vector
v_hi_inv
,
vector
v_vix
,
vector
v_viy
,
vector
v_viz
,
vector
v_rhoi
,
vector
v_grad_hi
,
vector
v_pOrhoi2
,
vector
v_balsara_i
,
vector
v_ci
,
int
*
icount_align
)
{
int
*
icount_align
,
int
num_vec_proc
)
{
#ifdef HAVE_AVX512_F
KNL_MASK_16
knl_mask
,
knl_mask2
;
#endif
vector
int_mask
;
//
, int_mask2;
vector
int_mask
,
int_mask2
;
/* Work out the number of remainder interactions and pad secondary cache. */
*
icount_align
=
icount
;
int
rem
=
icount
%
(
1
*
VEC_SIZE
);
int
rem
=
icount
%
(
num_vec_proc
*
VEC_SIZE
);
if
(
rem
!=
0
)
{
int
pad
=
(
1
*
VEC_SIZE
)
-
rem
;
int
pad
=
(
num_vec_proc
*
VEC_SIZE
)
-
rem
;
*
icount_align
+=
pad
;
/* Initialise masks to true. */
...
...
@@ -344,7 +344,7 @@ __attribute__((always_inline)) INLINE static void calcRemForceInteractions(
int_mask2
.
m
=
vec_setint1
(
0xFFFFFFFF
);
#else
int_mask
.
m
=
vec_setint1
(
0xFFFFFFFF
);
//
int_mask2.m = vec_setint1(0xFFFFFFFF);
int_mask2
.
m
=
vec_setint1
(
0xFFFFFFFF
);
#endif
/* Pad secondary cache so that there are no contributions in the interaction
* function. */
...
...
@@ -370,16 +370,15 @@ __attribute__((always_inline)) INLINE static void calcRemForceInteractions(
#ifdef HAVE_AVX512_F
knl_mask2
=
knl_mask2
>>
pad
;
#else
for
(
int
i
=
VEC_SIZE
-
pad
;
i
<
VEC_SIZE
;
i
++
)
int_mask
.
i
[
i
]
=
0
;
for
(
int
i
=
VEC_SIZE
-
pad
;
i
<
VEC_SIZE
;
i
++
)
int_mask
2
.
i
[
i
]
=
0
;
#endif
}
else
{
#ifdef HAVE_AVX512_F
knl_mask
=
knl_mask
>>
(
VEC_SIZE
-
rem
);
knl_mask2
=
0
;
#else
error
(
"Pad should not be greater than VEC_SIZE."
);
for
(
int
i
=
rem
;
i
<
VEC_SIZE
;
i
++
)
int_mask
.
i
[
i
]
=
0
;
//
int_mask2.v = vec_setzero();
int_mask2
.
v
=
vec_setzero
();
#endif
}
...
...
@@ -387,28 +386,10 @@ __attribute__((always_inline)) INLINE static void calcRemForceInteractions(
* interaction count. */
*
icount_align
=
icount
-
rem
;
vector
r2
,
dx
,
dy
,
dz
;
vector
vjx
,
vjy
,
vjz
,
mj
,
rhoj
,
grad_hj
,
pOrhoj2
,
balsara_j
,
cj
,
hj_inv
;
r2
.
v
=
vec_load
(
&
int_cache
->
r2q
[
*
icount_align
]);
dx
.
v
=
vec_load
(
&
int_cache
->
dxq
[
*
icount_align
]);
dy
.
v
=
vec_load
(
&
int_cache
->
dyq
[
*
icount_align
]);
dz
.
v
=
vec_load
(
&
int_cache
->
dzq
[
*
icount_align
]);
vjx
.
v
=
vec_load
(
&
int_cache
->
vxq
[
*
icount_align
]);
vjy
.
v
=
vec_load
(
&
int_cache
->
vyq
[
*
icount_align
]);
vjz
.
v
=
vec_load
(
&
int_cache
->
vzq
[
*
icount_align
]);
mj
.
v
=
vec_load
(
&
int_cache
->
mq
[
*
icount_align
]);
rhoj
.
v
=
vec_load
(
&
int_cache
->
rhoq
[
*
icount_align
]);
grad_hj
.
v
=
vec_load
(
&
int_cache
->
grad_hq
[
*
icount_align
]);
pOrhoj2
.
v
=
vec_load
(
&
int_cache
->
pOrho2q
[
*
icount_align
]);
balsara_j
.
v
=
vec_load
(
&
int_cache
->
balsaraq
[
*
icount_align
]);
cj
.
v
=
vec_load
(
&
int_cache
->
soundspeedq
[
*
icount_align
]);
hj_inv
.
v
=
vec_load
(
&
int_cache
->
h_invq
[
*
icount_align
]);
runner_iact_nonsym_1_vec_force_2
(
&
r2
,
&
dx
,
&
dy
,
&
dz
,
&
v_vix
,
&
v_viy
,
&
v_viz
,
&
v_rhoi
,
&
v_grad_hi
,
&
v_pOrhoi2
,
&
v_balsara_i
,
&
v_ci
,
&
vjx
,
&
vjy
,
&
vjz
,
&
rhoj
,
&
grad_hj
,
&
pOrhoj2
,
&
balsara_j
,
&
cj
,
&
mj
,
v_hi_inv
,
hj_inv
,
a_hydro_xSum
,
a_hydro_ySum
,
a_hydro_zSum
,
h_dtSum
,
v_sigSum
,
entropy_dtSum
,
int_mask
runner_iact_nonsym_2_vec_force
(
&
int_cache
->
r2q
[
*
icount_align
],
&
int_cache
->
dxq
[
*
icount_align
],
&
int_cache
->
dyq
[
*
icount_align
],
&
int_cache
->
dzq
[
*
icount_align
],
&
v_vix
,
&
v_viy
,
&
v_viz
,
&
v_rhoi
,
&
v_grad_hi
,
&
v_pOrhoi2
,
&
v_balsara_i
,
&
v_ci
,
&
int_cache
->
vxq
[
*
icount_align
],
&
int_cache
->
vyq
[
*
icount_align
],
&
int_cache
->
vzq
[
*
icount_align
],
&
int_cache
->
rhoq
[
*
icount_align
],
&
int_cache
->
grad_hq
[
*
icount_align
],
&
int_cache
->
pOrho2q
[
*
icount_align
],
&
int_cache
->
balsaraq
[
*
icount_align
],
&
int_cache
->
soundspeedq
[
*
icount_align
],
&
int_cache
->
mq
[
*
icount_align
],
v_hi_inv
,
&
int_cache
->
h_invq
[
*
icount_align
],
a_hydro_xSum
,
a_hydro_ySum
,
a_hydro_zSum
,
h_dtSum
,
v_sigSum
,
entropy_dtSum
,
int_mask
,
int_mask2
#ifdef HAVE_AVX512_F
,
knl_mask
,
knl_mask2
);
#else
...
...
@@ -547,7 +528,7 @@ __attribute__((always_inline)) INLINE static void storeForceInteractions(
calcRemForceInteractions
(
int_cache
,
*
icount
,
a_hydro_xSum
,
a_hydro_ySum
,
a_hydro_zSum
,
h_dtSum
,
v_sigSum
,
entropy_dtSum
,
v_hi_inv
,
v_vix
,
v_viy
,
v_viz
,
*
v_rhoi
,
*
v_grad_hi
,
*
v_pOrhoi2
,
*
v_balsara_i
,
*
v_ci
,
&
icount_align
);
&
icount_align
,
1
);
vector
int_mask
;
//, int_mask2;
...
...
@@ -1991,12 +1972,12 @@ __attribute__((always_inline)) INLINE void runner_doself2_force_vec_3(
calcRemForceInteractions
(
&
int_cache
,
icount
,
&
a_hydro_xSum
,
&
a_hydro_ySum
,
&
a_hydro_zSum
,
&
h_dtSum
,
&
v_sigSum
,
&
entropy_dtSum
,
v_hi_inv
,
v_vix
,
v_viy
,
v_viz
,
v_rhoi
,
v_grad_hi
,
v_pOrhoi2
,
v_balsara_i
,
v_ci
,
&
icount_align
);
&
icount_align
,
2
);
//printFloatVector(a_hydro_xSum,"a_hydro_xSum after rem",8);
/* Initialise masks to true in case remainder interactions have been
* performed. */
vector
int_mask
;
//
, int_mask2;
vector
int_mask
,
int_mask2
;
#ifdef HAVE_AVX512_F
KNL_MASK_16
knl_mask
=
0xFFFF
;
KNL_MASK_16
knl_mask2
=
0xFFFF
;
...
...
@@ -2008,12 +1989,12 @@ __attribute__((always_inline)) INLINE void runner_doself2_force_vec_3(
#endif
/* Perform interaction with 2 vectors. */
for
(
int
pjd
=
0
;
pjd
<
icount_align
;
pjd
+=
(
1
*
VEC_SIZE
))
{
for
(
int
pjd
=
0
;
pjd
<
icount_align
;
pjd
+=
(
2
*
VEC_SIZE
))
{
runner_iact_nonsym_2_vec_force
(
&
int_cache
.
r2q
[
pjd
],
&
int_cache
.
dxq
[
pjd
],
&
int_cache
.
dyq
[
pjd
],
&
int_cache
.
dzq
[
pjd
],
&
v_vix
,
&
v_viy
,
&
v_viz
,
&
v_rhoi
,
&
v_grad_hi
,
&
v_pOrhoi2
,
&
v_balsara_i
,
&
v_ci
,
&
int_cache
.
vxq
[
pjd
],
&
int_cache
.
vyq
[
pjd
],
&
int_cache
.
vzq
[
pjd
],
&
int_cache
.
rhoq
[
pjd
],
&
int_cache
.
grad_hq
[
pjd
],
&
int_cache
.
pOrho2q
[
pjd
],
&
int_cache
.
balsaraq
[
pjd
],
&
int_cache
.
soundspeedq
[
pjd
],
&
int_cache
.
mq
[
pjd
],
v_hi_inv
,
&
int_cache
.
h_invq
[
pjd
],
&
a_hydro_xSum
,
&
a_hydro_ySum
,
&
a_hydro_zSum
,
&
h_dtSum
,
&
v_sigSum
,
&
entropy_dtSum
,
int_mask
&
a_hydro_xSum
,
&
a_hydro_ySum
,
&
a_hydro_zSum
,
&
h_dtSum
,
&
v_sigSum
,
&
entropy_dtSum
,
int_mask
,
int_mask2
#ifdef HAVE_AVX512_F
knl_mask
,
knl_mask2
);
#else
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment