Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
SWIFT
SWIFTsim
Commits
06f2a50a
Commit
06f2a50a
authored
Dec 13, 2016
by
James Willis
Browse files
Use c2_cache to store interactions and perform HADD on each pi in the first loop.
parent
b7cd52ae
Changes
1
Hide whitespace changes
Inline
Side-by-side
src/runner_doiact_vec.c
View file @
06f2a50a
...
...
@@ -878,14 +878,16 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, struct cell *
#ifdef WITH_VECTORIZATION
const
struct
engine
*
restrict
e
=
r
->
e
;
#ifdef WITH_VECTORIZATION
int
icount
=
0
;
int
icount
=
0
;
//, icount_align = 0;
struct
c2_cache
int_cache
;
vector
v_hi
,
v_vix
,
v_viy
,
v_viz
,
v_hig2
;
//, v_r2;
float
r2q
[
VEC_SIZE
]
__attribute__
((
aligned
(
16
)));
float
hiq
[
VEC_SIZE
]
__attribute__
((
aligned
(
16
)));
float
hjq
[
VEC_SIZE
]
__attribute__
((
aligned
(
16
)));
float
dxq
[
3
*
VEC_SIZE
]
__attribute__
((
aligned
(
16
)));
struct
part
*
piq
[
VEC_SIZE
],
*
pjq
[
VEC_SIZE
];
#endif
TIMER_TIC
;
...
...
@@ -939,6 +941,40 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, struct cell *
for
(
int
k
=
0
;
k
<
3
;
k
++
)
pix
[
k
]
=
pi
->
x
[
k
]
-
shift
[
k
];
const
float
hig2
=
hi
*
hi
*
kernel_gamma2
;
//vector pix, piy, piz;
//const float hi = cell_cache->h[pid];
/* Fill particle pi vectors. */
//pix.v = vec_set1((float)(pi->x[0] - ci->loc[0] - shift[0]));
//piy.v = vec_set1((float)(pi->x[1] - ci->loc[1] - shift[1]));
//piz.v = vec_set1((float)(pi->x[2] - ci->loc[2] - shift[2]));
v_hi
.
v
=
vec_set1
(
hi
);
v_vix
.
v
=
vec_set1
(
pi
->
v
[
0
]);
v_viy
.
v
=
vec_set1
(
pi
->
v
[
1
]);
v_viz
.
v
=
vec_set1
(
pi
->
v
[
2
]);
//const float hig2 = hi * hi * kernel_gamma2;
v_hig2
.
v
=
vec_set1
(
hig2
);
/* Reset cumulative sums of update vectors. */
vector
rhoSum
,
rho_dhSum
,
wcountSum
,
wcount_dhSum
,
div_vSum
,
curlvxSum
,
curlvySum
,
curlvzSum
;
/* Get the inverse of hi. */
vector
v_hi_inv
;
v_hi_inv
=
vec_reciprocal
(
v_hi
);
rhoSum
.
v
=
vec_setzero
();
rho_dhSum
.
v
=
vec_setzero
();
wcountSum
.
v
=
vec_setzero
();
wcount_dhSum
.
v
=
vec_setzero
();
div_vSum
.
v
=
vec_setzero
();
curlvxSum
.
v
=
vec_setzero
();
curlvySum
.
v
=
vec_setzero
();
curlvzSum
.
v
=
vec_setzero
();
/* Loop over the parts in cj. */
for
(
int
pjd
=
0
;
pjd
<
count_j
&&
sort_j
[
pjd
].
d
<
di
;
pjd
++
)
{
...
...
@@ -963,19 +999,48 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, struct cell *
#else
/* Add this interaction to the queue. */
r2q
[
icount
]
=
r2
;
dxq
[
3
*
icount
+
0
]
=
dx
[
0
];
dxq
[
3
*
icount
+
1
]
=
dx
[
1
];
dxq
[
3
*
icount
+
2
]
=
dx
[
2
];
hiq
[
icount
]
=
hi
;
int_cache
.
r2q
[
icount
]
=
r2
;
int_cache
.
dxq
[
icount
]
=
dx
[
0
];
int_cache
.
dyq
[
icount
]
=
dx
[
1
];
int_cache
.
dzq
[
icount
]
=
dx
[
2
];
int_cache
.
mq
[
icount
]
=
pj
->
mass
;
int_cache
.
vxq
[
icount
]
=
pj
->
v
[
0
];
int_cache
.
vyq
[
icount
]
=
pj
->
v
[
1
];
int_cache
.
vzq
[
icount
]
=
pj
->
v
[
2
];
hjq
[
icount
]
=
pj
->
h
;
piq
[
icount
]
=
pi
;
pjq
[
icount
]
=
pj
;
icount
+=
1
;
/* Flush? */
if
(
icount
==
VEC_SIZE
)
{
runner_iact_nonsym_vec_density
(
r2q
,
dxq
,
hiq
,
hjq
,
piq
,
pjq
);
#ifdef HAVE_AVX512_F
KNL_MASK_16
knl_mask
,
knl_mask2
;
#endif
vector
int_mask
,
int_mask2
;
#ifdef HAVE_AVX512_F
knl_mask
=
0xFFFF
;
knl_mask2
=
0xFFFF
;
int_mask
.
m
=
vec_setint1
(
0xFFFFFFFF
);
int_mask2
.
m
=
vec_setint1
(
0xFFFFFFFF
);
#else
int_mask
.
m
=
vec_setint1
(
0xFFFFFFFF
);
int_mask2
.
m
=
vec_setint1
(
0xFFFFFFFF
);
#endif
runner_iact_nonsym_1_vec_density
(
&
int_cache
.
r2q
[
0
],
&
int_cache
.
dxq
[
0
],
&
int_cache
.
dyq
[
0
],
&
int_cache
.
dzq
[
0
],
v_hi_inv
,
v_vix
,
v_viy
,
v_viz
,
&
int_cache
.
vxq
[
0
],
&
int_cache
.
vyq
[
0
],
&
int_cache
.
vzq
[
0
],
&
int_cache
.
mq
[
0
],
&
rhoSum
,
&
rho_dhSum
,
&
wcountSum
,
&
wcount_dhSum
,
&
div_vSum
,
&
curlvxSum
,
&
curlvySum
,
&
curlvzSum
,
int_mask
,
int_mask2
,
#ifdef HAVE_AVX512_F
knl_mask
,
knl_mask2
);
#else
0
,
0
);
#endif
icount
=
0
;
}
...
...
@@ -984,6 +1049,29 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, struct cell *
}
/* loop over the parts in cj. */
/* Perform horizontal adds on vector sums and store result in particle pi.
*/
VEC_HADD
(
rhoSum
,
pi
->
rho
);
VEC_HADD
(
rho_dhSum
,
pi
->
density
.
rho_dh
);
VEC_HADD
(
wcountSum
,
pi
->
density
.
wcount
);
VEC_HADD
(
wcount_dhSum
,
pi
->
density
.
wcount_dh
);
VEC_HADD
(
div_vSum
,
pi
->
density
.
div_v
);
VEC_HADD
(
curlvxSum
,
pi
->
density
.
rot_v
[
0
]);
VEC_HADD
(
curlvySum
,
pi
->
density
.
rot_v
[
1
]);
VEC_HADD
(
curlvzSum
,
pi
->
density
.
rot_v
[
2
]);
if
(
icount
>
0
)
{
for
(
int
k
=
0
;
k
<
icount
;
k
++
)
{
float
dxq2
[
3
];
dxq2
[
0
]
=
int_cache
.
dxq
[
k
];
dxq2
[
1
]
=
int_cache
.
dyq
[
k
];
dxq2
[
2
]
=
int_cache
.
dzq
[
k
];
runner_iact_nonsym_density
(
int_cache
.
r2q
[
k
],
&
dxq2
[
3
*
k
],
hi
,
hjq
[
k
],
pi
,
pjq
[
k
]);
}
}
icount
=
0
;
}
/* loop over the parts in ci. */
/* Loop over the parts in cj. */
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment