Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
SWIFTsim
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Deploy
Releases
Model registry
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
SWIFT
SWIFTsim
Commits
7bd8e8b3
Commit
7bd8e8b3
authored
8 years ago
by
James Willis
Browse files
Options
Downloads
Patches
Plain Diff
Removed unused, broken vectorised version of runner_doself1_density.
parent
de4d7b38
No related branches found
No related tags found
1 merge request
!338
Update test kernel
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
src/runner_doiact_vec.c
+0
-364
0 additions, 364 deletions
src/runner_doiact_vec.c
with
0 additions
and
364 deletions
src/runner_doiact_vec.c
+
0
−
364
View file @
7bd8e8b3
...
...
@@ -604,370 +604,6 @@ __attribute__((always_inline)) INLINE void runner_doself1_density_vec(
#endif
/* WITH_VECTORIZATION */
}
/**
* @brief Compute the cell self-interaction (non-symmetric) using vector
* intrinsics with two particle pis at a time.
*
* CURRENTLY BROKEN DO NOT USE.
*
* @param r The #runner.
* @param c The #cell.
*/
__attribute__
((
always_inline
))
INLINE
void
runner_doself1_density_vec_2
(
struct
runner
*
r
,
struct
cell
*
restrict
c
)
{
#ifdef WITH_VECTORIZATION
const
struct
engine
*
e
=
r
->
e
;
int
doi_mask
;
int
doi2_mask
;
struct
part
*
restrict
pi
;
struct
part
*
restrict
pi2
;
int
count_align
;
vector
v_hi
,
v_vix
,
v_viy
,
v_viz
,
v_hig2
,
v_r2
;
vector
v_hi2
,
v_vix2
,
v_viy2
,
v_viz2
,
v_hig2_2
,
v2_r2
;
TIMER_TIC
if
(
!
cell_is_active
(
c
,
e
))
return
;
if
(
!
cell_is_drifted
(
c
,
e
))
cell_drift_particles
(
c
,
e
);
/* TODO: Need to find two active particles, not just one. */
struct
part
*
restrict
parts
=
c
->
parts
;
const
int
count
=
c
->
count
;
/* Get the particle cache from the runner and re-allocate
* the cache if it is not big enough for the cell. */
struct
cache
*
restrict
cell_cache
=
&
r
->
ci_cache
;
if
(
cell_cache
->
count
<
count
)
{
cache_init
(
cell_cache
,
count
);
}
/* Read the particles from the cell and store them locally in the cache. */
cache_read_particles
(
c
,
&
r
->
ci_cache
);
/* Create two secondary caches. */
int
icount
=
0
,
icount_align
=
0
;
struct
c2_cache
int_cache
;
int
icount2
=
0
,
icount_align2
=
0
;
struct
c2_cache
int_cache2
;
/* Loop over the particles in the cell. */
for
(
int
pid
=
0
;
pid
<
count
;
pid
+=
2
)
{
/* Get a pointer to the ith particle and next i particle. */
pi
=
&
parts
[
pid
];
pi2
=
&
parts
[
pid
+
1
];
/* Is the ith particle active? */
if
(
!
part_is_active
(
pi
,
e
))
continue
;
vector
pix
,
piy
,
piz
;
vector
pix2
,
piy2
,
piz2
;
const
float
hi
=
cell_cache
->
h
[
pid
];
const
float
hi2
=
cell_cache
->
h
[
pid
+
1
];
/* Fill pi position vector. */
pix
.
v
=
vec_set1
(
cell_cache
->
x
[
pid
]);
piy
.
v
=
vec_set1
(
cell_cache
->
y
[
pid
]);
piz
.
v
=
vec_set1
(
cell_cache
->
z
[
pid
]);
v_hi
.
v
=
vec_set1
(
hi
);
v_vix
.
v
=
vec_set1
(
cell_cache
->
vx
[
pid
]);
v_viy
.
v
=
vec_set1
(
cell_cache
->
vy
[
pid
]);
v_viz
.
v
=
vec_set1
(
cell_cache
->
vz
[
pid
]);
pix2
.
v
=
vec_set1
(
cell_cache
->
x
[
pid
+
1
]);
piy2
.
v
=
vec_set1
(
cell_cache
->
y
[
pid
+
1
]);
piz2
.
v
=
vec_set1
(
cell_cache
->
z
[
pid
+
1
]);
v_hi2
.
v
=
vec_set1
(
hi2
);
v_vix2
.
v
=
vec_set1
(
cell_cache
->
vx
[
pid
+
1
]);
v_viy2
.
v
=
vec_set1
(
cell_cache
->
vy
[
pid
+
1
]);
v_viz2
.
v
=
vec_set1
(
cell_cache
->
vz
[
pid
+
1
]);
const
float
hig2
=
hi
*
hi
*
kernel_gamma2
;
const
float
hig2_2
=
hi2
*
hi2
*
kernel_gamma2
;
v_hig2
.
v
=
vec_set1
(
hig2
);
v_hig2_2
.
v
=
vec_set1
(
hig2_2
);
vector
rhoSum
,
rho_dhSum
,
wcountSum
,
wcount_dhSum
,
div_vSum
,
curlvxSum
,
curlvySum
,
curlvzSum
;
vector
rhoSum2
,
rho_dhSum2
,
wcountSum2
,
wcount_dhSum2
,
div_vSum2
,
curlvxSum2
,
curlvySum2
,
curlvzSum2
;
vector
v_hi_inv
,
v_hi_inv2
;
v_hi_inv
=
vec_reciprocal
(
v_hi
);
v_hi_inv2
=
vec_reciprocal
(
v_hi2
);
rhoSum
.
v
=
vec_setzero
();
rho_dhSum
.
v
=
vec_setzero
();
wcountSum
.
v
=
vec_setzero
();
wcount_dhSum
.
v
=
vec_setzero
();
div_vSum
.
v
=
vec_setzero
();
curlvxSum
.
v
=
vec_setzero
();
curlvySum
.
v
=
vec_setzero
();
curlvzSum
.
v
=
vec_setzero
();
rhoSum2
.
v
=
vec_setzero
();
rho_dhSum2
.
v
=
vec_setzero
();
wcountSum2
.
v
=
vec_setzero
();
wcount_dhSum2
.
v
=
vec_setzero
();
div_vSum2
.
v
=
vec_setzero
();
curlvxSum2
.
v
=
vec_setzero
();
curlvySum2
.
v
=
vec_setzero
();
curlvzSum2
.
v
=
vec_setzero
();
/* Pad cache if there is a serial remainder. */
count_align
=
count
;
int
rem
=
count
%
(
NUM_VEC_PROC
*
VEC_SIZE
);
if
(
rem
!=
0
)
{
int
pad
=
(
NUM_VEC_PROC
*
VEC_SIZE
)
-
rem
;
count_align
+=
pad
;
/* Set positions to the same as particle pi so when the r2 > 0 mask is
* applied these extra contributions are masked out.*/
for
(
int
i
=
count
;
i
<
count_align
;
i
++
)
{
cell_cache
->
x
[
i
]
=
pix
.
f
[
0
];
cell_cache
->
y
[
i
]
=
piy
.
f
[
0
];
cell_cache
->
z
[
i
]
=
piz
.
f
[
0
];
}
}
vector
pjx
,
pjy
,
pjz
;
vector
pjvx
,
pjvy
,
pjvz
,
mj
;
vector
pjx2
,
pjy2
,
pjz2
;
vector
pjvx2
,
pjvy2
,
pjvz2
,
mj2
;
/* Find all of particle pi's interacions and store needed values in
* secondary cache.*/
for
(
int
pjd
=
0
;
pjd
<
count_align
;
pjd
+=
(
NUM_VEC_PROC
*
VEC_SIZE
))
{
/* Load 2 sets of vectors from the particle cache. */
pjx
.
v
=
vec_load
(
&
cell_cache
->
x
[
pjd
]);
pjy
.
v
=
vec_load
(
&
cell_cache
->
y
[
pjd
]);
pjz
.
v
=
vec_load
(
&
cell_cache
->
z
[
pjd
]);
pjvx
.
v
=
vec_load
(
&
cell_cache
->
vx
[
pjd
]);
pjvy
.
v
=
vec_load
(
&
cell_cache
->
vy
[
pjd
]);
pjvz
.
v
=
vec_load
(
&
cell_cache
->
vz
[
pjd
]);
mj
.
v
=
vec_load
(
&
cell_cache
->
m
[
pjd
]);
pjx2
.
v
=
vec_load
(
&
cell_cache
->
x
[
pjd
+
VEC_SIZE
]);
pjy2
.
v
=
vec_load
(
&
cell_cache
->
y
[
pjd
+
VEC_SIZE
]);
pjz2
.
v
=
vec_load
(
&
cell_cache
->
z
[
pjd
+
VEC_SIZE
]);
pjvx2
.
v
=
vec_load
(
&
cell_cache
->
vx
[
pjd
+
VEC_SIZE
]);
pjvy2
.
v
=
vec_load
(
&
cell_cache
->
vy
[
pjd
+
VEC_SIZE
]);
pjvz2
.
v
=
vec_load
(
&
cell_cache
->
vz
[
pjd
+
VEC_SIZE
]);
mj2
.
v
=
vec_load
(
&
cell_cache
->
m
[
pjd
+
VEC_SIZE
]);
/* Compute the pairwise distance. */
vector
v_dx_tmp
,
v_dy_tmp
,
v_dz_tmp
;
vector
v_dx_tmp2
,
v_dy_tmp2
,
v_dz_tmp2
,
v_r2_2
;
vector
v_dx2_tmp
,
v_dy2_tmp
,
v_dz2_tmp
;
vector
v_dx2_tmp2
,
v_dy2_tmp2
,
v_dz2_tmp2
,
v2_r2_2
;
v_dx_tmp
.
v
=
vec_sub
(
pix
.
v
,
pjx
.
v
);
v_dy_tmp
.
v
=
vec_sub
(
piy
.
v
,
pjy
.
v
);
v_dz_tmp
.
v
=
vec_sub
(
piz
.
v
,
pjz
.
v
);
v_dx_tmp2
.
v
=
vec_sub
(
pix
.
v
,
pjx2
.
v
);
v_dy_tmp2
.
v
=
vec_sub
(
piy
.
v
,
pjy2
.
v
);
v_dz_tmp2
.
v
=
vec_sub
(
piz
.
v
,
pjz2
.
v
);
v_dx2_tmp
.
v
=
vec_sub
(
pix2
.
v
,
pjx
.
v
);
v_dy2_tmp
.
v
=
vec_sub
(
piy2
.
v
,
pjy
.
v
);
v_dz2_tmp
.
v
=
vec_sub
(
piz2
.
v
,
pjz
.
v
);
v_dx2_tmp2
.
v
=
vec_sub
(
pix2
.
v
,
pjx2
.
v
);
v_dy2_tmp2
.
v
=
vec_sub
(
piy2
.
v
,
pjy2
.
v
);
v_dz2_tmp2
.
v
=
vec_sub
(
piz2
.
v
,
pjz2
.
v
);
v_r2
.
v
=
vec_mul
(
v_dx_tmp
.
v
,
v_dx_tmp
.
v
);
v_r2
.
v
=
vec_fma
(
v_dy_tmp
.
v
,
v_dy_tmp
.
v
,
v_r2
.
v
);
v_r2
.
v
=
vec_fma
(
v_dz_tmp
.
v
,
v_dz_tmp
.
v
,
v_r2
.
v
);
v_r2_2
.
v
=
vec_mul
(
v_dx_tmp2
.
v
,
v_dx_tmp2
.
v
);
v_r2_2
.
v
=
vec_fma
(
v_dy_tmp2
.
v
,
v_dy_tmp2
.
v
,
v_r2_2
.
v
);
v_r2_2
.
v
=
vec_fma
(
v_dz_tmp2
.
v
,
v_dz_tmp2
.
v
,
v_r2_2
.
v
);
v2_r2
.
v
=
vec_mul
(
v_dx2_tmp
.
v
,
v_dx2_tmp
.
v
);
v2_r2
.
v
=
vec_fma
(
v_dy2_tmp
.
v
,
v_dy2_tmp
.
v
,
v2_r2
.
v
);
v2_r2
.
v
=
vec_fma
(
v_dz2_tmp
.
v
,
v_dz2_tmp
.
v
,
v2_r2
.
v
);
v2_r2_2
.
v
=
vec_mul
(
v_dx2_tmp2
.
v
,
v_dx2_tmp2
.
v
);
v2_r2_2
.
v
=
vec_fma
(
v_dy2_tmp2
.
v
,
v_dy2_tmp2
.
v
,
v2_r2_2
.
v
);
v2_r2_2
.
v
=
vec_fma
(
v_dz2_tmp2
.
v
,
v_dz2_tmp2
.
v
,
v2_r2_2
.
v
);
/* Form a mask from r2 < hig2 and r2 > 0.*/
#ifdef HAVE_AVX512_F
// KNL_MASK_16 doi_mask, doi_mask_check, doi_mask2, doi_mask2_check;
KNL_MASK_16
doi_mask_check
,
doi_mask2
,
doi_mask2_check
;
KNL_MASK_16
doi2_mask_check
,
doi2_mask2
,
doi2_mask2_check
;
doi_mask_check
=
vec_cmp_gt
(
v_r2
.
v
,
vec_setzero
());
doi_mask
=
vec_cmp_lt
(
v_r2
.
v
,
v_hig2
.
v
);
doi2_mask_check
=
vec_cmp_gt
(
v2_r2
.
v
,
vec_setzero
());
doi2_mask
=
vec_cmp_lt
(
v2_r2
.
v
,
v_hig2_2
.
v
);
doi_mask2_check
=
vec_cmp_gt
(
v_r2_2
.
v
,
vec_setzero
());
doi_mask2
=
vec_cmp_lt
(
v_r2_2
.
v
,
v_hig2
.
v
);
doi2_mask2_check
=
vec_cmp_gt
(
v2_r2_2
.
v
,
vec_setzero
());
doi2_mask2
=
vec_cmp_lt
(
v2_r2_2
.
v
,
v_hig2_2
.
v
);
doi_mask
=
doi_mask
&
doi_mask_check
;
doi_mask2
=
doi_mask2
&
doi_mask2_check
;
doi2_mask
=
doi2_mask
&
doi2_mask_check
;
doi2_mask2
=
doi2_mask2
&
doi2_mask2_check
;
#else
vector
v_doi_mask
,
v_doi_mask_check
,
v_doi_mask2
,
v_doi_mask2_check
;
int
doi_mask2
;
vector
v_doi2_mask
,
v_doi2_mask_check
,
v_doi2_mask2
,
v_doi2_mask2_check
;
int
doi2_mask2
;
v_doi_mask_check
.
v
=
vec_cmp_gt
(
v_r2
.
v
,
vec_setzero
());
v_doi_mask
.
v
=
vec_cmp_lt
(
v_r2
.
v
,
v_hig2
.
v
);
v_doi2_mask_check
.
v
=
vec_cmp_gt
(
v2_r2
.
v
,
vec_setzero
());
v_doi2_mask
.
v
=
vec_cmp_lt
(
v2_r2
.
v
,
v_hig2_2
.
v
);
v_doi_mask2_check
.
v
=
vec_cmp_gt
(
v_r2_2
.
v
,
vec_setzero
());
v_doi_mask2
.
v
=
vec_cmp_lt
(
v_r2_2
.
v
,
v_hig2
.
v
);
v_doi2_mask2_check
.
v
=
vec_cmp_gt
(
v2_r2_2
.
v
,
vec_setzero
());
v_doi2_mask2
.
v
=
vec_cmp_lt
(
v2_r2_2
.
v
,
v_hig2_2
.
v
);
doi_mask
=
vec_cmp_result
(
vec_and
(
v_doi_mask
.
v
,
v_doi_mask_check
.
v
));
doi_mask2
=
vec_cmp_result
(
vec_and
(
v_doi_mask2
.
v
,
v_doi_mask2_check
.
v
));
doi2_mask
=
vec_cmp_result
(
vec_and
(
v_doi2_mask
.
v
,
v_doi2_mask_check
.
v
));
doi2_mask2
=
vec_cmp_result
(
vec_and
(
v_doi2_mask2
.
v
,
v_doi2_mask2_check
.
v
));
#endif
/* HAVE_AVX512_F */
/* Hit or miss? */
// if (doi_mask) {
storeInteractions
(
doi_mask
,
pjd
,
&
v_r2
,
&
v_dx_tmp
,
&
v_dy_tmp
,
&
v_dz_tmp
,
&
mj
,
&
pjvx
,
&
pjvy
,
&
pjvz
,
cell_cache
,
&
int_cache
,
&
icount
,
&
rhoSum
,
&
rho_dhSum
,
&
wcountSum
,
&
wcount_dhSum
,
&
div_vSum
,
&
curlvxSum
,
&
curlvySum
,
&
curlvzSum
,
v_hi_inv
,
v_vix
,
v_viy
,
v_viz
);
//}
// if (doi2_mask) {
storeInteractions
(
doi2_mask
,
pjd
,
&
v2_r2
,
&
v_dx2_tmp
,
&
v_dy2_tmp
,
&
v_dz2_tmp
,
&
mj
,
&
pjvx
,
&
pjvy
,
&
pjvz
,
cell_cache
,
&
int_cache2
,
&
icount2
,
&
rhoSum2
,
&
rho_dhSum2
,
&
wcountSum2
,
&
wcount_dhSum2
,
&
div_vSum2
,
&
curlvxSum2
,
&
curlvySum2
,
&
curlvzSum2
,
v_hi_inv2
,
v_vix2
,
v_viy2
,
v_viz2
);
//}
/* Hit or miss? */
// if (doi_mask2) {
storeInteractions
(
doi_mask2
,
pjd
+
VEC_SIZE
,
&
v_r2_2
,
&
v_dx_tmp2
,
&
v_dy_tmp2
,
&
v_dz_tmp2
,
&
mj2
,
&
pjvx2
,
&
pjvy2
,
&
pjvz2
,
cell_cache
,
&
int_cache
,
&
icount
,
&
rhoSum
,
&
rho_dhSum
,
&
wcountSum
,
&
wcount_dhSum
,
&
div_vSum
,
&
curlvxSum
,
&
curlvySum
,
&
curlvzSum
,
v_hi_inv
,
v_vix
,
v_viy
,
v_viz
);
//}
// if (doi2_mask2) {
storeInteractions
(
doi2_mask2
,
pjd
+
VEC_SIZE
,
&
v2_r2_2
,
&
v_dx2_tmp2
,
&
v_dy2_tmp2
,
&
v_dz2_tmp2
,
&
mj2
,
&
pjvx2
,
&
pjvy2
,
&
pjvz2
,
cell_cache
,
&
int_cache2
,
&
icount2
,
&
rhoSum2
,
&
rho_dhSum2
,
&
wcountSum2
,
&
wcount_dhSum2
,
&
div_vSum2
,
&
curlvxSum2
,
&
curlvySum2
,
&
curlvzSum2
,
v_hi_inv2
,
v_vix2
,
v_viy2
,
v_viz2
);
//}
}
/* Perform padded vector remainder interactions if any are present. */
calcRemInteractions
(
&
int_cache
,
icount
,
&
rhoSum
,
&
rho_dhSum
,
&
wcountSum
,
&
wcount_dhSum
,
&
div_vSum
,
&
curlvxSum
,
&
curlvySum
,
&
curlvzSum
,
v_hi_inv
,
v_vix
,
v_viy
,
v_viz
,
&
icount_align
);
calcRemInteractions
(
&
int_cache2
,
icount2
,
&
rhoSum2
,
&
rho_dhSum2
,
&
wcountSum2
,
&
wcount_dhSum2
,
&
div_vSum2
,
&
curlvxSum2
,
&
curlvySum2
,
&
curlvzSum2
,
v_hi_inv2
,
v_vix2
,
v_viy2
,
v_viz2
,
&
icount_align2
);
/* Initialise masks to true incase remainder interactions have been
* performed. */
vector
int_mask
,
int_mask2
;
vector
int2_mask
,
int2_mask2
;
#ifdef HAVE_AVX512_F
KNL_MASK_16
knl_mask
=
0xFFFF
;
KNL_MASK_16
knl_mask2
=
0xFFFF
;
int_mask
.
m
=
vec_setint1
(
0xFFFFFFFF
);
int_mask2
.
m
=
vec_setint1
(
0xFFFFFFFF
);
int2_mask
.
m
=
vec_setint1
(
0xFFFFFFFF
);
int2_mask2
.
m
=
vec_setint1
(
0xFFFFFFFF
);
#else
int_mask
.
m
=
vec_setint1
(
0xFFFFFFFF
);
int_mask2
.
m
=
vec_setint1
(
0xFFFFFFFF
);
int2_mask
.
m
=
vec_setint1
(
0xFFFFFFFF
);
int2_mask2
.
m
=
vec_setint1
(
0xFFFFFFFF
);
#endif
/* Perform interaction with 2 vectors. */
for
(
int
pjd
=
0
;
pjd
<
icount_align
;
pjd
+=
(
NUM_VEC_PROC
*
VEC_SIZE
))
{
runner_iact_nonsym_2_vec_density
(
&
int_cache
.
r2q
[
pjd
],
&
int_cache
.
dxq
[
pjd
],
&
int_cache
.
dyq
[
pjd
],
&
int_cache
.
dzq
[
pjd
],
v_hi_inv
,
v_vix
,
v_viy
,
v_viz
,
&
int_cache
.
vxq
[
pjd
],
&
int_cache
.
vyq
[
pjd
],
&
int_cache
.
vzq
[
pjd
],
&
int_cache
.
mq
[
pjd
],
&
rhoSum
,
&
rho_dhSum
,
&
wcountSum
,
&
wcount_dhSum
,
&
div_vSum
,
&
curlvxSum
,
&
curlvySum
,
&
curlvzSum
,
int_mask
,
int_mask2
,
#ifdef HAVE_AVX512_F
knl_mask
,
knl_mask2
);
#else
0
,
0
);
#endif
}
for
(
int
pjd
=
0
;
pjd
<
icount_align2
;
pjd
+=
(
NUM_VEC_PROC
*
VEC_SIZE
))
{
runner_iact_nonsym_2_vec_density
(
&
int_cache2
.
r2q
[
pjd
],
&
int_cache2
.
dxq
[
pjd
],
&
int_cache2
.
dyq
[
pjd
],
&
int_cache2
.
dzq
[
pjd
],
v_hi_inv2
,
v_vix2
,
v_viy2
,
v_viz2
,
&
int_cache2
.
vxq
[
pjd
],
&
int_cache2
.
vyq
[
pjd
],
&
int_cache2
.
vzq
[
pjd
],
&
int_cache2
.
mq
[
pjd
],
&
rhoSum2
,
&
rho_dhSum2
,
&
wcountSum2
,
&
wcount_dhSum2
,
&
div_vSum2
,
&
curlvxSum2
,
&
curlvySum2
,
&
curlvzSum2
,
int2_mask
,
int2_mask2
,
#ifdef HAVE_AVX512_F
knl_mask
,
knl_mask2
);
#else
0
,
0
);
#endif
}
/* Perform horizontal adds on vector sums and store result in particle pi.
*/
VEC_HADD
(
rhoSum
,
pi
->
rho
);
VEC_HADD
(
rho_dhSum
,
pi
->
density
.
rho_dh
);
VEC_HADD
(
wcountSum
,
pi
->
density
.
wcount
);
VEC_HADD
(
wcount_dhSum
,
pi
->
density
.
wcount_dh
);
VEC_HADD
(
div_vSum
,
pi
->
density
.
div_v
);
VEC_HADD
(
curlvxSum
,
pi
->
density
.
rot_v
[
0
]);
VEC_HADD
(
curlvySum
,
pi
->
density
.
rot_v
[
1
]);
VEC_HADD
(
curlvzSum
,
pi
->
density
.
rot_v
[
2
]);
VEC_HADD
(
rhoSum2
,
pi2
->
rho
);
VEC_HADD
(
rho_dhSum2
,
pi2
->
density
.
rho_dh
);
VEC_HADD
(
wcountSum2
,
pi2
->
density
.
wcount
);
VEC_HADD
(
wcount_dhSum2
,
pi2
->
density
.
wcount_dh
);
VEC_HADD
(
div_vSum2
,
pi2
->
density
.
div_v
);
VEC_HADD
(
curlvxSum2
,
pi2
->
density
.
rot_v
[
0
]);
VEC_HADD
(
curlvySum2
,
pi2
->
density
.
rot_v
[
1
]);
VEC_HADD
(
curlvzSum2
,
pi2
->
density
.
rot_v
[
2
]);
/* Reset interaction count. */
icount
=
0
;
icount2
=
0
;
}
/* loop over all particles. */
TIMER_TOC
(
timer_doself_density
);
#endif
/* WITH_VECTORIZATION */
}
/**
* @brief Compute the density interactions between a cell pair (non-symmetric)
* using vector intrinsics.
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment