Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
SWIFTsim
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Deploy
Releases
Model registry
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
SWIFT
SWIFTsim
Commits
8758adf1
Commit
8758adf1
authored
8 years ago
by
James Willis
Browse files
Options
Downloads
Patches
Plain Diff
Vectorise the first inner loop with intrinsics.
parent
6f600db9
No related branches found
No related tags found
1 merge request
!320
Dopair1 vectorisation merge
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
src/runner_doiact_vec.c
+49
-36
49 additions, 36 deletions
src/runner_doiact_vec.c
with
49 additions
and
36 deletions
src/runner_doiact_vec.c
+
49
−
36
View file @
8758adf1
...
...
@@ -956,25 +956,19 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, struct cell *
const
double
di
=
sort_i
[
pid
].
d
+
hi
*
kernel_gamma
+
dx_max
-
rshift
;
if
(
di
<
dj_min
)
continue
;
float
pix
=
ci_cache
->
x
[
ci_cache_idx
];
float
piy
=
ci_cache
->
y
[
ci_cache_idx
];
float
piz
=
ci_cache
->
z
[
ci_cache_idx
];
const
float
hig2
=
hi
*
hi
*
kernel_gamma2
;
//vector pix, piy, piz;
//const float hi = cell_cache->h[pid];
vector
pix
,
piy
,
piz
;
/* Fill particle pi vectors. */
//
pix.v = vec_set1(
(float)(pi->x[0] - ci->loc[0] - shift[0])
);
//
piy.v = vec_set1(
(float)(pi->x[1] - ci->loc[1] - shift[1])
);
//
piz.v = vec_set1(
(float)(pi->x[2] - ci->loc[2] - shift[2])
);
pix
.
v
=
vec_set1
(
ci_cache
->
x
[
ci_cache_idx
]
);
piy
.
v
=
vec_set1
(
ci_cache
->
y
[
ci_cache_idx
]
);
piz
.
v
=
vec_set1
(
ci_cache
->
z
[
ci_cache_idx
]
);
v_hi
.
v
=
vec_set1
(
hi
);
v_vix
.
v
=
vec_set1
(
pi
->
v
[
0
]);
v_viy
.
v
=
vec_set1
(
pi
->
v
[
1
]);
v_viz
.
v
=
vec_set1
(
pi
->
v
[
2
]);
v_vix
.
v
=
vec_set1
(
ci_cache
->
vx
[
ci_cache_idx
]);
v_viy
.
v
=
vec_set1
(
ci_cache
->
vy
[
ci_cache_idx
]);
v_viz
.
v
=
vec_set1
(
ci_cache
->
vz
[
ci_cache_idx
]);
//const float hig2 = hi * hi * kernel_gamma2;
v_hig2
.
v
=
vec_set1
(
hig2
);
/* Reset cumulative sums of update vectors. */
...
...
@@ -1010,42 +1004,61 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, struct cell *
/* Set the padded positions equal to those of particle pi so that, when the
* r2 > 0 mask is applied, these extra contributions are masked out. */
for
(
int
i
=
exit_iteration
;
i
<
exit_iteration_align
;
i
++
)
{
cj_cache
.
x
[
i
]
=
pix
;
cj_cache
.
y
[
i
]
=
piy
;
cj_cache
.
z
[
i
]
=
piz
;
cj_cache
.
x
[
i
]
=
pix
.
f
[
0
]
;
cj_cache
.
y
[
i
]
=
piy
.
f
[
0
]
;
cj_cache
.
z
[
i
]
=
piz
.
f
[
0
]
;
}
}
vector
pjx
,
pjy
,
pjz
;
vector
pjvx
,
pjvy
,
pjvz
,
mj
;
/* Loop over the parts in cj. */
//for (int pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) {
for
(
int
pjd
=
0
;
pjd
<
exit_iteration
;
pjd
++
)
{
for
(
int
pjd
=
0
;
pjd
<
exit_iteration_align
;
pjd
+=
VEC_SIZE
)
{
/* Get the cache index to the jth particle. */
//int cj_cache_idx = sort_j[pjd].i;
int
cj_cache_idx
=
pjd
;
vector
v_dx
,
v_dy
,
v_dz
,
v_r2
;
/* Load 1 set of vectors (positions, velocities and mass) from the particle cache. */
pjx
.
v
=
vec_load
(
&
cj_cache
.
x
[
cj_cache_idx
]);
pjy
.
v
=
vec_load
(
&
cj_cache
.
y
[
cj_cache_idx
]);
pjz
.
v
=
vec_load
(
&
cj_cache
.
z
[
cj_cache_idx
]);
pjvx
.
v
=
vec_load
(
&
cj_cache
.
vx
[
cj_cache_idx
]);
pjvy
.
v
=
vec_load
(
&
cj_cache
.
vy
[
cj_cache_idx
]);
pjvz
.
v
=
vec_load
(
&
cj_cache
.
vz
[
cj_cache_idx
]);
mj
.
v
=
vec_load
(
&
cj_cache
.
m
[
cj_cache_idx
]);
/* Compute the pairwise distance. */
float
dx
=
pix
-
cj_cache
.
x
[
cj_cache_idx
];
float
dy
=
piy
-
cj_cache
.
y
[
cj_cache_idx
];
float
dz
=
piz
-
cj_cache
.
z
[
cj_cache_idx
];
float
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
v_dx
.
v
=
vec_sub
(
pix
.
v
,
pjx
.
v
);
v_dy
.
v
=
vec_sub
(
piy
.
v
,
pjy
.
v
);
v_dz
.
v
=
vec_sub
(
piz
.
v
,
pjz
.
v
);
/* Hit or miss? */
if
(
r2
<
hig2
)
{
v_r2
.
v
=
vec_mul
(
v_dx
.
v
,
v_dx
.
v
);
v_r2
.
v
=
vec_fma
(
v_dy
.
v
,
v_dy
.
v
,
v_r2
.
v
);
v_r2
.
v
=
vec_fma
(
v_dz
.
v
,
v_dz
.
v
,
v_r2
.
v
);
/* Add this interaction to the queue. */
int_cache
.
r2q
[
icount
]
=
r2
;
int_cache
.
dxq
[
icount
]
=
dx
;
int_cache
.
dyq
[
icount
]
=
dy
;
int_cache
.
dzq
[
icount
]
=
dz
;
int_cache
.
mq
[
icount
]
=
cj_cache
.
m
[
cj_cache_idx
];
int_cache
.
vxq
[
icount
]
=
cj_cache
.
vx
[
cj_cache_idx
];
int_cache
.
vyq
[
icount
]
=
cj_cache
.
vy
[
cj_cache_idx
];
int_cache
.
vzq
[
icount
]
=
cj_cache
.
vz
[
cj_cache_idx
];
icount
++
;
}
vector
v_doi_mask
,
v_doi_mask_check
;
int
doi_mask
;
/* Form r2 > 0 mask and r2 < hig2 mask. */
v_doi_mask_check
.
v
=
vec_cmp_gt
(
v_r2
.
v
,
vec_setzero
());
v_doi_mask
.
v
=
vec_cmp_lt
(
v_r2
.
v
,
v_hig2
.
v
);
/* Combine two masks and form integer mask. */
doi_mask
=
vec_cmp_result
(
vec_and
(
v_doi_mask
.
v
,
v_doi_mask_check
.
v
));
/* If there are any interactions left pack interaction values into c2
* cache. */
if
(
doi_mask
)
storeInteractions
(
doi_mask
,
cj_cache_idx
,
&
v_r2
,
&
v_dx
,
&
v_dy
,
&
v_dz
,
&
mj
,
&
pjvx
,
&
pjvy
,
&
pjvz
,
&
cj_cache
,
&
int_cache
,
&
icount
,
&
rhoSum
,
&
rho_dhSum
,
&
wcountSum
,
&
wcount_dhSum
,
&
div_vSum
,
&
curlvxSum
,
&
curlvySum
,
&
curlvzSum
,
v_hi_inv
,
v_vix
,
v_viy
,
v_viz
);
}
/* loop over the parts in cj. */
/* Perform padded vector remainder interactions if any are present. */
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment