Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
SWIFTsim
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Deploy
Releases
Model registry
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
SWIFT
SWIFTsim
Commits
42ca2f3f
Commit
42ca2f3f
authored
7 years ago
by
James Willis
Browse files
Options
Downloads
Patches
Plain Diff
Comments and formatting.
parent
5da42d3e
Branches
Branches containing commit
Tags
Tags containing commit
1 merge request
!440
Dopair2 vectorisation
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
src/runner_doiact_vec.c
+61
-55
61 additions, 55 deletions
src/runner_doiact_vec.c
with
61 additions
and
55 deletions
src/runner_doiact_vec.c
+
61
−
55
View file @
42ca2f3f
...
...
@@ -363,13 +363,16 @@ __attribute__((always_inline)) INLINE static void populate_max_index_no_cache(
*
init_pj
=
last_pj
;
}
__attribute__
((
always_inline
))
INLINE
static
void
populate_max_index_no_cache_force
(
const
struct
cell
*
ci
,
const
struct
cell
*
cj
,
const
struct
entry
*
restrict
sort_i
,
const
struct
entry
*
restrict
sort_j
,
const
float
dx_max
,
const
float
rshift
,
const
double
hi_max
,
const
double
hj_max
,
const
double
di_max
,
const
double
dj_min
,
int
*
max_index_i
,
int
*
max_index_j
,
int
*
init_pi
,
int
*
init_pj
,
const
struct
engine
*
e
)
{
__attribute__
((
always_inline
))
INLINE
static
void
populate_max_index_no_cache_force
(
const
struct
cell
*
ci
,
const
struct
cell
*
cj
,
const
struct
entry
*
restrict
sort_i
,
const
struct
entry
*
restrict
sort_j
,
const
float
dx_max
,
const
float
rshift
,
const
double
hi_max
,
const
double
hj_max
,
const
double
di_max
,
const
double
dj_min
,
int
*
max_index_i
,
int
*
max_index_j
,
int
*
init_pi
,
int
*
init_pj
,
const
struct
engine
*
e
)
{
const
struct
part
*
restrict
parts_i
=
ci
->
parts
;
const
struct
part
*
restrict
parts_j
=
cj
->
parts
;
...
...
@@ -473,22 +476,26 @@ __attribute__((always_inline)) INLINE static void populate_max_index_no_cache_fo
int
first_pi_est
=
first_pi
;
int
max_index_i_last
=
max_index_i
[
ci
->
count
-
1
];
if
(
max_index_j
[
0
]
<
first_pi
)
{
/* Set the maximum indices for pi particles to last_pj if the first particle
* in cj is in range of more particles than first_pi. This ensures that all
* interactions are found.*/
if
(
max_index_j
[
0
]
<
first_pi
)
{
first_pi
=
max_index_j
[
0
];
for
(
int
i
=
first_pi
;
i
<
ci
->
count
;
i
++
)
{
for
(
int
i
=
first_pi
;
i
<
ci
->
count
;
i
++
)
{
max_index_i
[
i
]
=
last_pj
;
}
}
if
(
max_index_i_last
>
last_pj
)
{
/* Set the maximum indices for pj particles to first_pi_est if the last
* particle in ci is in range of more particles than last_pj. This ensures
* that all interactions are found.*/
if
(
max_index_i_last
>
last_pj
)
{
last_pj
=
max_index_i_last
;
for
(
int
i
=
0
;
i
<=
last_pj
;
i
++
)
{
for
(
int
i
=
0
;
i
<=
last_pj
;
i
++
)
{
max_index_j
[
i
]
=
first_pi_est
;
}
}
*
init_pi
=
first_pi
;
...
...
@@ -1336,7 +1343,7 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
* @param cj The second #cell.
*/
void
runner_dopair2_force_vec
(
struct
runner
*
r
,
struct
cell
*
ci
,
struct
cell
*
cj
)
{
struct
cell
*
cj
)
{
#ifdef WITH_VECTORIZATION
const
struct
engine
*
restrict
e
=
r
->
e
;
...
...
@@ -1417,8 +1424,8 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
const
float
dx_max
=
(
ci
->
dx_max_sort
+
cj
->
dx_max_sort
);
/* Check if any particles are active and return if there are not. */
//int numActive = 0;
//for (int pid = count_i - 1;
//
int numActive = 0;
//
for (int pid = count_i - 1;
// pid >= 0 && sort_i[pid].d + hi_max + dx_max > dj_min; pid--) {
// struct part *restrict pi = &parts_i[sort_i[pid].i];
// if (part_is_active(pi, e)) {
...
...
@@ -1427,8 +1434,9 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
// }
//}
//if (!numActive) {
// for (int pjd = 0; pjd < count_j && sort_j[pjd].d - hj_max - dx_max < di_max;
// if (!numActive) {
// for (int pjd = 0; pjd < count_j && sort_j[pjd].d - hj_max - dx_max <
// di_max;
// pjd++) {
// struct part *restrict pj = &parts_j[sort_j[pjd].i];
// if (part_is_active(pj, e)) {
...
...
@@ -1438,7 +1446,7 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
// }
//}
//if (numActive == 0) return;
//
if (numActive == 0) return;
/* Get both particle caches from the runner and re-allocate
* them if they are not big enough for the cells. */
...
...
@@ -1462,11 +1470,9 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
/* Find particles maximum distance into cj, max_di[] and ci, max_dj[]. */
/* Also find the first pi that interacts with any particle in cj and the last
* pj that interacts with any particle in ci. */
//populate_max_d_no_cache(ci, cj, sort_i, sort_j, dx_max, rshift, hi_max, hj_max, di_max, dj_min, max_di,
// max_dj, &first_pi, &last_pj, e);
populate_max_index_no_cache_force
(
ci
,
cj
,
sort_i
,
sort_j
,
dx_max
,
rshift
,
hi_max
,
hj_max
,
di_max
,
dj_min
,
max_index_i
,
max_index_j
,
&
first_pi
,
&
last_pj
,
e
);
populate_max_index_no_cache_force
(
ci
,
cj
,
sort_i
,
sort_j
,
dx_max
,
rshift
,
hi_max
,
hj_max
,
di_max
,
dj_min
,
max_index_i
,
max_index_j
,
&
first_pi
,
&
last_pj
,
e
);
/* Limits of the outer loops. */
int
first_pi_loop
=
first_pi
;
...
...
@@ -1476,14 +1482,14 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
* to read into the cache. */
last_pj
=
max
(
last_pj
,
max_index_i
[
count_i
-
1
]);
first_pi
=
min
(
first_pi
,
max_index_j
[
0
]);
/* Read the needed particles into the two caches. */
int
first_pi_align
=
first_pi
;
int
last_pj_align
=
last_pj
;
cache_read_two_partial_cells_sorted_force
(
ci
,
cj
,
ci_cache
,
cj_cache
,
sort_i
,
sort_j
,
shift
,
&
first_pi_align
,
&
last_pj_align
,
1
);
sort_j
,
shift
,
&
first_pi_align
,
&
last_pj_align
,
1
);
/* Get the number of particles read into the ci cache. */
int
ci_cache_count
=
count_i
-
first_pi_align
;
...
...
@@ -1501,7 +1507,8 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
/* Skip this particle if no particle in cj is within range of it. */
const
float
hi
=
ci_cache
->
h
[
ci_cache_idx
];
const
double
di_test
=
sort_i
[
pid
].
d
+
hi
*
kernel_gamma
+
dx_max
-
rshift
;
const
double
di_test
=
sort_i
[
pid
].
d
+
hi
*
kernel_gamma
+
dx_max
-
rshift
;
if
(
di_test
<
dj_min
)
continue
;
/* Determine the exit iteration of the interaction loop. */
...
...
@@ -1530,8 +1537,8 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
/* Reset cumulative sums of update vectors. */
vector
a_hydro_xSum
,
a_hydro_ySum
,
a_hydro_zSum
,
h_dtSum
,
v_sigSum
,
entropy_dtSum
;
entropy_dtSum
;
/* Get the inverse of hi. */
vector
v_hi_inv
;
v_hi_inv
=
vec_reciprocal
(
v_hi
);
...
...
@@ -1554,7 +1561,7 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
}
vector
pjx
,
pjy
,
pjz
,
hj
,
hjg2
;
/* Loop over the parts in cj. */
for
(
int
pjd
=
0
;
pjd
<
exit_iteration_align
;
pjd
+=
VEC_SIZE
)
{
...
...
@@ -1602,18 +1609,17 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
if
(
doi_mask
)
{
vector
v_hj_inv
;
v_hj_inv
=
vec_reciprocal
(
hj
);
runner_iact_nonsym_1_vec_force
(
&
v_r2
,
&
v_dx
,
&
v_dy
,
&
v_dz
,
v_vix
,
v_viy
,
v_viz
,
v_rhoi
,
v_grad_hi
,
v_pOrhoi2
,
v_balsara_i
,
v_ci
,
&
v_r2
,
&
v_dx
,
&
v_dy
,
&
v_dz
,
v_vix
,
v_viy
,
v_viz
,
v_rhoi
,
v_grad_hi
,
v_pOrhoi2
,
v_balsara_i
,
v_ci
,
&
cj_cache
->
vx
[
cj_cache_idx
],
&
cj_cache
->
vy
[
cj_cache_idx
],
&
cj_cache
->
vz
[
cj_cache_idx
],
&
cj_cache
->
rho
[
cj_cache_idx
],
&
cj_cache
->
grad_h
[
cj_cache_idx
],
&
cj_cache
->
pOrho2
[
cj_cache_idx
],
&
cj_cache
->
balsara
[
cj_cache_idx
],
&
cj_cache
->
soundspeed
[
cj_cache_idx
],
&
cj_cache
->
m
[
cj_cache_idx
],
v_hi_inv
,
v_hj_inv
,
&
a_hydro_xSum
,
&
a_hydro_ySum
,
&
a_hydro_zSum
,
&
cj_cache
->
vz
[
cj_cache_idx
],
&
cj_cache
->
rho
[
cj_cache_idx
],
&
cj_cache
->
grad_h
[
cj_cache_idx
],
&
cj_cache
->
pOrho2
[
cj_cache_idx
],
&
cj_cache
->
balsara
[
cj_cache_idx
],
&
cj_cache
->
soundspeed
[
cj_cache_idx
],
&
cj_cache
->
m
[
cj_cache_idx
]
,
v_hi_inv
,
v_hj_inv
,
&
a_hydro_xSum
,
&
a_hydro_ySum
,
&
a_hydro_zSum
,
&
h_dtSum
,
&
v_sigSum
,
&
entropy_dtSum
,
v_doi_mask
);
}
}
/* loop over the parts in cj. */
...
...
@@ -1626,12 +1632,12 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
VEC_HADD
(
h_dtSum
,
pi
->
force
.
h_dt
);
VEC_HMAX
(
v_sigSum
,
pi
->
force
.
v_sig
);
VEC_HADD
(
entropy_dtSum
,
pi
->
entropy_dt
);
}
/* loop over the parts in ci. */
}
if
(
cell_is_active
(
cj
,
e
))
{
/* Loop over the parts in cj until nothing is within range in ci. */
for
(
int
pjd
=
0
;
pjd
<=
last_pj_loop
;
pjd
++
)
{
...
...
@@ -1645,7 +1651,8 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
/*TODO: rshift term. */
/* Skip this particle if no particle in ci is within range of it. */
const
float
hj
=
cj_cache
->
h
[
cj_cache_idx
];
const
double
dj_test
=
sort_j
[
pjd
].
d
-
hj
*
kernel_gamma
-
dx_max
-
rshift
;
const
double
dj_test
=
sort_j
[
pjd
].
d
-
hj
*
kernel_gamma
-
dx_max
-
rshift
;
if
(
dj_test
>
di_max
)
continue
;
/* Determine the exit iteration of the interaction loop. */
...
...
@@ -1676,7 +1683,7 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
/* Reset cumulative sums of update vectors. */
vector
a_hydro_xSum
,
a_hydro_ySum
,
a_hydro_zSum
,
h_dtSum
,
v_sigSum
,
entropy_dtSum
;
entropy_dtSum
;
/* Get the inverse of hj. */
vector
v_hj_inv
;
...
...
@@ -1745,18 +1752,17 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
if
(
doj_mask
)
{
vector
v_hi_inv
;
v_hi_inv
=
vec_reciprocal
(
hi
);
runner_iact_nonsym_1_vec_force
(
&
v_r2
,
&
v_dx
,
&
v_dy
,
&
v_dz
,
v_vjx
,
v_vjy
,
v_vjz
,
v_rhoj
,
v_grad_hj
,
v_pOrhoj2
,
v_balsara_j
,
v_cj
,
&
v_r2
,
&
v_dx
,
&
v_dy
,
&
v_dz
,
v_vjx
,
v_vjy
,
v_vjz
,
v_rhoj
,
v_grad_hj
,
v_pOrhoj2
,
v_balsara_j
,
v_cj
,
&
ci_cache
->
vx
[
ci_cache_idx
],
&
ci_cache
->
vy
[
ci_cache_idx
],
&
ci_cache
->
vz
[
ci_cache_idx
],
&
ci_cache
->
rho
[
ci_cache_idx
],
&
ci_cache
->
grad_h
[
ci_cache_idx
],
&
ci_cache
->
pOrho2
[
ci_cache_idx
],
&
ci_cache
->
balsara
[
ci_cache_idx
],
&
ci_cache
->
soundspeed
[
ci_cache_idx
],
&
ci_cache
->
m
[
ci_cache_idx
],
v_hj_inv
,
v_hi_inv
,
&
a_hydro_xSum
,
&
a_hydro_ySum
,
&
a_hydro_zSum
,
&
ci_cache
->
vz
[
ci_cache_idx
],
&
ci_cache
->
rho
[
ci_cache_idx
],
&
ci_cache
->
grad_h
[
ci_cache_idx
],
&
ci_cache
->
pOrho2
[
ci_cache_idx
],
&
ci_cache
->
balsara
[
ci_cache_idx
],
&
ci_cache
->
soundspeed
[
ci_cache_idx
],
&
ci_cache
->
m
[
ci_cache_idx
]
,
v_hj_inv
,
v_hi_inv
,
&
a_hydro_xSum
,
&
a_hydro_ySum
,
&
a_hydro_zSum
,
&
h_dtSum
,
&
v_sigSum
,
&
entropy_dtSum
,
v_doj_mask
);
}
}
/* loop over the parts in ci. */
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment