Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
SWIFTsim
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Deploy
Releases
Model registry
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
SWIFT
SWIFTsim
Commits
e88c008a
Commit
e88c008a
authored
9 years ago
by
James Willis
Browse files
Options
Downloads
Patches
Plain Diff
First attempt at a vectorised version of the non-symmetrical force interaction.
parent
43b7823b
Branches
Branches containing commit
Tags
Tags containing commit
1 merge request
!204
Intrinsic vectorisation
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
src/hydro/Gadget2/hydro_iact.h
+185
-9
185 additions, 9 deletions
src/hydro/Gadget2/hydro_iact.h
with
185 additions
and
9 deletions
src/hydro/Gadget2/hydro_iact.h
+
185
−
9
View file @
e88c008a
...
@@ -409,8 +409,8 @@ __attribute__((always_inline)) INLINE static void runner_iact_force(
...
@@ -409,8 +409,8 @@ __attribute__((always_inline)) INLINE static void runner_iact_force(
const
float
wj_dr
=
hj2_inv
*
hj2_inv
*
wj_dx
;
const
float
wj_dr
=
hj2_inv
*
hj2_inv
*
wj_dx
;
/* Compute gradient terms */
/* Compute gradient terms */
const
float
P_over_rho_i
=
pi
->
force
.
P_over_rho
;
const
float
P_over_rho
2
_i
=
pi
->
force
.
P_over_rho
2
;
const
float
P_over_rho_j
=
pj
->
force
.
P_over_rho
;
const
float
P_over_rho
2
_j
=
pj
->
force
.
P_over_rho
2
;
/* Compute sound speeds */
/* Compute sound speeds */
const
float
ci
=
pi
->
force
.
soundspeed
;
const
float
ci
=
pi
->
force
.
soundspeed
;
...
@@ -439,7 +439,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_force(
...
@@ -439,7 +439,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_force(
/* Now, convolve with the kernel */
/* Now, convolve with the kernel */
const
float
visc_term
=
0
.
5
f
*
visc
*
(
wi_dr
+
wj_dr
)
*
r_inv
;
const
float
visc_term
=
0
.
5
f
*
visc
*
(
wi_dr
+
wj_dr
)
*
r_inv
;
const
float
sph_term
=
(
P_over_rho_i
*
wi_dr
+
P_over_rho_j
*
wj_dr
)
*
r_inv
;
const
float
sph_term
=
(
P_over_rho
2
_i
*
wi_dr
+
P_over_rho
2
_j
*
wj_dr
)
*
r_inv
;
/* Eventually got the acceleration */
/* Eventually got the acceleration */
const
float
acc
=
visc_term
+
sph_term
;
const
float
acc
=
visc_term
+
sph_term
;
...
@@ -511,8 +511,8 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force(
...
@@ -511,8 +511,8 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force(
const
float
wj_dr
=
hj2_inv
*
hj2_inv
*
wj_dx
;
const
float
wj_dr
=
hj2_inv
*
hj2_inv
*
wj_dx
;
/* Compute gradient terms */
/* Compute gradient terms */
const
float
P_over_rho_i
=
pi
->
force
.
P_over_rho
;
const
float
P_over_rho
2
_i
=
pi
->
force
.
P_over_rho
2
;
const
float
P_over_rho_j
=
pj
->
force
.
P_over_rho
;
const
float
P_over_rho
2
_j
=
pj
->
force
.
P_over_rho
2
;
/* Compute sound speeds */
/* Compute sound speeds */
const
float
ci
=
pi
->
force
.
soundspeed
;
const
float
ci
=
pi
->
force
.
soundspeed
;
...
@@ -541,7 +541,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force(
...
@@ -541,7 +541,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force(
/* Now, convolve with the kernel */
/* Now, convolve with the kernel */
const
float
visc_term
=
0
.
5
f
*
visc
*
(
wi_dr
+
wj_dr
)
*
r_inv
;
const
float
visc_term
=
0
.
5
f
*
visc
*
(
wi_dr
+
wj_dr
)
*
r_inv
;
const
float
sph_term
=
(
P_over_rho_i
*
wi_dr
+
P_over_rho_j
*
wj_dr
)
*
r_inv
;
const
float
sph_term
=
(
P_over_rho
2
_i
*
wi_dr
+
P_over_rho
2
_j
*
wj_dr
)
*
r_inv
;
/* Eventually got the acceleration */
/* Eventually got the acceleration */
const
float
acc
=
visc_term
+
sph_term
;
const
float
acc
=
visc_term
+
sph_term
;
...
@@ -567,9 +567,185 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force(
...
@@ -567,9 +567,185 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force(
__attribute__
((
always_inline
))
INLINE
static
void
runner_iact_nonsym_vec_force
(
__attribute__
((
always_inline
))
INLINE
static
void
runner_iact_nonsym_vec_force
(
float
*
R2
,
float
*
Dx
,
float
*
Hi
,
float
*
Hj
,
struct
part
**
pi
,
float
*
R2
,
float
*
Dx
,
float
*
Hi
,
float
*
Hj
,
struct
part
**
pi
,
struct
part
**
pj
)
{
struct
part
**
pj
)
{
error
(
"A vectorised version of the Gadget2 non-symmetric force interaction "
#ifdef WITH_VECTORIZATION
"function does not exist yet!"
);
vector
r
,
r2
,
ri
;
vector
xi
,
xj
;
vector
hi
,
hj
,
hi_inv
,
hj_inv
;
vector
hi2_inv
,
hj2_inv
;
vector
wi
,
wj
,
wi_dx
,
wj_dx
,
wi_dr
,
wj_dr
,
dvdr
;
vector
w
;
vector
piPOrho
,
pjPOrho
,
pirho
,
pjrho
;
vector
mj
;
vector
f
;
vector
dx
[
3
];
vector
vi
[
3
],
vj
[
3
];
vector
pia
[
3
];
vector
piu_dt
;
vector
pih_dt
;
vector
ci
,
cj
,
v_sig
,
vi_sig
,
vj_sig
;
vector
omega_ij
,
mu_ij
,
fac_mu
,
Pi_ij
,
balsara
;
vector
alpha_ij
,
v_sig_u
,
tc
;
vector
rho_ij
,
visc
,
visc_term
,
sph_term
,
acc
,
entropy_dt
;
int
j
,
k
;
fac_mu
.
v
=
vec_set1
(
1
.
f
);
/* Will change with cosmological integration */
/* Load stuff. */
#if VEC_SIZE == 8
mj
.
v
=
vec_set
(
pj
[
0
]
->
mass
,
pj
[
1
]
->
mass
,
pj
[
2
]
->
mass
,
pj
[
3
]
->
mass
,
pj
[
4
]
->
mass
,
pj
[
5
]
->
mass
,
pj
[
6
]
->
mass
,
pj
[
7
]
->
mass
);
piPOrho
.
v
=
vec_set
(
pi
[
0
]
->
force
.
P_over_rho2
,
pi
[
1
]
->
force
.
P_over_rho2
,
pi
[
2
]
->
force
.
P_over_rho2
,
pi
[
3
]
->
force
.
P_over_rho2
,
pi
[
4
]
->
force
.
P_over_rho2
,
pi
[
5
]
->
force
.
P_over_rho2
,
pi
[
6
]
->
force
.
P_over_rho2
,
pi
[
7
]
->
force
.
P_over_rho2
);
pjPOrho
.
v
=
vec_set
(
pj
[
0
]
->
force
.
P_over_rho2
,
pj
[
1
]
->
force
.
P_over_rho2
,
pj
[
2
]
->
force
.
P_over_rho2
,
pj
[
3
]
->
force
.
P_over_rho2
,
pj
[
4
]
->
force
.
P_over_rho2
,
pj
[
5
]
->
force
.
P_over_rho2
,
pj
[
6
]
->
force
.
P_over_rho2
,
pj
[
7
]
->
force
.
P_over_rho2
);
pirho
.
v
=
vec_set
(
pi
[
0
]
->
rho
,
pi
[
1
]
->
rho
,
pi
[
2
]
->
rho
,
pi
[
3
]
->
rho
,
pi
[
4
]
->
rho
,
pi
[
5
]
->
rho
,
pi
[
6
]
->
rho
,
pi
[
7
]
->
rho
);
pjrho
.
v
=
vec_set
(
pj
[
0
]
->
rho
,
pj
[
1
]
->
rho
,
pj
[
2
]
->
rho
,
pj
[
3
]
->
rho
,
pj
[
4
]
->
rho
,
pj
[
5
]
->
rho
,
pj
[
6
]
->
rho
,
pj
[
7
]
->
rho
);
ci
.
v
=
vec_set
(
pi
[
0
]
->
force
.
soundspeed
,
pi
[
1
]
->
force
.
soundspeed
,
pi
[
2
]
->
force
.
soundspeed
,
pi
[
3
]
->
force
.
soundspeed
,
pi
[
4
]
->
force
.
soundspeed
,
pi
[
5
]
->
force
.
soundspeed
,
pi
[
6
]
->
force
.
soundspeed
,
pi
[
7
]
->
force
.
soundspeed
);
cj
.
v
=
vec_set
(
pj
[
0
]
->
force
.
soundspeed
,
pj
[
1
]
->
force
.
soundspeed
,
pj
[
2
]
->
force
.
soundspeed
,
pj
[
3
]
->
force
.
soundspeed
,
pj
[
4
]
->
force
.
soundspeed
,
pj
[
5
]
->
force
.
soundspeed
,
pj
[
6
]
->
force
.
soundspeed
,
pj
[
7
]
->
force
.
soundspeed
);
vi_sig
.
v
=
vec_set
(
pi
[
0
]
->
force
.
v_sig
,
pi
[
1
]
->
force
.
v_sig
,
pi
[
2
]
->
force
.
v_sig
,
pi
[
3
]
->
force
.
v_sig
,
pi
[
4
]
->
force
.
v_sig
,
pi
[
5
]
->
force
.
v_sig
,
pi
[
6
]
->
force
.
v_sig
,
pi
[
7
]
->
force
.
v_sig
);
vj_sig
.
v
=
vec_set
(
pj
[
0
]
->
force
.
v_sig
,
pj
[
1
]
->
force
.
v_sig
,
pj
[
2
]
->
force
.
v_sig
,
pj
[
3
]
->
force
.
v_sig
,
pj
[
4
]
->
force
.
v_sig
,
pj
[
5
]
->
force
.
v_sig
,
pj
[
6
]
->
force
.
v_sig
,
pj
[
7
]
->
force
.
v_sig
);
for
(
k
=
0
;
k
<
3
;
k
++
)
{
vi
[
k
].
v
=
vec_set
(
pi
[
0
]
->
v
[
k
],
pi
[
1
]
->
v
[
k
],
pi
[
2
]
->
v
[
k
],
pi
[
3
]
->
v
[
k
],
pi
[
4
]
->
v
[
k
],
pi
[
5
]
->
v
[
k
],
pi
[
6
]
->
v
[
k
],
pi
[
7
]
->
v
[
k
]);
vj
[
k
].
v
=
vec_set
(
pj
[
0
]
->
v
[
k
],
pj
[
1
]
->
v
[
k
],
pj
[
2
]
->
v
[
k
],
pj
[
3
]
->
v
[
k
],
pj
[
4
]
->
v
[
k
],
pj
[
5
]
->
v
[
k
],
pj
[
6
]
->
v
[
k
],
pj
[
7
]
->
v
[
k
]);
}
for
(
k
=
0
;
k
<
3
;
k
++
)
dx
[
k
].
v
=
vec_set
(
Dx
[
0
+
k
],
Dx
[
3
+
k
],
Dx
[
6
+
k
],
Dx
[
9
+
k
],
Dx
[
12
+
k
],
Dx
[
15
+
k
],
Dx
[
18
+
k
],
Dx
[
21
+
k
]);
balsara
.
v
=
vec_set
(
pi
[
0
]
->
force
.
balsara
,
pi
[
1
]
->
force
.
balsara
,
pi
[
2
]
->
force
.
balsara
,
pi
[
3
]
->
force
.
balsara
,
pi
[
4
]
->
force
.
balsara
,
pi
[
5
]
->
force
.
balsara
,
pi
[
6
]
->
force
.
balsara
,
pi
[
7
]
->
force
.
balsara
)
+
vec_set
(
pj
[
0
]
->
force
.
balsara
,
pj
[
1
]
->
force
.
balsara
,
pj
[
2
]
->
force
.
balsara
,
pj
[
3
]
->
force
.
balsara
,
pj
[
4
]
->
force
.
balsara
,
pj
[
5
]
->
force
.
balsara
,
pj
[
6
]
->
force
.
balsara
,
pj
[
7
]
->
force
.
balsara
);
#elif VEC_SIZE == 4
mj
.
v
=
vec_set
(
pj
[
0
]
->
mass
,
pj
[
1
]
->
mass
,
pj
[
2
]
->
mass
,
pj
[
3
]
->
mass
);
POrho_i
.
v
=
vec_set
(
pi
[
0
]
->
force
.
P_over_rho2
,
pi
[
1
]
->
force
.
P_over_rho2
,
pi
[
2
]
->
force
.
P_over_rho2
,
pi
[
3
]
->
force
.
P_over_rho2
);
POrho_j
.
v
=
vec_set
(
pj
[
0
]
->
force
.
P_over_rho2
,
pj
[
1
]
->
force
.
P_over_rho2
,
pj
[
2
]
->
force
.
P_over_rho2
,
pj
[
3
]
->
force
.
P_over_rho2
);
pirho
.
v
=
vec_set
(
pi
[
0
]
->
rho
,
pi
[
1
]
->
rho
,
pi
[
2
]
->
rho
,
pi
[
3
]
->
rho
);
pjrho
.
v
=
vec_set
(
pj
[
0
]
->
rho
,
pj
[
1
]
->
rho
,
pj
[
2
]
->
rho
,
pj
[
3
]
->
rho
);
ci
.
v
=
vec_set
(
pi
[
0
]
->
force
.
soundspeed
,
pi
[
1
]
->
force
.
soundspeed
,
pi
[
2
]
->
force
.
soundspeed
,
pi
[
3
]
->
force
.
soundspeed
);
cj
.
v
=
vec_set
(
pj
[
0
]
->
force
.
soundspeed
,
pj
[
1
]
->
force
.
soundspeed
,
pj
[
2
]
->
force
.
soundspeed
,
pj
[
3
]
->
force
.
soundspeed
);
vi_sig
.
v
=
vec_set
(
pi
[
0
]
->
force
.
v_sig
,
pi
[
1
]
->
force
.
v_sig
,
pi
[
2
]
->
force
.
v_sig
,
pi
[
3
]
->
force
.
v_sig
);
vj_sig
.
v
=
vec_set
(
pj
[
0
]
->
force
.
v_sig
,
pj
[
1
]
->
force
.
v_sig
,
pj
[
2
]
->
force
.
v_sig
,
pj
[
3
]
->
force
.
v_sig
);
for
(
k
=
0
;
k
<
3
;
k
++
)
{
vi
[
k
].
v
=
vec_set
(
pi
[
0
]
->
v
[
k
],
pi
[
1
]
->
v
[
k
],
pi
[
2
]
->
v
[
k
],
pi
[
3
]
->
v
[
k
]);
vj
[
k
].
v
=
vec_set
(
pj
[
0
]
->
v
[
k
],
pj
[
1
]
->
v
[
k
],
pj
[
2
]
->
v
[
k
],
pj
[
3
]
->
v
[
k
]);
}
for
(
k
=
0
;
k
<
3
;
k
++
)
dx
[
k
].
v
=
vec_set
(
Dx
[
0
+
k
],
Dx
[
3
+
k
],
Dx
[
6
+
k
],
Dx
[
9
+
k
]);
balsara
.
v
=
vec_set
(
pi
[
0
]
->
force
.
balsara
,
pi
[
1
]
->
force
.
balsara
,
pi
[
2
]
->
force
.
balsara
,
pi
[
3
]
->
force
.
balsara
)
+
vec_set
(
pj
[
0
]
->
force
.
balsara
,
pj
[
1
]
->
force
.
balsara
,
pj
[
2
]
->
force
.
balsara
,
pj
[
3
]
->
force
.
balsara
);
#else
error
(
"Unknown vector size."
)
#endif
/* Get the radius and inverse radius. */
r2
.
v
=
vec_load
(
R2
);
ri
.
v
=
vec_rsqrt
(
r2
.
v
);
ri
.
v
=
ri
.
v
-
vec_set1
(
0
.
5
f
)
*
ri
.
v
*
(
r2
.
v
*
ri
.
v
*
ri
.
v
-
vec_set1
(
1
.
0
f
));
r
.
v
=
r2
.
v
*
ri
.
v
;
/* Get the kernel for hi. */
hi
.
v
=
vec_load
(
Hi
);
hi_inv
.
v
=
vec_rcp
(
hi
.
v
);
hi_inv
.
v
=
hi_inv
.
v
-
hi_inv
.
v
*
(
hi
.
v
*
hi_inv
.
v
-
vec_set1
(
1
.
0
f
));
hi2_inv
.
v
=
hi_inv
.
v
*
hi_inv
.
v
;
xi
.
v
=
r
.
v
*
hi_inv
.
v
;
kernel_deval_vec
(
&
xi
,
&
wi
,
&
wi_dx
);
wi_dr
.
v
=
hi2_inv
.
v
*
hi2_inv
.
v
*
wi_dx
.
v
;
/* Get the kernel for hj. */
hj
.
v
=
vec_load
(
Hj
);
hj_inv
.
v
=
vec_rcp
(
hj
.
v
);
hj_inv
.
v
=
hj_inv
.
v
-
hj_inv
.
v
*
(
hj
.
v
*
hj_inv
.
v
-
vec_set1
(
1
.
0
f
));
hj2_inv
.
v
=
hj_inv
.
v
*
hj_inv
.
v
;
xj
.
v
=
r
.
v
*
hj_inv
.
v
;
kernel_deval_vec
(
&
xj
,
&
wj
,
&
wj_dx
);
wj_dr
.
v
=
hj2_inv
.
v
*
hj2_inv
.
v
*
wj_dx
.
v
;
/* Compute dv dot r. */
dvdr
.
v
=
((
vi
[
0
].
v
-
vj
[
0
].
v
)
*
dx
[
0
].
v
)
+
((
vi
[
1
].
v
-
vj
[
1
].
v
)
*
dx
[
1
].
v
)
+
((
vi
[
2
].
v
-
vj
[
2
].
v
)
*
dx
[
2
].
v
);
//dvdr.v = dvdr.v * ri.v;
/* Compute the relative velocity. (This is 0 if the particles move away from
* each other and negative otherwise) */
omega_ij
.
v
=
vec_fmin
(
dvdr
.
v
,
vec_set1
(
0
.
0
f
));
mu_ij
.
v
=
fac_mu
.
v
*
ri
.
v
*
omega_ij
.
v
;
/* This is 0 or negative */
/* Compute signal velocity */
v_sig
.
v
=
ci
.
v
+
cj
.
v
-
vec_set1
(
3
.
0
f
)
*
mu_ij
.
v
;
/* Now construct the full viscosity term */
rho_ij
.
v
=
vec_set1
(
0
.
5
f
)
*
(
pirho
.
v
+
pjrho
.
v
);
visc
.
v
=
vec_set1
(
-
0
.
25
f
)
*
vec_set1
(
const_viscosity_alpha
)
*
v_sig
.
v
*
mu_ij
.
v
*
balsara
.
v
/
rho_ij
.
v
;
/* Now, convolve with the kernel */
visc_term
.
v
=
vec_set1
(
0
.
5
f
)
*
visc
.
v
*
(
wi_dr
.
v
+
wj_dr
.
v
)
*
ri
.
v
;
sph_term
.
v
=
(
piPOrho
.
v
*
wi_dr
.
v
+
pjPOrho
.
v
*
wj_dr
.
v
)
*
ri
.
v
;
/* Eventually get the acceleration */
acc
.
v
=
visc_term
.
v
+
sph_term
.
v
;
/* Use the force, Luke! */
for
(
k
=
0
;
k
<
3
;
k
++
)
{
f
.
v
=
dx
[
k
].
v
*
acc
.
v
;
pia
[
k
].
v
=
mj
.
v
*
f
.
v
;
}
/* Get the time derivative for h. */
pih_dt
.
v
=
mj
.
v
*
dvdr
.
v
*
ri
.
v
/
pirho
.
v
*
wi_dr
.
v
;
/* compute the signal velocity (this is always symmetrical). */
vi_sig
.
v
=
vec_fmax
(
vi_sig
.
v
,
v_sig
.
v
);
//vj_sig.v = vec_fmax(vj_sig.v, v_sig.v);
/* Change in entropy */
entropy_dt
.
v
=
vec_set1
(
0
.
5
f
)
*
mj
.
v
*
visc_term
.
v
*
dvdr
.
v
;
/* Store the forces back on the particles. */
for
(
k
=
0
;
k
<
VEC_SIZE
;
k
++
)
{
for
(
j
=
0
;
j
<
3
;
j
++
)
pi
[
k
]
->
a_hydro
[
j
]
-=
pia
[
j
].
f
[
k
];
//pi[k]->force.u_dt += piu_dt.f[k];
pi
[
k
]
->
h_dt
-=
pih_dt
.
f
[
k
];
pi
[
k
]
->
force
.
v_sig
=
vi_sig
.
f
[
k
];
pi
[
k
]
->
force
.
entropy_dt
+=
entropy_dt
.
f
[
k
];
//pj[k]->force.v_sig = vj_sig.f[k];
}
#else
error
(
"The Gadget2 serial version of runner_iact_nonsym_force was called when the vectorised version should have been used."
)
#endif
}
}
#endif
/* SWIFT_RUNNER_IACT_LEGACY_H */
#endif
/* SWIFT_RUNNER_IACT_LEGACY_H */
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment