Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
SWIFTsim
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Deploy
Releases
Model registry
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
SWIFT
SWIFTsim
Commits
11f5d292
Commit
11f5d292
authored
7 years ago
by
Matthieu Schaller
Browse files
Options
Downloads
Plain Diff
Merge branch 'cosmo_vec' into 'master'
Cosmological terms in vectorized version of Gadget SPH. Closes
#406
See merge request
!510
parents
560cbdde
0901bce8
Branches
Branches containing commit
Tags
Tags containing commit
1 merge request
!510
Cosmo vec
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
src/hydro/Gadget2/hydro_iact.h
+18
-12
18 additions, 12 deletions
src/hydro/Gadget2/hydro_iact.h
src/runner_doiact_vec.c
+13
-3
13 additions, 3 deletions
src/runner_doiact_vec.c
tests/testInteractions.c
+2
-2
2 additions, 2 deletions
tests/testInteractions.c
with
33 additions
and
17 deletions
src/hydro/Gadget2/hydro_iact.h
+
18
−
12
View file @
11f5d292
...
...
@@ -659,7 +659,7 @@ runner_iact_nonsym_1_vec_force(
vector
viz
,
vector
pirho
,
vector
grad_hi
,
vector
piPOrho2
,
vector
balsara_i
,
vector
ci
,
float
*
Vjx
,
float
*
Vjy
,
float
*
Vjz
,
float
*
Pjrho
,
float
*
Grad_hj
,
float
*
PjPOrho2
,
float
*
Balsara_j
,
float
*
Cj
,
float
*
Mj
,
vector
hi_inv
,
vector
hj_inv
,
vector
*
a_hydro_xSum
,
vector
*
a_hydro_ySum
,
vector
hj_inv
,
const
float
a
,
const
float
H
,
vector
*
a_hydro_xSum
,
vector
*
a_hydro_ySum
,
vector
*
a_hydro_zSum
,
vector
*
h_dtSum
,
vector
*
v_sigSum
,
vector
*
entropy_dtSum
,
mask_t
mask
)
{
...
...
@@ -687,8 +687,11 @@ runner_iact_nonsym_1_vec_force(
const
vector
balsara_j
=
vector_load
(
Balsara_j
);
const
vector
cj
=
vector_load
(
Cj
);
const
vector
fac_mu
=
vector_set1
(
1
.
f
);
/* Will change with cosmological integration */
/* Cosmological terms */
const
float
fac_mu
=
pow_three_gamma_minus_five_over_two
(
a
);
const
float
a2_Hubble
=
a
*
a
*
H
;
const
vector
v_fac_mu
=
vector_set1
(
fac_mu
);
const
vector
v_a2_Hubble
=
vector_set1
(
a2_Hubble
);
/* Load stuff. */
balsara
.
v
=
vec_add
(
balsara_i
.
v
,
balsara_j
.
v
);
...
...
@@ -718,13 +721,13 @@ runner_iact_nonsym_1_vec_force(
dvz
.
v
=
vec_sub
(
viz
.
v
,
vjz
.
v
);
/* Compute dv dot r. */
dvdr
.
v
=
vec_fma
(
dvx
.
v
,
dx
->
v
,
vec_fma
(
dvy
.
v
,
dy
->
v
,
vec_
mul
(
dvz
.
v
,
dz
->
v
)));
dvdr
.
v
=
vec_fma
(
dvx
.
v
,
dx
->
v
,
vec_fma
(
dvy
.
v
,
dy
->
v
,
vec_
fma
(
dvz
.
v
,
dz
->
v
,
vec_mul
(
v_a2_Hubble
.
v
,
r2
->
v
)
)));
/* Compute the relative velocity. (This is 0 if the particles move away from
* each other and negative otherwise) */
omega_ij
.
v
=
vec_fmin
(
dvdr
.
v
,
vec_setzero
());
mu_ij
.
v
=
vec_mul
(
fac_mu
.
v
,
vec_mul
(
ri
.
v
,
omega_ij
.
v
));
/* This is 0 or negative */
vec_mul
(
v_
fac_mu
.
v
,
vec_mul
(
ri
.
v
,
omega_ij
.
v
));
/* This is 0 or negative */
/* Compute signal velocity */
v_sig
.
v
=
vec_fnma
(
vec_set1
(
3
.
f
),
mu_ij
.
v
,
vec_add
(
ci
.
v
,
cj
.
v
));
...
...
@@ -787,7 +790,7 @@ runner_iact_nonsym_2_vec_force(
vector
viz
,
vector
pirho
,
vector
grad_hi
,
vector
piPOrho2
,
vector
balsara_i
,
vector
ci
,
float
*
Vjx
,
float
*
Vjy
,
float
*
Vjz
,
float
*
Pjrho
,
float
*
Grad_hj
,
float
*
PjPOrho2
,
float
*
Balsara_j
,
float
*
Cj
,
float
*
Mj
,
vector
hi_inv
,
float
*
Hj_inv
,
vector
*
a_hydro_xSum
,
vector
*
a_hydro_ySum
,
float
*
Hj_inv
,
const
float
a
,
const
float
H
,
vector
*
a_hydro_xSum
,
vector
*
a_hydro_ySum
,
vector
*
a_hydro_zSum
,
vector
*
h_dtSum
,
vector
*
v_sigSum
,
vector
*
entropy_dtSum
,
mask_t
mask
,
mask_t
mask_2
,
short
mask_cond
)
{
...
...
@@ -853,8 +856,11 @@ runner_iact_nonsym_2_vec_force(
const
vector
hj_inv
=
vector_load
(
Hj_inv
);
const
vector
hj_inv_2
=
vector_load
(
&
Hj_inv
[
VEC_SIZE
]);
const
vector
fac_mu
=
vector_set1
(
1
.
f
);
/* Will change with cosmological integration */
/* Cosmological terms */
const
float
fac_mu
=
pow_three_gamma_minus_five_over_two
(
a
);
const
float
a2_Hubble
=
a
*
a
*
H
;
const
vector
v_fac_mu
=
vector_set1
(
fac_mu
);
const
vector
v_a2_Hubble
=
vector_set1
(
a2_Hubble
);
/* Find the balsara switch. */
balsara
.
v
=
vec_add
(
balsara_i
.
v
,
balsara_j
.
v
);
...
...
@@ -891,18 +897,18 @@ runner_iact_nonsym_2_vec_force(
dvz_2
.
v
=
vec_sub
(
viz
.
v
,
vjz_2
.
v
);
/* Compute dv dot r. */
dvdr
.
v
=
vec_fma
(
dvx
.
v
,
dx
.
v
,
vec_fma
(
dvy
.
v
,
dy
.
v
,
vec_
mul
(
dvz
.
v
,
dz
.
v
)));
dvdr
.
v
=
vec_fma
(
dvx
.
v
,
dx
.
v
,
vec_fma
(
dvy
.
v
,
dy
.
v
,
vec_
fma
(
dvz
.
v
,
dz
.
v
,
vec_mul
(
v_a2_Hubble
.
v
,
r2
.
v
)
)));
dvdr_2
.
v
=
vec_fma
(
dvx_2
.
v
,
dx_2
.
v
,
vec_fma
(
dvy_2
.
v
,
dy_2
.
v
,
vec_
mul
(
dvz_2
.
v
,
dz_2
.
v
)));
vec_fma
(
dvy_2
.
v
,
dy_2
.
v
,
vec_
fma
(
dvz_2
.
v
,
dz_2
.
v
,
vec_mul
(
v_a2_Hubble
.
v
,
r2_2
.
v
)
)));
/* Compute the relative velocity. (This is 0 if the particles move away from
* each other and negative otherwise) */
omega_ij
.
v
=
vec_fmin
(
dvdr
.
v
,
vec_setzero
());
omega_ij_2
.
v
=
vec_fmin
(
dvdr_2
.
v
,
vec_setzero
());
mu_ij
.
v
=
vec_mul
(
fac_mu
.
v
,
vec_mul
(
ri
.
v
,
omega_ij
.
v
));
/* This is 0 or negative */
vec_mul
(
v_
fac_mu
.
v
,
vec_mul
(
ri
.
v
,
omega_ij
.
v
));
/* This is 0 or negative */
mu_ij_2
.
v
=
vec_mul
(
fac_mu
.
v
,
vec_mul
(
ri_2
.
v
,
omega_ij_2
.
v
));
/* This is 0 or negative */
v_
fac_mu
.
v
,
vec_mul
(
ri_2
.
v
,
omega_ij_2
.
v
));
/* This is 0 or negative */
/* Compute signal velocity */
v_sig
.
v
=
vec_fnma
(
vec_set1
(
3
.
f
),
mu_ij
.
v
,
vec_add
(
ci
.
v
,
cj
.
v
));
...
...
This diff is collapsed.
Click to expand it.
src/runner_doiact_vec.c
+
13
−
3
View file @
11f5d292
...
...
@@ -1111,6 +1111,7 @@ void runner_doself2_force_vec(struct runner *r, struct cell *restrict c) {
#if defined(WITH_VECTORIZATION) && defined(GADGET2_SPH)
const
struct
engine
*
e
=
r
->
e
;
const
struct
cosmology
*
restrict
cosmo
=
e
->
cosmology
;
const
timebin_t
max_active_bin
=
e
->
max_active_bin
;
struct
part
*
restrict
parts
=
c
->
parts
;
const
int
count
=
c
->
count
;
...
...
@@ -1139,6 +1140,10 @@ void runner_doself2_force_vec(struct runner *r, struct cell *restrict c) {
/* Read the particles from the cell and store them locally in the cache. */
cache_read_force_particles
(
c
,
cell_cache
);
/* Cosmological terms */
const
float
a
=
cosmo
->
a
;
const
float
H
=
cosmo
->
H
;
/* Loop over the particles in the cell. */
for
(
int
pid
=
0
;
pid
<
count
;
pid
++
)
{
...
...
@@ -1262,7 +1267,7 @@ void runner_doself2_force_vec(struct runner *r, struct cell *restrict c) {
&
cell_cache
->
vy
[
pjd
],
&
cell_cache
->
vz
[
pjd
],
&
cell_cache
->
rho
[
pjd
],
&
cell_cache
->
grad_h
[
pjd
],
&
cell_cache
->
pOrho2
[
pjd
],
&
cell_cache
->
balsara
[
pjd
],
&
cell_cache
->
soundspeed
[
pjd
],
&
cell_cache
->
m
[
pjd
],
v_hi_inv
,
v_hj_inv
,
&
v_a_hydro_xSum
,
&
cell_cache
->
m
[
pjd
],
v_hi_inv
,
v_hj_inv
,
a
,
H
,
&
v_a_hydro_xSum
,
&
v_a_hydro_ySum
,
&
v_a_hydro_zSum
,
&
v_h_dtSum
,
&
v_sigSum
,
&
v_entropy_dtSum
,
v_doi_mask
);
}
...
...
@@ -1988,6 +1993,7 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
#if defined(WITH_VECTORIZATION) && defined(GADGET2_SPH)
const
struct
engine
*
restrict
e
=
r
->
e
;
const
struct
cosmology
*
restrict
cosmo
=
e
->
cosmology
;
const
timebin_t
max_active_bin
=
e
->
max_active_bin
;
TIMER_TIC
;
...
...
@@ -2019,6 +2025,10 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
const
int
active_ci
=
cell_is_active_hydro
(
ci
,
e
)
&&
ci_local
;
const
int
active_cj
=
cell_is_active_hydro
(
cj
,
e
)
&&
cj_local
;
/* Cosmological terms */
const
float
a
=
cosmo
->
a
;
const
float
H
=
cosmo
->
H
;
#ifdef SWIFT_DEBUG_CHECKS
/* Check that particles have been drifted to the current time */
for
(
int
pid
=
0
;
pid
<
count_i
;
pid
++
)
...
...
@@ -2213,7 +2223,7 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
&
cj_cache
->
grad_h
[
cj_cache_idx
],
&
cj_cache
->
pOrho2
[
cj_cache_idx
],
&
cj_cache
->
balsara
[
cj_cache_idx
],
&
cj_cache
->
soundspeed
[
cj_cache_idx
],
&
cj_cache
->
m
[
cj_cache_idx
],
v_hi_inv
,
v_hj_inv
,
&
v_a_hydro_xSum
,
&
v_a_hydro_ySum
,
v_hi_inv
,
v_hj_inv
,
a
,
H
,
&
v_a_hydro_xSum
,
&
v_a_hydro_ySum
,
&
v_a_hydro_zSum
,
&
v_h_dtSum
,
&
v_sigSum
,
&
v_entropy_dtSum
,
v_doi_mask
);
}
...
...
@@ -2349,7 +2359,7 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
&
ci_cache
->
grad_h
[
ci_cache_idx
],
&
ci_cache
->
pOrho2
[
ci_cache_idx
],
&
ci_cache
->
balsara
[
ci_cache_idx
],
&
ci_cache
->
soundspeed
[
ci_cache_idx
],
&
ci_cache
->
m
[
ci_cache_idx
],
v_hj_inv
,
v_hi_inv
,
&
v_a_hydro_xSum
,
&
v_a_hydro_ySum
,
v_hj_inv
,
v_hi_inv
,
a
,
H
,
&
v_a_hydro_xSum
,
&
v_a_hydro_ySum
,
&
v_a_hydro_zSum
,
&
v_h_dtSum
,
&
v_sigSum
,
&
v_entropy_dtSum
,
v_doj_mask
);
}
...
...
This diff is collapsed.
Click to expand it.
tests/testInteractions.c
+
2
−
2
View file @
11f5d292
...
...
@@ -610,7 +610,7 @@ void test_force_interactions(struct part test_part, struct part *parts,
(
viz_vec
),
rhoi_vec
,
grad_hi_vec
,
pOrhoi2_vec
,
balsara_i_vec
,
ci_vec
,
&
(
vjxq
[
i
]),
&
(
vjyq
[
i
]),
&
(
vjzq
[
i
]),
&
(
rhojq
[
i
]),
&
(
grad_hjq
[
i
]),
&
(
pOrhoj2q
[
i
]),
&
(
balsarajq
[
i
]),
&
(
cjq
[
i
]),
&
(
mjq
[
i
]),
hi_inv_vec
,
&
(
hj_invq
[
i
]),
&
a_hydro_xSum
,
&
a_hydro_ySum
,
&
(
mjq
[
i
]),
hi_inv_vec
,
&
(
hj_invq
[
i
]),
a
,
H
,
&
a_hydro_xSum
,
&
a_hydro_ySum
,
&
a_hydro_zSum
,
&
h_dtSum
,
&
v_sigSum
,
&
entropy_dtSum
,
mask
,
mask2
,
0
);
}
else
{
/* Only use one vector for interaction. */
...
...
@@ -626,7 +626,7 @@ void test_force_interactions(struct part test_part, struct part *parts,
&
my_r2
,
&
my_dx
,
&
my_dy
,
&
my_dz
,
vix_vec
,
viy_vec
,
viz_vec
,
rhoi_vec
,
grad_hi_vec
,
pOrhoi2_vec
,
balsara_i_vec
,
ci_vec
,
&
(
vjxq
[
i
]),
&
(
vjyq
[
i
]),
&
(
vjzq
[
i
]),
&
(
rhojq
[
i
]),
&
(
grad_hjq
[
i
]),
&
(
pOrhoj2q
[
i
]),
&
(
balsarajq
[
i
]),
&
(
cjq
[
i
]),
&
(
mjq
[
i
]),
hi_inv_vec
,
hj_inv
,
&
(
balsarajq
[
i
]),
&
(
cjq
[
i
]),
&
(
mjq
[
i
]),
hi_inv_vec
,
hj_inv
,
a
,
H
,
&
a_hydro_xSum
,
&
a_hydro_ySum
,
&
a_hydro_zSum
,
&
h_dtSum
,
&
v_sigSum
,
&
entropy_dtSum
,
mask
);
}
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment