Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
SWIFT
SWIFTsim
Commits
eb07db4d
Commit
eb07db4d
authored
Sep 11, 2017
by
Matthieu Schaller
Browse files
Merge branch 'master' into gravity_speedup
parents
23b2fff2
55d562a1
Changes
22
Hide whitespace changes
Inline
Side-by-side
.gitignore
View file @
eb07db4d
...
...
@@ -59,6 +59,11 @@ tests/brute_force_periodic_BC_perturbed.dat
tests/swift_dopair_active.dat
tests/test_nonsym_density_serial.dat
tests/test_nonsym_density_vec.dat
tests/test_nonsym_force_serial.dat
tests/test_nonsym_density_1_vec.dat
tests/test_nonsym_density_2_vec.dat
tests/test_nonsym_force_1_vec.dat
tests/test_nonsym_force_2_vec.dat
tests/testGreetings
tests/testReading
tests/input.hdf5
...
...
examples/main.c
View file @
eb07db4d
...
...
@@ -677,6 +677,7 @@ int main(int argc, char *argv[]) {
/* Write the state of the system before starting time integration. */
engine_dump_snapshot
(
&
e
);
engine_print_stats
(
&
e
);
/* Legend */
if
(
myrank
==
0
)
...
...
@@ -815,6 +816,7 @@ int main(int argc, char *argv[]) {
/* Write final output. */
engine_drift_all
(
&
e
);
engine_print_stats
(
&
e
);
engine_dump_snapshot
(
&
e
);
#ifdef WITH_MPI
...
...
src/active.h
View file @
eb07db4d
...
...
@@ -144,6 +144,14 @@ __attribute__((always_inline)) INLINE static int part_is_active(
return
(
part_bin
<=
max_active_bin
);
}
__attribute__
((
always_inline
))
INLINE
static
int
part_is_active_no_debug
(
const
struct
part
*
p
,
const
timebin_t
max_active_bin
)
{
const
timebin_t
part_bin
=
p
->
time_bin
;
return
(
part_bin
<=
max_active_bin
);
}
/**
* @brief Is this g-particle finishing its time-step now ?
*
...
...
src/cache.h
View file @
eb07db4d
...
...
@@ -65,6 +65,21 @@ struct cache {
/* Maximum index into neighbouring cell for particles that are in range. */
int
*
restrict
max_index
SWIFT_CACHE_ALIGN
;
/* Particle density. */
float
*
restrict
rho
SWIFT_CACHE_ALIGN
;
/* Particle smoothing length gradient. */
float
*
restrict
grad_h
SWIFT_CACHE_ALIGN
;
/* Pressure over density squared. */
float
*
restrict
pOrho2
SWIFT_CACHE_ALIGN
;
/* Balsara switch. */
float
*
restrict
balsara
SWIFT_CACHE_ALIGN
;
/* Particle sound speed. */
float
*
restrict
soundspeed
SWIFT_CACHE_ALIGN
;
/* Cache size. */
int
count
;
};
...
...
@@ -127,6 +142,11 @@ __attribute__((always_inline)) INLINE void cache_init(struct cache *c,
free
(
c
->
vz
);
free
(
c
->
h
);
free
(
c
->
max_index
);
free
(
c
->
rho
);
free
(
c
->
grad_h
);
free
(
c
->
pOrho2
);
free
(
c
->
balsara
);
free
(
c
->
soundspeed
);
}
error
+=
posix_memalign
((
void
**
)
&
c
->
x
,
SWIFT_CACHE_ALIGNMENT
,
sizeBytes
);
...
...
@@ -139,6 +159,15 @@ __attribute__((always_inline)) INLINE void cache_init(struct cache *c,
error
+=
posix_memalign
((
void
**
)
&
c
->
h
,
SWIFT_CACHE_ALIGNMENT
,
sizeBytes
);
error
+=
posix_memalign
((
void
**
)
&
c
->
max_index
,
SWIFT_CACHE_ALIGNMENT
,
sizeIntBytes
);
error
+=
posix_memalign
((
void
**
)
&
c
->
rho
,
SWIFT_CACHE_ALIGNMENT
,
sizeBytes
);
error
+=
posix_memalign
((
void
**
)
&
c
->
grad_h
,
SWIFT_CACHE_ALIGNMENT
,
sizeBytes
);
error
+=
posix_memalign
((
void
**
)
&
c
->
pOrho2
,
SWIFT_CACHE_ALIGNMENT
,
sizeBytes
);
error
+=
posix_memalign
((
void
**
)
&
c
->
balsara
,
SWIFT_CACHE_ALIGNMENT
,
sizeBytes
);
error
+=
posix_memalign
((
void
**
)
&
c
->
soundspeed
,
SWIFT_CACHE_ALIGNMENT
,
sizeBytes
);
if
(
error
!=
0
)
error
(
"Couldn't allocate cache, no. of particles: %d"
,
(
int
)
count
);
...
...
@@ -191,6 +220,68 @@ __attribute__((always_inline)) INLINE void cache_read_particles(
#endif
}
/**
* @brief Populate cache for force interactions by reading in the particles in
* unsorted order.
*
* @param ci The #cell.
* @param ci_cache The cache.
*/
__attribute__
((
always_inline
))
INLINE
void
cache_read_force_particles
(
const
struct
cell
*
restrict
const
ci
,
struct
cache
*
restrict
const
ci_cache
)
{
#if defined(GADGET2_SPH)
/* Let the compiler know that the data is aligned and create pointers to the
* arrays inside the cache. */
swift_declare_aligned_ptr
(
float
,
x
,
ci_cache
->
x
,
SWIFT_CACHE_ALIGNMENT
);
swift_declare_aligned_ptr
(
float
,
y
,
ci_cache
->
y
,
SWIFT_CACHE_ALIGNMENT
);
swift_declare_aligned_ptr
(
float
,
z
,
ci_cache
->
z
,
SWIFT_CACHE_ALIGNMENT
);
swift_declare_aligned_ptr
(
float
,
h
,
ci_cache
->
h
,
SWIFT_CACHE_ALIGNMENT
);
swift_declare_aligned_ptr
(
float
,
m
,
ci_cache
->
m
,
SWIFT_CACHE_ALIGNMENT
);
swift_declare_aligned_ptr
(
float
,
vx
,
ci_cache
->
vx
,
SWIFT_CACHE_ALIGNMENT
);
swift_declare_aligned_ptr
(
float
,
vy
,
ci_cache
->
vy
,
SWIFT_CACHE_ALIGNMENT
);
swift_declare_aligned_ptr
(
float
,
vz
,
ci_cache
->
vz
,
SWIFT_CACHE_ALIGNMENT
);
swift_declare_aligned_ptr
(
float
,
rho
,
ci_cache
->
rho
,
SWIFT_CACHE_ALIGNMENT
);
swift_declare_aligned_ptr
(
float
,
grad_h
,
ci_cache
->
grad_h
,
SWIFT_CACHE_ALIGNMENT
);
swift_declare_aligned_ptr
(
float
,
pOrho2
,
ci_cache
->
pOrho2
,
SWIFT_CACHE_ALIGNMENT
);
swift_declare_aligned_ptr
(
float
,
balsara
,
ci_cache
->
balsara
,
SWIFT_CACHE_ALIGNMENT
);
swift_declare_aligned_ptr
(
float
,
soundspeed
,
ci_cache
->
soundspeed
,
SWIFT_CACHE_ALIGNMENT
);
const
struct
part
*
restrict
parts
=
ci
->
parts
;
double
loc
[
3
];
loc
[
0
]
=
ci
->
loc
[
0
];
loc
[
1
]
=
ci
->
loc
[
1
];
loc
[
2
]
=
ci
->
loc
[
2
];
/* Shift the particles positions to a local frame so single precision can be
* used instead of double precision. */
for
(
int
i
=
0
;
i
<
ci
->
count
;
i
++
)
{
x
[
i
]
=
(
float
)(
parts
[
i
].
x
[
0
]
-
loc
[
0
]);
y
[
i
]
=
(
float
)(
parts
[
i
].
x
[
1
]
-
loc
[
1
]);
z
[
i
]
=
(
float
)(
parts
[
i
].
x
[
2
]
-
loc
[
2
]);
h
[
i
]
=
parts
[
i
].
h
;
m
[
i
]
=
parts
[
i
].
mass
;
vx
[
i
]
=
parts
[
i
].
v
[
0
];
vy
[
i
]
=
parts
[
i
].
v
[
1
];
vz
[
i
]
=
parts
[
i
].
v
[
2
];
rho
[
i
]
=
parts
[
i
].
rho
;
grad_h
[
i
]
=
parts
[
i
].
force
.
f
;
pOrho2
[
i
]
=
parts
[
i
].
force
.
P_over_rho2
;
balsara
[
i
]
=
parts
[
i
].
force
.
balsara
;
soundspeed
[
i
]
=
parts
[
i
].
force
.
soundspeed
;
}
#endif
}
/**
* @brief Populate caches by only reading particles that are within range of
* each other within the adjoining cell.Also read the particles into the cache
...
...
src/engine.c
View file @
eb07db4d
...
...
@@ -3378,6 +3378,9 @@ void engine_print_stats(struct engine *e) {
struct
statistics
global_stats
=
stats
;
#endif
/* Finalize operations */
stats_finalize
(
&
stats
);
/* Print info */
if
(
e
->
nodeID
==
0
)
stats_print_to_file
(
e
->
file_stats
,
&
global_stats
,
e
->
time
);
...
...
@@ -4372,7 +4375,7 @@ void engine_init(struct engine *e, struct space *s,
e
->
file_timesteps
=
NULL
;
e
->
deltaTimeStatistics
=
parser_get_param_double
(
params
,
"Statistics:delta_time"
);
e
->
timeLastStatistics
=
e
->
timeBegin
-
e
->
deltaTimeStatistics
;
e
->
timeLastStatistics
=
0
;
e
->
verbose
=
verbose
;
e
->
count_step
=
0
;
e
->
wallclock_time
=
0
.
f
;
...
...
@@ -4540,10 +4543,10 @@ void engine_init(struct engine *e, struct space *s,
e
->
file_stats
=
fopen
(
energyfileName
,
"w"
);
fprintf
(
e
->
file_stats
,
"#%14s %14s %14s %14s %14s %14s %14s %14s %14s %14s %14s %14s %14s "
"%14s %14s %14s
\n
"
,
"%14s %14s
%14s %14s %14s
%14s
\n
"
,
"Time"
,
"Mass"
,
"E_tot"
,
"E_kin"
,
"E_int"
,
"E_pot"
,
"E_pot_self"
,
"E_pot_ext"
,
"E_radcool"
,
"Entropy"
,
"p_x"
,
"p_y"
,
"p_z"
,
"ang_x"
,
"ang_y"
,
"ang_z"
);
"ang_y"
,
"ang_z"
,
"com_x"
,
"com_y"
,
"com_z"
);
fflush
(
e
->
file_stats
);
char
timestepsfileName
[
200
]
=
""
;
...
...
src/engine.h
View file @
eb07db4d
...
...
@@ -261,6 +261,7 @@ void engine_unskip(struct engine *e);
void
engine_drift_all
(
struct
engine
*
e
);
void
engine_drift_top_multipoles
(
struct
engine
*
e
);
void
engine_reconstruct_multipoles
(
struct
engine
*
e
);
void
engine_print_stats
(
struct
engine
*
e
);
void
engine_dump_snapshot
(
struct
engine
*
e
);
void
engine_init
(
struct
engine
*
e
,
struct
space
*
s
,
const
struct
swift_params
*
params
,
int
nr_nodes
,
int
nodeID
,
...
...
src/hydro/Gadget2/hydro_iact.h
View file @
eb07db4d
...
...
@@ -1167,4 +1167,365 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force(
#endif
}
#ifdef WITH_VECTORIZATION
static
const
vector
const_viscosity_alpha_fac
=
FILL_VEC
(
-
0
.
25
f
*
const_viscosity_alpha
);
/**
* @brief Force interaction computed using 1 vector
* (non-symmetric vectorized version).
*/
__attribute__
((
always_inline
))
INLINE
static
void
runner_iact_nonsym_1_vec_force
(
vector
*
r2
,
vector
*
dx
,
vector
*
dy
,
vector
*
dz
,
vector
vix
,
vector
viy
,
vector
viz
,
vector
pirho
,
vector
grad_hi
,
vector
piPOrho2
,
vector
balsara_i
,
vector
ci
,
float
*
Vjx
,
float
*
Vjy
,
float
*
Vjz
,
float
*
Pjrho
,
float
*
Grad_hj
,
float
*
PjPOrho2
,
float
*
Balsara_j
,
float
*
Cj
,
float
*
Mj
,
vector
hi_inv
,
vector
hj_inv
,
vector
*
a_hydro_xSum
,
vector
*
a_hydro_ySum
,
vector
*
a_hydro_zSum
,
vector
*
h_dtSum
,
vector
*
v_sigSum
,
vector
*
entropy_dtSum
,
mask_t
mask
)
{
#ifdef WITH_VECTORIZATION
vector
r
,
ri
;
vector
vjx
,
vjy
,
vjz
,
dvx
,
dvy
,
dvz
;
vector
pjrho
,
grad_hj
,
pjPOrho2
,
balsara_j
,
cj
,
mj
;
vector
xi
,
xj
;
vector
hid_inv
,
hjd_inv
;
vector
wi_dx
,
wj_dx
,
wi_dr
,
wj_dr
,
dvdr
;
vector
piax
,
piay
,
piaz
;
vector
pih_dt
;
vector
v_sig
;
vector
omega_ij
,
mu_ij
,
fac_mu
,
balsara
;
vector
rho_ij
,
visc
,
visc_term
,
sph_term
,
acc
,
entropy_dt
;
/* Fill vectors. */
vjx
.
v
=
vec_load
(
Vjx
);
vjy
.
v
=
vec_load
(
Vjy
);
vjz
.
v
=
vec_load
(
Vjz
);
mj
.
v
=
vec_load
(
Mj
);
pjrho
.
v
=
vec_load
(
Pjrho
);
grad_hj
.
v
=
vec_load
(
Grad_hj
);
pjPOrho2
.
v
=
vec_load
(
PjPOrho2
);
balsara_j
.
v
=
vec_load
(
Balsara_j
);
cj
.
v
=
vec_load
(
Cj
);
fac_mu
.
v
=
vec_set1
(
1
.
f
);
/* Will change with cosmological integration */
/* Load stuff. */
balsara
.
v
=
vec_add
(
balsara_i
.
v
,
balsara_j
.
v
);
/* Get the radius and inverse radius. */
ri
=
vec_reciprocal_sqrt
(
*
r2
);
r
.
v
=
vec_mul
(
r2
->
v
,
ri
.
v
);
/* Get the kernel for hi. */
hid_inv
=
pow_dimension_plus_one_vec
(
hi_inv
);
xi
.
v
=
vec_mul
(
r
.
v
,
hi_inv
.
v
);
kernel_eval_dWdx_force_vec
(
&
xi
,
&
wi_dx
);
wi_dr
.
v
=
vec_mul
(
hid_inv
.
v
,
wi_dx
.
v
);
/* Get the kernel for hj. */
hjd_inv
=
pow_dimension_plus_one_vec
(
hj_inv
);
xj
.
v
=
vec_mul
(
r
.
v
,
hj_inv
.
v
);
/* Calculate the kernel for two particles. */
kernel_eval_dWdx_force_vec
(
&
xj
,
&
wj_dx
);
wj_dr
.
v
=
vec_mul
(
hjd_inv
.
v
,
wj_dx
.
v
);
/* Compute dv. */
dvx
.
v
=
vec_sub
(
vix
.
v
,
vjx
.
v
);
dvy
.
v
=
vec_sub
(
viy
.
v
,
vjy
.
v
);
dvz
.
v
=
vec_sub
(
viz
.
v
,
vjz
.
v
);
/* Compute dv dot r. */
dvdr
.
v
=
vec_fma
(
dvx
.
v
,
dx
->
v
,
vec_fma
(
dvy
.
v
,
dy
->
v
,
vec_mul
(
dvz
.
v
,
dz
->
v
)));
/* Compute the relative velocity. (This is 0 if the particles move away from
* each other and negative otherwise) */
omega_ij
.
v
=
vec_fmin
(
dvdr
.
v
,
vec_setzero
());
mu_ij
.
v
=
vec_mul
(
fac_mu
.
v
,
vec_mul
(
ri
.
v
,
omega_ij
.
v
));
/* This is 0 or negative */
/* Compute signal velocity */
v_sig
.
v
=
vec_fnma
(
vec_set1
(
3
.
f
),
mu_ij
.
v
,
vec_add
(
ci
.
v
,
cj
.
v
));
/* Now construct the full viscosity term */
rho_ij
.
v
=
vec_mul
(
vec_set1
(
0
.
5
f
),
vec_add
(
pirho
.
v
,
pjrho
.
v
));
visc
.
v
=
vec_div
(
vec_mul
(
const_viscosity_alpha_fac
.
v
,
vec_mul
(
v_sig
.
v
,
vec_mul
(
mu_ij
.
v
,
balsara
.
v
))),
rho_ij
.
v
);
/* Now, convolve with the kernel */
visc_term
.
v
=
vec_mul
(
vec_set1
(
0
.
5
f
),
vec_mul
(
visc
.
v
,
vec_mul
(
vec_add
(
wi_dr
.
v
,
wj_dr
.
v
),
ri
.
v
)));
sph_term
.
v
=
vec_mul
(
vec_fma
(
vec_mul
(
grad_hi
.
v
,
piPOrho2
.
v
),
wi_dr
.
v
,
vec_mul
(
grad_hj
.
v
,
vec_mul
(
pjPOrho2
.
v
,
wj_dr
.
v
))),
ri
.
v
);
/* Eventually get the acceleration */
acc
.
v
=
vec_add
(
visc_term
.
v
,
sph_term
.
v
);
/* Use the force, Luke! */
piax
.
v
=
vec_mul
(
mj
.
v
,
vec_mul
(
dx
->
v
,
acc
.
v
));
piay
.
v
=
vec_mul
(
mj
.
v
,
vec_mul
(
dy
->
v
,
acc
.
v
));
piaz
.
v
=
vec_mul
(
mj
.
v
,
vec_mul
(
dz
->
v
,
acc
.
v
));
/* Get the time derivative for h. */
pih_dt
.
v
=
vec_div
(
vec_mul
(
mj
.
v
,
vec_mul
(
dvdr
.
v
,
vec_mul
(
ri
.
v
,
wi_dr
.
v
))),
pjrho
.
v
);
/* Change in entropy */
entropy_dt
.
v
=
vec_mul
(
mj
.
v
,
vec_mul
(
visc_term
.
v
,
dvdr
.
v
));
/* Store the forces back on the particles. */
a_hydro_xSum
->
v
=
vec_mask_sub
(
a_hydro_xSum
->
v
,
piax
.
v
,
mask
);
a_hydro_ySum
->
v
=
vec_mask_sub
(
a_hydro_ySum
->
v
,
piay
.
v
,
mask
);
a_hydro_zSum
->
v
=
vec_mask_sub
(
a_hydro_zSum
->
v
,
piaz
.
v
,
mask
);
h_dtSum
->
v
=
vec_mask_sub
(
h_dtSum
->
v
,
pih_dt
.
v
,
mask
);
v_sigSum
->
v
=
vec_fmax
(
v_sigSum
->
v
,
vec_and_mask
(
v_sig
.
v
,
mask
));
entropy_dtSum
->
v
=
vec_mask_add
(
entropy_dtSum
->
v
,
entropy_dt
.
v
,
mask
);
#else
error
(
"The Gadget2 serial version of runner_iact_nonsym_force was called when "
"the vectorised version should have been used."
);
#endif
}
/**
* @brief Force interaction computed using 2 interleaved vectors
* (non-symmetric vectorized version).
*/
__attribute__
((
always_inline
))
INLINE
static
void
runner_iact_nonsym_2_vec_force
(
float
*
R2
,
float
*
Dx
,
float
*
Dy
,
float
*
Dz
,
vector
vix
,
vector
viy
,
vector
viz
,
vector
pirho
,
vector
grad_hi
,
vector
piPOrho2
,
vector
balsara_i
,
vector
ci
,
float
*
Vjx
,
float
*
Vjy
,
float
*
Vjz
,
float
*
Pjrho
,
float
*
Grad_hj
,
float
*
PjPOrho2
,
float
*
Balsara_j
,
float
*
Cj
,
float
*
Mj
,
vector
hi_inv
,
float
*
Hj_inv
,
vector
*
a_hydro_xSum
,
vector
*
a_hydro_ySum
,
vector
*
a_hydro_zSum
,
vector
*
h_dtSum
,
vector
*
v_sigSum
,
vector
*
entropy_dtSum
,
mask_t
mask
,
mask_t
mask_2
,
short
mask_cond
)
{
#ifdef WITH_VECTORIZATION
vector
r
,
r2
,
ri
;
vector
dx
,
dy
,
dz
,
dvx
,
dvy
,
dvz
;
vector
vjx
,
vjy
,
vjz
;
vector
pjrho
,
grad_hj
,
pjPOrho2
,
balsara_j
,
cj
,
mj
,
hj_inv
;
vector
ui
,
uj
;
vector
hid_inv
,
hjd_inv
;
vector
wi_dx
,
wj_dx
,
wi_dr
,
wj_dr
,
dvdr
;
vector
piax
,
piay
,
piaz
;
vector
pih_dt
;
vector
v_sig
;
vector
omega_ij
,
mu_ij
,
fac_mu
,
balsara
;
vector
rho_ij
,
visc
,
visc_term
,
sph_term
,
acc
,
entropy_dt
;
vector
r_2
,
r2_2
,
ri_2
;
vector
dx_2
,
dy_2
,
dz_2
,
dvx_2
,
dvy_2
,
dvz_2
;
vector
vjx_2
,
vjy_2
,
vjz_2
;
vector
pjrho_2
,
grad_hj_2
,
pjPOrho2_2
,
balsara_j_2
,
cj_2
,
mj_2
,
hj_inv_2
;
vector
ui_2
,
uj_2
;
vector
hjd_inv_2
;
vector
wi_dx_2
,
wj_dx_2
,
wi_dr_2
,
wj_dr_2
,
dvdr_2
;
vector
piax_2
,
piay_2
,
piaz_2
;
vector
pih_dt_2
;
vector
v_sig_2
;
vector
omega_ij_2
,
mu_ij_2
,
balsara_2
;
vector
rho_ij_2
,
visc_2
,
visc_term_2
,
sph_term_2
,
acc_2
,
entropy_dt_2
;
/* Fill vectors. */
mj
.
v
=
vec_load
(
Mj
);
mj_2
.
v
=
vec_load
(
&
Mj
[
VEC_SIZE
]);
vjx
.
v
=
vec_load
(
Vjx
);
vjx_2
.
v
=
vec_load
(
&
Vjx
[
VEC_SIZE
]);
vjy
.
v
=
vec_load
(
Vjy
);
vjy_2
.
v
=
vec_load
(
&
Vjy
[
VEC_SIZE
]);
vjz
.
v
=
vec_load
(
Vjz
);
vjz_2
.
v
=
vec_load
(
&
Vjz
[
VEC_SIZE
]);
dx
.
v
=
vec_load
(
Dx
);
dx_2
.
v
=
vec_load
(
&
Dx
[
VEC_SIZE
]);
dy
.
v
=
vec_load
(
Dy
);
dy_2
.
v
=
vec_load
(
&
Dy
[
VEC_SIZE
]);
dz
.
v
=
vec_load
(
Dz
);
dz_2
.
v
=
vec_load
(
&
Dz
[
VEC_SIZE
]);
/* Get the radius and inverse radius. */
r2
.
v
=
vec_load
(
R2
);
r2_2
.
v
=
vec_load
(
&
R2
[
VEC_SIZE
]);
ri
=
vec_reciprocal_sqrt
(
r2
);
ri_2
=
vec_reciprocal_sqrt
(
r2_2
);
r
.
v
=
vec_mul
(
r2
.
v
,
ri
.
v
);
r_2
.
v
=
vec_mul
(
r2_2
.
v
,
ri_2
.
v
);
/* Get remaining properties. */
pjrho
.
v
=
vec_load
(
Pjrho
);
pjrho_2
.
v
=
vec_load
(
&
Pjrho
[
VEC_SIZE
]);
grad_hj
.
v
=
vec_load
(
Grad_hj
);
grad_hj_2
.
v
=
vec_load
(
&
Grad_hj
[
VEC_SIZE
]);
pjPOrho2
.
v
=
vec_load
(
PjPOrho2
);
pjPOrho2_2
.
v
=
vec_load
(
&
PjPOrho2
[
VEC_SIZE
]);
balsara_j
.
v
=
vec_load
(
Balsara_j
);
balsara_j_2
.
v
=
vec_load
(
&
Balsara_j
[
VEC_SIZE
]);
cj
.
v
=
vec_load
(
Cj
);
cj_2
.
v
=
vec_load
(
&
Cj
[
VEC_SIZE
]);
hj_inv
.
v
=
vec_load
(
Hj_inv
);
hj_inv_2
.
v
=
vec_load
(
&
Hj_inv
[
VEC_SIZE
]);
fac_mu
.
v
=
vec_set1
(
1
.
f
);
/* Will change with cosmological integration */
/* Find the balsara switch. */
balsara
.
v
=
vec_add
(
balsara_i
.
v
,
balsara_j
.
v
);
balsara_2
.
v
=
vec_add
(
balsara_i
.
v
,
balsara_j_2
.
v
);
/* Get the kernel for hi. */
hid_inv
=
pow_dimension_plus_one_vec
(
hi_inv
);
ui
.
v
=
vec_mul
(
r
.
v
,
hi_inv
.
v
);
ui_2
.
v
=
vec_mul
(
r_2
.
v
,
hi_inv
.
v
);
kernel_eval_dWdx_force_vec
(
&
ui
,
&
wi_dx
);
kernel_eval_dWdx_force_vec
(
&
ui_2
,
&
wi_dx_2
);
wi_dr
.
v
=
vec_mul
(
hid_inv
.
v
,
wi_dx
.
v
);
wi_dr_2
.
v
=
vec_mul
(
hid_inv
.
v
,
wi_dx_2
.
v
);
/* Get the kernel for hj. */
hjd_inv
=
pow_dimension_plus_one_vec
(
hj_inv
);
hjd_inv_2
=
pow_dimension_plus_one_vec
(
hj_inv_2
);
uj
.
v
=
vec_mul
(
r
.
v
,
hj_inv
.
v
);
uj_2
.
v
=
vec_mul
(
r_2
.
v
,
hj_inv_2
.
v
);
/* Calculate the kernel for two particles. */
kernel_eval_dWdx_force_vec
(
&
uj
,
&
wj_dx
);
kernel_eval_dWdx_force_vec
(
&
uj_2
,
&
wj_dx_2
);
wj_dr
.
v
=
vec_mul
(
hjd_inv
.
v
,
wj_dx
.
v
);
wj_dr_2
.
v
=
vec_mul
(
hjd_inv_2
.
v
,
wj_dx_2
.
v
);
/* Compute dv. */
dvx
.
v
=
vec_sub
(
vix
.
v
,
vjx
.
v
);
dvx_2
.
v
=
vec_sub
(
vix
.
v
,
vjx_2
.
v
);
dvy
.
v
=
vec_sub
(
viy
.
v
,
vjy
.
v
);
dvy_2
.
v
=
vec_sub
(
viy
.
v
,
vjy_2
.
v
);
dvz
.
v
=
vec_sub
(
viz
.
v
,
vjz
.
v
);
dvz_2
.
v
=
vec_sub
(
viz
.
v
,
vjz_2
.
v
);
/* Compute dv dot r. */
dvdr
.
v
=
vec_fma
(
dvx
.
v
,
dx
.
v
,
vec_fma
(
dvy
.
v
,
dy
.
v
,
vec_mul
(
dvz
.
v
,
dz
.
v
)));
dvdr_2
.
v
=
vec_fma
(
dvx_2
.
v
,
dx_2
.
v
,
vec_fma
(
dvy_2
.
v
,
dy_2
.
v
,
vec_mul
(
dvz_2
.
v
,
dz_2
.
v
)));
/* Compute the relative velocity. (This is 0 if the particles move away from
* each other and negative otherwise) */
omega_ij
.
v
=
vec_fmin
(
dvdr
.
v
,
vec_setzero
());
omega_ij_2
.
v
=
vec_fmin
(
dvdr_2
.
v
,
vec_setzero
());
mu_ij
.
v
=
vec_mul
(
fac_mu
.
v
,
vec_mul
(
ri
.
v
,
omega_ij
.
v
));
/* This is 0 or negative */
mu_ij_2
.
v
=
vec_mul
(
fac_mu
.
v
,
vec_mul
(
ri_2
.
v
,
omega_ij_2
.
v
));
/* This is 0 or negative */
/* Compute signal velocity */
v_sig
.
v
=
vec_fnma
(
vec_set1
(
3
.
f
),
mu_ij
.
v
,
vec_add
(
ci
.
v
,
cj
.
v
));
v_sig_2
.
v
=
vec_fnma
(
vec_set1
(
3
.
f
),
mu_ij_2
.
v
,
vec_add
(
ci
.
v
,
cj_2
.
v
));
/* Now construct the full viscosity term */
rho_ij
.
v
=
vec_mul
(
vec_set1
(
0
.
5
f
),
vec_add
(
pirho
.
v
,
pjrho
.
v
));
rho_ij_2
.
v
=
vec_mul
(
vec_set1
(
0
.
5
f
),
vec_add
(
pirho
.
v
,
pjrho_2
.
v
));
visc
.
v
=
vec_div
(
vec_mul
(
const_viscosity_alpha_fac
.
v
,
vec_mul
(
v_sig
.
v
,
vec_mul
(
mu_ij
.
v
,
balsara
.
v
))),
rho_ij
.
v
);
visc_2
.
v
=
vec_div
(
vec_mul
(
const_viscosity_alpha_fac
.
v
,
vec_mul
(
v_sig_2
.
v
,
vec_mul
(
mu_ij_2
.
v
,
balsara_2
.
v
))),
rho_ij_2
.
v
);
/* Now, convolve with the kernel */
visc_term
.
v
=
vec_mul
(
vec_set1
(
0
.
5
f
),
vec_mul
(
visc
.
v
,
vec_mul
(
vec_add
(
wi_dr
.
v
,
wj_dr
.
v
),
ri
.
v
)));
visc_term_2
.
v
=
vec_mul
(
vec_set1
(
0
.
5
f
),
vec_mul
(
visc_2
.
v
,
vec_mul
(
vec_add
(
wi_dr_2
.
v
,
wj_dr_2
.
v
),
ri_2
.
v
)));
vector
grad_hi_mul_piPOrho2
;
grad_hi_mul_piPOrho2
.
v
=
vec_mul
(
grad_hi
.
v
,
piPOrho2
.
v
);
sph_term
.
v
=
vec_mul
(
vec_fma
(
grad_hi_mul_piPOrho2
.
v
,
wi_dr
.
v
,
vec_mul
(
grad_hj
.
v
,
vec_mul
(
pjPOrho2
.
v
,
wj_dr
.
v
))),
ri
.
v
);
sph_term_2
.
v
=
vec_mul
(
vec_fma
(
grad_hi_mul_piPOrho2
.
v
,
wi_dr_2
.
v
,
vec_mul
(
grad_hj_2
.
v
,
vec_mul
(
pjPOrho2_2
.
v
,
wj_dr_2
.
v
))),
ri_2
.
v
);
/* Eventually get the acceleration */
acc
.
v
=
vec_add
(
visc_term
.
v
,
sph_term
.
v
);
acc_2
.
v
=
vec_add
(
visc_term_2
.
v
,
sph_term_2
.
v
);
/* Use the force, Luke! */
piax
.
v
=
vec_mul
(
mj
.
v
,
vec_mul
(
dx
.
v
,
acc
.
v
));
piax_2
.
v
=
vec_mul
(
mj_2
.
v
,
vec_mul
(
dx_2
.
v
,
acc_2
.
v
));
piay
.
v
=
vec_mul
(
mj
.
v
,
vec_mul
(
dy
.
v
,
acc
.
v
));
piay_2
.
v
=
vec_mul
(
mj_2
.
v
,
vec_mul
(
dy_2
.
v
,
acc_2
.
v
));
piaz
.
v
=
vec_mul
(
mj
.
v
,
vec_mul
(
dz
.
v
,
acc
.
v
));
piaz_2
.
v
=
vec_mul
(
mj_2
.
v
,
vec_mul
(
dz_2
.
v
,
acc_2
.
v
));
/* Get the time derivative for h. */
pih_dt
.
v
=
vec_div
(
vec_mul
(
mj
.
v
,
vec_mul
(
dvdr
.
v
,
vec_mul
(
ri
.
v
,
wi_dr
.
v
))),
pjrho
.
v
);
pih_dt_2
.
v
=
vec_div
(
vec_mul
(
mj_2
.
v
,
vec_mul
(
dvdr_2
.
v
,
vec_mul
(
ri_2
.
v
,
wi_dr_2
.
v
))),
pjrho_2
.
v
);
/* Change in entropy */
entropy_dt
.
v
=
vec_mul
(
mj
.
v
,
vec_mul
(
visc_term
.
v
,
dvdr
.
v
));
entropy_dt_2
.
v
=
vec_mul
(
mj_2
.
v
,
vec_mul
(
visc_term_2
.
v
,
dvdr_2
.
v
));
/* Store the forces back on the particles. */
if
(
mask_cond
)
{
a_hydro_xSum
->
v
=
vec_mask_sub
(
a_hydro_xSum
->
v
,
piax
.
v
,
mask
);
a_hydro_xSum
->
v
=
vec_mask_sub
(
a_hydro_xSum
->
v
,
piax_2
.
v
,
mask_2
);
a_hydro_ySum
->
v
=
vec_mask_sub
(
a_hydro_ySum
->
v
,
piay
.
v
,
mask
);
a_hydro_ySum
->
v
=
vec_mask_sub
(
a_hydro_ySum
->
v
,
piay_2
.
v
,
mask_2
);
a_hydro_zSum
->
v
=
vec_mask_sub
(
a_hydro_zSum
->
v
,
piaz
.
v
,
mask
);
a_hydro_zSum
->
v
=
vec_mask_sub
(
a_hydro_zSum
->
v
,
piaz_2
.
v
,
mask_2
);
h_dtSum
->
v
=
vec_mask_sub
(
h_dtSum
->
v
,
pih_dt
.
v
,
mask
);
h_dtSum
->
v
=
vec_mask_sub
(
h_dtSum
->
v
,
pih_dt_2
.
v
,
mask_2
);
v_sigSum
->
v
=
vec_fmax
(
v_sigSum
->
v
,
vec_and_mask
(
v_sig
.
v
,
mask
));
v_sigSum
->
v
=
vec_fmax
(
v_sigSum
->
v
,
vec_and_mask
(
v_sig_2
.
v
,
mask_2
));
entropy_dtSum
->
v
=
vec_mask_add
(
entropy_dtSum
->
v
,
entropy_dt
.
v
,
mask
);
entropy_dtSum
->
v
=
vec_mask_add
(
entropy_dtSum
->
v
,
entropy_dt_2
.
v
,
mask_2
);
}
else
{
a_hydro_xSum
->
v
=
vec_sub
(
a_hydro_xSum
->
v
,
piax
.
v
);
a_hydro_xSum
->
v
=
vec_sub
(
a_hydro_xSum
->
v
,
piax_2
.
v
);
a_hydro_ySum
->
v
=
vec_sub
(
a_hydro_ySum
->
v
,
piay
.
v
);
a_hydro_ySum
->
v
=
vec_sub
(
a_hydro_ySum
->
v
,
piay_2
.
v
);
a_hydro_zSum
->
v
=
vec_sub
(
a_hydro_zSum
->
v
,
piaz
.
v
);
a_hydro_zSum
->
v
=
vec_sub
(
a_hydro_zSum
->
v
,
piaz_2
.
v
);
h_dtSum
->
v
=
vec_sub
(
h_dtSum
->
v
,
pih_dt
.
v
);
h_dtSum
->
v
=
vec_sub
(
h_dtSum
->
v
,
pih_dt_2
.
v
);
v_sigSum
->
v
=
vec_fmax
(
v_sigSum
->
v
,
v_sig
.
v
);
v_sigSum
->
v
=
vec_fmax
(
v_sigSum
->
v
,
v_sig_2
.
v
);
entropy_dtSum
->
v
=
vec_add
(
entropy_dtSum
->
v
,
entropy_dt
.
v
);
entropy_dtSum
->
v
=
vec_add
(
entropy_dtSum
->
v
,
entropy_dt_2
.
v
);
}
#else
error
(
"The Gadget2 serial version of runner_iact_nonsym_force was called when "
"the vectorised version should have been used."
);
#endif
}
#endif
#endif
/* SWIFT_GADGET2_HYDRO_IACT_H */
src/kernel_hydro.h
View file @
eb07db4d
...
...
@@ -438,9 +438,7 @@ static const vector cond = FILL_VEC(0.5f);
/**
* @brief Computes the kernel function and its derivative for two particles
* using vectors.
*
* Return 0 if $u > \\gamma = H/h$
* using vectors. The return value is undefined if $u > \\gamma = H/h$.
*
* @param u The ratio of the distance to the smoothing length $u = x/h$.
* @param w (return) The value of the kernel function $W(x,h)$.
...
...
@@ -518,9 +516,8 @@ __attribute__((always_inline)) INLINE static void kernel_deval_1_vec(
/**
* @brief Computes the kernel function and its derivative for two particles
* using interleaved vectors.
*