Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
SWIFT
SWIFTsim
Commits
7dd6e919
Commit
7dd6e919
authored
Aug 22, 2017
by
James Willis
Browse files
Formatting.
parent
71dde066
Changes
5
Hide whitespace changes
Inline
Side-by-side
src/cache.h
View file @
7dd6e919
...
...
@@ -64,7 +64,7 @@ struct cache {
/* Maximum index into neighbouring cell for particles that are in range. */
int
*
restrict
max_index
SWIFT_CACHE_ALIGN
;
/* Particle density. */
float
*
restrict
rho
SWIFT_CACHE_ALIGN
;
...
...
@@ -178,10 +178,14 @@ __attribute__((always_inline)) INLINE void cache_init(struct cache *c,
error
+=
posix_memalign
((
void
**
)
&
c
->
max_index
,
SWIFT_CACHE_ALIGNMENT
,
sizeIntBytes
);
error
+=
posix_memalign
((
void
**
)
&
c
->
rho
,
SWIFT_CACHE_ALIGNMENT
,
sizeBytes
);
error
+=
posix_memalign
((
void
**
)
&
c
->
grad_h
,
SWIFT_CACHE_ALIGNMENT
,
sizeBytes
);
error
+=
posix_memalign
((
void
**
)
&
c
->
pOrho2
,
SWIFT_CACHE_ALIGNMENT
,
sizeBytes
);
error
+=
posix_memalign
((
void
**
)
&
c
->
balsara
,
SWIFT_CACHE_ALIGNMENT
,
sizeBytes
);
error
+=
posix_memalign
((
void
**
)
&
c
->
soundspeed
,
SWIFT_CACHE_ALIGNMENT
,
sizeBytes
);
error
+=
posix_memalign
((
void
**
)
&
c
->
grad_h
,
SWIFT_CACHE_ALIGNMENT
,
sizeBytes
);
error
+=
posix_memalign
((
void
**
)
&
c
->
pOrho2
,
SWIFT_CACHE_ALIGNMENT
,
sizeBytes
);
error
+=
posix_memalign
((
void
**
)
&
c
->
balsara
,
SWIFT_CACHE_ALIGNMENT
,
sizeBytes
);
error
+=
posix_memalign
((
void
**
)
&
c
->
soundspeed
,
SWIFT_CACHE_ALIGNMENT
,
sizeBytes
);
if
(
error
!=
0
)
error
(
"Couldn't allocate cache, no. of particles: %d"
,
(
int
)
count
);
...
...
@@ -235,7 +239,8 @@ __attribute__((always_inline)) INLINE void cache_read_particles(
}
/**
* @brief Populate cache for force interactions by reading in the particles in unsorted order.
* @brief Populate cache for force interactions by reading in the particles in
* unsorted order.
*
* @param ci The #cell.
* @param ci_cache The cache.
...
...
@@ -257,10 +262,14 @@ __attribute__((always_inline)) INLINE void cache_read_force_particles(
swift_declare_aligned_ptr
(
float
,
vy
,
ci_cache
->
vy
,
SWIFT_CACHE_ALIGNMENT
);
swift_declare_aligned_ptr
(
float
,
vz
,
ci_cache
->
vz
,
SWIFT_CACHE_ALIGNMENT
);
swift_declare_aligned_ptr
(
float
,
rho
,
ci_cache
->
rho
,
SWIFT_CACHE_ALIGNMENT
);
swift_declare_aligned_ptr
(
float
,
grad_h
,
ci_cache
->
grad_h
,
SWIFT_CACHE_ALIGNMENT
);
swift_declare_aligned_ptr
(
float
,
pOrho2
,
ci_cache
->
pOrho2
,
SWIFT_CACHE_ALIGNMENT
);
swift_declare_aligned_ptr
(
float
,
balsara
,
ci_cache
->
balsara
,
SWIFT_CACHE_ALIGNMENT
);
swift_declare_aligned_ptr
(
float
,
soundspeed
,
ci_cache
->
soundspeed
,
SWIFT_CACHE_ALIGNMENT
);
swift_declare_aligned_ptr
(
float
,
grad_h
,
ci_cache
->
grad_h
,
SWIFT_CACHE_ALIGNMENT
);
swift_declare_aligned_ptr
(
float
,
pOrho2
,
ci_cache
->
pOrho2
,
SWIFT_CACHE_ALIGNMENT
);
swift_declare_aligned_ptr
(
float
,
balsara
,
ci_cache
->
balsara
,
SWIFT_CACHE_ALIGNMENT
);
swift_declare_aligned_ptr
(
float
,
soundspeed
,
ci_cache
->
soundspeed
,
SWIFT_CACHE_ALIGNMENT
);
const
struct
part
*
restrict
parts
=
ci
->
parts
;
double
loc
[
3
];
...
...
@@ -280,7 +289,7 @@ __attribute__((always_inline)) INLINE void cache_read_force_particles(
vx
[
i
]
=
parts
[
i
].
v
[
0
];
vy
[
i
]
=
parts
[
i
].
v
[
1
];
vz
[
i
]
=
parts
[
i
].
v
[
2
];
rho
[
i
]
=
parts
[
i
].
rho
;
grad_h
[
i
]
=
parts
[
i
].
force
.
f
;
pOrho2
[
i
]
=
parts
[
i
].
force
.
P_over_rho2
;
...
...
src/hydro/Gadget2/hydro_iact.h
View file @
7dd6e919
...
...
@@ -1168,7 +1168,8 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force(
}
#ifdef WITH_VECTORIZATION
static
const
vector
const_viscosity_alpha_fac
=
FILL_VEC
(
-
0
.
25
f
*
const_viscosity_alpha
);
static
const
vector
const_viscosity_alpha_fac
=
FILL_VEC
(
-
0
.
25
f
*
const_viscosity_alpha
);
/**
* @brief Force interaction computed using 1 vector
...
...
@@ -1177,12 +1178,12 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force(
__attribute__
((
always_inline
))
INLINE
static
void
runner_iact_nonsym_1_vec_force
(
vector
*
r2
,
vector
*
dx
,
vector
*
dy
,
vector
*
dz
,
vector
vix
,
vector
viy
,
vector
viz
,
vector
pirho
,
vector
grad_hi
,
vector
piPOrho2
,
vector
balsara_i
,
vector
ci
,
float
*
Vjx
,
float
*
Vjy
,
float
*
Vjz
,
float
*
Pjrho
,
float
*
Grad_hj
,
float
*
PjPOrho2
,
float
*
Balsara_j
,
float
*
Cj
,
float
*
Mj
,
vector
hi_inv
,
float
*
Hj_inv
,
vector
*
a_hydro_
x
Sum
,
vector
*
a_hydro_
y
Sum
,
vector
*
a_hydro_z
Sum
,
vector
*
h_dt
Sum
,
vector
*
v_sigSum
,
vector
*
entropy_dtSum
,
mask_t
mask
)
{
vector
viz
,
vector
pirho
,
vector
grad_hi
,
vector
piPOrho2
,
vector
balsara_i
,
vector
ci
,
float
*
Vjx
,
float
*
Vjy
,
float
*
Vjz
,
float
*
Pjrho
,
float
*
Grad_hj
,
float
*
PjPOrho2
,
float
*
Balsara_j
,
float
*
Cj
,
float
*
Mj
,
vector
hi_inv
,
float
*
Hj_inv
,
vector
*
a_hydro_xSum
,
vector
*
a_hydro_
y
Sum
,
vector
*
a_hydro_
z
Sum
,
vector
*
h_dt
Sum
,
vector
*
v_sig
Sum
,
vector
*
entropy_dtSum
,
mask_t
mask
)
{
#ifdef WITH_VECTORIZATION
...
...
@@ -1246,21 +1247,28 @@ runner_iact_nonsym_1_vec_force(
/* Compute the relative velocity. (This is 0 if the particles move away from
* each other and negative otherwise) */
omega_ij
.
v
=
vec_fmin
(
dvdr
.
v
,
vec_setzero
());
mu_ij
.
v
=
vec_mul
(
fac_mu
.
v
,
vec_mul
(
ri
.
v
,
omega_ij
.
v
));
/* This is 0 or negative */
mu_ij
.
v
=
vec_mul
(
fac_mu
.
v
,
vec_mul
(
ri
.
v
,
omega_ij
.
v
));
/* This is 0 or negative */
/* Compute signal velocity */
v_sig
.
v
=
vec_fnma
(
vec_set1
(
3
.
f
),
mu_ij
.
v
,
vec_add
(
ci
.
v
,
cj
.
v
));
/* Now construct the full viscosity term */
rho_ij
.
v
=
vec_mul
(
vec_set1
(
0
.
5
f
),
vec_add
(
pirho
.
v
,
pjrho
.
v
));
visc
.
v
=
vec_div
(
vec_mul
(
const_viscosity_alpha_fac
.
v
,
vec_mul
(
v_sig
.
v
,
vec_mul
(
mu_ij
.
v
,
balsara
.
v
))),
rho_ij
.
v
);
visc
.
v
=
vec_div
(
vec_mul
(
const_viscosity_alpha_fac
.
v
,
vec_mul
(
v_sig
.
v
,
vec_mul
(
mu_ij
.
v
,
balsara
.
v
))),
rho_ij
.
v
);
/* Now, convolve with the kernel */
visc_term
.
v
=
vec_mul
(
vec_set1
(
0
.
5
f
),
vec_mul
(
visc
.
v
,
vec_mul
(
vec_add
(
wi_dr
.
v
,
wj_dr
.
v
),
ri
.
v
)));
visc_term
.
v
=
vec_mul
(
vec_set1
(
0
.
5
f
),
vec_mul
(
visc
.
v
,
vec_mul
(
vec_add
(
wi_dr
.
v
,
wj_dr
.
v
),
ri
.
v
)));
sph_term
.
v
=
vec_mul
(
vec_fma
(
vec_mul
(
grad_hi
.
v
,
piPOrho2
.
v
),
wi_dr
.
v
,
vec_mul
(
grad_hj
.
v
,
vec_mul
(
pjPOrho2
.
v
,
wj_dr
.
v
))),
ri
.
v
);
vec_mul
(
vec_fma
(
vec_mul
(
grad_hi
.
v
,
piPOrho2
.
v
),
wi_dr
.
v
,
vec_mul
(
grad_hj
.
v
,
vec_mul
(
pjPOrho2
.
v
,
wj_dr
.
v
))),
ri
.
v
);
/* Eventually get the acceleration */
acc
.
v
=
vec_add
(
visc_term
.
v
,
sph_term
.
v
);
...
...
@@ -1270,7 +1278,8 @@ runner_iact_nonsym_1_vec_force(
piaz
.
v
=
vec_mul
(
mj
.
v
,
vec_mul
(
dz
->
v
,
acc
.
v
));
/* Get the time derivative for h. */
pih_dt
.
v
=
vec_div
(
vec_mul
(
mj
.
v
,
vec_mul
(
dvdr
.
v
,
vec_mul
(
ri
.
v
,
wi_dr
.
v
))),
pjrho
.
v
);
pih_dt
.
v
=
vec_div
(
vec_mul
(
mj
.
v
,
vec_mul
(
dvdr
.
v
,
vec_mul
(
ri
.
v
,
wi_dr
.
v
))),
pjrho
.
v
);
/* Change in entropy */
entropy_dt
.
v
=
vec_mul
(
mj
.
v
,
vec_mul
(
visc_term
.
v
,
dvdr
.
v
));
...
...
@@ -1299,13 +1308,12 @@ runner_iact_nonsym_1_vec_force(
__attribute__
((
always_inline
))
INLINE
static
void
runner_iact_nonsym_2_vec_force
(
float
*
R2
,
float
*
Dx
,
float
*
Dy
,
float
*
Dz
,
vector
vix
,
vector
viy
,
vector
viz
,
vector
pirho
,
vector
grad_hi
,
vector
piPOrho2
,
vector
balsara_i
,
vector
ci
,
float
*
Vjx
,
float
*
Vjy
,
float
*
Vjz
,
float
*
Pjrho
,
float
*
Grad_hj
,
float
*
PjPOrho2
,
float
*
Balsara_j
,
float
*
Cj
,
float
*
Mj
,
vector
hi_inv
,
float
*
Hj_inv
,
vector
*
a_hydro_xSum
,
vector
*
a_hydro_ySum
,
vector
*
a_hydro_zSum
,
vector
*
h_dtSum
,
vector
*
v_sigSum
,
vector
*
entropy_dtSum
,
mask_t
mask
,
mask_t
mask_2
,
short
mask_cond
)
{
vector
viz
,
vector
pirho
,
vector
grad_hi
,
vector
piPOrho2
,
vector
balsara_i
,
vector
ci
,
float
*
Vjx
,
float
*
Vjy
,
float
*
Vjz
,
float
*
Pjrho
,
float
*
Grad_hj
,
float
*
PjPOrho2
,
float
*
Balsara_j
,
float
*
Cj
,
float
*
Mj
,
vector
hi_inv
,
float
*
Hj_inv
,
vector
*
a_hydro_xSum
,
vector
*
a_hydro_ySum
,
vector
*
a_hydro_zSum
,
vector
*
h_dtSum
,
vector
*
v_sigSum
,
vector
*
entropy_dtSum
,
mask_t
mask
,
mask_t
mask_2
,
short
mask_cond
)
{
#ifdef WITH_VECTORIZATION
...
...
@@ -1408,18 +1416,20 @@ runner_iact_nonsym_2_vec_force(
dvy_2
.
v
=
vec_sub
(
viy
.
v
,
vjy_2
.
v
);
dvz
.
v
=
vec_sub
(
viz
.
v
,
vjz
.
v
);
dvz_2
.
v
=
vec_sub
(
viz
.
v
,
vjz_2
.
v
);
/* Compute dv dot r. */
dvdr
.
v
=
vec_fma
(
dvx
.
v
,
dx
.
v
,
vec_fma
(
dvy
.
v
,
dy
.
v
,
vec_mul
(
dvz
.
v
,
dz
.
v
)));
dvdr_2
.
v
=
vec_fma
(
dvx_2
.
v
,
dx_2
.
v
,
vec_fma
(
dvy_2
.
v
,
dy_2
.
v
,
vec_mul
(
dvz_2
.
v
,
dz_2
.
v
)));
dvdr_2
.
v
=
vec_fma
(
dvx_2
.
v
,
dx_2
.
v
,
vec_fma
(
dvy_2
.
v
,
dy_2
.
v
,
vec_mul
(
dvz_2
.
v
,
dz_2
.
v
)));
/* Compute the relative velocity. (This is 0 if the particles move away from
* each other and negative otherwise) */
omega_ij
.
v
=
vec_fmin
(
dvdr
.
v
,
vec_setzero
());
omega_ij_2
.
v
=
vec_fmin
(
dvdr_2
.
v
,
vec_setzero
());
mu_ij
.
v
=
vec_mul
(
fac_mu
.
v
,
vec_mul
(
ri
.
v
,
omega_ij
.
v
));
/* This is 0 or negative */
mu_ij_2
.
v
=
vec_mul
(
fac_mu
.
v
,
vec_mul
(
ri_2
.
v
,
omega_ij_2
.
v
));
/* This is 0 or negative */
mu_ij
.
v
=
vec_mul
(
fac_mu
.
v
,
vec_mul
(
ri
.
v
,
omega_ij
.
v
));
/* This is 0 or negative */
mu_ij_2
.
v
=
vec_mul
(
fac_mu
.
v
,
vec_mul
(
ri_2
.
v
,
omega_ij_2
.
v
));
/* This is 0 or negative */
/* Compute signal velocity */
v_sig
.
v
=
vec_fnma
(
vec_set1
(
3
.
f
),
mu_ij
.
v
,
vec_add
(
ci
.
v
,
cj
.
v
));
...
...
@@ -1429,19 +1439,33 @@ runner_iact_nonsym_2_vec_force(
rho_ij
.
v
=
vec_mul
(
vec_set1
(
0
.
5
f
),
vec_add
(
pirho
.
v
,
pjrho
.
v
));
rho_ij_2
.
v
=
vec_mul
(
vec_set1
(
0
.
5
f
),
vec_add
(
pirho
.
v
,
pjrho_2
.
v
));
visc
.
v
=
vec_div
(
vec_mul
(
const_viscosity_alpha_fac
.
v
,
vec_mul
(
v_sig
.
v
,
vec_mul
(
mu_ij
.
v
,
balsara
.
v
))),
rho_ij
.
v
);
visc_2
.
v
=
vec_div
(
vec_mul
(
const_viscosity_alpha_fac
.
v
,
vec_mul
(
v_sig_2
.
v
,
vec_mul
(
mu_ij_2
.
v
,
balsara_2
.
v
))),
rho_ij_2
.
v
);
visc
.
v
=
vec_div
(
vec_mul
(
const_viscosity_alpha_fac
.
v
,
vec_mul
(
v_sig
.
v
,
vec_mul
(
mu_ij
.
v
,
balsara
.
v
))),
rho_ij
.
v
);
visc_2
.
v
=
vec_div
(
vec_mul
(
const_viscosity_alpha_fac
.
v
,
vec_mul
(
v_sig_2
.
v
,
vec_mul
(
mu_ij_2
.
v
,
balsara_2
.
v
))),
rho_ij_2
.
v
);
/* Now, convolve with the kernel */
visc_term
.
v
=
vec_mul
(
vec_set1
(
0
.
5
f
),
vec_mul
(
visc
.
v
,
vec_mul
(
vec_add
(
wi_dr
.
v
,
wj_dr
.
v
),
ri
.
v
)));
visc_term_2
.
v
=
vec_mul
(
vec_set1
(
0
.
5
f
),
vec_mul
(
visc_2
.
v
,
vec_mul
(
vec_add
(
wi_dr_2
.
v
,
wj_dr_2
.
v
),
ri_2
.
v
)));
visc_term
.
v
=
vec_mul
(
vec_set1
(
0
.
5
f
),
vec_mul
(
visc
.
v
,
vec_mul
(
vec_add
(
wi_dr
.
v
,
wj_dr
.
v
),
ri
.
v
)));
visc_term_2
.
v
=
vec_mul
(
vec_set1
(
0
.
5
f
),
vec_mul
(
visc_2
.
v
,
vec_mul
(
vec_add
(
wi_dr_2
.
v
,
wj_dr_2
.
v
),
ri_2
.
v
)));
vector
grad_hi_mul_piPOrho2
;
grad_hi_mul_piPOrho2
.
v
=
vec_mul
(
grad_hi
.
v
,
piPOrho2
.
v
);
sph_term
.
v
=
vec_mul
(
vec_fma
(
grad_hi_mul_piPOrho2
.
v
,
wi_dr
.
v
,
vec_mul
(
grad_hj
.
v
,
vec_mul
(
pjPOrho2
.
v
,
wj_dr
.
v
))),
ri
.
v
);
sph_term_2
.
v
=
vec_mul
(
vec_fma
(
grad_hi_mul_piPOrho2
.
v
,
wi_dr_2
.
v
,
vec_mul
(
grad_hj_2
.
v
,
vec_mul
(
pjPOrho2_2
.
v
,
wj_dr_2
.
v
))),
ri_2
.
v
);
vec_mul
(
vec_fma
(
grad_hi_mul_piPOrho2
.
v
,
wi_dr
.
v
,
vec_mul
(
grad_hj
.
v
,
vec_mul
(
pjPOrho2
.
v
,
wj_dr
.
v
))),
ri
.
v
);
sph_term_2
.
v
=
vec_mul
(
vec_fma
(
grad_hi_mul_piPOrho2
.
v
,
wi_dr_2
.
v
,
vec_mul
(
grad_hj_2
.
v
,
vec_mul
(
pjPOrho2_2
.
v
,
wj_dr_2
.
v
))),
ri_2
.
v
);
/* Eventually get the acceleration */
acc
.
v
=
vec_add
(
visc_term
.
v
,
sph_term
.
v
);
...
...
@@ -1456,8 +1480,11 @@ runner_iact_nonsym_2_vec_force(
piaz_2
.
v
=
vec_mul
(
mj_2
.
v
,
vec_mul
(
dz_2
.
v
,
acc_2
.
v
));
/* Get the time derivative for h. */
pih_dt
.
v
=
vec_div
(
vec_mul
(
mj
.
v
,
vec_mul
(
dvdr
.
v
,
vec_mul
(
ri
.
v
,
wi_dr
.
v
))),
pjrho
.
v
);
pih_dt_2
.
v
=
vec_div
(
vec_mul
(
mj_2
.
v
,
vec_mul
(
dvdr_2
.
v
,
vec_mul
(
ri_2
.
v
,
wi_dr_2
.
v
))),
pjrho_2
.
v
);
pih_dt
.
v
=
vec_div
(
vec_mul
(
mj
.
v
,
vec_mul
(
dvdr
.
v
,
vec_mul
(
ri
.
v
,
wi_dr
.
v
))),
pjrho
.
v
);
pih_dt_2
.
v
=
vec_div
(
vec_mul
(
mj_2
.
v
,
vec_mul
(
dvdr_2
.
v
,
vec_mul
(
ri_2
.
v
,
wi_dr_2
.
v
))),
pjrho_2
.
v
);
/* Change in entropy */
entropy_dt
.
v
=
vec_mul
(
mj
.
v
,
vec_mul
(
visc_term
.
v
,
dvdr
.
v
));
...
...
src/runner_doiact.h
View file @
7dd6e919
...
...
@@ -1039,7 +1039,7 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid,
const
float
hj
=
pj
->
h
;
const
double
dj
=
sort_j
[
pjd
].
d
-
hj
*
kernel_gamma
-
dx_max
+
rshift
;
if
(
dj
-
rshift
>
di_max
)
continue
;
double
pjx
[
3
];
for
(
int
k
=
0
;
k
<
3
;
k
++
)
pjx
[
k
]
=
pj
->
x
[
k
]
+
shift
[
k
];
const
float
hjg2
=
hj
*
hj
*
kernel_gamma2
;
...
...
@@ -1451,7 +1451,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) {
const
float
hj
=
pj
->
h
;
const
double
dj
=
sort_j
[
pjd
].
d
-
hj
*
kernel_gamma
-
dx_max
+
rshift
;
if
(
dj
-
rshift
>
di_max
)
continue
;
double
pjx
[
3
];
for
(
int
k
=
0
;
k
<
3
;
k
++
)
pjx
[
k
]
=
pj
->
x
[
k
]
+
shift
[
k
];
const
float
hjg2
=
hj
*
hj
*
kernel_gamma2
;
...
...
src/runner_doiact_vec.c
View file @
7dd6e919
...
...
@@ -226,20 +226,27 @@ __attribute__((always_inline)) INLINE static void storeInteractions(
}
/**
* @brief Compute the vector remainder force interactions from the secondary cache.
* @brief Compute the vector remainder force interactions from the secondary
* cache.
*
* @param int_cache (return) secondary #cache of interactions between two
* particles.
* @param icount Interaction count.
* @param a_hydro_xSum (return) #vector holding the cumulative sum of the x acceleration
* @param a_hydro_xSum (return) #vector holding the cumulative sum of the x
* acceleration
* update on pi.
* @param a_hydro_ySum (return) #vector holding the cumulative sum of the y acceleration
* @param a_hydro_ySum (return) #vector holding the cumulative sum of the y
* acceleration
* update on pi.
* @param a_hydro_zSum (return) #vector holding the cumulative sum of the z acceleration
* @param a_hydro_zSum (return) #vector holding the cumulative sum of the z
* acceleration
* update on pi.
* @param h_dtSum (return) #vector holding the cumulative sum of the time derivative of the smoothing length update on pi.
* @param v_sigSum (return) #vector holding the maximum of the signal velocity update on pi.
* @param entropy_dtSum (return) #vector holding the cumulative sum of the time derivative of the entropy
* @param h_dtSum (return) #vector holding the cumulative sum of the time
* derivative of the smoothing length update on pi.
* @param v_sigSum (return) #vector holding the maximum of the signal velocity
* update on pi.
* @param entropy_dtSum (return) #vector holding the cumulative sum of the time
* derivative of the entropy
* update on pi.
* @param v_hi_inv #vector of 1/h for pi.
* @param v_vix #vector of x velocity of pi.
...
...
@@ -251,7 +258,8 @@ __attribute__((always_inline)) INLINE static void storeInteractions(
* @param v_balsara_i #vector of balsara switch of pi.
* @param v_ci #vector of sound speed of pi.
* @param icount_align (return) Interaction count after the remainder
* @param num_vec_proc #int of the number of vectors to use to perform interaction.
* @param num_vec_proc #int of the number of vectors to use to perform
* interaction.
* interactions have been performed, should be a multiple of the vector length.
*/
__attribute__
((
always_inline
))
INLINE
static
void
calcRemForceInteractions
(
...
...
@@ -333,15 +341,21 @@ __attribute__((always_inline)) INLINE static void calcRemForceInteractions(
* @param int_cache (return) secondary #cache of interactions between two
* particles.
* @param icount Interaction count.
* @param a_hydro_xSum (return) #vector holding the cumulative sum of the x acceleration
* @param a_hydro_xSum (return) #vector holding the cumulative sum of the x
* acceleration
* update on pi.
* @param a_hydro_ySum (return) #vector holding the cumulative sum of the y
* acceleration
* update on pi.
* @param a_hydro_ySum (return) #vector holding the cumulative sum of the y acceleration
* @param a_hydro_zSum (return) #vector holding the cumulative sum of the z
* acceleration
* update on pi.
* @param a_hydro_zSum (return) #vector holding the cumulative sum of the z acceleration
* @param h_dtSum (return) #vector holding the cumulative sum of the time
* derivative of the smoothing length update on pi.
* @param v_sigSum (return) #vector holding the maximum of the signal velocity
* update on pi.
* @param h_dtSum (return) #vector holding the cumulative sum of the time derivative of the smoothing length update on pi.
* @param v_sigSum (return) #vector holding the maximum of the signal velocity update on pi.
* @param entropy_dtSum (return) #vector holding the cumulative sum of the time derivative of the entropy
* @param entropy_dtSum (return) #vector holding the cumulative sum of the time
* derivative of the entropy
* update on pi.
* @param v_hi_inv #vector of 1/h for pi.
* @param v_vix #vector of x velocity of pi.
...
...
@@ -352,7 +366,8 @@ __attribute__((always_inline)) INLINE static void calcRemForceInteractions(
* @param v_pOrhoi2 #vector of pressure over density squared of pi.
* @param v_balsara_i #vector of balsara switch of pi.
* @param v_ci #vector of sound speed of pi.
* @param num_vec_proc #int of the number of vectors to use to perform interaction.
* @param num_vec_proc #int of the number of vectors to use to perform
* interaction.
* interactions have been performed, should be a multiple of the vector length.
*/
__attribute__
((
always_inline
))
INLINE
static
void
storeForceInteractions
(
...
...
@@ -993,17 +1008,17 @@ for (int pid = 0; pid < count; pid++) {
storeForceInteractions
(
doi_mask
,
pjd
,
&
v_r2
,
&
v_dx_tmp
,
&
v_dy_tmp
,
&
v_dz_tmp
,
cell_cache
,
&
int_cache
,
&
icount
,
&
a_hydro_xSum
,
&
a_hydro_ySum
,
&
a_hydro_zSum
,
&
h_dtSum
,
&
v_sigSum
,
&
entropy_dtSum
,
v_hi_inv
,
v_vix
,
v_viy
,
v_viz
,
v_rhoi
,
v_grad_hi
,
v_pOrhoi2
,
v_balsara_i
,
v_ci
,
2
);
&
h_dtSum
,
&
v_sigSum
,
&
entropy_dtSum
,
v_hi_inv
,
v_vix
,
v_viy
,
v_viz
,
v_rhoi
,
v_grad_hi
,
v_pOrhoi2
,
v_balsara_i
,
v_ci
,
2
);
}
}
/* Loop over all other particles. */
/* Perform padded vector remainder interactions if any are present. */
calcRemForceInteractions
(
&
int_cache
,
icount
,
&
a_hydro_
x
Sum
,
&
a_hydro_ySum
,
&
a_hydro_zSum
,
&
h
_dtSum
,
&
v_sigSum
,
&
entropy_dtSum
,
v_hi_inv
,
v_vix
,
v_viy
,
v_viz
,
v_rhoi
,
v_grad_hi
,
v_pOrhoi2
,
v_balsara_i
,
v_ci
,
&
icount_align
,
2
);
calcRemForceInteractions
(
&
int_cache
,
icount
,
&
a_hydro_xSum
,
&
a_hydro_ySum
,
&
a_hydro_
z
Sum
,
&
h_dtSum
,
&
v_sigSum
,
&
entropy
_dtSum
,
v_hi_inv
,
v_vix
,
v_viy
,
v_viz
,
v_rhoi
,
v_grad_hi
,
v_pOrhoi2
,
v_balsara_i
,
v_ci
,
&
icount_align
,
2
);
/* Initialise masks to true in case remainder interactions have been
* performed. */
...
...
@@ -1015,14 +1030,13 @@ for (int pid = 0; pid < count; pid++) {
for
(
int
pjd
=
0
;
pjd
<
icount_align
;
pjd
+=
(
2
*
VEC_SIZE
))
{
runner_iact_nonsym_2_vec_force
(
&
int_cache
.
r2q
[
pjd
],
&
int_cache
.
dxq
[
pjd
],
&
int_cache
.
dyq
[
pjd
],
&
int_cache
.
dzq
[
pjd
],
v_vix
,
v_viy
,
v_viz
,
v_rhoi
,
v_grad_hi
,
v_pOrhoi2
,
v_balsara_i
,
v_ci
,
&
int_cache
.
vxq
[
pjd
],
&
int_cache
.
vyq
[
pjd
],
&
int_cache
.
vzq
[
pjd
],
&
int_cache
.
rhoq
[
pjd
],
&
int_cache
.
grad_hq
[
pjd
],
&
int_cache
.
pOrho2q
[
pjd
],
&
int_cache
.
balsaraq
[
pjd
],
&
int_cache
.
soundspeedq
[
pjd
],
&
int_cache
.
mq
[
pjd
],
v_hi_inv
,
&
int_cache
.
h_invq
[
pjd
],
&
a_hydro_xSum
,
&
a_hydro_ySum
,
&
a_hydro_zSum
,
&
h_dtSum
,
&
v_sigSum
,
&
entropy_dtSum
,
int_mask
,
int_mask2
,
0
);
&
int_cache
.
dzq
[
pjd
],
v_vix
,
v_viy
,
v_viz
,
v_rhoi
,
v_grad_hi
,
v_pOrhoi2
,
v_balsara_i
,
v_ci
,
&
int_cache
.
vxq
[
pjd
],
&
int_cache
.
vyq
[
pjd
],
&
int_cache
.
vzq
[
pjd
],
&
int_cache
.
rhoq
[
pjd
],
&
int_cache
.
grad_hq
[
pjd
],
&
int_cache
.
pOrho2q
[
pjd
],
&
int_cache
.
balsaraq
[
pjd
],
&
int_cache
.
soundspeedq
[
pjd
],
&
int_cache
.
mq
[
pjd
],
v_hi_inv
,
&
int_cache
.
h_invq
[
pjd
],
&
a_hydro_xSum
,
&
a_hydro_ySum
,
&
a_hydro_zSum
,
&
h_dtSum
,
&
v_sigSum
,
&
entropy_dtSum
,
int_mask
,
int_mask2
,
0
);
}
VEC_HADD
(
a_hydro_xSum
,
pi
->
a_hydro
[
0
]);
...
...
@@ -1258,8 +1272,10 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
vector
v_dx
,
v_dy
,
v_dz
,
v_r2
;
#ifdef SWIFT_DEBUG_CHECKS
if
(
cj_cache_idx
%
VEC_SIZE
!=
0
||
cj_cache_idx
<
0
||
cj_cache_idx
+
(
VEC_SIZE
-
1
)
>
(
last_pj_align
+
1
+
VEC_SIZE
))
{
error
(
"Unaligned read!!! cj_cache_idx=%d, last_pj_align=%d"
,
cj_cache_idx
,
last_pj_align
);
if
(
cj_cache_idx
%
VEC_SIZE
!=
0
||
cj_cache_idx
<
0
||
cj_cache_idx
+
(
VEC_SIZE
-
1
)
>
(
last_pj_align
+
1
+
VEC_SIZE
))
{
error
(
"Unaligned read!!! cj_cache_idx=%d, last_pj_align=%d"
,
cj_cache_idx
,
last_pj_align
);
}
#endif
...
...
@@ -1384,8 +1400,13 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
ci_cache_idx
<
ci_cache_count
;
ci_cache_idx
+=
VEC_SIZE
)
{
#ifdef SWIFT_DEBUG_CHECKS
if
(
ci_cache_idx
%
VEC_SIZE
!=
0
||
ci_cache_idx
<
0
||
ci_cache_idx
+
(
VEC_SIZE
-
1
)
>
(
count_i
-
first_pi_align
+
VEC_SIZE
))
{
error
(
"Unaligned read!!! ci_cache_idx=%d, first_pi_align=%d, count_i=%d"
,
ci_cache_idx
,
first_pi_align
,
count_i
);
if
(
ci_cache_idx
%
VEC_SIZE
!=
0
||
ci_cache_idx
<
0
||
ci_cache_idx
+
(
VEC_SIZE
-
1
)
>
(
count_i
-
first_pi_align
+
VEC_SIZE
))
{
error
(
"Unaligned read!!! ci_cache_idx=%d, first_pi_align=%d, "
"count_i=%d"
,
ci_cache_idx
,
first_pi_align
,
count_i
);
}
#endif
...
...
tests/testInteractions.c
View file @
7dd6e919
...
...
@@ -126,37 +126,29 @@ void dump_indv_particle_fields(char *fileName, struct part *p) {
FILE
*
file
=
fopen
(
fileName
,
"a"
);
fprintf
(
file
,
"%6llu %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f "
"%8.5f "
"%8.5f %8.5f %13e %13e %13e %13e %13e %8.5f %8.5f
\n
"
,
p
->
id
,
p
->
x
[
0
],
p
->
x
[
1
],
p
->
x
[
2
],
p
->
v
[
0
],
p
->
v
[
1
],
p
->
v
[
2
],
p
->
h
,
hydro_get_density
(
p
),
"%6llu %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f "
"%8.5f "
"%8.5f %8.5f %13e %13e %13e %13e %13e %8.5f %8.5f
\n
"
,
p
->
id
,
p
->
x
[
0
],
p
->
x
[
1
],
p
->
x
[
2
],
p
->
v
[
0
],
p
->
v
[
1
],
p
->
v
[
2
],
p
->
h
,
hydro_get_density
(
p
),
#if defined(MINIMAL_SPH) || defined(SHADOWFAX_SPH)
0
.
f
,
0
.
f
,
#else
p
->
density
.
div_v
,
p
->
density
.
div_v
,
#endif
hydro_get_entropy
(
p
),
hydro_get_internal_energy
(
p
),
hydro_get_pressure
(
p
),
hydro_get_soundspeed
(
p
),
p
->
a_hydro
[
0
],
p
->
a_hydro
[
1
],
p
->
a_hydro
[
2
],
p
->
force
.
h_dt
,
hydro_get_entropy
(
p
),
hydro_get_internal_energy
(
p
),
hydro_get_pressure
(
p
),
hydro_get_soundspeed
(
p
),
p
->
a_hydro
[
0
],
p
->
a_hydro
[
1
],
p
->
a_hydro
[
2
],
p
->
force
.
h_dt
,
#if defined(GADGET2_SPH)
p
->
force
.
v_sig
,
p
->
entropy_dt
,
0
.
f
p
->
force
.
v_sig
,
p
->
entropy_dt
,
0
.
f
#elif defined(DEFAULT_SPH)
p
->
force
.
v_sig
,
0
.
f
,
p
->
force
.
u_dt
p
->
force
.
v_sig
,
0
.
f
,
p
->
force
.
u_dt
#elif defined(MINIMAL_SPH)
p
->
force
.
v_sig
,
0
.
f
,
p
->
u_dt
p
->
force
.
v_sig
,
0
.
f
,
p
->
u_dt
#else
0
.
f
,
0
.
f
,
0
.
f
0
.
f
,
0
.
f
,
0
.
f
#endif
);
);
fclose
(
file
);
}
...
...
@@ -174,7 +166,7 @@ void write_header(char *fileName) {
"ID"
,
"pos_x"
,
"pos_y"
,
"pos_z"
,
"v_x"
,
"v_y"
,
"v_z"
,
"h"
,
"rho"
,
"div_v"
,
"S"
,
"u"
,
"P"
,
"c"
,
"a_x"
,
"a_y"
,
"a_z"
,
"h_dt"
,
"v_sig"
,
"dS/dt"
,
"du/dt"
);
fclose
(
file
);
}
...
...
@@ -415,8 +407,9 @@ void test_interactions(struct part test_part, struct part *parts, size_t count,
* @param runs No. of times to call interactions
*
*/
void
test_force_interactions
(
struct
part
test_part
,
struct
part
*
parts
,
size_t
count
,
char
*
filePrefix
,
int
runs
,
int
num_vec_proc
)
{
void
test_force_interactions
(
struct
part
test_part
,
struct
part
*
parts
,
size_t
count
,
char
*
filePrefix
,
int
runs
,
int
num_vec_proc
)
{
ticks
serial_time
=
0
;
ticks
vec_time
=
0
;
...
...
@@ -449,7 +442,7 @@ void test_force_interactions(struct part test_part, struct part *parts, size_t c
float
dxq
[
count
]
__attribute__
((
aligned
(
array_align
)));
float
dyq
[
count
]
__attribute__
((
aligned
(
array_align
)));
float
dzq
[
count
]
__attribute__
((
aligned
(
array_align
)));
float
hiq
[
count
]
__attribute__
((
aligned
(
array_align
)));
float
vixq
[
count
]
__attribute__
((
aligned
(
array_align
)));
float
viyq
[
count
]
__attribute__
((
aligned
(
array_align
)));
...
...
@@ -459,7 +452,7 @@ void test_force_interactions(struct part test_part, struct part *parts, size_t c
float
pOrhoi2q
[
count
]
__attribute__
((
aligned
(
array_align
)));
float
balsaraiq
[
count
]
__attribute__
((
aligned
(
array_align
)));
float
ciq
[
count
]
__attribute__
((
aligned
(
array_align
)));
float
hj_invq
[
count
]
__attribute__
((
aligned
(
array_align
)));
float
mjq
[
count
]
__attribute__
((
aligned
(
array_align
)));
float
vjxq
[
count
]
__attribute__
((
aligned
(
array_align
)));
...
...
@@ -502,8 +495,8 @@ void test_force_interactions(struct part test_part, struct part *parts, size_t c
#pragma novector
#endif
for
(
size_t
i
=
0
;
i
<
count
;
i
++
)
{
runner_iact_nonsym_force
(
r2
[
i
],
&
(
dx
[
3
*
i
]),
pi_serial
.
h
,
pj_serial
[
i
].
h
,
&
pi_serial
,
&
pj_serial
[
i
]);
runner_iact_nonsym_force
(
r2
[
i
],
&
(
dx
[
3
*
i
]),
pi_serial
.
h
,
pj_serial
[
i
].
h
,
&
pi_serial
,
&
pj_serial
[
i
]);
}
serial_time
+=
getticks
()
-
tic
;
}
...
...
@@ -540,7 +533,7 @@ void test_force_interactions(struct part test_part, struct part *parts, size_t c
dxq
[
i
]
=
dx
[
0
];
dyq
[
i
]
=
dx
[
1
];
dzq
[
i
]
=
dx
[
2
];
hiq
[
i
]
=
pi_vec
.
h
;
vixq
[
i
]
=
pi_vec
.
v
[
0
];
viyq
[
i
]
=
pi_vec
.
v
[
1
];
...
...
@@ -550,7 +543,7 @@ void test_force_interactions(struct part test_part, struct part *parts, size_t c
pOrhoi2q
[
i
]
=
pi_vec
.
force
.
P_over_rho2
;
balsaraiq
[
i
]
=
pi_vec
.
force
.
balsara
;
ciq
[
i
]
=
pi_vec
.
force
.
soundspeed
;
hj_invq
[
i
]
=
1
.
f
/
pj_vec
[
i
].
h
;
mjq
[
i
]
=
pj_vec
[
i
].
mass
;
vjxq
[
i
]
=
pj_vec
[
i
].
v
[
0
];
...
...
@@ -561,7 +554,6 @@ void test_force_interactions(struct part test_part, struct part *parts, size_t c
pOrhoj2q
[
i
]
=
pj_vec
[
i
].
force
.
P_over_rho2
;
balsarajq
[
i
]
=
pj_vec
[
i
].
force
.
balsara
;
cjq
[
i
]
=
pj_vec
[
i
].
force
.
soundspeed
;
}
/* Only dump data on first run. */
...
...
@@ -572,9 +564,11 @@ void test_force_interactions(struct part test_part, struct part *parts, size_t c
dump_indv_particle_fields
(
vec_filename
,
pjq
[
i
]);
}
/* Perform vector interaction. */
vector
hi_vec
,
hi_inv_vec
,
vix_vec
,
viy_vec
,
viz_vec
,
rhoi_vec
,
grad_hi_vec
,
pOrhoi2_vec
,
balsara_i_vec
,
ci_vec
;
vector
a_hydro_xSum
,
a_hydro_ySum
,
a_hydro_zSum
,
h_dtSum
,
v_sigSum
,
entropy_dtSum
;
/* Perform vector interaction. */
vector
hi_vec
,
hi_inv_vec
,
vix_vec
,
viy_vec
,
viz_vec
,
rhoi_vec
,
grad_hi_vec
,
pOrhoi2_vec
,
balsara_i_vec
,
ci_vec
;
vector
a_hydro_xSum
,
a_hydro_ySum
,
a_hydro_zSum
,
h_dtSum
,
v_sigSum
,
entropy_dtSum
;
a_hydro_xSum
.
v
=
vec_setzero
();
a_hydro_ySum
.
v
=
vec_setzero
();
...
...
@@ -594,20 +588,23 @@ void test_force_interactions(struct part test_part, struct part *parts, size_t c
ci_vec
.
v
=
vec_load
(
&
ciq
[
0
]);
hi_inv_vec
=
vec_reciprocal
(
hi_vec
);
mask_t
mask
,
mask2
;
vec_init_mask_true
(
mask
);
vec_init_mask_true
(
mask2
);
const
ticks
vec_tic
=
getticks
();
for
(
size_t
i
=
0
;
i
<
count
;
i
+=
num_vec_proc
*
VEC_SIZE
)
{
if
(
num_vec_proc
==
2
)
{
runner_iact_nonsym_2_vec_force
(
&
(
r2q
[
i
]),
&
(
dxq
[
i
]),
&
(
dyq
[
i
]),
&
(
dzq
[
i
]),
(
vix_vec
),
(
viy_vec
),
(
viz_vec
),
rhoi_vec
,
grad_hi_vec
,
pOrhoi2_vec
,
balsara_i_vec
,
ci_vec
,
&
(
vjxq
[
i
]),
&
(
vjyq
[
i
]),
&
(
vjzq
[
i
]),
&
(
rhojq
[
i
]),
&
(
grad_hjq
[
i
]),
&
(
pOrhoj2q
[
i
]),
&
(
balsarajq
[
i
]),
&
(
cjq
[
i
]),
&
(
mjq
[
i
]),
hi_inv_vec
,
&
(
hj_invq
[
i
]),
&
a_hydro_xSum
,
&
a_hydro_ySum
,
&
a_hydro_zSum
,
&
h_dtSum
,
&
v_sigSum
,
&
entropy_dtSum
,
mask
,
mask2
,
0
);
runner_iact_nonsym_2_vec_force
(
&
(
r2q
[
i
]),
&
(
dxq
[
i
]),
&
(
dyq
[
i
]),
&
(
dzq
[
i
]),
(
vix_vec
),
(
viy_vec
),
(
viz_vec
),
rhoi_vec
,
grad_hi_vec
,
pOrhoi2_vec
,
balsara_i_vec
,
ci_vec
,
&
(
vjxq
[
i
]),
&
(
vjyq
[
i
]),
&
(
vjzq
[
i
]),
&
(
rhojq
[
i
]),
&
(
grad_hjq
[
i
]),
&
(
pOrhoj2q
[
i
]),
&
(
balsarajq
[
i
]),
&
(
cjq
[
i
]),
&
(
mjq
[
i
]),
hi_inv_vec
,
&
(
hj_invq
[
i
]),
&
a_hydro_xSum
,
&
a_hydro_ySum
,
&
a_hydro_zSum
,
&
h_dtSum
,
&
v_sigSum
,
&
entropy_dtSum
,
mask
,
mask2
,
0
);
}
else
{
/* Only use one vector for interaction. */
vector
r2
,
dx
,
dy
,
dz
;
...
...
@@ -617,11 +614,13 @@ void test_force_interactions(struct part test_part, struct part *parts, size_t c
dz
.
v
=
vec_load
(
&
(
dzq
[
i
]));
runner_iact_nonsym_1_vec_force
(
&
r2
,
&
dx
,
&
dy
,
&
dz
,
vix_vec
,
viy_vec
,
viz_vec
,
rhoi_vec
,
grad_hi_vec
,
pOrhoi2_vec
,
balsara_i_vec
,
ci_vec
,
&
(
vjxq
[
i
]),
&
(
vjyq
[
i
]),
&
(
vjzq
[
i
]),
&
(
rhojq
[
i
]),
&
(
grad_hjq
[
i
]),
&
(
pOrhoj2q
[
i
]),
&
(
balsarajq
[
i
]),
&
(
cjq
[
i
]),
&
(
mjq
[
i
]),
hi_inv_vec
,
&
(
hj_invq
[
i
]),
&
a_hydro_xSum
,
&
a_hydro_ySum
,
&
a_hydro_zSum
,
&
h_dtSum
,
&
v_sigSum
,
&
entropy_dtSum
,
mask
);