Commit c46d0c91 authored by Matthieu Schaller's avatar Matthieu Schaller
Browse files

Some speed-up improvements in the GIZMO face slope limiter.

parent 915ad3ed
......@@ -93,7 +93,7 @@ __attribute__((always_inline)) INLINE static void hydro_gradients_collect(
Wj[4] = pj->primitives.P;
/* Compute kernel of pi. */
hi_inv = 1.0 / hi;
hi_inv = 1.f / hi;
xi = r * hi_inv;
kernel_deval(xi, &wi, &wi_dx);
......@@ -191,7 +191,7 @@ __attribute__((always_inline)) INLINE static void hydro_gradients_collect(
hydro_slope_limit_cell_collect(pi, pj, r);
/* Compute kernel of pj. */
hj_inv = 1.0 / hj;
hj_inv = 1.f / hj;
xj = r * hj_inv;
kernel_deval(xj, &wj, &wj_dx);
......@@ -330,7 +330,7 @@ hydro_gradients_nonsym_collect(float r2, const float *dx, float hi, float hj,
Wj[4] = pj->primitives.P;
/* Compute kernel of pi. */
hi_inv = 1.0 / hi;
hi_inv = 1.f / hi;
xi = r * hi_inv;
kernel_deval(xi, &wi, &wi_dx);
......
......@@ -58,7 +58,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_density(
int k, l;
/* Compute density of pi. */
h_inv = 1.0 / hi;
h_inv = 1.f / hi;
xi = r * h_inv;
kernel_deval(xi, &wi, &wi_dx);
......@@ -75,7 +75,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_density(
pi->geometry.centroid[2] -= dx[2] * wi;
/* Compute density of pj. */
h_inv = 1.0 / hj;
h_inv = 1.f / hj;
xj = r * h_inv;
kernel_deval(xj, &wj, &wj_dx);
......@@ -126,7 +126,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_density(
/* Get r and r inverse. */
r = sqrtf(r2);
h_inv = 1.0 / hi;
h_inv = 1.f / hi;
xi = r * h_inv;
kernel_deval(xi, &wi, &wi_dx);
......@@ -274,7 +274,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_fluxes_common(
(vi[2] - vj[2]) * dx[2]);
if (dvdotdx < 0.) {
/* the magical factor 3 also appears in Gadget2 */
vmax -= 3. * dvdotdx / r;
vmax -= 3.f * dvdotdx / r;
}
pi->timestepvars.vmax = max(pi->timestepvars.vmax, vmax);
if (mode == 1) {
......@@ -282,13 +282,13 @@ __attribute__((always_inline)) INLINE static void runner_iact_fluxes_common(
}
/* Compute kernel of pi. */
hi_inv = 1.0 / hi;
hi_inv = 1.f / hi;
hi_inv_dim = pow_dimension(hi_inv);
xi = r * hi_inv;
kernel_deval(xi, &wi, &wi_dx);
/* Compute kernel of pj. */
hj_inv = 1.0 / hj;
hj_inv = 1.f / hj;
hj_inv_dim = pow_dimension(hj_inv);
xj = r * hj_inv;
kernel_deval(xj, &wj, &wj_dx);
......@@ -322,7 +322,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_fluxes_common(
float Xi = Vi;
float Xj = Vj;
#ifdef GIZMO_VOLUME_CORRECTION
if (fabsf(Vi - Vj) / min(Vi, Vj) > 1.5 * hydro_dimension) {
if (fabsf(Vi - Vj) / min(Vi, Vj) > 1.5f * hydro_dimension) {
Xi = (Vi * hj + Vj * hi) / (hi + hj);
Xj = Xi;
}
......@@ -344,7 +344,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_fluxes_common(
Anorm *= Anorm * r2;
}
if (Anorm == 0.) {
if (Anorm == 0.f) {
/* if the interface has no area, nothing happens and we return */
/* continuing results in dividing by zero and NaN's... */
return;
......
......@@ -34,9 +34,8 @@
__attribute__((always_inline)) INLINE static float
hydro_slope_limit_face_quantity(float phi_i, float phi_j, float phi_mid0,
float xij_norm, float r) {
float xij_norm, float r_inv) {
float delta1, delta2, phimin, phimax, phibar, phiplus, phiminus, phi_mid;
const float psi1 = 0.5f;
const float psi2 = 0.25f;
......@@ -44,22 +43,24 @@ hydro_slope_limit_face_quantity(float phi_i, float phi_j, float phi_mid0,
return 0.0f;
}
delta1 = psi1 * fabsf(phi_i - phi_j);
delta2 = psi2 * fabsf(phi_i - phi_j);
const float delta1 = psi1 * fabsf(phi_i - phi_j);
const float delta2 = psi2 * fabsf(phi_i - phi_j);
phimin = min(phi_i, phi_j);
phimax = max(phi_i, phi_j);
const float phimin = min(phi_i, phi_j);
const float phimax = max(phi_i, phi_j);
phibar = phi_i + xij_norm / r * (phi_j - phi_i);
const float phibar = phi_i + xij_norm * r_inv * (phi_j - phi_i);
float phiplus, phiminus, phi_mid;
/* if sign(phimax+delta1) == sign(phimax) */
if ((phimax + delta1) * phimax > 0.0f) {
phiplus = phimax + delta1;
} else {
if (phimax != 0.) {
if (phimax != 0.f) {
phiplus = phimax / (1.0f + delta1 / fabsf(phimax));
} else {
phiplus = 0.;
phiplus = 0.f;
}
}
......@@ -67,10 +68,10 @@ hydro_slope_limit_face_quantity(float phi_i, float phi_j, float phi_mid0,
if ((phimin - delta1) * phimin > 0.0f) {
phiminus = phimin - delta1;
} else {
if (phimin != 0.) {
if (phimin != 0.f) {
phiminus = phimin / (1.0f + delta1 / fabsf(phimin));
} else {
phiminus = 0.;
phiminus = 0.f;
}
}
......@@ -102,35 +103,35 @@ __attribute__((always_inline)) INLINE static void hydro_slope_limit_face(
float *Wi, float *Wj, float *dWi, float *dWj, float *xij_i, float *xij_j,
float r) {
float xij_i_norm, xij_j_norm;
xij_i_norm =
const float xij_i_norm =
sqrtf(xij_i[0] * xij_i[0] + xij_i[1] * xij_i[1] + xij_i[2] * xij_i[2]);
xij_j_norm =
const float xij_j_norm =
sqrtf(xij_j[0] * xij_j[0] + xij_j[1] * xij_j[1] + xij_j[2] * xij_j[2]);
const float r_inv = 1.f / r;
dWi[0] = hydro_slope_limit_face_quantity(Wi[0], Wj[0], Wi[0] + dWi[0],
xij_i_norm, r);
xij_i_norm, r_inv);
dWi[1] = hydro_slope_limit_face_quantity(Wi[1], Wj[1], Wi[1] + dWi[1],
xij_i_norm, r);
xij_i_norm, r_inv);
dWi[2] = hydro_slope_limit_face_quantity(Wi[2], Wj[2], Wi[2] + dWi[2],
xij_i_norm, r);
xij_i_norm, r_inv);
dWi[3] = hydro_slope_limit_face_quantity(Wi[3], Wj[3], Wi[3] + dWi[3],
xij_i_norm, r);
xij_i_norm, r_inv);
dWi[4] = hydro_slope_limit_face_quantity(Wi[4], Wj[4], Wi[4] + dWi[4],
xij_i_norm, r);
xij_i_norm, r_inv);
dWj[0] = hydro_slope_limit_face_quantity(Wj[0], Wi[0], Wj[0] + dWj[0],
xij_j_norm, r);
xij_j_norm, r_inv);
dWj[1] = hydro_slope_limit_face_quantity(Wj[1], Wi[1], Wj[1] + dWj[1],
xij_j_norm, r);
xij_j_norm, r_inv);
dWj[2] = hydro_slope_limit_face_quantity(Wj[2], Wi[2], Wj[2] + dWj[2],
xij_j_norm, r);
xij_j_norm, r_inv);
dWj[3] = hydro_slope_limit_face_quantity(Wj[3], Wi[3], Wj[3] + dWj[3],
xij_j_norm, r);
xij_j_norm, r_inv);
dWj[4] = hydro_slope_limit_face_quantity(Wj[4], Wi[4], Wj[4] + dWj[4],
xij_j_norm, r);
xij_j_norm, r_inv);
}
#endif /* SWIFT_GIZMO_SLOPE_LIMITER_FACE_H */
......@@ -41,9 +41,9 @@
#endif
#define gizmo_check_physical_quantity(name, quantity) \
if (quantity < 0.) { \
if (quantity < 0.f) { \
gizmo_unphysical_message(name, quantity); \
quantity = 0.; \
quantity = 0.f; \
}
#else // defined(GIZMO_UNPHYSICAL_ERROR) || defined(GIZMO_UNPHYSICAL_RESCUE)
......
......@@ -30,9 +30,9 @@ __attribute__((always_inline)) INLINE static void hydro_velocities_init(
struct part* restrict p, struct xpart* restrict xp) {
#ifdef GIZMO_FIX_PARTICLES
p->v[0] = 0.;
p->v[1] = 0.;
p->v[2] = 0.;
p->v[0] = 0.f;
p->v[1] = 0.f;
p->v[2] = 0.f;
#else
p->v[0] = p->primitives.v[0];
p->v[1] = p->primitives.v[1];
......@@ -87,13 +87,13 @@ __attribute__((always_inline)) INLINE static void hydro_velocities_set(
/* We first set the particle velocity. */
#ifdef GIZMO_FIX_PARTICLES
p->v[0] = 0.;
p->v[1] = 0.;
p->v[2] = 0.;
p->v[0] = 0.f;
p->v[1] = 0.f;
p->v[2] = 0.f;
#else // GIZMO_FIX_PARTICLES
if (p->conserved.mass > 0. && p->primitives.rho > 0.) {
if (p->conserved.mass > 0.f && p->primitives.rho > 0.f) {
/* Normal case: set particle velocity to fluid velocity. */
p->v[0] = p->conserved.momentum[0] / p->conserved.mass;
p->v[1] = p->conserved.momentum[1] / p->conserved.mass;
......@@ -112,18 +112,18 @@ __attribute__((always_inline)) INLINE static void hydro_velocities_set(
ds[2] = p->geometry.centroid[2];
const float d = sqrtf(ds[0] * ds[0] + ds[1] * ds[1] + ds[2] * ds[2]);
const float R = get_radius_dimension_sphere(p->geometry.volume);
const float eta = 0.25;
const float eta = 0.25f;
const float etaR = eta * R;
const float xi = 1.;
const float xi = 1.f;
const float soundspeed =
sqrtf(hydro_gamma * p->primitives.P / p->primitives.rho);
/* We only apply the correction if the offset between centroid and position
is
too large. */
if (d > 0.9 * etaR) {
if (d > 0.9f * etaR) {
float fac = xi * soundspeed / d;
if (d < 1.1 * etaR) {
fac *= 5. * (d - 0.9 * etaR) / etaR;
if (d < 1.1f * etaR) {
fac *= 5.f * (d - 0.9f * etaR) / etaR;
}
p->v[0] -= ds[0] * fac;
p->v[1] -= ds[1] * fac;
......@@ -133,9 +133,9 @@ __attribute__((always_inline)) INLINE static void hydro_velocities_set(
#endif // GIZMO_STEER_MOTION
} else {
/* Vacuum particles have no fluid velocity. */
p->v[0] = 0.;
p->v[1] = 0.;
p->v[2] = 0.;
p->v[0] = 0.f;
p->v[1] = 0.f;
p->v[2] = 0.f;
}
#endif // GIZMO_FIX_PARTICLES
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment