Commit c46d0c91 authored by Matthieu Schaller's avatar Matthieu Schaller
Browse files

Some speed-up improvements in the GIZMO face slope limiter.

parent 915ad3ed
...@@ -93,7 +93,7 @@ __attribute__((always_inline)) INLINE static void hydro_gradients_collect( ...@@ -93,7 +93,7 @@ __attribute__((always_inline)) INLINE static void hydro_gradients_collect(
Wj[4] = pj->primitives.P; Wj[4] = pj->primitives.P;
/* Compute kernel of pi. */ /* Compute kernel of pi. */
hi_inv = 1.0 / hi; hi_inv = 1.f / hi;
xi = r * hi_inv; xi = r * hi_inv;
kernel_deval(xi, &wi, &wi_dx); kernel_deval(xi, &wi, &wi_dx);
...@@ -191,7 +191,7 @@ __attribute__((always_inline)) INLINE static void hydro_gradients_collect( ...@@ -191,7 +191,7 @@ __attribute__((always_inline)) INLINE static void hydro_gradients_collect(
hydro_slope_limit_cell_collect(pi, pj, r); hydro_slope_limit_cell_collect(pi, pj, r);
/* Compute kernel of pj. */ /* Compute kernel of pj. */
hj_inv = 1.0 / hj; hj_inv = 1.f / hj;
xj = r * hj_inv; xj = r * hj_inv;
kernel_deval(xj, &wj, &wj_dx); kernel_deval(xj, &wj, &wj_dx);
...@@ -330,7 +330,7 @@ hydro_gradients_nonsym_collect(float r2, const float *dx, float hi, float hj, ...@@ -330,7 +330,7 @@ hydro_gradients_nonsym_collect(float r2, const float *dx, float hi, float hj,
Wj[4] = pj->primitives.P; Wj[4] = pj->primitives.P;
/* Compute kernel of pi. */ /* Compute kernel of pi. */
hi_inv = 1.0 / hi; hi_inv = 1.f / hi;
xi = r * hi_inv; xi = r * hi_inv;
kernel_deval(xi, &wi, &wi_dx); kernel_deval(xi, &wi, &wi_dx);
......
...@@ -58,7 +58,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_density( ...@@ -58,7 +58,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_density(
int k, l; int k, l;
/* Compute density of pi. */ /* Compute density of pi. */
h_inv = 1.0 / hi; h_inv = 1.f / hi;
xi = r * h_inv; xi = r * h_inv;
kernel_deval(xi, &wi, &wi_dx); kernel_deval(xi, &wi, &wi_dx);
...@@ -75,7 +75,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_density( ...@@ -75,7 +75,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_density(
pi->geometry.centroid[2] -= dx[2] * wi; pi->geometry.centroid[2] -= dx[2] * wi;
/* Compute density of pj. */ /* Compute density of pj. */
h_inv = 1.0 / hj; h_inv = 1.f / hj;
xj = r * h_inv; xj = r * h_inv;
kernel_deval(xj, &wj, &wj_dx); kernel_deval(xj, &wj, &wj_dx);
...@@ -126,7 +126,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_density( ...@@ -126,7 +126,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_density(
/* Get r and r inverse. */ /* Get r and r inverse. */
r = sqrtf(r2); r = sqrtf(r2);
h_inv = 1.0 / hi; h_inv = 1.f / hi;
xi = r * h_inv; xi = r * h_inv;
kernel_deval(xi, &wi, &wi_dx); kernel_deval(xi, &wi, &wi_dx);
...@@ -274,7 +274,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_fluxes_common( ...@@ -274,7 +274,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_fluxes_common(
(vi[2] - vj[2]) * dx[2]); (vi[2] - vj[2]) * dx[2]);
if (dvdotdx < 0.) { if (dvdotdx < 0.) {
/* the magical factor 3 also appears in Gadget2 */ /* the magical factor 3 also appears in Gadget2 */
vmax -= 3. * dvdotdx / r; vmax -= 3.f * dvdotdx / r;
} }
pi->timestepvars.vmax = max(pi->timestepvars.vmax, vmax); pi->timestepvars.vmax = max(pi->timestepvars.vmax, vmax);
if (mode == 1) { if (mode == 1) {
...@@ -282,13 +282,13 @@ __attribute__((always_inline)) INLINE static void runner_iact_fluxes_common( ...@@ -282,13 +282,13 @@ __attribute__((always_inline)) INLINE static void runner_iact_fluxes_common(
} }
/* Compute kernel of pi. */ /* Compute kernel of pi. */
hi_inv = 1.0 / hi; hi_inv = 1.f / hi;
hi_inv_dim = pow_dimension(hi_inv); hi_inv_dim = pow_dimension(hi_inv);
xi = r * hi_inv; xi = r * hi_inv;
kernel_deval(xi, &wi, &wi_dx); kernel_deval(xi, &wi, &wi_dx);
/* Compute kernel of pj. */ /* Compute kernel of pj. */
hj_inv = 1.0 / hj; hj_inv = 1.f / hj;
hj_inv_dim = pow_dimension(hj_inv); hj_inv_dim = pow_dimension(hj_inv);
xj = r * hj_inv; xj = r * hj_inv;
kernel_deval(xj, &wj, &wj_dx); kernel_deval(xj, &wj, &wj_dx);
...@@ -322,7 +322,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_fluxes_common( ...@@ -322,7 +322,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_fluxes_common(
float Xi = Vi; float Xi = Vi;
float Xj = Vj; float Xj = Vj;
#ifdef GIZMO_VOLUME_CORRECTION #ifdef GIZMO_VOLUME_CORRECTION
if (fabsf(Vi - Vj) / min(Vi, Vj) > 1.5 * hydro_dimension) { if (fabsf(Vi - Vj) / min(Vi, Vj) > 1.5f * hydro_dimension) {
Xi = (Vi * hj + Vj * hi) / (hi + hj); Xi = (Vi * hj + Vj * hi) / (hi + hj);
Xj = Xi; Xj = Xi;
} }
...@@ -344,7 +344,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_fluxes_common( ...@@ -344,7 +344,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_fluxes_common(
Anorm *= Anorm * r2; Anorm *= Anorm * r2;
} }
if (Anorm == 0.) { if (Anorm == 0.f) {
/* if the interface has no area, nothing happens and we return */ /* if the interface has no area, nothing happens and we return */
/* continuing results in dividing by zero and NaN's... */ /* continuing results in dividing by zero and NaN's... */
return; return;
......
...@@ -34,9 +34,8 @@ ...@@ -34,9 +34,8 @@
__attribute__((always_inline)) INLINE static float __attribute__((always_inline)) INLINE static float
hydro_slope_limit_face_quantity(float phi_i, float phi_j, float phi_mid0, hydro_slope_limit_face_quantity(float phi_i, float phi_j, float phi_mid0,
float xij_norm, float r) { float xij_norm, float r_inv) {
float delta1, delta2, phimin, phimax, phibar, phiplus, phiminus, phi_mid;
const float psi1 = 0.5f; const float psi1 = 0.5f;
const float psi2 = 0.25f; const float psi2 = 0.25f;
...@@ -44,22 +43,24 @@ hydro_slope_limit_face_quantity(float phi_i, float phi_j, float phi_mid0, ...@@ -44,22 +43,24 @@ hydro_slope_limit_face_quantity(float phi_i, float phi_j, float phi_mid0,
return 0.0f; return 0.0f;
} }
delta1 = psi1 * fabsf(phi_i - phi_j); const float delta1 = psi1 * fabsf(phi_i - phi_j);
delta2 = psi2 * fabsf(phi_i - phi_j); const float delta2 = psi2 * fabsf(phi_i - phi_j);
phimin = min(phi_i, phi_j); const float phimin = min(phi_i, phi_j);
phimax = max(phi_i, phi_j); const float phimax = max(phi_i, phi_j);
phibar = phi_i + xij_norm / r * (phi_j - phi_i); const float phibar = phi_i + xij_norm * r_inv * (phi_j - phi_i);
float phiplus, phiminus, phi_mid;
/* if sign(phimax+delta1) == sign(phimax) */ /* if sign(phimax+delta1) == sign(phimax) */
if ((phimax + delta1) * phimax > 0.0f) { if ((phimax + delta1) * phimax > 0.0f) {
phiplus = phimax + delta1; phiplus = phimax + delta1;
} else { } else {
if (phimax != 0.) { if (phimax != 0.f) {
phiplus = phimax / (1.0f + delta1 / fabsf(phimax)); phiplus = phimax / (1.0f + delta1 / fabsf(phimax));
} else { } else {
phiplus = 0.; phiplus = 0.f;
} }
} }
...@@ -67,10 +68,10 @@ hydro_slope_limit_face_quantity(float phi_i, float phi_j, float phi_mid0, ...@@ -67,10 +68,10 @@ hydro_slope_limit_face_quantity(float phi_i, float phi_j, float phi_mid0,
if ((phimin - delta1) * phimin > 0.0f) { if ((phimin - delta1) * phimin > 0.0f) {
phiminus = phimin - delta1; phiminus = phimin - delta1;
} else { } else {
if (phimin != 0.) { if (phimin != 0.f) {
phiminus = phimin / (1.0f + delta1 / fabsf(phimin)); phiminus = phimin / (1.0f + delta1 / fabsf(phimin));
} else { } else {
phiminus = 0.; phiminus = 0.f;
} }
} }
...@@ -102,35 +103,35 @@ __attribute__((always_inline)) INLINE static void hydro_slope_limit_face( ...@@ -102,35 +103,35 @@ __attribute__((always_inline)) INLINE static void hydro_slope_limit_face(
float *Wi, float *Wj, float *dWi, float *dWj, float *xij_i, float *xij_j, float *Wi, float *Wj, float *dWi, float *dWj, float *xij_i, float *xij_j,
float r) { float r) {
float xij_i_norm, xij_j_norm; const float xij_i_norm =
xij_i_norm =
sqrtf(xij_i[0] * xij_i[0] + xij_i[1] * xij_i[1] + xij_i[2] * xij_i[2]); sqrtf(xij_i[0] * xij_i[0] + xij_i[1] * xij_i[1] + xij_i[2] * xij_i[2]);
xij_j_norm = const float xij_j_norm =
sqrtf(xij_j[0] * xij_j[0] + xij_j[1] * xij_j[1] + xij_j[2] * xij_j[2]); sqrtf(xij_j[0] * xij_j[0] + xij_j[1] * xij_j[1] + xij_j[2] * xij_j[2]);
const float r_inv = 1.f / r;
dWi[0] = hydro_slope_limit_face_quantity(Wi[0], Wj[0], Wi[0] + dWi[0], dWi[0] = hydro_slope_limit_face_quantity(Wi[0], Wj[0], Wi[0] + dWi[0],
xij_i_norm, r); xij_i_norm, r_inv);
dWi[1] = hydro_slope_limit_face_quantity(Wi[1], Wj[1], Wi[1] + dWi[1], dWi[1] = hydro_slope_limit_face_quantity(Wi[1], Wj[1], Wi[1] + dWi[1],
xij_i_norm, r); xij_i_norm, r_inv);
dWi[2] = hydro_slope_limit_face_quantity(Wi[2], Wj[2], Wi[2] + dWi[2], dWi[2] = hydro_slope_limit_face_quantity(Wi[2], Wj[2], Wi[2] + dWi[2],
xij_i_norm, r); xij_i_norm, r_inv);
dWi[3] = hydro_slope_limit_face_quantity(Wi[3], Wj[3], Wi[3] + dWi[3], dWi[3] = hydro_slope_limit_face_quantity(Wi[3], Wj[3], Wi[3] + dWi[3],
xij_i_norm, r); xij_i_norm, r_inv);
dWi[4] = hydro_slope_limit_face_quantity(Wi[4], Wj[4], Wi[4] + dWi[4], dWi[4] = hydro_slope_limit_face_quantity(Wi[4], Wj[4], Wi[4] + dWi[4],
xij_i_norm, r); xij_i_norm, r_inv);
dWj[0] = hydro_slope_limit_face_quantity(Wj[0], Wi[0], Wj[0] + dWj[0], dWj[0] = hydro_slope_limit_face_quantity(Wj[0], Wi[0], Wj[0] + dWj[0],
xij_j_norm, r); xij_j_norm, r_inv);
dWj[1] = hydro_slope_limit_face_quantity(Wj[1], Wi[1], Wj[1] + dWj[1], dWj[1] = hydro_slope_limit_face_quantity(Wj[1], Wi[1], Wj[1] + dWj[1],
xij_j_norm, r); xij_j_norm, r_inv);
dWj[2] = hydro_slope_limit_face_quantity(Wj[2], Wi[2], Wj[2] + dWj[2], dWj[2] = hydro_slope_limit_face_quantity(Wj[2], Wi[2], Wj[2] + dWj[2],
xij_j_norm, r); xij_j_norm, r_inv);
dWj[3] = hydro_slope_limit_face_quantity(Wj[3], Wi[3], Wj[3] + dWj[3], dWj[3] = hydro_slope_limit_face_quantity(Wj[3], Wi[3], Wj[3] + dWj[3],
xij_j_norm, r); xij_j_norm, r_inv);
dWj[4] = hydro_slope_limit_face_quantity(Wj[4], Wi[4], Wj[4] + dWj[4], dWj[4] = hydro_slope_limit_face_quantity(Wj[4], Wi[4], Wj[4] + dWj[4],
xij_j_norm, r); xij_j_norm, r_inv);
} }
#endif /* SWIFT_GIZMO_SLOPE_LIMITER_FACE_H */ #endif /* SWIFT_GIZMO_SLOPE_LIMITER_FACE_H */
...@@ -41,9 +41,9 @@ ...@@ -41,9 +41,9 @@
#endif #endif
#define gizmo_check_physical_quantity(name, quantity) \ #define gizmo_check_physical_quantity(name, quantity) \
if (quantity < 0.) { \ if (quantity < 0.f) { \
gizmo_unphysical_message(name, quantity); \ gizmo_unphysical_message(name, quantity); \
quantity = 0.; \ quantity = 0.f; \
} }
#else // defined(GIZMO_UNPHYSICAL_ERROR) || defined(GIZMO_UNPHYSICAL_RESCUE) #else // defined(GIZMO_UNPHYSICAL_ERROR) || defined(GIZMO_UNPHYSICAL_RESCUE)
......
...@@ -30,9 +30,9 @@ __attribute__((always_inline)) INLINE static void hydro_velocities_init( ...@@ -30,9 +30,9 @@ __attribute__((always_inline)) INLINE static void hydro_velocities_init(
struct part* restrict p, struct xpart* restrict xp) { struct part* restrict p, struct xpart* restrict xp) {
#ifdef GIZMO_FIX_PARTICLES #ifdef GIZMO_FIX_PARTICLES
p->v[0] = 0.; p->v[0] = 0.f;
p->v[1] = 0.; p->v[1] = 0.f;
p->v[2] = 0.; p->v[2] = 0.f;
#else #else
p->v[0] = p->primitives.v[0]; p->v[0] = p->primitives.v[0];
p->v[1] = p->primitives.v[1]; p->v[1] = p->primitives.v[1];
...@@ -87,13 +87,13 @@ __attribute__((always_inline)) INLINE static void hydro_velocities_set( ...@@ -87,13 +87,13 @@ __attribute__((always_inline)) INLINE static void hydro_velocities_set(
/* We first set the particle velocity. */ /* We first set the particle velocity. */
#ifdef GIZMO_FIX_PARTICLES #ifdef GIZMO_FIX_PARTICLES
p->v[0] = 0.; p->v[0] = 0.f;
p->v[1] = 0.; p->v[1] = 0.f;
p->v[2] = 0.; p->v[2] = 0.f;
#else // GIZMO_FIX_PARTICLES #else // GIZMO_FIX_PARTICLES
if (p->conserved.mass > 0. && p->primitives.rho > 0.) { if (p->conserved.mass > 0.f && p->primitives.rho > 0.f) {
/* Normal case: set particle velocity to fluid velocity. */ /* Normal case: set particle velocity to fluid velocity. */
p->v[0] = p->conserved.momentum[0] / p->conserved.mass; p->v[0] = p->conserved.momentum[0] / p->conserved.mass;
p->v[1] = p->conserved.momentum[1] / p->conserved.mass; p->v[1] = p->conserved.momentum[1] / p->conserved.mass;
...@@ -112,18 +112,18 @@ __attribute__((always_inline)) INLINE static void hydro_velocities_set( ...@@ -112,18 +112,18 @@ __attribute__((always_inline)) INLINE static void hydro_velocities_set(
ds[2] = p->geometry.centroid[2]; ds[2] = p->geometry.centroid[2];
const float d = sqrtf(ds[0] * ds[0] + ds[1] * ds[1] + ds[2] * ds[2]); const float d = sqrtf(ds[0] * ds[0] + ds[1] * ds[1] + ds[2] * ds[2]);
const float R = get_radius_dimension_sphere(p->geometry.volume); const float R = get_radius_dimension_sphere(p->geometry.volume);
const float eta = 0.25; const float eta = 0.25f;
const float etaR = eta * R; const float etaR = eta * R;
const float xi = 1.; const float xi = 1.f;
const float soundspeed = const float soundspeed =
sqrtf(hydro_gamma * p->primitives.P / p->primitives.rho); sqrtf(hydro_gamma * p->primitives.P / p->primitives.rho);
/* We only apply the correction if the offset between centroid and position /* We only apply the correction if the offset between centroid and position
is is
too large. */ too large. */
if (d > 0.9 * etaR) { if (d > 0.9f * etaR) {
float fac = xi * soundspeed / d; float fac = xi * soundspeed / d;
if (d < 1.1 * etaR) { if (d < 1.1f * etaR) {
fac *= 5. * (d - 0.9 * etaR) / etaR; fac *= 5.f * (d - 0.9f * etaR) / etaR;
} }
p->v[0] -= ds[0] * fac; p->v[0] -= ds[0] * fac;
p->v[1] -= ds[1] * fac; p->v[1] -= ds[1] * fac;
...@@ -133,9 +133,9 @@ __attribute__((always_inline)) INLINE static void hydro_velocities_set( ...@@ -133,9 +133,9 @@ __attribute__((always_inline)) INLINE static void hydro_velocities_set(
#endif // GIZMO_STEER_MOTION #endif // GIZMO_STEER_MOTION
} else { } else {
/* Vacuum particles have no fluid velocity. */ /* Vacuum particles have no fluid velocity. */
p->v[0] = 0.; p->v[0] = 0.f;
p->v[1] = 0.; p->v[1] = 0.f;
p->v[2] = 0.; p->v[2] = 0.f;
} }
#endif // GIZMO_FIX_PARTICLES #endif // GIZMO_FIX_PARTICLES
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment