Commit d6c10934 authored by Matthieu Schaller's avatar Matthieu Schaller
Browse files

Merge branch 'gizmo_speedup' into 'master'

Improvements to GIZMO implementation

See merge request !524
parents abdb92f4 d847056c
...@@ -65,7 +65,7 @@ AM_SOURCES = space.c runner.c queue.c task.c cell.c engine.c \ ...@@ -65,7 +65,7 @@ AM_SOURCES = space.c runner.c queue.c task.c cell.c engine.c \
nobase_noinst_HEADERS = align.h approx_math.h atomic.h barrier.h cycle.h error.h inline.h kernel_hydro.h kernel_gravity.h \ nobase_noinst_HEADERS = align.h approx_math.h atomic.h barrier.h cycle.h error.h inline.h kernel_hydro.h kernel_gravity.h \
kernel_long_gravity.h vector.h cache.h runner_doiact.h runner_doiact_vec.h runner_doiact_grav.h runner_doiact_fft.h \ kernel_long_gravity.h vector.h cache.h runner_doiact.h runner_doiact_vec.h runner_doiact_grav.h runner_doiact_fft.h \
runner_doiact_nosort.h units.h intrinsics.h minmax.h kick.h timestep.h drift.h adiabatic_index.h io_properties.h \ runner_doiact_nosort.h units.h intrinsics.h minmax.h kick.h timestep.h drift.h adiabatic_index.h io_properties.h \
dimension.h part_type.h periodic.h memswap.h dump.h logger.h \ dimension.h part_type.h periodic.h memswap.h dump.h logger.h sign.h \
gravity.h gravity_io.h gravity_cache.h \ gravity.h gravity_io.h gravity_cache.h \
gravity/Default/gravity.h gravity/Default/gravity_iact.h gravity/Default/gravity_io.h \ gravity/Default/gravity.h gravity/Default/gravity_iact.h gravity/Default/gravity_io.h \
gravity/Default/gravity_debug.h gravity/Default/gravity_part.h \ gravity/Default/gravity_debug.h gravity/Default/gravity_part.h \
...@@ -110,6 +110,7 @@ nobase_noinst_HEADERS = align.h approx_math.h atomic.h barrier.h cycle.h error.h ...@@ -110,6 +110,7 @@ nobase_noinst_HEADERS = align.h approx_math.h atomic.h barrier.h cycle.h error.h
hydro/Shadowswift/voronoi_cell.h \ hydro/Shadowswift/voronoi_cell.h \
riemann/riemann_hllc.h riemann/riemann_trrs.h \ riemann/riemann_hllc.h riemann/riemann_trrs.h \
riemann/riemann_exact.h riemann/riemann_vacuum.h \ riemann/riemann_exact.h riemann/riemann_vacuum.h \
riemann/riemann_checks.h \
stars.h stars_io.h \ stars.h stars_io.h \
stars/Default/star.h stars/Default/star_iact.h stars/Default/star_io.h \ stars/Default/star.h stars/Default/star_iact.h stars/Default/star_io.h \
stars/Default/star_debug.h stars/Default/star_part.h \ stars/Default/star_debug.h stars/Default/star_part.h \
......
...@@ -42,6 +42,7 @@ ...@@ -42,6 +42,7 @@
#define hydro_gamma 1.66666666666666667f #define hydro_gamma 1.66666666666666667f
#define hydro_gamma_minus_one 0.66666666666666667f #define hydro_gamma_minus_one 0.66666666666666667f
#define hydro_gamma_plus_one 2.66666666666666667f
#define hydro_one_over_gamma_minus_one 1.5f #define hydro_one_over_gamma_minus_one 1.5f
#define hydro_gamma_plus_one_over_two_gamma 0.8f #define hydro_gamma_plus_one_over_two_gamma 0.8f
#define hydro_gamma_minus_one_over_two_gamma 0.2f #define hydro_gamma_minus_one_over_two_gamma 0.2f
...@@ -56,6 +57,7 @@ ...@@ -56,6 +57,7 @@
#define hydro_gamma 1.4f #define hydro_gamma 1.4f
#define hydro_gamma_minus_one 0.4f #define hydro_gamma_minus_one 0.4f
#define hydro_gamma_plus_one 2.4f
#define hydro_one_over_gamma_minus_one 2.5f #define hydro_one_over_gamma_minus_one 2.5f
#define hydro_gamma_plus_one_over_two_gamma 0.857142857f #define hydro_gamma_plus_one_over_two_gamma 0.857142857f
#define hydro_gamma_minus_one_over_two_gamma 0.142857143f #define hydro_gamma_minus_one_over_two_gamma 0.142857143f
...@@ -70,6 +72,7 @@ ...@@ -70,6 +72,7 @@
#define hydro_gamma 1.33333333333333333f #define hydro_gamma 1.33333333333333333f
#define hydro_gamma_minus_one 0.33333333333333333f #define hydro_gamma_minus_one 0.33333333333333333f
#define hydro_gamma_plus_one 2.33333333333333333f
#define hydro_one_over_gamma_minus_one 3.f #define hydro_one_over_gamma_minus_one 3.f
#define hydro_gamma_plus_one_over_two_gamma 0.875f #define hydro_gamma_plus_one_over_two_gamma 0.875f
#define hydro_gamma_minus_one_over_two_gamma 0.125f #define hydro_gamma_minus_one_over_two_gamma 0.125f
...@@ -84,6 +87,7 @@ ...@@ -84,6 +87,7 @@
#define hydro_gamma 2.f #define hydro_gamma 2.f
#define hydro_gamma_minus_one 1.f #define hydro_gamma_minus_one 1.f
#define hydro_gamma_plus_one 3.f
#define hydro_one_over_gamma_minus_one 1.f #define hydro_one_over_gamma_minus_one 1.f
#define hydro_gamma_plus_one_over_two_gamma 0.75f #define hydro_gamma_plus_one_over_two_gamma 0.75f
#define hydro_gamma_minus_one_over_two_gamma 0.25f #define hydro_gamma_minus_one_over_two_gamma 0.25f
......
...@@ -2405,6 +2405,15 @@ void cell_drift_part(struct cell *c, const struct engine *e, int force) { ...@@ -2405,6 +2405,15 @@ void cell_drift_part(struct cell *c, const struct engine *e, int force) {
drift_part(p, xp, dt_drift, dt_kick_hydro, dt_kick_grav, dt_therm, drift_part(p, xp, dt_drift, dt_kick_hydro, dt_kick_grav, dt_therm,
ti_old_part, ti_current); ti_old_part, ti_current);
#ifdef SWIFT_DEBUG_CHECKS
/* Make sure the particle does not drift by more than a box length. */
if (fabsf(xp->v_full[0] * dt_drift) > e->s->dim[0] ||
fabsf(xp->v_full[1] * dt_drift) > e->s->dim[1] ||
fabsf(xp->v_full[2] * dt_drift) > e->s->dim[2]) {
error("Particle drifts by more than a box length!");
}
#endif
/* Limit h to within the allowed range */ /* Limit h to within the allowed range */
p->h = min(p->h, hydro_h_max); p->h = min(p->h, hydro_h_max);
......
...@@ -68,7 +68,9 @@ ...@@ -68,7 +68,9 @@
#define GIZMO_UNPHYSICAL_RESCUE #define GIZMO_UNPHYSICAL_RESCUE
/* Show a warning message if an unphysical value was reset (only works if /* Show a warning message if an unphysical value was reset (only works if
GIZMO_UNPHYSICAL_RESCUE is also selected). */ GIZMO_UNPHYSICAL_RESCUE is also selected). */
#ifdef SWIFT_DEBUG_CHECKS
#define GIZMO_UNPHYSICAL_WARNING #define GIZMO_UNPHYSICAL_WARNING
#endif
/* Parameters that control how GIZMO handles pathological particle /* Parameters that control how GIZMO handles pathological particle
configurations. */ configurations. */
......
...@@ -362,8 +362,9 @@ __attribute__((always_inline)) INLINE static void hydro_end_density( ...@@ -362,8 +362,9 @@ __attribute__((always_inline)) INLINE static void hydro_end_density(
#endif #endif
/* sanity checks */ /* sanity checks */
gizmo_check_physical_quantity("density", p->primitives.rho); gizmo_check_physical_quantities("density", "pressure", p->primitives.rho,
gizmo_check_physical_quantity("pressure", p->primitives.P); p->primitives.v[0], p->primitives.v[1],
p->primitives.v[2], p->primitives.P);
#ifdef GIZMO_LLOYD_ITERATION #ifdef GIZMO_LLOYD_ITERATION
/* overwrite primitive variables to make sure they still have safe values */ /* overwrite primitive variables to make sure they still have safe values */
...@@ -563,7 +564,6 @@ __attribute__((always_inline)) INLINE static void hydro_predict_extra( ...@@ -563,7 +564,6 @@ __attribute__((always_inline)) INLINE static void hydro_predict_extra(
p->primitives.v[2] += p->primitives.v[2] +=
p->conserved.flux.momentum[2] * dt_drift / p->conserved.mass; p->conserved.flux.momentum[2] * dt_drift / p->conserved.mass;
// MATTHIEU: Bert is this correct?
#if !defined(EOS_ISOTHERMAL_GAS) #if !defined(EOS_ISOTHERMAL_GAS)
const float u = p->conserved.energy + p->conserved.flux.energy * dt_therm; const float u = p->conserved.energy + p->conserved.flux.energy * dt_therm;
p->primitives.P = p->primitives.P =
...@@ -576,6 +576,10 @@ __attribute__((always_inline)) INLINE static void hydro_predict_extra( ...@@ -576,6 +576,10 @@ __attribute__((always_inline)) INLINE static void hydro_predict_extra(
error("Zero or negative smoothing length (%g)!", p->h); error("Zero or negative smoothing length (%g)!", p->h);
} }
#endif #endif
gizmo_check_physical_quantities("density", "pressure", p->primitives.rho,
p->primitives.v[0], p->primitives.v[1],
p->primitives.v[2], p->primitives.P);
} }
/** /**
...@@ -632,13 +636,14 @@ __attribute__((always_inline)) INLINE static void hydro_kick_extra( ...@@ -632,13 +636,14 @@ __attribute__((always_inline)) INLINE static void hydro_kick_extra(
/* Apply the minimal energy limit */ /* Apply the minimal energy limit */
const float min_energy = const float min_energy =
hydro_props->minimal_internal_energy * cosmo->a_factor_internal_energy; hydro_props->minimal_internal_energy * cosmo->a_factor_internal_energy;
if (p->conserved.energy < min_energy) { if (p->conserved.energy < min_energy * p->conserved.mass) {
p->conserved.energy = min_energy; p->conserved.energy = min_energy * p->conserved.mass;
p->conserved.flux.energy = 0.f; p->conserved.flux.energy = 0.f;
} }
gizmo_check_physical_quantity("mass", p->conserved.mass); gizmo_check_physical_quantities(
gizmo_check_physical_quantity("energy", p->conserved.energy); "mass", "energy", p->conserved.mass, p->conserved.momentum[0],
p->conserved.momentum[1], p->conserved.momentum[2], p->conserved.energy);
#ifdef SWIFT_DEBUG_CHECKS #ifdef SWIFT_DEBUG_CHECKS
/* Note that this check will only have effect if no GIZMO_UNPHYSICAL option /* Note that this check will only have effect if no GIZMO_UNPHYSICAL option
...@@ -675,6 +680,10 @@ __attribute__((always_inline)) INLINE static void hydro_kick_extra( ...@@ -675,6 +680,10 @@ __attribute__((always_inline)) INLINE static void hydro_kick_extra(
p->conserved.momentum[0] += dt * p->conserved.mass * a_grav[0]; p->conserved.momentum[0] += dt * p->conserved.mass * a_grav[0];
p->conserved.momentum[1] += dt * p->conserved.mass * a_grav[1]; p->conserved.momentum[1] += dt * p->conserved.mass * a_grav[1];
p->conserved.momentum[2] += dt * p->conserved.mass * a_grav[2]; p->conserved.momentum[2] += dt * p->conserved.mass * a_grav[2];
p->conserved.energy += dt * (p->gravity.mflux[0] * a_grav[0] +
p->gravity.mflux[1] * a_grav[1] +
p->gravity.mflux[2] * a_grav[2]);
} }
hydro_velocities_set(p, xp); hydro_velocities_set(p, xp);
......
...@@ -93,21 +93,15 @@ __attribute__((always_inline)) INLINE static void hydro_gradients_finalize( ...@@ -93,21 +93,15 @@ __attribute__((always_inline)) INLINE static void hydro_gradients_finalize(
*/ */
__attribute__((always_inline)) INLINE static void hydro_gradients_predict( __attribute__((always_inline)) INLINE static void hydro_gradients_predict(
struct part* restrict pi, struct part* restrict pj, float hi, float hj, struct part* restrict pi, struct part* restrict pj, float hi, float hj,
const float* dx, float r, float* xij_i, float* Wi, float* Wj) { const float* dx, float r, const float* xij_i, float* Wi, float* Wj) {
float dWi[5], dWj[5];
float xij_j[3];
int k;
float xfac;
/* perform gradient reconstruction in space and time */ /* perform gradient reconstruction in space and time */
/* space */
/* Compute interface position (relative to pj, since we don't need the actual /* Compute interface position (relative to pj, since we don't need the actual
* position) */ * position) eqn. (8) */
/* eqn. (8) */ const float xfac = hj / (hi + hj);
xfac = hj / (hi + hj); const float xij_j[3] = {xfac * dx[0], xfac * dx[1], xfac * dx[2]};
for (k = 0; k < 3; k++) xij_j[k] = xfac * dx[k];
float dWi[5];
dWi[0] = pi->primitives.gradients.rho[0] * xij_i[0] + dWi[0] = pi->primitives.gradients.rho[0] * xij_i[0] +
pi->primitives.gradients.rho[1] * xij_i[1] + pi->primitives.gradients.rho[1] * xij_i[1] +
pi->primitives.gradients.rho[2] * xij_i[2]; pi->primitives.gradients.rho[2] * xij_i[2];
...@@ -124,6 +118,7 @@ __attribute__((always_inline)) INLINE static void hydro_gradients_predict( ...@@ -124,6 +118,7 @@ __attribute__((always_inline)) INLINE static void hydro_gradients_predict(
pi->primitives.gradients.P[1] * xij_i[1] + pi->primitives.gradients.P[1] * xij_i[1] +
pi->primitives.gradients.P[2] * xij_i[2]; pi->primitives.gradients.P[2] * xij_i[2];
float dWj[5];
dWj[0] = pj->primitives.gradients.rho[0] * xij_j[0] + dWj[0] = pj->primitives.gradients.rho[0] * xij_j[0] +
pj->primitives.gradients.rho[1] * xij_j[1] + pj->primitives.gradients.rho[1] * xij_j[1] +
pj->primitives.gradients.rho[2] * xij_j[2]; pj->primitives.gradients.rho[2] * xij_j[2];
...@@ -140,6 +135,7 @@ __attribute__((always_inline)) INLINE static void hydro_gradients_predict( ...@@ -140,6 +135,7 @@ __attribute__((always_inline)) INLINE static void hydro_gradients_predict(
pj->primitives.gradients.P[1] * xij_j[1] + pj->primitives.gradients.P[1] * xij_j[1] +
pj->primitives.gradients.P[2] * xij_j[2]; pj->primitives.gradients.P[2] * xij_j[2];
/* Apply the slope limiter at this interface */
hydro_slope_limit_face(Wi, Wj, dWi, dWj, xij_i, xij_j, r); hydro_slope_limit_face(Wi, Wj, dWi, dWj, xij_i, xij_j, r);
Wi[0] += dWi[0]; Wi[0] += dWi[0];
...@@ -154,10 +150,10 @@ __attribute__((always_inline)) INLINE static void hydro_gradients_predict( ...@@ -154,10 +150,10 @@ __attribute__((always_inline)) INLINE static void hydro_gradients_predict(
Wj[3] += dWj[3]; Wj[3] += dWj[3];
Wj[4] += dWj[4]; Wj[4] += dWj[4];
gizmo_check_physical_quantity("density", Wi[0]); gizmo_check_physical_quantities("density", "pressure", Wi[0], Wi[1], Wi[2],
gizmo_check_physical_quantity("pressure", Wi[4]); Wi[3], Wi[4]);
gizmo_check_physical_quantity("density", Wj[0]); gizmo_check_physical_quantities("density", "pressure", Wj[0], Wj[1], Wj[2],
gizmo_check_physical_quantity("pressure", Wj[4]); Wj[3], Wj[4]);
} }
#endif /* SWIFT_HYDRO_GIZMO_GRADIENTS_H */ #endif /* SWIFT_HYDRO_GIZMO_GRADIENTS_H */
...@@ -65,18 +65,17 @@ __attribute__((always_inline)) INLINE static void hydro_gradients_collect( ...@@ -65,18 +65,17 @@ __attribute__((always_inline)) INLINE static void hydro_gradients_collect(
float r2, const float *dx, float hi, float hj, struct part *restrict pi, float r2, const float *dx, float hi, float hj, struct part *restrict pi,
struct part *restrict pj) { struct part *restrict pj) {
float r = sqrtf(r2); const float r_inv = 1.f / sqrtf(r2);
float xi, xj; const float r = r2 * r_inv;
float hi_inv, hj_inv;
float wi, wj, wi_dx, wj_dx; float wi, wj, wi_dx, wj_dx;
int k, l;
float Bi[3][3]; float Bi[3][3];
float Bj[3][3]; float Bj[3][3];
float Wi[5], Wj[5]; float Wi[5], Wj[5];
/* Initialize local variables */ /* Initialize local variables */
for (k = 0; k < 3; k++) { for (int k = 0; k < 3; k++) {
for (l = 0; l < 3; l++) { for (int l = 0; l < 3; l++) {
Bi[k][l] = pi->geometry.matrix_E[k][l]; Bi[k][l] = pi->geometry.matrix_E[k][l];
Bj[k][l] = pj->geometry.matrix_E[k][l]; Bj[k][l] = pj->geometry.matrix_E[k][l];
} }
...@@ -93,8 +92,8 @@ __attribute__((always_inline)) INLINE static void hydro_gradients_collect( ...@@ -93,8 +92,8 @@ __attribute__((always_inline)) INLINE static void hydro_gradients_collect(
Wj[4] = pj->primitives.P; Wj[4] = pj->primitives.P;
/* Compute kernel of pi. */ /* Compute kernel of pi. */
hi_inv = 1.0 / hi; const float hi_inv = 1.f / hi;
xi = r * hi_inv; const float xi = r * hi_inv;
kernel_deval(xi, &wi, &wi_dx); kernel_deval(xi, &wi, &wi_dx);
if (pi->density.wcorr > const_gizmo_min_wcorr) { if (pi->density.wcorr > const_gizmo_min_wcorr) {
...@@ -153,46 +152,46 @@ __attribute__((always_inline)) INLINE static void hydro_gradients_collect( ...@@ -153,46 +152,46 @@ __attribute__((always_inline)) INLINE static void hydro_gradients_collect(
/* The gradient matrix was not well-behaved, switch to SPH gradients */ /* The gradient matrix was not well-behaved, switch to SPH gradients */
pi->primitives.gradients.rho[0] -= pi->primitives.gradients.rho[0] -=
wi_dx * dx[0] * (pi->primitives.rho - pj->primitives.rho) / r; wi_dx * dx[0] * (pi->primitives.rho - pj->primitives.rho) * r_inv;
pi->primitives.gradients.rho[1] -= pi->primitives.gradients.rho[1] -=
wi_dx * dx[1] * (pi->primitives.rho - pj->primitives.rho) / r; wi_dx * dx[1] * (pi->primitives.rho - pj->primitives.rho) * r_inv;
pi->primitives.gradients.rho[2] -= pi->primitives.gradients.rho[2] -=
wi_dx * dx[2] * (pi->primitives.rho - pj->primitives.rho) / r; wi_dx * dx[2] * (pi->primitives.rho - pj->primitives.rho) * r_inv;
pi->primitives.gradients.v[0][0] -= pi->primitives.gradients.v[0][0] -=
wi_dx * dx[0] * (pi->primitives.v[0] - pj->primitives.v[0]) / r; wi_dx * dx[0] * (pi->primitives.v[0] - pj->primitives.v[0]) * r_inv;
pi->primitives.gradients.v[0][1] -= pi->primitives.gradients.v[0][1] -=
wi_dx * dx[1] * (pi->primitives.v[0] - pj->primitives.v[0]) / r; wi_dx * dx[1] * (pi->primitives.v[0] - pj->primitives.v[0]) * r_inv;
pi->primitives.gradients.v[0][2] -= pi->primitives.gradients.v[0][2] -=
wi_dx * dx[2] * (pi->primitives.v[0] - pj->primitives.v[0]) / r; wi_dx * dx[2] * (pi->primitives.v[0] - pj->primitives.v[0]) * r_inv;
pi->primitives.gradients.v[1][0] -= pi->primitives.gradients.v[1][0] -=
wi_dx * dx[0] * (pi->primitives.v[1] - pj->primitives.v[1]) / r; wi_dx * dx[0] * (pi->primitives.v[1] - pj->primitives.v[1]) * r_inv;
pi->primitives.gradients.v[1][1] -= pi->primitives.gradients.v[1][1] -=
wi_dx * dx[1] * (pi->primitives.v[1] - pj->primitives.v[1]) / r; wi_dx * dx[1] * (pi->primitives.v[1] - pj->primitives.v[1]) * r_inv;
pi->primitives.gradients.v[1][2] -= pi->primitives.gradients.v[1][2] -=
wi_dx * dx[2] * (pi->primitives.v[1] - pj->primitives.v[1]) / r; wi_dx * dx[2] * (pi->primitives.v[1] - pj->primitives.v[1]) * r_inv;
pi->primitives.gradients.v[2][0] -= pi->primitives.gradients.v[2][0] -=
wi_dx * dx[0] * (pi->primitives.v[2] - pj->primitives.v[2]) / r; wi_dx * dx[0] * (pi->primitives.v[2] - pj->primitives.v[2]) * r_inv;
pi->primitives.gradients.v[2][1] -= pi->primitives.gradients.v[2][1] -=
wi_dx * dx[1] * (pi->primitives.v[2] - pj->primitives.v[2]) / r; wi_dx * dx[1] * (pi->primitives.v[2] - pj->primitives.v[2]) * r_inv;
pi->primitives.gradients.v[2][2] -= pi->primitives.gradients.v[2][2] -=
wi_dx * dx[2] * (pi->primitives.v[2] - pj->primitives.v[2]) / r; wi_dx * dx[2] * (pi->primitives.v[2] - pj->primitives.v[2]) * r_inv;
pi->primitives.gradients.P[0] -= pi->primitives.gradients.P[0] -=
wi_dx * dx[0] * (pi->primitives.P - pj->primitives.P) / r; wi_dx * dx[0] * (pi->primitives.P - pj->primitives.P) * r_inv;
pi->primitives.gradients.P[1] -= pi->primitives.gradients.P[1] -=
wi_dx * dx[1] * (pi->primitives.P - pj->primitives.P) / r; wi_dx * dx[1] * (pi->primitives.P - pj->primitives.P) * r_inv;
pi->primitives.gradients.P[2] -= pi->primitives.gradients.P[2] -=
wi_dx * dx[2] * (pi->primitives.P - pj->primitives.P) / r; wi_dx * dx[2] * (pi->primitives.P - pj->primitives.P) * r_inv;
} }
hydro_slope_limit_cell_collect(pi, pj, r); hydro_slope_limit_cell_collect(pi, pj, r);
/* Compute kernel of pj. */ /* Compute kernel of pj. */
hj_inv = 1.0 / hj; const float hj_inv = 1.f / hj;
xj = r * hj_inv; const float xj = r * hj_inv;
kernel_deval(xj, &wj, &wj_dx); kernel_deval(xj, &wj, &wj_dx);
if (pj->density.wcorr > const_gizmo_min_wcorr) { if (pj->density.wcorr > const_gizmo_min_wcorr) {
...@@ -252,38 +251,38 @@ __attribute__((always_inline)) INLINE static void hydro_gradients_collect( ...@@ -252,38 +251,38 @@ __attribute__((always_inline)) INLINE static void hydro_gradients_collect(
/* SPH gradients */ /* SPH gradients */
pj->primitives.gradients.rho[0] -= pj->primitives.gradients.rho[0] -=
wj_dx * dx[0] * (pi->primitives.rho - pj->primitives.rho) / r; wj_dx * dx[0] * (pi->primitives.rho - pj->primitives.rho) * r_inv;
pj->primitives.gradients.rho[1] -= pj->primitives.gradients.rho[1] -=
wj_dx * dx[1] * (pi->primitives.rho - pj->primitives.rho) / r; wj_dx * dx[1] * (pi->primitives.rho - pj->primitives.rho) * r_inv;
pj->primitives.gradients.rho[2] -= pj->primitives.gradients.rho[2] -=
wj_dx * dx[2] * (pi->primitives.rho - pj->primitives.rho) / r; wj_dx * dx[2] * (pi->primitives.rho - pj->primitives.rho) * r_inv;
pj->primitives.gradients.v[0][0] -= pj->primitives.gradients.v[0][0] -=
wj_dx * dx[0] * (pi->primitives.v[0] - pj->primitives.v[0]) / r; wj_dx * dx[0] * (pi->primitives.v[0] - pj->primitives.v[0]) * r_inv;
pj->primitives.gradients.v[0][1] -= pj->primitives.gradients.v[0][1] -=
wj_dx * dx[1] * (pi->primitives.v[0] - pj->primitives.v[0]) / r; wj_dx * dx[1] * (pi->primitives.v[0] - pj->primitives.v[0]) * r_inv;
pj->primitives.gradients.v[0][2] -= pj->primitives.gradients.v[0][2] -=
wj_dx * dx[2] * (pi->primitives.v[0] - pj->primitives.v[0]) / r; wj_dx * dx[2] * (pi->primitives.v[0] - pj->primitives.v[0]) * r_inv;
pj->primitives.gradients.v[1][0] -= pj->primitives.gradients.v[1][0] -=
wj_dx * dx[0] * (pi->primitives.v[1] - pj->primitives.v[1]) / r; wj_dx * dx[0] * (pi->primitives.v[1] - pj->primitives.v[1]) * r_inv;
pj->primitives.gradients.v[1][1] -= pj->primitives.gradients.v[1][1] -=
wj_dx * dx[1] * (pi->primitives.v[1] - pj->primitives.v[1]) / r; wj_dx * dx[1] * (pi->primitives.v[1] - pj->primitives.v[1]) * r_inv;
pj->primitives.gradients.v[1][2] -= pj->primitives.gradients.v[1][2] -=
wj_dx * dx[2] * (pi->primitives.v[1] - pj->primitives.v[1]) / r; wj_dx * dx[2] * (pi->primitives.v[1] - pj->primitives.v[1]) * r_inv;
pj->primitives.gradients.v[2][0] -= pj->primitives.gradients.v[2][0] -=
wj_dx * dx[0] * (pi->primitives.v[2] - pj->primitives.v[2]) / r; wj_dx * dx[0] * (pi->primitives.v[2] - pj->primitives.v[2]) * r_inv;
pj->primitives.gradients.v[2][1] -= pj->primitives.gradients.v[2][1] -=
wj_dx * dx[1] * (pi->primitives.v[2] - pj->primitives.v[2]) / r; wj_dx * dx[1] * (pi->primitives.v[2] - pj->primitives.v[2]) * r_inv;
pj->primitives.gradients.v[2][2] -= pj->primitives.gradients.v[2][2] -=
wj_dx * dx[2] * (pi->primitives.v[2] - pj->primitives.v[2]) / r; wj_dx * dx[2] * (pi->primitives.v[2] - pj->primitives.v[2]) * r_inv;
pj->primitives.gradients.P[0] -= pj->primitives.gradients.P[0] -=
wj_dx * dx[0] * (pi->primitives.P - pj->primitives.P) / r; wj_dx * dx[0] * (pi->primitives.P - pj->primitives.P) * r_inv;
pj->primitives.gradients.P[1] -= pj->primitives.gradients.P[1] -=
wj_dx * dx[1] * (pi->primitives.P - pj->primitives.P) / r; wj_dx * dx[1] * (pi->primitives.P - pj->primitives.P) * r_inv;
pj->primitives.gradients.P[2] -= pj->primitives.gradients.P[2] -=
wj_dx * dx[2] * (pi->primitives.P - pj->primitives.P) / r; wj_dx * dx[2] * (pi->primitives.P - pj->primitives.P) * r_inv;
} }
hydro_slope_limit_cell_collect(pj, pi, r); hydro_slope_limit_cell_collect(pj, pi, r);
...@@ -304,17 +303,15 @@ hydro_gradients_nonsym_collect(float r2, const float *dx, float hi, float hj, ...@@ -304,17 +303,15 @@ hydro_gradients_nonsym_collect(float r2, const float *dx, float hi, float hj,
struct part *restrict pi, struct part *restrict pi,
struct part *restrict pj) { struct part *restrict pj) {
float r = sqrtf(r2); const float r_inv = 1.f / sqrtf(r2);
float xi; const float r = r2 * r_inv;
float hi_inv;
float wi, wi_dx;
int k, l;
float Bi[3][3]; float Bi[3][3];
float Wi[5], Wj[5]; float Wi[5], Wj[5];
/* Initialize local variables */ /* Initialize local variables */
for (k = 0; k < 3; k++) { for (int k = 0; k < 3; k++) {
for (l = 0; l < 3; l++) { for (int l = 0; l < 3; l++) {
Bi[k][l] = pi->geometry.matrix_E[k][l]; Bi[k][l] = pi->geometry.matrix_E[k][l];
} }
} }
...@@ -330,8 +327,9 @@ hydro_gradients_nonsym_collect(float r2, const float *dx, float hi, float hj, ...@@ -330,8 +327,9 @@ hydro_gradients_nonsym_collect(float r2, const float *dx, float hi, float hj,
Wj[4] = pj->primitives.P; Wj[4] = pj->primitives.P;
/* Compute kernel of pi. */ /* Compute kernel of pi. */
hi_inv = 1.0 / hi; float wi, wi_dx;
xi = r * hi_inv; const float hi_inv = 1.f / hi;
const float xi = r * hi_inv;
kernel_deval(xi, &wi, &wi_dx); kernel_deval(xi, &wi, &wi_dx);
if (pi->density.wcorr > const_gizmo_min_wcorr) { if (pi->density.wcorr > const_gizmo_min_wcorr) {
...@@ -390,38 +388,38 @@ hydro_gradients_nonsym_collect(float r2, const float *dx, float hi, float hj, ...@@ -390,38 +388,38 @@ hydro_gradients_nonsym_collect(float r2, const float *dx, float hi, float hj,
/* Gradient matrix is not well-behaved, switch to SPH gradients */ /* Gradient matrix is not well-behaved, switch to SPH gradients */
pi->primitives.gradients.rho[0] -= pi->primitives.gradients.rho[0] -=
wi_dx * dx[0] * (pi->primitives.rho - pj->primitives.rho) / r; wi_dx * dx[0] * (pi->primitives.rho - pj->primitives.rho) * r_inv;
pi->primitives.gradients.rho[1] -= pi->primitives.gradients.rho[1] -=
wi_dx * dx[1] * (pi->primitives.rho - pj->primitives.rho) / r; wi_dx * dx[1] * (pi->primitives.rho - pj->primitives.rho) * r_inv;
pi->primitives.gradients.rho[2] -= pi->primitives.gradients.rho[2] -=
wi_dx * dx[2] * (pi->primitives.rho - pj->primitives.rho) / r; wi_dx * dx[2] * (pi->primitives.rho - pj->primitives.rho) * r_inv;
pi->primitives.gradients.v[0][0] -= pi->primitives.gradients.v[0][0] -=
wi_dx * dx[0] * (pi->primitives.v[0] - pj->primitives.v[0]) / r; wi_dx * dx[0] * (pi->primitives.v[0] - pj->primitives.v[0]) * r_inv;
pi->primitives.gradients.v[0][1] -= pi->primitives.gradients.v[0][1] -=
wi_dx * dx[1] * (pi->primitives.v[0] - pj->primitives.v[0]) / r; wi_dx * dx[1] * (pi->primitives.v[0] - pj->primitives.v[0]) * r_inv;
pi->primitives.gradients.v[0][2] -= pi->primitives.gradients.v[0][2] -=
wi_dx * dx[2] * (pi->primitives.v[0] - pj->primitives.v[0]) / r; wi_dx * dx[2] * (pi->primitives.v[0] - pj->primitives.v[0]) * r_inv;
pi->primitives.gradients.v[1][0] -= pi->primitives.gradients.v[1][0] -=
wi_dx * dx[0] * (pi->primitives.v[1] - pj->primitives.v[1]) / r; wi_dx * dx[0] * (pi->primitives.v[1] - pj->primitives.v[1]) * r_inv;
pi->primitives.gradients.v[1][1] -= pi->primitives.gradients.v[1][1] -=
wi_dx * dx[1] * (pi->primitives.v[1] - pj->primitives.v[1]) / r; wi_dx * dx[1] * (pi->primitives.v[1] - pj->primitives.v[1]) * r_inv;
pi