Commit 36b22593 authored by Matthieu Schaller's avatar Matthieu Schaller
Browse files

Formatting

parent 8bcd5f6e
......@@ -33,7 +33,7 @@
* @param x The number to take the exponential of.
*/
__attribute__((always_inline)) INLINE static float approx_expf(float x) {
return 1.f + x * (1.f + x * (0.5f + x * (((float)(1.0 / 6.0)) + ((float)(1.0 / 24.0)) * x)));
return 1.f + x * (1.f + x * (0.5f + x * (1.f / 6.f + 1.f / 24.f * x)));
}
#endif /* SWIFT_APPROX_MATH_H */
......@@ -145,7 +145,8 @@ __attribute__((always_inline)) INLINE static void hydro_prepare_force(
/* Compute this particle's sound speed. */
const float u = p->u;
const float fc = p->force.soundspeed = sqrtf(hydro_gamma * hydro_gamma_minus_one * u);
const float fc = p->force.soundspeed =
sqrtf(hydro_gamma * hydro_gamma_minus_one * u);
/* Compute the P/Omega/rho2. */
xp->omega = 1.0f + 0.3333333333f * h * p->rho_dh / p->rho;
......
......@@ -479,13 +479,13 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force(
pi[4]->mass, pi[5]->mass, pi[6]->mass, pi[7]->mass);
mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass,
pj[4]->mass, pj[5]->mass, pj[6]->mass, pj[7]->mass);
piPOrho2.v =
vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2, pi[2]->force.P_over_rho2,
pi[3]->force.P_over_rho2, pi[4]->force.P_over_rho2, pi[5]->force.P_over_rho2,
piPOrho2.v = vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2,
pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2,
pi[4]->force.P_over_rho2, pi[5]->force.P_over_rho2,
pi[6]->force.P_over_rho2, pi[7]->force.P_over_rho2);
pjPOrho2.v =
vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2, pj[2]->force.P_over_rho2,
pj[3]->force.P_over_rho2, pj[4]->force.P_over_rho2, pj[5]->force.P_over_rho2,
pjPOrho2.v = vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2,
pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2,
pj[4]->force.P_over_rho2, pj[5]->force.P_over_rho2,
pj[6]->force.P_over_rho2, pj[7]->force.P_over_rho2);
pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho, pi[4]->rho,
pi[5]->rho, pi[6]->rho, pi[7]->rho);
......@@ -495,12 +495,14 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force(
pi[6]->u, pi[7]->u);
pju.v = vec_set(pj[0]->u, pj[1]->u, pj[2]->u, pj[3]->u, pj[4]->u, pj[5]->u,
pj[6]->u, pj[7]->u);
ci.v =
vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed, pi[2]->force.soundspeed, pi[3]->force.soundspeed,
pi[4]->force.soundspeed, pi[5]->force.soundspeed, pi[6]->force.soundspeed, pi[7]->force.soundspeed);
cj.v =
vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed, pj[2]->force.soundspeed, pj[3]->force.soundspeed,
pj[4]->force.soundspeed, pj[5]->force.soundspeed, pj[6]->force.soundspeed, pj[7]->force.soundspeed);
ci.v = vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed,
pi[2]->force.soundspeed, pi[3]->force.soundspeed,
pi[4]->force.soundspeed, pi[5]->force.soundspeed,
pi[6]->force.soundspeed, pi[7]->force.soundspeed);
cj.v = vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed,
pj[2]->force.soundspeed, pj[3]->force.soundspeed,
pj[4]->force.soundspeed, pj[5]->force.soundspeed,
pj[6]->force.soundspeed, pj[7]->force.soundspeed);
vi_sig.v = vec_set(pi[0]->force.v_sig, pi[1]->force.v_sig, pi[2]->force.v_sig,
pi[3]->force.v_sig, pi[4]->force.v_sig, pi[5]->force.v_sig,
pi[6]->force.v_sig, pi[7]->force.v_sig);
......@@ -538,10 +540,10 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force(
pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho);
piu.v = vec_set(pi[0]->u, pi[1]->u, pi[2]->u, pi[3]->u);
pju.v = vec_set(pj[0]->u, pj[1]->u, pj[2]->u, pj[3]->u);
ci.v =
vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed, pi[2]->force.soundspeed, pi[3]->force.soundspeed);
cj.v =
vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed, pj[2]->force.soundspeed, pj[3]->force.soundspeed);
ci.v = vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed,
pi[2]->force.soundspeed, pi[3]->force.soundspeed);
cj.v = vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed,
pj[2]->force.soundspeed, pj[3]->force.soundspeed);
vi_sig.v = vec_set(pi[0]->force.v_sig, pi[1]->force.v_sig, pi[2]->force.v_sig,
pi[3]->force.v_sig);
vj_sig.v = vec_set(pj[0]->force.v_sig, pj[1]->force.v_sig, pj[2]->force.v_sig,
......@@ -780,13 +782,13 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force(
#if VEC_SIZE == 8
mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass,
pj[4]->mass, pj[5]->mass, pj[6]->mass, pj[7]->mass);
piPOrho2.v =
vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2, pi[2]->force.P_over_rho2,
pi[3]->force.P_over_rho2, pi[4]->force.P_over_rho2, pi[5]->force.P_over_rho2,
piPOrho2.v = vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2,
pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2,
pi[4]->force.P_over_rho2, pi[5]->force.P_over_rho2,
pi[6]->force.P_over_rho2, pi[7]->force.P_over_rho2);
pjPOrho2.v =
vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2, pj[2]->force.P_over_rho2,
pj[3]->force.P_over_rho2, pj[4]->force.P_over_rho2, pj[5]->force.P_over_rho2,
pjPOrho2.v = vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2,
pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2,
pj[4]->force.P_over_rho2, pj[5]->force.P_over_rho2,
pj[6]->force.P_over_rho2, pj[7]->force.P_over_rho2);
pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho, pi[4]->rho,
pi[5]->rho, pi[6]->rho, pi[7]->rho);
......@@ -796,12 +798,14 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force(
pi[6]->u, pi[7]->u);
pju.v = vec_set(pj[0]->u, pj[1]->u, pj[2]->u, pj[3]->u, pj[4]->u, pj[5]->u,
pj[6]->u, pj[7]->u);
ci.v =
vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed, pi[2]->force.soundspeed, pi[3]->force.soundspeed,
pi[4]->force.soundspeed, pi[5]->force.soundspeed, pi[6]->force.soundspeed, pi[7]->force.soundspeed);
cj.v =
vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed, pj[2]->force.soundspeed, pj[3]->force.soundspeed,
pj[4]->force.soundspeed, pj[5]->force.soundspeed, pj[6]->force.soundspeed, pj[7]->force.soundspeed);
ci.v = vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed,
pi[2]->force.soundspeed, pi[3]->force.soundspeed,
pi[4]->force.soundspeed, pi[5]->force.soundspeed,
pi[6]->force.soundspeed, pi[7]->force.soundspeed);
cj.v = vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed,
pj[2]->force.soundspeed, pj[3]->force.soundspeed,
pj[4]->force.soundspeed, pj[5]->force.soundspeed,
pj[6]->force.soundspeed, pj[7]->force.soundspeed);
vi_sig.v = vec_set(pi[0]->force.v_sig, pi[1]->force.v_sig, pi[2]->force.v_sig,
pi[3]->force.v_sig, pi[4]->force.v_sig, pi[5]->force.v_sig,
pi[6]->force.v_sig, pi[7]->force.v_sig);
......@@ -838,10 +842,10 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force(
pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho);
piu.v = vec_set(pi[0]->u, pi[1]->u, pi[2]->u, pi[3]->u);
pju.v = vec_set(pj[0]->u, pj[1]->u, pj[2]->u, pj[3]->u);
ci.v =
vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed, pi[2]->force.soundspeed, pi[3]->force.soundspeed);
cj.v =
vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed, pj[2]->force.soundspeed, pj[3]->force.soundspeed);
ci.v = vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed,
pi[2]->force.soundspeed, pi[3]->force.soundspeed);
cj.v = vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed,
pj[2]->force.soundspeed, pj[3]->force.soundspeed);
vi_sig.v = vec_set(pi[0]->force.v_sig, pi[1]->force.v_sig, pi[2]->force.v_sig,
pi[3]->force.v_sig);
vj_sig.v = vec_set(pj[0]->force.v_sig, pj[1]->force.v_sig, pj[2]->force.v_sig,
......@@ -936,7 +940,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force(
for (k = 0; k < VEC_SIZE; k++) {
pi[k]->force.u_dt += piu_dt.f[k];
pi[k]->force.h_dt -= pih_dt.f[k];
pi[k]->force.v_sig = fmaxf(pi[k]->force.v_sig,v_sig.f[k]);
pi[k]->force.v_sig = fmaxf(pi[k]->force.v_sig, v_sig.f[k]);
for (j = 0; j < 3; j++) pi[k]->a_hydro[j] -= pia[j].f[k];
}
......
......@@ -501,30 +501,32 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force(
fac_mu.v = vec_set1(1.f); /* Will change with cosmological integration */
/* Load stuff. */
/* Load stuff. */
#if VEC_SIZE == 8
mi.v = vec_set(pi[0]->mass, pi[1]->mass, pi[2]->mass, pi[3]->mass,
pi[4]->mass, pi[5]->mass, pi[6]->mass, pi[7]->mass);
mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass,
pj[4]->mass, pj[5]->mass, pj[6]->mass, pj[7]->mass);
piPOrho.v =
vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2, pi[2]->force.P_over_rho2,
pi[3]->force.P_over_rho2, pi[4]->force.P_over_rho2, pi[5]->force.P_over_rho2,
piPOrho.v = vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2,
pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2,
pi[4]->force.P_over_rho2, pi[5]->force.P_over_rho2,
pi[6]->force.P_over_rho2, pi[7]->force.P_over_rho2);
pjPOrho.v =
vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2, pj[2]->force.P_over_rho2,
pj[3]->force.P_over_rho2, pj[4]->force.P_over_rho2, pj[5]->force.P_over_rho2,
pjPOrho.v = vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2,
pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2,
pj[4]->force.P_over_rho2, pj[5]->force.P_over_rho2,
pj[6]->force.P_over_rho2, pj[7]->force.P_over_rho2);
pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho, pi[4]->rho,
pi[5]->rho, pi[6]->rho, pi[7]->rho);
pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho, pj[4]->rho,
pj[5]->rho, pj[6]->rho, pj[7]->rho);
ci.v =
vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed, pi[2]->force.soundspeed, pi[3]->force.soundspeed,
pi[4]->force.soundspeed, pi[5]->force.soundspeed, pi[6]->force.soundspeed, pi[7]->force.soundspeed);
cj.v =
vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed, pj[2]->force.soundspeed, pj[3]->force.soundspeed,
pj[4]->force.soundspeed, pj[5]->force.soundspeed, pj[6]->force.soundspeed, pj[7]->force.soundspeed);
ci.v = vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed,
pi[2]->force.soundspeed, pi[3]->force.soundspeed,
pi[4]->force.soundspeed, pi[5]->force.soundspeed,
pi[6]->force.soundspeed, pi[7]->force.soundspeed);
cj.v = vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed,
pj[2]->force.soundspeed, pj[3]->force.soundspeed,
pj[4]->force.soundspeed, pj[5]->force.soundspeed,
pj[6]->force.soundspeed, pj[7]->force.soundspeed);
for (k = 0; k < 3; k++) {
vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k],
pi[4]->v[k], pi[5]->v[k], pi[6]->v[k], pi[7]->v[k]);
......@@ -550,10 +552,10 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force(
pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2);
pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho);
pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho);
ci.v =
vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed, pi[2]->force.soundspeed, pi[3]->force.soundspeed);
cj.v =
vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed, pj[2]->force.soundspeed, pj[3]->force.soundspeed);
ci.v = vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed,
pi[2]->force.soundspeed, pi[3]->force.soundspeed);
cj.v = vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed,
pj[2]->force.soundspeed, pj[3]->force.soundspeed);
for (k = 0; k < 3; k++) {
vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k]);
vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k]);
......@@ -595,7 +597,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force(
/* Compute dv dot r. */
dvdr.v = ((vi[0].v - vj[0].v) * dx[0].v) + ((vi[1].v - vj[1].v) * dx[1].v) +
((vi[2].v - vj[2].v) * dx[2].v);
//dvdr.v = dvdr.v * ri.v;
// dvdr.v = dvdr.v * ri.v;
/* Compute the relative velocity. (This is 0 if the particles move away from
* each other and negative otherwise) */
......@@ -607,8 +609,8 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force(
/* Now construct the full viscosity term */
rho_ij.v = vec_set1(0.5f) * (pirho.v + pjrho.v);
visc.v = vec_set1(-0.25f) * vec_set1(const_viscosity_alpha) * v_sig.v * mu_ij.v *
balsara.v / rho_ij.v;
visc.v = vec_set1(-0.25f) * vec_set1(const_viscosity_alpha) * v_sig.v *
mu_ij.v * balsara.v / rho_ij.v;
/* Now, convolve with the kernel */
visc_term.v = vec_set1(0.5f) * visc.v * (wi_dr.v + wj_dr.v) * ri.v;
......@@ -647,7 +649,9 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force(
#else
error("The Gadget2 serial version of runner_iact_nonsym_force was called when the vectorised version should have been used.")
error(
"The Gadget2 serial version of runner_iact_nonsym_force was called when "
"the vectorised version should have been used.")
#endif
}
......@@ -744,7 +748,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force(
float *R2, float *Dx, float *Hi, float *Hj, struct part **pi,
struct part **pj) {
#ifdef WITH_VECTORIZATION
#ifdef WITH_VECTORIZATION
vector r, r2, ri;
vector xi, xj;
......@@ -765,28 +769,30 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force(
fac_mu.v = vec_set1(1.f); /* Will change with cosmological integration */
/* Load stuff. */
/* Load stuff. */
#if VEC_SIZE == 8
mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass,
pj[4]->mass, pj[5]->mass, pj[6]->mass, pj[7]->mass);
piPOrho.v =
vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2, pi[2]->force.P_over_rho2,
pi[3]->force.P_over_rho2, pi[4]->force.P_over_rho2, pi[5]->force.P_over_rho2,
piPOrho.v = vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2,
pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2,
pi[4]->force.P_over_rho2, pi[5]->force.P_over_rho2,
pi[6]->force.P_over_rho2, pi[7]->force.P_over_rho2);
pjPOrho.v =
vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2, pj[2]->force.P_over_rho2,
pj[3]->force.P_over_rho2, pj[4]->force.P_over_rho2, pj[5]->force.P_over_rho2,
pjPOrho.v = vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2,
pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2,
pj[4]->force.P_over_rho2, pj[5]->force.P_over_rho2,
pj[6]->force.P_over_rho2, pj[7]->force.P_over_rho2);
pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho, pi[4]->rho,
pi[5]->rho, pi[6]->rho, pi[7]->rho);
pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho, pj[4]->rho,
pj[5]->rho, pj[6]->rho, pj[7]->rho);
ci.v =
vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed, pi[2]->force.soundspeed, pi[3]->force.soundspeed,
pi[4]->force.soundspeed, pi[5]->force.soundspeed, pi[6]->force.soundspeed, pi[7]->force.soundspeed);
cj.v =
vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed, pj[2]->force.soundspeed, pj[3]->force.soundspeed,
pj[4]->force.soundspeed, pj[5]->force.soundspeed, pj[6]->force.soundspeed, pj[7]->force.soundspeed);
ci.v = vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed,
pi[2]->force.soundspeed, pi[3]->force.soundspeed,
pi[4]->force.soundspeed, pi[5]->force.soundspeed,
pi[6]->force.soundspeed, pi[7]->force.soundspeed);
cj.v = vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed,
pj[2]->force.soundspeed, pj[3]->force.soundspeed,
pj[4]->force.soundspeed, pj[5]->force.soundspeed,
pj[6]->force.soundspeed, pj[7]->force.soundspeed);
for (k = 0; k < 3; k++) {
vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k],
pi[4]->v[k], pi[5]->v[k], pi[6]->v[k], pi[7]->v[k]);
......@@ -811,10 +817,10 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force(
pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2);
pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho);
pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho);
ci.v =
vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed, pi[2]->force.soundspeed, pi[3]->force.soundspeed);
cj.v =
vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed, pj[2]->force.soundspeed, pj[3]->force.soundspeed);
ci.v = vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed,
pi[2]->force.soundspeed, pi[3]->force.soundspeed);
cj.v = vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed,
pj[2]->force.soundspeed, pj[3]->force.soundspeed);
for (k = 0; k < 3; k++) {
vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k]);
vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k]);
......@@ -856,7 +862,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force(
/* Compute dv dot r. */
dvdr.v = ((vi[0].v - vj[0].v) * dx[0].v) + ((vi[1].v - vj[1].v) * dx[1].v) +
((vi[2].v - vj[2].v) * dx[2].v);
//dvdr.v = dvdr.v * ri.v;
// dvdr.v = dvdr.v * ri.v;
/* Compute the relative velocity. (This is 0 if the particles move away from
* each other and negative otherwise) */
......@@ -868,8 +874,8 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force(
/* Now construct the full viscosity term */
rho_ij.v = vec_set1(0.5f) * (pirho.v + pjrho.v);
visc.v = vec_set1(-0.25f) * vec_set1(const_viscosity_alpha) * v_sig.v * mu_ij.v *
balsara.v / rho_ij.v;
visc.v = vec_set1(-0.25f) * vec_set1(const_viscosity_alpha) * v_sig.v *
mu_ij.v * balsara.v / rho_ij.v;
/* Now, convolve with the kernel */
visc_term.v = vec_set1(0.5f) * visc.v * (wi_dr.v + wj_dr.v) * ri.v;
......@@ -894,13 +900,15 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force(
for (k = 0; k < VEC_SIZE; k++) {
for (j = 0; j < 3; j++) pi[k]->a_hydro[j] -= pia[j].f[k];
pi[k]->force.h_dt -= pih_dt.f[k];
pi[k]->force.v_sig = fmaxf(pi[k]->force.v_sig,v_sig.f[k]);
pi[k]->force.v_sig = fmaxf(pi[k]->force.v_sig, v_sig.f[k]);
pi[k]->entropy_dt += entropy_dt.f[k];
}
#else
error("The Gadget2 serial version of runner_iact_nonsym_force was called when the vectorised version should have been used.")
error(
"The Gadget2 serial version of runner_iact_nonsym_force was called when "
"the vectorised version should have been used.")
#endif
}
......
......@@ -24,9 +24,12 @@
#include <unistd.h>
#include "swift.h"
/* Typdef function pointers for serial and vectorised versions of the interaction functions. */
typedef void (*serial_interaction)(float, float *, float, float, struct part *, struct part *);
typedef void (*vec_interaction)(float *, float *, float *, float *, struct part **, struct part **);
/* Typdef function pointers for serial and vectorised versions of the
* interaction functions. */
typedef void (*serial_interaction)(float, float *, float, float, struct part *,
struct part *);
typedef void (*vec_interaction)(float *, float *, float *, float *,
struct part **, struct part **);
/**
* @brief Constructs an array of particles in a valid state prior to
......@@ -90,21 +93,20 @@ void dump_indv_particle_fields(char *fileName, struct part *p) {
FILE *file = fopen(fileName, "a");
fprintf(
file,
"%6llu %10f %10f %10f %10f %10f %10f %10f %10f %10f %13e %13e %13e %13e %13e %13e %13e "
fprintf(file,
"%6llu %10f %10f %10f %10f %10f %10f %10f %10f %10f %13e %13e %13e "
"%13e %13e %13e %13e "
"%13e %13e %13e %10f\n",
p->id, p->x[0], p->x[1],
p->x[2], p->v[0], p->v[1],
p->v[2], p->a_hydro[0], p->a_hydro[1],
p->a_hydro[2], p->rho, p->rho_dh,
p->density.wcount, p->density.wcount_dh, p->force.h_dt, p->force.v_sig,
p->id, p->x[0], p->x[1], p->x[2], p->v[0], p->v[1], p->v[2],
p->a_hydro[0], p->a_hydro[1], p->a_hydro[2], p->rho, p->rho_dh,
p->density.wcount, p->density.wcount_dh, p->force.h_dt,
p->force.v_sig,
#if defined(GADGET2_SPH)
p->density.div_v, p->density.rot_v[0],
p->density.rot_v[1], p->density.rot_v[2], p->entropy_dt
p->density.div_v, p->density.rot_v[0], p->density.rot_v[1],
p->density.rot_v[2], p->entropy_dt
#elif defined(DEFAULT_SPH)
p->density.div_v, p->density.rot_v[0],
p->density.rot_v[1], p->density.rot_v[2], 0.
p->density.div_v, p->density.rot_v[0], p->density.rot_v[1],
p->density.rot_v[2], 0.
#else
0., 0., 0., 0., 0., 0., 0., 0., 0., 0.
#endif
......@@ -120,11 +122,13 @@ void write_header(char *fileName) {
FILE *file = fopen(fileName, "w");
/* Write header */
fprintf(file,
"# %4s %10s %10s %10s %10s %10s %10s %10s %10s %10s %13s %13s %13s %13s %13s %13s %13s"
"# %4s %10s %10s %10s %10s %10s %10s %10s %10s %10s %13s %13s %13s "
"%13s %13s %13s %13s"
"%13s %13s %13s %13s\n",
"ID", "pos_x", "pos_y", "pos_z", "v_x", "v_y", "v_z", "a_x", "a_y", "a_z", "rho", "rho_dh",
"wcount", "wcount_dh", "dh/dt", "v_sig", "div_v", "curl_vx", "curl_vy", "curl_vz", "dS/dt");
fprintf(file,"\nPARTICLES BEFORE INTERACTION:\n");
"ID", "pos_x", "pos_y", "pos_z", "v_x", "v_y", "v_z", "a_x", "a_y",
"a_z", "rho", "rho_dh", "wcount", "wcount_dh", "dh/dt", "v_sig",
"div_v", "curl_vx", "curl_vy", "curl_vz", "dS/dt");
fprintf(file, "\nPARTICLES BEFORE INTERACTION:\n");
fclose(file);
}
......@@ -136,7 +140,9 @@ void write_header(char *fileName) {
* @param count No. of particles to be interacted
*
*/
void test_interactions(struct part *parts, int count, serial_interaction serial_inter_func, vec_interaction vec_inter_func, char *filePrefix) {
void test_interactions(struct part *parts, int count,
serial_interaction serial_inter_func,
vec_interaction vec_inter_func, char *filePrefix) {
/* Use the first particle in the array as the one that gets updated. */
struct part pi = parts[0];
......@@ -145,8 +151,8 @@ void test_interactions(struct part *parts, int count, serial_interaction serial_
char serial_filename[200] = "";
char vec_filename[200] = "";
strcpy(serial_filename,filePrefix);
strcpy(vec_filename,filePrefix);
strcpy(serial_filename, filePrefix);
strcpy(vec_filename, filePrefix);
sprintf(serial_filename + strlen(serial_filename), "_serial.dat");
sprintf(vec_filename + strlen(vec_filename), "_vec.dat");
......@@ -267,8 +273,10 @@ int main(int argc, char *argv[]) {
/* Build the infrastructure */
static long long partId = 0;
struct part *density_particles = make_particles(count,offset,spacing,h,&partId);
struct part *force_particles = make_particles(count,offset,spacing,h,&partId);
struct part *density_particles =
make_particles(count, offset, spacing, h, &partId);
struct part *force_particles =
make_particles(count, offset, spacing, h, &partId);
prepare_force(force_particles);
/* Define which interactions to call */
......@@ -276,25 +284,28 @@ int main(int argc, char *argv[]) {
vec_interaction vec_inter_func = &runner_iact_nonsym_vec_density;
/* Call the non-sym density test. */
test_interactions(density_particles,count,serial_inter_func,vec_inter_func,"test_nonsym_density");
test_interactions(density_particles, count, serial_inter_func, vec_inter_func,
"test_nonsym_density");
density_particles = make_particles(count,offset,spacing,h,&partId);
density_particles = make_particles(count, offset, spacing, h, &partId);
/* Re-assign function pointers. */
serial_inter_func = &runner_iact_density;
vec_inter_func = &runner_iact_vec_density;
/* Call the symmetrical density test. */
test_interactions(density_particles,count,serial_inter_func,vec_inter_func,"test_sym_density");
test_interactions(density_particles, count, serial_inter_func, vec_inter_func,
"test_sym_density");
/* Re-assign function pointers. */
serial_inter_func = &runner_iact_nonsym_force;
vec_inter_func = &runner_iact_nonsym_vec_force;
/* Call the test non-sym force test. */
test_interactions(force_particles,count,serial_inter_func,vec_inter_func,"test_nonsym_force");
test_interactions(force_particles, count, serial_inter_func, vec_inter_func,
"test_nonsym_force");
force_particles = make_particles(count,offset,spacing,h,&partId);
force_particles = make_particles(count, offset, spacing, h, &partId);
prepare_force(force_particles);
/* Re-assign function pointers. */
......@@ -302,7 +313,8 @@ int main(int argc, char *argv[]) {
vec_inter_func = &runner_iact_vec_force;
/* Call the test symmetrical force test. */
test_interactions(force_particles,count,serial_inter_func,vec_inter_func,"test_sym_force");
test_interactions(force_particles, count, serial_inter_func, vec_inter_func,
"test_sym_force");
return 0;
}
......@@ -22,8 +22,8 @@
#include "approx_math.h"
#include "vector.h"
#include <stdio.h>
#include <math.h>
#include <stdio.h>
int main() {
......@@ -31,15 +31,16 @@ int main() {
for (int i = 0; i < numPoints; ++i) {
const float x = 0.6f * (i / (float) numPoints) - 0.3f;
const float x = 0.6f * (i / (float)numPoints) - 0.3f;
const float exp_correct = expf(x);
const float exp_approx = approx_expf(x);
const float abs = fabs(exp_correct - exp_approx);
const float rel = 0.5f * fabs(exp_correct - exp_approx) / fabs(exp_correct + exp_approx);
const float rel =
0.5f * fabs(exp_correct - exp_approx) / fabs(exp_correct + exp_approx);
printf("%2d: x= %f exp(x)= %e approx_exp(x)=%e abs=%e rel=%e\n", i, x, exp_correct,
exp_approx, abs, rel);
printf("%2d: x= %f exp(x)= %e approx_exp(x)=%e abs=%e rel=%e\n", i, x,
exp_correct, exp_approx, abs, rel);
if (abs > 3e-6 && fabsf(x) <= 0.2) {
printf("Absolute difference too large !\n");
......@@ -58,7 +59,6 @@ int main() {
printf("Relative difference too large !\n");
return 1;
}
}
printf("\nAll values are consistent\n");
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment