diff --git a/src/approx_math.h b/src/approx_math.h index 27f05af4ae04a2764a0e43f6ca2bc40e6ca2721d..ad07adeb4f3b1b54ca5f33d80eabb6a004d2a3aa 100644 --- a/src/approx_math.h +++ b/src/approx_math.h @@ -33,7 +33,7 @@ * @param x The number to take the exponential of. */ __attribute__((always_inline)) INLINE static float approx_expf(float x) { - return 1.f + x * (1.f + x * (0.5f + x * (((float)(1.0 / 6.0)) + ((float)(1.0 / 24.0)) * x))); + return 1.f + x * (1.f + x * (0.5f + x * (1.f / 6.f + 1.f / 24.f * x))); } #endif /* SWIFT_APPROX_MATH_H */ diff --git a/src/hydro/Default/hydro.h b/src/hydro/Default/hydro.h index bfe48e4a36d5744053703d437d666151173b923d..2b90cf2f90a3cd089e99fd07cfb1d9a1690dbc92 100644 --- a/src/hydro/Default/hydro.h +++ b/src/hydro/Default/hydro.h @@ -145,7 +145,8 @@ __attribute__((always_inline)) INLINE static void hydro_prepare_force( /* Compute this particle's sound speed. */ const float u = p->u; - const float fc = p->force.soundspeed = sqrtf(hydro_gamma * hydro_gamma_minus_one * u); + const float fc = p->force.soundspeed = + sqrtf(hydro_gamma * hydro_gamma_minus_one * u); /* Compute the P/Omega/rho2. */ xp->omega = 1.0f + 0.3333333333f * h * p->rho_dh / p->rho; diff --git a/src/hydro/Default/hydro_iact.h b/src/hydro/Default/hydro_iact.h index d8b1fdda330396dc50d47467d39c264fbe1e58fe..5db3604e1717ba2c14d50c531c0537409501226d 100644 --- a/src/hydro/Default/hydro_iact.h +++ b/src/hydro/Default/hydro_iact.h @@ -479,14 +479,14 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force( pi[4]->mass, pi[5]->mass, pi[6]->mass, pi[7]->mass); mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass, pj[4]->mass, pj[5]->mass, pj[6]->mass, pj[7]->mass); - piPOrho2.v = - vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2, pi[2]->force.P_over_rho2, - pi[3]->force.P_over_rho2, pi[4]->force.P_over_rho2, pi[5]->force.P_over_rho2, - pi[6]->force.P_over_rho2, pi[7]->force.P_over_rho2); - pjPOrho2.v = - vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2, pj[2]->force.P_over_rho2, - pj[3]->force.P_over_rho2, pj[4]->force.P_over_rho2, pj[5]->force.P_over_rho2, - pj[6]->force.P_over_rho2, pj[7]->force.P_over_rho2); + piPOrho2.v = vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2, + pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2, + pi[4]->force.P_over_rho2, pi[5]->force.P_over_rho2, + pi[6]->force.P_over_rho2, pi[7]->force.P_over_rho2); + pjPOrho2.v = vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2, + pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2, + pj[4]->force.P_over_rho2, pj[5]->force.P_over_rho2, + pj[6]->force.P_over_rho2, pj[7]->force.P_over_rho2); pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho, pi[4]->rho, pi[5]->rho, pi[6]->rho, pi[7]->rho); pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho, pj[4]->rho, @@ -495,12 +495,14 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force( pi[6]->u, pi[7]->u); pju.v = vec_set(pj[0]->u, pj[1]->u, pj[2]->u, pj[3]->u, pj[4]->u, pj[5]->u, pj[6]->u, pj[7]->u); - ci.v = - vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed, pi[2]->force.soundspeed, pi[3]->force.soundspeed, - pi[4]->force.soundspeed, pi[5]->force.soundspeed, pi[6]->force.soundspeed, pi[7]->force.soundspeed); - cj.v = - vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed, pj[2]->force.soundspeed, pj[3]->force.soundspeed, - pj[4]->force.soundspeed, pj[5]->force.soundspeed, pj[6]->force.soundspeed, pj[7]->force.soundspeed); + ci.v = vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed, + pi[2]->force.soundspeed, pi[3]->force.soundspeed, + pi[4]->force.soundspeed, pi[5]->force.soundspeed, + pi[6]->force.soundspeed, pi[7]->force.soundspeed); + cj.v = vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed, + pj[2]->force.soundspeed, pj[3]->force.soundspeed, + pj[4]->force.soundspeed, pj[5]->force.soundspeed, + pj[6]->force.soundspeed, pj[7]->force.soundspeed); vi_sig.v = vec_set(pi[0]->force.v_sig, pi[1]->force.v_sig, pi[2]->force.v_sig, pi[3]->force.v_sig, pi[4]->force.v_sig, pi[5]->force.v_sig, pi[6]->force.v_sig, pi[7]->force.v_sig); @@ -538,10 +540,10 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force( pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho); piu.v = vec_set(pi[0]->u, pi[1]->u, pi[2]->u, pi[3]->u); pju.v = vec_set(pj[0]->u, pj[1]->u, pj[2]->u, pj[3]->u); - ci.v = - vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed, pi[2]->force.soundspeed, pi[3]->force.soundspeed); - cj.v = - vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed, pj[2]->force.soundspeed, pj[3]->force.soundspeed); + ci.v = vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed, + pi[2]->force.soundspeed, pi[3]->force.soundspeed); + cj.v = vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed, + pj[2]->force.soundspeed, pj[3]->force.soundspeed); vi_sig.v = vec_set(pi[0]->force.v_sig, pi[1]->force.v_sig, pi[2]->force.v_sig, pi[3]->force.v_sig); vj_sig.v = vec_set(pj[0]->force.v_sig, pj[1]->force.v_sig, pj[2]->force.v_sig, @@ -780,14 +782,14 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force( #if VEC_SIZE == 8 mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass, pj[4]->mass, pj[5]->mass, pj[6]->mass, pj[7]->mass); - piPOrho2.v = - vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2, pi[2]->force.P_over_rho2, - pi[3]->force.P_over_rho2, pi[4]->force.P_over_rho2, pi[5]->force.P_over_rho2, - pi[6]->force.P_over_rho2, pi[7]->force.P_over_rho2); - pjPOrho2.v = - vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2, pj[2]->force.P_over_rho2, - pj[3]->force.P_over_rho2, pj[4]->force.P_over_rho2, pj[5]->force.P_over_rho2, - pj[6]->force.P_over_rho2, pj[7]->force.P_over_rho2); + piPOrho2.v = vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2, + pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2, + pi[4]->force.P_over_rho2, pi[5]->force.P_over_rho2, + pi[6]->force.P_over_rho2, pi[7]->force.P_over_rho2); + pjPOrho2.v = vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2, + pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2, + pj[4]->force.P_over_rho2, pj[5]->force.P_over_rho2, + pj[6]->force.P_over_rho2, pj[7]->force.P_over_rho2); pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho, pi[4]->rho, pi[5]->rho, pi[6]->rho, pi[7]->rho); pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho, pj[4]->rho, @@ -796,12 +798,14 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force( pi[6]->u, pi[7]->u); pju.v = vec_set(pj[0]->u, pj[1]->u, pj[2]->u, pj[3]->u, pj[4]->u, pj[5]->u, pj[6]->u, pj[7]->u); - ci.v = - vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed, pi[2]->force.soundspeed, pi[3]->force.soundspeed, - pi[4]->force.soundspeed, pi[5]->force.soundspeed, pi[6]->force.soundspeed, pi[7]->force.soundspeed); - cj.v = - vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed, pj[2]->force.soundspeed, pj[3]->force.soundspeed, - pj[4]->force.soundspeed, pj[5]->force.soundspeed, pj[6]->force.soundspeed, pj[7]->force.soundspeed); + ci.v = vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed, + pi[2]->force.soundspeed, pi[3]->force.soundspeed, + pi[4]->force.soundspeed, pi[5]->force.soundspeed, + pi[6]->force.soundspeed, pi[7]->force.soundspeed); + cj.v = vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed, + pj[2]->force.soundspeed, pj[3]->force.soundspeed, + pj[4]->force.soundspeed, pj[5]->force.soundspeed, + pj[6]->force.soundspeed, pj[7]->force.soundspeed); vi_sig.v = vec_set(pi[0]->force.v_sig, pi[1]->force.v_sig, pi[2]->force.v_sig, pi[3]->force.v_sig, pi[4]->force.v_sig, pi[5]->force.v_sig, pi[6]->force.v_sig, pi[7]->force.v_sig); @@ -838,10 +842,10 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force( pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho); piu.v = vec_set(pi[0]->u, pi[1]->u, pi[2]->u, pi[3]->u); pju.v = vec_set(pj[0]->u, pj[1]->u, pj[2]->u, pj[3]->u); - ci.v = - vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed, pi[2]->force.soundspeed, pi[3]->force.soundspeed); - cj.v = - vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed, pj[2]->force.soundspeed, pj[3]->force.soundspeed); + ci.v = vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed, + pi[2]->force.soundspeed, pi[3]->force.soundspeed); + cj.v = vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed, + pj[2]->force.soundspeed, pj[3]->force.soundspeed); vi_sig.v = vec_set(pi[0]->force.v_sig, pi[1]->force.v_sig, pi[2]->force.v_sig, pi[3]->force.v_sig); vj_sig.v = vec_set(pj[0]->force.v_sig, pj[1]->force.v_sig, pj[2]->force.v_sig, @@ -936,7 +940,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force( for (k = 0; k < VEC_SIZE; k++) { pi[k]->force.u_dt += piu_dt.f[k]; pi[k]->force.h_dt -= pih_dt.f[k]; - pi[k]->force.v_sig = fmaxf(pi[k]->force.v_sig,v_sig.f[k]); + pi[k]->force.v_sig = fmaxf(pi[k]->force.v_sig, v_sig.f[k]); for (j = 0; j < 3; j++) pi[k]->a_hydro[j] -= pia[j].f[k]; } diff --git a/src/hydro/Gadget2/hydro_iact.h b/src/hydro/Gadget2/hydro_iact.h index 4b340c5ec93f1dd340a49591a9d9e0ab358a2df2..323f066db6a047db7ae3401a3525afa941bfc047 100644 --- a/src/hydro/Gadget2/hydro_iact.h +++ b/src/hydro/Gadget2/hydro_iact.h @@ -501,30 +501,32 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force( fac_mu.v = vec_set1(1.f); /* Will change with cosmological integration */ - /* Load stuff. */ +/* Load stuff. */ #if VEC_SIZE == 8 mi.v = vec_set(pi[0]->mass, pi[1]->mass, pi[2]->mass, pi[3]->mass, pi[4]->mass, pi[5]->mass, pi[6]->mass, pi[7]->mass); mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass, pj[4]->mass, pj[5]->mass, pj[6]->mass, pj[7]->mass); - piPOrho.v = - vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2, pi[2]->force.P_over_rho2, - pi[3]->force.P_over_rho2, pi[4]->force.P_over_rho2, pi[5]->force.P_over_rho2, - pi[6]->force.P_over_rho2, pi[7]->force.P_over_rho2); - pjPOrho.v = - vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2, pj[2]->force.P_over_rho2, - pj[3]->force.P_over_rho2, pj[4]->force.P_over_rho2, pj[5]->force.P_over_rho2, - pj[6]->force.P_over_rho2, pj[7]->force.P_over_rho2); + piPOrho.v = vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2, + pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2, + pi[4]->force.P_over_rho2, pi[5]->force.P_over_rho2, + pi[6]->force.P_over_rho2, pi[7]->force.P_over_rho2); + pjPOrho.v = vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2, + pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2, + pj[4]->force.P_over_rho2, pj[5]->force.P_over_rho2, + pj[6]->force.P_over_rho2, pj[7]->force.P_over_rho2); pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho, pi[4]->rho, pi[5]->rho, pi[6]->rho, pi[7]->rho); pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho, pj[4]->rho, pj[5]->rho, pj[6]->rho, pj[7]->rho); - ci.v = - vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed, pi[2]->force.soundspeed, pi[3]->force.soundspeed, - pi[4]->force.soundspeed, pi[5]->force.soundspeed, pi[6]->force.soundspeed, pi[7]->force.soundspeed); - cj.v = - vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed, pj[2]->force.soundspeed, pj[3]->force.soundspeed, - pj[4]->force.soundspeed, pj[5]->force.soundspeed, pj[6]->force.soundspeed, pj[7]->force.soundspeed); + ci.v = vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed, + pi[2]->force.soundspeed, pi[3]->force.soundspeed, + pi[4]->force.soundspeed, pi[5]->force.soundspeed, + pi[6]->force.soundspeed, pi[7]->force.soundspeed); + cj.v = vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed, + pj[2]->force.soundspeed, pj[3]->force.soundspeed, + pj[4]->force.soundspeed, pj[5]->force.soundspeed, + pj[6]->force.soundspeed, pj[7]->force.soundspeed); for (k = 0; k < 3; k++) { vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k], pi[4]->v[k], pi[5]->v[k], pi[6]->v[k], pi[7]->v[k]); @@ -545,15 +547,15 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force( mi.v = vec_set(pi[0]->mass, pi[1]->mass, pi[2]->mass, pi[3]->mass); mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass); piPOrho.v = vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2, - pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2); + pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2); pjPOrho.v = vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2, - pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2); + pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2); pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho); pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho); - ci.v = - vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed, pi[2]->force.soundspeed, pi[3]->force.soundspeed); - cj.v = - vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed, pj[2]->force.soundspeed, pj[3]->force.soundspeed); + ci.v = vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed, + pi[2]->force.soundspeed, pi[3]->force.soundspeed); + cj.v = vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed, + pj[2]->force.soundspeed, pj[3]->force.soundspeed); for (k = 0; k < 3; k++) { vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k]); vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k]); @@ -595,20 +597,20 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force( /* Compute dv dot r. */ dvdr.v = ((vi[0].v - vj[0].v) * dx[0].v) + ((vi[1].v - vj[1].v) * dx[1].v) + ((vi[2].v - vj[2].v) * dx[2].v); - //dvdr.v = dvdr.v * ri.v; + // dvdr.v = dvdr.v * ri.v; /* Compute the relative velocity. (This is 0 if the particles move away from * each other and negative otherwise) */ omega_ij.v = vec_fmin(dvdr.v, vec_set1(0.0f)); mu_ij.v = fac_mu.v * ri.v * omega_ij.v; /* This is 0 or negative */ - + /* Compute signal velocity */ v_sig.v = ci.v + cj.v - vec_set1(3.0f) * mu_ij.v; - + /* Now construct the full viscosity term */ rho_ij.v = vec_set1(0.5f) * (pirho.v + pjrho.v); - visc.v = vec_set1(-0.25f) * vec_set1(const_viscosity_alpha) * v_sig.v * mu_ij.v * - balsara.v / rho_ij.v; + visc.v = vec_set1(-0.25f) * vec_set1(const_viscosity_alpha) * v_sig.v * + mu_ij.v * balsara.v / rho_ij.v; /* Now, convolve with the kernel */ visc_term.v = vec_set1(0.5f) * visc.v * (wi_dr.v + wj_dr.v) * ri.v; @@ -616,7 +618,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force( /* Eventually get the acceleration */ acc.v = visc_term.v + sph_term.v; - + /* Use the force, Luke! */ for (k = 0; k < 3; k++) { f.v = dx[k].v * acc.v; @@ -630,7 +632,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force( /* Change in entropy */ entropy_dt.v = vec_set1(0.5f) * visc_term.v * dvdr.v; - + /* Store the forces back on the particles. */ for (k = 0; k < VEC_SIZE; k++) { for (j = 0; j < 3; j++) { @@ -645,9 +647,11 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force( pj[k]->entropy_dt -= entropy_dt.f[k] * mi.f[k]; } -#else +#else - error("The Gadget2 serial version of runner_iact_nonsym_force was called when the vectorised version should have been used.") + error( + "The Gadget2 serial version of runner_iact_nonsym_force was called when " + "the vectorised version should have been used.") #endif } @@ -744,7 +748,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force( float *R2, float *Dx, float *Hi, float *Hj, struct part **pi, struct part **pj) { - #ifdef WITH_VECTORIZATION +#ifdef WITH_VECTORIZATION vector r, r2, ri; vector xi, xj; @@ -765,28 +769,30 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force( fac_mu.v = vec_set1(1.f); /* Will change with cosmological integration */ - /* Load stuff. */ +/* Load stuff. */ #if VEC_SIZE == 8 mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass, pj[4]->mass, pj[5]->mass, pj[6]->mass, pj[7]->mass); - piPOrho.v = - vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2, pi[2]->force.P_over_rho2, - pi[3]->force.P_over_rho2, pi[4]->force.P_over_rho2, pi[5]->force.P_over_rho2, - pi[6]->force.P_over_rho2, pi[7]->force.P_over_rho2); - pjPOrho.v = - vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2, pj[2]->force.P_over_rho2, - pj[3]->force.P_over_rho2, pj[4]->force.P_over_rho2, pj[5]->force.P_over_rho2, - pj[6]->force.P_over_rho2, pj[7]->force.P_over_rho2); + piPOrho.v = vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2, + pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2, + pi[4]->force.P_over_rho2, pi[5]->force.P_over_rho2, + pi[6]->force.P_over_rho2, pi[7]->force.P_over_rho2); + pjPOrho.v = vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2, + pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2, + pj[4]->force.P_over_rho2, pj[5]->force.P_over_rho2, + pj[6]->force.P_over_rho2, pj[7]->force.P_over_rho2); pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho, pi[4]->rho, pi[5]->rho, pi[6]->rho, pi[7]->rho); pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho, pj[4]->rho, pj[5]->rho, pj[6]->rho, pj[7]->rho); - ci.v = - vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed, pi[2]->force.soundspeed, pi[3]->force.soundspeed, - pi[4]->force.soundspeed, pi[5]->force.soundspeed, pi[6]->force.soundspeed, pi[7]->force.soundspeed); - cj.v = - vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed, pj[2]->force.soundspeed, pj[3]->force.soundspeed, - pj[4]->force.soundspeed, pj[5]->force.soundspeed, pj[6]->force.soundspeed, pj[7]->force.soundspeed); + ci.v = vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed, + pi[2]->force.soundspeed, pi[3]->force.soundspeed, + pi[4]->force.soundspeed, pi[5]->force.soundspeed, + pi[6]->force.soundspeed, pi[7]->force.soundspeed); + cj.v = vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed, + pj[2]->force.soundspeed, pj[3]->force.soundspeed, + pj[4]->force.soundspeed, pj[5]->force.soundspeed, + pj[6]->force.soundspeed, pj[7]->force.soundspeed); for (k = 0; k < 3; k++) { vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k], pi[4]->v[k], pi[5]->v[k], pi[6]->v[k], pi[7]->v[k]); @@ -806,15 +812,15 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force( #elif VEC_SIZE == 4 mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass); piPOrho.v = vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2, - pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2); + pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2); pjPOrho.v = vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2, - pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2); + pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2); pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho); pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho); - ci.v = - vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed, pi[2]->force.soundspeed, pi[3]->force.soundspeed); - cj.v = - vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed, pj[2]->force.soundspeed, pj[3]->force.soundspeed); + ci.v = vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed, + pi[2]->force.soundspeed, pi[3]->force.soundspeed); + cj.v = vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed, + pj[2]->force.soundspeed, pj[3]->force.soundspeed); for (k = 0; k < 3; k++) { vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k]); vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k]); @@ -856,20 +862,20 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force( /* Compute dv dot r. */ dvdr.v = ((vi[0].v - vj[0].v) * dx[0].v) + ((vi[1].v - vj[1].v) * dx[1].v) + ((vi[2].v - vj[2].v) * dx[2].v); - //dvdr.v = dvdr.v * ri.v; + // dvdr.v = dvdr.v * ri.v; /* Compute the relative velocity. (This is 0 if the particles move away from * each other and negative otherwise) */ omega_ij.v = vec_fmin(dvdr.v, vec_set1(0.0f)); mu_ij.v = fac_mu.v * ri.v * omega_ij.v; /* This is 0 or negative */ - + /* Compute signal velocity */ v_sig.v = ci.v + cj.v - vec_set1(3.0f) * mu_ij.v; - + /* Now construct the full viscosity term */ rho_ij.v = vec_set1(0.5f) * (pirho.v + pjrho.v); - visc.v = vec_set1(-0.25f) * vec_set1(const_viscosity_alpha) * v_sig.v * mu_ij.v * - balsara.v / rho_ij.v; + visc.v = vec_set1(-0.25f) * vec_set1(const_viscosity_alpha) * v_sig.v * + mu_ij.v * balsara.v / rho_ij.v; /* Now, convolve with the kernel */ visc_term.v = vec_set1(0.5f) * visc.v * (wi_dr.v + wj_dr.v) * ri.v; @@ -877,7 +883,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force( /* Eventually get the acceleration */ acc.v = visc_term.v + sph_term.v; - + /* Use the force, Luke! */ for (k = 0; k < 3; k++) { f.v = dx[k].v * acc.v; @@ -889,18 +895,20 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force( /* Change in entropy */ entropy_dt.v = vec_set1(0.5f) * mj.v * visc_term.v * dvdr.v; - + /* Store the forces back on the particles. */ for (k = 0; k < VEC_SIZE; k++) { for (j = 0; j < 3; j++) pi[k]->a_hydro[j] -= pia[j].f[k]; pi[k]->force.h_dt -= pih_dt.f[k]; - pi[k]->force.v_sig = fmaxf(pi[k]->force.v_sig,v_sig.f[k]); + pi[k]->force.v_sig = fmaxf(pi[k]->force.v_sig, v_sig.f[k]); pi[k]->entropy_dt += entropy_dt.f[k]; } -#else +#else - error("The Gadget2 serial version of runner_iact_nonsym_force was called when the vectorised version should have been used.") + error( + "The Gadget2 serial version of runner_iact_nonsym_force was called when " + "the vectorised version should have been used.") #endif } diff --git a/tests/testInteractions.c b/tests/testInteractions.c index 27fd511accf1440993a2af29bd20f6695afd2e92..67ac0a24d7d49e0fd1d4d58abc002d1e19d2a1e3 100644 --- a/tests/testInteractions.c +++ b/tests/testInteractions.c @@ -24,9 +24,12 @@ #include <unistd.h> #include "swift.h" -/* Typdef function pointers for serial and vectorised versions of the interaction functions. */ -typedef void (*serial_interaction)(float, float *, float, float, struct part *, struct part *); -typedef void (*vec_interaction)(float *, float *, float *, float *, struct part **, struct part **); +/* Typdef function pointers for serial and vectorised versions of the + * interaction functions. */ +typedef void (*serial_interaction)(float, float *, float, float, struct part *, + struct part *); +typedef void (*vec_interaction)(float *, float *, float *, float *, + struct part **, struct part **); /** * @brief Constructs an array of particles in a valid state prior to @@ -73,7 +76,7 @@ struct part *make_particles(int count, double *offset, double spacing, double h, * @brief Populates particle properties needed for the force calculation. */ void prepare_force(struct part *parts) { - + struct part *p; for (size_t i = 0; i < VEC_SIZE + 1; ++i) { p = &parts[i]; @@ -89,24 +92,23 @@ void prepare_force(struct part *parts) { void dump_indv_particle_fields(char *fileName, struct part *p) { FILE *file = fopen(fileName, "a"); - - fprintf( - file, - "%6llu %10f %10f %10f %10f %10f %10f %10f %10f %10f %13e %13e %13e %13e %13e %13e %13e " - "%13e %13e %13e %10f\n", - p->id, p->x[0], p->x[1], - p->x[2], p->v[0], p->v[1], - p->v[2], p->a_hydro[0], p->a_hydro[1], - p->a_hydro[2], p->rho, p->rho_dh, - p->density.wcount, p->density.wcount_dh, p->force.h_dt, p->force.v_sig, + + fprintf(file, + "%6llu %10f %10f %10f %10f %10f %10f %10f %10f %10f %13e %13e %13e " + "%13e %13e %13e %13e " + "%13e %13e %13e %10f\n", + p->id, p->x[0], p->x[1], p->x[2], p->v[0], p->v[1], p->v[2], + p->a_hydro[0], p->a_hydro[1], p->a_hydro[2], p->rho, p->rho_dh, + p->density.wcount, p->density.wcount_dh, p->force.h_dt, + p->force.v_sig, #if defined(GADGET2_SPH) - p->density.div_v, p->density.rot_v[0], - p->density.rot_v[1], p->density.rot_v[2], p->entropy_dt + p->density.div_v, p->density.rot_v[0], p->density.rot_v[1], + p->density.rot_v[2], p->entropy_dt #elif defined(DEFAULT_SPH) - p->density.div_v, p->density.rot_v[0], - p->density.rot_v[1], p->density.rot_v[2], 0. + p->density.div_v, p->density.rot_v[0], p->density.rot_v[1], + p->density.rot_v[2], 0. #else - 0., 0., 0., 0., 0., 0., 0., 0., 0., 0. + 0., 0., 0., 0., 0., 0., 0., 0., 0., 0. #endif ); fclose(file); @@ -118,14 +120,16 @@ void dump_indv_particle_fields(char *fileName, struct part *p) { void write_header(char *fileName) { FILE *file = fopen(fileName, "w"); - /* Write header */ - fprintf(file, - "# %4s %10s %10s %10s %10s %10s %10s %10s %10s %10s %13s %13s %13s %13s %13s %13s %13s" - "%13s %13s %13s %13s\n", - "ID", "pos_x", "pos_y", "pos_z", "v_x", "v_y", "v_z", "a_x", "a_y", "a_z", "rho", "rho_dh", - "wcount", "wcount_dh", "dh/dt", "v_sig", "div_v", "curl_vx", "curl_vy", "curl_vz", "dS/dt"); - fprintf(file,"\nPARTICLES BEFORE INTERACTION:\n"); - fclose(file); + /* Write header */ + fprintf(file, + "# %4s %10s %10s %10s %10s %10s %10s %10s %10s %10s %13s %13s %13s " + "%13s %13s %13s %13s" + "%13s %13s %13s %13s\n", + "ID", "pos_x", "pos_y", "pos_z", "v_x", "v_y", "v_z", "a_x", "a_y", + "a_z", "rho", "rho_dh", "wcount", "wcount_dh", "dh/dt", "v_sig", + "div_v", "curl_vx", "curl_vy", "curl_vz", "dS/dt"); + fprintf(file, "\nPARTICLES BEFORE INTERACTION:\n"); + fclose(file); } /* @@ -136,8 +140,10 @@ void write_header(char *fileName) { * @param count No. of particles to be interacted * */ -void test_interactions(struct part *parts, int count, serial_interaction serial_inter_func, vec_interaction vec_inter_func, char *filePrefix) { - +void test_interactions(struct part *parts, int count, + serial_interaction serial_inter_func, + vec_interaction vec_inter_func, char *filePrefix) { + /* Use the first particle in the array as the one that gets updated. */ struct part pi = parts[0]; @@ -145,8 +151,8 @@ void test_interactions(struct part *parts, int count, serial_interaction serial_ char serial_filename[200] = ""; char vec_filename[200] = ""; - strcpy(serial_filename,filePrefix); - strcpy(vec_filename,filePrefix); + strcpy(serial_filename, filePrefix); + strcpy(vec_filename, filePrefix); sprintf(serial_filename + strlen(serial_filename), "_serial.dat"); sprintf(vec_filename + strlen(vec_filename), "_vec.dat"); @@ -180,7 +186,7 @@ void test_interactions(struct part *parts, int count, serial_interaction serial_ } serial_inter_func(r2, dx, pi.h, parts[i].h, &pi, &parts[i]); - } + } file = fopen(serial_filename, "a"); fprintf(file, "\nPARTICLES AFTER INTERACTION:\n"); @@ -217,7 +223,7 @@ void test_interactions(struct part *parts, int count, serial_interaction serial_ /* Perform vector interaction. */ vec_inter_func(r2q, dxq, hiq, hjq, piq, pjq); - + file = fopen(vec_filename, "a"); fprintf(file, "\nPARTICLES AFTER INTERACTION:\n"); fclose(file); @@ -267,8 +273,10 @@ int main(int argc, char *argv[]) { /* Build the infrastructure */ static long long partId = 0; - struct part *density_particles = make_particles(count,offset,spacing,h,&partId); - struct part *force_particles = make_particles(count,offset,spacing,h,&partId); + struct part *density_particles = + make_particles(count, offset, spacing, h, &partId); + struct part *force_particles = + make_particles(count, offset, spacing, h, &partId); prepare_force(force_particles); /* Define which interactions to call */ @@ -276,33 +284,37 @@ int main(int argc, char *argv[]) { vec_interaction vec_inter_func = &runner_iact_nonsym_vec_density; /* Call the non-sym density test. */ - test_interactions(density_particles,count,serial_inter_func,vec_inter_func,"test_nonsym_density"); - - density_particles = make_particles(count,offset,spacing,h,&partId); - + test_interactions(density_particles, count, serial_inter_func, vec_inter_func, + "test_nonsym_density"); + + density_particles = make_particles(count, offset, spacing, h, &partId); + /* Re-assign function pointers. */ serial_inter_func = &runner_iact_density; vec_inter_func = &runner_iact_vec_density; - + /* Call the symmetrical density test. */ - test_interactions(density_particles,count,serial_inter_func,vec_inter_func,"test_sym_density"); - + test_interactions(density_particles, count, serial_inter_func, vec_inter_func, + "test_sym_density"); + /* Re-assign function pointers. */ serial_inter_func = &runner_iact_nonsym_force; vec_inter_func = &runner_iact_nonsym_vec_force; /* Call the test non-sym force test. */ - test_interactions(force_particles,count,serial_inter_func,vec_inter_func,"test_nonsym_force"); + test_interactions(force_particles, count, serial_inter_func, vec_inter_func, + "test_nonsym_force"); - force_particles = make_particles(count,offset,spacing,h,&partId); + force_particles = make_particles(count, offset, spacing, h, &partId); prepare_force(force_particles); - + /* Re-assign function pointers. */ serial_inter_func = &runner_iact_force; vec_inter_func = &runner_iact_vec_force; /* Call the test symmetrical force test. */ - test_interactions(force_particles,count,serial_inter_func,vec_inter_func,"test_sym_force"); + test_interactions(force_particles, count, serial_inter_func, vec_inter_func, + "test_sym_force"); return 0; } diff --git a/tests/testMaths.c b/tests/testMaths.c index 5db28857ff691c13464b99593d0455caf6ff8201..96c75313db18edc653aa3a48cb6ac34913297806 100644 --- a/tests/testMaths.c +++ b/tests/testMaths.c @@ -22,24 +22,25 @@ #include "approx_math.h" #include "vector.h" -#include <stdio.h> #include <math.h> +#include <stdio.h> int main() { const int numPoints = 60000; - + for (int i = 0; i < numPoints; ++i) { - - const float x = 0.6f * (i / (float) numPoints) - 0.3f; + + const float x = 0.6f * (i / (float)numPoints) - 0.3f; const float exp_correct = expf(x); const float exp_approx = approx_expf(x); const float abs = fabs(exp_correct - exp_approx); - const float rel = 0.5f * fabs(exp_correct - exp_approx) / fabs(exp_correct + exp_approx); - - printf("%2d: x= %f exp(x)= %e approx_exp(x)=%e abs=%e rel=%e\n", i, x, exp_correct, - exp_approx, abs, rel); + const float rel = + 0.5f * fabs(exp_correct - exp_approx) / fabs(exp_correct + exp_approx); + + printf("%2d: x= %f exp(x)= %e approx_exp(x)=%e abs=%e rel=%e\n", i, x, + exp_correct, exp_approx, abs, rel); if (abs > 3e-6 && fabsf(x) <= 0.2) { printf("Absolute difference too large !\n"); @@ -58,7 +59,6 @@ int main() { printf("Relative difference too large !\n"); return 1; } - } printf("\nAll values are consistent\n");