Commit 36b22593 by Matthieu Schaller

### Formatting

parent 8bcd5f6e
 ... @@ -33,7 +33,7 @@ ... @@ -33,7 +33,7 @@ * @param x The number to take the exponential of. * @param x The number to take the exponential of. */ */ __attribute__((always_inline)) INLINE static float approx_expf(float x) { __attribute__((always_inline)) INLINE static float approx_expf(float x) { return 1.f + x * (1.f + x * (0.5f + x * (((float)(1.0 / 6.0)) + ((float)(1.0 / 24.0)) * x))); return 1.f + x * (1.f + x * (0.5f + x * (1.f / 6.f + 1.f / 24.f * x))); } } #endif /* SWIFT_APPROX_MATH_H */ #endif /* SWIFT_APPROX_MATH_H */
 ... @@ -145,7 +145,8 @@ __attribute__((always_inline)) INLINE static void hydro_prepare_force( ... @@ -145,7 +145,8 @@ __attribute__((always_inline)) INLINE static void hydro_prepare_force( /* Compute this particle's sound speed. */ /* Compute this particle's sound speed. */ const float u = p->u; const float u = p->u; const float fc = p->force.soundspeed = sqrtf(hydro_gamma * hydro_gamma_minus_one * u); const float fc = p->force.soundspeed = sqrtf(hydro_gamma * hydro_gamma_minus_one * u); /* Compute the P/Omega/rho2. */ /* Compute the P/Omega/rho2. */ xp->omega = 1.0f + 0.3333333333f * h * p->rho_dh / p->rho; xp->omega = 1.0f + 0.3333333333f * h * p->rho_dh / p->rho; ... ...
 ... @@ -479,14 +479,14 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force( ... @@ -479,14 +479,14 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force( pi[4]->mass, pi[5]->mass, pi[6]->mass, pi[7]->mass); pi[4]->mass, pi[5]->mass, pi[6]->mass, pi[7]->mass); mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass, mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass, pj[4]->mass, pj[5]->mass, pj[6]->mass, pj[7]->mass); pj[4]->mass, pj[5]->mass, pj[6]->mass, pj[7]->mass); piPOrho2.v = piPOrho2.v = vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2, vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2, pi[2]->force.P_over_rho2, pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2, pi[3]->force.P_over_rho2, pi[4]->force.P_over_rho2, pi[5]->force.P_over_rho2, pi[4]->force.P_over_rho2, pi[5]->force.P_over_rho2, pi[6]->force.P_over_rho2, pi[7]->force.P_over_rho2); pi[6]->force.P_over_rho2, pi[7]->force.P_over_rho2); pjPOrho2.v = pjPOrho2.v = vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2, vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2, pj[2]->force.P_over_rho2, pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2, pj[3]->force.P_over_rho2, pj[4]->force.P_over_rho2, pj[5]->force.P_over_rho2, pj[4]->force.P_over_rho2, pj[5]->force.P_over_rho2, pj[6]->force.P_over_rho2, pj[7]->force.P_over_rho2); pj[6]->force.P_over_rho2, pj[7]->force.P_over_rho2); pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho, pi[4]->rho, pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho, pi[4]->rho, pi[5]->rho, pi[6]->rho, pi[7]->rho); pi[5]->rho, pi[6]->rho, pi[7]->rho); pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho, pj[4]->rho, pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho, pj[4]->rho, ... @@ -495,12 +495,14 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force( ... @@ -495,12 +495,14 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force( pi[6]->u, pi[7]->u); pi[6]->u, pi[7]->u); pju.v = vec_set(pj[0]->u, pj[1]->u, pj[2]->u, pj[3]->u, pj[4]->u, pj[5]->u, pju.v = vec_set(pj[0]->u, pj[1]->u, pj[2]->u, pj[3]->u, pj[4]->u, pj[5]->u, pj[6]->u, pj[7]->u); pj[6]->u, pj[7]->u); ci.v = ci.v = vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed, vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed, pi[2]->force.soundspeed, pi[3]->force.soundspeed, pi[2]->force.soundspeed, pi[3]->force.soundspeed, pi[4]->force.soundspeed, pi[5]->force.soundspeed, pi[6]->force.soundspeed, pi[7]->force.soundspeed); pi[4]->force.soundspeed, pi[5]->force.soundspeed, cj.v = pi[6]->force.soundspeed, pi[7]->force.soundspeed); vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed, pj[2]->force.soundspeed, pj[3]->force.soundspeed, cj.v = vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed, pj[4]->force.soundspeed, pj[5]->force.soundspeed, pj[6]->force.soundspeed, pj[7]->force.soundspeed); pj[2]->force.soundspeed, pj[3]->force.soundspeed, pj[4]->force.soundspeed, pj[5]->force.soundspeed, pj[6]->force.soundspeed, pj[7]->force.soundspeed); vi_sig.v = vec_set(pi[0]->force.v_sig, pi[1]->force.v_sig, pi[2]->force.v_sig, vi_sig.v = vec_set(pi[0]->force.v_sig, pi[1]->force.v_sig, pi[2]->force.v_sig, pi[3]->force.v_sig, pi[4]->force.v_sig, pi[5]->force.v_sig, pi[3]->force.v_sig, pi[4]->force.v_sig, pi[5]->force.v_sig, pi[6]->force.v_sig, pi[7]->force.v_sig); pi[6]->force.v_sig, pi[7]->force.v_sig); ... @@ -538,10 +540,10 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force( ... @@ -538,10 +540,10 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force( pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho); pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho); piu.v = vec_set(pi[0]->u, pi[1]->u, pi[2]->u, pi[3]->u); piu.v = vec_set(pi[0]->u, pi[1]->u, pi[2]->u, pi[3]->u); pju.v = vec_set(pj[0]->u, pj[1]->u, pj[2]->u, pj[3]->u); pju.v = vec_set(pj[0]->u, pj[1]->u, pj[2]->u, pj[3]->u); ci.v = ci.v = vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed, vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed, pi[2]->force.soundspeed, pi[3]->force.soundspeed); pi[2]->force.soundspeed, pi[3]->force.soundspeed); cj.v = cj.v = vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed, vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed, pj[2]->force.soundspeed, pj[3]->force.soundspeed); pj[2]->force.soundspeed, pj[3]->force.soundspeed); vi_sig.v = vec_set(pi[0]->force.v_sig, pi[1]->force.v_sig, pi[2]->force.v_sig, vi_sig.v = vec_set(pi[0]->force.v_sig, pi[1]->force.v_sig, pi[2]->force.v_sig, pi[3]->force.v_sig); pi[3]->force.v_sig); vj_sig.v = vec_set(pj[0]->force.v_sig, pj[1]->force.v_sig, pj[2]->force.v_sig, vj_sig.v = vec_set(pj[0]->force.v_sig, pj[1]->force.v_sig, pj[2]->force.v_sig, ... @@ -780,14 +782,14 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force( ... @@ -780,14 +782,14 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force( #if VEC_SIZE == 8 #if VEC_SIZE == 8 mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass, mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass, pj[4]->mass, pj[5]->mass, pj[6]->mass, pj[7]->mass); pj[4]->mass, pj[5]->mass, pj[6]->mass, pj[7]->mass); piPOrho2.v = piPOrho2.v = vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2, vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2, pi[2]->force.P_over_rho2, pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2, pi[3]->force.P_over_rho2, pi[4]->force.P_over_rho2, pi[5]->force.P_over_rho2, pi[4]->force.P_over_rho2, pi[5]->force.P_over_rho2, pi[6]->force.P_over_rho2, pi[7]->force.P_over_rho2); pi[6]->force.P_over_rho2, pi[7]->force.P_over_rho2); pjPOrho2.v = pjPOrho2.v = vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2, vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2, pj[2]->force.P_over_rho2, pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2, pj[3]->force.P_over_rho2, pj[4]->force.P_over_rho2, pj[5]->force.P_over_rho2, pj[4]->force.P_over_rho2, pj[5]->force.P_over_rho2, pj[6]->force.P_over_rho2, pj[7]->force.P_over_rho2); pj[6]->force.P_over_rho2, pj[7]->force.P_over_rho2); pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho, pi[4]->rho, pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho, pi[4]->rho, pi[5]->rho, pi[6]->rho, pi[7]->rho); pi[5]->rho, pi[6]->rho, pi[7]->rho); pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho, pj[4]->rho, pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho, pj[4]->rho, ... @@ -796,12 +798,14 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force( ... @@ -796,12 +798,14 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force( pi[6]->u, pi[7]->u); pi[6]->u, pi[7]->u); pju.v = vec_set(pj[0]->u, pj[1]->u, pj[2]->u, pj[3]->u, pj[4]->u, pj[5]->u, pju.v = vec_set(pj[0]->u, pj[1]->u, pj[2]->u, pj[3]->u, pj[4]->u, pj[5]->u, pj[6]->u, pj[7]->u); pj[6]->u, pj[7]->u); ci.v = ci.v = vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed, vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed, pi[2]->force.soundspeed, pi[3]->force.soundspeed, pi[2]->force.soundspeed, pi[3]->force.soundspeed, pi[4]->force.soundspeed, pi[5]->force.soundspeed, pi[6]->force.soundspeed, pi[7]->force.soundspeed); pi[4]->force.soundspeed, pi[5]->force.soundspeed, cj.v = pi[6]->force.soundspeed, pi[7]->force.soundspeed); vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed, pj[2]->force.soundspeed, pj[3]->force.soundspeed, cj.v = vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed, pj[4]->force.soundspeed, pj[5]->force.soundspeed, pj[6]->force.soundspeed, pj[7]->force.soundspeed); pj[2]->force.soundspeed, pj[3]->force.soundspeed, pj[4]->force.soundspeed, pj[5]->force.soundspeed, pj[6]->force.soundspeed, pj[7]->force.soundspeed); vi_sig.v = vec_set(pi[0]->force.v_sig, pi[1]->force.v_sig, pi[2]->force.v_sig, vi_sig.v = vec_set(pi[0]->force.v_sig, pi[1]->force.v_sig, pi[2]->force.v_sig, pi[3]->force.v_sig, pi[4]->force.v_sig, pi[5]->force.v_sig, pi[3]->force.v_sig, pi[4]->force.v_sig, pi[5]->force.v_sig, pi[6]->force.v_sig, pi[7]->force.v_sig); pi[6]->force.v_sig, pi[7]->force.v_sig); ... @@ -838,10 +842,10 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force( ... @@ -838,10 +842,10 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force( pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho); pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho); piu.v = vec_set(pi[0]->u, pi[1]->u, pi[2]->u, pi[3]->u); piu.v = vec_set(pi[0]->u, pi[1]->u, pi[2]->u, pi[3]->u); pju.v = vec_set(pj[0]->u, pj[1]->u, pj[2]->u, pj[3]->u); pju.v = vec_set(pj[0]->u, pj[1]->u, pj[2]->u, pj[3]->u); ci.v = ci.v = vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed, vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed, pi[2]->force.soundspeed, pi[3]->force.soundspeed); pi[2]->force.soundspeed, pi[3]->force.soundspeed); cj.v = cj.v = vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed, vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed, pj[2]->force.soundspeed, pj[3]->force.soundspeed); pj[2]->force.soundspeed, pj[3]->force.soundspeed); vi_sig.v = vec_set(pi[0]->force.v_sig, pi[1]->force.v_sig, pi[2]->force.v_sig, vi_sig.v = vec_set(pi[0]->force.v_sig, pi[1]->force.v_sig, pi[2]->force.v_sig, pi[3]->force.v_sig); pi[3]->force.v_sig); vj_sig.v = vec_set(pj[0]->force.v_sig, pj[1]->force.v_sig, pj[2]->force.v_sig, vj_sig.v = vec_set(pj[0]->force.v_sig, pj[1]->force.v_sig, pj[2]->force.v_sig, ... @@ -936,7 +940,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force( ... @@ -936,7 +940,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force( for (k = 0; k < VEC_SIZE; k++) { for (k = 0; k < VEC_SIZE; k++) { pi[k]->force.u_dt += piu_dt.f[k]; pi[k]->force.u_dt += piu_dt.f[k]; pi[k]->force.h_dt -= pih_dt.f[k]; pi[k]->force.h_dt -= pih_dt.f[k]; pi[k]->force.v_sig = fmaxf(pi[k]->force.v_sig,v_sig.f[k]); pi[k]->force.v_sig = fmaxf(pi[k]->force.v_sig, v_sig.f[k]); for (j = 0; j < 3; j++) pi[k]->a_hydro[j] -= pia[j].f[k]; for (j = 0; j < 3; j++) pi[k]->a_hydro[j] -= pia[j].f[k]; } } ... ...
 ... @@ -501,30 +501,32 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force( ... @@ -501,30 +501,32 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force( fac_mu.v = vec_set1(1.f); /* Will change with cosmological integration */ fac_mu.v = vec_set1(1.f); /* Will change with cosmological integration */ /* Load stuff. */ /* Load stuff. */ #if VEC_SIZE == 8 #if VEC_SIZE == 8 mi.v = vec_set(pi[0]->mass, pi[1]->mass, pi[2]->mass, pi[3]->mass, mi.v = vec_set(pi[0]->mass, pi[1]->mass, pi[2]->mass, pi[3]->mass, pi[4]->mass, pi[5]->mass, pi[6]->mass, pi[7]->mass); pi[4]->mass, pi[5]->mass, pi[6]->mass, pi[7]->mass); mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass, mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass, pj[4]->mass, pj[5]->mass, pj[6]->mass, pj[7]->mass); pj[4]->mass, pj[5]->mass, pj[6]->mass, pj[7]->mass); piPOrho.v = piPOrho.v = vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2, vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2, pi[2]->force.P_over_rho2, pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2, pi[3]->force.P_over_rho2, pi[4]->force.P_over_rho2, pi[5]->force.P_over_rho2, pi[4]->force.P_over_rho2, pi[5]->force.P_over_rho2, pi[6]->force.P_over_rho2, pi[7]->force.P_over_rho2); pi[6]->force.P_over_rho2, pi[7]->force.P_over_rho2); pjPOrho.v = pjPOrho.v = vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2, vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2, pj[2]->force.P_over_rho2, pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2, pj[3]->force.P_over_rho2, pj[4]->force.P_over_rho2, pj[5]->force.P_over_rho2, pj[4]->force.P_over_rho2, pj[5]->force.P_over_rho2, pj[6]->force.P_over_rho2, pj[7]->force.P_over_rho2); pj[6]->force.P_over_rho2, pj[7]->force.P_over_rho2); pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho, pi[4]->rho, pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho, pi[4]->rho, pi[5]->rho, pi[6]->rho, pi[7]->rho); pi[5]->rho, pi[6]->rho, pi[7]->rho); pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho, pj[4]->rho, pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho, pj[4]->rho, pj[5]->rho, pj[6]->rho, pj[7]->rho); pj[5]->rho, pj[6]->rho, pj[7]->rho); ci.v = ci.v = vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed, vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed, pi[2]->force.soundspeed, pi[3]->force.soundspeed, pi[2]->force.soundspeed, pi[3]->force.soundspeed, pi[4]->force.soundspeed, pi[5]->force.soundspeed, pi[6]->force.soundspeed, pi[7]->force.soundspeed); pi[4]->force.soundspeed, pi[5]->force.soundspeed, cj.v = pi[6]->force.soundspeed, pi[7]->force.soundspeed); vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed, pj[2]->force.soundspeed, pj[3]->force.soundspeed, cj.v = vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed, pj[4]->force.soundspeed, pj[5]->force.soundspeed, pj[6]->force.soundspeed, pj[7]->force.soundspeed); pj[2]->force.soundspeed, pj[3]->force.soundspeed, pj[4]->force.soundspeed, pj[5]->force.soundspeed, pj[6]->force.soundspeed, pj[7]->force.soundspeed); for (k = 0; k < 3; k++) { for (k = 0; k < 3; k++) { vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k], vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k], pi[4]->v[k], pi[5]->v[k], pi[6]->v[k], pi[7]->v[k]); pi[4]->v[k], pi[5]->v[k], pi[6]->v[k], pi[7]->v[k]); ... @@ -545,15 +547,15 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force( ... @@ -545,15 +547,15 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force( mi.v = vec_set(pi[0]->mass, pi[1]->mass, pi[2]->mass, pi[3]->mass); mi.v = vec_set(pi[0]->mass, pi[1]->mass, pi[2]->mass, pi[3]->mass); mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass); mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass); piPOrho.v = vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2, piPOrho.v = vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2, pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2); pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2); pjPOrho.v = vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2, pjPOrho.v = vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2, pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2); pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2); pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho); pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho); pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho); pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho); ci.v = ci.v = vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed, vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed, pi[2]->force.soundspeed, pi[3]->force.soundspeed); pi[2]->force.soundspeed, pi[3]->force.soundspeed); cj.v = cj.v = vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed, vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed, pj[2]->force.soundspeed, pj[3]->force.soundspeed); pj[2]->force.soundspeed, pj[3]->force.soundspeed); for (k = 0; k < 3; k++) { for (k = 0; k < 3; k++) { vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k]); vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k]); vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k]); vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k]); ... @@ -595,20 +597,20 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force( ... @@ -595,20 +597,20 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force( /* Compute dv dot r. */ /* Compute dv dot r. */ dvdr.v = ((vi[0].v - vj[0].v) * dx[0].v) + ((vi[1].v - vj[1].v) * dx[1].v) + dvdr.v = ((vi[0].v - vj[0].v) * dx[0].v) + ((vi[1].v - vj[1].v) * dx[1].v) + ((vi[2].v - vj[2].v) * dx[2].v); ((vi[2].v - vj[2].v) * dx[2].v); //dvdr.v = dvdr.v * ri.v; // dvdr.v = dvdr.v * ri.v; /* Compute the relative velocity. (This is 0 if the particles move away from /* Compute the relative velocity. (This is 0 if the particles move away from * each other and negative otherwise) */ * each other and negative otherwise) */ omega_ij.v = vec_fmin(dvdr.v, vec_set1(0.0f)); omega_ij.v = vec_fmin(dvdr.v, vec_set1(0.0f)); mu_ij.v = fac_mu.v * ri.v * omega_ij.v; /* This is 0 or negative */ mu_ij.v = fac_mu.v * ri.v * omega_ij.v; /* This is 0 or negative */ /* Compute signal velocity */ /* Compute signal velocity */ v_sig.v = ci.v + cj.v - vec_set1(3.0f) * mu_ij.v; v_sig.v = ci.v + cj.v - vec_set1(3.0f) * mu_ij.v; /* Now construct the full viscosity term */ /* Now construct the full viscosity term */ rho_ij.v = vec_set1(0.5f) * (pirho.v + pjrho.v); rho_ij.v = vec_set1(0.5f) * (pirho.v + pjrho.v); visc.v = vec_set1(-0.25f) * vec_set1(const_viscosity_alpha) * v_sig.v * mu_ij.v * visc.v = vec_set1(-0.25f) * vec_set1(const_viscosity_alpha) * v_sig.v * balsara.v / rho_ij.v; mu_ij.v * balsara.v / rho_ij.v; /* Now, convolve with the kernel */ /* Now, convolve with the kernel */ visc_term.v = vec_set1(0.5f) * visc.v * (wi_dr.v + wj_dr.v) * ri.v; visc_term.v = vec_set1(0.5f) * visc.v * (wi_dr.v + wj_dr.v) * ri.v; ... @@ -616,7 +618,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force( ... @@ -616,7 +618,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force( /* Eventually get the acceleration */ /* Eventually get the acceleration */ acc.v = visc_term.v + sph_term.v; acc.v = visc_term.v + sph_term.v; /* Use the force, Luke! */ /* Use the force, Luke! */ for (k = 0; k < 3; k++) { for (k = 0; k < 3; k++) { f.v = dx[k].v * acc.v; f.v = dx[k].v * acc.v; ... @@ -630,7 +632,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force( ... @@ -630,7 +632,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force( /* Change in entropy */ /* Change in entropy */ entropy_dt.v = vec_set1(0.5f) * visc_term.v * dvdr.v; entropy_dt.v = vec_set1(0.5f) * visc_term.v * dvdr.v; /* Store the forces back on the particles. */ /* Store the forces back on the particles. */ for (k = 0; k < VEC_SIZE; k++) { for (k = 0; k < VEC_SIZE; k++) { for (j = 0; j < 3; j++) { for (j = 0; j < 3; j++) { ... @@ -645,9 +647,11 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force( ... @@ -645,9 +647,11 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force( pj[k]->entropy_dt -= entropy_dt.f[k] * mi.f[k]; pj[k]->entropy_dt -= entropy_dt.f[k] * mi.f[k]; } } #else #else error("The Gadget2 serial version of runner_iact_nonsym_force was called when the vectorised version should have been used.") error( "The Gadget2 serial version of runner_iact_nonsym_force was called when " "the vectorised version should have been used.") #endif #endif } } ... @@ -744,7 +748,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force( ... @@ -744,7 +748,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force( float *R2, float *Dx, float *Hi, float *Hj, struct part **pi, float *R2, float *Dx, float *Hi, float *Hj, struct part **pi, struct part **pj) { struct part **pj) { #ifdef WITH_VECTORIZATION #ifdef WITH_VECTORIZATION vector r, r2, ri; vector r, r2, ri; vector xi, xj; vector xi, xj; ... @@ -765,28 +769,30 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force( ... @@ -765,28 +769,30 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force( fac_mu.v = vec_set1(1.f); /* Will change with cosmological integration */ fac_mu.v = vec_set1(1.f); /* Will change with cosmological integration */ /* Load stuff. */ /* Load stuff. */ #if VEC_SIZE == 8 #if VEC_SIZE == 8 mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass, mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass, pj[4]->mass, pj[5]->mass, pj[6]->mass, pj[7]->mass); pj[4]->mass, pj[5]->mass, pj[6]->mass, pj[7]->mass); piPOrho.v = piPOrho.v = vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2, vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2, pi[2]->force.P_over_rho2, pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2, pi[3]->force.P_over_rho2, pi[4]->force.P_over_rho2, pi[5]->force.P_over_rho2, pi[4]->force.P_over_rho2, pi[5]->force.P_over_rho2, pi[6]->force.P_over_rho2, pi[7]->force.P_over_rho2); pi[6]->force.P_over_rho2, pi[7]->force.P_over_rho2); pjPOrho.v = pjPOrho.v = vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2, vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2, pj[2]->force.P_over_rho2, pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2, pj[3]->force.P_over_rho2, pj[4]->force.P_over_rho2, pj[5]->force.P_over_rho2, pj[4]->force.P_over_rho2, pj[5]->force.P_over_rho2, pj[6]->force.P_over_rho2, pj[7]->force.P_over_rho2); pj[6]->force.P_over_rho2, pj[7]->force.P_over_rho2); pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho, pi[4]->rho, pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho, pi[4]->rho, pi[5]->rho, pi[6]->rho, pi[7]->rho); pi[5]->rho, pi[6]->rho, pi[7]->rho); pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho, pj[4]->rho, pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho, pj[4]->rho, pj[5]->rho, pj[6]->rho, pj[7]->rho); pj[5]->rho, pj[6]->rho, pj[7]->rho); ci.v = ci.v = vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed, vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed, pi[2]->force.soundspeed, pi[3]->force.soundspeed, pi[2]->force.soundspeed, pi[3]->force.soundspeed, pi[4]->force.soundspeed, pi[5]->force.soundspeed, pi[6]->force.soundspeed, pi[7]->force.soundspeed); pi[4]->force.soundspeed, pi[5]->force.soundspeed, cj.v = pi[6]->force.soundspeed, pi[7]->force.soundspeed); vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed, pj[2]->force.soundspeed, pj[3]->force.soundspeed, cj.v = vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed, pj[4]->force.soundspeed, pj[5]->force.soundspeed, pj[6]->force.soundspeed, pj[7]->force.soundspeed); pj[2]->force.soundspeed, pj[3]->force.soundspeed, pj[4]->force.soundspeed, pj[5]->force.soundspeed, pj[6]->force.soundspeed, pj[7]->force.soundspeed); for (k = 0; k < 3; k++) { for (k = 0; k < 3; k++) { vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k], vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k], pi[4]->v[k], pi[5]->v[k], pi[6]->v[k], pi[7]->v[k]); pi[4]->v[k], pi[5]->v[k], pi[6]->v[k], pi[7]->v[k]); ... @@ -806,15 +812,15 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force( ... @@ -806,15 +812,15 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force( #elif VEC_SIZE == 4 #elif VEC_SIZE == 4 mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass); mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass); piPOrho.v = vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2, piPOrho.v = vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2, pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2); pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2); pjPOrho.v = vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2, pjPOrho.v = vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2, pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2); pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2); pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho); pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho); pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho); pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho); ci.v = ci.v = vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed, vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed, pi[2]->force.soundspeed, pi[3]->force.soundspeed); pi[2]->force.soundspeed, pi[3]->force.soundspeed); cj.v = cj.v = vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed, vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed, pj[2]->force.soundspeed, pj[3]->force.soundspeed); pj[2]->force.soundspeed, pj[3]->force.soundspeed); for (k = 0; k < 3; k++) { for (k = 0; k < 3; k++) { vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k]); vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k]); vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k]); vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k]); ... @@ -856,20 +862,20 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force( ... @@ -856,20 +862,20 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force( /* Compute dv dot r. */ /* Compute dv dot r. */ dvdr.v = ((vi[0].v - vj[0].v) * dx[0].v) + ((vi[1].v - vj[1].v) * dx[1].v) + dvdr.v = ((vi[0].v - vj[0].v) * dx[0].v) + ((vi[1].v - vj[1].v) * dx[1].v) + ((vi[2].v - vj[2].v) * dx[2].v); ((vi[2].v - vj[2].v) * dx[2].v); //dvdr.v = dvdr.v * ri.v; // dvdr.v = dvdr.v * ri.v; /* Compute the relative velocity. (This is 0 if the particles move away from /* Compute the relative velocity. (This is 0 if the particles move away from * each other and negative otherwise) */ * each other and negative otherwise) */ omega_ij.v = vec_fmin(dvdr.v, vec_set1(0.0f)); omega_ij.v = vec_fmin(dvdr.v, vec_set1(0.0f)); mu_ij.v = fac_mu.v * ri.v * omega_ij.v; /* This is 0 or negative */ mu_ij.v = fac_mu.v * ri.v * omega_ij.v; /* This is 0 or negative */ /* Compute signal velocity */ /* Compute signal velocity */ v_sig.v = ci.v + cj.v - vec_set1(3.0f) * mu_ij.v; v_sig.v = ci.v + cj.v - vec_set1(3.0f) * mu_ij.v; /* Now construct the full viscosity term */ /* Now construct the full viscosity term */ rho_ij.v = vec_set1(0.5f) * (pirho.v + pjrho.v); rho_ij.v = vec_set1(0.5f) * (pirho.v + pjrho.v); visc.v = vec_set1(-0.25f) * vec_set1(const_viscosity_alpha) * v_sig.v * mu_ij.v * visc.v = vec_set1(-0.25f) * vec_set1(const_viscosity_alpha) * v_sig.v * balsara.v / rho_ij.v; mu_ij.v * balsara.v / rho_ij.v; /* Now, convolve with the kernel */ /* Now, convolve with the kernel */ visc_term.v = vec_set1(0.5f) * visc.v * (wi_dr.v + wj_dr.v) * ri.v; visc_term.v = vec_set1(0.5f) * visc.v * (wi_dr.v + wj_dr.v) * ri.v; ... @@ -877,7 +883,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force( ... @@ -877,7 +883,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force( /* Eventually get the acceleration */