diff --git a/examples/main.c b/examples/main.c index 99f673307ab43b5e55991591177da30f4c212f63..be1ed7c192458fb08215035694bdb2a843400933 100644 --- a/examples/main.c +++ b/examples/main.c @@ -269,12 +269,6 @@ int main(int argc, char *argv[]) { /* Temporary abort to handle absence of vectorized functions */ #ifdef WITH_VECTORIZATION -#ifdef GADGET2_SPH - error( - "Vectorized version of Gadget SPH routines not implemented yet. " - "Reconfigure with --disable-vec and recompile or use DEFAULT_SPH."); -#endif - #ifdef MINIMAL_SPH error( "Vectorized version of Minimal SPH routines not implemented yet. " diff --git a/src/hydro/Default/hydro.h b/src/hydro/Default/hydro.h index 6e6e4f18d2e10ed4a6bf78e5e4ef0bf7b4eff183..bfe48e4a36d5744053703d437d666151173b923d 100644 --- a/src/hydro/Default/hydro.h +++ b/src/hydro/Default/hydro.h @@ -145,17 +145,17 @@ __attribute__((always_inline)) INLINE static void hydro_prepare_force( /* Compute this particle's sound speed. */ const float u = p->u; - const float fc = p->force.c = sqrtf(hydro_gamma * hydro_gamma_minus_one * u); + const float fc = p->force.soundspeed = sqrtf(hydro_gamma * hydro_gamma_minus_one * u); /* Compute the P/Omega/rho2. */ xp->omega = 1.0f + 0.3333333333f * h * p->rho_dh / p->rho; - p->force.POrho2 = u * hydro_gamma_minus_one / (p->rho * xp->omega); + p->force.P_over_rho2 = u * hydro_gamma_minus_one / (p->rho * xp->omega); /* Balsara switch */ p->force.balsara = normDiv_v / (normDiv_v + normRot_v + 0.0001f * fc * ih); /* Viscosity parameter decay time */ - const float tau = h / (2.f * const_viscosity_length * p->force.c); + const float tau = h / (2.f * const_viscosity_length * p->force.soundspeed); /* Viscosity source term */ const float S = fmaxf(-normDiv_v, 0.f); @@ -214,7 +214,7 @@ __attribute__((always_inline)) INLINE static void hydro_predict_extra( u = p->u *= expf(w); /* Predict gradient term */ - p->force.POrho2 = u * hydro_gamma_minus_one / (p->rho * xp->omega); + p->force.P_over_rho2 = u * hydro_gamma_minus_one / (p->rho * xp->omega); } /** diff --git a/src/hydro/Default/hydro_iact.h b/src/hydro/Default/hydro_iact.h index 41bf3d6fbc6a8a9feeef9a03bf945887d301981a..d8b1fdda330396dc50d47467d39c264fbe1e58fe 100644 --- a/src/hydro/Default/hydro_iact.h +++ b/src/hydro/Default/hydro_iact.h @@ -371,8 +371,8 @@ __attribute__((always_inline)) INLINE static void runner_iact_force( mj = pj->mass; rhoi = pi->rho; rhoj = pj->rho; - POrho2i = pi->force.POrho2; - POrho2j = pj->force.POrho2; + POrho2i = pi->force.P_over_rho2; + POrho2j = pj->force.P_over_rho2; /* Get the kernel for hi. */ hi_inv = 1.0f / hi; @@ -398,7 +398,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_force( omega_ij = fminf(dvdr, 0.f); /* Compute signal velocity */ - v_sig = pi->force.c + pj->force.c - 2.0f * omega_ij; + v_sig = pi->force.soundspeed + pj->force.soundspeed - 2.0f * omega_ij; /* Compute viscosity parameter */ alpha_ij = -0.5f * (pi->alpha + pj->alpha); @@ -480,13 +480,13 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force( mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass, pj[4]->mass, pj[5]->mass, pj[6]->mass, pj[7]->mass); piPOrho2.v = - vec_set(pi[0]->force.POrho2, pi[1]->force.POrho2, pi[2]->force.POrho2, - pi[3]->force.POrho2, pi[4]->force.POrho2, pi[5]->force.POrho2, - pi[6]->force.POrho2, pi[7]->force.POrho2); + vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2, pi[2]->force.P_over_rho2, + pi[3]->force.P_over_rho2, pi[4]->force.P_over_rho2, pi[5]->force.P_over_rho2, + pi[6]->force.P_over_rho2, pi[7]->force.P_over_rho2); pjPOrho2.v = - vec_set(pj[0]->force.POrho2, pj[1]->force.POrho2, pj[2]->force.POrho2, - pj[3]->force.POrho2, pj[4]->force.POrho2, pj[5]->force.POrho2, - pj[6]->force.POrho2, pj[7]->force.POrho2); + vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2, pj[2]->force.P_over_rho2, + pj[3]->force.P_over_rho2, pj[4]->force.P_over_rho2, pj[5]->force.P_over_rho2, + pj[6]->force.P_over_rho2, pj[7]->force.P_over_rho2); pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho, pi[4]->rho, pi[5]->rho, pi[6]->rho, pi[7]->rho); pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho, pj[4]->rho, @@ -496,11 +496,11 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force( pju.v = vec_set(pj[0]->u, pj[1]->u, pj[2]->u, pj[3]->u, pj[4]->u, pj[5]->u, pj[6]->u, pj[7]->u); ci.v = - vec_set(pi[0]->force.c, pi[1]->force.c, pi[2]->force.c, pi[3]->force.c, - pi[4]->force.c, pi[5]->force.c, pi[6]->force.c, pi[7]->force.c); + vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed, pi[2]->force.soundspeed, pi[3]->force.soundspeed, + pi[4]->force.soundspeed, pi[5]->force.soundspeed, pi[6]->force.soundspeed, pi[7]->force.soundspeed); cj.v = - vec_set(pj[0]->force.c, pj[1]->force.c, pj[2]->force.c, pj[3]->force.c, - pj[4]->force.c, pj[5]->force.c, pj[6]->force.c, pj[7]->force.c); + vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed, pj[2]->force.soundspeed, pj[3]->force.soundspeed, + pj[4]->force.soundspeed, pj[5]->force.soundspeed, pj[6]->force.soundspeed, pj[7]->force.soundspeed); vi_sig.v = vec_set(pi[0]->force.v_sig, pi[1]->force.v_sig, pi[2]->force.v_sig, pi[3]->force.v_sig, pi[4]->force.v_sig, pi[5]->force.v_sig, pi[6]->force.v_sig, pi[7]->force.v_sig); @@ -530,18 +530,18 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force( #elif VEC_SIZE == 4 mi.v = vec_set(pi[0]->mass, pi[1]->mass, pi[2]->mass, pi[3]->mass); mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass); - piPOrho2.v = vec_set(pi[0]->force.POrho2, pi[1]->force.POrho2, - pi[2]->force.POrho2, pi[3]->force.POrho2); - pjPOrho2.v = vec_set(pj[0]->force.POrho2, pj[1]->force.POrho2, - pj[2]->force.POrho2, pj[3]->force.POrho2); + piPOrho2.v = vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2, + pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2); + pjPOrho2.v = vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2, + pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2); pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho); pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho); piu.v = vec_set(pi[0]->u, pi[1]->u, pi[2]->u, pi[3]->u); pju.v = vec_set(pj[0]->u, pj[1]->u, pj[2]->u, pj[3]->u); ci.v = - vec_set(pi[0]->force.c, pi[1]->force.c, pi[2]->force.c, pi[3]->force.c); + vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed, pi[2]->force.soundspeed, pi[3]->force.soundspeed); cj.v = - vec_set(pj[0]->force.c, pj[1]->force.c, pj[2]->force.c, pj[3]->force.c); + vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed, pj[2]->force.soundspeed, pj[3]->force.soundspeed); vi_sig.v = vec_set(pi[0]->force.v_sig, pi[1]->force.v_sig, pi[2]->force.v_sig, pi[3]->force.v_sig); vj_sig.v = vec_set(pj[0]->force.v_sig, pj[1]->force.v_sig, pj[2]->force.v_sig, @@ -637,18 +637,14 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force( piu_dt.v += mj.v * tc.v * (piu.v - pju.v); pju_dt.v += mi.v * tc.v * (pju.v - piu.v); - /* compute the signal velocity (this is always symmetrical). */ - vi_sig.v = vec_fmax(vi_sig.v, v_sig.v); - vj_sig.v = vec_fmax(vj_sig.v, v_sig.v); - /* Store the forces back on the particles. */ for (k = 0; k < VEC_SIZE; k++) { pi[k]->force.u_dt += piu_dt.f[k]; pj[k]->force.u_dt += pju_dt.f[k]; pi[k]->force.h_dt -= pih_dt.f[k]; pj[k]->force.h_dt -= pjh_dt.f[k]; - pi[k]->force.v_sig = vi_sig.f[k]; - pj[k]->force.v_sig = vj_sig.f[k]; + pi[k]->force.v_sig = fmaxf(pi[k]->force.v_sig, v_sig.f[k]); + pj[k]->force.v_sig = fmaxf(pj[k]->force.v_sig, v_sig.f[k]); for (j = 0; j < 3; j++) { pi[k]->a_hydro[j] -= pia[j].f[k]; pj[k]->a_hydro[j] += pja[j].f[k]; @@ -684,8 +680,8 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( mj = pj->mass; rhoi = pi->rho; rhoj = pj->rho; - POrho2i = pi->force.POrho2; - POrho2j = pj->force.POrho2; + POrho2i = pi->force.P_over_rho2; + POrho2j = pj->force.P_over_rho2; /* Get the kernel for hi. */ hi_inv = 1.0f / hi; @@ -711,7 +707,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( omega_ij = fminf(dvdr, 0.f); /* Compute signal velocity */ - v_sig = pi->force.c + pj->force.c - 2.0f * omega_ij; + v_sig = pi->force.soundspeed + pj->force.soundspeed - 2.0f * omega_ij; /* Compute viscosity parameter */ alpha_ij = -0.5f * (pi->alpha + pj->alpha); @@ -785,13 +781,13 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force( mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass, pj[4]->mass, pj[5]->mass, pj[6]->mass, pj[7]->mass); piPOrho2.v = - vec_set(pi[0]->force.POrho2, pi[1]->force.POrho2, pi[2]->force.POrho2, - pi[3]->force.POrho2, pi[4]->force.POrho2, pi[5]->force.POrho2, - pi[6]->force.POrho2, pi[7]->force.POrho2); + vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2, pi[2]->force.P_over_rho2, + pi[3]->force.P_over_rho2, pi[4]->force.P_over_rho2, pi[5]->force.P_over_rho2, + pi[6]->force.P_over_rho2, pi[7]->force.P_over_rho2); pjPOrho2.v = - vec_set(pj[0]->force.POrho2, pj[1]->force.POrho2, pj[2]->force.POrho2, - pj[3]->force.POrho2, pj[4]->force.POrho2, pj[5]->force.POrho2, - pj[6]->force.POrho2, pj[7]->force.POrho2); + vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2, pj[2]->force.P_over_rho2, + pj[3]->force.P_over_rho2, pj[4]->force.P_over_rho2, pj[5]->force.P_over_rho2, + pj[6]->force.P_over_rho2, pj[7]->force.P_over_rho2); pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho, pi[4]->rho, pi[5]->rho, pi[6]->rho, pi[7]->rho); pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho, pj[4]->rho, @@ -801,11 +797,11 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force( pju.v = vec_set(pj[0]->u, pj[1]->u, pj[2]->u, pj[3]->u, pj[4]->u, pj[5]->u, pj[6]->u, pj[7]->u); ci.v = - vec_set(pi[0]->force.c, pi[1]->force.c, pi[2]->force.c, pi[3]->force.c, - pi[4]->force.c, pi[5]->force.c, pi[6]->force.c, pi[7]->force.c); + vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed, pi[2]->force.soundspeed, pi[3]->force.soundspeed, + pi[4]->force.soundspeed, pi[5]->force.soundspeed, pi[6]->force.soundspeed, pi[7]->force.soundspeed); cj.v = - vec_set(pj[0]->force.c, pj[1]->force.c, pj[2]->force.c, pj[3]->force.c, - pj[4]->force.c, pj[5]->force.c, pj[6]->force.c, pj[7]->force.c); + vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed, pj[2]->force.soundspeed, pj[3]->force.soundspeed, + pj[4]->force.soundspeed, pj[5]->force.soundspeed, pj[6]->force.soundspeed, pj[7]->force.soundspeed); vi_sig.v = vec_set(pi[0]->force.v_sig, pi[1]->force.v_sig, pi[2]->force.v_sig, pi[3]->force.v_sig, pi[4]->force.v_sig, pi[5]->force.v_sig, pi[6]->force.v_sig, pi[7]->force.v_sig); @@ -834,18 +830,18 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force( pj[4]->alpha, pj[5]->alpha, pj[6]->alpha, pj[7]->alpha); #elif VEC_SIZE == 4 mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass); - piPOrho2.v = vec_set(pi[0]->force.POrho2, pi[1]->force.POrho2, - pi[2]->force.POrho2, pi[3]->force.POrho2); - pjPOrho2.v = vec_set(pj[0]->force.POrho2, pj[1]->force.POrho2, - pj[2]->force.POrho2, pj[3]->force.POrho2); + piPOrho2.v = vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2, + pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2); + pjPOrho2.v = vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2, + pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2); pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho); pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho); piu.v = vec_set(pi[0]->u, pi[1]->u, pi[2]->u, pi[3]->u); pju.v = vec_set(pj[0]->u, pj[1]->u, pj[2]->u, pj[3]->u); ci.v = - vec_set(pi[0]->force.c, pi[1]->force.c, pi[2]->force.c, pi[3]->force.c); + vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed, pi[2]->force.soundspeed, pi[3]->force.soundspeed); cj.v = - vec_set(pj[0]->force.c, pj[1]->force.c, pj[2]->force.c, pj[3]->force.c); + vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed, pj[2]->force.soundspeed, pj[3]->force.soundspeed); vi_sig.v = vec_set(pi[0]->force.v_sig, pi[1]->force.v_sig, pi[2]->force.v_sig, pi[3]->force.v_sig); vj_sig.v = vec_set(pj[0]->force.v_sig, pj[1]->force.v_sig, pj[2]->force.v_sig, @@ -936,15 +932,11 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force( /* Add the thermal conductivity */ piu_dt.v += mj.v * tc.v * (piu.v - pju.v); - /* compute the signal velocity (this is always symmetrical). */ - vi_sig.v = vec_fmax(vi_sig.v, v_sig.v); - vj_sig.v = vec_fmax(vj_sig.v, v_sig.v); - /* Store the forces back on the particles. */ for (k = 0; k < VEC_SIZE; k++) { pi[k]->force.u_dt += piu_dt.f[k]; pi[k]->force.h_dt -= pih_dt.f[k]; - pi[k]->force.v_sig = vi_sig.f[k]; + pi[k]->force.v_sig = fmaxf(pi[k]->force.v_sig,v_sig.f[k]); for (j = 0; j < 3; j++) pi[k]->a_hydro[j] -= pia[j].f[k]; } diff --git a/src/hydro/Default/hydro_part.h b/src/hydro/Default/hydro_part.h index 7730133fa4488cf51dbd8424b7dce3232f038909..a3972dc7ebd1b04a18ccdb6f815d24e36aff270c 100644 --- a/src/hydro/Default/hydro_part.h +++ b/src/hydro/Default/hydro_part.h @@ -90,7 +90,7 @@ struct part { float balsara; /* Aggregate quantities. */ - float POrho2; + float P_over_rho2; /* Change in particle energy over time. */ float u_dt; @@ -99,7 +99,7 @@ struct part { float v_sig; /* Sound speed */ - float c; + float soundspeed; /* Change in smoothing length over time. */ float h_dt; diff --git a/src/hydro/Gadget2/hydro.h b/src/hydro/Gadget2/hydro.h index 7ff0ca03997b1526a4d741f31ff02f56bac7f7f1..9f4eaf708aa5e5153294c3f8b0dc6acd3b113a3b 100644 --- a/src/hydro/Gadget2/hydro.h +++ b/src/hydro/Gadget2/hydro.h @@ -251,8 +251,8 @@ __attribute__((always_inline)) INLINE static void hydro_kick_extra( p->entropy *= 0.5f; /* Do not 'overcool' when timestep increases */ - if (p->entropy + 0.5f * p->entropy_dt * dt < 0.5f * p->entropy) - p->entropy_dt = -0.5f * p->entropy / dt; + if (p->entropy + p->entropy_dt * half_dt < 0.5f * p->entropy) + p->entropy_dt = -0.5f * p->entropy / half_dt; } /** diff --git a/src/hydro/Gadget2/hydro_iact.h b/src/hydro/Gadget2/hydro_iact.h index 2033e29604bea81bfaabfe3ce1a27178fee09679..7ee02a9e3963a16ed207dc4d518d5d3556460dfa 100644 --- a/src/hydro/Gadget2/hydro_iact.h +++ b/src/hydro/Gadget2/hydro_iact.h @@ -479,9 +479,177 @@ __attribute__((always_inline)) INLINE static void runner_iact_force( __attribute__((always_inline)) INLINE static void runner_iact_vec_force( float *R2, float *Dx, float *Hi, float *Hj, struct part **pi, struct part **pj) { - error( - "A vectorised version of the Gadget2 force interaction function does not " - "exist yet!"); + +#ifdef WITH_VECTORIZATION + + vector r, r2, ri; + vector xi, xj; + vector hi, hj, hi_inv, hj_inv; + vector hi2_inv, hj2_inv; + vector wi, wj, wi_dx, wj_dx, wi_dr, wj_dr, dvdr; + vector piPOrho, pjPOrho, pirho, pjrho; + vector mi, mj; + vector f; + vector dx[3]; + vector vi[3], vj[3]; + vector pia[3], pja[3]; + vector pih_dt, pjh_dt; + vector ci, cj, v_sig; + vector omega_ij, mu_ij, fac_mu, balsara; + vector rho_ij, visc, visc_term, sph_term, acc, entropy_dt; + int j, k; + + fac_mu.v = vec_set1(1.f); /* Will change with cosmological integration */ + + /* Load stuff. */ +#if VEC_SIZE == 8 + mi.v = vec_set(pi[0]->mass, pi[1]->mass, pi[2]->mass, pi[3]->mass, + pi[4]->mass, pi[5]->mass, pi[6]->mass, pi[7]->mass); + mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass, + pj[4]->mass, pj[5]->mass, pj[6]->mass, pj[7]->mass); + piPOrho.v = + vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2, pi[2]->force.P_over_rho2, + pi[3]->force.P_over_rho2, pi[4]->force.P_over_rho2, pi[5]->force.P_over_rho2, + pi[6]->force.P_over_rho2, pi[7]->force.P_over_rho2); + pjPOrho.v = + vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2, pj[2]->force.P_over_rho2, + pj[3]->force.P_over_rho2, pj[4]->force.P_over_rho2, pj[5]->force.P_over_rho2, + pj[6]->force.P_over_rho2, pj[7]->force.P_over_rho2); + pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho, pi[4]->rho, + pi[5]->rho, pi[6]->rho, pi[7]->rho); + pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho, pj[4]->rho, + pj[5]->rho, pj[6]->rho, pj[7]->rho); + ci.v = + vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed, pi[2]->force.soundspeed, pi[3]->force.soundspeed, + pi[4]->force.soundspeed, pi[5]->force.soundspeed, pi[6]->force.soundspeed, pi[7]->force.soundspeed); + cj.v = + vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed, pj[2]->force.soundspeed, pj[3]->force.soundspeed, + pj[4]->force.soundspeed, pj[5]->force.soundspeed, pj[6]->force.soundspeed, pj[7]->force.soundspeed); + for (k = 0; k < 3; k++) { + vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k], + pi[4]->v[k], pi[5]->v[k], pi[6]->v[k], pi[7]->v[k]); + vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k], + pj[4]->v[k], pj[5]->v[k], pj[6]->v[k], pj[7]->v[k]); + } + for (k = 0; k < 3; k++) + dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k], Dx[12 + k], + Dx[15 + k], Dx[18 + k], Dx[21 + k]); + balsara.v = + vec_set(pi[0]->force.balsara, pi[1]->force.balsara, pi[2]->force.balsara, + pi[3]->force.balsara, pi[4]->force.balsara, pi[5]->force.balsara, + pi[6]->force.balsara, pi[7]->force.balsara) + + vec_set(pj[0]->force.balsara, pj[1]->force.balsara, pj[2]->force.balsara, + pj[3]->force.balsara, pj[4]->force.balsara, pj[5]->force.balsara, + pj[6]->force.balsara, pj[7]->force.balsara); +#elif VEC_SIZE == 4 + mi.v = vec_set(pi[0]->mass, pi[1]->mass, pi[2]->mass, pi[3]->mass); + mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass); + piPOrho.v = vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2, + pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2); + pjPOrho.v = vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2, + pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2); + pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho); + pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho); + ci.v = + vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed, pi[2]->force.soundspeed, pi[3]->force.soundspeed); + cj.v = + vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed, pj[2]->force.soundspeed, pj[3]->force.soundspeed); + for (k = 0; k < 3; k++) { + vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k]); + vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k]); + } + for (k = 0; k < 3; k++) + dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k]); + balsara.v = vec_set(pi[0]->force.balsara, pi[1]->force.balsara, + pi[2]->force.balsara, pi[3]->force.balsara) + + vec_set(pj[0]->force.balsara, pj[1]->force.balsara, + pj[2]->force.balsara, pj[3]->force.balsara); +#else + error("Unknown vector size.") +#endif + + /* Get the radius and inverse radius. */ + r2.v = vec_load(R2); + ri.v = vec_rsqrt(r2.v); + ri.v = ri.v - vec_set1(0.5f) * ri.v * (r2.v * ri.v * ri.v - vec_set1(1.0f)); + r.v = r2.v * ri.v; + + /* Get the kernel for hi. */ + hi.v = vec_load(Hi); + hi_inv.v = vec_rcp(hi.v); + hi_inv.v = hi_inv.v - hi_inv.v * (hi.v * hi_inv.v - vec_set1(1.0f)); + hi2_inv.v = hi_inv.v * hi_inv.v; + xi.v = r.v * hi_inv.v; + kernel_deval_vec(&xi, &wi, &wi_dx); + wi_dr.v = hi2_inv.v * hi2_inv.v * wi_dx.v; + + /* Get the kernel for hj. */ + hj.v = vec_load(Hj); + hj_inv.v = vec_rcp(hj.v); + hj_inv.v = hj_inv.v - hj_inv.v * (hj.v * hj_inv.v - vec_set1(1.0f)); + hj2_inv.v = hj_inv.v * hj_inv.v; + xj.v = r.v * hj_inv.v; + kernel_deval_vec(&xj, &wj, &wj_dx); + wj_dr.v = hj2_inv.v * hj2_inv.v * wj_dx.v; + + /* Compute dv dot r. */ + dvdr.v = ((vi[0].v - vj[0].v) * dx[0].v) + ((vi[1].v - vj[1].v) * dx[1].v) + + ((vi[2].v - vj[2].v) * dx[2].v); + //dvdr.v = dvdr.v * ri.v; + + /* Compute the relative velocity. (This is 0 if the particles move away from + * each other and negative otherwise) */ + omega_ij.v = vec_fmin(dvdr.v, vec_set1(0.0f)); + mu_ij.v = fac_mu.v * ri.v * omega_ij.v; /* This is 0 or negative */ + + /* Compute signal velocity */ + v_sig.v = ci.v + cj.v - vec_set1(3.0f) * mu_ij.v; + + /* Now construct the full viscosity term */ + rho_ij.v = vec_set1(0.5f) * (pirho.v + pjrho.v); + visc.v = vec_set1(-0.25f) * vec_set1(const_viscosity_alpha) * v_sig.v * mu_ij.v * + balsara.v / rho_ij.v; + + /* Now, convolve with the kernel */ + visc_term.v = vec_set1(0.5f) * visc.v * (wi_dr.v + wj_dr.v) * ri.v; + sph_term.v = (piPOrho.v * wi_dr.v + pjPOrho.v * wj_dr.v) * ri.v; + + /* Eventually get the acceleration */ + acc.v = visc_term.v + sph_term.v; + + /* Use the force, Luke! */ + for (k = 0; k < 3; k++) { + f.v = dx[k].v * acc.v; + pia[k].v = mj.v * f.v; + pja[k].v = mi.v * f.v; + } + + /* Get the time derivative for h. */ + pih_dt.v = mj.v * dvdr.v * ri.v / pjrho.v * wi_dr.v; + pjh_dt.v = mi.v * dvdr.v * ri.v / pirho.v * wj_dr.v; + + /* Change in entropy */ + entropy_dt.v = vec_set1(0.5f) * visc_term.v * dvdr.v; + + /* Store the forces back on the particles. */ + for (k = 0; k < VEC_SIZE; k++) { + for (j = 0; j < 3; j++) { + pi[k]->a_hydro[j] -= pia[j].f[k]; + pj[k]->a_hydro[j] += pja[j].f[k]; + } + pi[k]->force.h_dt -= pih_dt.f[k]; + pj[k]->force.h_dt -= pjh_dt.f[k]; + pi[k]->force.v_sig = fmaxf(pi[k]->force.v_sig, v_sig.f[k]); + pj[k]->force.v_sig = fmaxf(pj[k]->force.v_sig, v_sig.f[k]); + pi[k]->entropy_dt += entropy_dt.f[k] * mj.f[k]; + pj[k]->entropy_dt -= entropy_dt.f[k] * mi.f[k]; + } + +#else + + error("The Gadget2 serial version of runner_iact_nonsym_force was called when the vectorised version should have been used.") + +#endif } /** @@ -575,9 +743,166 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force( float *R2, float *Dx, float *Hi, float *Hj, struct part **pi, struct part **pj) { - error( - "A vectorised version of the Gadget2 non-symmetric force interaction " - "function does not exist yet!"); + + #ifdef WITH_VECTORIZATION + + vector r, r2, ri; + vector xi, xj; + vector hi, hj, hi_inv, hj_inv; + vector hi2_inv, hj2_inv; + vector wi, wj, wi_dx, wj_dx, wi_dr, wj_dr, dvdr; + vector piPOrho, pjPOrho, pirho, pjrho; + vector mj; + vector f; + vector dx[3]; + vector vi[3], vj[3]; + vector pia[3]; + vector pih_dt; + vector ci, cj, v_sig; + vector omega_ij, mu_ij, fac_mu, balsara; + vector rho_ij, visc, visc_term, sph_term, acc, entropy_dt; + int j, k; + + fac_mu.v = vec_set1(1.f); /* Will change with cosmological integration */ + + /* Load stuff. */ +#if VEC_SIZE == 8 + mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass, + pj[4]->mass, pj[5]->mass, pj[6]->mass, pj[7]->mass); + piPOrho.v = + vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2, pi[2]->force.P_over_rho2, + pi[3]->force.P_over_rho2, pi[4]->force.P_over_rho2, pi[5]->force.P_over_rho2, + pi[6]->force.P_over_rho2, pi[7]->force.P_over_rho2); + pjPOrho.v = + vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2, pj[2]->force.P_over_rho2, + pj[3]->force.P_over_rho2, pj[4]->force.P_over_rho2, pj[5]->force.P_over_rho2, + pj[6]->force.P_over_rho2, pj[7]->force.P_over_rho2); + pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho, pi[4]->rho, + pi[5]->rho, pi[6]->rho, pi[7]->rho); + pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho, pj[4]->rho, + pj[5]->rho, pj[6]->rho, pj[7]->rho); + ci.v = + vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed, pi[2]->force.soundspeed, pi[3]->force.soundspeed, + pi[4]->force.soundspeed, pi[5]->force.soundspeed, pi[6]->force.soundspeed, pi[7]->force.soundspeed); + cj.v = + vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed, pj[2]->force.soundspeed, pj[3]->force.soundspeed, + pj[4]->force.soundspeed, pj[5]->force.soundspeed, pj[6]->force.soundspeed, pj[7]->force.soundspeed); + for (k = 0; k < 3; k++) { + vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k], + pi[4]->v[k], pi[5]->v[k], pi[6]->v[k], pi[7]->v[k]); + vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k], + pj[4]->v[k], pj[5]->v[k], pj[6]->v[k], pj[7]->v[k]); + } + for (k = 0; k < 3; k++) + dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k], Dx[12 + k], + Dx[15 + k], Dx[18 + k], Dx[21 + k]); + balsara.v = + vec_set(pi[0]->force.balsara, pi[1]->force.balsara, pi[2]->force.balsara, + pi[3]->force.balsara, pi[4]->force.balsara, pi[5]->force.balsara, + pi[6]->force.balsara, pi[7]->force.balsara) + + vec_set(pj[0]->force.balsara, pj[1]->force.balsara, pj[2]->force.balsara, + pj[3]->force.balsara, pj[4]->force.balsara, pj[5]->force.balsara, + pj[6]->force.balsara, pj[7]->force.balsara); +#elif VEC_SIZE == 4 + mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass); + piPOrho.v = vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2, + pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2); + pjPOrho.v = vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2, + pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2); + pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho); + pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho); + ci.v = + vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed, pi[2]->force.soundspeed, pi[3]->force.soundspeed); + cj.v = + vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed, pj[2]->force.soundspeed, pj[3]->force.soundspeed); + for (k = 0; k < 3; k++) { + vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k]); + vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k]); + } + for (k = 0; k < 3; k++) + dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k]); + balsara.v = vec_set(pi[0]->force.balsara, pi[1]->force.balsara, + pi[2]->force.balsara, pi[3]->force.balsara) + + vec_set(pj[0]->force.balsara, pj[1]->force.balsara, + pj[2]->force.balsara, pj[3]->force.balsara); +#else + error("Unknown vector size.") +#endif + + /* Get the radius and inverse radius. */ + r2.v = vec_load(R2); + ri.v = vec_rsqrt(r2.v); + ri.v = ri.v - vec_set1(0.5f) * ri.v * (r2.v * ri.v * ri.v - vec_set1(1.0f)); + r.v = r2.v * ri.v; + + /* Get the kernel for hi. */ + hi.v = vec_load(Hi); + hi_inv.v = vec_rcp(hi.v); + hi_inv.v = hi_inv.v - hi_inv.v * (hi.v * hi_inv.v - vec_set1(1.0f)); + hi2_inv.v = hi_inv.v * hi_inv.v; + xi.v = r.v * hi_inv.v; + kernel_deval_vec(&xi, &wi, &wi_dx); + wi_dr.v = hi2_inv.v * hi2_inv.v * wi_dx.v; + + /* Get the kernel for hj. */ + hj.v = vec_load(Hj); + hj_inv.v = vec_rcp(hj.v); + hj_inv.v = hj_inv.v - hj_inv.v * (hj.v * hj_inv.v - vec_set1(1.0f)); + hj2_inv.v = hj_inv.v * hj_inv.v; + xj.v = r.v * hj_inv.v; + kernel_deval_vec(&xj, &wj, &wj_dx); + wj_dr.v = hj2_inv.v * hj2_inv.v * wj_dx.v; + + /* Compute dv dot r. */ + dvdr.v = ((vi[0].v - vj[0].v) * dx[0].v) + ((vi[1].v - vj[1].v) * dx[1].v) + + ((vi[2].v - vj[2].v) * dx[2].v); + //dvdr.v = dvdr.v * ri.v; + + /* Compute the relative velocity. (This is 0 if the particles move away from + * each other and negative otherwise) */ + omega_ij.v = vec_fmin(dvdr.v, vec_set1(0.0f)); + mu_ij.v = fac_mu.v * ri.v * omega_ij.v; /* This is 0 or negative */ + + /* Compute signal velocity */ + v_sig.v = ci.v + cj.v - vec_set1(3.0f) * mu_ij.v; + + /* Now construct the full viscosity term */ + rho_ij.v = vec_set1(0.5f) * (pirho.v + pjrho.v); + visc.v = vec_set1(-0.25f) * vec_set1(const_viscosity_alpha) * v_sig.v * mu_ij.v * + balsara.v / rho_ij.v; + + /* Now, convolve with the kernel */ + visc_term.v = vec_set1(0.5f) * visc.v * (wi_dr.v + wj_dr.v) * ri.v; + sph_term.v = (piPOrho.v * wi_dr.v + pjPOrho.v * wj_dr.v) * ri.v; + + /* Eventually get the acceleration */ + acc.v = visc_term.v + sph_term.v; + + /* Use the force, Luke! */ + for (k = 0; k < 3; k++) { + f.v = dx[k].v * acc.v; + pia[k].v = mj.v * f.v; + } + + /* Get the time derivative for h. */ + pih_dt.v = mj.v * dvdr.v * ri.v / pjrho.v * wi_dr.v; + + /* Change in entropy */ + entropy_dt.v = vec_set1(0.5f) * mj.v * visc_term.v * dvdr.v; + + /* Store the forces back on the particles. */ + for (k = 0; k < VEC_SIZE; k++) { + for (j = 0; j < 3; j++) pi[k]->a_hydro[j] -= pia[j].f[k]; + pi[k]->force.h_dt -= pih_dt.f[k]; + pi[k]->force.v_sig = fmaxf(pi[k]->force.v_sig,v_sig.f[k]); + pi[k]->entropy_dt += entropy_dt.f[k]; + } + +#else + + error("The Gadget2 serial version of runner_iact_nonsym_force was called when the vectorised version should have been used.") + +#endif } #endif /* SWIFT_RUNNER_IACT_LEGACY_H */ diff --git a/tests/Makefile.am b/tests/Makefile.am index ca5d6af21fe417da4d0b33dde592eeb0be544938..bff247a803c2f84a008fbd01445d01ba6579aefc 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -26,7 +26,7 @@ TESTS = testGreetings testReading.sh testSingle testPair.sh testPairPerturbed.sh # List of test programs to compile check_PROGRAMS = testGreetings testReading testSingle testTimeIntegration \ - testSPHStep testPair test27cells testParser testKernel testNonSymInt testSymInt + testSPHStep testPair test27cells testParser testKernel testInteractions # Sources for the individual programs testGreetings_SOURCES = testGreetings.c @@ -47,9 +47,7 @@ testParser_SOURCES = testParser.c testKernel_SOURCES = testKernel.c -testNonSymInt_SOURCES = testNonSymInt.c - -testSymInt_SOURCES = testSymInt.c +testInteractions_SOURCES = testInteractions.c # Files necessary for distribution EXTRA_DIST = testReading.sh makeInput.py testPair.sh testPairPerturbed.sh \ diff --git a/tests/test27cells.c b/tests/test27cells.c index c21d4cc2337a19b6b2660b960fa953e5bc9df6ef..2cf82283189d8a3c402c350d7e1106b843879d92 100644 --- a/tests/test27cells.c +++ b/tests/test27cells.c @@ -355,7 +355,7 @@ int main(int argc, char *argv[]) { const ticks tic = getticks(); -#if defined(DEFAULT_SPH) || !defined(WITH_VECTORIZATION) +#if defined(DEFAULT_SPH) || defined(GADGET2_SPH) /* Run all the pairs */ for (int j = 0; j < 27; ++j) @@ -391,7 +391,7 @@ int main(int argc, char *argv[]) { const ticks tic = getticks(); -#if defined(DEFAULT_SPH) || !defined(WITH_VECTORIZATION) +#if defined(DEFAULT_SPH) || defined(GADGET2_SPH) /* Run all the brute-force pairs */ for (int j = 0; j < 27; ++j) diff --git a/tests/testNonSymInt.c b/tests/testInteractions.c similarity index 62% rename from tests/testNonSymInt.c rename to tests/testInteractions.c index da98c9efcedd595e133754e49d7396fbba574110..27fd511accf1440993a2af29bd20f6695afd2e92 100644 --- a/tests/testNonSymInt.c +++ b/tests/testInteractions.c @@ -24,8 +24,9 @@ #include <unistd.h> #include "swift.h" -char *serial_filename = "test_nonsym_serial.dat"; -char *vec_filename = "test_nonsym_vec.dat"; +/* Typdef function pointers for serial and vectorised versions of the interaction functions. */ +typedef void (*serial_interaction)(float, float *, float, float, struct part *, struct part *); +typedef void (*vec_interaction)(float *, float *, float *, float *, struct part **, struct part **); /** * @brief Constructs an array of particles in a valid state prior to @@ -46,6 +47,7 @@ struct part *make_particles(int count, double *offset, double spacing, double h, count * sizeof(struct part)) != 0) { error("couldn't allocate particles, no. of particles: %d", (int)count); } + bzero(particles, count * sizeof(struct part)); /* Construct the particles */ struct part *p; @@ -67,26 +69,44 @@ struct part *make_particles(int count, double *offset, double spacing, double h, return particles; } +/** + * @brief Populates particle properties needed for the force calculation. + */ +void prepare_force(struct part *parts) { + + struct part *p; + for (size_t i = 0; i < VEC_SIZE + 1; ++i) { + p = &parts[i]; + p->rho = i + 1; + p->force.balsara = i + 1; + p->force.P_over_rho2 = i + 1; + } +} + /** * @brief Dumps all particle information to a file */ void dump_indv_particle_fields(char *fileName, struct part *p) { FILE *file = fopen(fileName, "a"); - - fprintf(file, - "%6llu %10f %10f %10f %10f %10f %10f %13e %13e %13e %13e %13e " - "%13e %13e %13e\n", - p->id, p->x[0], p->x[1], p->x[2], p->v[0], p->v[1], p->v[2], p->rho, - p->rho_dh, p->density.wcount, p->density.wcount_dh, + + fprintf( + file, + "%6llu %10f %10f %10f %10f %10f %10f %10f %10f %10f %13e %13e %13e %13e %13e %13e %13e " + "%13e %13e %13e %10f\n", + p->id, p->x[0], p->x[1], + p->x[2], p->v[0], p->v[1], + p->v[2], p->a_hydro[0], p->a_hydro[1], + p->a_hydro[2], p->rho, p->rho_dh, + p->density.wcount, p->density.wcount_dh, p->force.h_dt, p->force.v_sig, #if defined(GADGET2_SPH) - p->density.div_v, p->density.rot_v[0], p->density.rot_v[1], - p->density.rot_v[2] + p->density.div_v, p->density.rot_v[0], + p->density.rot_v[1], p->density.rot_v[2], p->entropy_dt #elif defined(DEFAULT_SPH) - p->density.div_v, p->density.curl_v[0], p->density.curl_v[1], - p->density.curl_v[2] + p->density.div_v, p->density.rot_v[0], + p->density.rot_v[1], p->density.rot_v[2], 0. #else - 0., 0., 0., 0. + 0., 0., 0., 0., 0., 0., 0., 0., 0., 0. #endif ); fclose(file); @@ -98,14 +118,14 @@ void dump_indv_particle_fields(char *fileName, struct part *p) { void write_header(char *fileName) { FILE *file = fopen(fileName, "w"); - /* Write header */ - fprintf(file, - "# %4s %10s %10s %10s %10s %10s %10s %13s %13s %13s %13s %13s " - "%13s %13s %13s\n", - "ID", "pos_x", "pos_y", "pos_z", "v_x", "v_y", "v_z", "rho", "rho_dh", - "wcount", "wcount_dh", "div_v", "curl_vx", "curl_vy", "curl_vz"); - fprintf(file, "\nPARTICLES BEFORE INTERACTION:\n"); - fclose(file); + /* Write header */ + fprintf(file, + "# %4s %10s %10s %10s %10s %10s %10s %10s %10s %10s %13s %13s %13s %13s %13s %13s %13s" + "%13s %13s %13s %13s\n", + "ID", "pos_x", "pos_y", "pos_z", "v_x", "v_y", "v_z", "a_x", "a_y", "a_z", "rho", "rho_dh", + "wcount", "wcount_dh", "dh/dt", "v_sig", "div_v", "curl_vx", "curl_vy", "curl_vz", "dS/dt"); + fprintf(file,"\nPARTICLES BEFORE INTERACTION:\n"); + fclose(file); } /* @@ -116,13 +136,20 @@ void write_header(char *fileName) { * @param count No. of particles to be interacted * */ -void test_nonsym_density_interaction(struct part *parts, int count) { - +void test_interactions(struct part *parts, int count, serial_interaction serial_inter_func, vec_interaction vec_inter_func, char *filePrefix) { + /* Use the first particle in the array as the one that gets updated. */ struct part pi = parts[0]; - // const float hig2 = hi * hi * kernel_gamma2; FILE *file; + char serial_filename[200] = ""; + char vec_filename[200] = ""; + + strcpy(serial_filename,filePrefix); + strcpy(vec_filename,filePrefix); + sprintf(serial_filename + strlen(serial_filename), "_serial.dat"); + sprintf(vec_filename + strlen(vec_filename), "_vec.dat"); + write_header(serial_filename); write_header(vec_filename); @@ -152,8 +179,8 @@ void test_nonsym_density_interaction(struct part *parts, int count) { r2 += dx[k] * dx[k]; } - runner_iact_nonsym_density(r2, dx, pi.h, parts[i].h, &pi, &parts[i]); - } + serial_inter_func(r2, dx, pi.h, parts[i].h, &pi, &parts[i]); + } file = fopen(serial_filename, "a"); fprintf(file, "\nPARTICLES AFTER INTERACTION:\n"); @@ -189,8 +216,8 @@ void test_nonsym_density_interaction(struct part *parts, int count) { dump_indv_particle_fields(vec_filename, pjq[i]); /* Perform vector interaction. */ - runner_iact_nonsym_vec_density(r2q, dxq, hiq, hjq, piq, pjq); - + vec_inter_func(r2q, dxq, hiq, hjq, piq, pjq); + file = fopen(vec_filename, "a"); fprintf(file, "\nPARTICLES AFTER INTERACTION:\n"); fclose(file); @@ -233,19 +260,49 @@ int main(int argc, char *argv[]) { "runner_iact_vec_density." "\n\nOptions:" "\n-h DISTANCE=1.2348 - Smoothing length in units of <x>" - "\n-s spacing - Spacing between particles" - "\n-v type (0,1,2,3) - Velocity field: (zero, random, divergent, " - "rotating)", + "\n-s spacing - Spacing between particles", argv[0]); exit(1); } /* Build the infrastructure */ static long long partId = 0; - struct part *particles = make_particles(count, offset, spacing, h, &partId); - - /* Call the test non-sym density test. */ - test_nonsym_density_interaction(particles, count); + struct part *density_particles = make_particles(count,offset,spacing,h,&partId); + struct part *force_particles = make_particles(count,offset,spacing,h,&partId); + prepare_force(force_particles); + + /* Define which interactions to call */ + serial_interaction serial_inter_func = &runner_iact_nonsym_density; + vec_interaction vec_inter_func = &runner_iact_nonsym_vec_density; + + /* Call the non-sym density test. */ + test_interactions(density_particles,count,serial_inter_func,vec_inter_func,"test_nonsym_density"); + + density_particles = make_particles(count,offset,spacing,h,&partId); + + /* Re-assign function pointers. */ + serial_inter_func = &runner_iact_density; + vec_inter_func = &runner_iact_vec_density; + + /* Call the symmetrical density test. */ + test_interactions(density_particles,count,serial_inter_func,vec_inter_func,"test_sym_density"); + + /* Re-assign function pointers. */ + serial_inter_func = &runner_iact_nonsym_force; + vec_inter_func = &runner_iact_nonsym_vec_force; + + /* Call the test non-sym force test. */ + test_interactions(force_particles,count,serial_inter_func,vec_inter_func,"test_nonsym_force"); + + force_particles = make_particles(count,offset,spacing,h,&partId); + prepare_force(force_particles); + + /* Re-assign function pointers. */ + serial_inter_func = &runner_iact_force; + vec_inter_func = &runner_iact_vec_force; + + /* Call the test symmetrical force test. */ + test_interactions(force_particles,count,serial_inter_func,vec_inter_func,"test_sym_force"); return 0; } diff --git a/tests/testSymInt.c b/tests/testSymInt.c deleted file mode 100644 index 73fa8ed9b9c2c347bad2ff8fd2eadf9a6cae1336..0000000000000000000000000000000000000000 --- a/tests/testSymInt.c +++ /dev/null @@ -1,248 +0,0 @@ -/******************************************************************************* - * This file is part of SWIFT. - * Copyright (C) 2015 Matthieu Schaller (matthieu.schaller@durham.ac.uk). - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * - ******************************************************************************/ - -#include <fenv.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <unistd.h> -#include "swift.h" - -char *serial_filename = "test_sym_serial.dat"; -char *vec_filename = "test_sym_vec.dat"; - -/** - * @brief Constructs an array of particles in a valid state prior to - * a IACT_NONSYM and IACT_NONSYM_VEC call. - * - * @param count No. of particles to create - * @param offset The position of the particle offset from (0,0,0). - * @param spacing Particle spacing. - * @param h The smoothing length of the particles in units of the inter-particle - *separation. - * @param partId The running counter of IDs. - */ -struct part *make_particles(int count, double *offset, double spacing, double h, - long long *partId) { - - struct part *particles; - if (posix_memalign((void **)&particles, part_align, - count * sizeof(struct part)) != 0) { - error("couldn't allocate particles, no. of particles: %d", (int)count); - } - - /* Construct the particles */ - struct part *p; - for (size_t i = 0; i < VEC_SIZE + 1; ++i) { - p = &particles[i]; - p->x[0] = offset[0] + spacing * i; - p->x[1] = offset[1] + spacing * i; - p->x[2] = offset[2] + spacing * i; - p->v[0] = random_uniform(-0.05, 0.05); - p->v[1] = random_uniform(-0.05, 0.05); - p->v[2] = random_uniform(-0.05, 0.05); - p->h = h; - p->id = ++(*partId); - p->mass = 1.0f; - } - return particles; -} - -/** - * @brief Dumps all particle information to a file - */ -void dump_indv_particle_fields(char *fileName, struct part *p) { - - FILE *file = fopen(fileName, "a"); - - fprintf(file, - "%6llu %10f %10f %10f %10f %10f %10f %13e %13e %13e %13e %13e " - "%13e %13e %13e\n", - p->id, p->x[0], p->x[1], p->x[2], p->v[0], p->v[1], p->v[2], p->rho, - p->rho_dh, p->density.wcount, p->density.wcount_dh, -#if defined(GADGET2_SPH) - p->density.div_v, p->density.rot_v[0], p->density.rot_v[1], - p->density.rot_v[2] -#elif defined(DEFAULT_SPH) - p->density.div_v, p->density.curl_v[0], p->density.curl_v[1], - p->density.curl_v[2] -#else - 0., 0., 0., 0. -#endif - ); - fclose(file); -} - -/** - * @brief Creates a header for the output file - */ -void write_header(char *fileName) { - - FILE *file = fopen(fileName, "w"); - /* Write header */ - fprintf(file, - "# %4s %10s %10s %10s %10s %10s %10s %13s %13s %13s %13s %13s " - "%13s %13s %13s\n", - "ID", "pos_x", "pos_y", "pos_z", "v_x", "v_y", "v_z", "rho", "rho_dh", - "wcount", "wcount_dh", "div_v", "curl_vx", "curl_vy", "curl_vz"); - fprintf(file, "\nPARTICLES BEFORE INTERACTION:\n"); - fclose(file); -} - -/* - * @brief Calls the serial and vectorised version of the non-symmetrical density - * interaction. - * - * @param parts Particle array to be interacted - * @param count No. of particles to be interacted - * - */ -void test_sym_density_interaction(struct part *parts, int count) { - - /* Use the first particle in the array as the one that gets updated. */ - struct part pi = parts[0]; - // const float hig2 = hi * hi * kernel_gamma2; - - FILE *file; - write_header(serial_filename); - write_header(vec_filename); - - /* Dump state of particles before serial interaction. */ - dump_indv_particle_fields(serial_filename, &pi); - for (size_t i = 1; i < count; i++) - dump_indv_particle_fields(serial_filename, &parts[i]); - - /* Make copy of pi to be used in vectorised version. */ - struct part pi_vec = pi; - struct part pj_vec[VEC_SIZE]; - for (int i = 0; i < VEC_SIZE; i++) pj_vec[i] = parts[i + 1]; - - float r2q[VEC_SIZE] __attribute__((aligned(sizeof(float) * VEC_SIZE))); - float hiq[VEC_SIZE] __attribute__((aligned(sizeof(float) * VEC_SIZE))); - float hjq[VEC_SIZE] __attribute__((aligned(sizeof(float) * VEC_SIZE))); - float dxq[3 * VEC_SIZE] __attribute__((aligned(sizeof(float) * VEC_SIZE))); - struct part *piq[VEC_SIZE], *pjq[VEC_SIZE]; - - /* Perform serial interaction */ - for (int i = 1; i < count; i++) { - /* Compute the pairwise distance. */ - float r2 = 0.0f; - float dx[3]; - for (int k = 0; k < 3; k++) { - dx[k] = pi.x[k] - parts[i].x[k]; - r2 += dx[k] * dx[k]; - } - - runner_iact_density(r2, dx, pi.h, parts[i].h, &pi, &parts[i]); - } - - file = fopen(serial_filename, "a"); - fprintf(file, "\nPARTICLES AFTER INTERACTION:\n"); - fclose(file); - - /* Dump result of serial interaction. */ - dump_indv_particle_fields(serial_filename, &pi); - for (size_t i = 1; i < count; i++) - dump_indv_particle_fields(serial_filename, &parts[i]); - - /* Setup arrays for vector interaction. */ - for (int i = 0; i < VEC_SIZE; i++) { - /* Compute the pairwise distance. */ - float r2 = 0.0f; - float dx[3]; - for (int k = 0; k < 3; k++) { - dx[k] = pi_vec.x[k] - pj_vec[i].x[k]; - r2 += dx[k] * dx[k]; - } - r2q[i] = r2; - dxq[3 * i + 0] = dx[0]; - dxq[3 * i + 1] = dx[1]; - dxq[3 * i + 2] = dx[2]; - hiq[i] = pi_vec.h; - hjq[i] = pj_vec[i].h; - piq[i] = &pi_vec; - pjq[i] = &pj_vec[i]; - } - - /* Dump state of particles before vector interaction. */ - dump_indv_particle_fields(vec_filename, piq[0]); - for (size_t i = 0; i < VEC_SIZE; i++) - dump_indv_particle_fields(vec_filename, pjq[i]); - - /* Perform vector interaction. */ - runner_iact_vec_density(r2q, dxq, hiq, hjq, piq, pjq); - - file = fopen(vec_filename, "a"); - fprintf(file, "\nPARTICLES AFTER INTERACTION:\n"); - fclose(file); - - /* Dump result of serial interaction. */ - dump_indv_particle_fields(vec_filename, piq[0]); - for (size_t i = 0; i < VEC_SIZE; i++) - dump_indv_particle_fields(vec_filename, pjq[i]); -} - -/* And go... */ -int main(int argc, char *argv[]) { - double h = 1.2348, spacing = 0.5; - double offset[3] = {0.0, 0.0, 0.0}; - int count = VEC_SIZE + 1; - - /* Get some randomness going */ - srand(0); - - char c; - while ((c = getopt(argc, argv, "s:h:v:")) != -1) { - switch (c) { - case 'h': - sscanf(optarg, "%lf", &h); - break; - case 's': - sscanf(optarg, "%lf", &spacing); - break; - case '?': - error("Unknown option."); - break; - } - } - - if (h < 0 || spacing < 0) { - printf( - "\nUsage: %s [OPTIONS...]\n" - "\nGenerates a particle array with equal particle separation." - "\nThese are then interacted using runner_iact_density and " - "runner_iact_vec_density." - "\n\nOptions:" - "\n-h DISTANCE=1.2348 - Smoothing length in units of <x>" - "\n-s spacing - Spacing between particles" - "\n-v type (0,1,2,3) - Velocity field: (zero, random, divergent, " - "rotating)", - argv[0]); - exit(1); - } - - /* Build the infrastructure */ - static long long partId = 0; - struct part *particles = make_particles(count, offset, spacing, h, &partId); - - /* Call the test non-sym density test. */ - test_sym_density_interaction(particles, count); - - return 0; -}