diff --git a/src/drift.h b/src/drift.h index 932e956d8c9f9c8b1cb5afdbff33f99bc79c180d..c71cda49b0ad2b18b227fc2d485bbe76f32d5b94 100644 --- a/src/drift.h +++ b/src/drift.h @@ -77,7 +77,7 @@ __attribute__((always_inline)) INLINE static void drift_part( p->v[2] += p->a_hydro[2] * dt; /* Predict smoothing length */ - const float w1 = p->h_dt * h_inv * dt; + const float w1 = p->force.h_dt * h_inv * dt; if (fabsf(w1) < 0.2f) p->h *= approx_expf(w1); /* 4th order expansion of exp(w) */ else diff --git a/src/hydro/Default/hydro.h b/src/hydro/Default/hydro.h index a316eb21db364b255ce9ccb4f97f39eb61e0330f..6e6e4f18d2e10ed4a6bf78e5e4ef0bf7b4eff183 100644 --- a/src/hydro/Default/hydro.h +++ b/src/hydro/Default/hydro.h @@ -103,8 +103,8 @@ __attribute__((always_inline)) INLINE static void hydro_end_density( /* Finish the calculation by inserting the missing h-factors */ p->rho *= ih * ih2; p->rho_dh *= ih4; - p->density.wcount *= (4.0f / 3.0f * M_PI * kernel_gamma3); - p->density.wcount_dh *= ih * (4.0f / 3.0f * M_PI * kernel_gamma4); + p->density.wcount *= kernel_norm; + p->density.wcount_dh *= ih * kernel_gamma * kernel_norm; const float irho = 1.f / p->rho; @@ -186,7 +186,7 @@ __attribute__((always_inline)) INLINE static void hydro_reset_acceleration( /* Reset the time derivatives. */ p->force.u_dt = 0.0f; - p->h_dt = 0.0f; + p->force.h_dt = 0.0f; p->force.v_sig = 0.0f; } @@ -225,7 +225,9 @@ __attribute__((always_inline)) INLINE static void hydro_predict_extra( * @param p The particle to act upon */ __attribute__((always_inline)) INLINE static void hydro_end_force( - struct part *restrict p) {} + struct part *restrict p) { + p->force.h_dt *= p->h * 0.333333333f; +} /** * @brief Kick the additional variables diff --git a/src/hydro/Default/hydro_iact.h b/src/hydro/Default/hydro_iact.h index 2748bdca4ee933c6aa1a4e2777e38cd45cc1ddbc..41bf3d6fbc6a8a9feeef9a03bf945887d301981a 100644 --- a/src/hydro/Default/hydro_iact.h +++ b/src/hydro/Default/hydro_iact.h @@ -70,12 +70,12 @@ __attribute__((always_inline)) INLINE static void runner_iact_density( for (k = 0; k < 3; k++) curlvr[k] *= ri; /* Compute density of pi. */ - h_inv = 1.0 / hi; + h_inv = 1.0f / hi; xi = r * h_inv; kernel_deval(xi, &wi, &wi_dx); pi->rho += mj * wi; - pi->rho_dh -= mj * (3.0 * wi + xi * wi_dx); + pi->rho_dh -= mj * (3.0f * wi + xi * wi_dx); pi->density.wcount += wi; pi->density.wcount_dh -= xi * wi_dx; @@ -83,12 +83,12 @@ __attribute__((always_inline)) INLINE static void runner_iact_density( for (k = 0; k < 3; k++) pi->density.rot_v[k] += mj * curlvr[k] * wi_dx; /* Compute density of pj. */ - h_inv = 1.0 / hj; + h_inv = 1.0f / hj; xj = r * h_inv; kernel_deval(xj, &wj, &wj_dx); pj->rho += mi * wj; - pj->rho_dh -= mi * (3.0 * wj + xj * wj_dx); + pj->rho_dh -= mi * (3.0f * wj + xj * wj_dx); pj->density.wcount += wj; pj->density.wcount_dh -= xj * wj_dx; @@ -245,12 +245,12 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_density( curlvr[2] = dv[0] * dx[1] - dv[1] * dx[0]; for (k = 0; k < 3; k++) curlvr[k] *= ri; - h_inv = 1.0 / hi; + h_inv = 1.0f / hi; xi = r * h_inv; kernel_deval(xi, &wi, &wi_dx); pi->rho += mj * wi; - pi->rho_dh -= mj * (3.0 * wi + xi * wi_dx); + pi->rho_dh -= mj * (3.0f * wi + xi * wi_dx); pi->density.wcount += wi; pi->density.wcount_dh -= xi * wi_dx; @@ -437,8 +437,8 @@ __attribute__((always_inline)) INLINE static void runner_iact_force( pj->force.u_dt += mi * tc * (pj->u - pi->u); /* Get the time derivative for h. */ - pi->h_dt -= mj * dvdr / rhoj * wi_dr; - pj->h_dt -= mi * dvdr / rhoi * wj_dr; + pi->force.h_dt -= mj * dvdr / rhoj * wi_dr; + pj->force.h_dt -= mi * dvdr / rhoi * wj_dr; /* Update the signal velocity. */ pi->force.v_sig = fmaxf(pi->force.v_sig, v_sig); @@ -645,8 +645,8 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force( for (k = 0; k < VEC_SIZE; k++) { pi[k]->force.u_dt += piu_dt.f[k]; pj[k]->force.u_dt += pju_dt.f[k]; - pi[k]->h_dt -= pih_dt.f[k]; - pj[k]->h_dt -= pjh_dt.f[k]; + pi[k]->force.h_dt -= pih_dt.f[k]; + pj[k]->force.h_dt -= pjh_dt.f[k]; pi[k]->force.v_sig = vi_sig.f[k]; pj[k]->force.v_sig = vj_sig.f[k]; for (j = 0; j < 3; j++) { @@ -746,7 +746,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( pi->force.u_dt += mj * tc * (pi->u - pj->u); /* Get the time derivative for h. */ - pi->h_dt -= mj * dvdr / rhoj * wi_dr; + pi->force.h_dt -= mj * dvdr / rhoj * wi_dr; /* Update the signal velocity. */ pi->force.v_sig = fmaxf(pi->force.v_sig, v_sig); @@ -943,7 +943,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force( /* Store the forces back on the particles. */ for (k = 0; k < VEC_SIZE; k++) { pi[k]->force.u_dt += piu_dt.f[k]; - pi[k]->h_dt -= pih_dt.f[k]; + pi[k]->force.h_dt -= pih_dt.f[k]; pi[k]->force.v_sig = vi_sig.f[k]; for (j = 0; j < 3; j++) pi[k]->a_hydro[j] -= pia[j].f[k]; } diff --git a/src/hydro/Default/hydro_part.h b/src/hydro/Default/hydro_part.h index fa207a7fb9ded16d6618df0cb448d1dd49088372..7730133fa4488cf51dbd8424b7dce3232f038909 100644 --- a/src/hydro/Default/hydro_part.h +++ b/src/hydro/Default/hydro_part.h @@ -46,9 +46,6 @@ struct part { /* Particle cutoff radius. */ float h; - /* Change in smoothing length over time. */ - float h_dt; - /* Particle time of beginning of time-step. */ int ti_begin; @@ -104,6 +101,9 @@ struct part { /* Sound speed */ float c; + /* Change in smoothing length over time. */ + float h_dt; + } force; }; diff --git a/src/hydro/Gadget2/hydro.h b/src/hydro/Gadget2/hydro.h index a523fbb99f469004562fb7194d0c3825e9cd1857..e998fae3e6078d4fa31141f1b31a3dc037313ada 100644 --- a/src/hydro/Gadget2/hydro.h +++ b/src/hydro/Gadget2/hydro.h @@ -65,7 +65,7 @@ __attribute__((always_inline)) INLINE static void hydro_init_part( p->density.wcount_dh = 0.f; p->rho = 0.f; p->rho_dh = 0.f; - p->div_v = 0.f; + p->density.div_v = 0.f; p->density.rot_v[0] = 0.f; p->density.rot_v[1] = 0.f; p->density.rot_v[2] = 0.f; @@ -97,8 +97,8 @@ __attribute__((always_inline)) INLINE static void hydro_end_density( /* Finish the calculation by inserting the missing h-factors */ p->rho *= ih * ih2; p->rho_dh *= ih4; - p->density.wcount *= (4.0f / 3.0f * M_PI * kernel_gamma3); - p->density.wcount_dh *= ih * (4.0f / 3.0f * M_PI * kernel_gamma4); + p->density.wcount *= kernel_norm; + p->density.wcount_dh *= ih * kernel_gamma * kernel_norm; const float irho = 1.f / p->rho; @@ -111,7 +111,7 @@ __attribute__((always_inline)) INLINE static void hydro_end_density( p->density.rot_v[2] *= ih4 * irho; /* Finish calculation of the velocity divergence */ - p->div_v *= ih4 * irho; + p->density.div_v *= ih4 * irho; } /** @@ -128,17 +128,37 @@ __attribute__((always_inline)) INLINE static void hydro_prepare_force( struct part *restrict p, struct xpart *restrict xp, int ti_current, double timeBase) { + const float fac_mu = 1.f; /* Will change with cosmological integration */ + /* Compute the norm of the curl */ - p->force.curl_v = sqrtf(p->density.rot_v[0] * p->density.rot_v[0] + - p->density.rot_v[1] * p->density.rot_v[1] + - p->density.rot_v[2] * p->density.rot_v[2]); + const float curl_v = sqrtf(p->density.rot_v[0] * p->density.rot_v[0] + + p->density.rot_v[1] * p->density.rot_v[1] + + p->density.rot_v[2] * p->density.rot_v[2]); + + /* Compute the norm of div v */ + const float abs_div_v = fabsf(p->density.div_v); /* Compute the pressure */ - const float dt = (ti_current - (p->ti_begin + p->ti_end) / 2) * timeBase; - p->force.pressure = (p->entropy + p->entropy_dt * dt) * pow_gamma(p->rho); + const float half_dt = (ti_current - (p->ti_begin + p->ti_end) / 2) * timeBase; + const float pressure = + (p->entropy + p->entropy_dt * half_dt) * pow_gamma(p->rho); + + const float irho = 1.f / p->rho; + + /* Divide the pressure by the density and density gradient */ + const float P_over_rho2 = pressure * irho * irho * p->rho_dh; /* Compute the sound speed */ - p->force.soundspeed = sqrtf(hydro_gamma * p->force.pressure / p->rho); + const float soundspeed = sqrtf(hydro_gamma * pressure * irho); + + /* Compute the Balsara switch */ + const float balsara = + abs_div_v / (abs_div_v + curl_v + 0.0001f * soundspeed / fac_mu / p->h); + + /* Update variables. */ + p->force.P_over_rho2 = P_over_rho2; + p->force.soundspeed = soundspeed; + p->force.balsara = balsara; } /** @@ -157,10 +177,9 @@ __attribute__((always_inline)) INLINE static void hydro_reset_acceleration( p->a_hydro[1] = 0.0f; p->a_hydro[2] = 0.0f; - p->h_dt = 0.0f; - /* Reset the time derivatives. */ p->entropy_dt = 0.0f; + p->force.h_dt = 0.0f; /* Reset maximal signal velocity */ p->force.v_sig = 0.0f; @@ -181,11 +200,20 @@ __attribute__((always_inline)) INLINE static void hydro_predict_extra( /* Drift the pressure */ const float dt_entr = (t1 - (p->ti_begin + p->ti_end) / 2) * timeBase; - p->force.pressure = + const float pressure = (p->entropy + p->entropy_dt * dt_entr) * pow_gamma(p->rho); + const float irho = 1.f / p->rho; + + /* Divide the pressure by the density and density gradient */ + const float P_over_rho2 = pressure * irho * irho * p->rho_dh; + /* Compute the new sound speed */ - p->force.soundspeed = sqrtf(hydro_gamma * p->force.pressure / p->rho); + const float soundspeed = sqrtf(hydro_gamma * pressure * irho); + + /* Update variables */ + p->force.P_over_rho2 = P_over_rho2; + p->force.soundspeed = soundspeed; } /** @@ -198,6 +226,8 @@ __attribute__((always_inline)) INLINE static void hydro_predict_extra( __attribute__((always_inline)) INLINE static void hydro_end_force( struct part *restrict p) { + p->force.h_dt *= p->h * 0.333333333f; + p->entropy_dt *= hydro_gamma_minus_one * pow_minus_gamma_minus_one(p->rho); } diff --git a/src/hydro/Gadget2/hydro_debug.h b/src/hydro/Gadget2/hydro_debug.h index b67e79182ccaaee7c0421c57a91ec9fa2adae65c..5500fbff6617b3c99ed038acb34a85cb43a426cd 100644 --- a/src/hydro/Gadget2/hydro_debug.h +++ b/src/hydro/Gadget2/hydro_debug.h @@ -22,16 +22,15 @@ __attribute__((always_inline)) INLINE static void hydro_debug_particle( printf( "x=[%.3e,%.3e,%.3e], " "v=[%.3e,%.3e,%.3e],v_full=[%.3e,%.3e,%.3e] \n a=[%.3e,%.3e,%.3e],\n " - "h=%.3e, " - "wcount=%d, wcount_dh=%.3e, m=%.3e, dh_drho=%.3e, rho=%.3e, P=%.3e, " - "S=%.3e, " - "dS/dt=%.3e, c=%.3e\n" - "divV=%.3e, curlV=%.3e, rotV=[%.3e,%.3e,%.3e] \n " + "h=%.3e, wcount=%.3f, wcount_dh=%.3e, m=%.3e, dh_drho=%.3e, rho=%.3e, " + "P=%.3e, P_over_rho2=%.3e, S=%.3e, dS/dt=%.3e, c=%.3e\n" + "divV=%.3e, rotV=[%.3e,%.3e,%.3e], balsara=%.3e \n " "v_sig=%e dh/dt=%.3e t_begin=%d, t_end=%d\n", p->x[0], p->x[1], p->x[2], p->v[0], p->v[1], p->v[2], xp->v_full[0], xp->v_full[1], xp->v_full[2], p->a_hydro[0], p->a_hydro[1], p->a_hydro[2], - p->h, (int)p->density.wcount, p->density.wcount_dh, p->mass, p->rho_dh, - p->rho, p->force.pressure, p->entropy, p->entropy_dt, p->force.soundspeed, - p->div_v, p->force.curl_v, p->density.rot_v[0], p->density.rot_v[1], - p->density.rot_v[2], p->force.v_sig, p->h_dt, p->ti_begin, p->ti_end); + p->h, p->density.wcount, p->density.wcount_dh, p->mass, p->rho_dh, p->rho, + p->force.P_over_rho2 * p->rho * p->rho / p->rho_dh, p->force.P_over_rho2, + p->entropy, p->entropy_dt, p->force.soundspeed, p->density.div_v, + p->density.rot_v[0], p->density.rot_v[1], p->density.rot_v[2], + p->force.balsara, p->force.v_sig, p->force.h_dt, p->ti_begin, p->ti_end); } diff --git a/src/hydro/Gadget2/hydro_iact.h b/src/hydro/Gadget2/hydro_iact.h index daa6c37681131e197b7e1834d0792238a600eaaa..2033e29604bea81bfaabfe3ce1a27178fee09679 100644 --- a/src/hydro/Gadget2/hydro_iact.h +++ b/src/hydro/Gadget2/hydro_iact.h @@ -86,8 +86,8 @@ __attribute__((always_inline)) INLINE static void runner_iact_density( dv[2] = pi->v[2] - pj->v[2]; const float dvdr = dv[0] * dx[0] + dv[1] * dx[1] + dv[2] * dx[2]; - pi->div_v -= faci * dvdr; - pj->div_v -= facj * dvdr; + pi->density.div_v -= faci * dvdr; + pj->density.div_v -= facj * dvdr; /* Compute dv cross r */ curlvr[0] = dv[1] * dx[2] - dv[2] * dx[1]; @@ -109,9 +109,125 @@ __attribute__((always_inline)) INLINE static void runner_iact_density( __attribute__((always_inline)) INLINE static void runner_iact_vec_density( float *R2, float *Dx, float *Hi, float *Hj, struct part **pi, struct part **pj) { + +#ifdef WITH_VECTORIZATION + + vector r, ri, r2, xi, xj, hi, hj, hi_inv, hj_inv, wi, wj, wi_dx, wj_dx; + vector rhoi, rhoj, rhoi_dh, rhoj_dh, wcounti, wcountj, wcounti_dh, wcountj_dh; + vector mi, mj; + vector dx[3], dv[3]; + vector vi[3], vj[3]; + vector dvdr, div_vi, div_vj; + vector curlvr[3], curl_vi[3], curl_vj[3]; + int k, j; + +#if VEC_SIZE == 8 + /* Get the masses. */ + mi.v = vec_set(pi[0]->mass, pi[1]->mass, pi[2]->mass, pi[3]->mass, + pi[4]->mass, pi[5]->mass, pi[6]->mass, pi[7]->mass); + mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass, + pj[4]->mass, pj[5]->mass, pj[6]->mass, pj[7]->mass); + /* Get each velocity component. */ + for (k = 0; k < 3; k++) { + vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k], + pi[4]->v[k], pi[5]->v[k], pi[6]->v[k], pi[7]->v[k]); + vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k], + pj[4]->v[k], pj[5]->v[k], pj[6]->v[k], pj[7]->v[k]); + } + /* Get each component of particle separation. + * (Dx={dx1,dy1,dz1,dx2,dy2,dz2,...,dxn,dyn,dzn})*/ + for (k = 0; k < 3; k++) + dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k], Dx[12 + k], + Dx[15 + k], Dx[18 + k], Dx[21 + k]); +#elif VEC_SIZE == 4 + mi.v = vec_set(pi[0]->mass, pi[1]->mass, pi[2]->mass, pi[3]->mass); + mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass); + for (k = 0; k < 3; k++) { + vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k]); + vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k]); + } + for (k = 0; k < 3; k++) + dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k]); +#else + error("Unknown vector size.") +#endif + + /* Get the radius and inverse radius. */ + r2.v = vec_load(R2); + ri.v = vec_rsqrt(r2.v); + /*vec_rsqrt does not have the level of accuracy we need, so an extra term is + * added below.*/ + ri.v = ri.v - vec_set1(0.5f) * ri.v * (r2.v * ri.v * ri.v - vec_set1(1.0f)); + r.v = r2.v * ri.v; + + hi.v = vec_load(Hi); + hi_inv.v = vec_rcp(hi.v); + hi_inv.v = hi_inv.v - hi_inv.v * (hi_inv.v * hi.v - vec_set1(1.0f)); + xi.v = r.v * hi_inv.v; + + hj.v = vec_load(Hj); + hj_inv.v = vec_rcp(hj.v); + hj_inv.v = hj_inv.v - hj_inv.v * (hj_inv.v * hj.v - vec_set1(1.0f)); + xj.v = r.v * hj_inv.v; + + /* Compute the kernel function. */ + kernel_deval_vec(&xi, &wi, &wi_dx); + kernel_deval_vec(&xj, &wj, &wj_dx); + + /* Compute dv. */ + dv[0].v = vi[0].v - vj[0].v; + dv[1].v = vi[1].v - vj[1].v; + dv[2].v = vi[2].v - vj[2].v; + + /* Compute dv dot r */ + dvdr.v = (dv[0].v * dx[0].v) + (dv[1].v * dx[1].v) + (dv[2].v * dx[2].v); + dvdr.v = dvdr.v * ri.v; + + /* Compute dv cross r */ + curlvr[0].v = dv[1].v * dx[2].v - dv[2].v * dx[1].v; + curlvr[1].v = dv[2].v * dx[0].v - dv[0].v * dx[2].v; + curlvr[2].v = dv[0].v * dx[1].v - dv[1].v * dx[0].v; + for (k = 0; k < 3; k++) curlvr[k].v *= ri.v; + + /* Compute density of pi. */ + rhoi.v = mj.v * wi.v; + rhoi_dh.v = mj.v * (vec_set1(3.0f) * wi.v + xi.v * wi_dx.v); + wcounti.v = wi.v; + wcounti_dh.v = xi.v * wi_dx.v; + div_vi.v = mj.v * dvdr.v * wi_dx.v; + for (k = 0; k < 3; k++) curl_vi[k].v = mj.v * curlvr[k].v * wi_dx.v; + + /* Compute density of pj. */ + rhoj.v = mi.v * wj.v; + rhoj_dh.v = mi.v * (vec_set1(3.0f) * wj.v + xj.v * wj_dx.v); + wcountj.v = wj.v; + wcountj_dh.v = xj.v * wj_dx.v; + div_vj.v = mi.v * dvdr.v * wj_dx.v; + for (k = 0; k < 3; k++) curl_vj[k].v = mi.v * curlvr[k].v * wj_dx.v; + + /* Update particles. */ + for (k = 0; k < VEC_SIZE; k++) { + pi[k]->rho += rhoi.f[k]; + pi[k]->rho_dh -= rhoi_dh.f[k]; + pi[k]->density.wcount += wcounti.f[k]; + pi[k]->density.wcount_dh -= wcounti_dh.f[k]; + pi[k]->density.div_v -= div_vi.f[k]; + for (j = 0; j < 3; j++) pi[k]->density.rot_v[j] += curl_vi[j].f[k]; + pj[k]->rho += rhoj.f[k]; + pj[k]->rho_dh -= rhoj_dh.f[k]; + pj[k]->density.wcount += wcountj.f[k]; + pj[k]->density.wcount_dh -= wcountj_dh.f[k]; + pj[k]->density.div_v -= div_vj.f[k]; + for (j = 0; j < 3; j++) pj[k]->density.rot_v[j] += curl_vj[j].f[k]; + } + +#else + error( - "A vectorised version of the Gadget2 density interaction function does " - "not exist yet!"); + "The Gadget2 serial version of runner_iact_density was called when the " + "vectorised version should have been used.") + +#endif } /** @@ -150,7 +266,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_density( dv[1] = pi->v[1] - pj->v[1]; dv[2] = pi->v[2] - pj->v[2]; const float dvdr = dv[0] * dx[0] + dv[1] * dx[1] + dv[2] * dx[2]; - pi->div_v -= fac * dvdr; + pi->density.div_v -= fac * dvdr; /* Compute dv cross r */ curlvr[0] = dv[1] * dx[2] - dv[2] * dx[1]; @@ -168,9 +284,101 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_density( __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_density(float *R2, float *Dx, float *Hi, float *Hj, struct part **pi, struct part **pj) { + +#ifdef WITH_VECTORIZATION + + vector r, ri, r2, xi, hi, hi_inv, wi, wi_dx; + vector rhoi, rhoi_dh, wcounti, wcounti_dh, div_vi; + vector mj; + vector dx[3], dv[3]; + vector vi[3], vj[3]; + vector dvdr; + vector curlvr[3], curl_vi[3]; + int k, j; + +#if VEC_SIZE == 8 + /* Get the masses. */ + mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass, + pj[4]->mass, pj[5]->mass, pj[6]->mass, pj[7]->mass); + /* Get each velocity component. */ + for (k = 0; k < 3; k++) { + vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k], + pi[4]->v[k], pi[5]->v[k], pi[6]->v[k], pi[7]->v[k]); + vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k], + pj[4]->v[k], pj[5]->v[k], pj[6]->v[k], pj[7]->v[k]); + } + /* Get each component of particle separation. + * (Dx={dx1,dy1,dz1,dx2,dy2,dz2,...,dxn,dyn,dzn})*/ + for (k = 0; k < 3; k++) + dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k], Dx[12 + k], + Dx[15 + k], Dx[18 + k], Dx[21 + k]); +#elif VEC_SIZE == 4 + mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass); + for (k = 0; k < 3; k++) { + vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k]); + vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k]); + } + for (k = 0; k < 3; k++) + dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k]); +#else + error("Unknown vector size.") +#endif + + /* Get the radius and inverse radius. */ + r2.v = vec_load(R2); + ri.v = vec_rsqrt(r2.v); + /*vec_rsqrt does not have the level of accuracy we need, so an extra term is + * added below.*/ + ri.v = ri.v - vec_set1(0.5f) * ri.v * (r2.v * ri.v * ri.v - vec_set1(1.0f)); + r.v = r2.v * ri.v; + + hi.v = vec_load(Hi); + hi_inv.v = vec_rcp(hi.v); + hi_inv.v = hi_inv.v - hi_inv.v * (hi_inv.v * hi.v - vec_set1(1.0f)); + xi.v = r.v * hi_inv.v; + + kernel_deval_vec(&xi, &wi, &wi_dx); + + /* Compute dv. */ + dv[0].v = vi[0].v - vj[0].v; + dv[1].v = vi[1].v - vj[1].v; + dv[2].v = vi[2].v - vj[2].v; + + /* Compute dv dot r */ + dvdr.v = (dv[0].v * dx[0].v) + (dv[1].v * dx[1].v) + (dv[2].v * dx[2].v); + dvdr.v = dvdr.v * ri.v; + + /* Compute dv cross r */ + curlvr[0].v = dv[1].v * dx[2].v - dv[2].v * dx[1].v; + curlvr[1].v = dv[2].v * dx[0].v - dv[0].v * dx[2].v; + curlvr[2].v = dv[0].v * dx[1].v - dv[1].v * dx[0].v; + for (k = 0; k < 3; k++) curlvr[k].v *= ri.v; + + /* Compute density of pi. */ + rhoi.v = mj.v * wi.v; + rhoi_dh.v = mj.v * (vec_set1(3.0f) * wi.v + xi.v * wi_dx.v); + wcounti.v = wi.v; + wcounti_dh.v = xi.v * wi_dx.v; + div_vi.v = mj.v * dvdr.v * wi_dx.v; + for (k = 0; k < 3; k++) curl_vi[k].v = mj.v * curlvr[k].v * wi_dx.v; + + /* Update particles. */ + for (k = 0; k < VEC_SIZE; k++) { + pi[k]->rho += rhoi.f[k]; + pi[k]->rho_dh -= rhoi_dh.f[k]; + pi[k]->density.wcount += wcounti.f[k]; + pi[k]->density.wcount_dh -= wcounti_dh.f[k]; + pi[k]->density.div_v -= div_vi.f[k]; + for (j = 0; j < 3; j++) pi[k]->density.rot_v[j] += curl_vi[j].f[k]; + } + +#else + error( - "A vectorised version of the Gadget2 non-symmetric density interaction " - "function does not exist yet!"); + "The Gadget2 serial version of runner_iact_nonsym_density was called " + "when the vectorised version should have been used.") + +#endif } /** @@ -191,8 +399,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_force( const float mj = pj->mass; const float rhoi = pi->rho; const float rhoj = pj->rho; - const float pressurei = pi->force.pressure; - const float pressurej = pj->force.pressure; /* Get the kernel for hi. */ const float hi_inv = 1.0f / hi; @@ -209,8 +415,8 @@ __attribute__((always_inline)) INLINE static void runner_iact_force( const float wj_dr = hj2_inv * hj2_inv * wj_dx; /* Compute gradient terms */ - const float P_over_rho_i = pressurei / (rhoi * rhoi) * pi->rho_dh; - const float P_over_rho_j = pressurej / (rhoj * rhoj) * pj->rho_dh; + const float P_over_rho2_i = pi->force.P_over_rho2; + const float P_over_rho2_j = pj->force.P_over_rho2; /* Compute sound speeds */ const float ci = pi->force.soundspeed; @@ -222,12 +428,8 @@ __attribute__((always_inline)) INLINE static void runner_iact_force( (pi->v[2] - pj->v[2]) * dx[2]; /* Balsara term */ - const float balsara_i = - fabsf(pi->div_v) / - (fabsf(pi->div_v) + pi->force.curl_v + 0.0001f * ci / fac_mu / hi); - const float balsara_j = - fabsf(pj->div_v) / - (fabsf(pj->div_v) + pj->force.curl_v + 0.0001f * cj / fac_mu / hj); + const float balsara_i = pi->force.balsara; + const float balsara_j = pj->force.balsara; /* Are the particles moving towards each others ? */ const float omega_ij = fminf(dvdr, 0.f); @@ -243,7 +445,8 @@ __attribute__((always_inline)) INLINE static void runner_iact_force( /* Now, convolve with the kernel */ const float visc_term = 0.5f * visc * (wi_dr + wj_dr) * r_inv; - const float sph_term = (P_over_rho_i * wi_dr + P_over_rho_j * wj_dr) * r_inv; + const float sph_term = + (P_over_rho2_i * wi_dr + P_over_rho2_j * wj_dr) * r_inv; /* Eventually got the acceleration */ const float acc = visc_term + sph_term; @@ -258,8 +461,8 @@ __attribute__((always_inline)) INLINE static void runner_iact_force( pj->a_hydro[2] += mi * acc * dx[2]; /* Get the time derivative for h. */ - pi->h_dt -= mj * dvdr * r_inv / rhoj * wi_dr; - pj->h_dt -= mi * dvdr * r_inv / rhoi * wj_dr; + pi->force.h_dt -= mj * dvdr * r_inv / rhoj * wi_dr; + pj->force.h_dt -= mi * dvdr * r_inv / rhoi * wj_dr; /* Update the signal velocity. */ pi->force.v_sig = fmaxf(pi->force.v_sig, v_sig); @@ -299,8 +502,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( const float mj = pj->mass; const float rhoi = pi->rho; const float rhoj = pj->rho; - const float pressurei = pi->force.pressure; - const float pressurej = pj->force.pressure; /* Get the kernel for hi. */ const float hi_inv = 1.0f / hi; @@ -317,8 +518,8 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( const float wj_dr = hj2_inv * hj2_inv * wj_dx; /* Compute gradient terms */ - const float P_over_rho_i = pressurei / (rhoi * rhoi) * pi->rho_dh; - const float P_over_rho_j = pressurej / (rhoj * rhoj) * pj->rho_dh; + const float P_over_rho2_i = pi->force.P_over_rho2; + const float P_over_rho2_j = pj->force.P_over_rho2; /* Compute sound speeds */ const float ci = pi->force.soundspeed; @@ -330,12 +531,8 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( (pi->v[2] - pj->v[2]) * dx[2]; /* Balsara term */ - const float balsara_i = - fabsf(pi->div_v) / - (fabsf(pi->div_v) + pi->force.curl_v + 0.0001f * ci / fac_mu / hi); - const float balsara_j = - fabsf(pj->div_v) / - (fabsf(pj->div_v) + pj->force.curl_v + 0.0001f * cj / fac_mu / hj); + const float balsara_i = pi->force.balsara; + const float balsara_j = pj->force.balsara; /* Are the particles moving towards each others ? */ const float omega_ij = fminf(dvdr, 0.f); @@ -351,7 +548,8 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( /* Now, convolve with the kernel */ const float visc_term = 0.5f * visc * (wi_dr + wj_dr) * r_inv; - const float sph_term = (P_over_rho_i * wi_dr + P_over_rho_j * wj_dr) * r_inv; + const float sph_term = + (P_over_rho2_i * wi_dr + P_over_rho2_j * wj_dr) * r_inv; /* Eventually got the acceleration */ const float acc = visc_term + sph_term; @@ -362,7 +560,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( pi->a_hydro[2] -= mj * acc * dx[2]; /* Get the time derivative for h. */ - pi->h_dt -= mj * dvdr * r_inv / rhoj * wi_dr; + pi->force.h_dt -= mj * dvdr * r_inv / rhoj * wi_dr; /* Update the signal velocity. */ pi->force.v_sig = fmaxf(pi->force.v_sig, v_sig); diff --git a/src/hydro/Gadget2/hydro_part.h b/src/hydro/Gadget2/hydro_part.h index 863bdbefde4543c1f1f6b0415dc6c229f3d58012..0cd830c51a1836c0a56e7b4097990bf4d07a101f 100644 --- a/src/hydro/Gadget2/hydro_part.h +++ b/src/hydro/Gadget2/hydro_part.h @@ -43,8 +43,8 @@ struct part { /* Particle cutoff radius. */ float h; - /* Time derivative of the smoothing length */ - float h_dt; + /* Particle mass. */ + float mass; /* Particle time of beginning of time-step. */ int ti_begin; @@ -55,54 +55,53 @@ struct part { /* Particle density. */ float rho; - /* Derivative of the density with respect to this particle's smoothing length. - */ - float rho_dh; - /* Particle entropy. */ float entropy; /* Entropy time derivative */ float entropy_dt; - /* Particle mass. */ - float mass; + /* Derivative of the density with respect to smoothing length. */ + float rho_dh; union { struct { - /* Number of neighbours */ + /* Number of neighbours. */ float wcount; - /* Number of neighbours spatial derivative */ + /* Number of neighbours spatial derivative. */ float wcount_dh; - /* Velocity curl components */ + /* Particle velocity curl. */ float rot_v[3]; + /* Particle velocity divergence. */ + float div_v; + } density; struct { - /* Velocity curl norm*/ - float curl_v; - - /* Signal velocity */ - float v_sig; + /* Balsara switch */ + float balsara; - /* Particle pressure */ - float pressure; + /* Pressure over density squared (including drho/dh term) */ + float P_over_rho2; - /* Particle sound speed */ + /* Particle sound speed. */ float soundspeed; + /* Signal velocity. */ + float v_sig; + + /* Time derivative of the smoothing length */ + float h_dt; + } force; }; - /* Velocity divergence */ - float div_v; - /* Particle ID. */ long long id; diff --git a/src/hydro/Minimal/hydro.h b/src/hydro/Minimal/hydro.h index 87daae2bc43f671aa9549058bb8168c0af91ad5c..ee7001700cc63effc8dbd6bab7b74fc6c2f5d426 100644 --- a/src/hydro/Minimal/hydro.h +++ b/src/hydro/Minimal/hydro.h @@ -106,8 +106,8 @@ __attribute__((always_inline)) INLINE static void hydro_end_density( /* Finish the calculation by inserting the missing h-factors */ p->rho *= ih * ih2; p->rho_dh *= ih4; - p->density.wcount *= (4.0f / 3.0f * M_PI * kernel_gamma3); - p->density.wcount_dh *= ih * (4.0f / 3.0f * M_PI * kernel_gamma4); + p->density.wcount *= kernel_norm; + p->density.wcount_dh *= ih * kernel_gamma * kernel_norm; const float irho = 1.f / p->rho; @@ -155,7 +155,7 @@ __attribute__((always_inline)) INLINE static void hydro_reset_acceleration( /* Reset the time derivatives. */ p->u_dt = 0.0f; - p->h_dt = 0.0f; + p->force.h_dt = 0.0f; p->force.v_sig = 0.0f; } @@ -191,7 +191,10 @@ __attribute__((always_inline)) INLINE static void hydro_predict_extra( * @param p The particle to act upon */ __attribute__((always_inline)) INLINE static void hydro_end_force( - struct part *restrict p) {} + struct part *restrict p) { + + p->force.h_dt *= p->h * 0.333333333f; +} /** * @brief Kick the additional variables diff --git a/src/hydro/Minimal/hydro_debug.h b/src/hydro/Minimal/hydro_debug.h index 1e0e3f1f111149a3babeef431893dff2cbe6bb3c..7ab348d255173a33fc29e482eb6032c03c82d9eb 100644 --- a/src/hydro/Minimal/hydro_debug.h +++ b/src/hydro/Minimal/hydro_debug.h @@ -28,6 +28,6 @@ __attribute__((always_inline)) INLINE static void hydro_debug_particle( p->x[0], p->x[1], p->x[2], p->v[0], p->v[1], p->v[2], xp->v_full[0], xp->v_full[1], xp->v_full[2], p->a_hydro[0], p->a_hydro[1], p->a_hydro[2], xp->u_full, p->u, p->u_dt, p->force.v_sig, p->force.pressure, p->h, - p->h_dt, (int)p->density.wcount, p->mass, p->rho_dh, p->rho, p->ti_begin, - p->ti_end); + p->force.h_dt, (int)p->density.wcount, p->mass, p->rho_dh, p->rho, + p->ti_begin, p->ti_end); } diff --git a/src/hydro/Minimal/hydro_iact.h b/src/hydro/Minimal/hydro_iact.h index 4dbb212ef39e7fe32e8ffdf51cc3643758210d09..311bdc90744059ea1272c2ca6cf419e5fbc1ba82 100644 --- a/src/hydro/Minimal/hydro_iact.h +++ b/src/hydro/Minimal/hydro_iact.h @@ -173,8 +173,8 @@ __attribute__((always_inline)) INLINE static void runner_iact_force( pj->u_dt += P_over_rho_j * mi * dvdr * r_inv * wj_dr; /* Get the time derivative for h. */ - pi->h_dt -= mj * dvdr * r_inv / rhoj * wi_dr; - pj->h_dt -= mi * dvdr * r_inv / rhoi * wj_dr; + pi->force.h_dt -= mj * dvdr * r_inv / rhoj * wi_dr; + pj->force.h_dt -= mi * dvdr * r_inv / rhoi * wj_dr; /* Update the signal velocity. */ pi->force.v_sig = fmaxf(pi->force.v_sig, v_sig); @@ -251,7 +251,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( pi->u_dt += P_over_rho_i * mj * dvdr * r_inv * wi_dr; /* Get the time derivative for h. */ - pi->h_dt -= mj * dvdr * r_inv / rhoj * wi_dr; + pi->force.h_dt -= mj * dvdr * r_inv / rhoj * wi_dr; /* Update the signal velocity. */ pi->force.v_sig = fmaxf(pi->force.v_sig, v_sig); diff --git a/src/hydro/Minimal/hydro_part.h b/src/hydro/Minimal/hydro_part.h index 2580ef2a94eabda5a34de9d4e4b48227bc0e5146..f4feb2311ad2bfd8c7227f3b072c5971724a5815 100644 --- a/src/hydro/Minimal/hydro_part.h +++ b/src/hydro/Minimal/hydro_part.h @@ -54,8 +54,6 @@ struct part { float h; /*!< Particle smoothing length. */ - float h_dt; /*!< Time derivative of smoothing length */ - int ti_begin; /*!< Time at the beginning of time-step. */ int ti_end; /*!< Time at the end of time-step. */ @@ -91,7 +89,7 @@ struct part { * neighbours. * * Quantities in this sub-structure should only be accessed in the force - * loop over neighbours and the ghost and kick tasks. + * loop over neighbours and the ghost, drift and kick tasks. */ struct { @@ -99,6 +97,8 @@ struct part { float v_sig; /*!< Particle signal velocity */ + float h_dt; /*!< Time derivative of smoothing length */ + } force; }; diff --git a/src/kernel_hydro.h b/src/kernel_hydro.h index 8a1dac79cd7766b23989fd377c80cb33fc0cccde..7a73bf4f3d51550c5bf9096c0121d0faab819c0b 100644 --- a/src/kernel_hydro.h +++ b/src/kernel_hydro.h @@ -154,6 +154,9 @@ static const float kernel_coeffs[(kernel_degree + 1) * (kernel_ivals + 1)] #define kernel_root \ ((float)(kernel_coeffs[kernel_degree]) * kernel_constant * kernel_igamma3) +/* Kernel normalisation constant (volume term) */ +#define kernel_norm ((float)(4.0 * M_PI * kernel_gamma3 / 3.0)) + /** * @brief Computes the kernel function and its derivative. * diff --git a/src/runner.c b/src/runner.c index 4d0b595b8241a8a6887f7da9e657f892c309103f..e48d2ccaba69315f4295a0dbacd36e9c7d26a0c4 100644 --- a/src/runner.c +++ b/src/runner.c @@ -787,9 +787,6 @@ void runner_do_kick_fixdt(struct runner *r, struct cell *c, int timer) { struct xpart *restrict xp = &xparts[k]; /* First, finish the force loop */ - p->h_dt *= p->h * 0.333333333f; - - /* And do the same of the extra variable */ hydro_end_force(p); if (p->gpart != NULL) gravity_end_force(p->gpart); @@ -908,9 +905,6 @@ void runner_do_kick(struct runner *r, struct cell *c, int timer) { if (p->ti_end <= ti_current) { /* First, finish the force loop */ - p->h_dt *= p->h * 0.333333333f; - - /* And do the same of the extra variable */ hydro_end_force(p); if (p->gpart != NULL) gravity_end_force(p->gpart); diff --git a/src/timestep.h b/src/timestep.h index b4da4cdbaf5b082bd5c5890ca6305dd1dd952781..f32a699f6338362c3de247e5a5abc419b9547374 100644 --- a/src/timestep.h +++ b/src/timestep.h @@ -117,8 +117,8 @@ __attribute__((always_inline)) INLINE static int get_part_timestep( /* Limit change in h */ const float dt_h_change = - (p->h_dt != 0.0f) - ? fabsf(e->hydro_properties->log_max_h_change * p->h / p->h_dt) + (p->force.h_dt != 0.0f) + ? fabsf(e->hydro_properties->log_max_h_change * p->h / p->force.h_dt) : FLT_MAX; new_dt = fminf(new_dt, dt_h_change); diff --git a/tests/Makefile.am b/tests/Makefile.am index 64496e0e420b3db591e4f00b8fd697050d761514..ca5d6af21fe417da4d0b33dde592eeb0be544938 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -26,7 +26,7 @@ TESTS = testGreetings testReading.sh testSingle testPair.sh testPairPerturbed.sh # List of test programs to compile check_PROGRAMS = testGreetings testReading testSingle testTimeIntegration \ - testSPHStep testPair test27cells testParser testKernel + testSPHStep testPair test27cells testParser testKernel testNonSymInt testSymInt # Sources for the individual programs testGreetings_SOURCES = testGreetings.c @@ -47,6 +47,10 @@ testParser_SOURCES = testParser.c testKernel_SOURCES = testKernel.c +testNonSymInt_SOURCES = testNonSymInt.c + +testSymInt_SOURCES = testSymInt.c + # Files necessary for distribution EXTRA_DIST = testReading.sh makeInput.py testPair.sh testPairPerturbed.sh \ test27cells.sh test27cellsPerturbed.sh tolerance.dat testParser.sh \ diff --git a/tests/test27cells.c b/tests/test27cells.c index e58f0b199577443c3fd0fe3d60446e46507306d7..c21d4cc2337a19b6b2660b960fa953e5bc9df6ef 100644 --- a/tests/test27cells.c +++ b/tests/test27cells.c @@ -184,7 +184,8 @@ void dump_particle_fields(char *fileName, struct cell *main_cell, main_cell->parts[pid].rho_dh, main_cell->parts[pid].density.wcount, main_cell->parts[pid].density.wcount_dh, #if defined(GADGET2_SPH) - main_cell->parts[pid].div_v, main_cell->parts[pid].density.rot_v[0], + main_cell->parts[pid].density.div_v, + main_cell->parts[pid].density.rot_v[0], main_cell->parts[pid].density.rot_v[1], main_cell->parts[pid].density.rot_v[2] #elif defined(DEFAULT_SPH) @@ -219,7 +220,7 @@ void dump_particle_fields(char *fileName, struct cell *main_cell, cj->parts[pjd].v[2], cj->parts[pjd].rho, cj->parts[pjd].rho_dh, cj->parts[pjd].density.wcount, cj->parts[pjd].density.wcount_dh, #if defined(GADGET2_SPH) - cj->parts[pjd].div_v, cj->parts[pjd].density.rot_v[0], + cj->parts[pjd].density.div_v, cj->parts[pjd].density.rot_v[0], cj->parts[pjd].density.rot_v[1], cj->parts[pjd].density.rot_v[2] #elif defined(DEFAULT_SPH) cj->parts[pjd].density.div_v, cj->parts[pjd].density.rot_v[0], diff --git a/tests/testNonSymInt.c b/tests/testNonSymInt.c new file mode 100644 index 0000000000000000000000000000000000000000..da98c9efcedd595e133754e49d7396fbba574110 --- /dev/null +++ b/tests/testNonSymInt.c @@ -0,0 +1,251 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (C) 2015 Matthieu Schaller (matthieu.schaller@durham.ac.uk). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +#include <fenv.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include "swift.h" + +char *serial_filename = "test_nonsym_serial.dat"; +char *vec_filename = "test_nonsym_vec.dat"; + +/** + * @brief Constructs an array of particles in a valid state prior to + * a IACT_NONSYM and IACT_NONSYM_VEC call. + * + * @param count No. of particles to create + * @param offset The position of the particle offset from (0,0,0). + * @param spacing Particle spacing. + * @param h The smoothing length of the particles in units of the inter-particle + *separation. + * @param partId The running counter of IDs. + */ +struct part *make_particles(int count, double *offset, double spacing, double h, + long long *partId) { + + struct part *particles; + if (posix_memalign((void **)&particles, part_align, + count * sizeof(struct part)) != 0) { + error("couldn't allocate particles, no. of particles: %d", (int)count); + } + + /* Construct the particles */ + struct part *p; + for (size_t i = 0; i < VEC_SIZE + 1; ++i) { + p = &particles[i]; + p->x[0] = offset[0] + spacing * i; + p->x[1] = offset[1] + spacing * i; + p->x[2] = offset[2] + spacing * i; + + /* Randomise velocities. */ + p->v[0] = random_uniform(-0.05, 0.05); + p->v[1] = random_uniform(-0.05, 0.05); + p->v[2] = random_uniform(-0.05, 0.05); + + p->h = h; + p->id = ++(*partId); + p->mass = 1.0f; + } + return particles; +} + +/** + * @brief Dumps all particle information to a file + */ +void dump_indv_particle_fields(char *fileName, struct part *p) { + + FILE *file = fopen(fileName, "a"); + + fprintf(file, + "%6llu %10f %10f %10f %10f %10f %10f %13e %13e %13e %13e %13e " + "%13e %13e %13e\n", + p->id, p->x[0], p->x[1], p->x[2], p->v[0], p->v[1], p->v[2], p->rho, + p->rho_dh, p->density.wcount, p->density.wcount_dh, +#if defined(GADGET2_SPH) + p->density.div_v, p->density.rot_v[0], p->density.rot_v[1], + p->density.rot_v[2] +#elif defined(DEFAULT_SPH) + p->density.div_v, p->density.curl_v[0], p->density.curl_v[1], + p->density.curl_v[2] +#else + 0., 0., 0., 0. +#endif + ); + fclose(file); +} + +/** + * @brief Creates a header for the output file + */ +void write_header(char *fileName) { + + FILE *file = fopen(fileName, "w"); + /* Write header */ + fprintf(file, + "# %4s %10s %10s %10s %10s %10s %10s %13s %13s %13s %13s %13s " + "%13s %13s %13s\n", + "ID", "pos_x", "pos_y", "pos_z", "v_x", "v_y", "v_z", "rho", "rho_dh", + "wcount", "wcount_dh", "div_v", "curl_vx", "curl_vy", "curl_vz"); + fprintf(file, "\nPARTICLES BEFORE INTERACTION:\n"); + fclose(file); +} + +/* + * @brief Calls the serial and vectorised version of the non-symmetrical density + * interaction. + * + * @param parts Particle array to be interacted + * @param count No. of particles to be interacted + * + */ +void test_nonsym_density_interaction(struct part *parts, int count) { + + /* Use the first particle in the array as the one that gets updated. */ + struct part pi = parts[0]; + // const float hig2 = hi * hi * kernel_gamma2; + + FILE *file; + write_header(serial_filename); + write_header(vec_filename); + + /* Dump state of particles before serial interaction. */ + dump_indv_particle_fields(serial_filename, &pi); + for (size_t i = 1; i < count; i++) + dump_indv_particle_fields(serial_filename, &parts[i]); + + /* Make copy of pi to be used in vectorised version. */ + struct part pi_vec = pi; + struct part pj_vec[VEC_SIZE]; + for (int i = 0; i < VEC_SIZE; i++) pj_vec[i] = parts[i + 1]; + + float r2q[VEC_SIZE] __attribute__((aligned(sizeof(float) * VEC_SIZE))); + float hiq[VEC_SIZE] __attribute__((aligned(sizeof(float) * VEC_SIZE))); + float hjq[VEC_SIZE] __attribute__((aligned(sizeof(float) * VEC_SIZE))); + float dxq[3 * VEC_SIZE] __attribute__((aligned(sizeof(float) * VEC_SIZE))); + struct part *piq[VEC_SIZE], *pjq[VEC_SIZE]; + + /* Perform serial interaction */ + for (int i = 1; i < count; i++) { + /* Compute the pairwise distance. */ + float r2 = 0.0f; + float dx[3]; + for (int k = 0; k < 3; k++) { + dx[k] = pi.x[k] - parts[i].x[k]; + r2 += dx[k] * dx[k]; + } + + runner_iact_nonsym_density(r2, dx, pi.h, parts[i].h, &pi, &parts[i]); + } + + file = fopen(serial_filename, "a"); + fprintf(file, "\nPARTICLES AFTER INTERACTION:\n"); + fclose(file); + + /* Dump result of serial interaction. */ + dump_indv_particle_fields(serial_filename, &pi); + for (size_t i = 1; i < count; i++) + dump_indv_particle_fields(serial_filename, &parts[i]); + + /* Setup arrays for vector interaction. */ + for (int i = 0; i < VEC_SIZE; i++) { + /* Compute the pairwise distance. */ + float r2 = 0.0f; + float dx[3]; + for (int k = 0; k < 3; k++) { + dx[k] = pi_vec.x[k] - pj_vec[i].x[k]; + r2 += dx[k] * dx[k]; + } + r2q[i] = r2; + dxq[3 * i + 0] = dx[0]; + dxq[3 * i + 1] = dx[1]; + dxq[3 * i + 2] = dx[2]; + hiq[i] = pi_vec.h; + hjq[i] = pj_vec[i].h; + piq[i] = &pi_vec; + pjq[i] = &pj_vec[i]; + } + + /* Dump state of particles before vector interaction. */ + dump_indv_particle_fields(vec_filename, piq[0]); + for (size_t i = 0; i < VEC_SIZE; i++) + dump_indv_particle_fields(vec_filename, pjq[i]); + + /* Perform vector interaction. */ + runner_iact_nonsym_vec_density(r2q, dxq, hiq, hjq, piq, pjq); + + file = fopen(vec_filename, "a"); + fprintf(file, "\nPARTICLES AFTER INTERACTION:\n"); + fclose(file); + + /* Dump result of serial interaction. */ + dump_indv_particle_fields(vec_filename, piq[0]); + for (size_t i = 0; i < VEC_SIZE; i++) + dump_indv_particle_fields(vec_filename, pjq[i]); +} + +/* And go... */ +int main(int argc, char *argv[]) { + double h = 1.2348, spacing = 0.5; + double offset[3] = {0.0, 0.0, 0.0}; + int count = VEC_SIZE + 1; + + /* Get some randomness going */ + srand(0); + + char c; + while ((c = getopt(argc, argv, "s:h:")) != -1) { + switch (c) { + case 'h': + sscanf(optarg, "%lf", &h); + break; + case 's': + sscanf(optarg, "%lf", &spacing); + break; + case '?': + error("Unknown option."); + break; + } + } + + if (h < 0 || spacing < 0) { + printf( + "\nUsage: %s [OPTIONS...]\n" + "\nGenerates a particle array with equal particle separation." + "\nThese are then interacted using runner_iact_density and " + "runner_iact_vec_density." + "\n\nOptions:" + "\n-h DISTANCE=1.2348 - Smoothing length in units of <x>" + "\n-s spacing - Spacing between particles" + "\n-v type (0,1,2,3) - Velocity field: (zero, random, divergent, " + "rotating)", + argv[0]); + exit(1); + } + + /* Build the infrastructure */ + static long long partId = 0; + struct part *particles = make_particles(count, offset, spacing, h, &partId); + + /* Call the test non-sym density test. */ + test_nonsym_density_interaction(particles, count); + + return 0; +} diff --git a/tests/testPair.c b/tests/testPair.c index a1e9055d0bd7cb4a87605fee689ec02148b38888..385e9a9e709b95685d86559e34b3b4c5fb3d3884 100644 --- a/tests/testPair.c +++ b/tests/testPair.c @@ -136,7 +136,7 @@ void dump_particle_fields(char *fileName, struct cell *ci, struct cell *cj) { ci->parts[pid].v[2], ci->parts[pid].rho, ci->parts[pid].rho_dh, ci->parts[pid].density.wcount, ci->parts[pid].density.wcount_dh, #ifdef GADGET2_SPH - ci->parts[pid].div_v, ci->parts[pid].density.rot_v[0], + ci->parts[pid].density.div_v, ci->parts[pid].density.rot_v[0], ci->parts[pid].density.rot_v[1], ci->parts[pid].density.rot_v[2] #else 0., 0., 0., 0. @@ -155,7 +155,7 @@ void dump_particle_fields(char *fileName, struct cell *ci, struct cell *cj) { cj->parts[pjd].v[2], cj->parts[pjd].rho, cj->parts[pjd].rho_dh, cj->parts[pjd].density.wcount, cj->parts[pjd].density.wcount_dh, #ifdef GADGET2_SPH - cj->parts[pjd].div_v, cj->parts[pjd].density.rot_v[0], + cj->parts[pjd].density.div_v, cj->parts[pjd].density.rot_v[0], cj->parts[pjd].density.rot_v[1], cj->parts[pjd].density.rot_v[2] #else 0., 0., 0., 0. diff --git a/tests/testSymInt.c b/tests/testSymInt.c new file mode 100644 index 0000000000000000000000000000000000000000..73fa8ed9b9c2c347bad2ff8fd2eadf9a6cae1336 --- /dev/null +++ b/tests/testSymInt.c @@ -0,0 +1,248 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (C) 2015 Matthieu Schaller (matthieu.schaller@durham.ac.uk). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +#include <fenv.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include "swift.h" + +char *serial_filename = "test_sym_serial.dat"; +char *vec_filename = "test_sym_vec.dat"; + +/** + * @brief Constructs an array of particles in a valid state prior to + * a IACT_NONSYM and IACT_NONSYM_VEC call. + * + * @param count No. of particles to create + * @param offset The position of the particle offset from (0,0,0). + * @param spacing Particle spacing. + * @param h The smoothing length of the particles in units of the inter-particle + *separation. + * @param partId The running counter of IDs. + */ +struct part *make_particles(int count, double *offset, double spacing, double h, + long long *partId) { + + struct part *particles; + if (posix_memalign((void **)&particles, part_align, + count * sizeof(struct part)) != 0) { + error("couldn't allocate particles, no. of particles: %d", (int)count); + } + + /* Construct the particles */ + struct part *p; + for (size_t i = 0; i < VEC_SIZE + 1; ++i) { + p = &particles[i]; + p->x[0] = offset[0] + spacing * i; + p->x[1] = offset[1] + spacing * i; + p->x[2] = offset[2] + spacing * i; + p->v[0] = random_uniform(-0.05, 0.05); + p->v[1] = random_uniform(-0.05, 0.05); + p->v[2] = random_uniform(-0.05, 0.05); + p->h = h; + p->id = ++(*partId); + p->mass = 1.0f; + } + return particles; +} + +/** + * @brief Dumps all particle information to a file + */ +void dump_indv_particle_fields(char *fileName, struct part *p) { + + FILE *file = fopen(fileName, "a"); + + fprintf(file, + "%6llu %10f %10f %10f %10f %10f %10f %13e %13e %13e %13e %13e " + "%13e %13e %13e\n", + p->id, p->x[0], p->x[1], p->x[2], p->v[0], p->v[1], p->v[2], p->rho, + p->rho_dh, p->density.wcount, p->density.wcount_dh, +#if defined(GADGET2_SPH) + p->density.div_v, p->density.rot_v[0], p->density.rot_v[1], + p->density.rot_v[2] +#elif defined(DEFAULT_SPH) + p->density.div_v, p->density.curl_v[0], p->density.curl_v[1], + p->density.curl_v[2] +#else + 0., 0., 0., 0. +#endif + ); + fclose(file); +} + +/** + * @brief Creates a header for the output file + */ +void write_header(char *fileName) { + + FILE *file = fopen(fileName, "w"); + /* Write header */ + fprintf(file, + "# %4s %10s %10s %10s %10s %10s %10s %13s %13s %13s %13s %13s " + "%13s %13s %13s\n", + "ID", "pos_x", "pos_y", "pos_z", "v_x", "v_y", "v_z", "rho", "rho_dh", + "wcount", "wcount_dh", "div_v", "curl_vx", "curl_vy", "curl_vz"); + fprintf(file, "\nPARTICLES BEFORE INTERACTION:\n"); + fclose(file); +} + +/* + * @brief Calls the serial and vectorised version of the non-symmetrical density + * interaction. + * + * @param parts Particle array to be interacted + * @param count No. of particles to be interacted + * + */ +void test_sym_density_interaction(struct part *parts, int count) { + + /* Use the first particle in the array as the one that gets updated. */ + struct part pi = parts[0]; + // const float hig2 = hi * hi * kernel_gamma2; + + FILE *file; + write_header(serial_filename); + write_header(vec_filename); + + /* Dump state of particles before serial interaction. */ + dump_indv_particle_fields(serial_filename, &pi); + for (size_t i = 1; i < count; i++) + dump_indv_particle_fields(serial_filename, &parts[i]); + + /* Make copy of pi to be used in vectorised version. */ + struct part pi_vec = pi; + struct part pj_vec[VEC_SIZE]; + for (int i = 0; i < VEC_SIZE; i++) pj_vec[i] = parts[i + 1]; + + float r2q[VEC_SIZE] __attribute__((aligned(sizeof(float) * VEC_SIZE))); + float hiq[VEC_SIZE] __attribute__((aligned(sizeof(float) * VEC_SIZE))); + float hjq[VEC_SIZE] __attribute__((aligned(sizeof(float) * VEC_SIZE))); + float dxq[3 * VEC_SIZE] __attribute__((aligned(sizeof(float) * VEC_SIZE))); + struct part *piq[VEC_SIZE], *pjq[VEC_SIZE]; + + /* Perform serial interaction */ + for (int i = 1; i < count; i++) { + /* Compute the pairwise distance. */ + float r2 = 0.0f; + float dx[3]; + for (int k = 0; k < 3; k++) { + dx[k] = pi.x[k] - parts[i].x[k]; + r2 += dx[k] * dx[k]; + } + + runner_iact_density(r2, dx, pi.h, parts[i].h, &pi, &parts[i]); + } + + file = fopen(serial_filename, "a"); + fprintf(file, "\nPARTICLES AFTER INTERACTION:\n"); + fclose(file); + + /* Dump result of serial interaction. */ + dump_indv_particle_fields(serial_filename, &pi); + for (size_t i = 1; i < count; i++) + dump_indv_particle_fields(serial_filename, &parts[i]); + + /* Setup arrays for vector interaction. */ + for (int i = 0; i < VEC_SIZE; i++) { + /* Compute the pairwise distance. */ + float r2 = 0.0f; + float dx[3]; + for (int k = 0; k < 3; k++) { + dx[k] = pi_vec.x[k] - pj_vec[i].x[k]; + r2 += dx[k] * dx[k]; + } + r2q[i] = r2; + dxq[3 * i + 0] = dx[0]; + dxq[3 * i + 1] = dx[1]; + dxq[3 * i + 2] = dx[2]; + hiq[i] = pi_vec.h; + hjq[i] = pj_vec[i].h; + piq[i] = &pi_vec; + pjq[i] = &pj_vec[i]; + } + + /* Dump state of particles before vector interaction. */ + dump_indv_particle_fields(vec_filename, piq[0]); + for (size_t i = 0; i < VEC_SIZE; i++) + dump_indv_particle_fields(vec_filename, pjq[i]); + + /* Perform vector interaction. */ + runner_iact_vec_density(r2q, dxq, hiq, hjq, piq, pjq); + + file = fopen(vec_filename, "a"); + fprintf(file, "\nPARTICLES AFTER INTERACTION:\n"); + fclose(file); + + /* Dump result of serial interaction. */ + dump_indv_particle_fields(vec_filename, piq[0]); + for (size_t i = 0; i < VEC_SIZE; i++) + dump_indv_particle_fields(vec_filename, pjq[i]); +} + +/* And go... */ +int main(int argc, char *argv[]) { + double h = 1.2348, spacing = 0.5; + double offset[3] = {0.0, 0.0, 0.0}; + int count = VEC_SIZE + 1; + + /* Get some randomness going */ + srand(0); + + char c; + while ((c = getopt(argc, argv, "s:h:v:")) != -1) { + switch (c) { + case 'h': + sscanf(optarg, "%lf", &h); + break; + case 's': + sscanf(optarg, "%lf", &spacing); + break; + case '?': + error("Unknown option."); + break; + } + } + + if (h < 0 || spacing < 0) { + printf( + "\nUsage: %s [OPTIONS...]\n" + "\nGenerates a particle array with equal particle separation." + "\nThese are then interacted using runner_iact_density and " + "runner_iact_vec_density." + "\n\nOptions:" + "\n-h DISTANCE=1.2348 - Smoothing length in units of <x>" + "\n-s spacing - Spacing between particles" + "\n-v type (0,1,2,3) - Velocity field: (zero, random, divergent, " + "rotating)", + argv[0]); + exit(1); + } + + /* Build the infrastructure */ + static long long partId = 0; + struct part *particles = make_particles(count, offset, spacing, h, &partId); + + /* Call the test non-sym density test. */ + test_sym_density_interaction(particles, count); + + return 0; +}