diff --git a/src/hydro/Default/hydro_iact.h b/src/hydro/Default/hydro_iact.h index 7b1c8c3b91ce917af46efc28f6001a4d47747e2a..89b90be22b831066c8b9211bfe50208b4c4af67e 100644 --- a/src/hydro/Default/hydro_iact.h +++ b/src/hydro/Default/hydro_iact.h @@ -96,120 +96,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_density( for (k = 0; k < 3; k++) pj->density.rot_v[k] += mi * curlvr[k] * wj_dx; } -/** - * @brief Density loop (Vectorized version) - */ -__attribute__((always_inline)) INLINE static void runner_iact_vec_density( - float *R2, float *Dx, float *Hi, float *Hj, struct part **pi, - struct part **pj) { - -#ifdef WITH_VECTORIZATION - - vector r, ri, r2, xi, xj, hi, hj, hi_inv, hj_inv, wi, wj, wi_dx, wj_dx; - vector rhoi, rhoj, rhoi_dh, rhoj_dh, wcounti, wcountj, wcounti_dh, wcountj_dh; - vector mi, mj; - vector dx[3], dv[3]; - vector vi[3], vj[3]; - vector dvdr, div_vi, div_vj; - vector curlvr[3], rot_vi[3], rot_vj[3]; - int k, j; - -#if VEC_SIZE == 8 - mi.v = vec_set(pi[0]->mass, pi[1]->mass, pi[2]->mass, pi[3]->mass, - pi[4]->mass, pi[5]->mass, pi[6]->mass, pi[7]->mass); - mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass, - pj[4]->mass, pj[5]->mass, pj[6]->mass, pj[7]->mass); - for (k = 0; k < 3; k++) { - vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k], - pi[4]->v[k], pi[5]->v[k], pi[6]->v[k], pi[7]->v[k]); - vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k], - pj[4]->v[k], pj[5]->v[k], pj[6]->v[k], pj[7]->v[k]); - } - for (k = 0; k < 3; k++) - dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k], Dx[12 + k], - Dx[15 + k], Dx[18 + k], Dx[21 + k]); -#elif VEC_SIZE == 4 - mi.v = vec_set(pi[0]->mass, pi[1]->mass, pi[2]->mass, pi[3]->mass); - mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass); - for (k = 0; k < 3; k++) { - vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k]); - vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k]); - } - for (k = 0; k < 3; k++) - dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k]); -#endif - - /* Get the radius and inverse radius. */ - r2.v = vec_load(R2); - ri.v = vec_rsqrt(r2.v); - ri.v = ri.v - vec_set1(0.5f) * ri.v * (r2.v * ri.v * ri.v - vec_set1(1.0f)); - r.v = r2.v * ri.v; - - hi.v = vec_load(Hi); - hi_inv.v = vec_rcp(hi.v); - hi_inv.v = hi_inv.v - hi_inv.v * (hi_inv.v * hi.v - vec_set1(1.0f)); - xi.v = r.v * hi_inv.v; - - hj.v = vec_load(Hj); - hj_inv.v = vec_rcp(hj.v); - hj_inv.v = hj_inv.v - hj_inv.v * (hj_inv.v * hj.v - vec_set1(1.0f)); - xj.v = r.v * hj_inv.v; - - kernel_deval_vec(&xi, &wi, &wi_dx); - kernel_deval_vec(&xj, &wj, &wj_dx); - - /* Compute dv. */ - dv[0].v = vi[0].v - vj[0].v; - dv[1].v = vi[1].v - vj[1].v; - dv[2].v = vi[2].v - vj[2].v; - - /* Compute dv dot r */ - dvdr.v = (dv[0].v * dx[0].v) + (dv[1].v * dx[1].v) + (dv[2].v * dx[2].v); - dvdr.v = dvdr.v * ri.v; - - /* Compute dv cross r */ - curlvr[0].v = dv[1].v * dx[2].v - dv[2].v * dx[1].v; - curlvr[1].v = dv[2].v * dx[0].v - dv[0].v * dx[2].v; - curlvr[2].v = dv[0].v * dx[1].v - dv[1].v * dx[0].v; - for (k = 0; k < 3; k++) curlvr[k].v *= ri.v; - - rhoi.v = mj.v * wi.v; - rhoi_dh.v = mj.v * (vec_set1(hydro_dimension) * wi.v + xi.v * wi_dx.v); - wcounti.v = wi.v; - wcounti_dh.v = xi.v * wi_dx.v; - div_vi.v = mj.v * dvdr.v * wi_dx.v; - for (k = 0; k < 3; k++) rot_vi[k].v = mj.v * curlvr[k].v * wi_dx.v; - - rhoj.v = mi.v * wj.v; - rhoj_dh.v = mi.v * (vec_set1(hydro_dimension) * wj.v + xj.v * wj_dx.v); - wcountj.v = wj.v; - wcountj_dh.v = xj.v * wj_dx.v; - div_vj.v = mi.v * dvdr.v * wj_dx.v; - for (k = 0; k < 3; k++) rot_vj[k].v = mi.v * curlvr[k].v * wj_dx.v; - - for (k = 0; k < VEC_SIZE; k++) { - pi[k]->rho += rhoi.f[k]; - pi[k]->rho_dh -= rhoi_dh.f[k]; - pi[k]->density.wcount += wcounti.f[k]; - pi[k]->density.wcount_dh -= wcounti_dh.f[k]; - pi[k]->density.div_v -= div_vi.f[k]; - for (j = 0; j < 3; j++) pi[k]->density.rot_v[j] += rot_vi[j].f[k]; - pj[k]->rho += rhoj.f[k]; - pj[k]->rho_dh -= rhoj_dh.f[k]; - pj[k]->density.wcount += wcountj.f[k]; - pj[k]->density.wcount_dh -= wcountj_dh.f[k]; - pj[k]->density.div_v -= div_vj.f[k]; - for (j = 0; j < 3; j++) pj[k]->density.rot_v[j] += rot_vj[j].f[k]; - } - -#else - - for (int k = 0; k < VEC_SIZE; k++) - runner_iact_density(R2[k], &Dx[3 * k], Hi[k], Hj[k], pi[k], pj[k]); - -#endif -} - /** * @brief Density loop (non-symmetric version) */ @@ -258,98 +144,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_density( for (k = 0; k < 3; k++) pi->density.rot_v[k] += mj * curlvr[k] * wi_dx; } -/** - * @brief Density loop (non-symmetric vectorized version) - */ -__attribute__((always_inline)) INLINE static void -runner_iact_nonsym_vec_density(float *R2, float *Dx, float *Hi, float *Hj, - struct part **pi, struct part **pj) { - -#ifdef WITH_VECTORIZATION - - vector r, ri, r2, xi, hi, hi_inv, wi, wi_dx; - vector rhoi, rhoi_dh, wcounti, wcounti_dh, div_vi; - vector mj; - vector dx[3], dv[3]; - vector vi[3], vj[3]; - vector dvdr; - vector curlvr[3], rot_vi[3]; - int k, j; - -#if VEC_SIZE == 8 - mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass, - pj[4]->mass, pj[5]->mass, pj[6]->mass, pj[7]->mass); - for (k = 0; k < 3; k++) { - vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k], - pi[4]->v[k], pi[5]->v[k], pi[6]->v[k], pi[7]->v[k]); - vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k], - pj[4]->v[k], pj[5]->v[k], pj[6]->v[k], pj[7]->v[k]); - } - for (k = 0; k < 3; k++) - dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k], Dx[12 + k], - Dx[15 + k], Dx[18 + k], Dx[21 + k]); -#elif VEC_SIZE == 4 - mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass); - for (k = 0; k < 3; k++) { - vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k]); - vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k]); - } - for (k = 0; k < 3; k++) - dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k]); -#endif - - /* Get the radius and inverse radius. */ - r2.v = vec_load(R2); - ri.v = vec_rsqrt(r2.v); - ri.v = ri.v - vec_set1(0.5f) * ri.v * (r2.v * ri.v * ri.v - vec_set1(1.0f)); - r.v = r2.v * ri.v; - - hi.v = vec_load(Hi); - hi_inv.v = vec_rcp(hi.v); - hi_inv.v = hi_inv.v - hi_inv.v * (hi_inv.v * hi.v - vec_set1(1.0f)); - xi.v = r.v * hi_inv.v; - - kernel_deval_vec(&xi, &wi, &wi_dx); - - /* Compute dv. */ - dv[0].v = vi[0].v - vj[0].v; - dv[1].v = vi[1].v - vj[1].v; - dv[2].v = vi[2].v - vj[2].v; - - /* Compute dv dot r */ - dvdr.v = (dv[0].v * dx[0].v) + (dv[1].v * dx[1].v) + (dv[2].v * dx[2].v); - dvdr.v = dvdr.v * ri.v; - - /* Compute dv cross r */ - curlvr[0].v = dv[1].v * dx[2].v - dv[2].v * dx[1].v; - curlvr[1].v = dv[2].v * dx[0].v - dv[0].v * dx[2].v; - curlvr[2].v = dv[0].v * dx[1].v - dv[1].v * dx[0].v; - for (k = 0; k < 3; k++) curlvr[k].v *= ri.v; - - rhoi.v = mj.v * wi.v; - rhoi_dh.v = mj.v * (vec_set1(hydro_dimension) * wi.v + xi.v * wi_dx.v); - wcounti.v = wi.v; - wcounti_dh.v = xi.v * wi_dx.v; - div_vi.v = mj.v * dvdr.v * wi_dx.v; - for (k = 0; k < 3; k++) rot_vi[k].v = mj.v * curlvr[k].v * wi_dx.v; - - for (k = 0; k < VEC_SIZE; k++) { - pi[k]->rho += rhoi.f[k]; - pi[k]->rho_dh -= rhoi_dh.f[k]; - pi[k]->density.wcount += wcounti.f[k]; - pi[k]->density.wcount_dh -= wcounti_dh.f[k]; - pi[k]->density.div_v -= div_vi.f[k]; - for (j = 0; j < 3; j++) pi[k]->density.rot_v[j] += rot_vi[j].f[k]; - } - -#else - - for (int k = 0; k < VEC_SIZE; k++) - runner_iact_nonsym_density(R2[k], &Dx[3 * k], Hi[k], Hj[k], pi[k], pj[k]); - -#endif -} - /** * @brief Force loop */ @@ -445,212 +239,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_force( pj->force.v_sig = max(pj->force.v_sig, v_sig); } -/** - * @brief Force loop (Vectorized version) - */ -__attribute__((always_inline)) INLINE static void runner_iact_vec_force( - float *R2, float *Dx, float *Hi, float *Hj, struct part **pi, - struct part **pj) { - -#ifdef WITH_VECTORIZATION - - vector r, r2, ri; - vector xi, xj; - vector hi, hj, hi_inv, hj_inv; - vector hid_inv, hjd_inv; - vector wi, wj, wi_dx, wj_dx, wi_dr, wj_dr, dvdr; - vector w; - vector piPOrho2, pjPOrho2, pirho, pjrho, piu, pju; - vector mi, mj; - vector f; - vector dx[3]; - vector vi[3], vj[3]; - vector pia[3], pja[3]; - vector piu_dt, pju_dt; - vector pih_dt, pjh_dt; - vector ci, cj, v_sig; - vector omega_ij, Pi_ij, balsara; - vector pialpha, pjalpha, alpha_ij, v_sig_u, tc; - int j, k; - -/* Load stuff. */ -#if VEC_SIZE == 8 - mi.v = vec_set(pi[0]->mass, pi[1]->mass, pi[2]->mass, pi[3]->mass, - pi[4]->mass, pi[5]->mass, pi[6]->mass, pi[7]->mass); - mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass, - pj[4]->mass, pj[5]->mass, pj[6]->mass, pj[7]->mass); - piPOrho2.v = vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2, - pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2, - pi[4]->force.P_over_rho2, pi[5]->force.P_over_rho2, - pi[6]->force.P_over_rho2, pi[7]->force.P_over_rho2); - pjPOrho2.v = vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2, - pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2, - pj[4]->force.P_over_rho2, pj[5]->force.P_over_rho2, - pj[6]->force.P_over_rho2, pj[7]->force.P_over_rho2); - pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho, pi[4]->rho, - pi[5]->rho, pi[6]->rho, pi[7]->rho); - pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho, pj[4]->rho, - pj[5]->rho, pj[6]->rho, pj[7]->rho); - piu.v = vec_set(pi[0]->u, pi[1]->u, pi[2]->u, pi[3]->u, pi[4]->u, pi[5]->u, - pi[6]->u, pi[7]->u); - pju.v = vec_set(pj[0]->u, pj[1]->u, pj[2]->u, pj[3]->u, pj[4]->u, pj[5]->u, - pj[6]->u, pj[7]->u); - ci.v = vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed, - pi[2]->force.soundspeed, pi[3]->force.soundspeed, - pi[4]->force.soundspeed, pi[5]->force.soundspeed, - pi[6]->force.soundspeed, pi[7]->force.soundspeed); - cj.v = vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed, - pj[2]->force.soundspeed, pj[3]->force.soundspeed, - pj[4]->force.soundspeed, pj[5]->force.soundspeed, - pj[6]->force.soundspeed, pj[7]->force.soundspeed); - for (k = 0; k < 3; k++) { - vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k], - pi[4]->v[k], pi[5]->v[k], pi[6]->v[k], pi[7]->v[k]); - vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k], - pj[4]->v[k], pj[5]->v[k], pj[6]->v[k], pj[7]->v[k]); - } - for (k = 0; k < 3; k++) - dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k], Dx[12 + k], - Dx[15 + k], Dx[18 + k], Dx[21 + k]); - balsara.v = - vec_set(pi[0]->force.balsara, pi[1]->force.balsara, pi[2]->force.balsara, - pi[3]->force.balsara, pi[4]->force.balsara, pi[5]->force.balsara, - pi[6]->force.balsara, pi[7]->force.balsara) + - vec_set(pj[0]->force.balsara, pj[1]->force.balsara, pj[2]->force.balsara, - pj[3]->force.balsara, pj[4]->force.balsara, pj[5]->force.balsara, - pj[6]->force.balsara, pj[7]->force.balsara); - pialpha.v = vec_set(pi[0]->alpha, pi[1]->alpha, pi[2]->alpha, pi[3]->alpha, - pi[4]->alpha, pi[5]->alpha, pi[6]->alpha, pi[7]->alpha); - pjalpha.v = vec_set(pj[0]->alpha, pj[1]->alpha, pj[2]->alpha, pj[3]->alpha, - pj[4]->alpha, pj[5]->alpha, pj[6]->alpha, pj[7]->alpha); -#elif VEC_SIZE == 4 - mi.v = vec_set(pi[0]->mass, pi[1]->mass, pi[2]->mass, pi[3]->mass); - mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass); - piPOrho2.v = vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2, - pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2); - pjPOrho2.v = vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2, - pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2); - pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho); - pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho); - piu.v = vec_set(pi[0]->u, pi[1]->u, pi[2]->u, pi[3]->u); - pju.v = vec_set(pj[0]->u, pj[1]->u, pj[2]->u, pj[3]->u); - ci.v = vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed, - pi[2]->force.soundspeed, pi[3]->force.soundspeed); - cj.v = vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed, - pj[2]->force.soundspeed, pj[3]->force.soundspeed); - for (k = 0; k < 3; k++) { - vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k]); - vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k]); - } - for (k = 0; k < 3; k++) - dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k]); - balsara.v = vec_set(pi[0]->force.balsara, pi[1]->force.balsara, - pi[2]->force.balsara, pi[3]->force.balsara) + - vec_set(pj[0]->force.balsara, pj[1]->force.balsara, - pj[2]->force.balsara, pj[3]->force.balsara); - pialpha.v = vec_set(pi[0]->alpha, pi[1]->alpha, pi[2]->alpha, pi[3]->alpha); - pjalpha.v = vec_set(pj[0]->alpha, pj[1]->alpha, pj[2]->alpha, pj[3]->alpha); -#else -#error -#endif - - /* Get the radius and inverse radius. */ - r2.v = vec_load(R2); - ri.v = vec_rsqrt(r2.v); - ri.v = ri.v - vec_set1(0.5f) * ri.v * (r2.v * ri.v * ri.v - vec_set1(1.0f)); - r.v = r2.v * ri.v; - - /* Get the kernel for hi. */ - hi.v = vec_load(Hi); - hi_inv.v = vec_rcp(hi.v); - hi_inv.v = hi_inv.v - hi_inv.v * (hi.v * hi_inv.v - vec_set1(1.0f)); - hid_inv = pow_dimension_plus_one_vec(hi_inv); - xi.v = r.v * hi_inv.v; - kernel_deval_vec(&xi, &wi, &wi_dx); - wi_dr.v = hid_inv.v * wi_dx.v; - - /* Get the kernel for hj. */ - hj.v = vec_load(Hj); - hj_inv.v = vec_rcp(hj.v); - hj_inv.v = hj_inv.v - hj_inv.v * (hj.v * hj_inv.v - vec_set1(1.0f)); - hjd_inv = pow_dimension_plus_one_vec(hj_inv); - xj.v = r.v * hj_inv.v; - kernel_deval_vec(&xj, &wj, &wj_dx); - wj_dr.v = hjd_inv.v * wj_dx.v; - - /* Compute dv dot r. */ - dvdr.v = ((vi[0].v - vj[0].v) * dx[0].v) + ((vi[1].v - vj[1].v) * dx[1].v) + - ((vi[2].v - vj[2].v) * dx[2].v); - dvdr.v = dvdr.v * ri.v; - - /* Get the time derivative for h. */ - pih_dt.v = mj.v / pjrho.v * dvdr.v * wi_dr.v; - pjh_dt.v = mi.v / pirho.v * dvdr.v * wj_dr.v; - - /* Compute the relative velocity. (This is 0 if the particles move away from - * each other and negative otherwise) */ - omega_ij.v = vec_fmin(dvdr.v, vec_set1(0.0f)); - - /* Compute signal velocity */ - v_sig.v = ci.v + cj.v - vec_set1(2.0f) * omega_ij.v; - - /* Compute viscosity parameter */ - alpha_ij.v = vec_set1(-0.5f) * (pialpha.v + pjalpha.v); - - /* Compute viscosity tensor */ - Pi_ij.v = balsara.v * alpha_ij.v * v_sig.v * omega_ij.v / (pirho.v + pjrho.v); - Pi_ij.v *= (wi_dr.v + wj_dr.v); - - /* Thermal conductivity */ - v_sig_u.v = vec_sqrt(vec_set1(2.f * hydro_gamma_minus_one) * - vec_fabs(pirho.v * piu.v - pjrho.v * pju.v) / - (pirho.v + pjrho.v)); - tc.v = vec_set1(const_conductivity_alpha) * v_sig_u.v / (pirho.v + pjrho.v); - tc.v *= (wi_dr.v + wj_dr.v); - - /* Get the common factor out. */ - w.v = ri.v * ((piPOrho2.v * wi_dr.v + pjPOrho2.v * wj_dr.v) + - vec_set1(0.25f) * Pi_ij.v); - - /* Use the force, Luke! */ - for (k = 0; k < 3; k++) { - f.v = dx[k].v * w.v; - pia[k].v = mj.v * f.v; - pja[k].v = mi.v * f.v; - } - - /* Get the time derivative for u. */ - piu_dt.v = - mj.v * dvdr.v * (piPOrho2.v * wi_dr.v + vec_set1(0.125f) * Pi_ij.v); - pju_dt.v = - mi.v * dvdr.v * (pjPOrho2.v * wj_dr.v + vec_set1(0.125f) * Pi_ij.v); - - /* Add the thermal conductivity */ - piu_dt.v += mj.v * tc.v * (piu.v - pju.v); - pju_dt.v += mi.v * tc.v * (pju.v - piu.v); - - /* Store the forces back on the particles. */ - for (k = 0; k < VEC_SIZE; k++) { - pi[k]->force.u_dt += piu_dt.f[k]; - pj[k]->force.u_dt += pju_dt.f[k]; - pi[k]->force.h_dt -= pih_dt.f[k]; - pj[k]->force.h_dt -= pjh_dt.f[k]; - pi[k]->force.v_sig = max(pi[k]->force.v_sig, v_sig.f[k]); - pj[k]->force.v_sig = max(pj[k]->force.v_sig, v_sig.f[k]); - for (j = 0; j < 3; j++) { - pi[k]->a_hydro[j] -= pia[j].f[k]; - pj[k]->a_hydro[j] += pja[j].f[k]; - } - } - -#else - - for (int k = 0; k < VEC_SIZE; k++) - runner_iact_force(R2[k], &Dx[3 * k], Hi[k], Hj[k], pi[k], pj[k]); - -#endif -} - /** * @brief Force loop (non-symmetric version) */ @@ -740,196 +328,4 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( pi->force.v_sig = max(pi->force.v_sig, v_sig); } -/** - * @brief Force loop (Vectorized non-symmetric version) - */ -__attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force( - float *R2, float *Dx, float *Hi, float *Hj, struct part **pi, - struct part **pj) { - -#ifdef WITH_VECTORIZATION - - vector r, r2, ri; - vector xi, xj; - vector hi, hj, hi_inv, hj_inv; - vector hid_inv, hjd_inv; - vector wi, wj, wi_dx, wj_dx, wi_dr, wj_dr, dvdr; - vector w; - vector piPOrho2, pjPOrho2, pirho, pjrho, piu, pju; - vector mj; - vector f; - vector dx[3]; - vector vi[3], vj[3]; - vector pia[3]; - vector piu_dt; - vector pih_dt; - vector ci, cj, v_sig; - vector omega_ij, Pi_ij, balsara; - vector pialpha, pjalpha, alpha_ij, v_sig_u, tc; - int j, k; - -/* Load stuff. */ -#if VEC_SIZE == 8 - mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass, - pj[4]->mass, pj[5]->mass, pj[6]->mass, pj[7]->mass); - piPOrho2.v = vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2, - pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2, - pi[4]->force.P_over_rho2, pi[5]->force.P_over_rho2, - pi[6]->force.P_over_rho2, pi[7]->force.P_over_rho2); - pjPOrho2.v = vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2, - pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2, - pj[4]->force.P_over_rho2, pj[5]->force.P_over_rho2, - pj[6]->force.P_over_rho2, pj[7]->force.P_over_rho2); - pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho, pi[4]->rho, - pi[5]->rho, pi[6]->rho, pi[7]->rho); - pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho, pj[4]->rho, - pj[5]->rho, pj[6]->rho, pj[7]->rho); - piu.v = vec_set(pi[0]->u, pi[1]->u, pi[2]->u, pi[3]->u, pi[4]->u, pi[5]->u, - pi[6]->u, pi[7]->u); - pju.v = vec_set(pj[0]->u, pj[1]->u, pj[2]->u, pj[3]->u, pj[4]->u, pj[5]->u, - pj[6]->u, pj[7]->u); - ci.v = vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed, - pi[2]->force.soundspeed, pi[3]->force.soundspeed, - pi[4]->force.soundspeed, pi[5]->force.soundspeed, - pi[6]->force.soundspeed, pi[7]->force.soundspeed); - cj.v = vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed, - pj[2]->force.soundspeed, pj[3]->force.soundspeed, - pj[4]->force.soundspeed, pj[5]->force.soundspeed, - pj[6]->force.soundspeed, pj[7]->force.soundspeed); - for (k = 0; k < 3; k++) { - vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k], - pi[4]->v[k], pi[5]->v[k], pi[6]->v[k], pi[7]->v[k]); - vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k], - pj[4]->v[k], pj[5]->v[k], pj[6]->v[k], pj[7]->v[k]); - } - for (k = 0; k < 3; k++) - dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k], Dx[12 + k], - Dx[15 + k], Dx[18 + k], Dx[21 + k]); - balsara.v = - vec_set(pi[0]->force.balsara, pi[1]->force.balsara, pi[2]->force.balsara, - pi[3]->force.balsara, pi[4]->force.balsara, pi[5]->force.balsara, - pi[6]->force.balsara, pi[7]->force.balsara) + - vec_set(pj[0]->force.balsara, pj[1]->force.balsara, pj[2]->force.balsara, - pj[3]->force.balsara, pj[4]->force.balsara, pj[5]->force.balsara, - pj[6]->force.balsara, pj[7]->force.balsara); - pialpha.v = vec_set(pi[0]->alpha, pi[1]->alpha, pi[2]->alpha, pi[3]->alpha, - pi[4]->alpha, pi[5]->alpha, pi[6]->alpha, pi[7]->alpha); - pjalpha.v = vec_set(pj[0]->alpha, pj[1]->alpha, pj[2]->alpha, pj[3]->alpha, - pj[4]->alpha, pj[5]->alpha, pj[6]->alpha, pj[7]->alpha); -#elif VEC_SIZE == 4 - mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass); - piPOrho2.v = vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2, - pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2); - pjPOrho2.v = vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2, - pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2); - pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho); - pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho); - piu.v = vec_set(pi[0]->u, pi[1]->u, pi[2]->u, pi[3]->u); - pju.v = vec_set(pj[0]->u, pj[1]->u, pj[2]->u, pj[3]->u); - ci.v = vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed, - pi[2]->force.soundspeed, pi[3]->force.soundspeed); - cj.v = vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed, - pj[2]->force.soundspeed, pj[3]->force.soundspeed); - for (k = 0; k < 3; k++) { - vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k]); - vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k]); - } - for (k = 0; k < 3; k++) - dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k]); - balsara.v = vec_set(pi[0]->force.balsara, pi[1]->force.balsara, - pi[2]->force.balsara, pi[3]->force.balsara) + - vec_set(pj[0]->force.balsara, pj[1]->force.balsara, - pj[2]->force.balsara, pj[3]->force.balsara); - pialpha.v = vec_set(pi[0]->alpha, pi[1]->alpha, pi[2]->alpha, pi[3]->alpha); - pjalpha.v = vec_set(pj[0]->alpha, pj[1]->alpha, pj[2]->alpha, pj[3]->alpha); -#else -#error -#endif - - /* Get the radius and inverse radius. */ - r2.v = vec_load(R2); - ri.v = vec_rsqrt(r2.v); - ri.v = ri.v - vec_set1(0.5f) * ri.v * (r2.v * ri.v * ri.v - vec_set1(1.0f)); - r.v = r2.v * ri.v; - - /* Get the kernel for hi. */ - hi.v = vec_load(Hi); - hi_inv.v = vec_rcp(hi.v); - hi_inv.v = hi_inv.v - hi_inv.v * (hi.v * hi_inv.v - vec_set1(1.0f)); - hid_inv = pow_dimension_plus_one_vec(hi_inv); - xi.v = r.v * hi_inv.v; - kernel_deval_vec(&xi, &wi, &wi_dx); - wi_dr.v = hid_inv.v * wi_dx.v; - - /* Get the kernel for hj. */ - hj.v = vec_load(Hj); - hj_inv.v = vec_rcp(hj.v); - hj_inv.v = hj_inv.v - hj_inv.v * (hj.v * hj_inv.v - vec_set1(1.0f)); - hjd_inv = pow_dimension_plus_one_vec(hj_inv); - xj.v = r.v * hj_inv.v; - kernel_deval_vec(&xj, &wj, &wj_dx); - wj_dr.v = hjd_inv.v * wj_dx.v; - - /* Compute dv dot r. */ - dvdr.v = ((vi[0].v - vj[0].v) * dx[0].v) + ((vi[1].v - vj[1].v) * dx[1].v) + - ((vi[2].v - vj[2].v) * dx[2].v); - dvdr.v = dvdr.v * ri.v; - - /* Get the time derivative for h. */ - pih_dt.v = mj.v / pjrho.v * dvdr.v * wi_dr.v; - - /* Compute the relative velocity. (This is 0 if the particles move away from - * each other and negative otherwise) */ - omega_ij.v = vec_fmin(dvdr.v, vec_set1(0.0f)); - - /* Compute signal velocity */ - v_sig.v = ci.v + cj.v - vec_set1(2.0f) * omega_ij.v; - - /* Compute viscosity parameter */ - alpha_ij.v = vec_set1(-0.5f) * (pialpha.v + pjalpha.v); - - /* Compute viscosity tensor */ - Pi_ij.v = balsara.v * alpha_ij.v * v_sig.v * omega_ij.v / (pirho.v + pjrho.v); - Pi_ij.v *= (wi_dr.v + wj_dr.v); - - /* Thermal conductivity */ - v_sig_u.v = vec_sqrt(vec_set1(2.f * hydro_gamma_minus_one) * - vec_fabs(pirho.v * piu.v - pjrho.v * pju.v) / - (pirho.v + pjrho.v)); - tc.v = vec_set1(const_conductivity_alpha) * v_sig_u.v / (pirho.v + pjrho.v); - tc.v *= (wi_dr.v + wj_dr.v); - - /* Get the common factor out. */ - w.v = ri.v * ((piPOrho2.v * wi_dr.v + pjPOrho2.v * wj_dr.v) + - vec_set1(0.25f) * Pi_ij.v); - - /* Use the force, Luke! */ - for (k = 0; k < 3; k++) { - f.v = dx[k].v * w.v; - pia[k].v = mj.v * f.v; - } - - /* Get the time derivative for u. */ - piu_dt.v = - mj.v * dvdr.v * (piPOrho2.v * wi_dr.v + vec_set1(0.125f) * Pi_ij.v); - - /* Add the thermal conductivity */ - piu_dt.v += mj.v * tc.v * (piu.v - pju.v); - - /* Store the forces back on the particles. */ - for (k = 0; k < VEC_SIZE; k++) { - pi[k]->force.u_dt += piu_dt.f[k]; - pi[k]->force.h_dt -= pih_dt.f[k]; - pi[k]->force.v_sig = max(pi[k]->force.v_sig, v_sig.f[k]); - for (j = 0; j < 3; j++) pi[k]->a_hydro[j] -= pia[j].f[k]; - } - -#else - - for (int k = 0; k < VEC_SIZE; k++) - runner_iact_nonsym_force(R2[k], &Dx[3 * k], Hi[k], Hj[k], pi[k], pj[k]); - -#endif -} - #endif /* SWIFT_DEFAULT_HYDRO_IACT_H */ diff --git a/src/hydro/Gadget2/hydro_iact.h b/src/hydro/Gadget2/hydro_iact.h index cce4fb03ee405fbdf2232184778a0f426e63014a..3851dd6e894eafff29d35942ab0f364b5919e029 100644 --- a/src/hydro/Gadget2/hydro_iact.h +++ b/src/hydro/Gadget2/hydro_iact.h @@ -105,128 +105,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_density( pj->density.rot_v[2] += facj * curlvr[2]; } -/** - * @brief Density loop (Vectorized version) - */ -__attribute__((always_inline)) INLINE static void runner_iact_vec_density( - float *R2, float *Dx, float *Hi, float *Hj, struct part **pi, - struct part **pj) { - -#ifdef WITH_OLD_VECTORIZATION - - vector r, ri, r2, ui, uj, hi, hj, hi_inv, hj_inv, wi, wj, wi_dx, wj_dx; - vector rhoi, rhoj, rhoi_dh, rhoj_dh, wcounti, wcountj, wcounti_dh, wcountj_dh; - vector mi, mj; - vector dx[3], dv[3]; - vector vi[3], vj[3]; - vector dvdr, div_vi, div_vj; - vector curlvr[3], curl_vi[3], curl_vj[3]; - int k, j; - -#if VEC_SIZE == 8 - /* Get the masses. */ - mi.v = vec_set(pi[0]->mass, pi[1]->mass, pi[2]->mass, pi[3]->mass, - pi[4]->mass, pi[5]->mass, pi[6]->mass, pi[7]->mass); - mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass, - pj[4]->mass, pj[5]->mass, pj[6]->mass, pj[7]->mass); - /* Get each velocity component. */ - for (k = 0; k < 3; k++) { - vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k], - pi[4]->v[k], pi[5]->v[k], pi[6]->v[k], pi[7]->v[k]); - vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k], - pj[4]->v[k], pj[5]->v[k], pj[6]->v[k], pj[7]->v[k]); - } - /* Get each component of particle separation. - * (Dx={dx1,dy1,dz1,dx2,dy2,dz2,...,dxn,dyn,dzn})*/ - for (k = 0; k < 3; k++) - dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k], Dx[12 + k], - Dx[15 + k], Dx[18 + k], Dx[21 + k]); -#elif VEC_SIZE == 4 - mi.v = vec_set(pi[0]->mass, pi[1]->mass, pi[2]->mass, pi[3]->mass); - mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass); - for (k = 0; k < 3; k++) { - vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k]); - vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k]); - } - for (k = 0; k < 3; k++) - dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k]); -#else - error("Unknown vector size."); -#endif - - /* Get the radius and inverse radius. */ - r2.v = vec_load(R2); - ri = vec_reciprocal_sqrt(r2); - r.v = r2.v * ri.v; - - hi.v = vec_load(Hi); - hi_inv = vec_reciprocal(hi); - ui.v = r.v * hi_inv.v; - - hj.v = vec_load(Hj); - hj_inv = vec_reciprocal(hj); - uj.v = r.v * hj_inv.v; - - /* Compute the kernel function. */ - kernel_deval_vec(&ui, &wi, &wi_dx); - kernel_deval_vec(&uj, &wj, &wj_dx); - - /* Compute dv. */ - dv[0].v = vi[0].v - vj[0].v; - dv[1].v = vi[1].v - vj[1].v; - dv[2].v = vi[2].v - vj[2].v; - - /* Compute dv dot r */ - dvdr.v = (dv[0].v * dx[0].v) + (dv[1].v * dx[1].v) + (dv[2].v * dx[2].v); - dvdr.v = dvdr.v * ri.v; - - /* Compute dv cross r */ - curlvr[0].v = dv[1].v * dx[2].v - dv[2].v * dx[1].v; - curlvr[1].v = dv[2].v * dx[0].v - dv[0].v * dx[2].v; - curlvr[2].v = dv[0].v * dx[1].v - dv[1].v * dx[0].v; - for (k = 0; k < 3; k++) curlvr[k].v *= ri.v; - - /* Compute density of pi. */ - rhoi.v = mj.v * wi.v; - rhoi_dh.v = mj.v * (vec_set1(hydro_dimension) * wi.v + ui.v * wi_dx.v); - wcounti.v = wi.v; - wcounti_dh.v = (vec_set1(hydro_dimension) * wi.v + ui.v * wi_dx.v); - div_vi.v = mj.v * dvdr.v * wi_dx.v; - for (k = 0; k < 3; k++) curl_vi[k].v = mj.v * curlvr[k].v * wi_dx.v; - - /* Compute density of pj. */ - rhoj.v = mi.v * wj.v; - rhoj_dh.v = mi.v * (vec_set1(hydro_dimension) * wj.v + uj.v * wj_dx.v); - wcountj.v = wj.v; - wcountj_dh.v = (vec_set1(hydro_dimension) * wj.v + uj.v * wj_dx.v); - div_vj.v = mi.v * dvdr.v * wj_dx.v; - for (k = 0; k < 3; k++) curl_vj[k].v = mi.v * curlvr[k].v * wj_dx.v; - - /* Update particles. */ - for (k = 0; k < VEC_SIZE; k++) { - pi[k]->rho += rhoi.f[k]; - pi[k]->density.rho_dh -= rhoi_dh.f[k]; - pi[k]->density.wcount += wcounti.f[k]; - pi[k]->density.wcount_dh -= wcounti_dh.f[k]; - pi[k]->density.div_v -= div_vi.f[k]; - for (j = 0; j < 3; j++) pi[k]->density.rot_v[j] += curl_vi[j].f[k]; - pj[k]->rho += rhoj.f[k]; - pj[k]->density.rho_dh -= rhoj_dh.f[k]; - pj[k]->density.wcount += wcountj.f[k]; - pj[k]->density.wcount_dh -= wcountj_dh.f[k]; - pj[k]->density.div_v -= div_vj.f[k]; - for (j = 0; j < 3; j++) pj[k]->density.rot_v[j] += curl_vj[j].f[k]; - } - -#else - - error( - "The Gadget2 serial version of runner_iact_density was called when the " - "vectorised version should have been used."); - -#endif -} - /** * @brief Density loop (non-symmetric version) */ @@ -275,105 +153,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_density( pi->density.rot_v[2] += fac * curlvr[2]; } -/** - * @brief Density loop (non-symmetric vectorized version) - */ -__attribute__((always_inline)) INLINE static void -runner_iact_nonsym_vec_density(float *R2, float *Dx, float *Hi, float *Hj, - struct part **pi, struct part **pj) { - -#ifdef WITH_OLD_VECTORIZATION - - vector r, ri, r2, ui, hi, hi_inv, wi, wi_dx; - vector rhoi, rhoi_dh, wcounti, wcounti_dh, div_vi; - vector mj; - vector dx[3], dv[3]; - vector vi[3], vj[3]; - vector dvdr; - vector curlvr[3], curl_vi[3]; - int k, j; - -#if VEC_SIZE == 8 - /* Get the masses. */ - mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass, - pj[4]->mass, pj[5]->mass, pj[6]->mass, pj[7]->mass); - /* Get each velocity component. */ - for (k = 0; k < 3; k++) { - vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k], - pi[4]->v[k], pi[5]->v[k], pi[6]->v[k], pi[7]->v[k]); - vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k], - pj[4]->v[k], pj[5]->v[k], pj[6]->v[k], pj[7]->v[k]); - } - /* Get each component of particle separation. - * (Dx={dx1,dy1,dz1,dx2,dy2,dz2,...,dxn,dyn,dzn})*/ - for (k = 0; k < 3; k++) - dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k], Dx[12 + k], - Dx[15 + k], Dx[18 + k], Dx[21 + k]); -#elif VEC_SIZE == 4 - mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass); - for (k = 0; k < 3; k++) { - vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k]); - vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k]); - } - for (k = 0; k < 3; k++) - dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k]); -#else - error("Unknown vector size."); -#endif - - /* Get the radius and inverse radius. */ - r2.v = vec_load(R2); - ri = vec_reciprocal_sqrt(r2); - r.v = r2.v * ri.v; - - hi.v = vec_load(Hi); - hi_inv = vec_reciprocal(hi); - ui.v = r.v * hi_inv.v; - - kernel_deval_vec(&ui, &wi, &wi_dx); - - /* Compute dv. */ - dv[0].v = vi[0].v - vj[0].v; - dv[1].v = vi[1].v - vj[1].v; - dv[2].v = vi[2].v - vj[2].v; - - /* Compute dv dot r */ - dvdr.v = (dv[0].v * dx[0].v) + (dv[1].v * dx[1].v) + (dv[2].v * dx[2].v); - dvdr.v = dvdr.v * ri.v; - - /* Compute dv cross r */ - curlvr[0].v = dv[1].v * dx[2].v - dv[2].v * dx[1].v; - curlvr[1].v = dv[2].v * dx[0].v - dv[0].v * dx[2].v; - curlvr[2].v = dv[0].v * dx[1].v - dv[1].v * dx[0].v; - for (k = 0; k < 3; k++) curlvr[k].v *= ri.v; - - /* Compute density of pi. */ - rhoi.v = mj.v * wi.v; - rhoi_dh.v = mj.v * (vec_set1(hydro_dimension) * wi.v + ui.v * wi_dx.v); - wcounti.v = wi.v; - wcounti_dh.v = (vec_set1(hydro_dimension) * wi.v + ui.v * wi_dx.v); - div_vi.v = mj.v * dvdr.v * wi_dx.v; - for (k = 0; k < 3; k++) curl_vi[k].v = mj.v * curlvr[k].v * wi_dx.v; - - /* Update particles. */ - for (k = 0; k < VEC_SIZE; k++) { - pi[k]->rho += rhoi.f[k]; - pi[k]->density.rho_dh -= rhoi_dh.f[k]; - pi[k]->density.wcount += wcounti.f[k]; - pi[k]->density.wcount_dh -= wcounti_dh.f[k]; - pi[k]->density.div_v -= div_vi.f[k]; - for (j = 0; j < 3; j++) pi[k]->density.rot_v[j] += curl_vi[j].f[k]; - } - -#else - - error( - "The Gadget2 serial version of runner_iact_nonsym_density was called " - "when the vectorised version should have been used."); - -#endif -} - #ifdef WITH_VECTORIZATION /** @@ -703,199 +482,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_force( pj->entropy_dt += mi * visc_term * dvdr; } -/** - * @brief Force loop (Vectorized version) - */ -__attribute__((always_inline)) INLINE static void runner_iact_vec_force( - float *R2, float *Dx, float *Hi, float *Hj, struct part **pi, - struct part **pj) { - -#ifdef WITH_OLD_VECTORIZATION - - vector r, r2, ri; - vector xi, xj; - vector hi, hj, hi_inv, hj_inv; - vector hid_inv, hjd_inv; - vector wi, wj, wi_dx, wj_dx, wi_dr, wj_dr, dvdr; - vector piPOrho2, pjPOrho2, pirho, pjrho; - vector mi, mj; - vector f; - vector grad_hi, grad_hj; - vector dx[3]; - vector vi[3], vj[3]; - vector pia[3], pja[3]; - vector pih_dt, pjh_dt; - vector ci, cj, v_sig; - vector omega_ij, mu_ij, fac_mu, balsara; - vector rho_ij, visc, visc_term, sph_term, acc, entropy_dt; - int j, k; - - fac_mu.v = vec_set1(1.f); /* Will change with cosmological integration */ - -/* Load stuff. */ -#if VEC_SIZE == 8 - mi.v = vec_set(pi[0]->mass, pi[1]->mass, pi[2]->mass, pi[3]->mass, - pi[4]->mass, pi[5]->mass, pi[6]->mass, pi[7]->mass); - mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass, - pj[4]->mass, pj[5]->mass, pj[6]->mass, pj[7]->mass); - piPOrho2.v = vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2, - pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2, - pi[4]->force.P_over_rho2, pi[5]->force.P_over_rho2, - pi[6]->force.P_over_rho2, pi[7]->force.P_over_rho2); - pjPOrho2.v = vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2, - pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2, - pj[4]->force.P_over_rho2, pj[5]->force.P_over_rho2, - pj[6]->force.P_over_rho2, pj[7]->force.P_over_rho2); - grad_hi.v = - vec_set(pi[0]->force.f, pi[1]->force.f, pi[2]->force.f, pi[3]->force.f, - pi[4]->force.f, pi[5]->force.f, pi[6]->force.f, pi[7]->force.f); - grad_hj.v = - vec_set(pj[0]->force.f, pj[1]->force.f, pj[2]->force.f, pj[3]->force.f, - pj[4]->force.f, pj[5]->force.f, pj[6]->force.f, pj[7]->force.f); - pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho, pi[4]->rho, - pi[5]->rho, pi[6]->rho, pi[7]->rho); - pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho, pj[4]->rho, - pj[5]->rho, pj[6]->rho, pj[7]->rho); - ci.v = vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed, - pi[2]->force.soundspeed, pi[3]->force.soundspeed, - pi[4]->force.soundspeed, pi[5]->force.soundspeed, - pi[6]->force.soundspeed, pi[7]->force.soundspeed); - cj.v = vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed, - pj[2]->force.soundspeed, pj[3]->force.soundspeed, - pj[4]->force.soundspeed, pj[5]->force.soundspeed, - pj[6]->force.soundspeed, pj[7]->force.soundspeed); - for (k = 0; k < 3; k++) { - vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k], - pi[4]->v[k], pi[5]->v[k], pi[6]->v[k], pi[7]->v[k]); - vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k], - pj[4]->v[k], pj[5]->v[k], pj[6]->v[k], pj[7]->v[k]); - } - for (k = 0; k < 3; k++) - dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k], Dx[12 + k], - Dx[15 + k], Dx[18 + k], Dx[21 + k]); - balsara.v = - vec_set(pi[0]->force.balsara, pi[1]->force.balsara, pi[2]->force.balsara, - pi[3]->force.balsara, pi[4]->force.balsara, pi[5]->force.balsara, - pi[6]->force.balsara, pi[7]->force.balsara) + - vec_set(pj[0]->force.balsara, pj[1]->force.balsara, pj[2]->force.balsara, - pj[3]->force.balsara, pj[4]->force.balsara, pj[5]->force.balsara, - pj[6]->force.balsara, pj[7]->force.balsara); -#elif VEC_SIZE == 4 - mi.v = vec_set(pi[0]->mass, pi[1]->mass, pi[2]->mass, pi[3]->mass); - mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass); - piPOrho2.v = vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2, - pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2); - pjPOrho2.v = vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2, - pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2); - grad_hi.v = - vec_set(pi[0]->force.f, pi[1]->force.f, pi[2]->force.f, pi[3]->force.f); - grad_hj.v = - vec_set(pj[0]->force.f, pj[1]->force.f, pj[2]->force.f, pj[3]->force.f); - pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho); - pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho); - ci.v = vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed, - pi[2]->force.soundspeed, pi[3]->force.soundspeed); - cj.v = vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed, - pj[2]->force.soundspeed, pj[3]->force.soundspeed); - for (k = 0; k < 3; k++) { - vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k]); - vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k]); - } - for (k = 0; k < 3; k++) - dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k]); - balsara.v = vec_set(pi[0]->force.balsara, pi[1]->force.balsara, - pi[2]->force.balsara, pi[3]->force.balsara) + - vec_set(pj[0]->force.balsara, pj[1]->force.balsara, - pj[2]->force.balsara, pj[3]->force.balsara); -#else - error("Unknown vector size."); -#endif - - /* Get the radius and inverse radius. */ - r2.v = vec_load(R2); - ri = vec_reciprocal_sqrt(r2); - r.v = r2.v * ri.v; - - /* Get the kernel for hi. */ - hi.v = vec_load(Hi); - hi_inv = vec_reciprocal(hi); - hid_inv = pow_dimension_plus_one_vec(hi_inv); /* 1/h^(d+1) */ - xi.v = r.v * hi_inv.v; - kernel_deval_vec(&xi, &wi, &wi_dx); - wi_dr.v = hid_inv.v * wi_dx.v; - - /* Get the kernel for hj. */ - hj.v = vec_load(Hj); - hj_inv = vec_reciprocal(hj); - hjd_inv = pow_dimension_plus_one_vec(hj_inv); /* 1/h^(d+1) */ - xj.v = r.v * hj_inv.v; - kernel_deval_vec(&xj, &wj, &wj_dx); - wj_dr.v = hjd_inv.v * wj_dx.v; - - /* Compute dv dot r. */ - dvdr.v = ((vi[0].v - vj[0].v) * dx[0].v) + ((vi[1].v - vj[1].v) * dx[1].v) + - ((vi[2].v - vj[2].v) * dx[2].v); - // dvdr.v = dvdr.v * ri.v; - - /* Compute the relative velocity. (This is 0 if the particles move away from - * each other and negative otherwise) */ - omega_ij.v = vec_fmin(dvdr.v, vec_set1(0.0f)); - mu_ij.v = fac_mu.v * ri.v * omega_ij.v; /* This is 0 or negative */ - - /* Compute signal velocity */ - v_sig.v = ci.v + cj.v - vec_set1(3.0f) * mu_ij.v; - - /* Now construct the full viscosity term */ - rho_ij.v = vec_set1(0.5f) * (pirho.v + pjrho.v); - visc.v = vec_set1(-0.25f) * vec_set1(const_viscosity_alpha) * v_sig.v * - mu_ij.v * balsara.v / rho_ij.v; - - /* Now, convolve with the kernel */ - visc_term.v = vec_set1(0.5f) * visc.v * (wi_dr.v + wj_dr.v) * ri.v; - sph_term.v = - (grad_hi.v * piPOrho2.v * wi_dr.v + grad_hj.v * pjPOrho2.v * wj_dr.v) * - ri.v; - - /* Eventually get the acceleration */ - acc.v = visc_term.v + sph_term.v; - - /* Use the force, Luke! */ - for (k = 0; k < 3; k++) { - f.v = dx[k].v * acc.v; - pia[k].v = mj.v * f.v; - pja[k].v = mi.v * f.v; - } - - /* Get the time derivative for h. */ - pih_dt.v = mj.v * dvdr.v * ri.v / pjrho.v * wi_dr.v; - pjh_dt.v = mi.v * dvdr.v * ri.v / pirho.v * wj_dr.v; - - /* Change in entropy */ - entropy_dt.v = visc_term.v * dvdr.v; - - /* Store the forces back on the particles. */ - for (k = 0; k < VEC_SIZE; k++) { - for (j = 0; j < 3; j++) { - pi[k]->a_hydro[j] -= pia[j].f[k]; - pj[k]->a_hydro[j] += pja[j].f[k]; - } - pi[k]->force.h_dt -= pih_dt.f[k]; - pj[k]->force.h_dt -= pjh_dt.f[k]; - pi[k]->force.v_sig = max(pi[k]->force.v_sig, v_sig.f[k]); - pj[k]->force.v_sig = max(pj[k]->force.v_sig, v_sig.f[k]); - pi[k]->entropy_dt += entropy_dt.f[k] * mj.f[k]; - pj[k]->entropy_dt += entropy_dt.f[k] * mi.f[k]; - } - -#else - - error( - "The Gadget2 serial version of runner_iact_nonsym_force was called when " - "the vectorised version should have been used."); - -#endif -} - /** * @brief Force loop (non-symmetric version) */ @@ -985,188 +571,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( pi->entropy_dt += mj * visc_term * dvdr; } -/** - * @brief Force loop (Vectorized non-symmetric version) - */ -__attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force( - float *R2, float *Dx, float *Hi, float *Hj, struct part **pi, - struct part **pj) { - -#ifdef WITH_OLD_VECTORIZATION - - vector r, r2, ri; - vector xi, xj; - vector hi, hj, hi_inv, hj_inv; - vector hid_inv, hjd_inv; - vector wi, wj, wi_dx, wj_dx, wi_dr, wj_dr, dvdr; - vector piPOrho2, pjPOrho2, pirho, pjrho; - vector mj; - vector f; - vector grad_hi, grad_hj; - vector dx[3]; - vector vi[3], vj[3]; - vector pia[3]; - vector pih_dt; - vector ci, cj, v_sig; - vector omega_ij, mu_ij, fac_mu, balsara; - vector rho_ij, visc, visc_term, sph_term, acc, entropy_dt; - int j, k; - - fac_mu.v = vec_set1(1.f); /* Will change with cosmological integration */ - -/* Load stuff. */ -#if VEC_SIZE == 8 - mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass, - pj[4]->mass, pj[5]->mass, pj[6]->mass, pj[7]->mass); - piPOrho2.v = vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2, - pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2, - pi[4]->force.P_over_rho2, pi[5]->force.P_over_rho2, - pi[6]->force.P_over_rho2, pi[7]->force.P_over_rho2); - pjPOrho2.v = vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2, - pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2, - pj[4]->force.P_over_rho2, pj[5]->force.P_over_rho2, - pj[6]->force.P_over_rho2, pj[7]->force.P_over_rho2); - grad_hi.v = - vec_set(pi[0]->force.f, pi[1]->force.f, pi[2]->force.f, pi[3]->force.f, - pi[4]->force.f, pi[5]->force.f, pi[6]->force.f, pi[7]->force.f); - grad_hj.v = - vec_set(pj[0]->force.f, pj[1]->force.f, pj[2]->force.f, pj[3]->force.f, - pj[4]->force.f, pj[5]->force.f, pj[6]->force.f, pj[7]->force.f); - pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho, pi[4]->rho, - pi[5]->rho, pi[6]->rho, pi[7]->rho); - pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho, pj[4]->rho, - pj[5]->rho, pj[6]->rho, pj[7]->rho); - ci.v = vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed, - pi[2]->force.soundspeed, pi[3]->force.soundspeed, - pi[4]->force.soundspeed, pi[5]->force.soundspeed, - pi[6]->force.soundspeed, pi[7]->force.soundspeed); - cj.v = vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed, - pj[2]->force.soundspeed, pj[3]->force.soundspeed, - pj[4]->force.soundspeed, pj[5]->force.soundspeed, - pj[6]->force.soundspeed, pj[7]->force.soundspeed); - for (k = 0; k < 3; k++) { - vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k], - pi[4]->v[k], pi[5]->v[k], pi[6]->v[k], pi[7]->v[k]); - vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k], - pj[4]->v[k], pj[5]->v[k], pj[6]->v[k], pj[7]->v[k]); - } - for (k = 0; k < 3; k++) - dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k], Dx[12 + k], - Dx[15 + k], Dx[18 + k], Dx[21 + k]); - balsara.v = - vec_set(pi[0]->force.balsara, pi[1]->force.balsara, pi[2]->force.balsara, - pi[3]->force.balsara, pi[4]->force.balsara, pi[5]->force.balsara, - pi[6]->force.balsara, pi[7]->force.balsara) + - vec_set(pj[0]->force.balsara, pj[1]->force.balsara, pj[2]->force.balsara, - pj[3]->force.balsara, pj[4]->force.balsara, pj[5]->force.balsara, - pj[6]->force.balsara, pj[7]->force.balsara); -#elif VEC_SIZE == 4 - mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass); - piPOrho2.v = vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2, - pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2); - pjPOrho2.v = vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2, - pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2); - grad_hi.v = - vec_set(pi[0]->force.f, pi[1]->force.f, pi[2]->force.f, pi[3]->force.f); - grad_hj.v = - vec_set(pj[0]->force.f, pj[1]->force.f, pj[2]->force.f, pj[3]->force.f); - pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho); - pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho); - ci.v = vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed, - pi[2]->force.soundspeed, pi[3]->force.soundspeed); - cj.v = vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed, - pj[2]->force.soundspeed, pj[3]->force.soundspeed); - for (k = 0; k < 3; k++) { - vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k]); - vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k]); - } - for (k = 0; k < 3; k++) - dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k]); - balsara.v = vec_set(pi[0]->force.balsara, pi[1]->force.balsara, - pi[2]->force.balsara, pi[3]->force.balsara) + - vec_set(pj[0]->force.balsara, pj[1]->force.balsara, - pj[2]->force.balsara, pj[3]->force.balsara); -#else - error("Unknown vector size."); -#endif - - /* Get the radius and inverse radius. */ - r2.v = vec_load(R2); - ri = vec_reciprocal_sqrt(r2); - r.v = r2.v * ri.v; - - /* Get the kernel for hi. */ - hi.v = vec_load(Hi); - hi_inv = vec_reciprocal(hi); - hid_inv = pow_dimension_plus_one_vec(hi_inv); - xi.v = r.v * hi_inv.v; - kernel_deval_vec(&xi, &wi, &wi_dx); - wi_dr.v = hid_inv.v * wi_dx.v; - - /* Get the kernel for hj. */ - hj.v = vec_load(Hj); - hj_inv = vec_reciprocal(hj); - hjd_inv = pow_dimension_plus_one_vec(hj_inv); - xj.v = r.v * hj_inv.v; - kernel_deval_vec(&xj, &wj, &wj_dx); - wj_dr.v = hjd_inv.v * wj_dx.v; - - /* Compute dv dot r. */ - dvdr.v = ((vi[0].v - vj[0].v) * dx[0].v) + ((vi[1].v - vj[1].v) * dx[1].v) + - ((vi[2].v - vj[2].v) * dx[2].v); - // dvdr.v = dvdr.v * ri.v; - - /* Compute the relative velocity. (This is 0 if the particles move away from - * each other and negative otherwise) */ - omega_ij.v = vec_fmin(dvdr.v, vec_set1(0.0f)); - mu_ij.v = fac_mu.v * ri.v * omega_ij.v; /* This is 0 or negative */ - - /* Compute signal velocity */ - v_sig.v = ci.v + cj.v - vec_set1(3.0f) * mu_ij.v; - - /* Now construct the full viscosity term */ - rho_ij.v = vec_set1(0.5f) * (pirho.v + pjrho.v); - visc.v = vec_set1(-0.25f) * vec_set1(const_viscosity_alpha) * v_sig.v * - mu_ij.v * balsara.v / rho_ij.v; - - /* Now, convolve with the kernel */ - visc_term.v = vec_set1(0.5f) * visc.v * (wi_dr.v + wj_dr.v) * ri.v; - sph_term.v = - (grad_hi.v * piPOrho2.v * wi_dr.v + grad_hj.v * pjPOrho2.v * wj_dr.v) * - ri.v; - - /* Eventually get the acceleration */ - acc.v = visc_term.v + sph_term.v; - - /* Use the force, Luke! */ - for (k = 0; k < 3; k++) { - f.v = dx[k].v * acc.v; - pia[k].v = mj.v * f.v; - } - - /* Get the time derivative for h. */ - pih_dt.v = mj.v * dvdr.v * ri.v / pjrho.v * wi_dr.v; - - /* Change in entropy */ - entropy_dt.v = mj.v * visc_term.v * dvdr.v; - - /* Store the forces back on the particles. */ - for (k = 0; k < VEC_SIZE; k++) { - for (j = 0; j < 3; j++) pi[k]->a_hydro[j] -= pia[j].f[k]; - pi[k]->force.h_dt -= pih_dt.f[k]; - pi[k]->force.v_sig = max(pi[k]->force.v_sig, v_sig.f[k]); - pi[k]->entropy_dt += entropy_dt.f[k]; - } - -#else - - error( - "The Gadget2 serial version of runner_iact_nonsym_force was called when " - "the vectorised version should have been used."); - -#endif -} - #ifdef WITH_VECTORIZATION static const vector const_viscosity_alpha_fac = FILL_VEC(-0.25f * const_viscosity_alpha); diff --git a/src/hydro/Gizmo/hydro_iact.h b/src/hydro/Gizmo/hydro_iact.h index 0c7c8251b7d1c105dfc0c4b1637724accadaa4ae..3dd66eecb408213fef5c03fae56c33b7d4de34cb 100644 --- a/src/hydro/Gizmo/hydro_iact.h +++ b/src/hydro/Gizmo/hydro_iact.h @@ -594,37 +594,3 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( runner_iact_fluxes_common(r2, dx, hi, hj, pi, pj, 0); } - -//// EMPTY VECTORIZED VERSIONS (gradients methods are missing...) - -__attribute__((always_inline)) INLINE static void runner_iact_vec_density( - float *R2, float *Dx, float *Hi, float *Hj, struct part **pi, - struct part **pj) { - error( - "Vectorised versions of the Gizmo interaction functions do not exist " - "yet!"); -} - -__attribute__((always_inline)) INLINE static void -runner_iact_nonsym_vec_density(float *R2, float *Dx, float *Hi, float *Hj, - struct part **pi, struct part **pj) { - error( - "Vectorised versions of the Gizmo interaction functions do not exist " - "yet!"); -} - -__attribute__((always_inline)) INLINE static void runner_iact_vec_force( - float *R2, float *Dx, float *Hi, float *Hj, struct part **pi, - struct part **pj) { - error( - "Vectorised versions of the Gizmo interaction functions do not exist " - "yet!"); -} - -__attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force( - float *R2, float *Dx, float *Hi, float *Hj, struct part **pi, - struct part **pj) { - error( - "Vectorised versions of the Gizmo interaction functions do not exist " - "yet!"); -} diff --git a/src/hydro/Minimal/hydro_iact.h b/src/hydro/Minimal/hydro_iact.h index 621177a3363e651e12dd728ad96ddadce3812f0e..0d4db1123d23e59146a2b026700c7b44a9e99f0c 100644 --- a/src/hydro/Minimal/hydro_iact.h +++ b/src/hydro/Minimal/hydro_iact.h @@ -70,17 +70,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_density( pj->density.wcount_dh -= (hydro_dimension * wj + uj * wj_dx); } -/** - * @brief Density loop (Vectorized version) - */ -__attribute__((always_inline)) INLINE static void runner_iact_vec_density( - float *R2, float *Dx, float *Hi, float *Hj, struct part **pi, - struct part **pj) { - error( - "A vectorised version of the Minimal density interaction function does " - "not exist yet!"); -} - /** * @brief Density loop (non-symmetric version) */ @@ -105,17 +94,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_density( pi->density.wcount_dh -= (hydro_dimension * wi + ui * wi_dx); } -/** - * @brief Density loop (non-symmetric vectorized version) - */ -__attribute__((always_inline)) INLINE static void -runner_iact_nonsym_vec_density(float *R2, float *Dx, float *Hi, float *Hj, - struct part **pi, struct part **pj) { - error( - "A vectorised version of the Minimal non-symmetric density interaction " - "function does not exist yet!"); -} - /** * @brief Force loop */ @@ -216,17 +194,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_force( pj->force.v_sig = max(pj->force.v_sig, v_sig); } -/** - * @brief Force loop (Vectorized version) - */ -__attribute__((always_inline)) INLINE static void runner_iact_vec_force( - float *R2, float *Dx, float *Hi, float *Hj, struct part **pi, - struct part **pj) { - error( - "A vectorised version of the Minimal force interaction function does not " - "exist yet!"); -} - /** * @brief Force loop (non-symmetric version) */ @@ -318,15 +285,4 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( pi->force.v_sig = max(pi->force.v_sig, v_sig); } -/** - * @brief Force loop (Vectorized non-symmetric version) - */ -__attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force( - float *R2, float *Dx, float *Hi, float *Hj, struct part **pi, - struct part **pj) { - error( - "A vectorised version of the Minimal non-symmetric force interaction " - "function does not exist yet!"); -} - #endif /* SWIFT_MINIMAL_HYDRO_IACT_H */ diff --git a/src/hydro/PressureEntropy/hydro_iact.h b/src/hydro/PressureEntropy/hydro_iact.h index 37a9f2b01af16fe598b414a9f67123849bee1442..171f7911a31b53fce74713e6d7e244bc79c59828 100644 --- a/src/hydro/PressureEntropy/hydro_iact.h +++ b/src/hydro/PressureEntropy/hydro_iact.h @@ -110,16 +110,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_density( pj->density.rot_v[2] += facj * curlvr[2]; } -/** - * @brief Density loop (Vectorized version) - */ -__attribute__((always_inline)) INLINE static void runner_iact_vec_density( - float *R2, float *Dx, float *Hi, float *Hj, struct part **pi, - struct part **pj) { - - error("Vectorized version of Pressure-Entropy SPH routine not existant yet."); -} - /** * @brief Density loop (non-symmetric version) */ @@ -173,16 +163,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_density( pi->density.rot_v[2] += fac * curlvr[2]; } -/** - * @brief Density loop (non-symmetric vectorized version) - */ -__attribute__((always_inline)) INLINE static void -runner_iact_nonsym_vec_density(float *R2, float *Dx, float *Hi, float *Hj, - struct part **pi, struct part **pj) { - - error("Vectorized version of Pressure-Entropy SPH routine not existant yet."); -} - /** * @brief Force loop */ @@ -284,16 +264,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_force( pj->entropy_dt += mi * visc_term * r_inv * dvdr; } -/** - * @brief Force loop (Vectorized version) - */ -__attribute__((always_inline)) INLINE static void runner_iact_vec_force( - float *R2, float *Dx, float *Hi, float *Hj, struct part **pi, - struct part **pj) { - - error("Vectorized version of Pressure-Entropy SPH routine not existant yet."); -} - /** * @brief Force loop (non-symmetric version) */ @@ -388,14 +358,4 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( pi->entropy_dt += mj * visc_term * r_inv * dvdr; } -/** - * @brief Force loop (Vectorized non-symmetric version) - */ -__attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force( - float *R2, float *Dx, float *Hi, float *Hj, struct part **pi, - struct part **pj) { - - error("Vectorized version of Pressure-Entropy SPH routine not existant yet."); -} - #endif /* SWIFT_PRESSURE_ENTROPY_HYDRO_IACT_H */ diff --git a/src/hydro/Shadowswift/hydro_iact.h b/src/hydro/Shadowswift/hydro_iact.h index 63f7bdc6900c8fe9887879f3ff7e6c5eaff1b281..28c3ce5521c8f15a971d47dbb3e48cdb2a57e77d 100644 --- a/src/hydro/Shadowswift/hydro_iact.h +++ b/src/hydro/Shadowswift/hydro_iact.h @@ -346,20 +346,3 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( runner_iact_fluxes_common(r2, dx, hi, hj, pi, pj, 0); } -//// EMPTY VECTORIZED VERSIONS (gradients methods are missing...) - -__attribute__((always_inline)) INLINE static void runner_iact_vec_density( - float *R2, float *Dx, float *Hi, float *Hj, struct part **pi, - struct part **pj) {} - -__attribute__((always_inline)) INLINE static void -runner_iact_nonsym_vec_density(float *R2, float *Dx, float *Hi, float *Hj, - struct part **pi, struct part **pj) {} - -__attribute__((always_inline)) INLINE static void runner_iact_vec_force( - float *R2, float *Dx, float *Hi, float *Hj, struct part **pi, - struct part **pj) {} - -__attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force( - float *R2, float *Dx, float *Hi, float *Hj, struct part **pi, - struct part **pj) {}