Skip to content
Snippets Groups Projects
Commit f0198314 authored by Matthieu Schaller's avatar Matthieu Schaller
Browse files

Removed all the old vectorized interaction routines.

parent 62555575
No related branches found
No related tags found
1 merge request!419Dopair2 fix
......@@ -96,120 +96,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_density(
for (k = 0; k < 3; k++) pj->density.rot_v[k] += mi * curlvr[k] * wj_dx;
}
/**
* @brief Density loop (Vectorized version)
*/
__attribute__((always_inline)) INLINE static void runner_iact_vec_density(
float *R2, float *Dx, float *Hi, float *Hj, struct part **pi,
struct part **pj) {
#ifdef WITH_VECTORIZATION
vector r, ri, r2, xi, xj, hi, hj, hi_inv, hj_inv, wi, wj, wi_dx, wj_dx;
vector rhoi, rhoj, rhoi_dh, rhoj_dh, wcounti, wcountj, wcounti_dh, wcountj_dh;
vector mi, mj;
vector dx[3], dv[3];
vector vi[3], vj[3];
vector dvdr, div_vi, div_vj;
vector curlvr[3], rot_vi[3], rot_vj[3];
int k, j;
#if VEC_SIZE == 8
mi.v = vec_set(pi[0]->mass, pi[1]->mass, pi[2]->mass, pi[3]->mass,
pi[4]->mass, pi[5]->mass, pi[6]->mass, pi[7]->mass);
mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass,
pj[4]->mass, pj[5]->mass, pj[6]->mass, pj[7]->mass);
for (k = 0; k < 3; k++) {
vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k],
pi[4]->v[k], pi[5]->v[k], pi[6]->v[k], pi[7]->v[k]);
vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k],
pj[4]->v[k], pj[5]->v[k], pj[6]->v[k], pj[7]->v[k]);
}
for (k = 0; k < 3; k++)
dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k], Dx[12 + k],
Dx[15 + k], Dx[18 + k], Dx[21 + k]);
#elif VEC_SIZE == 4
mi.v = vec_set(pi[0]->mass, pi[1]->mass, pi[2]->mass, pi[3]->mass);
mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass);
for (k = 0; k < 3; k++) {
vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k]);
vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k]);
}
for (k = 0; k < 3; k++)
dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k]);
#endif
/* Get the radius and inverse radius. */
r2.v = vec_load(R2);
ri.v = vec_rsqrt(r2.v);
ri.v = ri.v - vec_set1(0.5f) * ri.v * (r2.v * ri.v * ri.v - vec_set1(1.0f));
r.v = r2.v * ri.v;
hi.v = vec_load(Hi);
hi_inv.v = vec_rcp(hi.v);
hi_inv.v = hi_inv.v - hi_inv.v * (hi_inv.v * hi.v - vec_set1(1.0f));
xi.v = r.v * hi_inv.v;
hj.v = vec_load(Hj);
hj_inv.v = vec_rcp(hj.v);
hj_inv.v = hj_inv.v - hj_inv.v * (hj_inv.v * hj.v - vec_set1(1.0f));
xj.v = r.v * hj_inv.v;
kernel_deval_vec(&xi, &wi, &wi_dx);
kernel_deval_vec(&xj, &wj, &wj_dx);
/* Compute dv. */
dv[0].v = vi[0].v - vj[0].v;
dv[1].v = vi[1].v - vj[1].v;
dv[2].v = vi[2].v - vj[2].v;
/* Compute dv dot r */
dvdr.v = (dv[0].v * dx[0].v) + (dv[1].v * dx[1].v) + (dv[2].v * dx[2].v);
dvdr.v = dvdr.v * ri.v;
/* Compute dv cross r */
curlvr[0].v = dv[1].v * dx[2].v - dv[2].v * dx[1].v;
curlvr[1].v = dv[2].v * dx[0].v - dv[0].v * dx[2].v;
curlvr[2].v = dv[0].v * dx[1].v - dv[1].v * dx[0].v;
for (k = 0; k < 3; k++) curlvr[k].v *= ri.v;
rhoi.v = mj.v * wi.v;
rhoi_dh.v = mj.v * (vec_set1(hydro_dimension) * wi.v + xi.v * wi_dx.v);
wcounti.v = wi.v;
wcounti_dh.v = xi.v * wi_dx.v;
div_vi.v = mj.v * dvdr.v * wi_dx.v;
for (k = 0; k < 3; k++) rot_vi[k].v = mj.v * curlvr[k].v * wi_dx.v;
rhoj.v = mi.v * wj.v;
rhoj_dh.v = mi.v * (vec_set1(hydro_dimension) * wj.v + xj.v * wj_dx.v);
wcountj.v = wj.v;
wcountj_dh.v = xj.v * wj_dx.v;
div_vj.v = mi.v * dvdr.v * wj_dx.v;
for (k = 0; k < 3; k++) rot_vj[k].v = mi.v * curlvr[k].v * wj_dx.v;
for (k = 0; k < VEC_SIZE; k++) {
pi[k]->rho += rhoi.f[k];
pi[k]->rho_dh -= rhoi_dh.f[k];
pi[k]->density.wcount += wcounti.f[k];
pi[k]->density.wcount_dh -= wcounti_dh.f[k];
pi[k]->density.div_v -= div_vi.f[k];
for (j = 0; j < 3; j++) pi[k]->density.rot_v[j] += rot_vi[j].f[k];
pj[k]->rho += rhoj.f[k];
pj[k]->rho_dh -= rhoj_dh.f[k];
pj[k]->density.wcount += wcountj.f[k];
pj[k]->density.wcount_dh -= wcountj_dh.f[k];
pj[k]->density.div_v -= div_vj.f[k];
for (j = 0; j < 3; j++) pj[k]->density.rot_v[j] += rot_vj[j].f[k];
}
#else
for (int k = 0; k < VEC_SIZE; k++)
runner_iact_density(R2[k], &Dx[3 * k], Hi[k], Hj[k], pi[k], pj[k]);
#endif
}
/**
* @brief Density loop (non-symmetric version)
*/
......@@ -258,98 +144,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_density(
for (k = 0; k < 3; k++) pi->density.rot_v[k] += mj * curlvr[k] * wi_dx;
}
/**
* @brief Density loop (non-symmetric vectorized version)
*/
__attribute__((always_inline)) INLINE static void
runner_iact_nonsym_vec_density(float *R2, float *Dx, float *Hi, float *Hj,
struct part **pi, struct part **pj) {
#ifdef WITH_VECTORIZATION
vector r, ri, r2, xi, hi, hi_inv, wi, wi_dx;
vector rhoi, rhoi_dh, wcounti, wcounti_dh, div_vi;
vector mj;
vector dx[3], dv[3];
vector vi[3], vj[3];
vector dvdr;
vector curlvr[3], rot_vi[3];
int k, j;
#if VEC_SIZE == 8
mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass,
pj[4]->mass, pj[5]->mass, pj[6]->mass, pj[7]->mass);
for (k = 0; k < 3; k++) {
vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k],
pi[4]->v[k], pi[5]->v[k], pi[6]->v[k], pi[7]->v[k]);
vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k],
pj[4]->v[k], pj[5]->v[k], pj[6]->v[k], pj[7]->v[k]);
}
for (k = 0; k < 3; k++)
dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k], Dx[12 + k],
Dx[15 + k], Dx[18 + k], Dx[21 + k]);
#elif VEC_SIZE == 4
mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass);
for (k = 0; k < 3; k++) {
vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k]);
vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k]);
}
for (k = 0; k < 3; k++)
dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k]);
#endif
/* Get the radius and inverse radius. */
r2.v = vec_load(R2);
ri.v = vec_rsqrt(r2.v);
ri.v = ri.v - vec_set1(0.5f) * ri.v * (r2.v * ri.v * ri.v - vec_set1(1.0f));
r.v = r2.v * ri.v;
hi.v = vec_load(Hi);
hi_inv.v = vec_rcp(hi.v);
hi_inv.v = hi_inv.v - hi_inv.v * (hi_inv.v * hi.v - vec_set1(1.0f));
xi.v = r.v * hi_inv.v;
kernel_deval_vec(&xi, &wi, &wi_dx);
/* Compute dv. */
dv[0].v = vi[0].v - vj[0].v;
dv[1].v = vi[1].v - vj[1].v;
dv[2].v = vi[2].v - vj[2].v;
/* Compute dv dot r */
dvdr.v = (dv[0].v * dx[0].v) + (dv[1].v * dx[1].v) + (dv[2].v * dx[2].v);
dvdr.v = dvdr.v * ri.v;
/* Compute dv cross r */
curlvr[0].v = dv[1].v * dx[2].v - dv[2].v * dx[1].v;
curlvr[1].v = dv[2].v * dx[0].v - dv[0].v * dx[2].v;
curlvr[2].v = dv[0].v * dx[1].v - dv[1].v * dx[0].v;
for (k = 0; k < 3; k++) curlvr[k].v *= ri.v;
rhoi.v = mj.v * wi.v;
rhoi_dh.v = mj.v * (vec_set1(hydro_dimension) * wi.v + xi.v * wi_dx.v);
wcounti.v = wi.v;
wcounti_dh.v = xi.v * wi_dx.v;
div_vi.v = mj.v * dvdr.v * wi_dx.v;
for (k = 0; k < 3; k++) rot_vi[k].v = mj.v * curlvr[k].v * wi_dx.v;
for (k = 0; k < VEC_SIZE; k++) {
pi[k]->rho += rhoi.f[k];
pi[k]->rho_dh -= rhoi_dh.f[k];
pi[k]->density.wcount += wcounti.f[k];
pi[k]->density.wcount_dh -= wcounti_dh.f[k];
pi[k]->density.div_v -= div_vi.f[k];
for (j = 0; j < 3; j++) pi[k]->density.rot_v[j] += rot_vi[j].f[k];
}
#else
for (int k = 0; k < VEC_SIZE; k++)
runner_iact_nonsym_density(R2[k], &Dx[3 * k], Hi[k], Hj[k], pi[k], pj[k]);
#endif
}
/**
* @brief Force loop
*/
......@@ -445,212 +239,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_force(
pj->force.v_sig = max(pj->force.v_sig, v_sig);
}
/**
* @brief Force loop (Vectorized version)
*/
__attribute__((always_inline)) INLINE static void runner_iact_vec_force(
float *R2, float *Dx, float *Hi, float *Hj, struct part **pi,
struct part **pj) {
#ifdef WITH_VECTORIZATION
vector r, r2, ri;
vector xi, xj;
vector hi, hj, hi_inv, hj_inv;
vector hid_inv, hjd_inv;
vector wi, wj, wi_dx, wj_dx, wi_dr, wj_dr, dvdr;
vector w;
vector piPOrho2, pjPOrho2, pirho, pjrho, piu, pju;
vector mi, mj;
vector f;
vector dx[3];
vector vi[3], vj[3];
vector pia[3], pja[3];
vector piu_dt, pju_dt;
vector pih_dt, pjh_dt;
vector ci, cj, v_sig;
vector omega_ij, Pi_ij, balsara;
vector pialpha, pjalpha, alpha_ij, v_sig_u, tc;
int j, k;
/* Load stuff. */
#if VEC_SIZE == 8
mi.v = vec_set(pi[0]->mass, pi[1]->mass, pi[2]->mass, pi[3]->mass,
pi[4]->mass, pi[5]->mass, pi[6]->mass, pi[7]->mass);
mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass,
pj[4]->mass, pj[5]->mass, pj[6]->mass, pj[7]->mass);
piPOrho2.v = vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2,
pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2,
pi[4]->force.P_over_rho2, pi[5]->force.P_over_rho2,
pi[6]->force.P_over_rho2, pi[7]->force.P_over_rho2);
pjPOrho2.v = vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2,
pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2,
pj[4]->force.P_over_rho2, pj[5]->force.P_over_rho2,
pj[6]->force.P_over_rho2, pj[7]->force.P_over_rho2);
pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho, pi[4]->rho,
pi[5]->rho, pi[6]->rho, pi[7]->rho);
pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho, pj[4]->rho,
pj[5]->rho, pj[6]->rho, pj[7]->rho);
piu.v = vec_set(pi[0]->u, pi[1]->u, pi[2]->u, pi[3]->u, pi[4]->u, pi[5]->u,
pi[6]->u, pi[7]->u);
pju.v = vec_set(pj[0]->u, pj[1]->u, pj[2]->u, pj[3]->u, pj[4]->u, pj[5]->u,
pj[6]->u, pj[7]->u);
ci.v = vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed,
pi[2]->force.soundspeed, pi[3]->force.soundspeed,
pi[4]->force.soundspeed, pi[5]->force.soundspeed,
pi[6]->force.soundspeed, pi[7]->force.soundspeed);
cj.v = vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed,
pj[2]->force.soundspeed, pj[3]->force.soundspeed,
pj[4]->force.soundspeed, pj[5]->force.soundspeed,
pj[6]->force.soundspeed, pj[7]->force.soundspeed);
for (k = 0; k < 3; k++) {
vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k],
pi[4]->v[k], pi[5]->v[k], pi[6]->v[k], pi[7]->v[k]);
vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k],
pj[4]->v[k], pj[5]->v[k], pj[6]->v[k], pj[7]->v[k]);
}
for (k = 0; k < 3; k++)
dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k], Dx[12 + k],
Dx[15 + k], Dx[18 + k], Dx[21 + k]);
balsara.v =
vec_set(pi[0]->force.balsara, pi[1]->force.balsara, pi[2]->force.balsara,
pi[3]->force.balsara, pi[4]->force.balsara, pi[5]->force.balsara,
pi[6]->force.balsara, pi[7]->force.balsara) +
vec_set(pj[0]->force.balsara, pj[1]->force.balsara, pj[2]->force.balsara,
pj[3]->force.balsara, pj[4]->force.balsara, pj[5]->force.balsara,
pj[6]->force.balsara, pj[7]->force.balsara);
pialpha.v = vec_set(pi[0]->alpha, pi[1]->alpha, pi[2]->alpha, pi[3]->alpha,
pi[4]->alpha, pi[5]->alpha, pi[6]->alpha, pi[7]->alpha);
pjalpha.v = vec_set(pj[0]->alpha, pj[1]->alpha, pj[2]->alpha, pj[3]->alpha,
pj[4]->alpha, pj[5]->alpha, pj[6]->alpha, pj[7]->alpha);
#elif VEC_SIZE == 4
mi.v = vec_set(pi[0]->mass, pi[1]->mass, pi[2]->mass, pi[3]->mass);
mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass);
piPOrho2.v = vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2,
pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2);
pjPOrho2.v = vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2,
pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2);
pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho);
pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho);
piu.v = vec_set(pi[0]->u, pi[1]->u, pi[2]->u, pi[3]->u);
pju.v = vec_set(pj[0]->u, pj[1]->u, pj[2]->u, pj[3]->u);
ci.v = vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed,
pi[2]->force.soundspeed, pi[3]->force.soundspeed);
cj.v = vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed,
pj[2]->force.soundspeed, pj[3]->force.soundspeed);
for (k = 0; k < 3; k++) {
vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k]);
vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k]);
}
for (k = 0; k < 3; k++)
dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k]);
balsara.v = vec_set(pi[0]->force.balsara, pi[1]->force.balsara,
pi[2]->force.balsara, pi[3]->force.balsara) +
vec_set(pj[0]->force.balsara, pj[1]->force.balsara,
pj[2]->force.balsara, pj[3]->force.balsara);
pialpha.v = vec_set(pi[0]->alpha, pi[1]->alpha, pi[2]->alpha, pi[3]->alpha);
pjalpha.v = vec_set(pj[0]->alpha, pj[1]->alpha, pj[2]->alpha, pj[3]->alpha);
#else
#error
#endif
/* Get the radius and inverse radius. */
r2.v = vec_load(R2);
ri.v = vec_rsqrt(r2.v);
ri.v = ri.v - vec_set1(0.5f) * ri.v * (r2.v * ri.v * ri.v - vec_set1(1.0f));
r.v = r2.v * ri.v;
/* Get the kernel for hi. */
hi.v = vec_load(Hi);
hi_inv.v = vec_rcp(hi.v);
hi_inv.v = hi_inv.v - hi_inv.v * (hi.v * hi_inv.v - vec_set1(1.0f));
hid_inv = pow_dimension_plus_one_vec(hi_inv);
xi.v = r.v * hi_inv.v;
kernel_deval_vec(&xi, &wi, &wi_dx);
wi_dr.v = hid_inv.v * wi_dx.v;
/* Get the kernel for hj. */
hj.v = vec_load(Hj);
hj_inv.v = vec_rcp(hj.v);
hj_inv.v = hj_inv.v - hj_inv.v * (hj.v * hj_inv.v - vec_set1(1.0f));
hjd_inv = pow_dimension_plus_one_vec(hj_inv);
xj.v = r.v * hj_inv.v;
kernel_deval_vec(&xj, &wj, &wj_dx);
wj_dr.v = hjd_inv.v * wj_dx.v;
/* Compute dv dot r. */
dvdr.v = ((vi[0].v - vj[0].v) * dx[0].v) + ((vi[1].v - vj[1].v) * dx[1].v) +
((vi[2].v - vj[2].v) * dx[2].v);
dvdr.v = dvdr.v * ri.v;
/* Get the time derivative for h. */
pih_dt.v = mj.v / pjrho.v * dvdr.v * wi_dr.v;
pjh_dt.v = mi.v / pirho.v * dvdr.v * wj_dr.v;
/* Compute the relative velocity. (This is 0 if the particles move away from
* each other and negative otherwise) */
omega_ij.v = vec_fmin(dvdr.v, vec_set1(0.0f));
/* Compute signal velocity */
v_sig.v = ci.v + cj.v - vec_set1(2.0f) * omega_ij.v;
/* Compute viscosity parameter */
alpha_ij.v = vec_set1(-0.5f) * (pialpha.v + pjalpha.v);
/* Compute viscosity tensor */
Pi_ij.v = balsara.v * alpha_ij.v * v_sig.v * omega_ij.v / (pirho.v + pjrho.v);
Pi_ij.v *= (wi_dr.v + wj_dr.v);
/* Thermal conductivity */
v_sig_u.v = vec_sqrt(vec_set1(2.f * hydro_gamma_minus_one) *
vec_fabs(pirho.v * piu.v - pjrho.v * pju.v) /
(pirho.v + pjrho.v));
tc.v = vec_set1(const_conductivity_alpha) * v_sig_u.v / (pirho.v + pjrho.v);
tc.v *= (wi_dr.v + wj_dr.v);
/* Get the common factor out. */
w.v = ri.v * ((piPOrho2.v * wi_dr.v + pjPOrho2.v * wj_dr.v) +
vec_set1(0.25f) * Pi_ij.v);
/* Use the force, Luke! */
for (k = 0; k < 3; k++) {
f.v = dx[k].v * w.v;
pia[k].v = mj.v * f.v;
pja[k].v = mi.v * f.v;
}
/* Get the time derivative for u. */
piu_dt.v =
mj.v * dvdr.v * (piPOrho2.v * wi_dr.v + vec_set1(0.125f) * Pi_ij.v);
pju_dt.v =
mi.v * dvdr.v * (pjPOrho2.v * wj_dr.v + vec_set1(0.125f) * Pi_ij.v);
/* Add the thermal conductivity */
piu_dt.v += mj.v * tc.v * (piu.v - pju.v);
pju_dt.v += mi.v * tc.v * (pju.v - piu.v);
/* Store the forces back on the particles. */
for (k = 0; k < VEC_SIZE; k++) {
pi[k]->force.u_dt += piu_dt.f[k];
pj[k]->force.u_dt += pju_dt.f[k];
pi[k]->force.h_dt -= pih_dt.f[k];
pj[k]->force.h_dt -= pjh_dt.f[k];
pi[k]->force.v_sig = max(pi[k]->force.v_sig, v_sig.f[k]);
pj[k]->force.v_sig = max(pj[k]->force.v_sig, v_sig.f[k]);
for (j = 0; j < 3; j++) {
pi[k]->a_hydro[j] -= pia[j].f[k];
pj[k]->a_hydro[j] += pja[j].f[k];
}
}
#else
for (int k = 0; k < VEC_SIZE; k++)
runner_iact_force(R2[k], &Dx[3 * k], Hi[k], Hj[k], pi[k], pj[k]);
#endif
}
/**
* @brief Force loop (non-symmetric version)
*/
......@@ -740,196 +328,4 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force(
pi->force.v_sig = max(pi->force.v_sig, v_sig);
}
/**
* @brief Force loop (Vectorized non-symmetric version)
*/
__attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force(
float *R2, float *Dx, float *Hi, float *Hj, struct part **pi,
struct part **pj) {
#ifdef WITH_VECTORIZATION
vector r, r2, ri;
vector xi, xj;
vector hi, hj, hi_inv, hj_inv;
vector hid_inv, hjd_inv;
vector wi, wj, wi_dx, wj_dx, wi_dr, wj_dr, dvdr;
vector w;
vector piPOrho2, pjPOrho2, pirho, pjrho, piu, pju;
vector mj;
vector f;
vector dx[3];
vector vi[3], vj[3];
vector pia[3];
vector piu_dt;
vector pih_dt;
vector ci, cj, v_sig;
vector omega_ij, Pi_ij, balsara;
vector pialpha, pjalpha, alpha_ij, v_sig_u, tc;
int j, k;
/* Load stuff. */
#if VEC_SIZE == 8
mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass,
pj[4]->mass, pj[5]->mass, pj[6]->mass, pj[7]->mass);
piPOrho2.v = vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2,
pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2,
pi[4]->force.P_over_rho2, pi[5]->force.P_over_rho2,
pi[6]->force.P_over_rho2, pi[7]->force.P_over_rho2);
pjPOrho2.v = vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2,
pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2,
pj[4]->force.P_over_rho2, pj[5]->force.P_over_rho2,
pj[6]->force.P_over_rho2, pj[7]->force.P_over_rho2);
pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho, pi[4]->rho,
pi[5]->rho, pi[6]->rho, pi[7]->rho);
pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho, pj[4]->rho,
pj[5]->rho, pj[6]->rho, pj[7]->rho);
piu.v = vec_set(pi[0]->u, pi[1]->u, pi[2]->u, pi[3]->u, pi[4]->u, pi[5]->u,
pi[6]->u, pi[7]->u);
pju.v = vec_set(pj[0]->u, pj[1]->u, pj[2]->u, pj[3]->u, pj[4]->u, pj[5]->u,
pj[6]->u, pj[7]->u);
ci.v = vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed,
pi[2]->force.soundspeed, pi[3]->force.soundspeed,
pi[4]->force.soundspeed, pi[5]->force.soundspeed,
pi[6]->force.soundspeed, pi[7]->force.soundspeed);
cj.v = vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed,
pj[2]->force.soundspeed, pj[3]->force.soundspeed,
pj[4]->force.soundspeed, pj[5]->force.soundspeed,
pj[6]->force.soundspeed, pj[7]->force.soundspeed);
for (k = 0; k < 3; k++) {
vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k],
pi[4]->v[k], pi[5]->v[k], pi[6]->v[k], pi[7]->v[k]);
vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k],
pj[4]->v[k], pj[5]->v[k], pj[6]->v[k], pj[7]->v[k]);
}
for (k = 0; k < 3; k++)
dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k], Dx[12 + k],
Dx[15 + k], Dx[18 + k], Dx[21 + k]);
balsara.v =
vec_set(pi[0]->force.balsara, pi[1]->force.balsara, pi[2]->force.balsara,
pi[3]->force.balsara, pi[4]->force.balsara, pi[5]->force.balsara,
pi[6]->force.balsara, pi[7]->force.balsara) +
vec_set(pj[0]->force.balsara, pj[1]->force.balsara, pj[2]->force.balsara,
pj[3]->force.balsara, pj[4]->force.balsara, pj[5]->force.balsara,
pj[6]->force.balsara, pj[7]->force.balsara);
pialpha.v = vec_set(pi[0]->alpha, pi[1]->alpha, pi[2]->alpha, pi[3]->alpha,
pi[4]->alpha, pi[5]->alpha, pi[6]->alpha, pi[7]->alpha);
pjalpha.v = vec_set(pj[0]->alpha, pj[1]->alpha, pj[2]->alpha, pj[3]->alpha,
pj[4]->alpha, pj[5]->alpha, pj[6]->alpha, pj[7]->alpha);
#elif VEC_SIZE == 4
mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass);
piPOrho2.v = vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2,
pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2);
pjPOrho2.v = vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2,
pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2);
pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho);
pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho);
piu.v = vec_set(pi[0]->u, pi[1]->u, pi[2]->u, pi[3]->u);
pju.v = vec_set(pj[0]->u, pj[1]->u, pj[2]->u, pj[3]->u);
ci.v = vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed,
pi[2]->force.soundspeed, pi[3]->force.soundspeed);
cj.v = vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed,
pj[2]->force.soundspeed, pj[3]->force.soundspeed);
for (k = 0; k < 3; k++) {
vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k]);
vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k]);
}
for (k = 0; k < 3; k++)
dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k]);
balsara.v = vec_set(pi[0]->force.balsara, pi[1]->force.balsara,
pi[2]->force.balsara, pi[3]->force.balsara) +
vec_set(pj[0]->force.balsara, pj[1]->force.balsara,
pj[2]->force.balsara, pj[3]->force.balsara);
pialpha.v = vec_set(pi[0]->alpha, pi[1]->alpha, pi[2]->alpha, pi[3]->alpha);
pjalpha.v = vec_set(pj[0]->alpha, pj[1]->alpha, pj[2]->alpha, pj[3]->alpha);
#else
#error
#endif
/* Get the radius and inverse radius. */
r2.v = vec_load(R2);
ri.v = vec_rsqrt(r2.v);
ri.v = ri.v - vec_set1(0.5f) * ri.v * (r2.v * ri.v * ri.v - vec_set1(1.0f));
r.v = r2.v * ri.v;
/* Get the kernel for hi. */
hi.v = vec_load(Hi);
hi_inv.v = vec_rcp(hi.v);
hi_inv.v = hi_inv.v - hi_inv.v * (hi.v * hi_inv.v - vec_set1(1.0f));
hid_inv = pow_dimension_plus_one_vec(hi_inv);
xi.v = r.v * hi_inv.v;
kernel_deval_vec(&xi, &wi, &wi_dx);
wi_dr.v = hid_inv.v * wi_dx.v;
/* Get the kernel for hj. */
hj.v = vec_load(Hj);
hj_inv.v = vec_rcp(hj.v);
hj_inv.v = hj_inv.v - hj_inv.v * (hj.v * hj_inv.v - vec_set1(1.0f));
hjd_inv = pow_dimension_plus_one_vec(hj_inv);
xj.v = r.v * hj_inv.v;
kernel_deval_vec(&xj, &wj, &wj_dx);
wj_dr.v = hjd_inv.v * wj_dx.v;
/* Compute dv dot r. */
dvdr.v = ((vi[0].v - vj[0].v) * dx[0].v) + ((vi[1].v - vj[1].v) * dx[1].v) +
((vi[2].v - vj[2].v) * dx[2].v);
dvdr.v = dvdr.v * ri.v;
/* Get the time derivative for h. */
pih_dt.v = mj.v / pjrho.v * dvdr.v * wi_dr.v;
/* Compute the relative velocity. (This is 0 if the particles move away from
* each other and negative otherwise) */
omega_ij.v = vec_fmin(dvdr.v, vec_set1(0.0f));
/* Compute signal velocity */
v_sig.v = ci.v + cj.v - vec_set1(2.0f) * omega_ij.v;
/* Compute viscosity parameter */
alpha_ij.v = vec_set1(-0.5f) * (pialpha.v + pjalpha.v);
/* Compute viscosity tensor */
Pi_ij.v = balsara.v * alpha_ij.v * v_sig.v * omega_ij.v / (pirho.v + pjrho.v);
Pi_ij.v *= (wi_dr.v + wj_dr.v);
/* Thermal conductivity */
v_sig_u.v = vec_sqrt(vec_set1(2.f * hydro_gamma_minus_one) *
vec_fabs(pirho.v * piu.v - pjrho.v * pju.v) /
(pirho.v + pjrho.v));
tc.v = vec_set1(const_conductivity_alpha) * v_sig_u.v / (pirho.v + pjrho.v);
tc.v *= (wi_dr.v + wj_dr.v);
/* Get the common factor out. */
w.v = ri.v * ((piPOrho2.v * wi_dr.v + pjPOrho2.v * wj_dr.v) +
vec_set1(0.25f) * Pi_ij.v);
/* Use the force, Luke! */
for (k = 0; k < 3; k++) {
f.v = dx[k].v * w.v;
pia[k].v = mj.v * f.v;
}
/* Get the time derivative for u. */
piu_dt.v =
mj.v * dvdr.v * (piPOrho2.v * wi_dr.v + vec_set1(0.125f) * Pi_ij.v);
/* Add the thermal conductivity */
piu_dt.v += mj.v * tc.v * (piu.v - pju.v);
/* Store the forces back on the particles. */
for (k = 0; k < VEC_SIZE; k++) {
pi[k]->force.u_dt += piu_dt.f[k];
pi[k]->force.h_dt -= pih_dt.f[k];
pi[k]->force.v_sig = max(pi[k]->force.v_sig, v_sig.f[k]);
for (j = 0; j < 3; j++) pi[k]->a_hydro[j] -= pia[j].f[k];
}
#else
for (int k = 0; k < VEC_SIZE; k++)
runner_iact_nonsym_force(R2[k], &Dx[3 * k], Hi[k], Hj[k], pi[k], pj[k]);
#endif
}
#endif /* SWIFT_DEFAULT_HYDRO_IACT_H */
This diff is collapsed.
......@@ -594,37 +594,3 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force(
runner_iact_fluxes_common(r2, dx, hi, hj, pi, pj, 0);
}
//// EMPTY VECTORIZED VERSIONS (gradients methods are missing...)
__attribute__((always_inline)) INLINE static void runner_iact_vec_density(
float *R2, float *Dx, float *Hi, float *Hj, struct part **pi,
struct part **pj) {
error(
"Vectorised versions of the Gizmo interaction functions do not exist "
"yet!");
}
__attribute__((always_inline)) INLINE static void
runner_iact_nonsym_vec_density(float *R2, float *Dx, float *Hi, float *Hj,
struct part **pi, struct part **pj) {
error(
"Vectorised versions of the Gizmo interaction functions do not exist "
"yet!");
}
__attribute__((always_inline)) INLINE static void runner_iact_vec_force(
float *R2, float *Dx, float *Hi, float *Hj, struct part **pi,
struct part **pj) {
error(
"Vectorised versions of the Gizmo interaction functions do not exist "
"yet!");
}
__attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force(
float *R2, float *Dx, float *Hi, float *Hj, struct part **pi,
struct part **pj) {
error(
"Vectorised versions of the Gizmo interaction functions do not exist "
"yet!");
}
......@@ -70,17 +70,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_density(
pj->density.wcount_dh -= (hydro_dimension * wj + uj * wj_dx);
}
/**
* @brief Density loop (Vectorized version)
*/
__attribute__((always_inline)) INLINE static void runner_iact_vec_density(
float *R2, float *Dx, float *Hi, float *Hj, struct part **pi,
struct part **pj) {
error(
"A vectorised version of the Minimal density interaction function does "
"not exist yet!");
}
/**
* @brief Density loop (non-symmetric version)
*/
......@@ -105,17 +94,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_density(
pi->density.wcount_dh -= (hydro_dimension * wi + ui * wi_dx);
}
/**
* @brief Density loop (non-symmetric vectorized version)
*/
__attribute__((always_inline)) INLINE static void
runner_iact_nonsym_vec_density(float *R2, float *Dx, float *Hi, float *Hj,
struct part **pi, struct part **pj) {
error(
"A vectorised version of the Minimal non-symmetric density interaction "
"function does not exist yet!");
}
/**
* @brief Force loop
*/
......@@ -216,17 +194,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_force(
pj->force.v_sig = max(pj->force.v_sig, v_sig);
}
/**
* @brief Force loop (Vectorized version)
*/
__attribute__((always_inline)) INLINE static void runner_iact_vec_force(
float *R2, float *Dx, float *Hi, float *Hj, struct part **pi,
struct part **pj) {
error(
"A vectorised version of the Minimal force interaction function does not "
"exist yet!");
}
/**
* @brief Force loop (non-symmetric version)
*/
......@@ -318,15 +285,4 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force(
pi->force.v_sig = max(pi->force.v_sig, v_sig);
}
/**
* @brief Force loop (Vectorized non-symmetric version)
*/
__attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force(
float *R2, float *Dx, float *Hi, float *Hj, struct part **pi,
struct part **pj) {
error(
"A vectorised version of the Minimal non-symmetric force interaction "
"function does not exist yet!");
}
#endif /* SWIFT_MINIMAL_HYDRO_IACT_H */
......@@ -110,16 +110,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_density(
pj->density.rot_v[2] += facj * curlvr[2];
}
/**
* @brief Density loop (Vectorized version)
*/
__attribute__((always_inline)) INLINE static void runner_iact_vec_density(
float *R2, float *Dx, float *Hi, float *Hj, struct part **pi,
struct part **pj) {
error("Vectorized version of Pressure-Entropy SPH routine not existant yet.");
}
/**
* @brief Density loop (non-symmetric version)
*/
......@@ -173,16 +163,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_density(
pi->density.rot_v[2] += fac * curlvr[2];
}
/**
* @brief Density loop (non-symmetric vectorized version)
*/
__attribute__((always_inline)) INLINE static void
runner_iact_nonsym_vec_density(float *R2, float *Dx, float *Hi, float *Hj,
struct part **pi, struct part **pj) {
error("Vectorized version of Pressure-Entropy SPH routine not existant yet.");
}
/**
* @brief Force loop
*/
......@@ -284,16 +264,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_force(
pj->entropy_dt += mi * visc_term * r_inv * dvdr;
}
/**
* @brief Force loop (Vectorized version)
*/
__attribute__((always_inline)) INLINE static void runner_iact_vec_force(
float *R2, float *Dx, float *Hi, float *Hj, struct part **pi,
struct part **pj) {
error("Vectorized version of Pressure-Entropy SPH routine not existant yet.");
}
/**
* @brief Force loop (non-symmetric version)
*/
......@@ -388,14 +358,4 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force(
pi->entropy_dt += mj * visc_term * r_inv * dvdr;
}
/**
* @brief Force loop (Vectorized non-symmetric version)
*/
__attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force(
float *R2, float *Dx, float *Hi, float *Hj, struct part **pi,
struct part **pj) {
error("Vectorized version of Pressure-Entropy SPH routine not existant yet.");
}
#endif /* SWIFT_PRESSURE_ENTROPY_HYDRO_IACT_H */
......@@ -346,20 +346,3 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force(
runner_iact_fluxes_common(r2, dx, hi, hj, pi, pj, 0);
}
//// EMPTY VECTORIZED VERSIONS (gradients methods are missing...)
__attribute__((always_inline)) INLINE static void runner_iact_vec_density(
float *R2, float *Dx, float *Hi, float *Hj, struct part **pi,
struct part **pj) {}
__attribute__((always_inline)) INLINE static void
runner_iact_nonsym_vec_density(float *R2, float *Dx, float *Hi, float *Hj,
struct part **pi, struct part **pj) {}
__attribute__((always_inline)) INLINE static void runner_iact_vec_force(
float *R2, float *Dx, float *Hi, float *Hj, struct part **pi,
struct part **pj) {}
__attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force(
float *R2, float *Dx, float *Hi, float *Hj, struct part **pi,
struct part **pj) {}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment