Skip to content
Snippets Groups Projects
Commit f0198314 authored by Matthieu Schaller's avatar Matthieu Schaller
Browse files

Removed all the old vectorized interaction routines.

parent 62555575
No related branches found
No related tags found
1 merge request!419Dopair2 fix
...@@ -96,120 +96,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_density( ...@@ -96,120 +96,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_density(
for (k = 0; k < 3; k++) pj->density.rot_v[k] += mi * curlvr[k] * wj_dx; for (k = 0; k < 3; k++) pj->density.rot_v[k] += mi * curlvr[k] * wj_dx;
} }
/**
* @brief Density loop (Vectorized version)
*/
__attribute__((always_inline)) INLINE static void runner_iact_vec_density(
float *R2, float *Dx, float *Hi, float *Hj, struct part **pi,
struct part **pj) {
#ifdef WITH_VECTORIZATION
vector r, ri, r2, xi, xj, hi, hj, hi_inv, hj_inv, wi, wj, wi_dx, wj_dx;
vector rhoi, rhoj, rhoi_dh, rhoj_dh, wcounti, wcountj, wcounti_dh, wcountj_dh;
vector mi, mj;
vector dx[3], dv[3];
vector vi[3], vj[3];
vector dvdr, div_vi, div_vj;
vector curlvr[3], rot_vi[3], rot_vj[3];
int k, j;
#if VEC_SIZE == 8
mi.v = vec_set(pi[0]->mass, pi[1]->mass, pi[2]->mass, pi[3]->mass,
pi[4]->mass, pi[5]->mass, pi[6]->mass, pi[7]->mass);
mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass,
pj[4]->mass, pj[5]->mass, pj[6]->mass, pj[7]->mass);
for (k = 0; k < 3; k++) {
vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k],
pi[4]->v[k], pi[5]->v[k], pi[6]->v[k], pi[7]->v[k]);
vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k],
pj[4]->v[k], pj[5]->v[k], pj[6]->v[k], pj[7]->v[k]);
}
for (k = 0; k < 3; k++)
dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k], Dx[12 + k],
Dx[15 + k], Dx[18 + k], Dx[21 + k]);
#elif VEC_SIZE == 4
mi.v = vec_set(pi[0]->mass, pi[1]->mass, pi[2]->mass, pi[3]->mass);
mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass);
for (k = 0; k < 3; k++) {
vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k]);
vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k]);
}
for (k = 0; k < 3; k++)
dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k]);
#endif
/* Get the radius and inverse radius. */
r2.v = vec_load(R2);
ri.v = vec_rsqrt(r2.v);
ri.v = ri.v - vec_set1(0.5f) * ri.v * (r2.v * ri.v * ri.v - vec_set1(1.0f));
r.v = r2.v * ri.v;
hi.v = vec_load(Hi);
hi_inv.v = vec_rcp(hi.v);
hi_inv.v = hi_inv.v - hi_inv.v * (hi_inv.v * hi.v - vec_set1(1.0f));
xi.v = r.v * hi_inv.v;
hj.v = vec_load(Hj);
hj_inv.v = vec_rcp(hj.v);
hj_inv.v = hj_inv.v - hj_inv.v * (hj_inv.v * hj.v - vec_set1(1.0f));
xj.v = r.v * hj_inv.v;
kernel_deval_vec(&xi, &wi, &wi_dx);
kernel_deval_vec(&xj, &wj, &wj_dx);
/* Compute dv. */
dv[0].v = vi[0].v - vj[0].v;
dv[1].v = vi[1].v - vj[1].v;
dv[2].v = vi[2].v - vj[2].v;
/* Compute dv dot r */
dvdr.v = (dv[0].v * dx[0].v) + (dv[1].v * dx[1].v) + (dv[2].v * dx[2].v);
dvdr.v = dvdr.v * ri.v;
/* Compute dv cross r */
curlvr[0].v = dv[1].v * dx[2].v - dv[2].v * dx[1].v;
curlvr[1].v = dv[2].v * dx[0].v - dv[0].v * dx[2].v;
curlvr[2].v = dv[0].v * dx[1].v - dv[1].v * dx[0].v;
for (k = 0; k < 3; k++) curlvr[k].v *= ri.v;
rhoi.v = mj.v * wi.v;
rhoi_dh.v = mj.v * (vec_set1(hydro_dimension) * wi.v + xi.v * wi_dx.v);
wcounti.v = wi.v;
wcounti_dh.v = xi.v * wi_dx.v;
div_vi.v = mj.v * dvdr.v * wi_dx.v;
for (k = 0; k < 3; k++) rot_vi[k].v = mj.v * curlvr[k].v * wi_dx.v;
rhoj.v = mi.v * wj.v;
rhoj_dh.v = mi.v * (vec_set1(hydro_dimension) * wj.v + xj.v * wj_dx.v);
wcountj.v = wj.v;
wcountj_dh.v = xj.v * wj_dx.v;
div_vj.v = mi.v * dvdr.v * wj_dx.v;
for (k = 0; k < 3; k++) rot_vj[k].v = mi.v * curlvr[k].v * wj_dx.v;
for (k = 0; k < VEC_SIZE; k++) {
pi[k]->rho += rhoi.f[k];
pi[k]->rho_dh -= rhoi_dh.f[k];
pi[k]->density.wcount += wcounti.f[k];
pi[k]->density.wcount_dh -= wcounti_dh.f[k];
pi[k]->density.div_v -= div_vi.f[k];
for (j = 0; j < 3; j++) pi[k]->density.rot_v[j] += rot_vi[j].f[k];
pj[k]->rho += rhoj.f[k];
pj[k]->rho_dh -= rhoj_dh.f[k];
pj[k]->density.wcount += wcountj.f[k];
pj[k]->density.wcount_dh -= wcountj_dh.f[k];
pj[k]->density.div_v -= div_vj.f[k];
for (j = 0; j < 3; j++) pj[k]->density.rot_v[j] += rot_vj[j].f[k];
}
#else
for (int k = 0; k < VEC_SIZE; k++)
runner_iact_density(R2[k], &Dx[3 * k], Hi[k], Hj[k], pi[k], pj[k]);
#endif
}
/** /**
* @brief Density loop (non-symmetric version) * @brief Density loop (non-symmetric version)
*/ */
...@@ -258,98 +144,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_density( ...@@ -258,98 +144,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_density(
for (k = 0; k < 3; k++) pi->density.rot_v[k] += mj * curlvr[k] * wi_dx; for (k = 0; k < 3; k++) pi->density.rot_v[k] += mj * curlvr[k] * wi_dx;
} }
/**
* @brief Density loop (non-symmetric vectorized version)
*/
__attribute__((always_inline)) INLINE static void
runner_iact_nonsym_vec_density(float *R2, float *Dx, float *Hi, float *Hj,
struct part **pi, struct part **pj) {
#ifdef WITH_VECTORIZATION
vector r, ri, r2, xi, hi, hi_inv, wi, wi_dx;
vector rhoi, rhoi_dh, wcounti, wcounti_dh, div_vi;
vector mj;
vector dx[3], dv[3];
vector vi[3], vj[3];
vector dvdr;
vector curlvr[3], rot_vi[3];
int k, j;
#if VEC_SIZE == 8
mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass,
pj[4]->mass, pj[5]->mass, pj[6]->mass, pj[7]->mass);
for (k = 0; k < 3; k++) {
vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k],
pi[4]->v[k], pi[5]->v[k], pi[6]->v[k], pi[7]->v[k]);
vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k],
pj[4]->v[k], pj[5]->v[k], pj[6]->v[k], pj[7]->v[k]);
}
for (k = 0; k < 3; k++)
dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k], Dx[12 + k],
Dx[15 + k], Dx[18 + k], Dx[21 + k]);
#elif VEC_SIZE == 4
mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass);
for (k = 0; k < 3; k++) {
vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k]);
vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k]);
}
for (k = 0; k < 3; k++)
dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k]);
#endif
/* Get the radius and inverse radius. */
r2.v = vec_load(R2);
ri.v = vec_rsqrt(r2.v);
ri.v = ri.v - vec_set1(0.5f) * ri.v * (r2.v * ri.v * ri.v - vec_set1(1.0f));
r.v = r2.v * ri.v;
hi.v = vec_load(Hi);
hi_inv.v = vec_rcp(hi.v);
hi_inv.v = hi_inv.v - hi_inv.v * (hi_inv.v * hi.v - vec_set1(1.0f));
xi.v = r.v * hi_inv.v;
kernel_deval_vec(&xi, &wi, &wi_dx);
/* Compute dv. */
dv[0].v = vi[0].v - vj[0].v;
dv[1].v = vi[1].v - vj[1].v;
dv[2].v = vi[2].v - vj[2].v;
/* Compute dv dot r */
dvdr.v = (dv[0].v * dx[0].v) + (dv[1].v * dx[1].v) + (dv[2].v * dx[2].v);
dvdr.v = dvdr.v * ri.v;
/* Compute dv cross r */
curlvr[0].v = dv[1].v * dx[2].v - dv[2].v * dx[1].v;
curlvr[1].v = dv[2].v * dx[0].v - dv[0].v * dx[2].v;
curlvr[2].v = dv[0].v * dx[1].v - dv[1].v * dx[0].v;
for (k = 0; k < 3; k++) curlvr[k].v *= ri.v;
rhoi.v = mj.v * wi.v;
rhoi_dh.v = mj.v * (vec_set1(hydro_dimension) * wi.v + xi.v * wi_dx.v);
wcounti.v = wi.v;
wcounti_dh.v = xi.v * wi_dx.v;
div_vi.v = mj.v * dvdr.v * wi_dx.v;
for (k = 0; k < 3; k++) rot_vi[k].v = mj.v * curlvr[k].v * wi_dx.v;
for (k = 0; k < VEC_SIZE; k++) {
pi[k]->rho += rhoi.f[k];
pi[k]->rho_dh -= rhoi_dh.f[k];
pi[k]->density.wcount += wcounti.f[k];
pi[k]->density.wcount_dh -= wcounti_dh.f[k];
pi[k]->density.div_v -= div_vi.f[k];
for (j = 0; j < 3; j++) pi[k]->density.rot_v[j] += rot_vi[j].f[k];
}
#else
for (int k = 0; k < VEC_SIZE; k++)
runner_iact_nonsym_density(R2[k], &Dx[3 * k], Hi[k], Hj[k], pi[k], pj[k]);
#endif
}
/** /**
* @brief Force loop * @brief Force loop
*/ */
...@@ -445,212 +239,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_force( ...@@ -445,212 +239,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_force(
pj->force.v_sig = max(pj->force.v_sig, v_sig); pj->force.v_sig = max(pj->force.v_sig, v_sig);
} }
/**
* @brief Force loop (Vectorized version)
*/
__attribute__((always_inline)) INLINE static void runner_iact_vec_force(
float *R2, float *Dx, float *Hi, float *Hj, struct part **pi,
struct part **pj) {
#ifdef WITH_VECTORIZATION
vector r, r2, ri;
vector xi, xj;
vector hi, hj, hi_inv, hj_inv;
vector hid_inv, hjd_inv;
vector wi, wj, wi_dx, wj_dx, wi_dr, wj_dr, dvdr;
vector w;
vector piPOrho2, pjPOrho2, pirho, pjrho, piu, pju;
vector mi, mj;
vector f;
vector dx[3];
vector vi[3], vj[3];
vector pia[3], pja[3];
vector piu_dt, pju_dt;
vector pih_dt, pjh_dt;
vector ci, cj, v_sig;
vector omega_ij, Pi_ij, balsara;
vector pialpha, pjalpha, alpha_ij, v_sig_u, tc;
int j, k;
/* Load stuff. */
#if VEC_SIZE == 8
mi.v = vec_set(pi[0]->mass, pi[1]->mass, pi[2]->mass, pi[3]->mass,
pi[4]->mass, pi[5]->mass, pi[6]->mass, pi[7]->mass);
mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass,
pj[4]->mass, pj[5]->mass, pj[6]->mass, pj[7]->mass);
piPOrho2.v = vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2,
pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2,
pi[4]->force.P_over_rho2, pi[5]->force.P_over_rho2,
pi[6]->force.P_over_rho2, pi[7]->force.P_over_rho2);
pjPOrho2.v = vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2,
pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2,
pj[4]->force.P_over_rho2, pj[5]->force.P_over_rho2,
pj[6]->force.P_over_rho2, pj[7]->force.P_over_rho2);
pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho, pi[4]->rho,
pi[5]->rho, pi[6]->rho, pi[7]->rho);
pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho, pj[4]->rho,
pj[5]->rho, pj[6]->rho, pj[7]->rho);
piu.v = vec_set(pi[0]->u, pi[1]->u, pi[2]->u, pi[3]->u, pi[4]->u, pi[5]->u,
pi[6]->u, pi[7]->u);
pju.v = vec_set(pj[0]->u, pj[1]->u, pj[2]->u, pj[3]->u, pj[4]->u, pj[5]->u,
pj[6]->u, pj[7]->u);
ci.v = vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed,
pi[2]->force.soundspeed, pi[3]->force.soundspeed,
pi[4]->force.soundspeed, pi[5]->force.soundspeed,
pi[6]->force.soundspeed, pi[7]->force.soundspeed);
cj.v = vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed,
pj[2]->force.soundspeed, pj[3]->force.soundspeed,
pj[4]->force.soundspeed, pj[5]->force.soundspeed,
pj[6]->force.soundspeed, pj[7]->force.soundspeed);
for (k = 0; k < 3; k++) {
vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k],
pi[4]->v[k], pi[5]->v[k], pi[6]->v[k], pi[7]->v[k]);
vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k],
pj[4]->v[k], pj[5]->v[k], pj[6]->v[k], pj[7]->v[k]);
}
for (k = 0; k < 3; k++)
dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k], Dx[12 + k],
Dx[15 + k], Dx[18 + k], Dx[21 + k]);
balsara.v =
vec_set(pi[0]->force.balsara, pi[1]->force.balsara, pi[2]->force.balsara,
pi[3]->force.balsara, pi[4]->force.balsara, pi[5]->force.balsara,
pi[6]->force.balsara, pi[7]->force.balsara) +
vec_set(pj[0]->force.balsara, pj[1]->force.balsara, pj[2]->force.balsara,
pj[3]->force.balsara, pj[4]->force.balsara, pj[5]->force.balsara,
pj[6]->force.balsara, pj[7]->force.balsara);
pialpha.v = vec_set(pi[0]->alpha, pi[1]->alpha, pi[2]->alpha, pi[3]->alpha,
pi[4]->alpha, pi[5]->alpha, pi[6]->alpha, pi[7]->alpha);
pjalpha.v = vec_set(pj[0]->alpha, pj[1]->alpha, pj[2]->alpha, pj[3]->alpha,
pj[4]->alpha, pj[5]->alpha, pj[6]->alpha, pj[7]->alpha);
#elif VEC_SIZE == 4
mi.v = vec_set(pi[0]->mass, pi[1]->mass, pi[2]->mass, pi[3]->mass);
mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass);
piPOrho2.v = vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2,
pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2);
pjPOrho2.v = vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2,
pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2);
pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho);
pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho);
piu.v = vec_set(pi[0]->u, pi[1]->u, pi[2]->u, pi[3]->u);
pju.v = vec_set(pj[0]->u, pj[1]->u, pj[2]->u, pj[3]->u);
ci.v = vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed,
pi[2]->force.soundspeed, pi[3]->force.soundspeed);
cj.v = vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed,
pj[2]->force.soundspeed, pj[3]->force.soundspeed);
for (k = 0; k < 3; k++) {
vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k]);
vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k]);
}
for (k = 0; k < 3; k++)
dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k]);
balsara.v = vec_set(pi[0]->force.balsara, pi[1]->force.balsara,
pi[2]->force.balsara, pi[3]->force.balsara) +
vec_set(pj[0]->force.balsara, pj[1]->force.balsara,
pj[2]->force.balsara, pj[3]->force.balsara);
pialpha.v = vec_set(pi[0]->alpha, pi[1]->alpha, pi[2]->alpha, pi[3]->alpha);
pjalpha.v = vec_set(pj[0]->alpha, pj[1]->alpha, pj[2]->alpha, pj[3]->alpha);
#else
#error
#endif
/* Get the radius and inverse radius. */
r2.v = vec_load(R2);
ri.v = vec_rsqrt(r2.v);
ri.v = ri.v - vec_set1(0.5f) * ri.v * (r2.v * ri.v * ri.v - vec_set1(1.0f));
r.v = r2.v * ri.v;
/* Get the kernel for hi. */
hi.v = vec_load(Hi);
hi_inv.v = vec_rcp(hi.v);
hi_inv.v = hi_inv.v - hi_inv.v * (hi.v * hi_inv.v - vec_set1(1.0f));
hid_inv = pow_dimension_plus_one_vec(hi_inv);
xi.v = r.v * hi_inv.v;
kernel_deval_vec(&xi, &wi, &wi_dx);
wi_dr.v = hid_inv.v * wi_dx.v;
/* Get the kernel for hj. */
hj.v = vec_load(Hj);
hj_inv.v = vec_rcp(hj.v);
hj_inv.v = hj_inv.v - hj_inv.v * (hj.v * hj_inv.v - vec_set1(1.0f));
hjd_inv = pow_dimension_plus_one_vec(hj_inv);
xj.v = r.v * hj_inv.v;
kernel_deval_vec(&xj, &wj, &wj_dx);
wj_dr.v = hjd_inv.v * wj_dx.v;
/* Compute dv dot r. */
dvdr.v = ((vi[0].v - vj[0].v) * dx[0].v) + ((vi[1].v - vj[1].v) * dx[1].v) +
((vi[2].v - vj[2].v) * dx[2].v);
dvdr.v = dvdr.v * ri.v;
/* Get the time derivative for h. */
pih_dt.v = mj.v / pjrho.v * dvdr.v * wi_dr.v;
pjh_dt.v = mi.v / pirho.v * dvdr.v * wj_dr.v;
/* Compute the relative velocity. (This is 0 if the particles move away from
* each other and negative otherwise) */
omega_ij.v = vec_fmin(dvdr.v, vec_set1(0.0f));
/* Compute signal velocity */
v_sig.v = ci.v + cj.v - vec_set1(2.0f) * omega_ij.v;
/* Compute viscosity parameter */
alpha_ij.v = vec_set1(-0.5f) * (pialpha.v + pjalpha.v);
/* Compute viscosity tensor */
Pi_ij.v = balsara.v * alpha_ij.v * v_sig.v * omega_ij.v / (pirho.v + pjrho.v);
Pi_ij.v *= (wi_dr.v + wj_dr.v);
/* Thermal conductivity */
v_sig_u.v = vec_sqrt(vec_set1(2.f * hydro_gamma_minus_one) *
vec_fabs(pirho.v * piu.v - pjrho.v * pju.v) /
(pirho.v + pjrho.v));
tc.v = vec_set1(const_conductivity_alpha) * v_sig_u.v / (pirho.v + pjrho.v);
tc.v *= (wi_dr.v + wj_dr.v);
/* Get the common factor out. */
w.v = ri.v * ((piPOrho2.v * wi_dr.v + pjPOrho2.v * wj_dr.v) +
vec_set1(0.25f) * Pi_ij.v);
/* Use the force, Luke! */
for (k = 0; k < 3; k++) {
f.v = dx[k].v * w.v;
pia[k].v = mj.v * f.v;
pja[k].v = mi.v * f.v;
}
/* Get the time derivative for u. */
piu_dt.v =
mj.v * dvdr.v * (piPOrho2.v * wi_dr.v + vec_set1(0.125f) * Pi_ij.v);
pju_dt.v =
mi.v * dvdr.v * (pjPOrho2.v * wj_dr.v + vec_set1(0.125f) * Pi_ij.v);
/* Add the thermal conductivity */
piu_dt.v += mj.v * tc.v * (piu.v - pju.v);
pju_dt.v += mi.v * tc.v * (pju.v - piu.v);
/* Store the forces back on the particles. */
for (k = 0; k < VEC_SIZE; k++) {
pi[k]->force.u_dt += piu_dt.f[k];
pj[k]->force.u_dt += pju_dt.f[k];
pi[k]->force.h_dt -= pih_dt.f[k];
pj[k]->force.h_dt -= pjh_dt.f[k];
pi[k]->force.v_sig = max(pi[k]->force.v_sig, v_sig.f[k]);
pj[k]->force.v_sig = max(pj[k]->force.v_sig, v_sig.f[k]);
for (j = 0; j < 3; j++) {
pi[k]->a_hydro[j] -= pia[j].f[k];
pj[k]->a_hydro[j] += pja[j].f[k];
}
}
#else
for (int k = 0; k < VEC_SIZE; k++)
runner_iact_force(R2[k], &Dx[3 * k], Hi[k], Hj[k], pi[k], pj[k]);
#endif
}
/** /**
* @brief Force loop (non-symmetric version) * @brief Force loop (non-symmetric version)
*/ */
...@@ -740,196 +328,4 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( ...@@ -740,196 +328,4 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force(
pi->force.v_sig = max(pi->force.v_sig, v_sig); pi->force.v_sig = max(pi->force.v_sig, v_sig);
} }
/**
* @brief Force loop (Vectorized non-symmetric version)
*/
__attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force(
float *R2, float *Dx, float *Hi, float *Hj, struct part **pi,
struct part **pj) {
#ifdef WITH_VECTORIZATION
vector r, r2, ri;
vector xi, xj;
vector hi, hj, hi_inv, hj_inv;
vector hid_inv, hjd_inv;
vector wi, wj, wi_dx, wj_dx, wi_dr, wj_dr, dvdr;
vector w;
vector piPOrho2, pjPOrho2, pirho, pjrho, piu, pju;
vector mj;
vector f;
vector dx[3];
vector vi[3], vj[3];
vector pia[3];
vector piu_dt;
vector pih_dt;
vector ci, cj, v_sig;
vector omega_ij, Pi_ij, balsara;
vector pialpha, pjalpha, alpha_ij, v_sig_u, tc;
int j, k;
/* Load stuff. */
#if VEC_SIZE == 8
mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass,
pj[4]->mass, pj[5]->mass, pj[6]->mass, pj[7]->mass);
piPOrho2.v = vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2,
pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2,
pi[4]->force.P_over_rho2, pi[5]->force.P_over_rho2,
pi[6]->force.P_over_rho2, pi[7]->force.P_over_rho2);
pjPOrho2.v = vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2,
pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2,
pj[4]->force.P_over_rho2, pj[5]->force.P_over_rho2,
pj[6]->force.P_over_rho2, pj[7]->force.P_over_rho2);
pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho, pi[4]->rho,
pi[5]->rho, pi[6]->rho, pi[7]->rho);
pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho, pj[4]->rho,
pj[5]->rho, pj[6]->rho, pj[7]->rho);
piu.v = vec_set(pi[0]->u, pi[1]->u, pi[2]->u, pi[3]->u, pi[4]->u, pi[5]->u,
pi[6]->u, pi[7]->u);
pju.v = vec_set(pj[0]->u, pj[1]->u, pj[2]->u, pj[3]->u, pj[4]->u, pj[5]->u,
pj[6]->u, pj[7]->u);
ci.v = vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed,
pi[2]->force.soundspeed, pi[3]->force.soundspeed,
pi[4]->force.soundspeed, pi[5]->force.soundspeed,
pi[6]->force.soundspeed, pi[7]->force.soundspeed);
cj.v = vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed,
pj[2]->force.soundspeed, pj[3]->force.soundspeed,
pj[4]->force.soundspeed, pj[5]->force.soundspeed,
pj[6]->force.soundspeed, pj[7]->force.soundspeed);
for (k = 0; k < 3; k++) {
vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k],
pi[4]->v[k], pi[5]->v[k], pi[6]->v[k], pi[7]->v[k]);
vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k],
pj[4]->v[k], pj[5]->v[k], pj[6]->v[k], pj[7]->v[k]);
}
for (k = 0; k < 3; k++)
dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k], Dx[12 + k],
Dx[15 + k], Dx[18 + k], Dx[21 + k]);
balsara.v =
vec_set(pi[0]->force.balsara, pi[1]->force.balsara, pi[2]->force.balsara,
pi[3]->force.balsara, pi[4]->force.balsara, pi[5]->force.balsara,
pi[6]->force.balsara, pi[7]->force.balsara) +
vec_set(pj[0]->force.balsara, pj[1]->force.balsara, pj[2]->force.balsara,
pj[3]->force.balsara, pj[4]->force.balsara, pj[5]->force.balsara,
pj[6]->force.balsara, pj[7]->force.balsara);
pialpha.v = vec_set(pi[0]->alpha, pi[1]->alpha, pi[2]->alpha, pi[3]->alpha,
pi[4]->alpha, pi[5]->alpha, pi[6]->alpha, pi[7]->alpha);
pjalpha.v = vec_set(pj[0]->alpha, pj[1]->alpha, pj[2]->alpha, pj[3]->alpha,
pj[4]->alpha, pj[5]->alpha, pj[6]->alpha, pj[7]->alpha);
#elif VEC_SIZE == 4
mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass);
piPOrho2.v = vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2,
pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2);
pjPOrho2.v = vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2,
pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2);
pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho);
pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho);
piu.v = vec_set(pi[0]->u, pi[1]->u, pi[2]->u, pi[3]->u);
pju.v = vec_set(pj[0]->u, pj[1]->u, pj[2]->u, pj[3]->u);
ci.v = vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed,
pi[2]->force.soundspeed, pi[3]->force.soundspeed);
cj.v = vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed,
pj[2]->force.soundspeed, pj[3]->force.soundspeed);
for (k = 0; k < 3; k++) {
vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k]);
vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k]);
}
for (k = 0; k < 3; k++)
dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k]);
balsara.v = vec_set(pi[0]->force.balsara, pi[1]->force.balsara,
pi[2]->force.balsara, pi[3]->force.balsara) +
vec_set(pj[0]->force.balsara, pj[1]->force.balsara,
pj[2]->force.balsara, pj[3]->force.balsara);
pialpha.v = vec_set(pi[0]->alpha, pi[1]->alpha, pi[2]->alpha, pi[3]->alpha);
pjalpha.v = vec_set(pj[0]->alpha, pj[1]->alpha, pj[2]->alpha, pj[3]->alpha);
#else
#error
#endif
/* Get the radius and inverse radius. */
r2.v = vec_load(R2);
ri.v = vec_rsqrt(r2.v);
ri.v = ri.v - vec_set1(0.5f) * ri.v * (r2.v * ri.v * ri.v - vec_set1(1.0f));
r.v = r2.v * ri.v;
/* Get the kernel for hi. */
hi.v = vec_load(Hi);
hi_inv.v = vec_rcp(hi.v);
hi_inv.v = hi_inv.v - hi_inv.v * (hi.v * hi_inv.v - vec_set1(1.0f));
hid_inv = pow_dimension_plus_one_vec(hi_inv);
xi.v = r.v * hi_inv.v;
kernel_deval_vec(&xi, &wi, &wi_dx);
wi_dr.v = hid_inv.v * wi_dx.v;
/* Get the kernel for hj. */
hj.v = vec_load(Hj);
hj_inv.v = vec_rcp(hj.v);
hj_inv.v = hj_inv.v - hj_inv.v * (hj.v * hj_inv.v - vec_set1(1.0f));
hjd_inv = pow_dimension_plus_one_vec(hj_inv);
xj.v = r.v * hj_inv.v;
kernel_deval_vec(&xj, &wj, &wj_dx);
wj_dr.v = hjd_inv.v * wj_dx.v;
/* Compute dv dot r. */
dvdr.v = ((vi[0].v - vj[0].v) * dx[0].v) + ((vi[1].v - vj[1].v) * dx[1].v) +
((vi[2].v - vj[2].v) * dx[2].v);
dvdr.v = dvdr.v * ri.v;
/* Get the time derivative for h. */
pih_dt.v = mj.v / pjrho.v * dvdr.v * wi_dr.v;
/* Compute the relative velocity. (This is 0 if the particles move away from
* each other and negative otherwise) */
omega_ij.v = vec_fmin(dvdr.v, vec_set1(0.0f));
/* Compute signal velocity */
v_sig.v = ci.v + cj.v - vec_set1(2.0f) * omega_ij.v;
/* Compute viscosity parameter */
alpha_ij.v = vec_set1(-0.5f) * (pialpha.v + pjalpha.v);
/* Compute viscosity tensor */
Pi_ij.v = balsara.v * alpha_ij.v * v_sig.v * omega_ij.v / (pirho.v + pjrho.v);
Pi_ij.v *= (wi_dr.v + wj_dr.v);
/* Thermal conductivity */
v_sig_u.v = vec_sqrt(vec_set1(2.f * hydro_gamma_minus_one) *
vec_fabs(pirho.v * piu.v - pjrho.v * pju.v) /
(pirho.v + pjrho.v));
tc.v = vec_set1(const_conductivity_alpha) * v_sig_u.v / (pirho.v + pjrho.v);
tc.v *= (wi_dr.v + wj_dr.v);
/* Get the common factor out. */
w.v = ri.v * ((piPOrho2.v * wi_dr.v + pjPOrho2.v * wj_dr.v) +
vec_set1(0.25f) * Pi_ij.v);
/* Use the force, Luke! */
for (k = 0; k < 3; k++) {
f.v = dx[k].v * w.v;
pia[k].v = mj.v * f.v;
}
/* Get the time derivative for u. */
piu_dt.v =
mj.v * dvdr.v * (piPOrho2.v * wi_dr.v + vec_set1(0.125f) * Pi_ij.v);
/* Add the thermal conductivity */
piu_dt.v += mj.v * tc.v * (piu.v - pju.v);
/* Store the forces back on the particles. */
for (k = 0; k < VEC_SIZE; k++) {
pi[k]->force.u_dt += piu_dt.f[k];
pi[k]->force.h_dt -= pih_dt.f[k];
pi[k]->force.v_sig = max(pi[k]->force.v_sig, v_sig.f[k]);
for (j = 0; j < 3; j++) pi[k]->a_hydro[j] -= pia[j].f[k];
}
#else
for (int k = 0; k < VEC_SIZE; k++)
runner_iact_nonsym_force(R2[k], &Dx[3 * k], Hi[k], Hj[k], pi[k], pj[k]);
#endif
}
#endif /* SWIFT_DEFAULT_HYDRO_IACT_H */ #endif /* SWIFT_DEFAULT_HYDRO_IACT_H */
...@@ -105,128 +105,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_density( ...@@ -105,128 +105,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_density(
pj->density.rot_v[2] += facj * curlvr[2]; pj->density.rot_v[2] += facj * curlvr[2];
} }
/**
* @brief Density loop (Vectorized version)
*/
__attribute__((always_inline)) INLINE static void runner_iact_vec_density(
float *R2, float *Dx, float *Hi, float *Hj, struct part **pi,
struct part **pj) {
#ifdef WITH_OLD_VECTORIZATION
vector r, ri, r2, ui, uj, hi, hj, hi_inv, hj_inv, wi, wj, wi_dx, wj_dx;
vector rhoi, rhoj, rhoi_dh, rhoj_dh, wcounti, wcountj, wcounti_dh, wcountj_dh;
vector mi, mj;
vector dx[3], dv[3];
vector vi[3], vj[3];
vector dvdr, div_vi, div_vj;
vector curlvr[3], curl_vi[3], curl_vj[3];
int k, j;
#if VEC_SIZE == 8
/* Get the masses. */
mi.v = vec_set(pi[0]->mass, pi[1]->mass, pi[2]->mass, pi[3]->mass,
pi[4]->mass, pi[5]->mass, pi[6]->mass, pi[7]->mass);
mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass,
pj[4]->mass, pj[5]->mass, pj[6]->mass, pj[7]->mass);
/* Get each velocity component. */
for (k = 0; k < 3; k++) {
vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k],
pi[4]->v[k], pi[5]->v[k], pi[6]->v[k], pi[7]->v[k]);
vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k],
pj[4]->v[k], pj[5]->v[k], pj[6]->v[k], pj[7]->v[k]);
}
/* Get each component of particle separation.
* (Dx={dx1,dy1,dz1,dx2,dy2,dz2,...,dxn,dyn,dzn})*/
for (k = 0; k < 3; k++)
dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k], Dx[12 + k],
Dx[15 + k], Dx[18 + k], Dx[21 + k]);
#elif VEC_SIZE == 4
mi.v = vec_set(pi[0]->mass, pi[1]->mass, pi[2]->mass, pi[3]->mass);
mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass);
for (k = 0; k < 3; k++) {
vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k]);
vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k]);
}
for (k = 0; k < 3; k++)
dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k]);
#else
error("Unknown vector size.");
#endif
/* Get the radius and inverse radius. */
r2.v = vec_load(R2);
ri = vec_reciprocal_sqrt(r2);
r.v = r2.v * ri.v;
hi.v = vec_load(Hi);
hi_inv = vec_reciprocal(hi);
ui.v = r.v * hi_inv.v;
hj.v = vec_load(Hj);
hj_inv = vec_reciprocal(hj);
uj.v = r.v * hj_inv.v;
/* Compute the kernel function. */
kernel_deval_vec(&ui, &wi, &wi_dx);
kernel_deval_vec(&uj, &wj, &wj_dx);
/* Compute dv. */
dv[0].v = vi[0].v - vj[0].v;
dv[1].v = vi[1].v - vj[1].v;
dv[2].v = vi[2].v - vj[2].v;
/* Compute dv dot r */
dvdr.v = (dv[0].v * dx[0].v) + (dv[1].v * dx[1].v) + (dv[2].v * dx[2].v);
dvdr.v = dvdr.v * ri.v;
/* Compute dv cross r */
curlvr[0].v = dv[1].v * dx[2].v - dv[2].v * dx[1].v;
curlvr[1].v = dv[2].v * dx[0].v - dv[0].v * dx[2].v;
curlvr[2].v = dv[0].v * dx[1].v - dv[1].v * dx[0].v;
for (k = 0; k < 3; k++) curlvr[k].v *= ri.v;
/* Compute density of pi. */
rhoi.v = mj.v * wi.v;
rhoi_dh.v = mj.v * (vec_set1(hydro_dimension) * wi.v + ui.v * wi_dx.v);
wcounti.v = wi.v;
wcounti_dh.v = (vec_set1(hydro_dimension) * wi.v + ui.v * wi_dx.v);
div_vi.v = mj.v * dvdr.v * wi_dx.v;
for (k = 0; k < 3; k++) curl_vi[k].v = mj.v * curlvr[k].v * wi_dx.v;
/* Compute density of pj. */
rhoj.v = mi.v * wj.v;
rhoj_dh.v = mi.v * (vec_set1(hydro_dimension) * wj.v + uj.v * wj_dx.v);
wcountj.v = wj.v;
wcountj_dh.v = (vec_set1(hydro_dimension) * wj.v + uj.v * wj_dx.v);
div_vj.v = mi.v * dvdr.v * wj_dx.v;
for (k = 0; k < 3; k++) curl_vj[k].v = mi.v * curlvr[k].v * wj_dx.v;
/* Update particles. */
for (k = 0; k < VEC_SIZE; k++) {
pi[k]->rho += rhoi.f[k];
pi[k]->density.rho_dh -= rhoi_dh.f[k];
pi[k]->density.wcount += wcounti.f[k];
pi[k]->density.wcount_dh -= wcounti_dh.f[k];
pi[k]->density.div_v -= div_vi.f[k];
for (j = 0; j < 3; j++) pi[k]->density.rot_v[j] += curl_vi[j].f[k];
pj[k]->rho += rhoj.f[k];
pj[k]->density.rho_dh -= rhoj_dh.f[k];
pj[k]->density.wcount += wcountj.f[k];
pj[k]->density.wcount_dh -= wcountj_dh.f[k];
pj[k]->density.div_v -= div_vj.f[k];
for (j = 0; j < 3; j++) pj[k]->density.rot_v[j] += curl_vj[j].f[k];
}
#else
error(
"The Gadget2 serial version of runner_iact_density was called when the "
"vectorised version should have been used.");
#endif
}
/** /**
* @brief Density loop (non-symmetric version) * @brief Density loop (non-symmetric version)
*/ */
...@@ -275,105 +153,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_density( ...@@ -275,105 +153,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_density(
pi->density.rot_v[2] += fac * curlvr[2]; pi->density.rot_v[2] += fac * curlvr[2];
} }
/**
* @brief Density loop (non-symmetric vectorized version)
*/
__attribute__((always_inline)) INLINE static void
runner_iact_nonsym_vec_density(float *R2, float *Dx, float *Hi, float *Hj,
struct part **pi, struct part **pj) {
#ifdef WITH_OLD_VECTORIZATION
vector r, ri, r2, ui, hi, hi_inv, wi, wi_dx;
vector rhoi, rhoi_dh, wcounti, wcounti_dh, div_vi;
vector mj;
vector dx[3], dv[3];
vector vi[3], vj[3];
vector dvdr;
vector curlvr[3], curl_vi[3];
int k, j;
#if VEC_SIZE == 8
/* Get the masses. */
mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass,
pj[4]->mass, pj[5]->mass, pj[6]->mass, pj[7]->mass);
/* Get each velocity component. */
for (k = 0; k < 3; k++) {
vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k],
pi[4]->v[k], pi[5]->v[k], pi[6]->v[k], pi[7]->v[k]);
vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k],
pj[4]->v[k], pj[5]->v[k], pj[6]->v[k], pj[7]->v[k]);
}
/* Get each component of particle separation.
* (Dx={dx1,dy1,dz1,dx2,dy2,dz2,...,dxn,dyn,dzn})*/
for (k = 0; k < 3; k++)
dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k], Dx[12 + k],
Dx[15 + k], Dx[18 + k], Dx[21 + k]);
#elif VEC_SIZE == 4
mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass);
for (k = 0; k < 3; k++) {
vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k]);
vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k]);
}
for (k = 0; k < 3; k++)
dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k]);
#else
error("Unknown vector size.");
#endif
/* Get the radius and inverse radius. */
r2.v = vec_load(R2);
ri = vec_reciprocal_sqrt(r2);
r.v = r2.v * ri.v;
hi.v = vec_load(Hi);
hi_inv = vec_reciprocal(hi);
ui.v = r.v * hi_inv.v;
kernel_deval_vec(&ui, &wi, &wi_dx);
/* Compute dv. */
dv[0].v = vi[0].v - vj[0].v;
dv[1].v = vi[1].v - vj[1].v;
dv[2].v = vi[2].v - vj[2].v;
/* Compute dv dot r */
dvdr.v = (dv[0].v * dx[0].v) + (dv[1].v * dx[1].v) + (dv[2].v * dx[2].v);
dvdr.v = dvdr.v * ri.v;
/* Compute dv cross r */
curlvr[0].v = dv[1].v * dx[2].v - dv[2].v * dx[1].v;
curlvr[1].v = dv[2].v * dx[0].v - dv[0].v * dx[2].v;
curlvr[2].v = dv[0].v * dx[1].v - dv[1].v * dx[0].v;
for (k = 0; k < 3; k++) curlvr[k].v *= ri.v;
/* Compute density of pi. */
rhoi.v = mj.v * wi.v;
rhoi_dh.v = mj.v * (vec_set1(hydro_dimension) * wi.v + ui.v * wi_dx.v);
wcounti.v = wi.v;
wcounti_dh.v = (vec_set1(hydro_dimension) * wi.v + ui.v * wi_dx.v);
div_vi.v = mj.v * dvdr.v * wi_dx.v;
for (k = 0; k < 3; k++) curl_vi[k].v = mj.v * curlvr[k].v * wi_dx.v;
/* Update particles. */
for (k = 0; k < VEC_SIZE; k++) {
pi[k]->rho += rhoi.f[k];
pi[k]->density.rho_dh -= rhoi_dh.f[k];
pi[k]->density.wcount += wcounti.f[k];
pi[k]->density.wcount_dh -= wcounti_dh.f[k];
pi[k]->density.div_v -= div_vi.f[k];
for (j = 0; j < 3; j++) pi[k]->density.rot_v[j] += curl_vi[j].f[k];
}
#else
error(
"The Gadget2 serial version of runner_iact_nonsym_density was called "
"when the vectorised version should have been used.");
#endif
}
#ifdef WITH_VECTORIZATION #ifdef WITH_VECTORIZATION
/** /**
...@@ -703,199 +482,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_force( ...@@ -703,199 +482,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_force(
pj->entropy_dt += mi * visc_term * dvdr; pj->entropy_dt += mi * visc_term * dvdr;
} }
/**
* @brief Force loop (Vectorized version)
*/
__attribute__((always_inline)) INLINE static void runner_iact_vec_force(
float *R2, float *Dx, float *Hi, float *Hj, struct part **pi,
struct part **pj) {
#ifdef WITH_OLD_VECTORIZATION
vector r, r2, ri;
vector xi, xj;
vector hi, hj, hi_inv, hj_inv;
vector hid_inv, hjd_inv;
vector wi, wj, wi_dx, wj_dx, wi_dr, wj_dr, dvdr;
vector piPOrho2, pjPOrho2, pirho, pjrho;
vector mi, mj;
vector f;
vector grad_hi, grad_hj;
vector dx[3];
vector vi[3], vj[3];
vector pia[3], pja[3];
vector pih_dt, pjh_dt;
vector ci, cj, v_sig;
vector omega_ij, mu_ij, fac_mu, balsara;
vector rho_ij, visc, visc_term, sph_term, acc, entropy_dt;
int j, k;
fac_mu.v = vec_set1(1.f); /* Will change with cosmological integration */
/* Load stuff. */
#if VEC_SIZE == 8
mi.v = vec_set(pi[0]->mass, pi[1]->mass, pi[2]->mass, pi[3]->mass,
pi[4]->mass, pi[5]->mass, pi[6]->mass, pi[7]->mass);
mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass,
pj[4]->mass, pj[5]->mass, pj[6]->mass, pj[7]->mass);
piPOrho2.v = vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2,
pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2,
pi[4]->force.P_over_rho2, pi[5]->force.P_over_rho2,
pi[6]->force.P_over_rho2, pi[7]->force.P_over_rho2);
pjPOrho2.v = vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2,
pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2,
pj[4]->force.P_over_rho2, pj[5]->force.P_over_rho2,
pj[6]->force.P_over_rho2, pj[7]->force.P_over_rho2);
grad_hi.v =
vec_set(pi[0]->force.f, pi[1]->force.f, pi[2]->force.f, pi[3]->force.f,
pi[4]->force.f, pi[5]->force.f, pi[6]->force.f, pi[7]->force.f);
grad_hj.v =
vec_set(pj[0]->force.f, pj[1]->force.f, pj[2]->force.f, pj[3]->force.f,
pj[4]->force.f, pj[5]->force.f, pj[6]->force.f, pj[7]->force.f);
pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho, pi[4]->rho,
pi[5]->rho, pi[6]->rho, pi[7]->rho);
pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho, pj[4]->rho,
pj[5]->rho, pj[6]->rho, pj[7]->rho);
ci.v = vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed,
pi[2]->force.soundspeed, pi[3]->force.soundspeed,
pi[4]->force.soundspeed, pi[5]->force.soundspeed,
pi[6]->force.soundspeed, pi[7]->force.soundspeed);
cj.v = vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed,
pj[2]->force.soundspeed, pj[3]->force.soundspeed,
pj[4]->force.soundspeed, pj[5]->force.soundspeed,
pj[6]->force.soundspeed, pj[7]->force.soundspeed);
for (k = 0; k < 3; k++) {
vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k],
pi[4]->v[k], pi[5]->v[k], pi[6]->v[k], pi[7]->v[k]);
vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k],
pj[4]->v[k], pj[5]->v[k], pj[6]->v[k], pj[7]->v[k]);
}
for (k = 0; k < 3; k++)
dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k], Dx[12 + k],
Dx[15 + k], Dx[18 + k], Dx[21 + k]);
balsara.v =
vec_set(pi[0]->force.balsara, pi[1]->force.balsara, pi[2]->force.balsara,
pi[3]->force.balsara, pi[4]->force.balsara, pi[5]->force.balsara,
pi[6]->force.balsara, pi[7]->force.balsara) +
vec_set(pj[0]->force.balsara, pj[1]->force.balsara, pj[2]->force.balsara,
pj[3]->force.balsara, pj[4]->force.balsara, pj[5]->force.balsara,
pj[6]->force.balsara, pj[7]->force.balsara);
#elif VEC_SIZE == 4
mi.v = vec_set(pi[0]->mass, pi[1]->mass, pi[2]->mass, pi[3]->mass);
mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass);
piPOrho2.v = vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2,
pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2);
pjPOrho2.v = vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2,
pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2);
grad_hi.v =
vec_set(pi[0]->force.f, pi[1]->force.f, pi[2]->force.f, pi[3]->force.f);
grad_hj.v =
vec_set(pj[0]->force.f, pj[1]->force.f, pj[2]->force.f, pj[3]->force.f);
pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho);
pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho);
ci.v = vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed,
pi[2]->force.soundspeed, pi[3]->force.soundspeed);
cj.v = vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed,
pj[2]->force.soundspeed, pj[3]->force.soundspeed);
for (k = 0; k < 3; k++) {
vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k]);
vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k]);
}
for (k = 0; k < 3; k++)
dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k]);
balsara.v = vec_set(pi[0]->force.balsara, pi[1]->force.balsara,
pi[2]->force.balsara, pi[3]->force.balsara) +
vec_set(pj[0]->force.balsara, pj[1]->force.balsara,
pj[2]->force.balsara, pj[3]->force.balsara);
#else
error("Unknown vector size.");
#endif
/* Get the radius and inverse radius. */
r2.v = vec_load(R2);
ri = vec_reciprocal_sqrt(r2);
r.v = r2.v * ri.v;
/* Get the kernel for hi. */
hi.v = vec_load(Hi);
hi_inv = vec_reciprocal(hi);
hid_inv = pow_dimension_plus_one_vec(hi_inv); /* 1/h^(d+1) */
xi.v = r.v * hi_inv.v;
kernel_deval_vec(&xi, &wi, &wi_dx);
wi_dr.v = hid_inv.v * wi_dx.v;
/* Get the kernel for hj. */
hj.v = vec_load(Hj);
hj_inv = vec_reciprocal(hj);
hjd_inv = pow_dimension_plus_one_vec(hj_inv); /* 1/h^(d+1) */
xj.v = r.v * hj_inv.v;
kernel_deval_vec(&xj, &wj, &wj_dx);
wj_dr.v = hjd_inv.v * wj_dx.v;
/* Compute dv dot r. */
dvdr.v = ((vi[0].v - vj[0].v) * dx[0].v) + ((vi[1].v - vj[1].v) * dx[1].v) +
((vi[2].v - vj[2].v) * dx[2].v);
// dvdr.v = dvdr.v * ri.v;
/* Compute the relative velocity. (This is 0 if the particles move away from
* each other and negative otherwise) */
omega_ij.v = vec_fmin(dvdr.v, vec_set1(0.0f));
mu_ij.v = fac_mu.v * ri.v * omega_ij.v; /* This is 0 or negative */
/* Compute signal velocity */
v_sig.v = ci.v + cj.v - vec_set1(3.0f) * mu_ij.v;
/* Now construct the full viscosity term */
rho_ij.v = vec_set1(0.5f) * (pirho.v + pjrho.v);
visc.v = vec_set1(-0.25f) * vec_set1(const_viscosity_alpha) * v_sig.v *
mu_ij.v * balsara.v / rho_ij.v;
/* Now, convolve with the kernel */
visc_term.v = vec_set1(0.5f) * visc.v * (wi_dr.v + wj_dr.v) * ri.v;
sph_term.v =
(grad_hi.v * piPOrho2.v * wi_dr.v + grad_hj.v * pjPOrho2.v * wj_dr.v) *
ri.v;
/* Eventually get the acceleration */
acc.v = visc_term.v + sph_term.v;
/* Use the force, Luke! */
for (k = 0; k < 3; k++) {
f.v = dx[k].v * acc.v;
pia[k].v = mj.v * f.v;
pja[k].v = mi.v * f.v;
}
/* Get the time derivative for h. */
pih_dt.v = mj.v * dvdr.v * ri.v / pjrho.v * wi_dr.v;
pjh_dt.v = mi.v * dvdr.v * ri.v / pirho.v * wj_dr.v;
/* Change in entropy */
entropy_dt.v = visc_term.v * dvdr.v;
/* Store the forces back on the particles. */
for (k = 0; k < VEC_SIZE; k++) {
for (j = 0; j < 3; j++) {
pi[k]->a_hydro[j] -= pia[j].f[k];
pj[k]->a_hydro[j] += pja[j].f[k];
}
pi[k]->force.h_dt -= pih_dt.f[k];
pj[k]->force.h_dt -= pjh_dt.f[k];
pi[k]->force.v_sig = max(pi[k]->force.v_sig, v_sig.f[k]);
pj[k]->force.v_sig = max(pj[k]->force.v_sig, v_sig.f[k]);
pi[k]->entropy_dt += entropy_dt.f[k] * mj.f[k];
pj[k]->entropy_dt += entropy_dt.f[k] * mi.f[k];
}
#else
error(
"The Gadget2 serial version of runner_iact_nonsym_force was called when "
"the vectorised version should have been used.");
#endif
}
/** /**
* @brief Force loop (non-symmetric version) * @brief Force loop (non-symmetric version)
*/ */
...@@ -985,188 +571,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( ...@@ -985,188 +571,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force(
pi->entropy_dt += mj * visc_term * dvdr; pi->entropy_dt += mj * visc_term * dvdr;
} }
/**
* @brief Force loop (Vectorized non-symmetric version)
*/
__attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force(
float *R2, float *Dx, float *Hi, float *Hj, struct part **pi,
struct part **pj) {
#ifdef WITH_OLD_VECTORIZATION
vector r, r2, ri;
vector xi, xj;
vector hi, hj, hi_inv, hj_inv;
vector hid_inv, hjd_inv;
vector wi, wj, wi_dx, wj_dx, wi_dr, wj_dr, dvdr;
vector piPOrho2, pjPOrho2, pirho, pjrho;
vector mj;
vector f;
vector grad_hi, grad_hj;
vector dx[3];
vector vi[3], vj[3];
vector pia[3];
vector pih_dt;
vector ci, cj, v_sig;
vector omega_ij, mu_ij, fac_mu, balsara;
vector rho_ij, visc, visc_term, sph_term, acc, entropy_dt;
int j, k;
fac_mu.v = vec_set1(1.f); /* Will change with cosmological integration */
/* Load stuff. */
#if VEC_SIZE == 8
mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass,
pj[4]->mass, pj[5]->mass, pj[6]->mass, pj[7]->mass);
piPOrho2.v = vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2,
pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2,
pi[4]->force.P_over_rho2, pi[5]->force.P_over_rho2,
pi[6]->force.P_over_rho2, pi[7]->force.P_over_rho2);
pjPOrho2.v = vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2,
pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2,
pj[4]->force.P_over_rho2, pj[5]->force.P_over_rho2,
pj[6]->force.P_over_rho2, pj[7]->force.P_over_rho2);
grad_hi.v =
vec_set(pi[0]->force.f, pi[1]->force.f, pi[2]->force.f, pi[3]->force.f,
pi[4]->force.f, pi[5]->force.f, pi[6]->force.f, pi[7]->force.f);
grad_hj.v =
vec_set(pj[0]->force.f, pj[1]->force.f, pj[2]->force.f, pj[3]->force.f,
pj[4]->force.f, pj[5]->force.f, pj[6]->force.f, pj[7]->force.f);
pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho, pi[4]->rho,
pi[5]->rho, pi[6]->rho, pi[7]->rho);
pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho, pj[4]->rho,
pj[5]->rho, pj[6]->rho, pj[7]->rho);
ci.v = vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed,
pi[2]->force.soundspeed, pi[3]->force.soundspeed,
pi[4]->force.soundspeed, pi[5]->force.soundspeed,
pi[6]->force.soundspeed, pi[7]->force.soundspeed);
cj.v = vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed,
pj[2]->force.soundspeed, pj[3]->force.soundspeed,
pj[4]->force.soundspeed, pj[5]->force.soundspeed,
pj[6]->force.soundspeed, pj[7]->force.soundspeed);
for (k = 0; k < 3; k++) {
vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k],
pi[4]->v[k], pi[5]->v[k], pi[6]->v[k], pi[7]->v[k]);
vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k],
pj[4]->v[k], pj[5]->v[k], pj[6]->v[k], pj[7]->v[k]);
}
for (k = 0; k < 3; k++)
dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k], Dx[12 + k],
Dx[15 + k], Dx[18 + k], Dx[21 + k]);
balsara.v =
vec_set(pi[0]->force.balsara, pi[1]->force.balsara, pi[2]->force.balsara,
pi[3]->force.balsara, pi[4]->force.balsara, pi[5]->force.balsara,
pi[6]->force.balsara, pi[7]->force.balsara) +
vec_set(pj[0]->force.balsara, pj[1]->force.balsara, pj[2]->force.balsara,
pj[3]->force.balsara, pj[4]->force.balsara, pj[5]->force.balsara,
pj[6]->force.balsara, pj[7]->force.balsara);
#elif VEC_SIZE == 4
mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass);
piPOrho2.v = vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2,
pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2);
pjPOrho2.v = vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2,
pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2);
grad_hi.v =
vec_set(pi[0]->force.f, pi[1]->force.f, pi[2]->force.f, pi[3]->force.f);
grad_hj.v =
vec_set(pj[0]->force.f, pj[1]->force.f, pj[2]->force.f, pj[3]->force.f);
pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho);
pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho);
ci.v = vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed,
pi[2]->force.soundspeed, pi[3]->force.soundspeed);
cj.v = vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed,
pj[2]->force.soundspeed, pj[3]->force.soundspeed);
for (k = 0; k < 3; k++) {
vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k]);
vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k]);
}
for (k = 0; k < 3; k++)
dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k]);
balsara.v = vec_set(pi[0]->force.balsara, pi[1]->force.balsara,
pi[2]->force.balsara, pi[3]->force.balsara) +
vec_set(pj[0]->force.balsara, pj[1]->force.balsara,
pj[2]->force.balsara, pj[3]->force.balsara);
#else
error("Unknown vector size.");
#endif
/* Get the radius and inverse radius. */
r2.v = vec_load(R2);
ri = vec_reciprocal_sqrt(r2);
r.v = r2.v * ri.v;
/* Get the kernel for hi. */
hi.v = vec_load(Hi);
hi_inv = vec_reciprocal(hi);
hid_inv = pow_dimension_plus_one_vec(hi_inv);
xi.v = r.v * hi_inv.v;
kernel_deval_vec(&xi, &wi, &wi_dx);
wi_dr.v = hid_inv.v * wi_dx.v;
/* Get the kernel for hj. */
hj.v = vec_load(Hj);
hj_inv = vec_reciprocal(hj);
hjd_inv = pow_dimension_plus_one_vec(hj_inv);
xj.v = r.v * hj_inv.v;
kernel_deval_vec(&xj, &wj, &wj_dx);
wj_dr.v = hjd_inv.v * wj_dx.v;
/* Compute dv dot r. */
dvdr.v = ((vi[0].v - vj[0].v) * dx[0].v) + ((vi[1].v - vj[1].v) * dx[1].v) +
((vi[2].v - vj[2].v) * dx[2].v);
// dvdr.v = dvdr.v * ri.v;
/* Compute the relative velocity. (This is 0 if the particles move away from
* each other and negative otherwise) */
omega_ij.v = vec_fmin(dvdr.v, vec_set1(0.0f));
mu_ij.v = fac_mu.v * ri.v * omega_ij.v; /* This is 0 or negative */
/* Compute signal velocity */
v_sig.v = ci.v + cj.v - vec_set1(3.0f) * mu_ij.v;
/* Now construct the full viscosity term */
rho_ij.v = vec_set1(0.5f) * (pirho.v + pjrho.v);
visc.v = vec_set1(-0.25f) * vec_set1(const_viscosity_alpha) * v_sig.v *
mu_ij.v * balsara.v / rho_ij.v;
/* Now, convolve with the kernel */
visc_term.v = vec_set1(0.5f) * visc.v * (wi_dr.v + wj_dr.v) * ri.v;
sph_term.v =
(grad_hi.v * piPOrho2.v * wi_dr.v + grad_hj.v * pjPOrho2.v * wj_dr.v) *
ri.v;
/* Eventually get the acceleration */
acc.v = visc_term.v + sph_term.v;
/* Use the force, Luke! */
for (k = 0; k < 3; k++) {
f.v = dx[k].v * acc.v;
pia[k].v = mj.v * f.v;
}
/* Get the time derivative for h. */
pih_dt.v = mj.v * dvdr.v * ri.v / pjrho.v * wi_dr.v;
/* Change in entropy */
entropy_dt.v = mj.v * visc_term.v * dvdr.v;
/* Store the forces back on the particles. */
for (k = 0; k < VEC_SIZE; k++) {
for (j = 0; j < 3; j++) pi[k]->a_hydro[j] -= pia[j].f[k];
pi[k]->force.h_dt -= pih_dt.f[k];
pi[k]->force.v_sig = max(pi[k]->force.v_sig, v_sig.f[k]);
pi[k]->entropy_dt += entropy_dt.f[k];
}
#else
error(
"The Gadget2 serial version of runner_iact_nonsym_force was called when "
"the vectorised version should have been used.");
#endif
}
#ifdef WITH_VECTORIZATION #ifdef WITH_VECTORIZATION
static const vector const_viscosity_alpha_fac = static const vector const_viscosity_alpha_fac =
FILL_VEC(-0.25f * const_viscosity_alpha); FILL_VEC(-0.25f * const_viscosity_alpha);
......
...@@ -594,37 +594,3 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( ...@@ -594,37 +594,3 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force(
runner_iact_fluxes_common(r2, dx, hi, hj, pi, pj, 0); runner_iact_fluxes_common(r2, dx, hi, hj, pi, pj, 0);
} }
//// EMPTY VECTORIZED VERSIONS (gradients methods are missing...)
__attribute__((always_inline)) INLINE static void runner_iact_vec_density(
float *R2, float *Dx, float *Hi, float *Hj, struct part **pi,
struct part **pj) {
error(
"Vectorised versions of the Gizmo interaction functions do not exist "
"yet!");
}
__attribute__((always_inline)) INLINE static void
runner_iact_nonsym_vec_density(float *R2, float *Dx, float *Hi, float *Hj,
struct part **pi, struct part **pj) {
error(
"Vectorised versions of the Gizmo interaction functions do not exist "
"yet!");
}
__attribute__((always_inline)) INLINE static void runner_iact_vec_force(
float *R2, float *Dx, float *Hi, float *Hj, struct part **pi,
struct part **pj) {
error(
"Vectorised versions of the Gizmo interaction functions do not exist "
"yet!");
}
__attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force(
float *R2, float *Dx, float *Hi, float *Hj, struct part **pi,
struct part **pj) {
error(
"Vectorised versions of the Gizmo interaction functions do not exist "
"yet!");
}
...@@ -70,17 +70,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_density( ...@@ -70,17 +70,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_density(
pj->density.wcount_dh -= (hydro_dimension * wj + uj * wj_dx); pj->density.wcount_dh -= (hydro_dimension * wj + uj * wj_dx);
} }
/**
* @brief Density loop (Vectorized version)
*/
__attribute__((always_inline)) INLINE static void runner_iact_vec_density(
float *R2, float *Dx, float *Hi, float *Hj, struct part **pi,
struct part **pj) {
error(
"A vectorised version of the Minimal density interaction function does "
"not exist yet!");
}
/** /**
* @brief Density loop (non-symmetric version) * @brief Density loop (non-symmetric version)
*/ */
...@@ -105,17 +94,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_density( ...@@ -105,17 +94,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_density(
pi->density.wcount_dh -= (hydro_dimension * wi + ui * wi_dx); pi->density.wcount_dh -= (hydro_dimension * wi + ui * wi_dx);
} }
/**
* @brief Density loop (non-symmetric vectorized version)
*/
__attribute__((always_inline)) INLINE static void
runner_iact_nonsym_vec_density(float *R2, float *Dx, float *Hi, float *Hj,
struct part **pi, struct part **pj) {
error(
"A vectorised version of the Minimal non-symmetric density interaction "
"function does not exist yet!");
}
/** /**
* @brief Force loop * @brief Force loop
*/ */
...@@ -216,17 +194,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_force( ...@@ -216,17 +194,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_force(
pj->force.v_sig = max(pj->force.v_sig, v_sig); pj->force.v_sig = max(pj->force.v_sig, v_sig);
} }
/**
* @brief Force loop (Vectorized version)
*/
__attribute__((always_inline)) INLINE static void runner_iact_vec_force(
float *R2, float *Dx, float *Hi, float *Hj, struct part **pi,
struct part **pj) {
error(
"A vectorised version of the Minimal force interaction function does not "
"exist yet!");
}
/** /**
* @brief Force loop (non-symmetric version) * @brief Force loop (non-symmetric version)
*/ */
...@@ -318,15 +285,4 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( ...@@ -318,15 +285,4 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force(
pi->force.v_sig = max(pi->force.v_sig, v_sig); pi->force.v_sig = max(pi->force.v_sig, v_sig);
} }
/**
* @brief Force loop (Vectorized non-symmetric version)
*/
__attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force(
float *R2, float *Dx, float *Hi, float *Hj, struct part **pi,
struct part **pj) {
error(
"A vectorised version of the Minimal non-symmetric force interaction "
"function does not exist yet!");
}
#endif /* SWIFT_MINIMAL_HYDRO_IACT_H */ #endif /* SWIFT_MINIMAL_HYDRO_IACT_H */
...@@ -110,16 +110,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_density( ...@@ -110,16 +110,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_density(
pj->density.rot_v[2] += facj * curlvr[2]; pj->density.rot_v[2] += facj * curlvr[2];
} }
/**
* @brief Density loop (Vectorized version)
*/
__attribute__((always_inline)) INLINE static void runner_iact_vec_density(
float *R2, float *Dx, float *Hi, float *Hj, struct part **pi,
struct part **pj) {
error("Vectorized version of Pressure-Entropy SPH routine not existant yet.");
}
/** /**
* @brief Density loop (non-symmetric version) * @brief Density loop (non-symmetric version)
*/ */
...@@ -173,16 +163,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_density( ...@@ -173,16 +163,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_density(
pi->density.rot_v[2] += fac * curlvr[2]; pi->density.rot_v[2] += fac * curlvr[2];
} }
/**
* @brief Density loop (non-symmetric vectorized version)
*/
__attribute__((always_inline)) INLINE static void
runner_iact_nonsym_vec_density(float *R2, float *Dx, float *Hi, float *Hj,
struct part **pi, struct part **pj) {
error("Vectorized version of Pressure-Entropy SPH routine not existant yet.");
}
/** /**
* @brief Force loop * @brief Force loop
*/ */
...@@ -284,16 +264,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_force( ...@@ -284,16 +264,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_force(
pj->entropy_dt += mi * visc_term * r_inv * dvdr; pj->entropy_dt += mi * visc_term * r_inv * dvdr;
} }
/**
* @brief Force loop (Vectorized version)
*/
__attribute__((always_inline)) INLINE static void runner_iact_vec_force(
float *R2, float *Dx, float *Hi, float *Hj, struct part **pi,
struct part **pj) {
error("Vectorized version of Pressure-Entropy SPH routine not existant yet.");
}
/** /**
* @brief Force loop (non-symmetric version) * @brief Force loop (non-symmetric version)
*/ */
...@@ -388,14 +358,4 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( ...@@ -388,14 +358,4 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force(
pi->entropy_dt += mj * visc_term * r_inv * dvdr; pi->entropy_dt += mj * visc_term * r_inv * dvdr;
} }
/**
* @brief Force loop (Vectorized non-symmetric version)
*/
__attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force(
float *R2, float *Dx, float *Hi, float *Hj, struct part **pi,
struct part **pj) {
error("Vectorized version of Pressure-Entropy SPH routine not existant yet.");
}
#endif /* SWIFT_PRESSURE_ENTROPY_HYDRO_IACT_H */ #endif /* SWIFT_PRESSURE_ENTROPY_HYDRO_IACT_H */
...@@ -346,20 +346,3 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( ...@@ -346,20 +346,3 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force(
runner_iact_fluxes_common(r2, dx, hi, hj, pi, pj, 0); runner_iact_fluxes_common(r2, dx, hi, hj, pi, pj, 0);
} }
//// EMPTY VECTORIZED VERSIONS (gradients methods are missing...)
__attribute__((always_inline)) INLINE static void runner_iact_vec_density(
float *R2, float *Dx, float *Hi, float *Hj, struct part **pi,
struct part **pj) {}
__attribute__((always_inline)) INLINE static void
runner_iact_nonsym_vec_density(float *R2, float *Dx, float *Hi, float *Hj,
struct part **pi, struct part **pj) {}
__attribute__((always_inline)) INLINE static void runner_iact_vec_force(
float *R2, float *Dx, float *Hi, float *Hj, struct part **pi,
struct part **pj) {}
__attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force(
float *R2, float *Dx, float *Hi, float *Hj, struct part **pi,
struct part **pj) {}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment