Commit 470cfbcf authored by James Willis's avatar James Willis
Browse files

Created vectorised interaction functions for the force.

parent e156b185
......@@ -1202,4 +1202,267 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force(
#endif
}
#ifdef WITH_VECTORIZATION
__attribute__((always_inline)) INLINE static void runner_iact_nonsym_1_vec_force(
vector *r2, vector *dx, vector *dy, vector *dz, vector hi_inv, vector hj_inv, struct part **pi,
struct part **pj, vector *a_hydro_xSum, vector *a_hydro_ySum, vector *a_hydro_zSum, vector *h_dtSum, vector *v_sigSum, vector *entropy_dtSum, vector mask) {
#ifdef WITH_VECTORIZATION
vector r, ri;
vector xi, xj;
vector hid_inv, hjd_inv;
vector wi, wj, wi_dx, wj_dx, wi_dr, wj_dr, dvdr;
vector piPOrho2, pjPOrho2, pirho, pjrho;
vector mj;
vector grad_hi, grad_hj;
vector vi[3], vj[3];
vector pia[3];
vector pih_dt;
vector ci, cj, v_sig;
vector omega_ij, mu_ij, fac_mu, balsara;
vector rho_ij, visc, visc_term, sph_term, acc, entropy_dt;
int k;
fac_mu.v = vec_set1(1.f); /* Will change with cosmological integration */
/* Load stuff. */
mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass,
pj[4]->mass, pj[5]->mass, pj[6]->mass, pj[7]->mass);
piPOrho2.v = vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2,
pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2,
pi[4]->force.P_over_rho2, pi[5]->force.P_over_rho2,
pi[6]->force.P_over_rho2, pi[7]->force.P_over_rho2);
pjPOrho2.v = vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2,
pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2,
pj[4]->force.P_over_rho2, pj[5]->force.P_over_rho2,
pj[6]->force.P_over_rho2, pj[7]->force.P_over_rho2);
grad_hi.v =
vec_set(pi[0]->force.f, pi[1]->force.f, pi[2]->force.f, pi[3]->force.f,
pi[4]->force.f, pi[5]->force.f, pi[6]->force.f, pi[7]->force.f);
grad_hj.v =
vec_set(pj[0]->force.f, pj[1]->force.f, pj[2]->force.f, pj[3]->force.f,
pj[4]->force.f, pj[5]->force.f, pj[6]->force.f, pj[7]->force.f);
pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho, pi[4]->rho,
pi[5]->rho, pi[6]->rho, pi[7]->rho);
pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho, pj[4]->rho,
pj[5]->rho, pj[6]->rho, pj[7]->rho);
ci.v = vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed,
pi[2]->force.soundspeed, pi[3]->force.soundspeed,
pi[4]->force.soundspeed, pi[5]->force.soundspeed,
pi[6]->force.soundspeed, pi[7]->force.soundspeed);
cj.v = vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed,
pj[2]->force.soundspeed, pj[3]->force.soundspeed,
pj[4]->force.soundspeed, pj[5]->force.soundspeed,
pj[6]->force.soundspeed, pj[7]->force.soundspeed);
for (k = 0; k < 3; k++) {
vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k],
pi[4]->v[k], pi[5]->v[k], pi[6]->v[k], pi[7]->v[k]);
vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k],
pj[4]->v[k], pj[5]->v[k], pj[6]->v[k], pj[7]->v[k]);
}
balsara.v =
vec_set(pi[0]->force.balsara, pi[1]->force.balsara, pi[2]->force.balsara,
pi[3]->force.balsara, pi[4]->force.balsara, pi[5]->force.balsara,
pi[6]->force.balsara, pi[7]->force.balsara) +
vec_set(pj[0]->force.balsara, pj[1]->force.balsara, pj[2]->force.balsara,
pj[3]->force.balsara, pj[4]->force.balsara, pj[5]->force.balsara,
pj[6]->force.balsara, pj[7]->force.balsara);
/* Get the radius and inverse radius. */
ri = vec_reciprocal_sqrt(*r2);
r.v = r2->v * ri.v;
/* Get the kernel for hi. */
hid_inv = pow_dimension_plus_one_vec(hi_inv);
xi.v = r.v * hi_inv.v;
kernel_deval_vec(&xi, &wi, &wi_dx);
wi_dr.v = hid_inv.v * wi_dx.v;
/* Get the kernel for hj. */
hjd_inv = pow_dimension_plus_one_vec(hj_inv);
xj.v = r.v * hj_inv.v;
kernel_deval_vec(&xj, &wj, &wj_dx);
wj_dr.v = hjd_inv.v * wj_dx.v;
/* Compute dv dot r. */
//dvdr.v = ((vi[0].v - vj[0].v) * dx[0].v) + ((vi[1].v - vj[1].v) * dx[1].v) +
// ((vi[2].v - vj[2].v) * dx[2].v);
dvdr.v = ((vi[0].v - vj[0].v) * dx->v) + ((vi[1].v - vj[1].v) * dy->v) +
((vi[2].v - vj[2].v) * dz->v);
// dvdr.v = dvdr.v * ri.v;
/* Compute the relative velocity. (This is 0 if the particles move away from
* each other and negative otherwise) */
omega_ij.v = vec_fmin(dvdr.v, vec_set1(0.0f));
mu_ij.v = fac_mu.v * ri.v * omega_ij.v; /* This is 0 or negative */
/* Compute signal velocity */
v_sig.v = ci.v + cj.v - vec_set1(3.0f) * mu_ij.v;
/* Now construct the full viscosity term */
rho_ij.v = vec_set1(0.5f) * (pirho.v + pjrho.v);
visc.v = vec_set1(-0.25f) * vec_set1(const_viscosity_alpha) * v_sig.v *
mu_ij.v * balsara.v / rho_ij.v;
/* Now, convolve with the kernel */
visc_term.v = vec_set1(0.5f) * visc.v * (wi_dr.v + wj_dr.v) * ri.v;
sph_term.v =
(grad_hi.v * piPOrho2.v * wi_dr.v + grad_hj.v * pjPOrho2.v * wj_dr.v) *
ri.v;
/* Eventually get the acceleration */
acc.v = visc_term.v + sph_term.v;
/* Use the force, Luke! */
//for (k = 0; k < 3; k++) {
// f.v = dx[k].v * acc.v;
// pia[k].v = mj.v * f.v;
//}
pia[0].v = mj.v * dx->v * acc.v;
pia[1].v = mj.v * dy->v * acc.v;
pia[2].v = mj.v * dz->v * acc.v;
/* Get the time derivative for h. */
pih_dt.v = mj.v * dvdr.v * ri.v / pjrho.v * wi_dr.v;
/* Change in entropy */
entropy_dt.v = mj.v * visc_term.v * dvdr.v;
/* Store the forces back on the particles. */
//for (k = 0; k < VEC_SIZE; k++) {
// for (j = 0; j < 3; j++) pi[k]->a_hydro[j] -= pia[j].f[k];
// pi[k]->force.h_dt -= pih_dt.f[k];
// pi[k]->force.v_sig = max(pi[k]->force.v_sig, v_sig.f[k]);
// pi[k]->entropy_dt += entropy_dt.f[k];
//}
a_hydro_xSum->v -= vec_and(pia[0].v, mask.v);
a_hydro_ySum->v -= vec_and(pia[1].v, mask.v);
a_hydro_zSum->v -= vec_and(pia[2].v, mask.v);
h_dtSum->v -= vec_and(pih_dt.v, mask.v);
v_sigSum->v = vec_fmax(v_sigSum->v, vec_and(v_sig.v, mask.v));
entropy_dtSum->v += vec_and(entropy_dt.v,mask.v);
#else
error(
"The Gadget2 serial version of runner_iact_nonsym_force was called when "
"the vectorised version should have been used.");
#endif
}
__attribute__((always_inline)) INLINE static void runner_iact_nonsym_1_vec_force_2(
vector *r2, vector *dx, vector *dy, vector *dz, vector *vix, vector *viy, vector *viz, vector *pirho, vector *grad_hi, vector *piPOrho2, vector *balsara_i, vector *ci, vector *vjx, vector *vjy, vector *vjz, vector *pjrho, vector *grad_hj, vector *pjPOrho2, vector *balsara_j, vector *cj, vector *mj, vector hi_inv, vector hj_inv,
vector *a_hydro_xSum, vector *a_hydro_ySum, vector *a_hydro_zSum, vector *h_dtSum, vector *v_sigSum, vector *entropy_dtSum, vector mask) {
#ifdef WITH_VECTORIZATION
vector r, ri;
vector xi, xj;
vector hid_inv, hjd_inv;
vector wi, wj, wi_dx, wj_dx, wi_dr, wj_dr, dvdr;
vector pia[3];
vector pih_dt;
vector v_sig;
vector omega_ij, mu_ij, fac_mu, balsara;
vector rho_ij, visc, visc_term, sph_term, acc, entropy_dt;
fac_mu.v = vec_set1(1.f); /* Will change with cosmological integration */
/* Load stuff. */
balsara.v = balsara_i->v + balsara_j->v;
/* Get the radius and inverse radius. */
ri = vec_reciprocal_sqrt(*r2);
r.v = r2->v * ri.v;
/* Get the kernel for hi. */
hid_inv = pow_dimension_plus_one_vec(hi_inv);
xi.v = r.v * hi_inv.v;
kernel_deval_1_vec(&xi, &wi, &wi_dx);
wi_dr.v = hid_inv.v * wi_dx.v;
/* Get the kernel for hj. */
hjd_inv = pow_dimension_plus_one_vec(hj_inv);
xj.v = r.v * hj_inv.v;
kernel_deval_1_vec(&xj, &wj, &wj_dx);
wj_dr.v = hjd_inv.v * wj_dx.v;
/* Compute dv dot r. */
//dvdr.v = ((vi[0].v - vj[0].v) * dx[0].v) + ((vi[1].v - vj[1].v) * dx[1].v) +
// ((vi[2].v - vj[2].v) * dx[2].v);
dvdr.v = ((vix->v - vjx->v) * dx->v) + ((viy->v - vjy->v) * dy->v) +
((viz->v - vjz->v) * dz->v);
// dvdr.v = dvdr.v * ri.v;
/* Compute the relative velocity. (This is 0 if the particles move away from
* each other and negative otherwise) */
omega_ij.v = vec_fmin(dvdr.v, vec_set1(0.0f));
mu_ij.v = fac_mu.v * ri.v * omega_ij.v; /* This is 0 or negative */
/* Compute signal velocity */
v_sig.v = ci->v + cj->v - vec_set1(3.0f) * mu_ij.v;
/* Now construct the full viscosity term */
rho_ij.v = vec_set1(0.5f) * (pirho->v + pjrho->v);
visc.v = vec_set1(-0.25f) * vec_set1(const_viscosity_alpha) * v_sig.v *
mu_ij.v * balsara.v / rho_ij.v;
/* Now, convolve with the kernel */
visc_term.v = vec_set1(0.5f) * visc.v * (wi_dr.v + wj_dr.v) * ri.v;
sph_term.v =
(grad_hi->v * piPOrho2->v * wi_dr.v + grad_hj->v * pjPOrho2->v * wj_dr.v) *
ri.v;
/* Eventually get the acceleration */
acc.v = visc_term.v + sph_term.v;
/* Use the force, Luke! */
//for (k = 0; k < 3; k++) {
// f.v = dx[k].v * acc.v;
// pia[k].v = mj.v * f.v;
//}
pia[0].v = mj->v * dx->v * acc.v;
pia[1].v = mj->v * dy->v * acc.v;
pia[2].v = mj->v * dz->v * acc.v;
/* Get the time derivative for h. */
pih_dt.v = mj->v * dvdr.v * ri.v / pjrho->v * wi_dr.v;
/* Change in entropy */
entropy_dt.v = mj->v * visc_term.v * dvdr.v;
/* Store the forces back on the particles. */
//for (k = 0; k < VEC_SIZE; k++) {
// for (j = 0; j < 3; j++) pi[k]->a_hydro[j] -= pia[j].f[k];
// pi[k]->force.h_dt -= pih_dt.f[k];
// pi[k]->force.v_sig = max(pi[k]->force.v_sig, v_sig.f[k]);
// pi[k]->entropy_dt += entropy_dt.f[k];
//}
a_hydro_xSum->v -= vec_and(pia[0].v, mask.v);
a_hydro_ySum->v -= vec_and(pia[1].v, mask.v);
a_hydro_zSum->v -= vec_and(pia[2].v, mask.v);
h_dtSum->v -= vec_and(pih_dt.v, mask.v);
v_sigSum->v = vec_fmax(v_sigSum->v, vec_and(v_sig.v, mask.v));
entropy_dtSum->v += vec_and(entropy_dt.v,mask.v);
#else
error(
"The Gadget2 serial version of runner_iact_nonsym_force was called when "
"the vectorised version should have been used.");
#endif
}
#endif
#endif /* SWIFT_GADGET2_HYDRO_IACT_H */
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment