Commit 013e52d6 authored by James Willis's avatar James Willis
Browse files

Incorporate cosmological terms in vectorised hydro interactions.

parent a3866435
......@@ -659,7 +659,7 @@ runner_iact_nonsym_1_vec_force(
vector viz, vector pirho, vector grad_hi, vector piPOrho2, vector balsara_i,
vector ci, float *Vjx, float *Vjy, float *Vjz, float *Pjrho, float *Grad_hj,
float *PjPOrho2, float *Balsara_j, float *Cj, float *Mj, vector hi_inv,
vector hj_inv, vector *a_hydro_xSum, vector *a_hydro_ySum,
vector hj_inv, vector fac_mu, vector a2_Hubble, vector *a_hydro_xSum, vector *a_hydro_ySum,
vector *a_hydro_zSum, vector *h_dtSum, vector *v_sigSum,
vector *entropy_dtSum, mask_t mask) {
......@@ -687,9 +687,6 @@ runner_iact_nonsym_1_vec_force(
const vector balsara_j = vector_load(Balsara_j);
const vector cj = vector_load(Cj);
const vector fac_mu =
vector_set1(1.f); /* Will change with cosmological integration */
/* Load stuff. */
balsara.v = vec_add(balsara_i.v, balsara_j.v);
......@@ -718,7 +715,7 @@ runner_iact_nonsym_1_vec_force(
dvz.v = vec_sub(viz.v, vjz.v);
/* Compute dv dot r. */
dvdr.v = vec_fma(dvx.v, dx->v, vec_fma(dvy.v, dy->v, vec_mul(dvz.v, dz->v)));
dvdr.v = vec_fma(dvx.v, dx->v, vec_fma(dvy.v, dy->v, vec_fma(dvz.v, dz->v, vec_mul(a2_Hubble.v, r2->v))));
/* Compute the relative velocity. (This is 0 if the particles move away from
* each other and negative otherwise) */
......@@ -787,7 +784,7 @@ runner_iact_nonsym_2_vec_force(
vector viz, vector pirho, vector grad_hi, vector piPOrho2, vector balsara_i,
vector ci, float *Vjx, float *Vjy, float *Vjz, float *Pjrho, float *Grad_hj,
float *PjPOrho2, float *Balsara_j, float *Cj, float *Mj, vector hi_inv,
float *Hj_inv, vector *a_hydro_xSum, vector *a_hydro_ySum,
float *Hj_inv, vector fac_mu, vector a2_Hubble, vector *a_hydro_xSum, vector *a_hydro_ySum,
vector *a_hydro_zSum, vector *h_dtSum, vector *v_sigSum,
vector *entropy_dtSum, mask_t mask, mask_t mask_2, short mask_cond) {
......@@ -853,9 +850,6 @@ runner_iact_nonsym_2_vec_force(
const vector hj_inv = vector_load(Hj_inv);
const vector hj_inv_2 = vector_load(&Hj_inv[VEC_SIZE]);
const vector fac_mu =
vector_set1(1.f); /* Will change with cosmological integration */
/* Find the balsara switch. */
balsara.v = vec_add(balsara_i.v, balsara_j.v);
balsara_2.v = vec_add(balsara_i.v, balsara_j_2.v);
......@@ -891,9 +885,9 @@ runner_iact_nonsym_2_vec_force(
dvz_2.v = vec_sub(viz.v, vjz_2.v);
/* Compute dv dot r. */
dvdr.v = vec_fma(dvx.v, dx.v, vec_fma(dvy.v, dy.v, vec_mul(dvz.v, dz.v)));
dvdr.v = vec_fma(dvx.v, dx.v, vec_fma(dvy.v, dy.v, vec_fma(dvz.v, dz.v, vec_mul(a2_Hubble.v, r2.v))));
dvdr_2.v = vec_fma(dvx_2.v, dx_2.v,
vec_fma(dvy_2.v, dy_2.v, vec_mul(dvz_2.v, dz_2.v)));
vec_fma(dvy_2.v, dy_2.v, vec_fma(dvz_2.v, dz_2.v, vec_mul(a2_Hubble.v, r2_2.v))));
/* Compute the relative velocity. (This is 0 if the particles move away from
* each other and negative otherwise) */
......
......@@ -1111,6 +1111,7 @@ void runner_doself2_force_vec(struct runner *r, struct cell *restrict c) {
#if defined(WITH_VECTORIZATION) && defined(GADGET2_SPH)
const struct engine *e = r->e;
const struct cosmology *restrict cosmo = e->cosmology;
const timebin_t max_active_bin = e->max_active_bin;
struct part *restrict parts = c->parts;
const int count = c->count;
......@@ -1139,6 +1140,14 @@ void runner_doself2_force_vec(struct runner *r, struct cell *restrict c) {
/* Read the particles from the cell and store them locally in the cache. */
cache_read_force_particles(c, cell_cache);
/* Cosmological terms */
const float a = cosmo->a;
const float H = cosmo->H;
const float fac_mu = pow_three_gamma_minus_five_over_two(a);
const float a2_Hubble = a * a * H;
const vector v_fac_mu = vector_set1(fac_mu);
const vector v_a2_Hubble = vector_set1(a2_Hubble);
/* Loop over the particles in the cell. */
for (int pid = 0; pid < count; pid++) {
......@@ -1262,7 +1271,7 @@ void runner_doself2_force_vec(struct runner *r, struct cell *restrict c) {
&cell_cache->vy[pjd], &cell_cache->vz[pjd], &cell_cache->rho[pjd],
&cell_cache->grad_h[pjd], &cell_cache->pOrho2[pjd],
&cell_cache->balsara[pjd], &cell_cache->soundspeed[pjd],
&cell_cache->m[pjd], v_hi_inv, v_hj_inv, &v_a_hydro_xSum,
&cell_cache->m[pjd], v_hi_inv, v_hj_inv, v_fac_mu, v_a2_Hubble, &v_a_hydro_xSum,
&v_a_hydro_ySum, &v_a_hydro_zSum, &v_h_dtSum, &v_sigSum,
&v_entropy_dtSum, v_doi_mask);
}
......@@ -1988,6 +1997,7 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
#if defined(WITH_VECTORIZATION) && defined(GADGET2_SPH)
const struct engine *restrict e = r->e;
const struct cosmology *restrict cosmo = e->cosmology;
const timebin_t max_active_bin = e->max_active_bin;
TIMER_TIC;
......@@ -2019,6 +2029,14 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
const int active_ci = cell_is_active_hydro(ci, e) && ci_local;
const int active_cj = cell_is_active_hydro(cj, e) && cj_local;
/* Cosmological terms */
const float a = cosmo->a;
const float H = cosmo->H;
const float fac_mu = pow_three_gamma_minus_five_over_two(a);
const float a2_Hubble = a * a * H;
const vector v_fac_mu = vector_set1(fac_mu);
const vector v_a2_Hubble = vector_set1(a2_Hubble);
#ifdef SWIFT_DEBUG_CHECKS
/* Check that particles have been drifted to the current time */
for (int pid = 0; pid < count_i; pid++)
......@@ -2213,7 +2231,7 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
&cj_cache->grad_h[cj_cache_idx], &cj_cache->pOrho2[cj_cache_idx],
&cj_cache->balsara[cj_cache_idx],
&cj_cache->soundspeed[cj_cache_idx], &cj_cache->m[cj_cache_idx],
v_hi_inv, v_hj_inv, &v_a_hydro_xSum, &v_a_hydro_ySum,
v_hi_inv, v_hj_inv, v_fac_mu, v_a2_Hubble, &v_a_hydro_xSum, &v_a_hydro_ySum,
&v_a_hydro_zSum, &v_h_dtSum, &v_sigSum, &v_entropy_dtSum,
v_doi_mask);
}
......@@ -2349,7 +2367,7 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
&ci_cache->grad_h[ci_cache_idx], &ci_cache->pOrho2[ci_cache_idx],
&ci_cache->balsara[ci_cache_idx],
&ci_cache->soundspeed[ci_cache_idx], &ci_cache->m[ci_cache_idx],
v_hj_inv, v_hi_inv, &v_a_hydro_xSum, &v_a_hydro_ySum,
v_hj_inv, v_hi_inv, v_fac_mu, v_a2_Hubble, &v_a_hydro_xSum, &v_a_hydro_ySum,
&v_a_hydro_zSum, &v_h_dtSum, &v_sigSum, &v_entropy_dtSum,
v_doj_mask);
}
......
......@@ -602,6 +602,10 @@ void test_force_interactions(struct part test_part, struct part *parts,
const ticks vec_tic = getticks();
/* Set cosmological variables to one. */
const vector v_fac_mu = vector_set1(1.f);
const vector v_a2_Hubble = vector_set1(1.f);
for (size_t i = 0; i < count; i += num_vec_proc * VEC_SIZE) {
if (num_vec_proc == 2) {
......@@ -610,7 +614,7 @@ void test_force_interactions(struct part test_part, struct part *parts,
(viz_vec), rhoi_vec, grad_hi_vec, pOrhoi2_vec, balsara_i_vec,
ci_vec, &(vjxq[i]), &(vjyq[i]), &(vjzq[i]), &(rhojq[i]),
&(grad_hjq[i]), &(pOrhoj2q[i]), &(balsarajq[i]), &(cjq[i]),
&(mjq[i]), hi_inv_vec, &(hj_invq[i]), &a_hydro_xSum, &a_hydro_ySum,
&(mjq[i]), hi_inv_vec, &(hj_invq[i]), v_fac_mu, v_a2_Hubble, &a_hydro_xSum, &a_hydro_ySum,
&a_hydro_zSum, &h_dtSum, &v_sigSum, &entropy_dtSum, mask, mask2, 0);
} else { /* Only use one vector for interaction. */
......@@ -626,7 +630,7 @@ void test_force_interactions(struct part test_part, struct part *parts,
&my_r2, &my_dx, &my_dy, &my_dz, vix_vec, viy_vec, viz_vec, rhoi_vec,
grad_hi_vec, pOrhoi2_vec, balsara_i_vec, ci_vec, &(vjxq[i]),
&(vjyq[i]), &(vjzq[i]), &(rhojq[i]), &(grad_hjq[i]), &(pOrhoj2q[i]),
&(balsarajq[i]), &(cjq[i]), &(mjq[i]), hi_inv_vec, hj_inv,
&(balsarajq[i]), &(cjq[i]), &(mjq[i]), hi_inv_vec, hj_inv, v_fac_mu, v_a2_Hubble,
&a_hydro_xSum, &a_hydro_ySum, &a_hydro_zSum, &h_dtSum, &v_sigSum,
&entropy_dtSum, mask);
}
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment