Commit 11f5d292 authored by Matthieu Schaller's avatar Matthieu Schaller
Browse files

Merge branch 'cosmo_vec' into 'master'

Cosmological terms in vectorized version of Gadget SPH.

Closes #406

See merge request !510
parents 560cbdde 0901bce8
......@@ -659,7 +659,7 @@ runner_iact_nonsym_1_vec_force(
vector viz, vector pirho, vector grad_hi, vector piPOrho2, vector balsara_i,
vector ci, float *Vjx, float *Vjy, float *Vjz, float *Pjrho, float *Grad_hj,
float *PjPOrho2, float *Balsara_j, float *Cj, float *Mj, vector hi_inv,
vector hj_inv, vector *a_hydro_xSum, vector *a_hydro_ySum,
vector hj_inv, const float a, const float H, vector *a_hydro_xSum, vector *a_hydro_ySum,
vector *a_hydro_zSum, vector *h_dtSum, vector *v_sigSum,
vector *entropy_dtSum, mask_t mask) {
......@@ -687,8 +687,11 @@ runner_iact_nonsym_1_vec_force(
const vector balsara_j = vector_load(Balsara_j);
const vector cj = vector_load(Cj);
const vector fac_mu =
vector_set1(1.f); /* Will change with cosmological integration */
/* Cosmological terms */
const float fac_mu = pow_three_gamma_minus_five_over_two(a);
const float a2_Hubble = a * a * H;
const vector v_fac_mu = vector_set1(fac_mu);
const vector v_a2_Hubble = vector_set1(a2_Hubble);
/* Load stuff. */
balsara.v = vec_add(balsara_i.v, balsara_j.v);
......@@ -718,13 +721,13 @@ runner_iact_nonsym_1_vec_force(
dvz.v = vec_sub(viz.v, vjz.v);
/* Compute dv dot r. */
dvdr.v = vec_fma(dvx.v, dx->v, vec_fma(dvy.v, dy->v, vec_mul(dvz.v, dz->v)));
dvdr.v = vec_fma(dvx.v, dx->v, vec_fma(dvy.v, dy->v, vec_fma(dvz.v, dz->v, vec_mul(v_a2_Hubble.v, r2->v))));
/* Compute the relative velocity. (This is 0 if the particles move away from
* each other and negative otherwise) */
omega_ij.v = vec_fmin(dvdr.v, vec_setzero());
mu_ij.v =
vec_mul(fac_mu.v, vec_mul(ri.v, omega_ij.v)); /* This is 0 or negative */
vec_mul(v_fac_mu.v, vec_mul(ri.v, omega_ij.v)); /* This is 0 or negative */
/* Compute signal velocity */
v_sig.v = vec_fnma(vec_set1(3.f), mu_ij.v, vec_add(ci.v, cj.v));
......@@ -787,7 +790,7 @@ runner_iact_nonsym_2_vec_force(
vector viz, vector pirho, vector grad_hi, vector piPOrho2, vector balsara_i,
vector ci, float *Vjx, float *Vjy, float *Vjz, float *Pjrho, float *Grad_hj,
float *PjPOrho2, float *Balsara_j, float *Cj, float *Mj, vector hi_inv,
float *Hj_inv, vector *a_hydro_xSum, vector *a_hydro_ySum,
float *Hj_inv, const float a, const float H, vector *a_hydro_xSum, vector *a_hydro_ySum,
vector *a_hydro_zSum, vector *h_dtSum, vector *v_sigSum,
vector *entropy_dtSum, mask_t mask, mask_t mask_2, short mask_cond) {
......@@ -853,8 +856,11 @@ runner_iact_nonsym_2_vec_force(
const vector hj_inv = vector_load(Hj_inv);
const vector hj_inv_2 = vector_load(&Hj_inv[VEC_SIZE]);
const vector fac_mu =
vector_set1(1.f); /* Will change with cosmological integration */
/* Cosmological terms */
const float fac_mu = pow_three_gamma_minus_five_over_two(a);
const float a2_Hubble = a * a * H;
const vector v_fac_mu = vector_set1(fac_mu);
const vector v_a2_Hubble = vector_set1(a2_Hubble);
/* Find the balsara switch. */
balsara.v = vec_add(balsara_i.v, balsara_j.v);
......@@ -891,18 +897,18 @@ runner_iact_nonsym_2_vec_force(
dvz_2.v = vec_sub(viz.v, vjz_2.v);
/* Compute dv dot r. */
dvdr.v = vec_fma(dvx.v, dx.v, vec_fma(dvy.v, dy.v, vec_mul(dvz.v, dz.v)));
dvdr.v = vec_fma(dvx.v, dx.v, vec_fma(dvy.v, dy.v, vec_fma(dvz.v, dz.v, vec_mul(v_a2_Hubble.v, r2.v))));
dvdr_2.v = vec_fma(dvx_2.v, dx_2.v,
vec_fma(dvy_2.v, dy_2.v, vec_mul(dvz_2.v, dz_2.v)));
vec_fma(dvy_2.v, dy_2.v, vec_fma(dvz_2.v, dz_2.v, vec_mul(v_a2_Hubble.v, r2_2.v))));
/* Compute the relative velocity. (This is 0 if the particles move away from
* each other and negative otherwise) */
omega_ij.v = vec_fmin(dvdr.v, vec_setzero());
omega_ij_2.v = vec_fmin(dvdr_2.v, vec_setzero());
mu_ij.v =
vec_mul(fac_mu.v, vec_mul(ri.v, omega_ij.v)); /* This is 0 or negative */
vec_mul(v_fac_mu.v, vec_mul(ri.v, omega_ij.v)); /* This is 0 or negative */
mu_ij_2.v = vec_mul(
fac_mu.v, vec_mul(ri_2.v, omega_ij_2.v)); /* This is 0 or negative */
v_fac_mu.v, vec_mul(ri_2.v, omega_ij_2.v)); /* This is 0 or negative */
/* Compute signal velocity */
v_sig.v = vec_fnma(vec_set1(3.f), mu_ij.v, vec_add(ci.v, cj.v));
......
......@@ -1111,6 +1111,7 @@ void runner_doself2_force_vec(struct runner *r, struct cell *restrict c) {
#if defined(WITH_VECTORIZATION) && defined(GADGET2_SPH)
const struct engine *e = r->e;
const struct cosmology *restrict cosmo = e->cosmology;
const timebin_t max_active_bin = e->max_active_bin;
struct part *restrict parts = c->parts;
const int count = c->count;
......@@ -1139,6 +1140,10 @@ void runner_doself2_force_vec(struct runner *r, struct cell *restrict c) {
/* Read the particles from the cell and store them locally in the cache. */
cache_read_force_particles(c, cell_cache);
/* Cosmological terms */
const float a = cosmo->a;
const float H = cosmo->H;
/* Loop over the particles in the cell. */
for (int pid = 0; pid < count; pid++) {
......@@ -1262,7 +1267,7 @@ void runner_doself2_force_vec(struct runner *r, struct cell *restrict c) {
&cell_cache->vy[pjd], &cell_cache->vz[pjd], &cell_cache->rho[pjd],
&cell_cache->grad_h[pjd], &cell_cache->pOrho2[pjd],
&cell_cache->balsara[pjd], &cell_cache->soundspeed[pjd],
&cell_cache->m[pjd], v_hi_inv, v_hj_inv, &v_a_hydro_xSum,
&cell_cache->m[pjd], v_hi_inv, v_hj_inv, a, H, &v_a_hydro_xSum,
&v_a_hydro_ySum, &v_a_hydro_zSum, &v_h_dtSum, &v_sigSum,
&v_entropy_dtSum, v_doi_mask);
}
......@@ -1988,6 +1993,7 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
#if defined(WITH_VECTORIZATION) && defined(GADGET2_SPH)
const struct engine *restrict e = r->e;
const struct cosmology *restrict cosmo = e->cosmology;
const timebin_t max_active_bin = e->max_active_bin;
TIMER_TIC;
......@@ -2019,6 +2025,10 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
const int active_ci = cell_is_active_hydro(ci, e) && ci_local;
const int active_cj = cell_is_active_hydro(cj, e) && cj_local;
/* Cosmological terms */
const float a = cosmo->a;
const float H = cosmo->H;
#ifdef SWIFT_DEBUG_CHECKS
/* Check that particles have been drifted to the current time */
for (int pid = 0; pid < count_i; pid++)
......@@ -2213,7 +2223,7 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
&cj_cache->grad_h[cj_cache_idx], &cj_cache->pOrho2[cj_cache_idx],
&cj_cache->balsara[cj_cache_idx],
&cj_cache->soundspeed[cj_cache_idx], &cj_cache->m[cj_cache_idx],
v_hi_inv, v_hj_inv, &v_a_hydro_xSum, &v_a_hydro_ySum,
v_hi_inv, v_hj_inv, a, H, &v_a_hydro_xSum, &v_a_hydro_ySum,
&v_a_hydro_zSum, &v_h_dtSum, &v_sigSum, &v_entropy_dtSum,
v_doi_mask);
}
......@@ -2349,7 +2359,7 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
&ci_cache->grad_h[ci_cache_idx], &ci_cache->pOrho2[ci_cache_idx],
&ci_cache->balsara[ci_cache_idx],
&ci_cache->soundspeed[ci_cache_idx], &ci_cache->m[ci_cache_idx],
v_hj_inv, v_hi_inv, &v_a_hydro_xSum, &v_a_hydro_ySum,
v_hj_inv, v_hi_inv, a, H, &v_a_hydro_xSum, &v_a_hydro_ySum,
&v_a_hydro_zSum, &v_h_dtSum, &v_sigSum, &v_entropy_dtSum,
v_doj_mask);
}
......
......@@ -610,7 +610,7 @@ void test_force_interactions(struct part test_part, struct part *parts,
(viz_vec), rhoi_vec, grad_hi_vec, pOrhoi2_vec, balsara_i_vec,
ci_vec, &(vjxq[i]), &(vjyq[i]), &(vjzq[i]), &(rhojq[i]),
&(grad_hjq[i]), &(pOrhoj2q[i]), &(balsarajq[i]), &(cjq[i]),
&(mjq[i]), hi_inv_vec, &(hj_invq[i]), &a_hydro_xSum, &a_hydro_ySum,
&(mjq[i]), hi_inv_vec, &(hj_invq[i]), a, H, &a_hydro_xSum, &a_hydro_ySum,
&a_hydro_zSum, &h_dtSum, &v_sigSum, &entropy_dtSum, mask, mask2, 0);
} else { /* Only use one vector for interaction. */
......@@ -626,7 +626,7 @@ void test_force_interactions(struct part test_part, struct part *parts,
&my_r2, &my_dx, &my_dy, &my_dz, vix_vec, viy_vec, viz_vec, rhoi_vec,
grad_hi_vec, pOrhoi2_vec, balsara_i_vec, ci_vec, &(vjxq[i]),
&(vjyq[i]), &(vjzq[i]), &(rhojq[i]), &(grad_hjq[i]), &(pOrhoj2q[i]),
&(balsarajq[i]), &(cjq[i]), &(mjq[i]), hi_inv_vec, hj_inv,
&(balsarajq[i]), &(cjq[i]), &(mjq[i]), hi_inv_vec, hj_inv, a, H,
&a_hydro_xSum, &a_hydro_ySum, &a_hydro_zSum, &h_dtSum, &v_sigSum,
&entropy_dtSum, mask);
}
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment