Commit d71aa3df authored by Matthieu Schaller
Browse files

Bring variable definitions closer to their actual use. Better readability.

parent 722cf879
...@@ -532,8 +532,6 @@ __attribute__((always_inline)) INLINE void runner_doself1_density_vec( ...@@ -532,8 +532,6 @@ __attribute__((always_inline)) INLINE void runner_doself1_density_vec(
struct part *restrict parts = c->parts; struct part *restrict parts = c->parts;
const int count = c->count; const int count = c->count;
vector v_hi, v_vix, v_viy, v_viz, v_hig2, v_r2;
TIMER_TIC; TIMER_TIC;
/* Anything to do here? */ /* Anything to do here? */
...@@ -566,6 +564,7 @@ __attribute__((always_inline)) INLINE void runner_doself1_density_vec( ...@@ -566,6 +564,7 @@ __attribute__((always_inline)) INLINE void runner_doself1_density_vec(
if (!part_is_active_no_debug(pi, max_active_bin)) continue; if (!part_is_active_no_debug(pi, max_active_bin)) continue;
vector v_pix, v_piy, v_piz; vector v_pix, v_piy, v_piz;
vector v_hi, v_vix, v_viy, v_viz, v_hig2, v_r2;
const float hi = cell_cache->h[pid]; const float hi = cell_cache->h[pid];
...@@ -586,9 +585,7 @@ __attribute__((always_inline)) INLINE void runner_doself1_density_vec( ...@@ -586,9 +585,7 @@ __attribute__((always_inline)) INLINE void runner_doself1_density_vec(
v_curlvxSum, v_curlvySum, v_curlvzSum; v_curlvxSum, v_curlvySum, v_curlvzSum;
/* Get the inverse of hi. */ /* Get the inverse of hi. */
vector v_hi_inv; vector v_hi_inv = vec_reciprocal(v_hi);
v_hi_inv = vec_reciprocal(v_hi);
v_rhoSum.v = vec_setzero(); v_rhoSum.v = vec_setzero();
v_rho_dhSum.v = vec_setzero(); v_rho_dhSum.v = vec_setzero();
...@@ -754,9 +751,6 @@ __attribute__((always_inline)) INLINE void runner_doself2_force_vec( ...@@ -754,9 +751,6 @@ __attribute__((always_inline)) INLINE void runner_doself2_force_vec(
struct part *restrict parts = c->parts; struct part *restrict parts = c->parts;
const int count = c->count; const int count = c->count;
vector v_hi, v_vix, v_viy, v_viz, v_hig2, v_r2;
vector v_rhoi, v_grad_hi, v_pOrhoi2, v_balsara_i, v_ci;
TIMER_TIC; TIMER_TIC;
if (!cell_is_active(c, e)) return; if (!cell_is_active(c, e)) return;
...@@ -793,6 +787,8 @@ __attribute__((always_inline)) INLINE void runner_doself2_force_vec( ...@@ -793,6 +787,8 @@ __attribute__((always_inline)) INLINE void runner_doself2_force_vec(
if (!part_is_active_no_debug(pi, max_active_bin)) continue; if (!part_is_active_no_debug(pi, max_active_bin)) continue;
vector v_pix, v_piy, v_piz; vector v_pix, v_piy, v_piz;
vector v_hi, v_vix, v_viy, v_viz, v_hig2;
vector v_rhoi, v_grad_hi, v_pOrhoi2, v_balsara_i, v_ci;
const float hi = cell_cache->h[pid]; const float hi = cell_cache->h[pid];
...@@ -819,9 +815,7 @@ __attribute__((always_inline)) INLINE void runner_doself2_force_vec( ...@@ -819,9 +815,7 @@ __attribute__((always_inline)) INLINE void runner_doself2_force_vec(
v_entropy_dtSum; v_entropy_dtSum;
/* Get the inverse of hi. */ /* Get the inverse of hi. */
vector v_hi_inv; vector v_hi_inv = vec_reciprocal(v_hi);
v_hi_inv = vec_reciprocal(v_hi);
v_a_hydro_xSum.v = vec_setzero(); v_a_hydro_xSum.v = vec_setzero();
v_a_hydro_ySum.v = vec_setzero(); v_a_hydro_ySum.v = vec_setzero();
...@@ -853,13 +847,12 @@ __attribute__((always_inline)) INLINE void runner_doself2_force_vec( ...@@ -853,13 +847,12 @@ __attribute__((always_inline)) INLINE void runner_doself2_force_vec(
} }
} }
vector v_pjx, v_pjy, v_pjz, hj, hjg2;
/* Find all of particle pi's interactions and store needed values in the /* Find all of particle pi's interactions and store needed values in the
* secondary cache.*/ * secondary cache.*/
for (int pjd = 0; pjd < count_align; pjd += (num_vec_proc * VEC_SIZE)) { for (int pjd = 0; pjd < count_align; pjd += (num_vec_proc * VEC_SIZE)) {
/* Load 1 set of vectors from the particle cache. */ /* Load 1 set of vectors from the particle cache. */
vector v_pjx, v_pjy, v_pjz, hj, hjg2;
v_pjx.v = vec_load(&cell_cache->x[pjd]); v_pjx.v = vec_load(&cell_cache->x[pjd]);
v_pjy.v = vec_load(&cell_cache->y[pjd]); v_pjy.v = vec_load(&cell_cache->y[pjd]);
v_pjz.v = vec_load(&cell_cache->z[pjd]); v_pjz.v = vec_load(&cell_cache->z[pjd]);
...@@ -867,8 +860,7 @@ __attribute__((always_inline)) INLINE void runner_doself2_force_vec( ...@@ -867,8 +860,7 @@ __attribute__((always_inline)) INLINE void runner_doself2_force_vec(
hjg2.v = vec_mul(vec_mul(hj.v, hj.v), kernel_gamma2_vec.v); hjg2.v = vec_mul(vec_mul(hj.v, hj.v), kernel_gamma2_vec.v);
/* Compute the pairwise distance. */ /* Compute the pairwise distance. */
vector v_dx, v_dy, v_dz; vector v_dx, v_dy, v_dz, v_r2;
v_dx.v = vec_sub(v_pix.v, v_pjx.v); v_dx.v = vec_sub(v_pix.v, v_pjx.v);
v_dy.v = vec_sub(v_piy.v, v_pjy.v); v_dy.v = vec_sub(v_piy.v, v_pjy.v);
v_dz.v = vec_sub(v_piz.v, v_pjz.v); v_dz.v = vec_sub(v_piz.v, v_pjz.v);
...@@ -895,12 +887,10 @@ __attribute__((always_inline)) INLINE void runner_doself2_force_vec( ...@@ -895,12 +887,10 @@ __attribute__((always_inline)) INLINE void runner_doself2_force_vec(
/* If there are any interactions perform them. */ /* If there are any interactions perform them. */
if (doi_mask) { if (doi_mask) {
vector v_hj_inv; vector v_hj_inv = vec_reciprocal(hj);
v_hj_inv = vec_reciprocal(hj);
/* To stop floating point exceptions for when particle separations are /* To stop floating point exceptions for when particle separations are
* 0. * 0. */
*/
v_r2.v = vec_add(v_r2.v, vec_set1(FLT_MIN)); v_r2.v = vec_add(v_r2.v, vec_set1(FLT_MIN));
runner_iact_nonsym_1_vec_force( runner_iact_nonsym_1_vec_force(
...@@ -947,8 +937,6 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, ...@@ -947,8 +937,6 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
const struct engine *restrict e = r->e; const struct engine *restrict e = r->e;
const timebin_t max_active_bin = e->max_active_bin; const timebin_t max_active_bin = e->max_active_bin;
vector v_hi, v_vix, v_viy, v_viz, v_hig2;
TIMER_TIC; TIMER_TIC;
/* Get the cutoff shift. */ /* Get the cutoff shift. */
...@@ -1064,6 +1052,7 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, ...@@ -1064,6 +1052,7 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
const int exit_iteration = max_index_i[pid]; const int exit_iteration = max_index_i[pid];
vector v_pix, v_piy, v_piz; vector v_pix, v_piy, v_piz;
vector v_hi, v_vix, v_viy, v_viz, v_hig2;
/* Fill particle pi vectors. */ /* Fill particle pi vectors. */
v_pix.v = vec_set1(ci_cache->x[ci_cache_idx]); v_pix.v = vec_set1(ci_cache->x[ci_cache_idx]);
...@@ -1082,9 +1071,7 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, ...@@ -1082,9 +1071,7 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
v_curlvxSum, v_curlvySum, v_curlvzSum; v_curlvxSum, v_curlvySum, v_curlvzSum;
/* Get the inverse of hi. */ /* Get the inverse of hi. */
vector v_hi_inv; vector v_hi_inv = vec_reciprocal(v_hi);
v_hi_inv = vec_reciprocal(v_hi);
v_rhoSum.v = vec_setzero(); v_rhoSum.v = vec_setzero();
v_rho_dhSum.v = vec_setzero(); v_rho_dhSum.v = vec_setzero();
...@@ -1105,14 +1092,13 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, ...@@ -1105,14 +1092,13 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
exit_iteration_align += pad; exit_iteration_align += pad;
} }
vector v_pjx, v_pjy, v_pjz;
/* Loop over the parts in cj. */ /* Loop over the parts in cj. */
for (int pjd = 0; pjd <= exit_iteration_align; pjd += VEC_SIZE) { for (int pjd = 0; pjd <= exit_iteration_align; pjd += VEC_SIZE) {
/* Get the cache index to the jth particle. */ /* Get the cache index to the jth particle. */
const int cj_cache_idx = pjd; const int cj_cache_idx = pjd;
vector v_pjx, v_pjy, v_pjz;
vector v_dx, v_dy, v_dz, v_r2; vector v_dx, v_dy, v_dz, v_r2;
#ifdef SWIFT_DEBUG_CHECKS #ifdef SWIFT_DEBUG_CHECKS
...@@ -1211,9 +1197,7 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, ...@@ -1211,9 +1197,7 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
v_curlvxSum, v_curlvySum, v_curlvzSum; v_curlvxSum, v_curlvySum, v_curlvzSum;
/* Get the inverse of hj. */ /* Get the inverse of hj. */
vector v_hj_inv; vector v_hj_inv = vec_reciprocal(v_hj);
v_hj_inv = vec_reciprocal(v_hj);
v_rhoSum.v = vec_setzero(); v_rhoSum.v = vec_setzero();
v_rho_dhSum.v = vec_setzero(); v_rho_dhSum.v = vec_setzero();
...@@ -1224,8 +1208,6 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, ...@@ -1224,8 +1208,6 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
v_curlvySum.v = vec_setzero(); v_curlvySum.v = vec_setzero();
v_curlvzSum.v = vec_setzero(); v_curlvzSum.v = vec_setzero();
vector v_pix, v_piy, v_piz;
/* Convert exit iteration to cache indices. */ /* Convert exit iteration to cache indices. */
int exit_iteration_align = exit_iteration - first_pi; int exit_iteration_align = exit_iteration - first_pi;
...@@ -1250,6 +1232,7 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, ...@@ -1250,6 +1232,7 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
} }
#endif #endif
vector v_pix, v_piy, v_piz;
vector v_dx, v_dy, v_dz, v_r2; vector v_dx, v_dy, v_dz, v_r2;
/* Load 2 sets of vectors from the particle cache. */ /* Load 2 sets of vectors from the particle cache. */
...@@ -1323,9 +1306,6 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci, ...@@ -1323,9 +1306,6 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
const struct engine *restrict e = r->e; const struct engine *restrict e = r->e;
const timebin_t max_active_bin = e->max_active_bin; const timebin_t max_active_bin = e->max_active_bin;
vector v_hi, v_vix, v_viy, v_viz, v_hig2, v_r2;
vector v_rhoi, v_grad_hi, v_pOrhoi2, v_balsara_i, v_ci;
TIMER_TIC; TIMER_TIC;
/* Get the cutoff shift. */ /* Get the cutoff shift. */
...@@ -1448,6 +1428,8 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci, ...@@ -1448,6 +1428,8 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
const int exit_iteration = max_index_i[pid]; const int exit_iteration = max_index_i[pid];
vector v_pix, v_piy, v_piz; vector v_pix, v_piy, v_piz;
vector v_hi, v_vix, v_viy, v_viz, v_hig2;
vector v_rhoi, v_grad_hi, v_pOrhoi2, v_balsara_i, v_ci;
/* Fill particle pi vectors. */ /* Fill particle pi vectors. */
v_pix.v = vec_set1(ci_cache->x[ci_cache_idx]); v_pix.v = vec_set1(ci_cache->x[ci_cache_idx]);
...@@ -1472,8 +1454,7 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci, ...@@ -1472,8 +1454,7 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
v_sigSum, v_entropy_dtSum; v_sigSum, v_entropy_dtSum;
/* Get the inverse of hi. */ /* Get the inverse of hi. */
vector v_hi_inv; vector v_hi_inv = vec_reciprocal(v_hi);
v_hi_inv = vec_reciprocal(v_hi);
v_a_hydro_xSum.v = vec_setzero(); v_a_hydro_xSum.v = vec_setzero();
v_a_hydro_ySum.v = vec_setzero(); v_a_hydro_ySum.v = vec_setzero();
...@@ -1492,8 +1473,6 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci, ...@@ -1492,8 +1473,6 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
exit_iteration_align += pad; exit_iteration_align += pad;
} }
vector v_pjx, v_pjy, v_pjz, hj, hjg2;
/* Loop over the parts in cj. */ /* Loop over the parts in cj. */
for (int pjd = 0; pjd <= exit_iteration_align; pjd += VEC_SIZE) { for (int pjd = 0; pjd <= exit_iteration_align; pjd += VEC_SIZE) {
...@@ -1501,6 +1480,7 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci, ...@@ -1501,6 +1480,7 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
const int cj_cache_idx = pjd; const int cj_cache_idx = pjd;
vector v_dx, v_dy, v_dz; vector v_dx, v_dy, v_dz;
vector v_pjx, v_pjy, v_pjz, v_hj, v_hjg2, v_r2;
#ifdef SWIFT_DEBUG_CHECKS #ifdef SWIFT_DEBUG_CHECKS
if (cj_cache_idx % VEC_SIZE != 0 || cj_cache_idx < 0 || if (cj_cache_idx % VEC_SIZE != 0 || cj_cache_idx < 0 ||
...@@ -1514,8 +1494,8 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci, ...@@ -1514,8 +1494,8 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
v_pjx.v = vec_load(&cj_cache->x[cj_cache_idx]); v_pjx.v = vec_load(&cj_cache->x[cj_cache_idx]);
v_pjy.v = vec_load(&cj_cache->y[cj_cache_idx]); v_pjy.v = vec_load(&cj_cache->y[cj_cache_idx]);
v_pjz.v = vec_load(&cj_cache->z[cj_cache_idx]); v_pjz.v = vec_load(&cj_cache->z[cj_cache_idx]);
hj.v = vec_load(&cj_cache->h[cj_cache_idx]); v_hj.v = vec_load(&cj_cache->h[cj_cache_idx]);
hjg2.v = vec_mul(vec_mul(hj.v, hj.v), kernel_gamma2_vec.v); v_hjg2.v = vec_mul(vec_mul(v_hj.v, v_hj.v), kernel_gamma2_vec.v);
/* Compute the pairwise distance. */ /* Compute the pairwise distance. */
v_dx.v = vec_sub(v_pix.v, v_pjx.v); v_dx.v = vec_sub(v_pix.v, v_pjx.v);
...@@ -1531,7 +1511,7 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci, ...@@ -1531,7 +1511,7 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
/* Form a mask from r2 < hig2 mask and r2 < hjg2 mask. */ /* Form a mask from r2 < hig2 mask and r2 < hjg2 mask. */
vector v_h2; vector v_h2;
v_h2.v = vec_fmax(v_hig2.v, hjg2.v); v_h2.v = vec_fmax(v_hig2.v, v_hjg2.v);
vec_create_mask(v_doi_mask, vec_cmp_lt(v_r2.v, v_h2.v)); vec_create_mask(v_doi_mask, vec_cmp_lt(v_r2.v, v_h2.v));
/* Form integer masks. */ /* Form integer masks. */
...@@ -1539,8 +1519,7 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci, ...@@ -1539,8 +1519,7 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
/* If there are any interactions perform them. */ /* If there are any interactions perform them. */
if (doi_mask) { if (doi_mask) {
vector v_hj_inv; vector v_hj_inv = vec_reciprocal(v_hj);
v_hj_inv = vec_reciprocal(hj);
runner_iact_nonsym_1_vec_force( runner_iact_nonsym_1_vec_force(
&v_r2, &v_dx, &v_dy, &v_dz, v_vix, v_viy, v_viz, v_rhoi, &v_r2, &v_dx, &v_dy, &v_dz, v_vix, v_viy, v_viz, v_rhoi,
...@@ -1616,9 +1595,7 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci, ...@@ -1616,9 +1595,7 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
v_sigSum, v_entropy_dtSum; v_sigSum, v_entropy_dtSum;
/* Get the inverse of hj. */ /* Get the inverse of hj. */
vector v_hj_inv; vector v_hj_inv = vec_reciprocal(v_hj);
v_hj_inv = vec_reciprocal(v_hj);
v_a_hydro_xSum.v = vec_setzero(); v_a_hydro_xSum.v = vec_setzero();
v_a_hydro_ySum.v = vec_setzero(); v_a_hydro_ySum.v = vec_setzero();
...@@ -1637,8 +1614,6 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci, ...@@ -1637,8 +1614,6 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
} else } else
exit_iteration_align -= rem; exit_iteration_align -= rem;
vector v_pix, v_piy, v_piz, hi, hig2;
/* Loop over the parts in ci. */ /* Loop over the parts in ci. */
for (int ci_cache_idx = exit_iteration_align; for (int ci_cache_idx = exit_iteration_align;
ci_cache_idx < ci_cache_count; ci_cache_idx += VEC_SIZE) { ci_cache_idx < ci_cache_count; ci_cache_idx += VEC_SIZE) {
...@@ -1649,14 +1624,15 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci, ...@@ -1649,14 +1624,15 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
} }
#endif #endif
vector v_dx, v_dy, v_dz; vector v_pix, v_piy, v_piz, v_hi, v_hig2;
vector v_dx, v_dy, v_dz, v_r2;
/* Load 2 sets of vectors from the particle cache. */ /* Load 2 sets of vectors from the particle cache. */
v_pix.v = vec_load(&ci_cache->x[ci_cache_idx]); v_pix.v = vec_load(&ci_cache->x[ci_cache_idx]);
v_piy.v = vec_load(&ci_cache->y[ci_cache_idx]); v_piy.v = vec_load(&ci_cache->y[ci_cache_idx]);
v_piz.v = vec_load(&ci_cache->z[ci_cache_idx]); v_piz.v = vec_load(&ci_cache->z[ci_cache_idx]);
hi.v = vec_load(&ci_cache->h[ci_cache_idx]); v_hi.v = vec_load(&ci_cache->h[ci_cache_idx]);
hig2.v = vec_mul(vec_mul(hi.v, hi.v), kernel_gamma2_vec.v); v_hig2.v = vec_mul(vec_mul(v_hi.v, v_hi.v), kernel_gamma2_vec.v);
/* Compute the pairwise distance. */ /* Compute the pairwise distance. */
v_dx.v = vec_sub(v_pjx.v, v_pix.v); v_dx.v = vec_sub(v_pjx.v, v_pix.v);
...@@ -1672,7 +1648,7 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci, ...@@ -1672,7 +1648,7 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
/* Form a mask from r2 < hig2 mask and r2 < hjg2 mask. */ /* Form a mask from r2 < hig2 mask and r2 < hjg2 mask. */
vector v_h2; vector v_h2;
v_h2.v = vec_fmax(v_hjg2.v, hig2.v); v_h2.v = vec_fmax(v_hjg2.v, v_hig2.v);
vec_create_mask(v_doj_mask, vec_cmp_lt(v_r2.v, v_h2.v)); vec_create_mask(v_doj_mask, vec_cmp_lt(v_r2.v, v_h2.v));
/* Form integer masks. */ /* Form integer masks. */
...@@ -1680,8 +1656,7 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci, ...@@ -1680,8 +1656,7 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
/* If there are any interactions perform them. */ /* If there are any interactions perform them. */
if (doj_mask) { if (doj_mask) {
vector v_hi_inv; vector v_hi_inv = vec_reciprocal(v_hi);
v_hi_inv = vec_reciprocal(hi);
runner_iact_nonsym_1_vec_force( runner_iact_nonsym_1_vec_force(
&v_r2, &v_dx, &v_dy, &v_dz, v_vjx, v_vjy, v_vjz, v_rhoj, &v_r2, &v_dx, &v_dy, &v_dz, v_vjx, v_vjy, v_vjz, v_rhoj,
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment