diff --git a/src/hydro/Gadget2/hydro_iact.h b/src/hydro/Gadget2/hydro_iact.h index aaeb23a55d495c06bd8f576c628891c0a4746c12..0253ee11495ca88fc21a824572a4bc60eb2dda33 100644 --- a/src/hydro/Gadget2/hydro_iact.h +++ b/src/hydro/Gadget2/hydro_iact.h @@ -593,7 +593,7 @@ runner_iact_nonsym_1_vec_force( vector piax, piay, piaz; vector pih_dt; vector v_sig; - vector omega_ij, mu_ij, fac_mu, balsara; + vector omega_ij, mu_ij, balsara; vector rho_ij, visc, visc_term, sph_term, acc, entropy_dt; /* Fill vectors. */ @@ -607,7 +607,7 @@ runner_iact_nonsym_1_vec_force( const vector balsara_j = vector_load(Balsara_j); const vector cj = vector_load(Cj); - fac_mu.v = vec_set1(1.f); /* Will change with cosmological integration */ + const vector fac_mu = vector_set1(1.f); /* Will change with cosmological integration */ /* Load stuff. */ balsara.v = vec_add(balsara_i.v, balsara_j.v); @@ -720,7 +720,7 @@ runner_iact_nonsym_2_vec_force( vector piax, piay, piaz; vector pih_dt; vector v_sig; - vector omega_ij, mu_ij, fac_mu, balsara; + vector omega_ij, mu_ij, balsara; vector rho_ij, visc, visc_term, sph_term, acc, entropy_dt; vector r_2, ri_2; @@ -772,7 +772,7 @@ runner_iact_nonsym_2_vec_force( const vector hj_inv = vector_load(Hj_inv); const vector hj_inv_2 = vector_load(&Hj_inv[VEC_SIZE]); - fac_mu.v = vec_set1(1.f); /* Will change with cosmological integration */ + const vector fac_mu = vector_set1(1.f); /* Will change with cosmological integration */ /* Find the balsara switch. */ balsara.v = vec_add(balsara_i.v, balsara_j.v); diff --git a/src/runner_doiact_vec.c b/src/runner_doiact_vec.c index 23d3042ff2c8ed8177c85a68d689a0e27e5ff345..5fc0088b302ea4807a3a6e6ca45bd49b077326a7 100644 --- a/src/runner_doiact_vec.c +++ b/src/runner_doiact_vec.c @@ -583,21 +583,18 @@ __attribute__((always_inline)) INLINE void runner_doself1_density_vec( const float hig2 = hi * hi * kernel_gamma2; const vector v_hig2 = vector_set1(hig2); - /* Reset cumulative sums of update vectors. 
*/ - vector v_rhoSum, v_rho_dhSum, v_wcountSum, v_wcount_dhSum, v_div_vSum, - v_curlvxSum, v_curlvySum, v_curlvzSum; - /* Get the inverse of hi. */ vector v_hi_inv = vec_reciprocal(v_hi); - v_rhoSum.v = vec_setzero(); - v_rho_dhSum.v = vec_setzero(); - v_wcountSum.v = vec_setzero(); - v_wcount_dhSum.v = vec_setzero(); - v_div_vSum.v = vec_setzero(); - v_curlvxSum.v = vec_setzero(); - v_curlvySum.v = vec_setzero(); - v_curlvzSum.v = vec_setzero(); + /* Reset cumulative sums of update vectors. */ + vector v_rhoSum = vector_setzero(); + vector v_rho_dhSum = vector_setzero(); + vector v_wcountSum = vector_setzero(); + vector v_wcount_dhSum = vector_setzero(); + vector v_div_vSum = vector_setzero(); + vector v_curlvxSum = vector_setzero(); + vector v_curlvySum = vector_setzero(); + vector v_curlvzSum = vector_setzero(); /* Pad cache if there is a serial remainder. */ int count_align = count; @@ -798,19 +795,16 @@ __attribute__((always_inline)) INLINE void runner_doself2_force_vec( const float hig2 = hi * hi * kernel_gamma2; const vector v_hig2 = vector_set1(hig2); - /* Reset cumulative sums of update vectors. */ - vector v_a_hydro_xSum, v_a_hydro_ySum, v_a_hydro_zSum, v_h_dtSum, v_sigSum, - v_entropy_dtSum; - /* Get the inverse of hi. */ vector v_hi_inv = vec_reciprocal(v_hi); - v_a_hydro_xSum.v = vec_setzero(); - v_a_hydro_ySum.v = vec_setzero(); - v_a_hydro_zSum.v = vec_setzero(); - v_h_dtSum.v = vec_setzero(); - v_sigSum = vector_set1(pi->force.v_sig); - v_entropy_dtSum.v = vec_setzero(); + /* Reset cumulative sums of update vectors. */ + vector v_a_hydro_xSum = vector_setzero(); + vector v_a_hydro_ySum = vector_setzero(); + vector v_a_hydro_zSum = vector_setzero(); + vector v_h_dtSum = vector_setzero(); + vector v_sigSum = vector_set1(pi->force.v_sig); + vector v_entropy_dtSum = vector_setzero(); /* Pad cache if there is a serial remainder. 
*/ count_align = count; @@ -1049,21 +1043,18 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, const float hig2 = hi * hi * kernel_gamma2; const vector v_hig2 = vector_set1(hig2); - /* Reset cumulative sums of update vectors. */ - vector v_rhoSum, v_rho_dhSum, v_wcountSum, v_wcount_dhSum, v_div_vSum, - v_curlvxSum, v_curlvySum, v_curlvzSum; - /* Get the inverse of hi. */ vector v_hi_inv = vec_reciprocal(v_hi); - v_rhoSum.v = vec_setzero(); - v_rho_dhSum.v = vec_setzero(); - v_wcountSum.v = vec_setzero(); - v_wcount_dhSum.v = vec_setzero(); - v_div_vSum.v = vec_setzero(); - v_curlvxSum.v = vec_setzero(); - v_curlvySum.v = vec_setzero(); - v_curlvzSum.v = vec_setzero(); + /* Reset cumulative sums of update vectors. */ + vector v_rhoSum = vector_setzero(); + vector v_rho_dhSum = vector_setzero(); + vector v_wcountSum = vector_setzero(); + vector v_wcount_dhSum = vector_setzero(); + vector v_div_vSum = vector_setzero(); + vector v_curlvxSum = vector_setzero(); + vector v_curlvySum = vector_setzero(); + vector v_curlvzSum = vector_setzero(); /* Pad the exit iteration if there is a serial remainder. */ int exit_iteration_align = exit_iteration; @@ -1169,21 +1160,18 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, const float hjg2 = hj * hj * kernel_gamma2; const vector v_hjg2 = vector_set1(hjg2); - /* Reset cumulative sums of update vectors. */ - vector v_rhoSum, v_rho_dhSum, v_wcountSum, v_wcount_dhSum, v_div_vSum, - v_curlvxSum, v_curlvySum, v_curlvzSum; - /* Get the inverse of hj. */ vector v_hj_inv = vec_reciprocal(v_hj); - v_rhoSum.v = vec_setzero(); - v_rho_dhSum.v = vec_setzero(); - v_wcountSum.v = vec_setzero(); - v_wcount_dhSum.v = vec_setzero(); - v_div_vSum.v = vec_setzero(); - v_curlvxSum.v = vec_setzero(); - v_curlvySum.v = vec_setzero(); - v_curlvzSum.v = vec_setzero(); + /* Reset cumulative sums of update vectors. 
*/ + vector v_rhoSum = vector_setzero(); + vector v_rho_dhSum = vector_setzero(); + vector v_wcountSum = vector_setzero(); + vector v_wcount_dhSum = vector_setzero(); + vector v_div_vSum = vector_setzero(); + vector v_curlvxSum = vector_setzero(); + vector v_curlvySum = vector_setzero(); + vector v_curlvzSum = vector_setzero(); /* Convert exit iteration to cache indices. */ int exit_iteration_align = exit_iteration - first_pi; @@ -1416,19 +1404,16 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci, const float hig2 = hi * hi * kernel_gamma2; const vector v_hig2 = vector_set1(hig2); - /* Reset cumulative sums of update vectors. */ - vector v_a_hydro_xSum, v_a_hydro_ySum, v_a_hydro_zSum, v_h_dtSum, - v_sigSum, v_entropy_dtSum; - /* Get the inverse of hi. */ vector v_hi_inv = vec_reciprocal(v_hi); - v_a_hydro_xSum.v = vec_setzero(); - v_a_hydro_ySum.v = vec_setzero(); - v_a_hydro_zSum.v = vec_setzero(); - v_h_dtSum.v = vec_setzero(); - v_sigSum = vector_set1(pi->force.v_sig); - v_entropy_dtSum.v = vec_setzero(); + /* Reset cumulative sums of update vectors. */ + vector v_a_hydro_xSum = vector_setzero(); + vector v_a_hydro_ySum = vector_setzero(); + vector v_a_hydro_zSum = vector_setzero(); + vector v_h_dtSum = vector_setzero(); + vector v_sigSum = vector_set1(pi->force.v_sig); + vector v_entropy_dtSum = vector_setzero(); /* Pad the exit iteration if there is a serial remainder. */ int exit_iteration_align = exit_iteration; @@ -1550,19 +1535,16 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci, const float hjg2 = hj * hj * kernel_gamma2; const vector v_hjg2 = vector_set1(hjg2); - /* Reset cumulative sums of update vectors. */ - vector v_a_hydro_xSum, v_a_hydro_ySum, v_a_hydro_zSum, v_h_dtSum, - v_sigSum, v_entropy_dtSum; - /* Get the inverse of hj. 
*/ vector v_hj_inv = vec_reciprocal(v_hj); - v_a_hydro_xSum.v = vec_setzero(); - v_a_hydro_ySum.v = vec_setzero(); - v_a_hydro_zSum.v = vec_setzero(); - v_h_dtSum.v = vec_setzero(); - v_sigSum = vector_set1(pj->force.v_sig); - v_entropy_dtSum.v = vec_setzero(); + /* Reset cumulative sums of update vectors. */ + vector v_a_hydro_xSum = vector_setzero(); + vector v_a_hydro_ySum = vector_setzero(); + vector v_a_hydro_zSum = vector_setzero(); + vector v_h_dtSum = vector_setzero(); + vector v_sigSum = vector_set1(pj->force.v_sig); + vector v_entropy_dtSum = vector_setzero(); /* Convert exit iteration to cache indices. */ int exit_iteration_align = exit_iteration - first_pi; diff --git a/src/vector.h b/src/vector.h index 4907101a017caec1f4635299e3d420853c572083..b3606cc1491995813491e946cf231d98e482844e 100644 --- a/src/vector.h +++ b/src/vector.h @@ -461,6 +461,20 @@ __attribute__((always_inline)) INLINE vector vector_set1(const float x) { return temp; } +/** + * @brief Returns a #vector filled with zeros. + * + * @return A #vector whose .v member is set by vec_setzero(). + */ +__attribute__((always_inline)) INLINE vector vector_setzero(void) { + + vector temp; + + temp.v = vec_setzero(); + + return temp; +} + #else /* Needed for cache alignment. */ #define VEC_SIZE 8