Skip to content
Snippets Groups Projects
Commit bcffe08f authored by Matthieu Schaller
Browse files

Code formatting. Small style changes. Updates some comments in the cache construction.

parent 52507e8b
No related branches found
No related tags found
1 merge request: !440 "Dopair2 vectorisation"
......@@ -198,8 +198,8 @@ __attribute__((always_inline)) INLINE void cache_read_particles(
swift_declare_aligned_ptr(float, vz, ci_cache->vz, SWIFT_CACHE_ALIGNMENT);
const struct part *restrict parts = ci->parts;
const double loc[3] = {ci->loc[0], ci->loc[1],ci->loc[2]};
const double loc[3] = {ci->loc[0], ci->loc[1], ci->loc[2]};
/* Shift the particles positions to a local frame so single precision can be
* used instead of double precision. */
for (int i = 0; i < ci->count; i++) {
......@@ -250,7 +250,7 @@ __attribute__((always_inline)) INLINE void cache_read_force_particles(
SWIFT_CACHE_ALIGNMENT);
const struct part *restrict parts = ci->parts;
const double loc[3] = {ci->loc[0], ci->loc[1],ci->loc[2]};
const double loc[3] = {ci->loc[0], ci->loc[1], ci->loc[2]};
/* Shift the particles positions to a local frame so single precision can be
* used instead of double precision. */
......@@ -296,7 +296,6 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
const struct entry *restrict sort_j, const double *restrict const shift,
int *first_pi, int *last_pj) {
int idx;
/* Pad number of particles read to the vector size. */
int rem = (ci->count - *first_pi) % VEC_SIZE;
if (rem != 0) {
......@@ -312,17 +311,17 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
if (*last_pj + pad < cj->count) *last_pj += pad;
}
/* Get some local pointers */
const int first_pi_align = *first_pi;
const int last_pj_align = *last_pj;
const struct part *restrict parts_i = ci->parts;
const struct part *restrict parts_j = cj->parts;
/* Shift particles to the local frame and account for boundary conditions.*/
const double total_ci_shift[3] = {cj->loc[0] + shift[0], cj->loc[1] + shift[1],
cj->loc[2] + shift[2]};
const double total_cj_shift[3] = {cj->loc[0], cj->loc[1],
cj->loc[2]};
const double total_ci_shift[3] = {
cj->loc[0] + shift[0], cj->loc[1] + shift[1], cj->loc[2] + shift[2]};
const double total_cj_shift[3] = {cj->loc[0], cj->loc[1], cj->loc[2]};
/* Let the compiler know that the data is aligned and create pointers to the
* arrays inside the cache. */
swift_declare_aligned_ptr(float, x, ci_cache->x, SWIFT_CACHE_ALIGNMENT);
......@@ -335,16 +334,11 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
swift_declare_aligned_ptr(float, vz, ci_cache->vz, SWIFT_CACHE_ALIGNMENT);
int ci_cache_count = ci->count - first_pi_align;
/* Shift the particles positions to a local frame (ci frame) so single
* precision
* can be
* used instead of double precision. Also shift the cell ci, particles
* positions
* due to BCs but leave cell cj. */
* precision can be used instead of double precision. */
for (int i = 0; i < ci_cache_count; i++) {
/* Make sure ci_cache is filled from the first element. */
idx = sort_i[i + first_pi_align].i;
const int idx = sort_i[i + first_pi_align].i;
x[i] = (float)(parts_i[idx].x[0] - total_ci_shift[0]);
y[i] = (float)(parts_i[idx].x[1] - total_ci_shift[1]);
z[i] = (float)(parts_i[idx].x[2] - total_ci_shift[2]);
......@@ -371,30 +365,31 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
"is not within "
"[-4*ci->width*(1 + 2*space_maxreldx), 4*ci->width*(1 + "
"2*space_maxreldx)]. x=%f, ci->width[0]=%f",
ci->loc[0], ci->loc[1], ci->loc[2], cj->loc[0], cj->loc[1], cj->loc[2], i, x[i],
ci->width[0]);
ci->loc[0], ci->loc[1], ci->loc[2], cj->loc[0], cj->loc[1],
cj->loc[2], i, x[i], ci->width[0]);
if (y[i] > shift_threshold_y || y[i] < -shift_threshold_y)
error(
"Error: ci->loc[%lf,%lf,%lf], cj->loc[%lf,%lf,%lf] Particle %d y pos "
"is not within "
"[-4*ci->width*(1 + 2*space_maxreldx), 4*ci->width*(1 + "
"2*space_maxreldx)]. y=%f, ci->width[1]=%f",
ci->loc[0], ci->loc[1], ci->loc[2], cj->loc[0], cj->loc[1], cj->loc[2], i, y[i],
ci->width[1]);
ci->loc[0], ci->loc[1], ci->loc[2], cj->loc[0], cj->loc[1],
cj->loc[2], i, y[i], ci->width[1]);
if (z[i] > shift_threshold_z || z[i] < -shift_threshold_z)
error(
"Error: ci->loc[%lf,%lf,%lf], cj->loc[%lf,%lf,%lf] Particle %d z pos "
"is not within "
"[-4*ci->width*(1 + 2*space_maxreldx), 4*ci->width*(1 + "
"2*space_maxreldx)]. z=%f, ci->width[2]=%f",
ci->loc[0], ci->loc[1], ci->loc[2], cj->loc[0], cj->loc[1], cj->loc[2], i, z[i],
ci->width[2]);
ci->loc[0], ci->loc[1], ci->loc[2], cj->loc[0], cj->loc[1],
cj->loc[2], i, z[i], ci->width[2]);
}
#endif
/* Pad cache with fake particles that exist outside the cell so will not
* interact.*/
const float max_dx = max(ci->dx_max_part, cj->dx_max_part);
* interact. We use values of the same magnitude (but negative!) as the real
* particles to avoid overflow problems. */
const double max_dx = max(ci->dx_max_part, cj->dx_max_part);
const float pos_padded[3] = {-(2. * ci->width[0] + max_dx),
-(2. * ci->width[1] + max_dx),
-(2. * ci->width[2] + max_dx)};
......@@ -425,7 +420,7 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
swift_declare_aligned_ptr(float, vzj, cj_cache->vz, SWIFT_CACHE_ALIGNMENT);
for (int i = 0; i <= last_pj_align; i++) {
idx = sort_j[i].i;
const int idx = sort_j[i].i;
xj[i] = (float)(parts_j[idx].x[0] - total_cj_shift[0]);
yj[i] = (float)(parts_j[idx].x[1] - total_cj_shift[1]);
zj[i] = (float)(parts_j[idx].x[2] - total_cj_shift[2]);
......@@ -445,29 +440,30 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
"pos is not within "
"[-4*ci->width*(1 + 2*space_maxreldx), 4*ci->width*(1 + "
"2*space_maxreldx)]. xj=%f, ci->width[0]=%f",
ci->loc[0], ci->loc[1], ci->loc[2], cj->loc[0], cj->loc[1], cj->loc[2], i, xj[i],
ci->width[0]);
ci->loc[0], ci->loc[1], ci->loc[2], cj->loc[0], cj->loc[1],
cj->loc[2], i, xj[i], ci->width[0]);
if (yj[i] > shift_threshold_y || yj[i] < -shift_threshold_y)
error(
"Error: ci->loc[%lf,%lf,%lf], cj->loc[%lf,%lf,%lf] Particle %d yj "
"pos is not within "
"[-4*ci->width*(1 + 2*space_maxreldx), 4*ci->width*(1 + "
"2*space_maxreldx)]. yj=%f, ci->width[1]=%f",
ci->loc[0], ci->loc[1], ci->loc[2], cj->loc[0], cj->loc[1], cj->loc[2], i, yj[i],
ci->width[1]);
ci->loc[0], ci->loc[1], ci->loc[2], cj->loc[0], cj->loc[1],
cj->loc[2], i, yj[i], ci->width[1]);
if (zj[i] > shift_threshold_z || zj[i] < -shift_threshold_z)
error(
"Error: ci->loc[%lf,%lf,%lf], cj->loc[%lf,%lf,%lf] Particle %d zj "
"pos is not within "
"[-4*ci->width*(1 + 2*space_maxreldx), 4*ci->width*(1 + "
"2*space_maxreldx)]. zj=%f, ci->width[2]=%f",
ci->loc[0], ci->loc[1], ci->loc[2], cj->loc[0], cj->loc[1], cj->loc[2], i, zj[i],
ci->width[2]);
ci->loc[0], ci->loc[1], ci->loc[2], cj->loc[0], cj->loc[1],
cj->loc[2], i, zj[i], ci->width[2]);
}
#endif
/* Pad cache with fake particles that exist outside the cell so will not
* interact.*/
* interact. We use values of the same magnitude (but negative!) as the real
* particles to avoid overflow problems. */
const float pos_padded_j[3] = {-(2. * cj->width[0] + max_dx),
-(2. * cj->width[1] + max_dx),
-(2. * cj->width[2] + max_dx)};
......@@ -508,7 +504,6 @@ cache_read_two_partial_cells_sorted_force(
const struct entry *restrict sort_i, const struct entry *restrict sort_j,
const double *const shift, int *first_pi, int *last_pj) {
int idx;
/* Pad number of particles read to the vector size. */
int rem = (ci->count - *first_pi) % VEC_SIZE;
if (rem != 0) {
......@@ -524,16 +519,16 @@ cache_read_two_partial_cells_sorted_force(
if (*last_pj + pad < cj->count) *last_pj += pad;
}
/* Get some local pointers */
const int first_pi_align = *first_pi;
const int last_pj_align = *last_pj;
const struct part *restrict parts_i = ci->parts;
const struct part *restrict parts_j = cj->parts;
/* Shift particles to the local frame and account for boundary conditions.*/
const double total_ci_shift[3] = {cj->loc[0] + shift[0], cj->loc[1] + shift[1],
cj->loc[2] + shift[2]};
const double total_cj_shift[3] = {cj->loc[0], cj->loc[1],
cj->loc[2]};
const double total_ci_shift[3] = {
cj->loc[0] + shift[0], cj->loc[1] + shift[1], cj->loc[2] + shift[2]};
const double total_cj_shift[3] = {cj->loc[0], cj->loc[1], cj->loc[2]};
/* Let the compiler know that the data is aligned and create pointers to the
* arrays inside the cache. */
......@@ -557,14 +552,10 @@ cache_read_two_partial_cells_sorted_force(
int ci_cache_count = ci->count - first_pi_align;
/* Shift the particles positions to a local frame (ci frame) so single
* precision
* can be
* used instead of double precision. Also shift the cell ci, particles
* positions
* due to BCs but leave cell cj. */
* precision can be used instead of double precision. */
for (int i = 0; i < ci_cache_count; i++) {
/* Make sure ci_cache is filled from the first element. */
idx = sort_i[i + first_pi_align].i;
const int idx = sort_i[i + first_pi_align].i;
x[i] = (float)(parts_i[idx].x[0] - total_ci_shift[0]);
y[i] = (float)(parts_i[idx].x[1] - total_ci_shift[1]);
z[i] = (float)(parts_i[idx].x[2] - total_ci_shift[2]);
......@@ -581,8 +572,9 @@ cache_read_two_partial_cells_sorted_force(
}
/* Pad cache with fake particles that exist outside the cell so will not
* interact.*/
const float max_dx = max(ci->dx_max_part, cj->dx_max_part);
* interact. We use values of the same magnitude (but negative!) as the real
* particles to avoid overflow problems. */
const double max_dx = max(ci->dx_max_part, cj->dx_max_part);
const float pos_padded[3] = {-(2. * ci->width[0] + max_dx),
-(2. * ci->width[1] + max_dx),
-(2. * ci->width[2] + max_dx)};
......@@ -626,7 +618,7 @@ cache_read_two_partial_cells_sorted_force(
SWIFT_CACHE_ALIGNMENT);
for (int i = 0; i <= last_pj_align; i++) {
idx = sort_j[i].i;
const int idx = sort_j[i].i;
xj[i] = (float)(parts_j[idx].x[0] - total_cj_shift[0]);
yj[i] = (float)(parts_j[idx].x[1] - total_cj_shift[1]);
zj[i] = (float)(parts_j[idx].x[2] - total_cj_shift[2]);
......@@ -643,7 +635,8 @@ cache_read_two_partial_cells_sorted_force(
}
/* Pad cache with fake particles that exist outside the cell so will not
* interact.*/
* interact. We use values of the same magnitude (but negative!) as the real
* particles to avoid overflow problems. */
const float pos_padded_j[3] = {-(2. * cj->width[0] + max_dx),
-(2. * cj->width[1] + max_dx),
-(2. * cj->width[2] + max_dx)};
......
......@@ -41,9 +41,11 @@ static const vector kernel_gamma2_vec = FILL_VEC(kernel_gamma2);
* gradient update on pi.
* @param v_wcountSum (return) #vector holding the cumulative sum of the wcount
* update on pi.
* @param v_wcount_dhSum (return) #vector holding the cumulative sum of the wcount
* @param v_wcount_dhSum (return) #vector holding the cumulative sum of the
* wcount
* gradient update on pi.
* @param v_div_vSum (return) #vector holding the cumulative sum of the divergence
* @param v_div_vSum (return) #vector holding the cumulative sum of the
* divergence
* update on pi.
* @param v_curlvxSum (return) #vector holding the cumulative sum of the curl of
* vx update on pi.
......@@ -61,9 +63,9 @@ static const vector kernel_gamma2_vec = FILL_VEC(kernel_gamma2);
__attribute__((always_inline)) INLINE static void calcRemInteractions(
struct c2_cache *const int_cache, const int icount, vector *v_rhoSum,
vector *v_rho_dhSum, vector *v_wcountSum, vector *v_wcount_dhSum,
vector *v_div_vSum, vector *v_curlvxSum, vector *v_curlvySum, vector *v_curlvzSum,
vector v_hi_inv, vector v_vix, vector v_viy, vector v_viz,
int *icount_align) {
vector *v_div_vSum, vector *v_curlvxSum, vector *v_curlvySum,
vector *v_curlvzSum, vector v_hi_inv, vector v_vix, vector v_viy,
vector v_viz, int *icount_align) {
mask_t int_mask, int_mask2;
......@@ -108,8 +110,8 @@ __attribute__((always_inline)) INLINE static void calcRemInteractions(
v_hi_inv, v_vix, v_viy, v_viz, &int_cache->vxq[*icount_align],
&int_cache->vyq[*icount_align], &int_cache->vzq[*icount_align],
&int_cache->mq[*icount_align], v_rhoSum, v_rho_dhSum, v_wcountSum,
v_wcount_dhSum, v_div_vSum, v_curlvxSum, v_curlvySum, v_curlvzSum, int_mask,
int_mask2, 1);
v_wcount_dhSum, v_div_vSum, v_curlvxSum, v_curlvySum, v_curlvzSum,
int_mask, int_mask2, 1);
}
}
......@@ -127,20 +129,25 @@ __attribute__((always_inline)) INLINE static void calcRemInteractions(
* @param int_cache (return) secondary #cache of interactions between two
* particles.
* @param icount Interaction count.
* @param v_rhoSum #vector holding the cumulative sum of the density update on pi.
* @param v_rhoSum #vector holding the cumulative sum of the density update on
* pi.
* @param v_rho_dhSum #vector holding the cumulative sum of the density gradient
* update on pi.
* @param v_wcountSum #vector holding the cumulative sum of the wcount update on
* pi.
* @param v_wcount_dhSum #vector holding the cumulative sum of the wcount gradient
* @param v_wcount_dhSum #vector holding the cumulative sum of the wcount
* gradient
* update on pi.
* @param v_div_vSum #vector holding the cumulative sum of the divergence update
* on pi.
* @param v_curlvxSum #vector holding the cumulative sum of the curl of vx update
* @param v_curlvxSum #vector holding the cumulative sum of the curl of vx
* update
* on pi.
* @param v_curlvySum #vector holding the cumulative sum of the curl of vy update
* @param v_curlvySum #vector holding the cumulative sum of the curl of vy
* update
* on pi.
* @param v_curlvzSum #vector holding the cumulative sum of the curl of vz update
* @param v_curlvzSum #vector holding the cumulative sum of the curl of vz
* update
* on pi.
* @param v_hi_inv #vector of 1/h for pi.
* @param v_vix #vector of x velocity of pi.
......@@ -152,8 +159,9 @@ __attribute__((always_inline)) INLINE static void storeInteractions(
vector *v_dz, const struct cache *const cell_cache,
struct c2_cache *const int_cache, int *icount, vector *v_rhoSum,
vector *v_rho_dhSum, vector *v_wcountSum, vector *v_wcount_dhSum,
vector *v_div_vSum, vector *v_curlvxSum, vector *v_curlvySum, vector *v_curlvzSum,
vector v_hi_inv, vector v_vix, vector v_viy, vector v_viz) {
vector *v_div_vSum, vector *v_curlvxSum, vector *v_curlvySum,
vector *v_curlvzSum, vector v_hi_inv, vector v_vix, vector v_viy,
vector v_viz) {
/* Left-pack values needed into the secondary cache using the interaction mask.
*/
......@@ -203,8 +211,9 @@ __attribute__((always_inline)) INLINE static void storeInteractions(
/* Perform remainder interactions. */
calcRemInteractions(int_cache, *icount, v_rhoSum, v_rho_dhSum, v_wcountSum,
v_wcount_dhSum, v_div_vSum, v_curlvxSum, v_curlvySum, v_curlvzSum,
v_hi_inv, v_vix, v_viy, v_viz, &icount_align);
v_wcount_dhSum, v_div_vSum, v_curlvxSum, v_curlvySum,
v_curlvzSum, v_hi_inv, v_vix, v_viy, v_viz,
&icount_align);
mask_t int_mask, int_mask2;
vec_init_mask_true(int_mask);
......@@ -216,8 +225,8 @@ __attribute__((always_inline)) INLINE static void storeInteractions(
&int_cache->r2q[j], &int_cache->dxq[j], &int_cache->dyq[j],
&int_cache->dzq[j], v_hi_inv, v_vix, v_viy, v_viz, &int_cache->vxq[j],
&int_cache->vyq[j], &int_cache->vzq[j], &int_cache->mq[j], v_rhoSum,
v_rho_dhSum, v_wcountSum, v_wcount_dhSum, v_div_vSum, v_curlvxSum, v_curlvySum,
v_curlvzSum, int_mask, int_mask2, 0);
v_rho_dhSum, v_wcountSum, v_wcount_dhSum, v_div_vSum, v_curlvxSum,
v_curlvySum, v_curlvzSum, int_mask, int_mask2, 0);
}
/* Reset interaction count. */
......@@ -574,8 +583,8 @@ __attribute__((always_inline)) INLINE void runner_doself1_density_vec(
v_hig2.v = vec_set1(hig2);
/* Reset cumulative sums of update vectors. */
vector v_rhoSum, v_rho_dhSum, v_wcountSum, v_wcount_dhSum, v_div_vSum, v_curlvxSum,
v_curlvySum, v_curlvzSum;
vector v_rhoSum, v_rho_dhSum, v_wcountSum, v_wcount_dhSum, v_div_vSum,
v_curlvxSum, v_curlvySum, v_curlvzSum;
/* Get the inverse of hi. */
vector v_hi_inv;
......@@ -671,24 +680,25 @@ __attribute__((always_inline)) INLINE void runner_doself1_density_vec(
* cache. */
if (doi_mask) {
storeInteractions(doi_mask, pjd, &v_r2, &v_dx, &v_dy, &v_dz, cell_cache,
&int_cache, &icount, &v_rhoSum, &v_rho_dhSum, &v_wcountSum,
&v_wcount_dhSum, &v_div_vSum, &v_curlvxSum, &v_curlvySum,
&v_curlvzSum, v_hi_inv, v_vix, v_viy, v_viz);
&int_cache, &icount, &v_rhoSum, &v_rho_dhSum,
&v_wcountSum, &v_wcount_dhSum, &v_div_vSum,
&v_curlvxSum, &v_curlvySum, &v_curlvzSum, v_hi_inv,
v_vix, v_viy, v_viz);
}
if (doi_mask2) {
storeInteractions(doi_mask2, pjd + VEC_SIZE, &v_r2_2, &v_dx_2, &v_dy_2,
&v_dz_2, cell_cache, &int_cache, &icount, &v_rhoSum,
&v_rho_dhSum, &v_wcountSum, &v_wcount_dhSum, &v_div_vSum,
&v_curlvxSum, &v_curlvySum, &v_curlvzSum, v_hi_inv, v_vix,
v_viy, v_viz);
&v_rho_dhSum, &v_wcountSum, &v_wcount_dhSum,
&v_div_vSum, &v_curlvxSum, &v_curlvySum, &v_curlvzSum,
v_hi_inv, v_vix, v_viy, v_viz);
}
}
/* Perform padded vector remainder interactions if any are present. */
calcRemInteractions(&int_cache, icount, &v_rhoSum, &v_rho_dhSum, &v_wcountSum,
&v_wcount_dhSum, &v_div_vSum, &v_curlvxSum, &v_curlvySum,
&v_curlvzSum, v_hi_inv, v_vix, v_viy, v_viz,
&icount_align);
calcRemInteractions(&int_cache, icount, &v_rhoSum, &v_rho_dhSum,
&v_wcountSum, &v_wcount_dhSum, &v_div_vSum,
&v_curlvxSum, &v_curlvySum, &v_curlvzSum, v_hi_inv,
v_vix, v_viy, v_viz, &icount_align);
/* Initialise masks to true in case remainder interactions have been
* performed. */
......@@ -702,9 +712,9 @@ __attribute__((always_inline)) INLINE void runner_doself1_density_vec(
&int_cache.r2q[pjd], &int_cache.dxq[pjd], &int_cache.dyq[pjd],
&int_cache.dzq[pjd], v_hi_inv, v_vix, v_viy, v_viz,
&int_cache.vxq[pjd], &int_cache.vyq[pjd], &int_cache.vzq[pjd],
&int_cache.mq[pjd], &v_rhoSum, &v_rho_dhSum, &v_wcountSum, &v_wcount_dhSum,
&v_div_vSum, &v_curlvxSum, &v_curlvySum, &v_curlvzSum, int_mask, int_mask2,
0);
&int_cache.mq[pjd], &v_rhoSum, &v_rho_dhSum, &v_wcountSum,
&v_wcount_dhSum, &v_div_vSum, &v_curlvxSum, &v_curlvySum,
&v_curlvzSum, int_mask, int_mask2, 0);
}
/* Perform horizontal adds on vector sums and store result in particle pi.
......@@ -903,8 +913,8 @@ __attribute__((always_inline)) INLINE void runner_doself2_force_vec(
&cell_cache->grad_h[pjd], &cell_cache->pOrho2[pjd],
&cell_cache->balsara[pjd], &cell_cache->soundspeed[pjd],
&cell_cache->m[pjd], v_hi_inv, v_hj_inv, &v_a_hydro_xSum,
&v_a_hydro_ySum, &v_a_hydro_zSum, &v_h_dtSum, &v_sigSum, &v_entropy_dtSum,
v_doi_mask);
&v_a_hydro_ySum, &v_a_hydro_zSum, &v_h_dtSum, &v_sigSum,
&v_entropy_dtSum, v_doi_mask);
}
} /* Loop over all other particles. */
......@@ -1030,8 +1040,7 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
/* Read the needed particles into the two caches. */
cache_read_two_partial_cells_sorted(ci, cj, ci_cache, cj_cache, sort_i,
sort_j, shift, &first_pi,
&last_pj);
sort_j, shift, &first_pi, &last_pj);
/* Get the number of particles read into the ci cache. */
int ci_cache_count = count_i - first_pi;
......@@ -1073,8 +1082,8 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
v_hig2.v = vec_set1(hig2);
/* Reset cumulative sums of update vectors. */
vector v_rhoSum, v_rho_dhSum, v_wcountSum, v_wcount_dhSum, v_div_vSum, v_curlvxSum,
v_curlvySum, v_curlvzSum;
vector v_rhoSum, v_rho_dhSum, v_wcountSum, v_wcount_dhSum, v_div_vSum,
v_curlvxSum, v_curlvySum, v_curlvzSum;
/* Get the inverse of hi. */
vector v_hi_inv;
......@@ -1113,8 +1122,8 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
#ifdef SWIFT_DEBUG_CHECKS
if (cj_cache_idx % VEC_SIZE != 0 || cj_cache_idx < 0 ||
cj_cache_idx + (VEC_SIZE - 1) > (last_pj + 1 + VEC_SIZE)) {
error("Unaligned read!!! cj_cache_idx=%d, last_pj=%d",
cj_cache_idx, last_pj);
error("Unaligned read!!! cj_cache_idx=%d, last_pj=%d", cj_cache_idx,
last_pj);
}
#endif
......@@ -1146,9 +1155,10 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
runner_iact_nonsym_1_vec_density(
&v_r2, &v_dx, &v_dy, &v_dz, v_hi_inv, v_vix, v_viy, v_viz,
&cj_cache->vx[cj_cache_idx], &cj_cache->vy[cj_cache_idx],
&cj_cache->vz[cj_cache_idx], &cj_cache->m[cj_cache_idx], &v_rhoSum,
&v_rho_dhSum, &v_wcountSum, &v_wcount_dhSum, &v_div_vSum, &v_curlvxSum,
&v_curlvySum, &v_curlvzSum, v_doi_mask);
&cj_cache->vz[cj_cache_idx], &cj_cache->m[cj_cache_idx],
&v_rhoSum, &v_rho_dhSum, &v_wcountSum, &v_wcount_dhSum,
&v_div_vSum, &v_curlvxSum, &v_curlvySum, &v_curlvzSum,
v_doi_mask);
} /* loop over the parts in cj. */
......@@ -1203,8 +1213,8 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
v_hjg2.v = vec_set1(hjg2);
/* Reset cumulative sums of update vectors. */
vector v_rhoSum, v_rho_dhSum, v_wcountSum, v_wcount_dhSum, v_div_vSum, v_curlvxSum,
v_curlvySum, v_curlvzSum;
vector v_rhoSum, v_rho_dhSum, v_wcountSum, v_wcount_dhSum, v_div_vSum,
v_curlvxSum, v_curlvySum, v_curlvzSum;
/* Get the inverse of hj. */
vector v_hj_inv;
......@@ -1238,8 +1248,7 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
#ifdef SWIFT_DEBUG_CHECKS
if (ci_cache_idx % VEC_SIZE != 0 || ci_cache_idx < 0 ||
ci_cache_idx + (VEC_SIZE - 1) >
(count_i - first_pi + VEC_SIZE)) {
ci_cache_idx + (VEC_SIZE - 1) > (count_i - first_pi + VEC_SIZE)) {
error(
"Unaligned read!!! ci_cache_idx=%d, first_pi=%d, "
"count_i=%d",
......@@ -1277,9 +1286,10 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
runner_iact_nonsym_1_vec_density(
&v_r2, &v_dx, &v_dy, &v_dz, v_hj_inv, v_vjx, v_vjy, v_vjz,
&ci_cache->vx[ci_cache_idx], &ci_cache->vy[ci_cache_idx],
&ci_cache->vz[ci_cache_idx], &ci_cache->m[ci_cache_idx], &v_rhoSum,
&v_rho_dhSum, &v_wcountSum, &v_wcount_dhSum, &v_div_vSum, &v_curlvxSum,
&v_curlvySum, &v_curlvzSum, v_doj_mask);
&ci_cache->vz[ci_cache_idx], &ci_cache->m[ci_cache_idx],
&v_rhoSum, &v_rho_dhSum, &v_wcountSum, &v_wcount_dhSum,
&v_div_vSum, &v_curlvxSum, &v_curlvySum, &v_curlvzSum,
v_doj_mask);
} /* loop over the parts in ci. */
......@@ -1351,7 +1361,8 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
/* Check if any particles are active and return if there are none. */
int numActive = 0;
/* Use the largest smoothing length to make sure that no interactions are missed. */
/* Use the largest smoothing length to make sure that no interactions are
* missed. */
const double h_max = max(hi_max, hj_max);
if (active_ci) {
......@@ -1416,8 +1427,7 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
/* Read the needed particles into the two caches. */
cache_read_two_partial_cells_sorted_force(ci, cj, ci_cache, cj_cache, sort_i,
sort_j, shift, &first_pi,
&last_pj);
sort_j, shift, &first_pi, &last_pj);
/* Get the number of particles read into the ci cache. */
int ci_cache_count = count_i - first_pi;
......@@ -1465,8 +1475,8 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
v_hig2.v = vec_set1(hig2);
/* Reset cumulative sums of update vectors. */
vector v_a_hydro_xSum, v_a_hydro_ySum, v_a_hydro_zSum, v_h_dtSum, v_sigSum,
v_entropy_dtSum;
vector v_a_hydro_xSum, v_a_hydro_ySum, v_a_hydro_zSum, v_h_dtSum,
v_sigSum, v_entropy_dtSum;
/* Get the inverse of hi. */
vector v_hi_inv;
......@@ -1502,8 +1512,8 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
#ifdef SWIFT_DEBUG_CHECKS
if (cj_cache_idx % VEC_SIZE != 0 || cj_cache_idx < 0 ||
cj_cache_idx + (VEC_SIZE - 1) > (last_pj + 1 + VEC_SIZE)) {
error("Unaligned read!!! cj_cache_idx=%d, last_pj=%d",
cj_cache_idx, last_pj);
error("Unaligned read!!! cj_cache_idx=%d, last_pj=%d", cj_cache_idx,
last_pj);
}
#endif
......@@ -1547,8 +1557,9 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
&cj_cache->grad_h[cj_cache_idx], &cj_cache->pOrho2[cj_cache_idx],
&cj_cache->balsara[cj_cache_idx],
&cj_cache->soundspeed[cj_cache_idx], &cj_cache->m[cj_cache_idx],
v_hi_inv, v_hj_inv, &v_a_hydro_xSum, &v_a_hydro_ySum, &v_a_hydro_zSum,
&v_h_dtSum, &v_sigSum, &v_entropy_dtSum, v_doi_mask);
v_hi_inv, v_hj_inv, &v_a_hydro_xSum, &v_a_hydro_ySum,
&v_a_hydro_zSum, &v_h_dtSum, &v_sigSum, &v_entropy_dtSum,
v_doi_mask);
}
} /* loop over the parts in cj. */
......@@ -1610,8 +1621,8 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
v_hjg2.v = vec_set1(hjg2);
/* Reset cumulative sums of update vectors. */
vector v_a_hydro_xSum, v_a_hydro_ySum, v_a_hydro_zSum, v_h_dtSum, v_sigSum,
v_entropy_dtSum;
vector v_a_hydro_xSum, v_a_hydro_ySum, v_a_hydro_zSum, v_h_dtSum,
v_sigSum, v_entropy_dtSum;
/* Get the inverse of hj. */
vector v_hj_inv;
......@@ -1689,8 +1700,9 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
&ci_cache->grad_h[ci_cache_idx], &ci_cache->pOrho2[ci_cache_idx],
&ci_cache->balsara[ci_cache_idx],
&ci_cache->soundspeed[ci_cache_idx], &ci_cache->m[ci_cache_idx],
v_hj_inv, v_hi_inv, &v_a_hydro_xSum, &v_a_hydro_ySum, &v_a_hydro_zSum,
&v_h_dtSum, &v_sigSum, &v_entropy_dtSum, v_doj_mask);
v_hj_inv, v_hi_inv, &v_a_hydro_xSum, &v_a_hydro_ySum,
&v_a_hydro_zSum, &v_h_dtSum, &v_sigSum, &v_entropy_dtSum,
v_doj_mask);
}
} /* loop over the parts in ci. */
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment