Commit bcffe08f authored by Matthieu Schaller
Browse files

Code formatting. Small style changes. Updates some comments in the cache construction.

parent 52507e8b
......@@ -198,8 +198,8 @@ __attribute__((always_inline)) INLINE void cache_read_particles(
swift_declare_aligned_ptr(float, vz, ci_cache->vz, SWIFT_CACHE_ALIGNMENT);
const struct part *restrict parts = ci->parts;
const double loc[3] = {ci->loc[0], ci->loc[1],ci->loc[2]};
const double loc[3] = {ci->loc[0], ci->loc[1], ci->loc[2]};
/* Shift the particles positions to a local frame so single precision can be
* used instead of double precision. */
for (int i = 0; i < ci->count; i++) {
......@@ -250,7 +250,7 @@ __attribute__((always_inline)) INLINE void cache_read_force_particles(
SWIFT_CACHE_ALIGNMENT);
const struct part *restrict parts = ci->parts;
const double loc[3] = {ci->loc[0], ci->loc[1],ci->loc[2]};
const double loc[3] = {ci->loc[0], ci->loc[1], ci->loc[2]};
/* Shift the particles positions to a local frame so single precision can be
* used instead of double precision. */
......@@ -296,7 +296,6 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
const struct entry *restrict sort_j, const double *restrict const shift,
int *first_pi, int *last_pj) {
int idx;
/* Pad number of particles read to the vector size. */
int rem = (ci->count - *first_pi) % VEC_SIZE;
if (rem != 0) {
......@@ -312,17 +311,17 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
if (*last_pj + pad < cj->count) *last_pj += pad;
}
/* Get some local pointers */
const int first_pi_align = *first_pi;
const int last_pj_align = *last_pj;
const struct part *restrict parts_i = ci->parts;
const struct part *restrict parts_j = cj->parts;
/* Shift particles to the local frame and account for boundary conditions.*/
const double total_ci_shift[3] = {cj->loc[0] + shift[0], cj->loc[1] + shift[1],
cj->loc[2] + shift[2]};
const double total_cj_shift[3] = {cj->loc[0], cj->loc[1],
cj->loc[2]};
const double total_ci_shift[3] = {
cj->loc[0] + shift[0], cj->loc[1] + shift[1], cj->loc[2] + shift[2]};
const double total_cj_shift[3] = {cj->loc[0], cj->loc[1], cj->loc[2]};
/* Let the compiler know that the data is aligned and create pointers to the
* arrays inside the cache. */
swift_declare_aligned_ptr(float, x, ci_cache->x, SWIFT_CACHE_ALIGNMENT);
......@@ -335,16 +334,11 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
swift_declare_aligned_ptr(float, vz, ci_cache->vz, SWIFT_CACHE_ALIGNMENT);
int ci_cache_count = ci->count - first_pi_align;
/* Shift the particles positions to a local frame (ci frame) so single
* precision
* can be
* used instead of double precision. Also shift the cell ci, particles
* positions
* due to BCs but leave cell cj. */
* precision can be used instead of double precision. */
for (int i = 0; i < ci_cache_count; i++) {
/* Make sure ci_cache is filled from the first element. */
idx = sort_i[i + first_pi_align].i;
const int idx = sort_i[i + first_pi_align].i;
x[i] = (float)(parts_i[idx].x[0] - total_ci_shift[0]);
y[i] = (float)(parts_i[idx].x[1] - total_ci_shift[1]);
z[i] = (float)(parts_i[idx].x[2] - total_ci_shift[2]);
......@@ -371,30 +365,31 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
"is not within "
"[-4*ci->width*(1 + 2*space_maxreldx), 4*ci->width*(1 + "
"2*space_maxreldx)]. x=%f, ci->width[0]=%f",
ci->loc[0], ci->loc[1], ci->loc[2], cj->loc[0], cj->loc[1], cj->loc[2], i, x[i],
ci->width[0]);
ci->loc[0], ci->loc[1], ci->loc[2], cj->loc[0], cj->loc[1],
cj->loc[2], i, x[i], ci->width[0]);
if (y[i] > shift_threshold_y || y[i] < -shift_threshold_y)
error(
"Error: ci->loc[%lf,%lf,%lf], cj->loc[%lf,%lf,%lf] Particle %d y pos "
"is not within "
"[-4*ci->width*(1 + 2*space_maxreldx), 4*ci->width*(1 + "
"2*space_maxreldx)]. y=%f, ci->width[1]=%f",
ci->loc[0], ci->loc[1], ci->loc[2], cj->loc[0], cj->loc[1], cj->loc[2], i, y[i],
ci->width[1]);
ci->loc[0], ci->loc[1], ci->loc[2], cj->loc[0], cj->loc[1],
cj->loc[2], i, y[i], ci->width[1]);
if (z[i] > shift_threshold_z || z[i] < -shift_threshold_z)
error(
"Error: ci->loc[%lf,%lf,%lf], cj->loc[%lf,%lf,%lf] Particle %d z pos "
"is not within "
"[-4*ci->width*(1 + 2*space_maxreldx), 4*ci->width*(1 + "
"2*space_maxreldx)]. z=%f, ci->width[2]=%f",
ci->loc[0], ci->loc[1], ci->loc[2], cj->loc[0], cj->loc[1], cj->loc[2], i, z[i],
ci->width[2]);
ci->loc[0], ci->loc[1], ci->loc[2], cj->loc[0], cj->loc[1],
cj->loc[2], i, z[i], ci->width[2]);
}
#endif
/* Pad cache with fake particles that exist outside the cell so will not
* interact.*/
const float max_dx = max(ci->dx_max_part, cj->dx_max_part);
* interact. We use values of the same magnitude (but negative!) as the real
* particles to avoid overflow problems. */
const double max_dx = max(ci->dx_max_part, cj->dx_max_part);
const float pos_padded[3] = {-(2. * ci->width[0] + max_dx),
-(2. * ci->width[1] + max_dx),
-(2. * ci->width[2] + max_dx)};
......@@ -425,7 +420,7 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
swift_declare_aligned_ptr(float, vzj, cj_cache->vz, SWIFT_CACHE_ALIGNMENT);
for (int i = 0; i <= last_pj_align; i++) {
idx = sort_j[i].i;
const int idx = sort_j[i].i;
xj[i] = (float)(parts_j[idx].x[0] - total_cj_shift[0]);
yj[i] = (float)(parts_j[idx].x[1] - total_cj_shift[1]);
zj[i] = (float)(parts_j[idx].x[2] - total_cj_shift[2]);
......@@ -445,29 +440,30 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
"pos is not within "
"[-4*ci->width*(1 + 2*space_maxreldx), 4*ci->width*(1 + "
"2*space_maxreldx)]. xj=%f, ci->width[0]=%f",
ci->loc[0], ci->loc[1], ci->loc[2], cj->loc[0], cj->loc[1], cj->loc[2], i, xj[i],
ci->width[0]);
ci->loc[0], ci->loc[1], ci->loc[2], cj->loc[0], cj->loc[1],
cj->loc[2], i, xj[i], ci->width[0]);
if (yj[i] > shift_threshold_y || yj[i] < -shift_threshold_y)
error(
"Error: ci->loc[%lf,%lf,%lf], cj->loc[%lf,%lf,%lf] Particle %d yj "
"pos is not within "
"[-4*ci->width*(1 + 2*space_maxreldx), 4*ci->width*(1 + "
"2*space_maxreldx)]. yj=%f, ci->width[1]=%f",
ci->loc[0], ci->loc[1], ci->loc[2], cj->loc[0], cj->loc[1], cj->loc[2], i, yj[i],
ci->width[1]);
ci->loc[0], ci->loc[1], ci->loc[2], cj->loc[0], cj->loc[1],
cj->loc[2], i, yj[i], ci->width[1]);
if (zj[i] > shift_threshold_z || zj[i] < -shift_threshold_z)
error(
"Error: ci->loc[%lf,%lf,%lf], cj->loc[%lf,%lf,%lf] Particle %d zj "
"pos is not within "
"[-4*ci->width*(1 + 2*space_maxreldx), 4*ci->width*(1 + "
"2*space_maxreldx)]. zj=%f, ci->width[2]=%f",
ci->loc[0], ci->loc[1], ci->loc[2], cj->loc[0], cj->loc[1], cj->loc[2], i, zj[i],
ci->width[2]);
ci->loc[0], ci->loc[1], ci->loc[2], cj->loc[0], cj->loc[1],
cj->loc[2], i, zj[i], ci->width[2]);
}
#endif
/* Pad cache with fake particles that exist outside the cell so will not
* interact.*/
* interact. We use values of the same magnitude (but negative!) as the real
* particles to avoid overflow problems. */
const float pos_padded_j[3] = {-(2. * cj->width[0] + max_dx),
-(2. * cj->width[1] + max_dx),
-(2. * cj->width[2] + max_dx)};
......@@ -508,7 +504,6 @@ cache_read_two_partial_cells_sorted_force(
const struct entry *restrict sort_i, const struct entry *restrict sort_j,
const double *const shift, int *first_pi, int *last_pj) {
int idx;
/* Pad number of particles read to the vector size. */
int rem = (ci->count - *first_pi) % VEC_SIZE;
if (rem != 0) {
......@@ -524,16 +519,16 @@ cache_read_two_partial_cells_sorted_force(
if (*last_pj + pad < cj->count) *last_pj += pad;
}
/* Get some local pointers */
const int first_pi_align = *first_pi;
const int last_pj_align = *last_pj;
const struct part *restrict parts_i = ci->parts;
const struct part *restrict parts_j = cj->parts;
/* Shift particles to the local frame and account for boundary conditions.*/
const double total_ci_shift[3] = {cj->loc[0] + shift[0], cj->loc[1] + shift[1],
cj->loc[2] + shift[2]};
const double total_cj_shift[3] = {cj->loc[0], cj->loc[1],
cj->loc[2]};
const double total_ci_shift[3] = {
cj->loc[0] + shift[0], cj->loc[1] + shift[1], cj->loc[2] + shift[2]};
const double total_cj_shift[3] = {cj->loc[0], cj->loc[1], cj->loc[2]};
/* Let the compiler know that the data is aligned and create pointers to the
* arrays inside the cache. */
......@@ -557,14 +552,10 @@ cache_read_two_partial_cells_sorted_force(
int ci_cache_count = ci->count - first_pi_align;
/* Shift the particles positions to a local frame (ci frame) so single
* precision
* can be
* used instead of double precision. Also shift the cell ci, particles
* positions
* due to BCs but leave cell cj. */
* precision can be used instead of double precision. */
for (int i = 0; i < ci_cache_count; i++) {
/* Make sure ci_cache is filled from the first element. */
idx = sort_i[i + first_pi_align].i;
const int idx = sort_i[i + first_pi_align].i;
x[i] = (float)(parts_i[idx].x[0] - total_ci_shift[0]);
y[i] = (float)(parts_i[idx].x[1] - total_ci_shift[1]);
z[i] = (float)(parts_i[idx].x[2] - total_ci_shift[2]);
......@@ -581,8 +572,9 @@ cache_read_two_partial_cells_sorted_force(
}
/* Pad cache with fake particles that exist outside the cell so will not
* interact.*/
const float max_dx = max(ci->dx_max_part, cj->dx_max_part);
* interact. We use values of the same magnitude (but negative!) as the real
* particles to avoid overflow problems. */
const double max_dx = max(ci->dx_max_part, cj->dx_max_part);
const float pos_padded[3] = {-(2. * ci->width[0] + max_dx),
-(2. * ci->width[1] + max_dx),
-(2. * ci->width[2] + max_dx)};
......@@ -626,7 +618,7 @@ cache_read_two_partial_cells_sorted_force(
SWIFT_CACHE_ALIGNMENT);
for (int i = 0; i <= last_pj_align; i++) {
idx = sort_j[i].i;
const int idx = sort_j[i].i;
xj[i] = (float)(parts_j[idx].x[0] - total_cj_shift[0]);
yj[i] = (float)(parts_j[idx].x[1] - total_cj_shift[1]);
zj[i] = (float)(parts_j[idx].x[2] - total_cj_shift[2]);
......@@ -643,7 +635,8 @@ cache_read_two_partial_cells_sorted_force(
}
/* Pad cache with fake particles that exist outside the cell so will not
* interact.*/
* interact. We use values of the same magnitude (but negative!) as the real
* particles to avoid overflow problems. */
const float pos_padded_j[3] = {-(2. * cj->width[0] + max_dx),
-(2. * cj->width[1] + max_dx),
-(2. * cj->width[2] + max_dx)};
......
......@@ -41,9 +41,11 @@ static const vector kernel_gamma2_vec = FILL_VEC(kernel_gamma2);
* gradient update on pi.
* @param v_wcountSum (return) #vector holding the cumulative sum of the wcount
* update on pi.
* @param v_wcount_dhSum (return) #vector holding the cumulative sum of the wcount
* @param v_wcount_dhSum (return) #vector holding the cumulative sum of the
* wcount
* gradient update on pi.
* @param v_div_vSum (return) #vector holding the cumulative sum of the divergence
* @param v_div_vSum (return) #vector holding the cumulative sum of the
* divergence
* update on pi.
* @param v_curlvxSum (return) #vector holding the cumulative sum of the curl of
* vx update on pi.
......@@ -61,9 +63,9 @@ static const vector kernel_gamma2_vec = FILL_VEC(kernel_gamma2);
__attribute__((always_inline)) INLINE static void calcRemInteractions(
struct c2_cache *const int_cache, const int icount, vector *v_rhoSum,
vector *v_rho_dhSum, vector *v_wcountSum, vector *v_wcount_dhSum,
vector *v_div_vSum, vector *v_curlvxSum, vector *v_curlvySum, vector *v_curlvzSum,
vector v_hi_inv, vector v_vix, vector v_viy, vector v_viz,
int *icount_align) {
vector *v_div_vSum, vector *v_curlvxSum, vector *v_curlvySum,
vector *v_curlvzSum, vector v_hi_inv, vector v_vix, vector v_viy,
vector v_viz, int *icount_align) {
mask_t int_mask, int_mask2;
......@@ -108,8 +110,8 @@ __attribute__((always_inline)) INLINE static void calcRemInteractions(
v_hi_inv, v_vix, v_viy, v_viz, &int_cache->vxq[*icount_align],
&int_cache->vyq[*icount_align], &int_cache->vzq[*icount_align],
&int_cache->mq[*icount_align], v_rhoSum, v_rho_dhSum, v_wcountSum,
v_wcount_dhSum, v_div_vSum, v_curlvxSum, v_curlvySum, v_curlvzSum, int_mask,
int_mask2, 1);
v_wcount_dhSum, v_div_vSum, v_curlvxSum, v_curlvySum, v_curlvzSum,
int_mask, int_mask2, 1);
}
}
......@@ -127,20 +129,25 @@ __attribute__((always_inline)) INLINE static void calcRemInteractions(
* @param int_cache (return) secondary #cache of interactions between two
* particles.
* @param icount Interaction count.
* @param v_rhoSum #vector holding the cumulative sum of the density update on pi.
* @param v_rhoSum #vector holding the cumulative sum of the density update on
* pi.
* @param v_rho_dhSum #vector holding the cumulative sum of the density gradient
* update on pi.
* @param v_wcountSum #vector holding the cumulative sum of the wcount update on
* pi.
* @param v_wcount_dhSum #vector holding the cumulative sum of the wcount gradient
* @param v_wcount_dhSum #vector holding the cumulative sum of the wcount
* gradient
* update on pi.
* @param v_div_vSum #vector holding the cumulative sum of the divergence update
* on pi.
* @param v_curlvxSum #vector holding the cumulative sum of the curl of vx update
* @param v_curlvxSum #vector holding the cumulative sum of the curl of vx
* update
* on pi.
* @param v_curlvySum #vector holding the cumulative sum of the curl of vy update
* @param v_curlvySum #vector holding the cumulative sum of the curl of vy
* update
* on pi.
* @param v_curlvzSum #vector holding the cumulative sum of the curl of vz update
* @param v_curlvzSum #vector holding the cumulative sum of the curl of vz
* update
* on pi.
* @param v_hi_inv #vector of 1/h for pi.
* @param v_vix #vector of x velocity of pi.
......@@ -152,8 +159,9 @@ __attribute__((always_inline)) INLINE static void storeInteractions(
vector *v_dz, const struct cache *const cell_cache,
struct c2_cache *const int_cache, int *icount, vector *v_rhoSum,
vector *v_rho_dhSum, vector *v_wcountSum, vector *v_wcount_dhSum,
vector *v_div_vSum, vector *v_curlvxSum, vector *v_curlvySum, vector *v_curlvzSum,
vector v_hi_inv, vector v_vix, vector v_viy, vector v_viz) {
vector *v_div_vSum, vector *v_curlvxSum, vector *v_curlvySum,
vector *v_curlvzSum, vector v_hi_inv, vector v_vix, vector v_viy,
vector v_viz) {
/* Left-pack values needed into the secondary cache using the interaction mask.
*/
......@@ -203,8 +211,9 @@ __attribute__((always_inline)) INLINE static void storeInteractions(
/* Perform remainder interactions. */
calcRemInteractions(int_cache, *icount, v_rhoSum, v_rho_dhSum, v_wcountSum,
v_wcount_dhSum, v_div_vSum, v_curlvxSum, v_curlvySum, v_curlvzSum,
v_hi_inv, v_vix, v_viy, v_viz, &icount_align);
v_wcount_dhSum, v_div_vSum, v_curlvxSum, v_curlvySum,
v_curlvzSum, v_hi_inv, v_vix, v_viy, v_viz,
&icount_align);
mask_t int_mask, int_mask2;
vec_init_mask_true(int_mask);
......@@ -216,8 +225,8 @@ __attribute__((always_inline)) INLINE static void storeInteractions(
&int_cache->r2q[j], &int_cache->dxq[j], &int_cache->dyq[j],
&int_cache->dzq[j], v_hi_inv, v_vix, v_viy, v_viz, &int_cache->vxq[j],
&int_cache->vyq[j], &int_cache->vzq[j], &int_cache->mq[j], v_rhoSum,
v_rho_dhSum, v_wcountSum, v_wcount_dhSum, v_div_vSum, v_curlvxSum, v_curlvySum,
v_curlvzSum, int_mask, int_mask2, 0);
v_rho_dhSum, v_wcountSum, v_wcount_dhSum, v_div_vSum, v_curlvxSum,
v_curlvySum, v_curlvzSum, int_mask, int_mask2, 0);
}
/* Reset interaction count. */
......@@ -574,8 +583,8 @@ __attribute__((always_inline)) INLINE void runner_doself1_density_vec(
v_hig2.v = vec_set1(hig2);
/* Reset cumulative sums of update vectors. */
vector v_rhoSum, v_rho_dhSum, v_wcountSum, v_wcount_dhSum, v_div_vSum, v_curlvxSum,
v_curlvySum, v_curlvzSum;
vector v_rhoSum, v_rho_dhSum, v_wcountSum, v_wcount_dhSum, v_div_vSum,
v_curlvxSum, v_curlvySum, v_curlvzSum;
/* Get the inverse of hi. */
vector v_hi_inv;
......@@ -671,24 +680,25 @@ __attribute__((always_inline)) INLINE void runner_doself1_density_vec(
* cache. */
if (doi_mask) {
storeInteractions(doi_mask, pjd, &v_r2, &v_dx, &v_dy, &v_dz, cell_cache,
&int_cache, &icount, &v_rhoSum, &v_rho_dhSum, &v_wcountSum,
&v_wcount_dhSum, &v_div_vSum, &v_curlvxSum, &v_curlvySum,
&v_curlvzSum, v_hi_inv, v_vix, v_viy, v_viz);
&int_cache, &icount, &v_rhoSum, &v_rho_dhSum,
&v_wcountSum, &v_wcount_dhSum, &v_div_vSum,
&v_curlvxSum, &v_curlvySum, &v_curlvzSum, v_hi_inv,
v_vix, v_viy, v_viz);
}
if (doi_mask2) {
storeInteractions(doi_mask2, pjd + VEC_SIZE, &v_r2_2, &v_dx_2, &v_dy_2,
&v_dz_2, cell_cache, &int_cache, &icount, &v_rhoSum,
&v_rho_dhSum, &v_wcountSum, &v_wcount_dhSum, &v_div_vSum,
&v_curlvxSum, &v_curlvySum, &v_curlvzSum, v_hi_inv, v_vix,
v_viy, v_viz);
&v_rho_dhSum, &v_wcountSum, &v_wcount_dhSum,
&v_div_vSum, &v_curlvxSum, &v_curlvySum, &v_curlvzSum,
v_hi_inv, v_vix, v_viy, v_viz);
}
}
/* Perform padded vector remainder interactions if any are present. */
calcRemInteractions(&int_cache, icount, &v_rhoSum, &v_rho_dhSum, &v_wcountSum,
&v_wcount_dhSum, &v_div_vSum, &v_curlvxSum, &v_curlvySum,
&v_curlvzSum, v_hi_inv, v_vix, v_viy, v_viz,
&icount_align);
calcRemInteractions(&int_cache, icount, &v_rhoSum, &v_rho_dhSum,
&v_wcountSum, &v_wcount_dhSum, &v_div_vSum,
&v_curlvxSum, &v_curlvySum, &v_curlvzSum, v_hi_inv,
v_vix, v_viy, v_viz, &icount_align);
/* Initialise masks to true in case remainder interactions have been
* performed. */
......@@ -702,9 +712,9 @@ __attribute__((always_inline)) INLINE void runner_doself1_density_vec(
&int_cache.r2q[pjd], &int_cache.dxq[pjd], &int_cache.dyq[pjd],
&int_cache.dzq[pjd], v_hi_inv, v_vix, v_viy, v_viz,
&int_cache.vxq[pjd], &int_cache.vyq[pjd], &int_cache.vzq[pjd],
&int_cache.mq[pjd], &v_rhoSum, &v_rho_dhSum, &v_wcountSum, &v_wcount_dhSum,
&v_div_vSum, &v_curlvxSum, &v_curlvySum, &v_curlvzSum, int_mask, int_mask2,
0);
&int_cache.mq[pjd], &v_rhoSum, &v_rho_dhSum, &v_wcountSum,
&v_wcount_dhSum, &v_div_vSum, &v_curlvxSum, &v_curlvySum,
&v_curlvzSum, int_mask, int_mask2, 0);
}
/* Perform horizontal adds on vector sums and store result in particle pi.
......@@ -903,8 +913,8 @@ __attribute__((always_inline)) INLINE void runner_doself2_force_vec(
&cell_cache->grad_h[pjd], &cell_cache->pOrho2[pjd],
&cell_cache->balsara[pjd], &cell_cache->soundspeed[pjd],
&cell_cache->m[pjd], v_hi_inv, v_hj_inv, &v_a_hydro_xSum,
&v_a_hydro_ySum, &v_a_hydro_zSum, &v_h_dtSum, &v_sigSum, &v_entropy_dtSum,
v_doi_mask);
&v_a_hydro_ySum, &v_a_hydro_zSum, &v_h_dtSum, &v_sigSum,
&v_entropy_dtSum, v_doi_mask);
}
} /* Loop over all other particles. */
......@@ -1030,8 +1040,7 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
/* Read the needed particles into the two caches. */
cache_read_two_partial_cells_sorted(ci, cj, ci_cache, cj_cache, sort_i,
sort_j, shift, &first_pi,
&last_pj);
sort_j, shift, &first_pi, &last_pj);
/* Get the number of particles read into the ci cache. */
int ci_cache_count = count_i - first_pi;
......@@ -1073,8 +1082,8 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
v_hig2.v = vec_set1(hig2);
/* Reset cumulative sums of update vectors. */
vector v_rhoSum, v_rho_dhSum, v_wcountSum, v_wcount_dhSum, v_div_vSum, v_curlvxSum,
v_curlvySum, v_curlvzSum;
vector v_rhoSum, v_rho_dhSum, v_wcountSum, v_wcount_dhSum, v_div_vSum,
v_curlvxSum, v_curlvySum, v_curlvzSum;
/* Get the inverse of hi. */
vector v_hi_inv;
......@@ -1113,8 +1122,8 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
#ifdef SWIFT_DEBUG_CHECKS
if (cj_cache_idx % VEC_SIZE != 0 || cj_cache_idx < 0 ||
cj_cache_idx + (VEC_SIZE - 1) > (last_pj + 1 + VEC_SIZE)) {
error("Unaligned read!!! cj_cache_idx=%d, last_pj=%d",
cj_cache_idx, last_pj);
error("Unaligned read!!! cj_cache_idx=%d, last_pj=%d", cj_cache_idx,
last_pj);
}
#endif
......@@ -1146,9 +1155,10 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
runner_iact_nonsym_1_vec_density(
&v_r2, &v_dx, &v_dy, &v_dz, v_hi_inv, v_vix, v_viy, v_viz,
&cj_cache->vx[cj_cache_idx], &cj_cache->vy[cj_cache_idx],
&cj_cache->vz[cj_cache_idx], &cj_cache->m[cj_cache_idx], &v_rhoSum,
&v_rho_dhSum, &v_wcountSum, &v_wcount_dhSum, &v_div_vSum, &v_curlvxSum,
&v_curlvySum, &v_curlvzSum, v_doi_mask);
&cj_cache->vz[cj_cache_idx], &cj_cache->m[cj_cache_idx],
&v_rhoSum, &v_rho_dhSum, &v_wcountSum, &v_wcount_dhSum,
&v_div_vSum, &v_curlvxSum, &v_curlvySum, &v_curlvzSum,
v_doi_mask);
} /* loop over the parts in cj. */
......@@ -1203,8 +1213,8 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
v_hjg2.v = vec_set1(hjg2);
/* Reset cumulative sums of update vectors. */
vector v_rhoSum, v_rho_dhSum, v_wcountSum, v_wcount_dhSum, v_div_vSum, v_curlvxSum,
v_curlvySum, v_curlvzSum;
vector v_rhoSum, v_rho_dhSum, v_wcountSum, v_wcount_dhSum, v_div_vSum,
v_curlvxSum, v_curlvySum, v_curlvzSum;
/* Get the inverse of hj. */
vector v_hj_inv;
......@@ -1238,8 +1248,7 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
#ifdef SWIFT_DEBUG_CHECKS
if (ci_cache_idx % VEC_SIZE != 0 || ci_cache_idx < 0 ||
ci_cache_idx + (VEC_SIZE - 1) >
(count_i - first_pi + VEC_SIZE)) {
ci_cache_idx + (VEC_SIZE - 1) > (count_i - first_pi + VEC_SIZE)) {
error(
"Unaligned read!!! ci_cache_idx=%d, first_pi=%d, "
"count_i=%d",
......@@ -1277,9 +1286,10 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
runner_iact_nonsym_1_vec_density(
&v_r2, &v_dx, &v_dy, &v_dz, v_hj_inv, v_vjx, v_vjy, v_vjz,
&ci_cache->vx[ci_cache_idx], &ci_cache->vy[ci_cache_idx],
&ci_cache->vz[ci_cache_idx], &ci_cache->m[ci_cache_idx], &v_rhoSum,
&v_rho_dhSum, &v_wcountSum, &v_wcount_dhSum, &v_div_vSum, &v_curlvxSum,
&v_curlvySum, &v_curlvzSum, v_doj_mask);
&ci_cache->vz[ci_cache_idx], &ci_cache->m[ci_cache_idx],
&v_rhoSum, &v_rho_dhSum, &v_wcountSum, &v_wcount_dhSum,
&v_div_vSum, &v_curlvxSum, &v_curlvySum, &v_curlvzSum,
v_doj_mask);
} /* loop over the parts in ci. */
......@@ -1351,7 +1361,8 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
/* Check if any particles are active and return if there are none. */
int numActive = 0;
/* Use the largest smoothing length to make sure that no interactions are missed. */
/* Use the largest smoothing length to make sure that no interactions are
* missed. */
const double h_max = max(hi_max, hj_max);
if (active_ci) {
......@@ -1416,8 +1427,7 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
/* Read the needed particles into the two caches. */
cache_read_two_partial_cells_sorted_force(ci, cj, ci_cache, cj_cache, sort_i,
sort_j, shift, &first_pi,
&last_pj);
sort_j, shift, &first_pi, &last_pj);
/* Get the number of particles read into the ci cache. */
int ci_cache_count = count_i - first_pi;
......@@ -1465,8 +1475,8 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
v_hig2.v = vec_set1(hig2);
/* Reset cumulative sums of update vectors. */
vector v_a_hydro_xSum, v_a_hydro_ySum, v_a_hydro_zSum, v_h_dtSum, v_sigSum,
v_entropy_dtSum;
vector v_a_hydro_xSum, v_a_hydro_ySum, v_a_hydro_zSum, v_h_dtSum,
v_sigSum, v_entropy_dtSum;
/* Get the inverse of hi. */
vector v_hi_inv;
......@@ -1502,8 +1512,8 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
#ifdef SWIFT_DEBUG_CHECKS
if (cj_cache_idx % VEC_SIZE != 0 || cj_cache_idx < 0 ||
cj_cache_idx + (VEC_SIZE - 1) > (last_pj + 1 + VEC_SIZE)) {
error("Unaligned read!!! cj_cache_idx=%d, last_pj=%d",
cj_cache_idx, last_pj);
error("Unaligned read!!! cj_cache_idx=%d, last_pj=%d", cj_cache_idx,
last_pj);
}
#endif
......@@ -1547,8 +1557,9 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
&cj_cache->grad_h[cj_cache_idx], &cj_cache->pOrho2[cj_cache_idx],
&cj_cache->balsara[cj_cache_idx],
&cj_cache->soundspeed[cj_cache_idx], &cj_cache->m[cj_cache_idx],
v_hi_inv, v_hj_inv, &v_a_hydro_xSum, &v_a_hydro_ySum, &v_a_hydro_zSum,
&v_h_dtSum, &v_sigSum, &v_entropy_dtSum, v_doi_mask);
v_hi_inv, v_hj_inv, &v_a_hydro_xSum, &v_a_hydro_ySum,
&v_a_hydro_zSum, &v_h_dtSum, &v_sigSum, &v_entropy_dtSum,
v_doi_mask);
}
} /* loop over the parts in cj. */
......@@ -1610,8 +1621,8 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
v_hjg2.v = vec_set1(hjg2);
/* Reset cumulative sums of update vectors. */
vector v_a_hydro_xSum, v_a_hydro_ySum, v_a_hydro_zSum, v_h_dtSum, v_sigSum,
v_entropy_dtSum;
vector v_a_hydro_xSum, v_a_hydro_ySum, v_a_hydro_zSum, v_h_dtSum,
v_sigSum, v_entropy_dtSum;
/* Get the inverse of hj. */
vector v_hj_inv;
......@@ -1689,8 +1700,9 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
&ci_cache->grad_h[ci_cache_idx], &ci_cache->pOrho2[ci_cache_idx],
&ci_cache->balsara[ci_cache_idx],
&ci_cache->soundspeed[ci_cache_idx], &ci_cache->m[ci_cache_idx],
v_hj_inv, v_hi_inv, &v_a_hydro_xSum, &v_a_hydro_ySum, &v_a_hydro_zSum,
&v_h_dtSum, &v_sigSum, &v_entropy_dtSum, v_doj_mask);
v_hj_inv, v_hi_inv, &v_a_hydro_xSum, &v_a_hydro_ySum,
&v_a_hydro_zSum, &v_h_dtSum, &v_sigSum, &v_entropy_dtSum,
v_doj_mask);
}
} /* loop over the parts in ci. */
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment