diff --git a/src/cache.h b/src/cache.h index 6e014dc831bdf5069a2309c4fddc316e70d5d5a4..0e138cc5f0ae19e858aeb62f5172e88c9e6a3a9b 100644 --- a/src/cache.h +++ b/src/cache.h @@ -216,6 +216,21 @@ __attribute__((always_inline)) INLINE void cache_read_particles( #endif } +/** + * @brief Populate cache by only reading particles that are within range of + * each other within the adjoining cell. Also read the particles into the cache + * in sorted order. + * + * @param ci The i #cell. + * @param ci_cache The #cache for cell ci. + * @param sort_i The array of sorted particle indices for cell ci. + * @param shift The amount to shift the particle positions to account for BCs + * @param first_pi The first particle in cell ci that is in range. + * @param last_pi The last particle in cell ci that is in range. + * @param last_pi The last particle in cell ci that is in range. + * @param loc The cell location to remove from the particle positions. + * @param flipped Flag to check whether the cells have been flipped or not. + */ __attribute__((always_inline)) INLINE void cache_read_particles_subpair( const struct cell *restrict const ci, struct cache *restrict const ci_cache, const struct entry *restrict sort_i, int *first_pi, int *last_pi, const double *loc, const int flipped) { @@ -235,17 +250,19 @@ __attribute__((always_inline)) INLINE void cache_read_particles_subpair( const struct part *restrict parts = ci->parts; - /* Shift the particles positions to a local frame so single precision can be - * used instead of double precision. */ + /* The cell is on the right so read the particles + * into the cache from the start of the cell. */ if(!flipped) { int rem = (*last_pi + 1) % VEC_SIZE; if (rem != 0) { int pad = VEC_SIZE - rem; - /* Increase last_pj if there are particles in the cell left to read. */ + /* Increase last_pi if there are particles in the cell left to read. */ if (*last_pi + pad < ci->count) *last_pi += pad; } + /* Shift the particles positions to a local frame so single precision can be + * used instead of double precision. */ for (int i = 0; i < *last_pi; i++) { const int idx = sort_i[i].i; x[i] = (float)(parts[idx].x[0] - loc[0]); @@ -277,8 +294,9 @@ __attribute__((always_inline)) INLINE void cache_read_particles_subpair( vz[i] = 1.f; } } + /* The cell is on the left so read the particles + * into the cache from the end of the cell. */ else { - int rem = (ci->count - *first_pi) % VEC_SIZE; if (rem != 0) { int pad = VEC_SIZE - rem; @@ -289,10 +307,8 @@ __attribute__((always_inline)) INLINE void cache_read_particles_subpair( int ci_cache_count = ci->count - *first_pi; - //message("ci_cache_count: %d, count_i: %d, first_pi: %d", ci_cache_count, ci->count, *first_pi); - - /* Shift the particles positions to a local frame (ci frame) so single - * precision can be used instead of double precision. */ + /* Shift the particles positions to a local frame so single precision can be + * used instead of double precision. */ for (int i = 0; i < ci_cache_count; i++) { const int idx = sort_i[i + *first_pi].i; x[i] = (float)(parts[idx].x[0] - loc[0]); @@ -402,7 +418,6 @@ __attribute__((always_inline)) INLINE void cache_read_force_particles( * @param shift The amount to shift the particle positions to account for BCs * @param first_pi The first particle in cell ci that is in range. * @param last_pj The last particle in cell cj that is in range. - * interaction. */ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted( const struct cell *restrict const ci, const struct cell *restrict const cj, @@ -620,7 +635,6 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted( * @param shift The amount to shift the particle positions to account for BCs * @param first_pi The first particle in cell ci that is in range. * @param last_pj The last particle in cell cj that is in range. - * interaction. */ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted_force( diff --git a/src/runner_doiact.h b/src/runner_doiact.h index a605f5d65b20bfeb9b03164ea937ace2cf66b0a5..2b29f8239e7b13b87706e23e7bd424168b7a6503 100644 --- a/src/runner_doiact.h +++ b/src/runner_doiact.h @@ -470,6 +470,7 @@ void DOSELF2_NAIVE(struct runner *r, struct cell *restrict c) { * @param ind The list of indices of particles in @c ci to interact with. * @param count The number of particles in @c ind. * @param cj The second #cell. + * @param shift The shift vector to apply to the particles in ci. */ void DOPAIR_SUBSET_NAIVE(struct runner *r, struct cell *restrict ci, struct part *restrict parts_i, int *restrict ind, @@ -539,6 +540,9 @@ void DOPAIR_SUBSET_NAIVE(struct runner *r, struct cell *restrict ci, * @param ind The list of indices of particles in @c ci to interact with. * @param count The number of particles in @c ind. * @param cj The second #cell. + * @param sid The direction of the pair. + * @param flipped Flag to check whether the cells have been flipped or not. + * @param shift The shift vector to apply to the particles in ci. */ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci, struct part *restrict parts_i, int *restrict ind, int count, @@ -569,11 +573,9 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci, const double di = hi * kernel_gamma + dxj + pix * runner_shift[sid][0] + piy * runner_shift[sid][1] + piz * runner_shift[sid][2]; -// int ctr = 0; /* Loop over the parts in cj. */ for (int pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) { -// ctr++; /* Get a pointer to the jth particle. */ struct part *restrict pj = &parts_j[sort_j[pjd].i]; const float hj = pj->h; @@ -599,7 +601,6 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci, IACT_NONSYM(r2, dx, hi, hj, pi, pj); } } /* loop over the parts in cj. */ -// message("pi: %lld, iterations in inner loop: %d", pi->id, ctr); } /* loop over the parts in ci. */ } @@ -783,7 +784,7 @@ void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci, * @param r The #runner. * @param ci The first #cell. * @param cj The second #cell. - * @param sid The direction of the pair + * @param sid The direction of the pair. * @param shift The shift vector to apply to the particles in ci. */ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid, diff --git a/src/runner_doiact_vec.c b/src/runner_doiact_vec.c index 1d32d954110996e859ce22e04932b420d251281a..032f3246c6087d5893916d4dbc3a664e6d25e2ab 100644 --- a/src/runner_doiact_vec.c +++ b/src/runner_doiact_vec.c @@ -513,29 +513,25 @@ populate_max_index_force( } /** - * @brief Populates the arrays max_index_i and max_index_j with the maximum - * indices of - * particles into their neighbouring cells. Also finds the first pi that - * interacts with any particle in cj and the last pj that interacts with any - * particle in ci. + * @brief Populates the array max_index_i with the maximum + * index of + * particles into the neighbouring cell. Also finds the first/last pj that + * interacts with any particle in ci. * - * @param ci #cell pointer to ci - * @param cj #cell pointer to cj - * @param sort_i #entry array for particle distance in ci + * @param count_i The number of particles in ci. + * @param count_j The number of particles in cj. + * @param parts_i The #part to interact with @c cj. + * @param ind The list of indices of particles in @c ci to interact with. + * @param total_ci_shift The shift vector to apply to the particles in ci. + * @param dxj Maximum particle movement allowed in cell cj. + * @param di_shift_correction The correction to di after the particles have been shifted to the frame of cell ci. + * @param runner_shift_x The runner_shift in the x direction. + * @param runner_shift_y The runner_shift in the y direction. + * @param runner_shift_z The runner_shift in the z direction. * @param sort_j #entry array for particle distance in cj - * @param dx_max maximum particle movement allowed in cell - * @param rshift cutoff shift - * @param hi_max Maximal smoothing length in cell ci - * @param hj_max Maximal smoothing length in cell cj - * @param di_max Maximal position on the axis that can interact in cell ci - * @param dj_min Minimal position on the axis that can interact in cell ci - * @param max_index_i array to hold the maximum distances of pi particles into - * #cell cj - * @param max_index_j array to hold the maximum distances of pj particles into - * #cell cj - * @param init_pi first pi to interact with a pj particle - * @param init_pj last pj to interact with a pi particle - * @param max_active_bin The largest time-bin active during this step. + * @param max_index_i array to hold the maximum distances of pi particles into #cell cj + * @param flipped Flag to check whether the cells have been flipped or not. + * @return first_pj/last_pj first or last pj to interact with any particle in ci depending whether the cells have been flipped or not. */ __attribute__((always_inline)) INLINE static int populate_max_index_subset( const int count_i, @@ -551,6 +547,8 @@ __attribute__((always_inline)) INLINE static int populate_max_index_subset( const struct entry *restrict sort_j, int *max_index_i, const int flipped) { + /* The cell is on the right so read the particles + * into the cache from the start of the cell. */ if(!flipped) { /* Find the rightmost particle in cell j that interacts with any @@ -573,6 +571,8 @@ __attribute__((always_inline)) INLINE static int populate_max_index_subset( } return last_pj; } + /* The cell is on the left so read the particles + * into the cache from the end of the cell. */ else { int first_pj = count_j - 1; @@ -1626,6 +1626,9 @@ __attribute__((always_inline)) INLINE void runner_dopair1_density_vec(struct run * @param ind The list of indices of particles in @c ci to interact with. * @param count The number of particles in @c ind. * @param cj The second #cell. + * @param sid The direction of the pair. + * @param flipped Flag to check whether the cells have been flipped or not. + * @param shift The shift vector to apply to the particles in ci. */ __attribute__((always_inline)) INLINE void runner_dopair_subset_density_vec(struct runner *r, struct cell *restrict ci, struct part *restrict parts_i, int *restrict ind, int count, @@ -1655,6 +1658,7 @@ __attribute__((always_inline)) INLINE void runner_dopair_subset_density_vec(stru const double total_ci_shift[3] = { ci->loc[0] + shift[0], ci->loc[1] + shift[1], ci->loc[2] + shift[2]}; + /* Calculate the correction to di after the particles have been shifted to the frame of cell ci. */ const double di_shift_correction = ci->loc[0]*runner_shift_x + ci->loc[1]*runner_shift_y + ci->loc[2]*runner_shift_z; @@ -1663,7 +1667,6 @@ __attribute__((always_inline)) INLINE void runner_dopair_subset_density_vec(stru for (int k = 0; k < 3; k++) rshift += shift[k] * runner_shift[sid][k]; int *restrict max_index_i SWIFT_CACHE_ALIGN; - max_index_i = r->ci_cache.max_index; /* Parts are on the left? */ @@ -1715,10 +1718,10 @@ __attribute__((always_inline)) INLINE void runner_dopair_subset_density_vec(stru vector v_curlvySum = vector_setzero(); vector v_curlvzSum = vector_setzero(); - int exit_iteration = max_index_i[pid]; + int exit_iteration_end = max_index_i[pid] + 1; /* Loop over the parts in cj. */ - for (int pjd = 0; pjd <= exit_iteration; pjd += VEC_SIZE) { + for (int pjd = 0; pjd < exit_iteration_end; pjd += VEC_SIZE) { /* Get the cache index to the jth particle. */ const int cj_cache_idx = pjd;