Commit 6ddfd467 authored by James Willis's avatar James Willis
Browse files

Formatting and comments.

parent d71aa3df
...@@ -296,11 +296,16 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted( ...@@ -296,11 +296,16 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
const struct entry *restrict sort_j, const double *restrict const shift, const struct entry *restrict sort_j, const double *restrict const shift,
int *first_pi, int *last_pj) { int *first_pi, int *last_pj) {
/* Pad number of particles read to the vector size. */ /* Make the number of particles to be read a multiple of the vector size.
* This eliminates serial remainder loops where possible when populating the
* cache. */
/* Is the number of particles to read a multiple of the vector size? */
int rem = (ci->count - *first_pi) % VEC_SIZE; int rem = (ci->count - *first_pi) % VEC_SIZE;
if (rem != 0) { if (rem != 0) {
int pad = VEC_SIZE - rem; int pad = VEC_SIZE - rem;
/* Decrease first_pi if there are particles in the cell left to read. */
if (*first_pi - pad >= 0) *first_pi -= pad; if (*first_pi - pad >= 0) *first_pi -= pad;
} }
...@@ -308,6 +313,7 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted( ...@@ -308,6 +313,7 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
if (rem != 0) { if (rem != 0) {
int pad = VEC_SIZE - rem; int pad = VEC_SIZE - rem;
/* Increase last_pj if there are particles in the cell left to read. */
if (*last_pj + pad < cj->count) *last_pj += pad; if (*last_pj + pad < cj->count) *last_pj += pad;
} }
...@@ -504,11 +510,16 @@ cache_read_two_partial_cells_sorted_force( ...@@ -504,11 +510,16 @@ cache_read_two_partial_cells_sorted_force(
const struct entry *restrict sort_i, const struct entry *restrict sort_j, const struct entry *restrict sort_i, const struct entry *restrict sort_j,
const double *const shift, int *first_pi, int *last_pj) { const double *const shift, int *first_pi, int *last_pj) {
/* Pad number of particles read to the vector size. */ /* Make the number of particles to be read a multiple of the vector size.
* This eliminates serial remainder loops where possible when populating the
* cache. */
/* Is the number of particles to read a multiple of the vector size? */
int rem = (ci->count - *first_pi) % VEC_SIZE; int rem = (ci->count - *first_pi) % VEC_SIZE;
if (rem != 0) { if (rem != 0) {
int pad = VEC_SIZE - rem; int pad = VEC_SIZE - rem;
/* Decrease first_pi if there are particles in the cell left to read. */
if (*first_pi - pad >= 0) *first_pi -= pad; if (*first_pi - pad >= 0) *first_pi -= pad;
} }
...@@ -516,6 +527,7 @@ cache_read_two_partial_cells_sorted_force( ...@@ -516,6 +527,7 @@ cache_read_two_partial_cells_sorted_force(
if (rem != 0) { if (rem != 0) {
int pad = VEC_SIZE - rem; int pad = VEC_SIZE - rem;
/* Increase last_pj if there are particles in the cell left to read. */
if (*last_pj + pad < cj->count) *last_pj += pad; if (*last_pj + pad < cj->count) *last_pj += pad;
} }
......
...@@ -297,7 +297,8 @@ __attribute__((always_inline)) INLINE static void populate_max_index_no_cache( ...@@ -297,7 +297,8 @@ __attribute__((always_inline)) INLINE static void populate_max_index_no_cache(
const float first_di = const float first_di =
sort_i[first_pi].d + pi->h * kernel_gamma + dx_max - rshift; sort_i[first_pi].d + pi->h * kernel_gamma + dx_max - rshift;
/* Loop through particles in cell j until they are not in range of pi. */ /* Loop through particles in cell j until they are not in range of pi.
* Make sure that temp stays between 0 and cj->count - 1. */
while (temp < cj->count - 1 && first_di > sort_j[temp].d) temp++; while (temp < cj->count - 1 && first_di > sort_j[temp].d) temp++;
max_index_i[first_pi] = temp; max_index_i[first_pi] = temp;
...@@ -309,6 +310,7 @@ __attribute__((always_inline)) INLINE static void populate_max_index_no_cache( ...@@ -309,6 +310,7 @@ __attribute__((always_inline)) INLINE static void populate_max_index_no_cache(
const float di = sort_i[i].d + pi->h * kernel_gamma + dx_max - rshift; const float di = sort_i[i].d + pi->h * kernel_gamma + dx_max - rshift;
/* Make sure that temp stays between 0 and cj->count - 1. */
while (temp < cj->count - 1 && di > sort_j[temp].d) temp++; while (temp < cj->count - 1 && di > sort_j[temp].d) temp++;
max_index_i[i] = temp; max_index_i[i] = temp;
...@@ -438,7 +440,8 @@ populate_max_index_no_cache_force(const struct cell *ci, const struct cell *cj, ...@@ -438,7 +440,8 @@ populate_max_index_no_cache_force(const struct cell *ci, const struct cell *cj,
max(pi->h, hj_max_raw) * kernel_gamma + dx_max - max(pi->h, hj_max_raw) * kernel_gamma + dx_max -
rshift; rshift;
/* Loop through particles in cell j until they are not in range of pi. */ /* Loop through particles in cell j until they are not in range of pi.
* Make sure that temp stays between 0 and cj->count - 1. */
while (temp < cj->count - 1 && first_di > sort_j[temp].d) temp++; while (temp < cj->count - 1 && first_di > sort_j[temp].d) temp++;
max_index_i[first_pi] = temp; max_index_i[first_pi] = temp;
...@@ -451,6 +454,7 @@ populate_max_index_no_cache_force(const struct cell *ci, const struct cell *cj, ...@@ -451,6 +454,7 @@ populate_max_index_no_cache_force(const struct cell *ci, const struct cell *cj,
const float di = const float di =
sort_i[i].d + max(pi->h, hj_max_raw) * kernel_gamma + dx_max - rshift; sort_i[i].d + max(pi->h, hj_max_raw) * kernel_gamma + dx_max - rshift;
/* Make sure that temp stays between 0 and cj->count - 1. */
while (temp < cj->count - 1 && di > sort_j[temp].d) temp++; while (temp < cj->count - 1 && di > sort_j[temp].d) temp++;
max_index_i[i] = temp; max_index_i[i] = temp;
...@@ -1092,7 +1096,9 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, ...@@ -1092,7 +1096,9 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
exit_iteration_align += pad; exit_iteration_align += pad;
} }
/* Loop over the parts in cj. */ /* Loop over the parts in cj. The inclusive bound (<=) makes sure that one
* iteration of the loop is performed even if exit_iteration_align is zero
* and there is only one particle to interact with. */
for (int pjd = 0; pjd <= exit_iteration_align; pjd += VEC_SIZE) { for (int pjd = 0; pjd <= exit_iteration_align; pjd += VEC_SIZE) {
/* Get the cache index to the jth particle. */ /* Get the cache index to the jth particle. */
...@@ -1473,7 +1479,9 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci, ...@@ -1473,7 +1479,9 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
exit_iteration_align += pad; exit_iteration_align += pad;
} }
/* Loop over the parts in cj. */ /* Loop over the parts in cj. The inclusive bound (<=) makes sure that one
* iteration of the loop is performed even if exit_iteration_align is zero
* and there is only one particle to interact with. */
for (int pjd = 0; pjd <= exit_iteration_align; pjd += VEC_SIZE) { for (int pjd = 0; pjd <= exit_iteration_align; pjd += VEC_SIZE) {
/* Get the cache index to the jth particle. */ /* Get the cache index to the jth particle. */
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment