Commit e33d87f3 authored by Matthieu Schaller
Browse files

No need to zero the acceleration caches before use.

parent 95cb0e07
......@@ -127,12 +127,11 @@ static INLINE void gravity_cache_init(struct gravity_cache *c, int count) {
* @param gcount The number of particles to read.
* @param gcount_padded The number of particles to read, padded to the next
* multiple of the vector length.
* @param zero_output Do we need to zero the output caches ?
* @param shift A shift to apply to all the particles.
*/
__attribute__((always_inline)) INLINE void gravity_cache_populate(
struct gravity_cache *c, const struct gpart *restrict gparts, int gcount,
int gcount_padded, int zero_output, double shift[3]) {
int gcount_padded, double shift[3]) {
/* Make the compiler understand we are in happy vectorization land */
float *restrict x = c->x;
......@@ -140,17 +139,11 @@ __attribute__((always_inline)) INLINE void gravity_cache_populate(
float *restrict z = c->z;
float *restrict m = c->m;
float *restrict epsilon = c->epsilon;
float *restrict a_x = c->a_x;
float *restrict a_y = c->a_y;
float *restrict a_z = c->a_z;
swift_align_information(x, SWIFT_CACHE_ALIGNMENT);
swift_align_information(y, SWIFT_CACHE_ALIGNMENT);
swift_align_information(z, SWIFT_CACHE_ALIGNMENT);
swift_align_information(epsilon, SWIFT_CACHE_ALIGNMENT);
swift_align_information(m, SWIFT_CACHE_ALIGNMENT);
swift_align_information(a_x, SWIFT_CACHE_ALIGNMENT);
swift_align_information(a_y, SWIFT_CACHE_ALIGNMENT);
swift_align_information(a_z, SWIFT_CACHE_ALIGNMENT);
swift_assume_size(gcount_padded, VEC_SIZE);
/* Fill the input caches */
......@@ -174,13 +167,6 @@ __attribute__((always_inline)) INLINE void gravity_cache_populate(
epsilon[i] = 0.f;
m[i] = 0.f;
}
/* Zero the output caches */
if (zero_output) {
bzero(a_x, gcount_padded * sizeof(float));
bzero(a_y, gcount_padded * sizeof(float));
bzero(a_z, gcount_padded * sizeof(float));
}
}
/**
......@@ -191,11 +177,10 @@ __attribute__((always_inline)) INLINE void gravity_cache_populate(
* @param gcount The number of particles to read.
* @param gcount_padded The number of particles to read, padded to the next
* multiple of the vector length.
* @param zero_output Do we need to zero the output caches ?
*/
__attribute__((always_inline)) INLINE void gravity_cache_populate_no_shift(
struct gravity_cache *c, const struct gpart *restrict gparts, int gcount,
int gcount_padded, int zero_output) {
int gcount_padded) {
/* Make the compiler understand we are in happy vectorization land */
float *restrict x = c->x;
......@@ -203,17 +188,12 @@ __attribute__((always_inline)) INLINE void gravity_cache_populate_no_shift(
float *restrict z = c->z;
float *restrict m = c->m;
float *restrict epsilon = c->epsilon;
float *restrict a_x = c->a_x;
float *restrict a_y = c->a_y;
float *restrict a_z = c->a_z;
swift_align_information(x, SWIFT_CACHE_ALIGNMENT);
swift_align_information(y, SWIFT_CACHE_ALIGNMENT);
swift_align_information(z, SWIFT_CACHE_ALIGNMENT);
swift_align_information(epsilon, SWIFT_CACHE_ALIGNMENT);
swift_align_information(m, SWIFT_CACHE_ALIGNMENT);
swift_align_information(a_x, SWIFT_CACHE_ALIGNMENT);
swift_align_information(a_y, SWIFT_CACHE_ALIGNMENT);
swift_align_information(a_z, SWIFT_CACHE_ALIGNMENT);
swift_assume_size(gcount_padded, VEC_SIZE);
/* Fill the input caches */
for (int i = 0; i < gcount; ++i) {
......@@ -236,13 +216,6 @@ __attribute__((always_inline)) INLINE void gravity_cache_populate_no_shift(
epsilon[i] = 0.f;
m[i] = 0.f;
}
/* Zero the output caches */
if (zero_output) {
bzero(a_x, gcount_padded * sizeof(float));
bzero(a_y, gcount_padded * sizeof(float));
bzero(a_z, gcount_padded * sizeof(float));
}
}
/**
......
......@@ -188,10 +188,9 @@ void runner_dopair_grav_pp_full(struct runner *r, struct cell *ci,
const int gcount_padded_j = gcount_j - (gcount_j % VEC_SIZE) + VEC_SIZE;
/* Fill the caches */
gravity_cache_populate(ci_cache, gparts_i, gcount_i, gcount_padded_i,
ci_active, shift);
gravity_cache_populate_no_shift(cj_cache, gparts_j, gcount_j, gcount_padded_j,
cj_active);
gravity_cache_populate(ci_cache, gparts_i, gcount_i, gcount_padded_i, shift);
gravity_cache_populate_no_shift(cj_cache, gparts_j, gcount_j,
gcount_padded_j);
/* Ok... Here we go ! */
......@@ -281,9 +280,9 @@ void runner_dopair_grav_pp_full(struct runner *r, struct cell *ci,
}
/* Store everything back in cache */
ci_cache->a_x[pid] += a_x;
ci_cache->a_y[pid] += a_y;
ci_cache->a_z[pid] += a_z;
ci_cache->a_x[pid] = a_x;
ci_cache->a_y[pid] = a_y;
ci_cache->a_z[pid] = a_z;
}
}
......@@ -374,9 +373,9 @@ void runner_dopair_grav_pp_full(struct runner *r, struct cell *ci,
}
/* Store everything back in cache */
cj_cache->a_x[pjd] += a_x;
cj_cache->a_y[pjd] += a_y;
cj_cache->a_z[pjd] += a_z;
cj_cache->a_x[pjd] = a_x;
cj_cache->a_y[pjd] = a_y;
cj_cache->a_z[pjd] = a_z;
}
}
......@@ -531,10 +530,9 @@ void runner_dopair_grav_pp_truncated(struct runner *r, struct cell *ci,
const int gcount_padded_j = gcount_j - (gcount_j % VEC_SIZE) + VEC_SIZE;
/* Fill the caches */
gravity_cache_populate(ci_cache, gparts_i, gcount_i, gcount_padded_i,
ci_active, shift);
gravity_cache_populate_no_shift(cj_cache, gparts_j, gcount_j, gcount_padded_j,
cj_active);
gravity_cache_populate(ci_cache, gparts_i, gcount_i, gcount_padded_i, shift);
gravity_cache_populate_no_shift(cj_cache, gparts_j, gcount_j,
gcount_padded_j);
/* Ok... Here we go ! */
......@@ -629,9 +627,9 @@ void runner_dopair_grav_pp_truncated(struct runner *r, struct cell *ci,
}
/* Store everything back in cache */
ci_cache->a_x[pid] += a_x;
ci_cache->a_y[pid] += a_y;
ci_cache->a_z[pid] += a_z;
ci_cache->a_x[pid] = a_x;
ci_cache->a_y[pid] = a_y;
ci_cache->a_z[pid] = a_z;
}
}
......@@ -727,9 +725,9 @@ void runner_dopair_grav_pp_truncated(struct runner *r, struct cell *ci,
}
/* Store everything back in cache */
cj_cache->a_x[pjd] += a_x;
cj_cache->a_y[pjd] += a_y;
cj_cache->a_z[pjd] += a_z;
cj_cache->a_x[pjd] = a_x;
cj_cache->a_y[pjd] = a_y;
cj_cache->a_z[pjd] = a_z;
}
}
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment