Commit b79dbe1c authored by Matthieu Schaller's avatar Matthieu Schaller
Browse files

Merge branch '528-ghost-task-data-race' into 'master'

Resolve "Ghost task data race?"

Closes #528

See merge request !728
parents 870ba5ce d6eea398
...@@ -254,6 +254,83 @@ __attribute__((always_inline)) INLINE int cache_read_particles( ...@@ -254,6 +254,83 @@ __attribute__((always_inline)) INLINE int cache_read_particles(
#endif #endif
} }
/**
 * @brief Populate cache by reading in the particles in unsorted order for
 * doself_subset.
 *
 * Positions (shifted into the cell's local frame), masses and velocities are
 * copied into the cache. The smoothing-length array of the cache is not
 * written by this function.
 *
 * Inhibited particles, and the extra slots appended so that the number of
 * entries is a multiple of NUM_VEC_PROC * VEC_SIZE, are given a padded
 * position meant to lie outside the range of any particle, and neutral
 * (1.f) mass/velocity values so that no stale cache content is ever loaded
 * for them (same convention as the padded entries of the pair variant).
 *
 * @param ci The #cell.
 * @param ci_cache The cache.
 * @return The number of cache entries written, i.e. the particle count of
 * the cell rounded up to the next multiple of NUM_VEC_PROC * VEC_SIZE. Note
 * that this includes inhibited particles and padding slots.
 */
__attribute__((always_inline)) INLINE int cache_read_particles_subset_self(
    const struct cell *restrict const ci,
    struct cache *restrict const ci_cache) {

#if defined(GADGET2_SPH)

  /* Let the compiler know that the data is aligned and create pointers to the
   * arrays inside the cache. */
  swift_declare_aligned_ptr(float, x, ci_cache->x, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, y, ci_cache->y, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, z, ci_cache->z, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, m, ci_cache->m, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, vx, ci_cache->vx, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, vy, ci_cache->vy, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, vz, ci_cache->vz, SWIFT_CACHE_ALIGNMENT);

  const int count = ci->hydro.count;
  const struct part *restrict parts = ci->hydro.parts;
  const double loc[3] = {ci->loc[0], ci->loc[1], ci->loc[2]};
  const double max_dx = ci->hydro.dx_max_part;

  /* Position assigned to every entry that must never interact. */
  const float pos_padded[3] = {-(2. * ci->width[0] + max_dx),
                               -(2. * ci->width[1] + max_dx),
                               -(2. * ci->width[2] + max_dx)};

  /* Shift the particles positions to a local frame so single precision can be
   * used instead of double precision. */
  for (int i = 0; i < count; i++) {

    /* Pad inhibited particles. */
    if (parts[i].time_bin >= time_bin_inhibited) {
      x[i] = pos_padded[0];
      y[i] = pos_padded[1];
      z[i] = pos_padded[2];

      /* Neutral values so the slot never holds stale data. */
      m[i] = 1.f;
      vx[i] = 1.f;
      vy[i] = 1.f;
      vz[i] = 1.f;
      continue;
    }

    x[i] = (float)(parts[i].x[0] - loc[0]);
    y[i] = (float)(parts[i].x[1] - loc[1]);
    z[i] = (float)(parts[i].x[2] - loc[2]);
    m[i] = parts[i].mass;
    vx[i] = parts[i].v[0];
    vy[i] = parts[i].v[1];
    vz[i] = parts[i].v[2];
  }

  /* Pad cache if the no. of particles is not a multiple of
   * NUM_VEC_PROC * VEC_SIZE. */
  int count_align = count;
  const int rem = count % (NUM_VEC_PROC * VEC_SIZE);
  if (rem != 0) {
    count_align += (NUM_VEC_PROC * VEC_SIZE) - rem;

    /* Set positions to something outside of the range of any particle and
     * fill the remaining arrays with neutral values. */
    for (int i = count; i < count_align; i++) {
      x[i] = pos_padded[0];
      y[i] = pos_padded[1];
      z[i] = pos_padded[2];
      m[i] = 1.f;
      vx[i] = 1.f;
      vy[i] = 1.f;
      vz[i] = 1.f;
    }
  }

  return count_align;

#else
  error("Can't call the cache reading function with this flavour of SPH!");
  return 0;
#endif
}
/** /**
* @brief Populate cache by only reading particles that are within range of * @brief Populate cache by only reading particles that are within range of
* each other within the adjoining cell. Also read the particles into the cache * each other within the adjoining cell. Also read the particles into the cache
...@@ -268,7 +345,7 @@ __attribute__((always_inline)) INLINE int cache_read_particles( ...@@ -268,7 +345,7 @@ __attribute__((always_inline)) INLINE int cache_read_particles(
* @param loc The cell location to remove from the particle positions. * @param loc The cell location to remove from the particle positions.
* @param flipped Flag to check whether the cells have been flipped or not. * @param flipped Flag to check whether the cells have been flipped or not.
*/ */
__attribute__((always_inline)) INLINE void cache_read_particles_subset( __attribute__((always_inline)) INLINE void cache_read_particles_subset_pair(
const struct cell *restrict const ci, struct cache *restrict const ci_cache, const struct cell *restrict const ci, struct cache *restrict const ci_cache,
const struct entry *restrict sort_i, int *first_pi, int *last_pi, const struct entry *restrict sort_i, int *first_pi, int *last_pi,
const double *loc, const int flipped) { const double *loc, const int flipped) {
...@@ -280,7 +357,6 @@ __attribute__((always_inline)) INLINE void cache_read_particles_subset( ...@@ -280,7 +357,6 @@ __attribute__((always_inline)) INLINE void cache_read_particles_subset(
swift_declare_aligned_ptr(float, x, ci_cache->x, SWIFT_CACHE_ALIGNMENT); swift_declare_aligned_ptr(float, x, ci_cache->x, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(float, y, ci_cache->y, SWIFT_CACHE_ALIGNMENT); swift_declare_aligned_ptr(float, y, ci_cache->y, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(float, z, ci_cache->z, SWIFT_CACHE_ALIGNMENT); swift_declare_aligned_ptr(float, z, ci_cache->z, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(float, h, ci_cache->h, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(float, m, ci_cache->m, SWIFT_CACHE_ALIGNMENT); swift_declare_aligned_ptr(float, m, ci_cache->m, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(float, vx, ci_cache->vx, SWIFT_CACHE_ALIGNMENT); swift_declare_aligned_ptr(float, vx, ci_cache->vx, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(float, vy, ci_cache->vy, SWIFT_CACHE_ALIGNMENT); swift_declare_aligned_ptr(float, vy, ci_cache->vy, SWIFT_CACHE_ALIGNMENT);
...@@ -303,7 +379,6 @@ __attribute__((always_inline)) INLINE void cache_read_particles_subset( ...@@ -303,7 +379,6 @@ __attribute__((always_inline)) INLINE void cache_read_particles_subset(
const float pos_padded[3] = {-(2. * ci->width[0] + max_dx), const float pos_padded[3] = {-(2. * ci->width[0] + max_dx),
-(2. * ci->width[1] + max_dx), -(2. * ci->width[1] + max_dx),
-(2. * ci->width[2] + max_dx)}; -(2. * ci->width[2] + max_dx)};
const float h_padded = ci->hydro.h_max / 4.;
/* Shift the particles positions to a local frame so single precision can be /* Shift the particles positions to a local frame so single precision can be
* used instead of double precision. */ * used instead of double precision. */
...@@ -315,8 +390,6 @@ __attribute__((always_inline)) INLINE void cache_read_particles_subset( ...@@ -315,8 +390,6 @@ __attribute__((always_inline)) INLINE void cache_read_particles_subset(
x[i] = pos_padded[0]; x[i] = pos_padded[0];
y[i] = pos_padded[1]; y[i] = pos_padded[1];
z[i] = pos_padded[2]; z[i] = pos_padded[2];
h[i] = h_padded;
m[i] = 1.f; m[i] = 1.f;
vx[i] = 1.f; vx[i] = 1.f;
vy[i] = 1.f; vy[i] = 1.f;
...@@ -328,7 +401,6 @@ __attribute__((always_inline)) INLINE void cache_read_particles_subset( ...@@ -328,7 +401,6 @@ __attribute__((always_inline)) INLINE void cache_read_particles_subset(
x[i] = (float)(parts[idx].x[0] - loc[0]); x[i] = (float)(parts[idx].x[0] - loc[0]);
y[i] = (float)(parts[idx].x[1] - loc[1]); y[i] = (float)(parts[idx].x[1] - loc[1]);
z[i] = (float)(parts[idx].x[2] - loc[2]); z[i] = (float)(parts[idx].x[2] - loc[2]);
h[i] = parts[idx].h;
m[i] = parts[idx].mass; m[i] = parts[idx].mass;
vx[i] = parts[idx].v[0]; vx[i] = parts[idx].v[0];
vy[i] = parts[idx].v[1]; vy[i] = parts[idx].v[1];
...@@ -342,7 +414,6 @@ __attribute__((always_inline)) INLINE void cache_read_particles_subset( ...@@ -342,7 +414,6 @@ __attribute__((always_inline)) INLINE void cache_read_particles_subset(
x[i] = pos_padded[0]; x[i] = pos_padded[0];
y[i] = pos_padded[1]; y[i] = pos_padded[1];
z[i] = pos_padded[2]; z[i] = pos_padded[2];
h[i] = h_padded;
m[i] = 1.f; m[i] = 1.f;
vx[i] = 1.f; vx[i] = 1.f;
...@@ -366,7 +437,6 @@ __attribute__((always_inline)) INLINE void cache_read_particles_subset( ...@@ -366,7 +437,6 @@ __attribute__((always_inline)) INLINE void cache_read_particles_subset(
const float pos_padded[3] = {-(2. * ci->width[0] + max_dx), const float pos_padded[3] = {-(2. * ci->width[0] + max_dx),
-(2. * ci->width[1] + max_dx), -(2. * ci->width[1] + max_dx),
-(2. * ci->width[2] + max_dx)}; -(2. * ci->width[2] + max_dx)};
const float h_padded = ci->hydro.h_max / 4.;
/* Shift the particles positions to a local frame so single precision can be /* Shift the particles positions to a local frame so single precision can be
* used instead of double precision. */ * used instead of double precision. */
...@@ -378,7 +448,6 @@ __attribute__((always_inline)) INLINE void cache_read_particles_subset( ...@@ -378,7 +448,6 @@ __attribute__((always_inline)) INLINE void cache_read_particles_subset(
x[i] = pos_padded[0]; x[i] = pos_padded[0];
y[i] = pos_padded[1]; y[i] = pos_padded[1];
z[i] = pos_padded[2]; z[i] = pos_padded[2];
h[i] = h_padded;
m[i] = 1.f; m[i] = 1.f;
vx[i] = 1.f; vx[i] = 1.f;
...@@ -391,7 +460,6 @@ __attribute__((always_inline)) INLINE void cache_read_particles_subset( ...@@ -391,7 +460,6 @@ __attribute__((always_inline)) INLINE void cache_read_particles_subset(
x[i] = (float)(parts[idx].x[0] - loc[0]); x[i] = (float)(parts[idx].x[0] - loc[0]);
y[i] = (float)(parts[idx].x[1] - loc[1]); y[i] = (float)(parts[idx].x[1] - loc[1]);
z[i] = (float)(parts[idx].x[2] - loc[2]); z[i] = (float)(parts[idx].x[2] - loc[2]);
h[i] = parts[idx].h;
m[i] = parts[idx].mass; m[i] = parts[idx].mass;
vx[i] = parts[idx].v[0]; vx[i] = parts[idx].v[0];
vy[i] = parts[idx].v[1]; vy[i] = parts[idx].v[1];
...@@ -406,7 +474,6 @@ __attribute__((always_inline)) INLINE void cache_read_particles_subset( ...@@ -406,7 +474,6 @@ __attribute__((always_inline)) INLINE void cache_read_particles_subset(
x[i] = pos_padded[0]; x[i] = pos_padded[0];
y[i] = pos_padded[1]; y[i] = pos_padded[1];
z[i] = pos_padded[2]; z[i] = pos_padded[2];
h[i] = h_padded;
m[i] = 1.f; m[i] = 1.f;
vx[i] = 1.f; vx[i] = 1.f;
......
...@@ -896,7 +896,7 @@ void runner_doself_subset_density_vec(struct runner *r, struct cell *restrict c, ...@@ -896,7 +896,7 @@ void runner_doself_subset_density_vec(struct runner *r, struct cell *restrict c,
if (cell_cache->count < count) cache_init(cell_cache, count); if (cell_cache->count < count) cache_init(cell_cache, count);
/* Read the particles from the cell and store them locally in the cache. */ /* Read the particles from the cell and store them locally in the cache. */
const int count_align = cache_read_particles(c, cell_cache); const int count_align = cache_read_particles_subset_self(c, cell_cache);
/* Create secondary cache to store particle interactions. */ /* Create secondary cache to store particle interactions. */
struct c2_cache int_cache; struct c2_cache int_cache;
...@@ -1763,7 +1763,8 @@ void runner_dopair_subset_density_vec(struct runner *r, ...@@ -1763,7 +1763,8 @@ void runner_dopair_subset_density_vec(struct runner *r,
runner_shift_x, runner_shift_y, runner_shift_z, sort_j, max_index_i, 0); runner_shift_x, runner_shift_y, runner_shift_z, sort_j, max_index_i, 0);
/* Read the particles from the cell and store them locally in the cache. */ /* Read the particles from the cell and store them locally in the cache. */
cache_read_particles_subset(cj, cj_cache, sort_j, 0, &last_pj, ci->loc, 0); cache_read_particles_subset_pair(cj, cj_cache, sort_j, 0, &last_pj, ci->loc,
0);
const double dj_min = sort_j[0].d; const double dj_min = sort_j[0].d;
...@@ -1899,7 +1900,8 @@ void runner_dopair_subset_density_vec(struct runner *r, ...@@ -1899,7 +1900,8 @@ void runner_dopair_subset_density_vec(struct runner *r,
runner_shift_x, runner_shift_y, runner_shift_z, sort_j, max_index_i, 1); runner_shift_x, runner_shift_y, runner_shift_z, sort_j, max_index_i, 1);
/* Read the particles from the cell and store them locally in the cache. */ /* Read the particles from the cell and store them locally in the cache. */
cache_read_particles_subset(cj, cj_cache, sort_j, &first_pj, 0, ci->loc, 1); cache_read_particles_subset_pair(cj, cj_cache, sort_j, &first_pj, 0,
ci->loc, 1);
/* Get the number of particles read into the ci cache. */ /* Get the number of particles read into the ci cache. */
const int cj_cache_count = count_j - first_pj; const int cj_cache_count = count_j - first_pj;
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment