Commit e6bc5754 authored by Peter W. Draper's avatar Peter W. Draper
Browse files

Merge branch 'parallel_get_index' into 'master'

Parallel space_rebuild()

Looking at some of the scaling results, it turns out that the last remaining significant chunk of non-parallel code is `space_rebuild()` and the majority of the time in there is spent computing the cell index of the particles. 
This can easily done in parallel and on the EAGLE_25 shows significant improvements in the code speed and scalability. Although I should say that this comes from running this on 16 cores only and based on the vTune outputs (which usually match the actual tests).

What do you think ?

See merge request !260
parents c69268c3 2b879b1c
...@@ -43,4 +43,18 @@ ...@@ -43,4 +43,18 @@
_a > _b ? _a : _b; \ _a > _b ? _a : _b; \
}) })
/**
* @brief Limits the value of x to be between a and b
*
* Only wraps once. If x > 2b, the returned value will be larger than b.
* Similarly for x < -b.
*/
#define box_wrap(x, a, b) \
({ \
const __typeof__(x) _x = (x); \
const __typeof__(a) _a = (a); \
const __typeof__(b) _b = (b); \
_x < _a ? (_x + _b) : ((_x > _b) ? (_x - _b) : _x); \
})
#endif /* SWIFT_MINMAX_H */ #endif /* SWIFT_MINMAX_H */
...@@ -112,6 +112,15 @@ struct parallel_sort { ...@@ -112,6 +112,15 @@ struct parallel_sort {
volatile unsigned int first, last, waiting; volatile unsigned int first, last, waiting;
}; };
/**
* @brief Information required to compute the particle cell indices.
*/
struct index_data {
struct space *s;
struct cell *cells;
int *ind;
};
/** /**
* @brief Get the shift-id of the given pair of cells, swapping them * @brief Get the shift-id of the given pair of cells, swapping them
* if need be. * if need be.
...@@ -432,49 +441,21 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { ...@@ -432,49 +441,21 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
struct cell *restrict cells_top = s->cells_top; struct cell *restrict cells_top = s->cells_top;
const int ti_current = (s->e != NULL) ? s->e->ti_current : 0; const int ti_current = (s->e != NULL) ? s->e->ti_current : 0;
const double ih[3] = {s->iwidth[0], s->iwidth[1], s->iwidth[2]};
const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]};
const int cdim[3] = {s->cdim[0], s->cdim[1], s->cdim[2]};
/* Run through the particles and get their cell index. */ /* Run through the particles and get their cell index. */
// tic = getticks();
const size_t ind_size = s->size_parts; const size_t ind_size = s->size_parts;
int *ind; int *ind;
if ((ind = (int *)malloc(sizeof(int) * ind_size)) == NULL) if ((ind = (int *)malloc(sizeof(int) * ind_size)) == NULL)
error("Failed to allocate temporary particle indices."); error("Failed to allocate temporary particle indices.");
for (size_t k = 0; k < nr_parts; k++) { if (ind_size > 0) space_parts_get_cell_index(s, ind, cells_top, verbose);
struct part *restrict p = &s->parts[k]; for (size_t i = 0; i < ind_size; ++i) cells_top[ind[i]].count++;
for (int j = 0; j < 3; j++)
if (p->x[j] < 0.0)
p->x[j] += dim[j];
else if (p->x[j] >= dim[j])
p->x[j] -= dim[j];
ind[k] =
cell_getid(cdim, p->x[0] * ih[0], p->x[1] * ih[1], p->x[2] * ih[2]);
cells_top[ind[k]].count++;
}
// message( "getting particle indices took %.3f %s." ,
// clocks_from_ticks(getticks() - tic), clocks_getunit()):
/* Run through the gravity particles and get their cell index. */ /* Run through the gravity particles and get their cell index. */
// tic = getticks();
const size_t gind_size = s->size_gparts; const size_t gind_size = s->size_gparts;
int *gind; int *gind;
if ((gind = (int *)malloc(sizeof(int) * gind_size)) == NULL) if ((gind = (int *)malloc(sizeof(int) * gind_size)) == NULL)
error("Failed to allocate temporary g-particle indices."); error("Failed to allocate temporary g-particle indices.");
for (size_t k = 0; k < nr_gparts; k++) { if (gind_size > 0) space_gparts_get_cell_index(s, gind, cells_top, verbose);
struct gpart *restrict gp = &s->gparts[k]; for (size_t i = 0; i < gind_size; ++i) cells_top[gind[i]].gcount++;
for (int j = 0; j < 3; j++)
if (gp->x[j] < 0.0)
gp->x[j] += dim[j];
else if (gp->x[j] >= dim[j])
gp->x[j] -= dim[j];
gind[k] =
cell_getid(cdim, gp->x[0] * ih[0], gp->x[1] * ih[1], gp->x[2] * ih[2]);
cells_top[gind[k]].gcount++;
}
// message( "getting g-particle indices took %.3f %s." ,
// clocks_from_ticks(getticks() - tic), clocks_getunit());
#ifdef WITH_MPI #ifdef WITH_MPI
...@@ -578,6 +559,9 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { ...@@ -578,6 +559,9 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
ind = ind_new; ind = ind_new;
} }
const int cdim[3] = {s->cdim[0], s->cdim[1], s->cdim[2]};
const double ih[3] = {s->iwidth[0], s->iwidth[1], s->iwidth[2]};
/* Assign each particle to its cell. */ /* Assign each particle to its cell. */
for (size_t k = nr_parts; k < s->nr_parts; k++) { for (size_t k = nr_parts; k < s->nr_parts; k++) {
const struct part *const p = &s->parts[k]; const struct part *const p = &s->parts[k];
...@@ -752,6 +736,164 @@ void space_sanitize(struct space *s) { ...@@ -752,6 +736,164 @@ void space_sanitize(struct space *s) {
} }
} }
/**
* @brief #threadpool mapper function to compute the particle cell indices.
*
* @param map_data Pointer towards the particles.
* @param nr_parts The number of particles to treat.
* @param extra_data Pointers to the space and index list
*/
void space_parts_get_cell_index_mapper(void *map_data, int nr_parts,
void *extra_data) {
/* Unpack the data */
struct part *restrict parts = (struct part *)map_data;
struct index_data *data = (struct index_data *)extra_data;
struct space *s = data->s;
int *const ind = data->ind + (ptrdiff_t)(parts - s->parts);
/* Get some constants */
const double dim_x = s->dim[0];
const double dim_y = s->dim[1];
const double dim_z = s->dim[2];
const int cdim[3] = {s->cdim[0], s->cdim[1], s->cdim[2]};
const double ih_x = s->iwidth[0];
const double ih_y = s->iwidth[1];
const double ih_z = s->iwidth[2];
for (int k = 0; k < nr_parts; k++) {
/* Get the particle */
struct part *restrict p = &parts[k];
const double old_pos_x = p->x[0];
const double old_pos_y = p->x[1];
const double old_pos_z = p->x[2];
/* Put it back into the simulation volume */
const double pos_x = box_wrap(old_pos_x, 0.0, dim_x);
const double pos_y = box_wrap(old_pos_y, 0.0, dim_y);
const double pos_z = box_wrap(old_pos_z, 0.0, dim_z);
/* Get its cell index */
const int index =
cell_getid(cdim, pos_x * ih_x, pos_y * ih_y, pos_z * ih_z);
ind[k] = index;
/* Update the position */
p->x[0] = pos_x;
p->x[1] = pos_y;
p->x[2] = pos_z;
}
}
/**
* @brief #threadpool mapper function to compute the g-particle cell indices.
*
* @param map_data Pointer towards the g-particles.
* @param nr_gparts The number of g-particles to treat.
* @param extra_data Pointers to the space and index list
*/
void space_gparts_get_cell_index_mapper(void *map_data, int nr_gparts,
void *extra_data) {
/* Unpack the data */
struct gpart *restrict gparts = (struct gpart *)map_data;
struct index_data *data = (struct index_data *)extra_data;
struct space *s = data->s;
int *const ind = data->ind + (ptrdiff_t)(gparts - s->gparts);
/* Get some constants */
const double dim_x = s->dim[0];
const double dim_y = s->dim[1];
const double dim_z = s->dim[2];
const int cdim[3] = {s->cdim[0], s->cdim[1], s->cdim[2]};
const double ih_x = s->iwidth[0];
const double ih_y = s->iwidth[1];
const double ih_z = s->iwidth[2];
for (int k = 0; k < nr_gparts; k++) {
/* Get the particle */
struct gpart *restrict gp = &gparts[k];
const double old_pos_x = gp->x[0];
const double old_pos_y = gp->x[1];
const double old_pos_z = gp->x[2];
/* Put it back into the simulation volume */
const double pos_x = box_wrap(old_pos_x, 0.0, dim_x);
const double pos_y = box_wrap(old_pos_y, 0.0, dim_y);
const double pos_z = box_wrap(old_pos_z, 0.0, dim_z);
/* Get its cell index */
const int index =
cell_getid(cdim, pos_x * ih_x, pos_y * ih_y, pos_z * ih_z);
ind[k] = index;
/* Update the position */
gp->x[0] = pos_x;
gp->x[1] = pos_y;
gp->x[2] = pos_z;
}
}
/**
* @brief Computes the cell index of all the particles and update the cell
* count.
*
* @param s The #space.
* @param ind The array of indices to fill.
* @param cells The array of #cell to update.
* @param verbose Are we talkative ?
*/
void space_parts_get_cell_index(struct space *s, int *ind, struct cell *cells,
int verbose) {
const ticks tic = getticks();
/* Pack the extra information */
struct index_data data;
data.s = s;
data.cells = cells;
data.ind = ind;
threadpool_map(&s->e->threadpool, space_parts_get_cell_index_mapper, s->parts,
s->nr_parts, sizeof(struct part), 1000, &data);
if (verbose)
message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
clocks_getunit());
}
/**
* @brief Computes the cell index of all the g-particles and update the cell
* gcount.
*
* @param s The #space.
* @param gind The array of indices to fill.
* @param cells The array of #cell to update.
* @param verbose Are we talkative ?
*/
void space_gparts_get_cell_index(struct space *s, int *gind, struct cell *cells,
int verbose) {
const ticks tic = getticks();
/* Pack the extra information */
struct index_data data;
data.s = s;
data.cells = cells;
data.ind = gind;
threadpool_map(&s->e->threadpool, space_gparts_get_cell_index_mapper,
s->gparts, s->nr_gparts, sizeof(struct gpart), 1000, &data);
if (verbose)
message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
clocks_getunit());
}
/** /**
* @brief Sort the particles and condensed particles according to the given * @brief Sort the particles and condensed particles according to the given
* indices. * indices.
......
...@@ -170,6 +170,10 @@ void space_recycle(struct space *s, struct cell *c); ...@@ -170,6 +170,10 @@ void space_recycle(struct space *s, struct cell *c);
void space_split(struct space *s, struct cell *cells, int nr_cells, void space_split(struct space *s, struct cell *cells, int nr_cells,
int verbose); int verbose);
void space_split_mapper(void *map_data, int num_elements, void *extra_data); void space_split_mapper(void *map_data, int num_elements, void *extra_data);
void space_parts_get_cell_index(struct space *s, int *ind, struct cell *cells,
int verbose);
void space_gparts_get_cell_index(struct space *s, int *gind, struct cell *cells,
int verbose);
void space_do_parts_sort(); void space_do_parts_sort();
void space_do_gparts_sort(); void space_do_gparts_sort();
void space_init_parts(struct space *s); void space_init_parts(struct space *s);
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment