Commit 07c6374c authored by Peter W. Draper
Browse files

Merge branch 'list_local_cells' into 'master'

List top-level cells with tasks

See merge request swift/swiftsim!441
parents d6a00a8f 28517437
......@@ -2064,6 +2064,13 @@ void cell_set_super(struct cell *c, struct cell *super) {
if (c->progeny[k] != NULL) cell_set_super(c->progeny[k], super);
}
/**
* @brief Mapper function to set the super pointer of the cells.
*
* @param map_data The top-level cells.
* @param num_elements The number of top-level cells.
* @param extra_data Unused parameter.
*/
void cell_set_super_mapper(void *map_data, int num_elements, void *extra_data) {
for (int ind = 0; ind < num_elements; ind++) {
struct cell *c = &((struct cell *)map_data)[ind];
......@@ -2071,6 +2078,32 @@ void cell_set_super_mapper(void *map_data, int num_elements, void *extra_data) {
}
}
/**
 * @brief Does this cell or any of its children have any task?
 *
 * We use the timestep-related tasks to probe this as these always
 * exist in a cell hierarchy that has any kind of task.
 *
 * @param c The #cell to probe.
 *
 * @return 1 if this cell or any of its progeny has such a task, 0 otherwise.
 */
int cell_has_tasks(struct cell *c) {

  /* A timestep task (or, with MPI, a recv_ti task) on this cell settles it. */
#ifdef WITH_MPI
  if (c->timestep != NULL || c->recv_ti != NULL) return 1;
#else
  if (c->timestep != NULL) return 1;
#endif

  /* Progeny are only inspected for split cells. */
  if (!c->split) return 0;

  /* Stop at the first progeny that reports a task: the answer is a yes/no
   * flag, so the remaining sub-trees need not be visited or counted. */
  for (int k = 0; k < 8; ++k)
    if (c->progeny[k] != NULL && cell_has_tasks(c->progeny[k])) return 1;

  return 0;
}
/**
* @brief Recursively drifts the #part in a cell hierarchy.
*
......
......@@ -462,6 +462,7 @@ void cell_activate_drift_gpart(struct cell *c, struct scheduler *s);
void cell_activate_sorts(struct cell *c, int sid, struct scheduler *s);
void cell_clear_drift_flags(struct cell *c, void *data);
void cell_set_super_mapper(void *map_data, int num_elements, void *extra_data);
int cell_has_tasks(struct cell *c);
/* Inlined functions (for speed). */
......
......@@ -98,6 +98,16 @@ const char *engine_policy_names[] = {"none",
/** The rank of the engine as a global variable (for messages). */
int engine_rank;
/**
 * @brief Data collected from the cells at the end of a time-step
 *
 * One instance is shared by all the calls to the end-of-step mapper, which
 * fold their local results into it.
 */
struct end_of_step_data {

  /*! Sum of the update counters gathered from the cells */
  int updates;

  /*! Sum of the g_updates counters gathered from the cells */
  int g_updates;

  /*! Sum of the s_updates counters gathered from the cells */
  int s_updates;

  /*! Smallest ti_end_min gathered from the cells */
  integertime_t ti_end_min;

  /*! Largest ti_end_max gathered from the cells */
  integertime_t ti_end_max;

  /*! Largest ti_beg_max gathered from the cells */
  integertime_t ti_beg_max;

  /*! The #engine whose space is being processed */
  struct engine *e;
};
/**
* @brief Link a density/force task to a cell.
*
......@@ -3070,6 +3080,9 @@ void engine_rebuild(struct engine *e, int clean_h_values) {
/* Re-build the tasks. */
engine_maketasks(e);
/* Make the list of top-level cells that have tasks */
space_list_cells_with_tasks(e->s);
#ifdef SWIFT_DEBUG_CHECKS
/* Check that all cells have been drifted to the current time.
* That can include cells that have not
......@@ -3151,7 +3164,7 @@ void engine_barrier(struct engine *e) {
*
* @param c A super-cell.
*/
void engine_collect_kick(struct cell *c) {
void engine_collect_end_of_step_recurse(struct cell *c) {
/* Skip super-cells (Their values are already set) */
#ifdef WITH_MPI
......@@ -3170,7 +3183,7 @@ void engine_collect_kick(struct cell *c) {
if (cp != NULL && (cp->count > 0 || cp->gcount > 0 || cp->scount > 0)) {
/* Recurse */
engine_collect_kick(cp);
engine_collect_end_of_step_recurse(cp);
/* And update */
ti_end_min = min(ti_end_min, cp->ti_end_min);
......@@ -3196,37 +3209,25 @@ void engine_collect_kick(struct cell *c) {
c->s_updated = s_updated;
}
/**
* @brief Collects the next time-step and rebuild flag.
*
* The next time-step is determined by making each super-cell recurse to
* collect the minimal of ti_end and the number of updated particles. When in
* MPI mode this routine reduces these across all nodes and also collects the
* forcerebuild flag -- this is so that we only use a single collective MPI
* call per step for all these values.
*
* Note that the results are stored in e->collect_group1 struct not in the
* engine fields, unless apply is true. These can be applied field-by-field
* or all at once using collectgroup1_copy();
*
* @param e The #engine.
* @param apply whether to apply the results to the engine or just keep in the
* group1 struct.
*/
void engine_collect_timestep_and_rebuild(struct engine *e, int apply) {
void engine_collect_end_of_step_mapper(void *map_data, int num_elements,
void *extra_data) {
const ticks tic = getticks();
struct end_of_step_data *data = (struct end_of_step_data *)extra_data;
struct engine *e = data->e;
struct space *s = e->s;
int *local_cells = (int *)map_data;
/* Local collectible */
int updates = 0, g_updates = 0, s_updates = 0;
integertime_t ti_end_min = max_nr_timesteps, ti_end_max = 0, ti_beg_max = 0;
const struct space *s = e->s;
/* Collect the cell data. */
for (int k = 0; k < s->nr_cells; k++) {
struct cell *c = &s->cells_top[k];
for (int ind = 0; ind < num_elements; ind++) {
struct cell *c = &s->cells_top[local_cells[ind]];
if (c->count > 0 || c->gcount > 0 || c->scount > 0) {
/* Make the top-cells recurse */
engine_collect_kick(c);
engine_collect_end_of_step_recurse(c);
/* And aggregate */
ti_end_min = min(ti_end_min, c->ti_end_min);
......@@ -3243,9 +3244,53 @@ void engine_collect_timestep_and_rebuild(struct engine *e, int apply) {
}
}
/* Let's write back to the global data.
* We use the space lock to guarantee single access. */
if (lock_lock(&s->lock) == 0) {
data->updates += updates;
data->g_updates += g_updates;
data->s_updates += s_updates;
data->ti_end_min = min(ti_end_min, data->ti_end_min);
data->ti_end_max = max(ti_end_max, data->ti_end_max);
data->ti_beg_max = max(ti_beg_max, data->ti_beg_max);
}
if (lock_unlock(&s->lock) != 0) error("Failed to unlock the space");
}
/**
* @brief Collects the next time-step and rebuild flag.
*
* The next time-step is determined by making each super-cell recurse to
* collect the minimal of ti_end and the number of updated particles. When in
* MPI mode this routine reduces these across all nodes and also collects the
* forcerebuild flag -- this is so that we only use a single collective MPI
* call per step for all these values.
*
* Note that the results are stored in e->collect_group1 struct not in the
* engine fields, unless apply is true. These can be applied field-by-field
* or all at once using collectgroup1_copy();
*
* @param e The #engine.
* @param apply whether to apply the results to the engine or just keep in the
* group1 struct.
*/
void engine_collect_end_of_step(struct engine *e, int apply) {
const ticks tic = getticks();
const struct space *s = e->s;
struct end_of_step_data data;
data.updates = 0, data.g_updates = 0, data.s_updates = 0;
data.ti_end_min = max_nr_timesteps, data.ti_end_max = 0, data.ti_beg_max = 0;
data.e = e;
/* Collect information from the local top-level cells */
threadpool_map(&e->threadpool, engine_collect_end_of_step_mapper,
s->local_cells_top, s->nr_local_cells, sizeof(int), 0, &data);
/* Store these in the temporary collection group. */
collectgroup1_init(&e->collect_group1, updates, g_updates, s_updates,
ti_end_min, ti_end_max, ti_beg_max, e->forcerebuild);
collectgroup1_init(&e->collect_group1, data.updates, data.g_updates,
data.s_updates, data.ti_end_min, data.ti_end_max,
data.ti_beg_max, e->forcerebuild);
/* Aggregate collective data from the different nodes for this step. */
#ifdef WITH_MPI
......@@ -3256,7 +3301,7 @@ void engine_collect_timestep_and_rebuild(struct engine *e, int apply) {
/* Check the above using the original MPI calls. */
integertime_t in_i[1], out_i[1];
in_i[0] = 0;
out_i[0] = ti_end_min;
out_i[0] = data.ti_end_min;
if (MPI_Allreduce(out_i, in_i, 1, MPI_LONG_LONG_INT, MPI_MIN,
MPI_COMM_WORLD) != MPI_SUCCESS)
error("Failed to aggregate ti_end_min.");
......@@ -3265,9 +3310,9 @@ void engine_collect_timestep_and_rebuild(struct engine *e, int apply) {
e->collect_group1.ti_end_min);
long long in_ll[3], out_ll[3];
out_ll[0] = updates;
out_ll[1] = g_updates;
out_ll[2] = s_updates;
out_ll[0] = data.updates;
out_ll[1] = data.g_updates;
out_ll[2] = data.s_updates;
if (MPI_Allreduce(out_ll, in_ll, 3, MPI_LONG_LONG_INT, MPI_SUM,
MPI_COMM_WORLD) != MPI_SUCCESS)
error("Failed to aggregate particle counts.");
......@@ -3556,7 +3601,7 @@ void engine_init_particles(struct engine *e, int flag_entropy_ICs,
#endif
/* Recover the (integer) end of the next time-step */
engine_collect_timestep_and_rebuild(e, 1);
engine_collect_end_of_step(e, 1);
/* Check if any particles have the same position. This is not
* allowed (/0) so we abort.*/
......@@ -3739,7 +3784,7 @@ void engine_step(struct engine *e) {
* end of the next time-step. Do these together to reduce the collective MPI
* calls per step, but some of the gathered information is not applied just
* yet (in case we save a snapshot or drift). */
engine_collect_timestep_and_rebuild(e, 0);
engine_collect_end_of_step(e, 0);
e->forcerebuild = e->collect_group1.forcerebuild;
/* Save some statistics ? */
......@@ -3807,8 +3852,8 @@ void engine_unskip(struct engine *e) {
const ticks tic = getticks();
/* Activate all the regular tasks */
threadpool_map(&e->threadpool, runner_do_unskip_mapper, e->s->cells_top,
e->s->nr_cells, sizeof(struct cell), 1, e);
threadpool_map(&e->threadpool, runner_do_unskip_mapper, e->s->local_cells_top,
e->s->nr_local_cells, sizeof(int), 1, e);
/* And the top level gravity FFT one */
if (e->s->periodic && (e->policy & engine_policy_self_gravity))
......
......@@ -868,10 +868,11 @@ void runner_do_unskip_mapper(void *map_data, int num_elements,
void *extra_data) {
struct engine *e = (struct engine *)extra_data;
struct cell *cells = (struct cell *)map_data;
struct space *s = e->s;
int *local_cells = (int *)map_data;
for (int ind = 0; ind < num_elements; ind++) {
struct cell *c = &cells[ind];
struct cell *c = &s->cells_top[local_cells[ind]];
if (c != NULL) runner_do_unskip(c, e);
}
}
......
......@@ -389,6 +389,7 @@ void space_regrid(struct space *s, int verbose) {
/* Free the old cells, if they were allocated. */
if (s->cells_top != NULL) {
space_free_cells(s);
free(s->local_cells_top);
free(s->cells_top);
free(s->multipoles_top);
}
......@@ -420,6 +421,12 @@ void space_regrid(struct space *s, int verbose) {
bzero(s->multipoles_top, s->nr_cells * sizeof(struct gravity_tensors));
}
/* Allocate the indices of local cells */
if (posix_memalign((void *)&s->local_cells_top, SWIFT_STRUCT_ALIGNMENT,
s->nr_cells * sizeof(int)) != 0)
error("Failed to allocate indices of local top-level cells.");
bzero(s->local_cells_top, s->nr_cells * sizeof(int));
/* Set the cells' locks */
for (int k = 0; k < s->nr_cells; k++) {
if (lock_init(&s->cells_top[k].lock) != 0)
......@@ -2466,6 +2473,27 @@ void space_getcells(struct space *s, int nr_cells, struct cell **cells) {
}
}
/**
 * @brief Build the list of top-level cells that have any task in their
 * hierarchy.
 *
 * The index of every such cell is written to s->local_cells_top and their
 * number to s->nr_local_cells. The index array is assumed to have been
 * allocated beforehand, at a regrid.
 *
 * @param s The #space.
 */
void space_list_cells_with_tasks(struct space *s) {

  /* Walk over all the top-level cells and keep the index of those for which
   * cell_has_tasks() reports something */
  int nr_found = 0;
  for (int cid = 0; cid < s->nr_cells; ++cid) {
    if (!cell_has_tasks(&s->cells_top[cid])) continue;
    s->local_cells_top[nr_found++] = cid;
  }

  /* Publish the length of the freshly built list */
  s->nr_local_cells = nr_found;

  if (s->e->verbose)
    message("Have %d local cells (total=%d)", s->nr_local_cells, s->nr_cells);
}
void space_synchronize_particle_positions_mapper(void *map_data, int nr_gparts,
void *extra_data) {
/* Unpack the data */
......@@ -3039,6 +3067,7 @@ void space_clean(struct space *s) {
for (int i = 0; i < s->nr_cells; ++i) cell_clean(&s->cells_top[i]);
free(s->cells_top);
free(s->multipoles_top);
free(s->local_cells_top);
free(s->parts);
free(s->xparts);
free(s->gparts);
......
......@@ -101,6 +101,9 @@ struct space {
/*! Total number of cells (top- and sub-) */
int tot_cells;
/*! Number of *local* top-level cells with tasks */
int nr_local_cells;
/*! The (level 0) cells themselves. */
struct cell *cells_top;
......@@ -113,6 +116,9 @@ struct space {
/*! Buffer of unused multipoles for the sub-cells. */
struct gravity_tensors *multipoles_sub;
/*! The indices of the *local* top-level cells with tasks */
int *local_cells_top;
/*! The total number of parts in the space. */
size_t nr_parts, size_parts;
......@@ -204,6 +210,7 @@ void space_recycle_list(struct space *s, struct cell *cell_list_begin,
void space_split(struct space *s, struct cell *cells, int nr_cells,
int verbose);
void space_split_mapper(void *map_data, int num_elements, void *extra_data);
void space_list_cells_with_tasks(struct space *s);
void space_parts_get_cell_index(struct space *s, int *ind, struct cell *cells,
int verbose);
void space_gparts_get_cell_index(struct space *s, int *gind, struct cell *cells,
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment