Skip to content
Snippets Groups Projects
Commit 188f5537 authored by Matthieu Schaller's avatar Matthieu Schaller
Browse files

Merge branch 'repartition-threadpool' into 'master'

Repartition threadpool

Closes #462

See merge request !663
parents a5740682 27e2a8a8
No related branches found
No related tags found
1 merge request!663Repartition threadpool
...@@ -1056,55 +1056,54 @@ static void pick_metis(int nodeID, struct space *s, int nregions, ...@@ -1056,55 +1056,54 @@ static void pick_metis(int nodeID, struct space *s, int nregions,
#endif #endif
#if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS)) #if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS))
/* Helper struct for partition_gather weights. */
struct weights_mapper_data {
double *weights_e;
double *weights_v;
idx_t *inds;
int eweights;
int nodeID;
int timebins;
int vweights;
int nr_cells;
struct cell *cells;
};
#ifdef SWIFT_DEBUG_CHECKS
static void check_weights(struct task *tasks, int nr_tasks,
struct weights_mapper_data *weights_data,
double *weights_v, double *weights_e);
#endif
/** /**
* @brief Repartition the cells amongst the nodes using weights of * @brief Threadpool mapper function to gather cell edge and vertex weights
* various kinds. * from the associated tasks.
* *
* @param vweights whether vertex weights will be used. * @map_data part of the data to process in this mapper.
* @param eweights whether weights will be used. * @num_elements the number of data elements to process.
* @param timebins use timebins as the edge weights. * @extra_data additional data for the mapper context.
* @param repartition the partition struct of the local engine.
* @param nodeID our nodeID.
* @param nr_nodes the number of nodes.
* @param s the space of cells holding our local particles.
* @param tasks the completed tasks from the last engine step for our node.
* @param nr_tasks the number of tasks.
*/ */
static void repart_edge_metis(int vweights, int eweights, int timebins, void partition_gather_weights(void *map_data, int num_elements,
struct repartition *repartition, int nodeID, void *extra_data) {
int nr_nodes, struct space *s, struct task *tasks,
int nr_tasks) {
/* Create weight arrays using task ticks for vertices and edges (edges struct task *tasks = (struct task *)map_data;
* assume the same graph structure as used in the part_ calls). */ struct weights_mapper_data *mydata = (struct weights_mapper_data *)extra_data;
int nr_cells = s->nr_cells;
struct cell *cells = s->cells_top;
/* Allocate and fill the adjncy indexing array defining the graph of double *weights_e = mydata->weights_e;
* cells. */ double *weights_v = mydata->weights_v;
idx_t *inds; idx_t *inds = mydata->inds;
if ((inds = (idx_t *)malloc(sizeof(idx_t) * 26 * nr_cells)) == NULL) int eweights = mydata->eweights;
error("Failed to allocate the inds array"); int nodeID = mydata->nodeID;
graph_init(s, inds, NULL); int nr_cells = mydata->nr_cells;
int timebins = mydata->timebins;
int vweights = mydata->vweights;
/* Allocate and init weights. */ struct cell *cells = mydata->cells;
double *weights_v = NULL;
double *weights_e = NULL;
if (vweights) {
if ((weights_v = (double *)malloc(sizeof(double) * nr_cells)) == NULL)
error("Failed to allocate vertex weights arrays.");
bzero(weights_v, sizeof(double) * nr_cells);
}
if (eweights) {
if ((weights_e = (double *)malloc(sizeof(double) * 26 * nr_cells)) == NULL)
error("Failed to allocate edge weights arrays.");
bzero(weights_e, sizeof(double) * 26 * nr_cells);
}
/* Loop over the tasks... */ /* Loop over the tasks... */
for (int j = 0; j < nr_tasks; j++) { for (int i = 0; i < num_elements; i++) {
/* Get a pointer to the kth task. */ struct task *t = &tasks[i];
struct task *t = &tasks[j];
/* Skip un-interesting tasks. */ /* Skip un-interesting tasks. */
if (t->cost == 0.f) continue; if (t->cost == 0.f) continue;
...@@ -1135,7 +1134,7 @@ static void repart_edge_metis(int vweights, int eweights, int timebins, ...@@ -1135,7 +1134,7 @@ static void repart_edge_metis(int vweights, int eweights, int timebins,
t->type == task_type_grav_long_range) { t->type == task_type_grav_long_range) {
/* Particle updates add only to vertex weight. */ /* Particle updates add only to vertex weight. */
if (vweights) weights_v[cid] += w; if (vweights) atomic_add_d(&weights_v[cid], w);
} }
/* Self interaction? */ /* Self interaction? */
...@@ -1143,7 +1142,7 @@ static void repart_edge_metis(int vweights, int eweights, int timebins, ...@@ -1143,7 +1142,7 @@ static void repart_edge_metis(int vweights, int eweights, int timebins,
(t->type == task_type_sub_self && cj == NULL && (t->type == task_type_sub_self && cj == NULL &&
ci->nodeID == nodeID)) { ci->nodeID == nodeID)) {
/* Self interactions add only to vertex weight. */ /* Self interactions add only to vertex weight. */
if (vweights) weights_v[cid] += w; if (vweights) atomic_add_d(&weights_v[cid], w);
} }
...@@ -1152,7 +1151,7 @@ static void repart_edge_metis(int vweights, int eweights, int timebins, ...@@ -1152,7 +1151,7 @@ static void repart_edge_metis(int vweights, int eweights, int timebins,
/* In-cell pair? */ /* In-cell pair? */
if (ci == cj) { if (ci == cj) {
/* Add weight to vertex for ci. */ /* Add weight to vertex for ci. */
if (vweights) weights_v[cid] += w; if (vweights) atomic_add_d(&weights_v[cid], w);
} }
...@@ -1163,8 +1162,8 @@ static void repart_edge_metis(int vweights, int eweights, int timebins, ...@@ -1163,8 +1162,8 @@ static void repart_edge_metis(int vweights, int eweights, int timebins,
/* Local cells add weight to vertices. */ /* Local cells add weight to vertices. */
if (vweights && ci->nodeID == nodeID) { if (vweights && ci->nodeID == nodeID) {
weights_v[cid] += 0.5 * w; atomic_add_d(&weights_v[cid], 0.5 * w);
if (cj->nodeID == nodeID) weights_v[cjd] += 0.5 * w; if (cj->nodeID == nodeID) atomic_add_d(&weights_v[cjd], 0.5 * w);
} }
if (eweights) { if (eweights) {
...@@ -1200,20 +1199,91 @@ static void repart_edge_metis(int vweights, int eweights, int timebins, ...@@ -1200,20 +1199,91 @@ static void repart_edge_metis(int vweights, int eweights, int timebins,
int dti = num_time_bins - get_time_bin(ci->hydro.ti_end_min); int dti = num_time_bins - get_time_bin(ci->hydro.ti_end_min);
int dtj = num_time_bins - get_time_bin(cj->hydro.ti_end_min); int dtj = num_time_bins - get_time_bin(cj->hydro.ti_end_min);
double dt = (double)(1 << dti) + (double)(1 << dtj); double dt = (double)(1 << dti) + (double)(1 << dtj);
weights_e[ik] += dt; atomic_add_d(&weights_e[ik], dt);
weights_e[jk] += dt; atomic_add_d(&weights_e[jk], dt);
} else { } else {
/* Add weights from task costs to the edge. */ /* Add weights from task costs to the edge. */
weights_e[ik] += w; atomic_add_d(&weights_e[ik], w);
weights_e[jk] += w; atomic_add_d(&weights_e[jk], w);
} }
} }
} }
} }
} }
} }
}
/**
* @brief Repartition the cells amongst the nodes using weights of
* various kinds.
*
* @param vweights whether vertex weights will be used.
* @param eweights whether weights will be used.
* @param timebins use timebins as the edge weights.
* @param repartition the partition struct of the local engine.
* @param nodeID our nodeID.
* @param nr_nodes the number of nodes.
* @param s the space of cells holding our local particles.
* @param tasks the completed tasks from the last engine step for our node.
* @param nr_tasks the number of tasks.
*/
static void repart_edge_metis(int vweights, int eweights, int timebins,
struct repartition *repartition, int nodeID,
int nr_nodes, struct space *s, struct task *tasks,
int nr_tasks) {
/* Create weight arrays using task ticks for vertices and edges (edges
* assume the same graph structure as used in the part_ calls). */
int nr_cells = s->nr_cells;
struct cell *cells = s->cells_top;
/* Allocate and fill the adjncy indexing array defining the graph of
* cells. */
idx_t *inds;
if ((inds = (idx_t *)malloc(sizeof(idx_t) * 26 * nr_cells)) == NULL)
error("Failed to allocate the inds array");
graph_init(s, inds, NULL);
/* Allocate and init weights. */
double *weights_v = NULL;
double *weights_e = NULL;
if (vweights) {
if ((weights_v = (double *)malloc(sizeof(double) * nr_cells)) == NULL)
error("Failed to allocate vertex weights arrays.");
bzero(weights_v, sizeof(double) * nr_cells);
}
if (eweights) {
if ((weights_e = (double *)malloc(sizeof(double) * 26 * nr_cells)) == NULL)
error("Failed to allocate edge weights arrays.");
bzero(weights_e, sizeof(double) * 26 * nr_cells);
}
/* Gather weights. */
struct weights_mapper_data weights_data;
weights_data.cells = cells;
weights_data.eweights = eweights;
weights_data.inds = inds;
weights_data.nodeID = nodeID;
weights_data.nr_cells = nr_cells;
weights_data.timebins = timebins;
weights_data.vweights = vweights;
weights_data.weights_e = weights_e;
weights_data.weights_v = weights_v;
ticks tic = getticks();
threadpool_map(&s->e->threadpool, partition_gather_weights, tasks, nr_tasks,
sizeof(struct task), 0, &weights_data);
if (s->e->verbose)
message("weight mapper took %.3f %s.", clocks_from_ticks(getticks() - tic),
clocks_getunit());
#ifdef SWIFT_DEBUG_CHECKS
check_weights(tasks, nr_tasks, &weights_data, weights_v, weights_e);
#endif
/* Merge the weights arrays across all nodes. */ /* Merge the weights arrays across all nodes. */
int res; int res;
...@@ -1717,6 +1787,185 @@ static int check_complete(struct space *s, int verbose, int nregions) { ...@@ -1717,6 +1787,185 @@ static int check_complete(struct space *s, int verbose, int nregions) {
return (!failed); return (!failed);
} }
#if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS))
#ifdef SWIFT_DEBUG_CHECKS
/**
* @brief Check that the threadpool version of the weights construction is
* correct by comparing to the old serial code.
*
* @tasks the list of tasks
* @nr_tasks number of tasks
* @mydata additional values as passed to threadpool
* @ref_weights_v vertex weights to check
* @ref_weights_e edge weights to check
*/
static void check_weights(struct task *tasks, int nr_tasks,
struct weights_mapper_data *mydata,
double *ref_weights_v, double *ref_weights_e) {
idx_t *inds = mydata->inds;
int eweights = mydata->eweights;
int nodeID = mydata->nodeID;
int nr_cells = mydata->nr_cells;
int timebins = mydata->timebins;
int vweights = mydata->vweights;
struct cell *cells = mydata->cells;
/* Allocate and init weights. */
double *weights_v = NULL;
double *weights_e = NULL;
if (vweights) {
if ((weights_v = (double *)malloc(sizeof(double) * nr_cells)) == NULL)
error("Failed to allocate vertex weights arrays.");
bzero(weights_v, sizeof(double) * nr_cells);
}
if (eweights) {
if ((weights_e = (double *)malloc(sizeof(double) * 26 * nr_cells)) == NULL)
error("Failed to allocate edge weights arrays.");
bzero(weights_e, sizeof(double) * 26 * nr_cells);
}
/* Loop over the tasks... */
for (int j = 0; j < nr_tasks; j++) {
/* Get a pointer to the kth task. */
struct task *t = &tasks[j];
/* Skip un-interesting tasks. */
if (t->cost == 0.f) continue;
/* Get the task weight based on costs. */
double w = (double)t->cost;
/* Get the top-level cells involved. */
struct cell *ci, *cj;
for (ci = t->ci; ci->parent != NULL; ci = ci->parent)
;
if (t->cj != NULL)
for (cj = t->cj; cj->parent != NULL; cj = cj->parent)
;
else
cj = NULL;
/* Get the cell IDs. */
int cid = ci - cells;
/* Different weights for different tasks. */
if (t->type == task_type_drift_part || t->type == task_type_drift_gpart ||
t->type == task_type_ghost || t->type == task_type_extra_ghost ||
t->type == task_type_kick1 || t->type == task_type_kick2 ||
t->type == task_type_end_force || t->type == task_type_cooling ||
t->type == task_type_timestep || t->type == task_type_init_grav ||
t->type == task_type_grav_down ||
t->type == task_type_grav_long_range) {
/* Particle updates add only to vertex weight. */
if (vweights) weights_v[cid] += w;
}
/* Self interaction? */
else if ((t->type == task_type_self && ci->nodeID == nodeID) ||
(t->type == task_type_sub_self && cj == NULL &&
ci->nodeID == nodeID)) {
/* Self interactions add only to vertex weight. */
if (vweights) weights_v[cid] += w;
}
/* Pair? */
else if (t->type == task_type_pair || (t->type == task_type_sub_pair)) {
/* In-cell pair? */
if (ci == cj) {
/* Add weight to vertex for ci. */
if (vweights) weights_v[cid] += w;
}
/* Distinct cells. */
else {
/* Index of the jth cell. */
int cjd = cj - cells;
/* Local cells add weight to vertices. */
if (vweights && ci->nodeID == nodeID) {
weights_v[cid] += 0.5 * w;
if (cj->nodeID == nodeID) weights_v[cjd] += 0.5 * w;
}
if (eweights) {
/* Find indices of ci/cj neighbours. Note with gravity these cells may
* not be neighbours, in that case we ignore any edge weight for that
* pair. */
int ik = -1;
for (int k = 26 * cid; k < 26 * nr_cells; k++) {
if (inds[k] == cjd) {
ik = k;
break;
}
}
/* cj */
int jk = -1;
for (int k = 26 * cjd; k < 26 * nr_cells; k++) {
if (inds[k] == cid) {
jk = k;
break;
}
}
if (ik != -1 && jk != -1) {
if (timebins) {
/* Add weights to edge for all cells based on the expected
* interaction time (calculated as the time to the last expected
* time) as we want to avoid having active cells on the edges, so
* we cut for that. Note that weight is added to the local and
* remote cells, as we want to keep both away from any cuts, this
* can overflow int, so take care. */
int dti = num_time_bins - get_time_bin(ci->hydro.ti_end_min);
int dtj = num_time_bins - get_time_bin(cj->hydro.ti_end_min);
double dt = (double)(1 << dti) + (double)(1 << dtj);
weights_e[ik] += dt;
weights_e[jk] += dt;
} else {
/* Add weights from task costs to the edge. */
weights_e[ik] += w;
weights_e[jk] += w;
}
}
}
}
}
}
/* Now do the comparisons. */
double refsum = 0.0;
double sum = 0.0;
for (int k = 0; k < nr_cells; k++) {
refsum += ref_weights_v[k];
sum += weights_v[k];
}
if (fabs(sum - refsum) > 1.0) {
error("vertex partition weights are not consistent (%f!=%f)", sum, refsum);
} else {
refsum = 0.0;
sum = 0.0;
for (int k = 0; k < 26 * nr_cells; k++) {
refsum += ref_weights_e[k];
sum += weights_e[k];
}
if (fabs(sum - refsum) > 1.0) {
error("edge partition weights are not consistent (%f!=%f)", sum, refsum);
}
}
message("partition weights checked successfully");
}
#endif
#endif
/** /**
* @brief Partition a space of cells based on another space of cells. * @brief Partition a space of cells based on another space of cells.
* *
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment