Commit 1819dcaa authored by Peter W. Draper
Browse files

Unify ticks and fixed costs branches

Can now repartition at any time provided some fixed costs have been set.
parent 1592b255
...@@ -330,8 +330,8 @@ int main(int argc, char *argv[]) { ...@@ -330,8 +330,8 @@ int main(int argc, char *argv[]) {
if (dump_tasks) { if (dump_tasks) {
if (myrank == 0) { if (myrank == 0) {
message("WARNING: complete task dumps are only created when " message("WARNING: complete task dumps are only created when "
"configured with --enable-task-debugging. " "configured with --enable-task-debugging.");
"Basic task statistics will be output."); message(" Basic task statistics will be output.");
} }
} }
#endif #endif
......
...@@ -1049,48 +1049,66 @@ void engine_repartition_trigger(struct engine *e) { ...@@ -1049,48 +1049,66 @@ void engine_repartition_trigger(struct engine *e) {
/* Do nothing if there have not been enough steps since the last repartition /* Do nothing if there have not been enough steps since the last repartition
* as we don't want to repeat this too often or immediately after a * as we don't want to repeat this too often or immediately after a
* repartition step. Also nothing to do when requested. */ * repartition step. Also nothing to do when requested. */
if (e->step - e->last_repartition >= 2 && if (e->step - e->last_repartition >= 2 && e->reparttype->type != REPART_NONE) {
e->reparttype->type != REPART_NONE) {
/* If we have fixed costs available and this is step 2 or we are forcing
/* It is only worth checking the CPU loads when we have processed a * repartitioning then we do a fixed costs one now. */
* significant number of all particles as we require all tasks to have if (e->reparttype->trigger > 1 ||
* times. */ (e->step == 2 && e->reparttype->use_fixed_costs)) {
if ((e->updates > 1 &&
e->updates >= e->total_nr_parts * e->reparttype->minfrac) || if (e->reparttype->trigger > 1) {
(e->g_updates > 1 && if ((e->step % (int)e->reparttype->trigger) == 0) e->forcerepart = 1;
e->g_updates >= e->total_nr_gparts * e->reparttype->minfrac)) { } else {
e->forcerepart = 1;
/* Get CPU time used since the last call to this function. */ }
double elapsed_cputime = clocks_get_cputime_used() - e->cputime_last_step; e->reparttype->use_ticks = 0;
/* Gather the elapsed CPU times from all ranks for the last step. */ } else {
double elapsed_cputimes[e->nr_nodes];
MPI_Gather(&elapsed_cputime, 1, MPI_DOUBLE, elapsed_cputimes, 1,
MPI_DOUBLE, 0, MPI_COMM_WORLD); /* It is only worth checking the CPU loads when we have processed a
if (e->nodeID == 0) { * significant number of all particles as we require all tasks to have
* timings. */
/* Get the range and mean of cputimes. */ if ((e->updates > 1 &&
double mintime = elapsed_cputimes[0]; e->updates >= e->total_nr_parts * e->reparttype->minfrac) ||
double maxtime = elapsed_cputimes[0]; (e->g_updates > 1 &&
double sum = elapsed_cputimes[0]; e->g_updates >= e->total_nr_gparts * e->reparttype->minfrac)) {
for (int k = 1; k < e->nr_nodes; k++) {
if (elapsed_cputimes[k] > maxtime) maxtime = elapsed_cputimes[k]; /* We are using the task timings. */
if (elapsed_cputimes[k] < mintime) mintime = elapsed_cputimes[k]; e->reparttype->use_ticks = 1;
sum += elapsed_cputimes[k];
} /* Get CPU time used since the last call to this function. */
double mean = sum / (double)e->nr_nodes; double elapsed_cputime = clocks_get_cputime_used() - e->cputime_last_step;
/* Are we out of balance? */ /* Gather the elapsed CPU times from all ranks for the last step. */
double abs_trigger = fabs(e->reparttype->trigger); double elapsed_cputimes[e->nr_nodes];
if (((maxtime - mintime) / mean) > abs_trigger) { MPI_Gather(&elapsed_cputime, 1, MPI_DOUBLE, elapsed_cputimes, 1,
// if (e->verbose) MPI_DOUBLE, 0, MPI_COMM_WORLD);
message("trigger fraction %.3f > %.3f will repartition", if (e->nodeID == 0) {
(maxtime - mintime) / mean, abs_trigger);
e->forcerepart = 1; /* Get the range and mean of cputimes. */
} else { double mintime = elapsed_cputimes[0];
// if (e->verbose) { double maxtime = elapsed_cputimes[0];
message("trigger fraction %.3f =< %.3f will not repartition", double sum = elapsed_cputimes[0];
(maxtime - mintime) / mean, abs_trigger); for (int k = 1; k < e->nr_nodes; k++) {
if (elapsed_cputimes[k] > maxtime) maxtime = elapsed_cputimes[k];
if (elapsed_cputimes[k] < mintime) mintime = elapsed_cputimes[k];
sum += elapsed_cputimes[k];
}
double mean = sum / (double)e->nr_nodes;
/* Are we out of balance? */
double abs_trigger = fabs(e->reparttype->trigger);
if (((maxtime - mintime) / mean) > abs_trigger) {
// if (e->verbose)
message("trigger fraction %.3f > %.3f will repartition",
(maxtime - mintime) / mean, abs_trigger);
e->forcerepart = 1;
} else {
// if (e->verbose) {
message("trigger fraction %.3f =< %.3f will not repartition",
(maxtime - mintime) / mean, abs_trigger);
}
} }
} }
......
...@@ -83,7 +83,7 @@ static int check_complete(struct space *s, int verbose, int nregions); ...@@ -83,7 +83,7 @@ static int check_complete(struct space *s, int verbose, int nregions);
* statistics output produced when running with task debugging enabled. * statistics output produced when running with task debugging enabled.
*/ */
static double repartition_costs[task_type_count][task_subtype_count]; static double repartition_costs[task_type_count][task_subtype_count];
static void repart_init_fixed_costs(int policy); static int repart_init_fixed_costs(void);
#endif #endif
/* Vectorisation support */ /* Vectorisation support */
...@@ -1165,6 +1165,7 @@ struct weights_mapper_data { ...@@ -1165,6 +1165,7 @@ struct weights_mapper_data {
int timebins; int timebins;
int vweights; int vweights;
int nr_cells; int nr_cells;
int use_ticks;
struct cell *cells; struct cell *cells;
}; };
...@@ -1182,8 +1183,8 @@ static void check_weights(struct task *tasks, int nr_tasks, ...@@ -1182,8 +1183,8 @@ static void check_weights(struct task *tasks, int nr_tasks,
* @param num_elements the number of data elements to process. * @param num_elements the number of data elements to process.
* @param extra_data additional data for the mapper context. * @param extra_data additional data for the mapper context.
*/ */
void partition_gather_weights(void *map_data, int num_elements, static void partition_gather_weights(void *map_data, int num_elements,
void *extra_data) { void *extra_data) {
struct task *tasks = (struct task *)map_data; struct task *tasks = (struct task *)map_data;
struct weights_mapper_data *mydata = (struct weights_mapper_data *)extra_data; struct weights_mapper_data *mydata = (struct weights_mapper_data *)extra_data;
...@@ -1196,6 +1197,7 @@ void partition_gather_weights(void *map_data, int num_elements, ...@@ -1196,6 +1197,7 @@ void partition_gather_weights(void *map_data, int num_elements,
int nr_cells = mydata->nr_cells; int nr_cells = mydata->nr_cells;
int timebins = mydata->timebins; int timebins = mydata->timebins;
int vweights = mydata->vweights; int vweights = mydata->vweights;
int use_ticks = mydata->use_ticks;
struct cell *cells = mydata->cells; struct cell *cells = mydata->cells;
...@@ -1207,11 +1209,13 @@ void partition_gather_weights(void *map_data, int num_elements, ...@@ -1207,11 +1209,13 @@ void partition_gather_weights(void *map_data, int num_elements,
if (t->type == task_type_send || t->type == task_type_recv || if (t->type == task_type_send || t->type == task_type_recv ||
t->type == task_type_logger || t->implicit || t->ci == NULL) continue; t->type == task_type_logger || t->implicit || t->ci == NULL) continue;
/* Get the task weight based on fixed cost for this task type. */ /* Get weight for this task. Either based on fixed costs or task timings. */
//double w = repartition_costs[t->type][t->subtype]; double w = 0.0;
if (use_ticks) {
/* Get the task weight based on costs. */ w = (double)t->toc - (double)t->tic;
double w = (double)t->toc - (double)t->tic; } else {
w = repartition_costs[t->type][t->subtype];
}
if (w <= 0.0) continue; if (w <= 0.0) continue;
/* Get the top-level cells involved. */ /* Get the top-level cells involved. */
...@@ -1378,6 +1382,8 @@ static void repart_edge_metis(int vweights, int eweights, int timebins, ...@@ -1378,6 +1382,8 @@ static void repart_edge_metis(int vweights, int eweights, int timebins,
ticks tic = getticks(); ticks tic = getticks();
message("using ticks: %d", repartition->use_ticks);
threadpool_map(&s->e->threadpool, partition_gather_weights, tasks, nr_tasks, threadpool_map(&s->e->threadpool, partition_gather_weights, tasks, nr_tasks,
sizeof(struct task), 0, &weights_data); sizeof(struct task), 0, &weights_data);
if (s->e->verbose) if (s->e->verbose)
...@@ -1541,7 +1547,6 @@ void partition_repartition(struct repartition *reparttype, int nodeID, ...@@ -1541,7 +1547,6 @@ void partition_repartition(struct repartition *reparttype, int nodeID,
#if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS)) #if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS))
ticks tic = getticks(); ticks tic = getticks();
repart_init_fixed_costs(s->e->policy);
if (reparttype->type == REPART_METIS_VERTEX_EDGE_COSTS) { if (reparttype->type == REPART_METIS_VERTEX_EDGE_COSTS) {
repart_edge_metis(1, 1, 0, reparttype, nodeID, nr_nodes, s, tasks, repart_edge_metis(1, 1, 0, reparttype, nodeID, nr_nodes, s, tasks,
...@@ -1853,27 +1858,45 @@ void partition_init(struct partition *partition, ...@@ -1853,27 +1858,45 @@ void partition_init(struct partition *partition,
repartition->ncelllist = 0; repartition->ncelllist = 0;
repartition->celllist = NULL; repartition->celllist = NULL;
/* Do we have fixed costs available? These can be used to force
* repartitioning at any time. Not required if not repartitioning.*/
repartition->use_fixed_costs =
parser_get_opt_param_int(params, "DomainDecomposition:use_fixed_costs", 0);
if (repartition->type == REPART_NONE) repartition->use_fixed_costs = 0;
/* Check if this is true or required and initialise them. */
if (repartition->use_fixed_costs || repartition->trigger > 1) {
if (!repart_init_fixed_costs()) {
if (repartition->trigger <= 1) {
if (engine_rank == 0)
message("WARNING: fixed cost repartitioning was requested but is"
" not available.");
repartition->use_fixed_costs = 0;
} else {
error("Forced fixed cost repartitioning was requested but is"
" not available.");
}
}
}
#else #else
error("SWIFT was not compiled with MPI support"); error("SWIFT was not compiled with MPI support");
#endif #endif
} }
#if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS)) #ifdef WITH_MPI
/** /**
* @brief Set the fixed costs for repartition using METIS. * @brief Set the fixed costs for repartition using METIS.
* *
* These are determined using a run with task debugging enabled which gives a * These are determined using a run with the -y flag on which produces
* statistical analysis condensed into a .h file. Note that some tasks have * a statistical analysis that is condensed into a .h file for inclusion.
* different costs depending on the engine policies, for instance the kicks
* do work with self gravity and hydro, we attempt to allow for that. Finally
* note this is a statistical solution, so requires that there are sufficient
* tasks on each rank so that the fixed costs do the right thing on average,
* you may like to used task ticks as weights if this isn't working.
* *
* @param policy the #engine policy. * If the default include file is used then no fixed costs are set and this
* function will return 0.
*/ */
static void repart_init_fixed_costs(int policy) { static int repart_init_fixed_costs(void) {
#if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS))
/* Set the default fixed cost. */ /* Set the default fixed cost. */
for (int j = 0; j < task_type_count; j++) { for (int j = 0; j < task_type_count; j++) {
for (int k = 0; k < task_subtype_count; k++) { for (int k = 0; k < task_subtype_count; k++) {
...@@ -1881,72 +1904,14 @@ static void repart_init_fixed_costs(int policy) { ...@@ -1881,72 +1904,14 @@ static void repart_init_fixed_costs(int policy) {
} }
} }
/* TODO: these may be separable, so we could have costs for each policy #include <partition_fixed_costs.h>
* addition, if separated need to take care with the relative scaling. */ return HAVE_FIXED_COSTS;
if (policy & (engine_policy_hydro & engine_policy_self_gravity)) {
/* EAGLE_50 -s -G -S 8 nodes 16 cores */
repartition_costs[1][0] = 45842; /* sort/none */
repartition_costs[2][1] = 15609; /* self/density */
repartition_costs[2][3] = 18830; /* self/force */
repartition_costs[2][4] = 202588; /* self/grav */
repartition_costs[3][1] = 67; /* pair/density */
repartition_costs[3][3] = 207; /* pair/force */
repartition_costs[3][4] = 1507; /* pair/grav */
repartition_costs[4][1] = 36268; /* sub_self/density */
repartition_costs[4][3] = 52252; /* sub_self/force */
repartition_costs[5][1] = 709; /* sub_pair/density */
repartition_costs[5][3] = 1258; /* sub_pair/force */
repartition_costs[6][0] = 1135; /* init_grav/none */
repartition_costs[9][0] = 160; /* ghost/none */
repartition_costs[12][0] = 7176; /* drift_part/none */
repartition_costs[13][0] = 4502; /* drift_gpart/none */
repartition_costs[15][0] = 8234; /* end_force/none */
repartition_costs[16][0] = 15508; /* kick1/none */
repartition_costs[17][0] = 15780; /* kick2/none */
repartition_costs[18][0] = 19848; /* timestep/none */
repartition_costs[21][0] = 4105; /* grav_long_range/none */
repartition_costs[22][0] = 68; /* grav_mm/none */
repartition_costs[24][0] = 16785; /* grav_down/none */
repartition_costs[25][0] = 70632; /* grav_mesh/none */
} else if (policy & engine_policy_self_gravity) {
/* EAGLE_50 -G 8 nodes 16 cores, scaled to match self/grav. */
repartition_costs[2][4] = 202588; /* self/grav */
repartition_costs[3][4] = 1760; /* pair/grav */
repartition_costs[6][0] = 1610; /* init_grav/none */
repartition_costs[13][0] = 999; /* drift_gpart/none */
repartition_costs[15][0] = 3481; /* end_force/none */
repartition_costs[16][0] = 6336; /* kick1/none */
repartition_costs[17][0] = 6343; /* kick2/none */
repartition_costs[18][0] = 13864; /* timestep/none */
repartition_costs[21][0] = 1422; /* grav_long_range/none */
repartition_costs[22][0] = 71; /* grav_mm/none */
repartition_costs[24][0] = 16011; /* grav_down/none */
repartition_costs[25][0] = 60414; /* grav_mesh/none */
} else if (policy & engine_policy_hydro) {
/* EAGLE_50 -s 8 nodes 16 cores, not scaled, but similar. */
repartition_costs[1][0] = 52733; /* sort/none */
repartition_costs[2][1] = 15458; /* self/density */
repartition_costs[2][3] = 19212; /* self/force */
repartition_costs[3][1] = 74; /* pair/density */
repartition_costs[3][3] = 242; /* pair/force */
repartition_costs[4][1] = 42895; /* sub_self/density */
repartition_costs[4][3] = 64254; /* sub_self/force */
repartition_costs[5][1] = 818; /* sub_pair/density */
repartition_costs[5][3] = 1443; /* sub_pair/force */
repartition_costs[9][0] = 159; /* ghost/none */
repartition_costs[12][0] = 6708; /* drift_part/none */
repartition_costs[15][0] = 6479; /* end_force/none */
repartition_costs[16][0] = 6609; /* kick1/none */
repartition_costs[17][0] = 6975; /* kick2/none */
repartition_costs[18][0] = 5229; /* timestep/none */
}
}
#endif #endif
return 0;
}
#endif /* WITH_MPI */
/* General support */ /* General support */
/* =============== */ /* =============== */
......
...@@ -59,6 +59,9 @@ struct repartition { ...@@ -59,6 +59,9 @@ struct repartition {
int usemetis; int usemetis;
int adaptive; int adaptive;
int use_fixed_costs;
int use_ticks;
/* The partition as a cell-list. */ /* The partition as a cell-list. */
int ncelllist; int ncelllist;
int *celllist; int *celllist;
......
#define HAVE_FIXED_COSTS 1
repartition_costs[1][0] = 9695; /* sort/none */
repartition_costs[2][1] = 2279; /* self/density */
repartition_costs[2][3] = 3489; /* self/force */
repartition_costs[2][4] = 240418; /* self/grav */
repartition_costs[3][1] = 179; /* pair/density */
repartition_costs[3][3] = 329; /* pair/force */
repartition_costs[3][4] = 4299; /* pair/grav */
repartition_costs[4][1] = 16814; /* sub_self/density */
repartition_costs[4][3] = 27911; /* sub_self/force */
repartition_costs[5][1] = 837; /* sub_pair/density */
repartition_costs[5][3] = 1454; /* sub_pair/force */
repartition_costs[6][0] = 144; /* init_grav/none */
repartition_costs[9][0] = 665; /* ghost/none */
repartition_costs[12][0] = 126; /* drift_part/none */
repartition_costs[13][0] = 234; /* drift_gpart/none */
repartition_costs[15][0] = 763; /* end_force/none */
repartition_costs[16][0] = 1276; /* kick1/none */
repartition_costs[17][0] = 1405; /* kick2/none */
repartition_costs[18][0] = 1969; /* timestep/none */
repartition_costs[21][0] = 566; /* grav_long_range/none */
repartition_costs[22][0] = 108; /* grav_mm/none */
repartition_costs[24][0] = 1366; /* grav_down/none */
repartition_costs[25][0] = 8664; /* grav_mesh/none */
...@@ -774,9 +774,12 @@ void task_dump_stats(struct engine *e, int step) { ...@@ -774,9 +774,12 @@ void task_dump_stats(struct engine *e, int step) {
#endif #endif
FILE *dfile = fopen(dumpfile, "w"); FILE *dfile = fopen(dumpfile, "w");
fprintf(dfile, "# task ntasks min max sum mean percent fixed_cost\n");
FILE *cfile = fopen(costsfile, "w"); FILE *cfile = fopen(costsfile, "w");
fprintf(cfile, "/* use as src/partition_fixed_costs.h */\n");
fprintf(cfile, "#define HAVE_FIXED_COSTS 1\n");
fprintf(dfile, "# task ntasks min max sum mean percent fixed_cost\n");
for (int j = 0; j < task_type_count; j++) { for (int j = 0; j < task_type_count; j++) {
const char *taskID = taskID_names[j]; const char *taskID = taskID_names[j];
for (int k = 0; k < task_subtype_count; k++) { for (int k = 0; k < task_subtype_count; k++) {
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment