diff --git a/examples/main.c b/examples/main.c index df2b30c743759273d3f8958ee17eb2abbb923eee..d7ac2d36b358839fa696f226d89eba4d92845080 100644 --- a/examples/main.c +++ b/examples/main.c @@ -330,8 +330,8 @@ int main(int argc, char *argv[]) { if (dump_tasks) { if (myrank == 0) { message("WARNING: complete task dumps are only created when " - "configured with --enable-task-debugging. " - "Basic task statistics will be output."); + "configured with --enable-task-debugging."); + message(" Basic task statistics will be output."); } } #endif diff --git a/src/engine.c b/src/engine.c index 18853bf00c13963d4224ad6005a7bbefe9f2d34a..fa74c7682b939e632cb317551bcb2e99477ec98a 100644 --- a/src/engine.c +++ b/src/engine.c @@ -1049,48 +1049,66 @@ void engine_repartition_trigger(struct engine *e) { /* Do nothing if there have not been enough steps since the last repartition * as we don't want to repeat this too often or immediately after a * repartition step. Also nothing to do when requested. */ - if (e->step - e->last_repartition >= 2 && - e->reparttype->type != REPART_NONE) { - - /* It is only worth checking the CPU loads when we have processed a - * significant number of all particles as we require all tasks to have - * times. */ - if ((e->updates > 1 && - e->updates >= e->total_nr_parts * e->reparttype->minfrac) || - (e->g_updates > 1 && - e->g_updates >= e->total_nr_gparts * e->reparttype->minfrac)) { - - /* Get CPU time used since the last call to this function. */ - double elapsed_cputime = clocks_get_cputime_used() - e->cputime_last_step; - - /* Gather the elapsed CPU times from all ranks for the last step. */ - double elapsed_cputimes[e->nr_nodes]; - MPI_Gather(&elapsed_cputime, 1, MPI_DOUBLE, elapsed_cputimes, 1, - MPI_DOUBLE, 0, MPI_COMM_WORLD); - if (e->nodeID == 0) { - - /* Get the range and mean of cputimes. 
*/ - double mintime = elapsed_cputimes[0]; - double maxtime = elapsed_cputimes[0]; - double sum = elapsed_cputimes[0]; - for (int k = 1; k < e->nr_nodes; k++) { - if (elapsed_cputimes[k] > maxtime) maxtime = elapsed_cputimes[k]; - if (elapsed_cputimes[k] < mintime) mintime = elapsed_cputimes[k]; - sum += elapsed_cputimes[k]; - } - double mean = sum / (double)e->nr_nodes; - - /* Are we out of balance? */ - double abs_trigger = fabs(e->reparttype->trigger); - if (((maxtime - mintime) / mean) > abs_trigger) { - // if (e->verbose) - message("trigger fraction %.3f > %.3f will repartition", - (maxtime - mintime) / mean, abs_trigger); - e->forcerepart = 1; - } else { - // if (e->verbose) { - message("trigger fraction %.3f =< %.3f will not repartition", - (maxtime - mintime) / mean, abs_trigger); + if (e->step - e->last_repartition >= 2 && e->reparttype->type != REPART_NONE) { + + /* If we have fixed costs available and this is step 2 or we are forcing + * repartitioning then we do a fixed costs one now. */ + if (e->reparttype->trigger > 1 || + (e->step == 2 && e->reparttype->use_fixed_costs)) { + + if (e->reparttype->trigger > 1) { + if ((e->step % (int)e->reparttype->trigger) == 0) e->forcerepart = 1; + } else { + e->forcerepart = 1; + } + e->reparttype->use_ticks = 0; + + } else { + + + /* It is only worth checking the CPU loads when we have processed a + * significant number of all particles as we require all tasks to have + * timings. */ + if ((e->updates > 1 && + e->updates >= e->total_nr_parts * e->reparttype->minfrac) || + (e->g_updates > 1 && + e->g_updates >= e->total_nr_gparts * e->reparttype->minfrac)) { + + /* We are using the task timings. */ + e->reparttype->use_ticks = 1; + + /* Get CPU time used since the last call to this function. */ + double elapsed_cputime = clocks_get_cputime_used() - e->cputime_last_step; + + /* Gather the elapsed CPU times from all ranks for the last step. 
*/ + double elapsed_cputimes[e->nr_nodes]; + MPI_Gather(&elapsed_cputime, 1, MPI_DOUBLE, elapsed_cputimes, 1, + MPI_DOUBLE, 0, MPI_COMM_WORLD); + if (e->nodeID == 0) { + + /* Get the range and mean of cputimes. */ + double mintime = elapsed_cputimes[0]; + double maxtime = elapsed_cputimes[0]; + double sum = elapsed_cputimes[0]; + for (int k = 1; k < e->nr_nodes; k++) { + if (elapsed_cputimes[k] > maxtime) maxtime = elapsed_cputimes[k]; + if (elapsed_cputimes[k] < mintime) mintime = elapsed_cputimes[k]; + sum += elapsed_cputimes[k]; + } + double mean = sum / (double)e->nr_nodes; + + /* Are we out of balance? */ + double abs_trigger = fabs(e->reparttype->trigger); + if (((maxtime - mintime) / mean) > abs_trigger) { + // if (e->verbose) + message("trigger fraction %.3f > %.3f will repartition", + (maxtime - mintime) / mean, abs_trigger); + e->forcerepart = 1; + } else { + // if (e->verbose) { + message("trigger fraction %.3f =< %.3f will not repartition", + (maxtime - mintime) / mean, abs_trigger); + } } } diff --git a/src/partition.c b/src/partition.c index 0bcf3ef0e6c5be8607a02bcfca6195ad03538bcd..74be00ab754bd45f70ad68a60e330465bd04d71e 100644 --- a/src/partition.c +++ b/src/partition.c @@ -83,7 +83,7 @@ static int check_complete(struct space *s, int verbose, int nregions); * statistics output produced when running with task debugging enabled. */ static double repartition_costs[task_type_count][task_subtype_count]; -static void repart_init_fixed_costs(int policy); +static int repart_init_fixed_costs(void); #endif /* Vectorisation support */ @@ -1165,6 +1165,7 @@ struct weights_mapper_data { int timebins; int vweights; int nr_cells; + int use_ticks; struct cell *cells; }; @@ -1182,8 +1183,8 @@ static void check_weights(struct task *tasks, int nr_tasks, * @param num_elements the number of data elements to process. * @param extra_data additional data for the mapper context. 
*/ -void partition_gather_weights(void *map_data, int num_elements, - void *extra_data) { +static void partition_gather_weights(void *map_data, int num_elements, + void *extra_data) { struct task *tasks = (struct task *)map_data; struct weights_mapper_data *mydata = (struct weights_mapper_data *)extra_data; @@ -1196,6 +1197,7 @@ void partition_gather_weights(void *map_data, int num_elements, int nr_cells = mydata->nr_cells; int timebins = mydata->timebins; int vweights = mydata->vweights; + int use_ticks = mydata->use_ticks; struct cell *cells = mydata->cells; @@ -1207,11 +1209,13 @@ void partition_gather_weights(void *map_data, int num_elements, if (t->type == task_type_send || t->type == task_type_recv || t->type == task_type_logger || t->implicit || t->ci == NULL) continue; - /* Get the task weight based on fixed cost for this task type. */ - //double w = repartition_costs[t->type][t->subtype]; - - /* Get the task weight based on costs. */ - double w = (double)t->toc - (double)t->tic; + /* Get weight for this task. Either based on fixed costs or task timings. */ + double w = 0.0; + if (use_ticks) { + w = (double)t->toc - (double)t->tic; + } else { + w = repartition_costs[t->type][t->subtype]; + } if (w <= 0.0) continue; /* Get the top-level cells involved. 
*/ @@ -1378,6 +1382,8 @@ static void repart_edge_metis(int vweights, int eweights, int timebins, ticks tic = getticks(); + message("using ticks: %d", repartition->use_ticks); + threadpool_map(&s->e->threadpool, partition_gather_weights, tasks, nr_tasks, sizeof(struct task), 0, &weights_data); if (s->e->verbose) @@ -1541,7 +1547,6 @@ void partition_repartition(struct repartition *reparttype, int nodeID, #if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS)) ticks tic = getticks(); - repart_init_fixed_costs(s->e->policy); if (reparttype->type == REPART_METIS_VERTEX_EDGE_COSTS) { repart_edge_metis(1, 1, 0, reparttype, nodeID, nr_nodes, s, tasks, @@ -1853,27 +1858,45 @@ void partition_init(struct partition *partition, repartition->ncelllist = 0; repartition->celllist = NULL; + /* Do we have fixed costs available? These can be used to force + * repartitioning at any time. Not required if not repartitioning. */ + repartition->use_fixed_costs = + parser_get_opt_param_int(params, "DomainDecomposition:use_fixed_costs", 0); + if (repartition->type == REPART_NONE) repartition->use_fixed_costs = 0; + + /* Initialise the fixed costs if they were requested or are required. */ + if (repartition->use_fixed_costs || repartition->trigger > 1) { + if (!repart_init_fixed_costs()) { + if (repartition->trigger <= 1) { + if (engine_rank == 0) + message("WARNING: fixed cost repartitioning was requested but is" + " not available."); + repartition->use_fixed_costs = 0; + } else { + error("Forced fixed cost repartitioning was requested but is" + " not available."); + } + } + } + #else error("SWIFT was not compiled with MPI support"); #endif } -#if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS)) +#ifdef WITH_MPI /** * @brief Set the fixed costs for repartition using METIS. * - * These are determined using a run with task debugging enabled which gives a - * statistical analysis condensed into a .h file.
Note that some tasks have - different costs depending on the engine policies, for instance the kicks - do work with self gravity and hydro, we attempt to allow for that. Finally - note this is a statistical solution, so requires that there are sufficient - tasks on each rank so that the fixed costs do the right thing on average, - you may like to used task ticks as weights if this isn't working. + * These are determined using a run with the -y flag enabled, which produces + * a statistical analysis that is condensed into a .h file for inclusion. * - * @param policy the #engine policy. + * If the default include file is used then no fixed costs are set and this + * function will return 0. */ -static void repart_init_fixed_costs(int policy) { +static int repart_init_fixed_costs(void) { +#if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS)) /* Set the default fixed cost. */ for (int j = 0; j < task_type_count; j++) { for (int k = 0; k < task_subtype_count; k++) { @@ -1881,72 +1904,14 @@ static void repart_init_fixed_costs(int policy) { } } - /* TODO: these may be separable, so we could have costs for each policy - * addition, if separated need to take care with the relative scaling.
*/ - if (policy & (engine_policy_hydro & engine_policy_self_gravity)) { - - /* EAGLE_50 -s -G -S 8 nodes 16 cores */ - repartition_costs[1][0] = 45842; /* sort/none */ - repartition_costs[2][1] = 15609; /* self/density */ - repartition_costs[2][3] = 18830; /* self/force */ - repartition_costs[2][4] = 202588; /* self/grav */ - repartition_costs[3][1] = 67; /* pair/density */ - repartition_costs[3][3] = 207; /* pair/force */ - repartition_costs[3][4] = 1507; /* pair/grav */ - repartition_costs[4][1] = 36268; /* sub_self/density */ - repartition_costs[4][3] = 52252; /* sub_self/force */ - repartition_costs[5][1] = 709; /* sub_pair/density */ - repartition_costs[5][3] = 1258; /* sub_pair/force */ - repartition_costs[6][0] = 1135; /* init_grav/none */ - repartition_costs[9][0] = 160; /* ghost/none */ - repartition_costs[12][0] = 7176; /* drift_part/none */ - repartition_costs[13][0] = 4502; /* drift_gpart/none */ - repartition_costs[15][0] = 8234; /* end_force/none */ - repartition_costs[16][0] = 15508; /* kick1/none */ - repartition_costs[17][0] = 15780; /* kick2/none */ - repartition_costs[18][0] = 19848; /* timestep/none */ - repartition_costs[21][0] = 4105; /* grav_long_range/none */ - repartition_costs[22][0] = 68; /* grav_mm/none */ - repartition_costs[24][0] = 16785; /* grav_down/none */ - repartition_costs[25][0] = 70632; /* grav_mesh/none */ - - } else if (policy & engine_policy_self_gravity) { - - /* EAGLE_50 -G 8 nodes 16 cores, scaled to match self/grav. 
*/ - repartition_costs[2][4] = 202588; /* self/grav */ - repartition_costs[3][4] = 1760; /* pair/grav */ - repartition_costs[6][0] = 1610; /* init_grav/none */ - repartition_costs[13][0] = 999; /* drift_gpart/none */ - repartition_costs[15][0] = 3481; /* end_force/none */ - repartition_costs[16][0] = 6336; /* kick1/none */ - repartition_costs[17][0] = 6343; /* kick2/none */ - repartition_costs[18][0] = 13864; /* timestep/none */ - repartition_costs[21][0] = 1422; /* grav_long_range/none */ - repartition_costs[22][0] = 71; /* grav_mm/none */ - repartition_costs[24][0] = 16011; /* grav_down/none */ - repartition_costs[25][0] = 60414; /* grav_mesh/none */ - } else if (policy & engine_policy_hydro) { - - /* EAGLE_50 -s 8 nodes 16 cores, not scaled, but similar. */ - repartition_costs[1][0] = 52733; /* sort/none */ - repartition_costs[2][1] = 15458; /* self/density */ - repartition_costs[2][3] = 19212; /* self/force */ - repartition_costs[3][1] = 74; /* pair/density */ - repartition_costs[3][3] = 242; /* pair/force */ - repartition_costs[4][1] = 42895; /* sub_self/density */ - repartition_costs[4][3] = 64254; /* sub_self/force */ - repartition_costs[5][1] = 818; /* sub_pair/density */ - repartition_costs[5][3] = 1443; /* sub_pair/force */ - repartition_costs[9][0] = 159; /* ghost/none */ - repartition_costs[12][0] = 6708; /* drift_part/none */ - repartition_costs[15][0] = 6479; /* end_force/none */ - repartition_costs[16][0] = 6609; /* kick1/none */ - repartition_costs[17][0] = 6975; /* kick2/none */ - repartition_costs[18][0] = 5229; /* timestep/none */ - } -} +#include <partition_fixed_costs.h> + return HAVE_FIXED_COSTS; #endif + return 0; +} +#endif /* WITH_MPI */ + /* General support */ /* =============== */ diff --git a/src/partition.h b/src/partition.h index 1202a1d19ff18f83ed26464bade088990ed51db6..18b7079b000e95d15a366d8ce5d5484c4d5ad75f 100644 --- a/src/partition.h +++ b/src/partition.h @@ -59,6 +59,9 @@ struct repartition { int usemetis; int adaptive; + int 
use_fixed_costs; + int use_ticks; + /* The partition as a cell-list. */ int ncelllist; int *celllist; diff --git a/src/partition_fixed_costs.h b/src/partition_fixed_costs.h new file mode 100644 index 0000000000000000000000000000000000000000..75b7a61155cfb4eb24c978b493593c6821b23151 --- /dev/null +++ b/src/partition_fixed_costs.h @@ -0,0 +1,24 @@ +#define HAVE_FIXED_COSTS 1 +repartition_costs[1][0] = 9695; /* sort/none */ +repartition_costs[2][1] = 2279; /* self/density */ +repartition_costs[2][3] = 3489; /* self/force */ +repartition_costs[2][4] = 240418; /* self/grav */ +repartition_costs[3][1] = 179; /* pair/density */ +repartition_costs[3][3] = 329; /* pair/force */ +repartition_costs[3][4] = 4299; /* pair/grav */ +repartition_costs[4][1] = 16814; /* sub_self/density */ +repartition_costs[4][3] = 27911; /* sub_self/force */ +repartition_costs[5][1] = 837; /* sub_pair/density */ +repartition_costs[5][3] = 1454; /* sub_pair/force */ +repartition_costs[6][0] = 144; /* init_grav/none */ +repartition_costs[9][0] = 665; /* ghost/none */ +repartition_costs[12][0] = 126; /* drift_part/none */ +repartition_costs[13][0] = 234; /* drift_gpart/none */ +repartition_costs[15][0] = 763; /* end_force/none */ +repartition_costs[16][0] = 1276; /* kick1/none */ +repartition_costs[17][0] = 1405; /* kick2/none */ +repartition_costs[18][0] = 1969; /* timestep/none */ +repartition_costs[21][0] = 566; /* grav_long_range/none */ +repartition_costs[22][0] = 108; /* grav_mm/none */ +repartition_costs[24][0] = 1366; /* grav_down/none */ +repartition_costs[25][0] = 8664; /* grav_mesh/none */ diff --git a/src/task.c b/src/task.c index 51998182ab79c2f07f218f0e0aa9066a73e06f92..48db2d06f8fb77f98e70f709e9d4d56a8984ca5e 100644 --- a/src/task.c +++ b/src/task.c @@ -774,9 +774,12 @@ void task_dump_stats(struct engine *e, int step) { #endif FILE *dfile = fopen(dumpfile, "w"); + fprintf(dfile, "# task ntasks min max sum mean percent fixed_cost\n"); + FILE *cfile = fopen(costsfile, "w"); + 
fprintf(cfile, "/* use as src/partition_fixed_costs.h */\n"); + fprintf(cfile, "#define HAVE_FIXED_COSTS 1\n"); - fprintf(dfile, "# task ntasks min max sum mean percent fixed_cost\n"); for (int j = 0; j < task_type_count; j++) { const char *taskID = taskID_names[j]; for (int k = 0; k < task_subtype_count; k++) {