From 48c45be5b5b2794a9633c0a040372113ba1cc485 Mon Sep 17 00:00:00 2001 From: "Peter W. Draper" <p.w.draper@durham.ac.uk> Date: Tue, 6 Dec 2016 17:45:47 +0000 Subject: [PATCH] Make fraction time difference that triggers a repartition a parameter --- examples/main.c | 8 +++---- examples/parameter_example.yml | 9 +++---- src/engine.c | 43 ++++++++++++++-------------------- src/engine.h | 2 +- src/partition.c | 30 ++++++++++++++---------- src/partition.h | 9 ++++++- 6 files changed, 53 insertions(+), 48 deletions(-) diff --git a/examples/main.c b/examples/main.c index aea55d3ca2..103f76048d 100644 --- a/examples/main.c +++ b/examples/main.c @@ -323,10 +323,10 @@ int main(int argc, char *argv[]) { #endif /* Prepare the domain decomposition scheme */ - enum repartition_type reparttype = REPART_NONE; + struct repartition repartition; #ifdef WITH_MPI struct partition initial_partition; - partition_init(&initial_partition, &reparttype, params, nr_nodes); + partition_init(&initial_partition, &repartition, params, nr_nodes); /* Let's report what we did */ if (myrank == 0) { @@ -335,7 +335,7 @@ int main(int argc, char *argv[]) { if (initial_partition.type == INITPART_GRID) message("grid set to [ %i %i %i ].", initial_partition.grid[0], initial_partition.grid[1], initial_partition.grid[2]); - message("Using %s repartitioning", repartition_name[reparttype]); + message("Using %s repartitioning", repartition_name[repartition.type]); } #endif @@ -551,7 +551,7 @@ int main(int argc, char *argv[]) { timers_reset(timers_mask_all); /* Take a step. */ - engine_step(&e, reparttype); + engine_step(&e, &repartition); #ifdef SWIFT_DEBUG_TASKS /* Dump the task data using the given frequency. 
*/ diff --git a/examples/parameter_example.yml b/examples/parameter_example.yml index 899bfb0224..6300d4831a 100644 --- a/examples/parameter_example.yml +++ b/examples/parameter_example.yml @@ -58,11 +58,12 @@ InitialConditions: # Parameters governing domain decomposition DomainDecomposition: - initial_type: m # (Optional) The initial strategy ("g", "m", "w", or "v"). + initial_type: m # (Optional) The initial strategy ("g", "m", "w", or "v"). initial_grid_x: 10 # (Optional) Grid size if the "g" strategy is chosen. - initial_grid_y: 10 - initial_grid_z: 10 - repartition_type: b # (Optional) The re-decomposition strategy ("n", "b", "v", "e" or "x"). + initial_grid_y: 10 # "" + initial_grid_z: 10 # "" + repartition_type: b # (Optional) The re-decomposition strategy ("n", "b", "v", "e" or "x"). + fractionaltime: 0.1 # (Optional) The fractional time difference between MPI ranks required to trigger a new decomposition. # Parameters related to external potentials -------------------------------------------- diff --git a/src/engine.c b/src/engine.c index 6a3169c4b4..3f91dc96f5 100644 --- a/src/engine.c +++ b/src/engine.c @@ -2562,9 +2562,9 @@ void engine_init_particles(struct engine *e, int flag_entropy_ICs) { * @brief Let the #engine loose to compute the forces. * * @param e The #engine. - * @param reparttype type of repartitioning to use (MPI only) -*/ -void engine_step(struct engine *e, enum repartition_type reparttype) { + * @param repartition the repartition type and trigger threshold (MPI only). + */ +void engine_step(struct engine *e, struct repartition *repartition) { double snapshot_drift_time = 0.; @@ -2574,6 +2574,7 @@ void engine_step(struct engine *e, enum repartition_type reparttype) { clocks_gettime(&time1); #ifdef WITH_MPI + /* Time since the last step started. 
*/ double elapsed_time = elapsed(e->toc_step, e->tic_step); #endif e->tic_step = getticks(); @@ -2581,41 +2582,33 @@ void engine_step(struct engine *e, enum repartition_type reparttype) { /* Recover the (integer) end of the next time-step */ engine_collect_timestep(e); - /* If the last step updated all particles then repartition the space around - * the nodes. - * - * XXX handle fixdt, that will repartition all the time. - * - * XXX Look at node balance, try to use that to decide if necessary. - */ #ifdef WITH_MPI - /* Gather the elapsed times from all ranks for the last step. - * These are used to determine if repartitioning might be necessary. */ + + /* Gather the elapsed times from all ranks for the last step. */ double elapsed_times[e->nr_nodes]; MPI_Gather(&elapsed_time, 1, MPI_DOUBLE, elapsed_times, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); + /* If all available particles of any type have been updated then consider + * if a repartition might be needed. */ if (e->nodeID == 0) { if ((e->updates != 0 && e->updates == e->total_nr_parts) || (e->g_updates != 0 && e->g_updates == e->total_nr_gparts)) { /* OK we are tempted as enough particles have been updated, so check - * the distribution of elapsed times for the ranks.*/ + * the distribution of elapsed times for the ranks. */ double mintime = elapsed_times[0]; double maxtime = elapsed_times[0]; for (int k = 1; k < e->nr_nodes; k++) { if (elapsed_times[k] > maxtime) maxtime = elapsed_times[k]; if (elapsed_times[k] < mintime) mintime = elapsed_times[k]; } - if (((maxtime - mintime) / mintime) > 0.1) { - /* 10% variation. 
*/ - message("will repartition %ld %ld %ld %ld", e->updates, - e->total_nr_parts, e->g_updates, e->total_nr_gparts); - e->forcerepart = reparttype; + if (((maxtime - mintime) / mintime) > repartition->fractionaltime) { + message("repartition variance: %f", (maxtime - mintime) / mintime); + e->forcerepart = repartition->type; } else { - message("will not repartition, variance too small: %f", - (maxtime - mintime) / mintime); + message("no repartition variance: %f", (maxtime - mintime) / mintime); } } } @@ -2659,14 +2652,12 @@ void engine_step(struct engine *e, enum repartition_type reparttype) { if (e->nodeID == 0) { /* Print some information to the screen */ - printf(" %6d %14e %14e %10zu %10zu %21.3f %s\n", e->step, e->time, - e->timeStep, e->updates, e->g_updates, e->wallclock_time, - clocks_get_timesincestart()); + printf(" %6d %14e %14e %10zu %10zu %21.3f\n", e->step, e->time, + e->timeStep, e->updates, e->g_updates, e->wallclock_time); fflush(stdout); - fprintf(e->file_timesteps, " %6d %14e %14e %10zu %10zu %21.3f %s\n", e->step, - e->time, e->timeStep, e->updates, e->g_updates, e->wallclock_time, - clocks_get_timesincestart()); + fprintf(e->file_timesteps, " %6d %14e %14e %10zu %10zu %21.3f\n", e->step, + e->time, e->timeStep, e->updates, e->g_updates, e->wallclock_time); fflush(e->file_timesteps); } diff --git a/src/engine.h b/src/engine.h index d97a0369e3..7843030fa0 100644 --- a/src/engine.h +++ b/src/engine.h @@ -236,7 +236,7 @@ void engine_launch(struct engine *e, int nr_runners); void engine_prepare(struct engine *e, int nodrift); void engine_print(struct engine *e); void engine_init_particles(struct engine *e, int flag_entropy_ICs); -void engine_step(struct engine *e, enum repartition_type reparttype); +void engine_step(struct engine *e, struct repartition *repartition); void engine_maketasks(struct engine *e); void engine_split(struct engine *e, struct partition *initial_partition); void engine_exchange_strays(struct engine *e, size_t offset_parts, 
diff --git a/src/partition.c b/src/partition.c index 89ba3f2835..dbbe54d202 100644 --- a/src/partition.c +++ b/src/partition.c @@ -709,8 +709,7 @@ static void repart_vertex_metis(struct space *s, int nodeID, int nr_nodes) { * Note that at the end of this process all the cells will be re-distributed * across the nodes, but the particles themselves will not be. * - * @param reparttype the type of repartition to attempt, see the repart_type - *enum. + * @param reparttype the #repartition_type to attempt. * @param nodeID our nodeID. * @param nr_nodes the number of nodes. * @param s the space of cells holding our local particles. @@ -723,7 +722,8 @@ void partition_repartition(enum repartition_type reparttype, int nodeID, #if defined(WITH_MPI) && defined(HAVE_METIS) - if (reparttype == REPART_METIS_BOTH || reparttype == REPART_METIS_EDGE || + if (reparttype == REPART_METIS_BOTH || + reparttype == REPART_METIS_EDGE || reparttype == REPART_METIS_VERTEX_EDGE) { int partweights; @@ -905,15 +905,15 @@ void partition_initial_partition(struct partition *initial_partition, /** * @brief Initialises the partition and re-partition scheme from the parameter - *file + * file * * @param partition The #partition scheme to initialise. - * @param reparttype The repartition scheme to initialise. + * @param repartition The #repartition scheme to initialise. * @param params The parsed parameter file. * @param nr_nodes The number of MPI nodes we are running on. 
*/ void partition_init(struct partition *partition, - enum repartition_type *reparttype, + struct repartition *repartition, const struct swift_params *params, int nr_nodes) { #ifdef WITH_MPI @@ -921,7 +921,6 @@ void partition_init(struct partition *partition, /* Defaults make use of METIS if available */ #ifdef HAVE_METIS char default_repart = 'b'; - ; char default_part = 'm'; #else char default_repart = 'n'; @@ -979,20 +978,20 @@ void partition_init(struct partition *partition, switch (repart_type) { case 'n': - *reparttype = REPART_NONE; + repartition->type = REPART_NONE; break; #ifdef HAVE_METIS case 'b': - *reparttype = REPART_METIS_BOTH; + repartition->type = REPART_METIS_BOTH; break; case 'e': - *reparttype = REPART_METIS_EDGE; + repartition->type = REPART_METIS_EDGE; break; case 'v': - *reparttype = REPART_METIS_VERTEX; + repartition->type = REPART_METIS_VERTEX; break; case 'x': - *reparttype = REPART_METIS_VERTEX_EDGE; + repartition->type = REPART_METIS_VERTEX_EDGE; break; default: message("Invalid choice of re-partition type '%c'.", repart_type); @@ -1004,6 +1003,13 @@ void partition_init(struct partition *partition, #endif } + /* Get the fractional time difference between nodes. A repartition is + * only allowed when the measured difference exceeds this value. */ + repartition->fractionaltime = parser_get_opt_param_float( + params, "DomainDecomposition:fractionaltime", 0.1); + if (repartition->fractionaltime < 0 || repartition->fractionaltime > 1) + error("Invalid DomainDecomposition:fractionaltime, must be in range 0 to 1"); + #else error("SWIFT was not compiled with MPI support"); #endif diff --git a/src/partition.h b/src/partition.h index b2a132ed48..e87afb484a 100644 --- a/src/partition.h +++ b/src/partition.h @@ -39,6 +39,7 @@ struct partition { enum partition_type type; int grid[3]; }; + /* Repartition type to use. 
*/ enum repartition_type { REPART_NONE = 0, @@ -48,6 +49,12 @@ enum repartition_type { REPART_METIS_VERTEX_EDGE }; +/* Repartition preferences. */ +struct repartition { + enum repartition_type type; + float fractionaltime; +}; + /* Simple descriptions of types for reports. */ extern const char *repartition_name[]; @@ -60,7 +67,7 @@ void partition_initial_partition(struct partition *initial_partition, int partition_space_to_space(double *oldh, double *oldcdim, int *oldnodeID, struct space *s); void partition_init(struct partition *partition, - enum repartition_type *reparttypestruct, + struct repartition *repartition, const struct swift_params *params, int nr_nodes); #endif /* SWIFT_PARTITION_H */ -- GitLab