diff --git a/examples/parameter_example.yml b/examples/parameter_example.yml index bd5f6871e64b0bf987c80abe282f04e4906e82ad..c6836713574da33056ac63558bf2d8518b07e418 100644 --- a/examples/parameter_example.yml +++ b/examples/parameter_example.yml @@ -1,4 +1,4 @@ -# Define the system of units to use internally. +# Define the system of units to use internally. InternalUnitSystem: UnitMass_in_cgs: 1 # Grams UnitLength_in_cgs: 1 # Centimeters @@ -38,7 +38,7 @@ Snapshots: Statistics: delta_time: 1e-2 # Time between statistics output energy_file_name: energy # (Optional) File name for energy output - timestep_file_name: timesteps # (Optional) File name for timing information output. Note: No underscores "_" allowed in file name + timestep_file_name: timesteps # (Optional) File name for timing information output. Note: No underscores "_" allowed in file name # Parameters for the hydrodynamics scheme SPH: @@ -50,12 +50,12 @@ SPH: # Parameters for the self-gravity scheme Gravity: - eta: 0.025 # Constant dimensionless multiplier for time integration. + eta: 0.025 # Constant dimensionless multiplier for time integration. epsilon: 0.1 # Softening length (in internal units). a_smooth: 1.25 # (Optional) Smoothing scale in top-level cell sizes to smooth the long-range forces over (this is the default value). r_cut: 4.5 # (Optional) Cut-off in number of top-level cells beyond which no FMM forces are computed (this is the default value). - + # Parameters related to the initial conditions InitialConditions: file_name: SedovBlast/sedov.hdf5 # The file to read @@ -67,15 +67,19 @@ InitialConditions: # Parameters governing domain decomposition DomainDecomposition: - initial_type: m # (Optional) The initial strategy ("g", "m", "w", or "v"). - initial_grid_x: 10 # (Optional) Grid size if the "g" strategy is chosen. - initial_grid_y: 10 # "" - initial_grid_z: 10 # "" - repartition_type: b # (Optional) The re-decomposition strategy ("n", "b", "v", "e" or "x"). 
- fractionaltime: 0.1 # (Optional) The fractional time difference between MPI ranks required to trigger a new decomposition - + initial_type: m # (Optional) The initial strategy ("g", "m", "w", or "v"). + initial_grid_x: 10 # (Optional) Grid size if the "g" strategy is chosen. + initial_grid_y: 10 # "" + initial_grid_z: 10 # "" + repartition_type: b # (Optional) The re-decomposition strategy ("n", "b", "v", "e" or "x"). + trigger: 0.05 # (Optional) Fractional (<1) CPU time difference between MPI ranks required to trigger a + # new decomposition, or number of steps (>1) between decompositions + minfrac: 0.9 # (Optional) Fraction of all particles that should be updated in previous step when + # using CPU time trigger + cputime_file_name: cputime # (Optional) File name for per node CPU time estimates for each step, requires --enable-task-debugging + # Parameters related to external potentials -------------------------------------------- - + # Point mass external potentials PointMassPotential: position_x: 50. # location of external point mass (internal units) @@ -92,7 +96,7 @@ IsothermalPotential: vrot: 200. # Rotation speed of isothermal potential (internal units) timestep_mult: 0.03 # Dimensionless pre-factor for the time-step condition epsilon: 0.1 # Softening size (internal units) - + # Disk-patch potential parameters DiscPatchPotential: surface_density: 10. 
# Surface density of the disc (internal units) diff --git a/src/engine.c b/src/engine.c index 1ac2f1b32cc10c6c214d250f6219a2f5ec239ff4..cb48c7219cf20a3f16c95553bab5fdd85395b0f1 100644 --- a/src/engine.c +++ b/src/engine.c @@ -3040,36 +3040,66 @@ void engine_step(struct engine *e) { e->wallclock_time); fflush(stdout); - fprintf(e->file_timesteps, " %6d %14e %14e %10zu %10zu %10zu %21.3f\n", + fprintf(e->file_timesteps, " %6d %14e %14e %10zu %10zu %10zu %21.3f %3d %3d\n", e->step, e->time, e->timeStep, e->updates, e->g_updates, - e->s_updates, e->wallclock_time); + e->s_updates, e->wallclock_time, (e->lastrebuild > 0), + (e->lastrepart != REPART_NONE)); fflush(e->file_timesteps); } - /* Prepare the tasks to be launched, rebuild or repartition if needed. */ - engine_prepare(e); - /* Repartition the space amongst the nodes? */ #ifdef WITH_MPI - /* CPU time used since the last step started (note not elapsed time). */ - double elapsed_cputime = e->cputoc_step - e->cputic_step; - e->cputic_step = clocks_get_cputime_used(); + /* Old style if trigger is >1 or this is the second step. */ + if (e->reparttype->trigger > 1 || e->step == 2 ) { + if (e->reparttype->trigger > 1) { + if (e->step % (int)e->reparttype->trigger == 2) + e->forcerepart = 1; + } else { + e->forcerepart = 1; + } - /* Gather the elapsed CPU times from all ranks for the last step. */ - double elapsed_cputimes[e->nr_nodes]; - MPI_Gather(&elapsed_cputime, 1, MPI_DOUBLE, elapsed_cputimes, 1, MPI_DOUBLE, - 0, MPI_COMM_WORLD); +#ifdef SWIFT_DEBUG_TASKS + /* Capture CPU times for comparisons with other methods. 
*/ + double elapsed_cputime = e->cputoc_step - e->cputic_step; + e->cputic_step = clocks_get_cputime_used(); + double elapsed_cputimes[e->nr_nodes]; + MPI_Gather(&elapsed_cputime, 1, MPI_DOUBLE, elapsed_cputimes, 1, MPI_DOUBLE, + 0, MPI_COMM_WORLD); + if (e->nodeID == 0) { + double mintime = elapsed_cputimes[0]; + double maxtime = elapsed_cputimes[0]; + for (int k = 1; k < e->nr_nodes; k++) { + if (elapsed_cputimes[k] > maxtime) maxtime = elapsed_cputimes[k]; + if (elapsed_cputimes[k] < mintime) mintime = elapsed_cputimes[k]; + } + fprintf(e->file_cputimes, "%6d ", e->step); + for (int k = 0; k < e->nr_nodes; k++) { + fprintf(e->file_cputimes, " %14.7g", elapsed_cputimes[k]); + } + fprintf(e->file_cputimes, "\n"); + fflush(e->file_cputimes); + } +#endif - /* If all available particles of any type have been updated then consider if - * a repartition might be needed. Only worth checking when there is load on - * all ranks. */ - if (e->nodeID == 0) { - if ((e->updates != 0 && e->updates == e->total_nr_parts) || - (e->g_updates != 0 && e->g_updates == e->total_nr_gparts)) { + } else { + + /* Use cputimes from ranks to estimate the imbalance. */ + double elapsed_cputime = e->cputoc_step - e->cputic_step; + e->cputic_step = clocks_get_cputime_used(); + + /* Gather the elapsed CPU times from all ranks for the last step. */ + double elapsed_cputimes[e->nr_nodes]; + MPI_Gather(&elapsed_cputime, 1, MPI_DOUBLE, elapsed_cputimes, 1, MPI_DOUBLE, + 0, MPI_COMM_WORLD); + + /* If all available particles of any type have been updated then consider + * if a repartition might be needed. Only worth checking when there is + * load on all ranks, so require that some fraction of all particles have + * been processed. */ + if (e->nodeID == 0) { - /* OK we are tempted as enough particles have been updated, so check - * the distribution of elapsed times for the ranks. */ + /* Get the range of cputimes. 
*/ double mintime = elapsed_cputimes[0]; double maxtime = elapsed_cputimes[0]; for (int k = 1; k < e->nr_nodes; k++) { @@ -3077,19 +3107,39 @@ void engine_step(struct engine *e) { if (elapsed_cputimes[k] < mintime) mintime = elapsed_cputimes[k]; } - if (((maxtime - mintime) / mintime) > e->reparttype->fractionaltime) { - if (e->verbose) - message("fractionaltime %.2f > %.2f will repartition", - (maxtime - mintime) / mintime, e->reparttype->fractionaltime); - e->forcerepart = e->reparttype->type; + if ((e->updates > 1 && e->updates >= e->total_nr_parts * e->reparttype->minfrac) || + (e->g_updates > 1 && e->g_updates >= e->total_nr_gparts * e->reparttype->minfrac)) { + + /* Are we out of balance? */ + if (((maxtime - mintime) / mintime) > e->reparttype->trigger) { + if (e->verbose) + message("fractionaltime %.2f > %.2f will repartition", + (maxtime - mintime) / mintime, e->reparttype->trigger); + e->forcerepart = 1; + } } + +#ifdef SWIFT_DEBUG_TASKS + /* Save the cputimes for analysis. */ + fprintf(e->file_cputimes, "%6d ", e->step); + for (int k = 0; k < e->nr_nodes; k++) { + fprintf(e->file_cputimes, " %14.7g", elapsed_cputimes[k]); + } + fprintf(e->file_cputimes, "\n"); + fflush(e->file_cputimes); +#endif } } /* All nodes do this together. */ MPI_Bcast(&e->forcerepart, 1, MPI_INT, 0, MPI_COMM_WORLD); + e->lastrepart = e->forcerepart; #endif + /* Prepare the tasks to be launched, rebuild or repartition if needed. */ + e->lastrebuild = e->forcerebuild; + engine_prepare(e); + /* Print the number of active tasks ? 
*/ if (e->verbose) engine_print_task_counts(e); @@ -3538,7 +3588,9 @@ void engine_init(struct engine *e, struct space *s, e->proxy_ind = NULL; e->nr_proxies = 0; e->forcerebuild = 1; + e->lastrebuild = 1; e->forcerepart = 0; + e->lastrepart = 0; e->reparttype = reparttype; e->dump_snapshot = 0; e->links = NULL; @@ -3567,6 +3619,9 @@ void engine_init(struct engine *e, struct space *s, e->dt_max = parser_get_param_double(params, "TimeIntegration:dt_max"); e->file_stats = NULL; e->file_timesteps = NULL; +#if WITH_MPI + e->file_cputimes = NULL; +#endif e->deltaTimeStatistics = parser_get_param_double(params, "Statistics:delta_time"); e->timeLastStatistics = e->timeBegin - e->deltaTimeStatistics; @@ -3763,6 +3818,15 @@ void engine_init(struct engine *e, struct space *s, "Step", "Time", "Time-step", "Updates", "g-Updates", "s-Updates", "Wall-clock time", clocks_getunit()); fflush(e->file_timesteps); + +#if defined(SWIFT_DEBUG_TASKS) && defined(WITH_MPI) + char cputimefileName[200] = ""; + parser_get_opt_param_string(params, "DomainDecomposition:cputime_file_name", + cputimefileName, + engine_default_cputime_file_name); + sprintf(cputimefileName + strlen(cputimefileName), ".txt"); + e->file_cputimes = fopen(cputimefileName, "w"); +#endif } /* Print policy */ diff --git a/src/engine.h b/src/engine.h index 97e18fba9a03b2f8c461ff0550c7085b66948011..bbb3ae2aa96a2a61c4b52fe41b084ad21819c5a4 100644 --- a/src/engine.h +++ b/src/engine.h @@ -80,6 +80,7 @@ extern const char *engine_policy_names[]; #define engine_redistribute_alloc_margin 1.2 #define engine_default_energy_file_name "energy" #define engine_default_timesteps_file_name "timesteps" +#define engine_default_cputime_file_name "cputime" /* The rank of the engine as a global variable (for messages). */ extern int engine_rank; @@ -189,6 +190,9 @@ struct engine { #ifdef WITH_MPI /* CPU times at the start/end of a step. */ double cputic_step, cputoc_step; + + /* Record of these. 
*/ + FILE *file_cputimes; #endif /* Wallclock time of the last time-step */ @@ -196,9 +200,11 @@ struct engine { /* Force the engine to rebuild? */ int forcerebuild; + int lastrebuild; /* Force the engine to repartition ? */ int forcerepart; + int lastrepart; struct repartition *reparttype; /* Need to dump a snapshot ? */ diff --git a/src/partition.c b/src/partition.c index 1878b9b4d2a615837c0658d01fcf6cdb3af9148b..39639eabbca717e9f1b28dcf137f092cfb6fbe89 100644 --- a/src/partition.c +++ b/src/partition.c @@ -793,7 +793,7 @@ void partition_repartition(struct repartition *reparttype, int nodeID, #if defined(WITH_MPI) && defined(HAVE_METIS) - if (reparttype->type == REPART_METIS_BOTH || + if (reparttype->type == REPART_METIS_BOTH || reparttype->type == REPART_METIS_EDGE || reparttype->type == REPART_METIS_VERTEX_EDGE) { @@ -1074,13 +1074,17 @@ void partition_init(struct partition *partition, #endif } - /* Get the fraction time difference between nodes. If larger than - * this when a repartition is being considered it will be allowed. */ - repartition->fractionaltime = parser_get_opt_param_float( - params, "DomainDecomposition:fractionaltime", 0.1); - if (repartition->fractionaltime < 0 || repartition->fractionaltime > 1) - error( - "Invalid DomainDecomposition:fractionaltime, must be in range 0 to 1"); + /* Get the fraction CPU time difference between nodes (<1) or the number + * of steps between repartitions (>1). */ + repartition->trigger = parser_get_opt_param_float(params, "DomainDecomposition:trigger", 0.05f); + if (repartition->trigger <= 0) + error("Invalid DomainDecomposition:trigger, must be greater than zero"); + + /* Fraction of particles that should be updated before a repartition + * based on CPU time is considered. 
*/ repartition->minfrac = parser_get_opt_param_float(params, "DomainDecomposition:minfrac", 0.9f); + if (repartition->minfrac <= 0 || repartition->minfrac > 1) + error("Invalid DomainDecomposition:minfrac, must be greater than 0 and less than or equal to 1"); #else error("SWIFT was not compiled with MPI support"); diff --git a/src/partition.h index 35708de96dafc0a6ccef8c17a468e9699cc7cfaf..03523b165a930b085224e458ac0dd8c8232a578d 100644 --- a/src/partition.h +++ b/src/partition.h @@ -52,7 +52,8 @@ enum repartition_type { /* Repartition preferences. */ struct repartition { enum repartition_type type; - float fractionaltime; + float trigger; + float minfrac; }; /* Simple descriptions of types for reports. */