diff --git a/.gitignore b/.gitignore index b44b5189190bd6593368c9ce8fbbd19d1d34d6ce..3c6ecf6ece8e10520e37bd5f305a533e28976e33 100644 --- a/.gitignore +++ b/.gitignore @@ -29,9 +29,9 @@ examples/*/*.txt examples/*/used_parameters.yml examples/*/*/*.xmf examples/*/*/*.hdf5 +examples/*/*/*.png examples/*/*/*.txt examples/*/*/used_parameters.yml -examples/*/*.png tests/testPair tests/brute_force_standard.dat diff --git a/configure.ac b/configure.ac index c94f5c23b9cc5868a199ad4d177053c6f94f639d..224b7f722fda80a653bd47914a0ffda3881e49f1 100644 --- a/configure.ac +++ b/configure.ac @@ -105,8 +105,9 @@ if test "$enable_mpi" = "yes"; then AC_PATH_PROG([MPIRUN],[mpirun],[notfound]) fi if test "$MPIRUN" = "notfound"; then + # This may not be fatal (some systems do not allow mpirun on + # development nodes)., so push on. AC_MSG_WARN([Cannot find mpirun command on PATH, thread support may not be correct]) - enable_mpi="no" else # Special options we know about. # Intel: -mt_mpi diff --git a/examples/SedovBlast_1D/sedov.yml b/examples/SedovBlast_1D/sedov.yml index 2a15d6ed22e71735c04274cee3f719b9d21f170e..87ab2e81e11e184eebbe024ea75ac9205f7530c5 100644 --- a/examples/SedovBlast_1D/sedov.yml +++ b/examples/SedovBlast_1D/sedov.yml @@ -11,7 +11,7 @@ TimeIntegration: time_begin: 0. # The starting time of the simulation (in internal units). time_end: 5e-2 # The end time of the simulation (in internal units). dt_min: 1e-7 # The minimal time-step size of the simulation (in internal units). - dt_max: 1e-4 # The maximal time-step size of the simulation (in internal units). + dt_max: 1e-5 # The maximal time-step size of the simulation (in internal units). # Parameters governing the snapshots Snapshots: diff --git a/examples/SodShock_2D/makeIC.py b/examples/SodShock_2D/makeIC.py index fdc1610df8cb87b3057323b1330e4c3044f36241..850ca24f54c39990a9b0c54c0d2f361a2aa01e95 100644 --- a/examples/SodShock_2D/makeIC.py +++ b/examples/SodShock_2D/makeIC.py @@ -88,7 +88,7 @@ file = h5py.File(fileName, 'w') # Header grp = file.create_group("/Header") -grp.attrs["BoxSize"] = [boxSize, 0.5, 0.1] +grp.attrs["BoxSize"] = [boxSize, 0.5, 1.0] grp.attrs["NumPart_Total"] = [numPart, 0, 0, 0, 0, 0] grp.attrs["NumPart_Total_HighWord"] = [0, 0, 0, 0, 0, 0] grp.attrs["NumPart_ThisFile"] = [numPart, 0, 0, 0, 0, 0] diff --git a/examples/main.c b/examples/main.c index 8d4289c872ce34ba626cacffca4097c0093fe0c3..034b800887928c049a610c27ef7c916573c71be6 100644 --- a/examples/main.c +++ b/examples/main.c @@ -348,7 +348,7 @@ int main(int argc, char *argv[]) { #endif /* Prepare the domain decomposition scheme */ - enum repartition_type reparttype = REPART_NONE; + struct repartition reparttype; #ifdef WITH_MPI struct partition initial_partition; partition_init(&initial_partition, &reparttype, params, nr_nodes); @@ -360,7 +360,7 @@ int main(int argc, char *argv[]) { if (initial_partition.type == INITPART_GRID) message("grid set to [ %i %i %i ].", initial_partition.grid[0], initial_partition.grid[1], initial_partition.grid[2]); - message("Using %s repartitioning", repartition_name[reparttype]); + message("Using %s repartitioning", repartition_name[reparttype.type]); } #endif @@ -444,8 +444,8 @@ int main(int argc, char *argv[]) { long long N_total[3] = {0, 0, 0}; #if defined(WITH_MPI) long long N_long[3] = {Ngas, Ngpart, Nspart}; - MPI_Reduce(&N_long, &N_total, 3, MPI_LONG_LONG_INT, MPI_SUM, 0, - MPI_COMM_WORLD); + MPI_Allreduce(&N_long, &N_total, 3, MPI_LONG_LONG_INT, MPI_SUM, + MPI_COMM_WORLD); #else N_total[0] = Ngas; N_total[1] = Ngpart; @@ -527,10 +527,10 @@ int main(int argc, char *argv[]) { /* Initialize the engine with the space and policies. */ if (myrank == 0) clocks_gettime(&tic); struct engine e; - engine_init(&e, &s, params, nr_nodes, myrank, nr_threads, with_aff, - engine_policies, talking, reparttype, &us, &prog_const, - &hydro_properties, &gravity_properties, &potential, &cooling_func, - &sourceterms); + engine_init(&e, &s, params, nr_nodes, myrank, nr_threads, N_total[0], + N_total[1], with_aff, engine_policies, talking, &reparttype, &us, + &prog_const, &hydro_properties, &gravity_properties, &potential, + &cooling_func, &sourceterms); if (myrank == 0) { clocks_gettime(&toc); message("engine_init took %.3f %s.", clocks_diff(&tic, &toc), diff --git a/examples/parameter_example.yml b/examples/parameter_example.yml index 7c69d19bc215472c90bb1b91f23f46702afc64f2..501863a7047df33592fe77b523fdb211dfb7d16b 100644 --- a/examples/parameter_example.yml +++ b/examples/parameter_example.yml @@ -1,4 +1,4 @@ -# Define the system of units to use internally. +# Define the system of units to use internally. InternalUnitSystem: UnitMass_in_cgs: 1 # Grams UnitLength_in_cgs: 1 # Centimeters @@ -38,7 +38,7 @@ Snapshots: Statistics: delta_time: 1e-2 # Time between statistics output energy_file_name: energy # (Optional) File name for energy output - timestep_file_name: timesteps # (Optional) File name for timing information output. Note: No underscores "_" allowed in file name + timestep_file_name: timesteps # (Optional) File name for timing information output. Note: No underscores "_" allowed in file name # Parameters for the hydrodynamics scheme SPH: @@ -51,11 +51,11 @@ SPH: # Parameters for the self-gravity scheme Gravity: - eta: 0.025 # Constant dimensionless multiplier for time integration. + eta: 0.025 # Constant dimensionless multiplier for time integration. epsilon: 0.1 # Softening length (in internal units). a_smooth: 1.25 # (Optional) Smoothing scale in top-level cell sizes to smooth the long-range forces over (this is the default value). r_cut: 4.5 # (Optional) Cut-off in number of top-level cells beyond which no FMM forces are computed (this is the default value). - + # Parameters related to the initial conditions InitialConditions: file_name: SedovBlast/sedov.hdf5 # The file to read @@ -67,14 +67,20 @@ InitialConditions: # Parameters governing domain decomposition DomainDecomposition: - initial_type: m # (Optional) The initial strategy ("g", "m", "w", or "v"). - initial_grid_x: 10 # (Optional) Grid size if the "g" strategy is chosen. - initial_grid_y: 10 - initial_grid_z: 10 - repartition_type: b # (Optional) The re-decomposition strategy ("n", "b", "v", "e" or "x"). - + initial_type: simple_metis # (Optional) The initial decomposition strategy: "grid", + # "simple_metis", "weighted_metis", or "vectorized". + initial_grid_x: 10 # (Optional) Grid size if the "grid" strategy is chosen. + initial_grid_y: 10 # "" + initial_grid_z: 10 # "" + repartition_type: task_weights # (Optional) The re-decomposition strategy: "none", "task_weights", "particle_weights", + # "edge_task_weights" or "hybrid_weights". + trigger: 0.05 # (Optional) Fractional (<1) CPU time difference between MPI ranks required to trigger a + # new decomposition, or number of steps (>1) between decompositions + minfrac: 0.9 # (Optional) Fractional of all particles that should be updated in previous step when + # using CPU time trigger + # Parameters related to external potentials -------------------------------------------- - + # Point mass external potentials PointMassPotential: position_x: 50. # location of external point mass (internal units) @@ -91,7 +97,7 @@ IsothermalPotential: vrot: 200. # Rotation speed of isothermal potential (internal units) timestep_mult: 0.03 # Dimensionless pre-factor for the time-step condition epsilon: 0.1 # Softening size (internal units) - + # Disk-patch potential parameters DiscPatchPotential: surface_density: 10. # Surface density of the disc (internal units) diff --git a/src/clocks.c b/src/clocks.c index 5d662a2c1cc67d56c6f08b334554935cdecd4611..d5f0e571fe0a4694c4088bb05014fafa99d60488 100644 --- a/src/clocks.c +++ b/src/clocks.c @@ -244,3 +244,22 @@ const char *clocks_get_timesincestart() { return buffer; } + +/** + * @brief return the cpu time used. + * + * Uses the times(2) function to access the user cpu times and returns the sum + * of these for the process tree, i.e. current process plus "waited-for" + * children. This may be pthread implementation specific as to what that + * exactly means. Note we do not include the system time as that includes + * spin times and we don't want to give credit for that. + * + * @result cpu time used in sysconf(_SC_CLK_TCK) ticks, usually 100/s not our + * usual ticks. + */ +double clocks_get_cputime_used() { + + struct tms tmstic; + times(&tmstic); + return (double)(tmstic.tms_utime + tmstic.tms_cutime); +} diff --git a/src/clocks.h b/src/clocks.h index 1b4640d605c5ff5d0d9a3d07af158e346695974e..4c69e130d9bfc5e61fb03fc9820ffc76d2440dc4 100644 --- a/src/clocks.h +++ b/src/clocks.h @@ -19,6 +19,7 @@ #ifndef SWIFT_CLOCKS_H #define SWIFT_CLOCKS_H +#include <sys/times.h> #include <time.h> #include "cycle.h" @@ -41,4 +42,6 @@ double clocks_from_ticks(ticks tics); double clocks_diff_ticks(ticks tic, ticks toc); const char *clocks_get_timesincestart(); +double clocks_get_cputime_used(); + #endif /* SWIFT_CLOCKS_H */ diff --git a/src/dimension.h b/src/dimension.h index f81f953f664c458f426a83e345e91921b28a55b8..7c58b5b846490e8f5227a232eaaeb3df5995f795 100644 --- a/src/dimension.h +++ b/src/dimension.h @@ -39,23 +39,26 @@ #define hydro_dimension 3.f #define hydro_dimension_inv 0.3333333333f -#define hydro_dimention_unit_sphere ((float)(4. * M_PI / 3.)) +#define hydro_dimension_unit_sphere ((float)(4. * M_PI / 3.)) +#define hydro_dimension_unit_sphere_inv ((float)(3. * M_1_PI / 4.)) #elif defined(HYDRO_DIMENSION_2D) #define hydro_dimension 2.f #define hydro_dimension_inv 0.5f -#define hydro_dimention_unit_sphere ((float)M_PI) +#define hydro_dimension_unit_sphere ((float)M_PI) +#define hydro_dimension_unit_sphere_inv ((float)M_1_PI) #elif defined(HYDRO_DIMENSION_1D) #define hydro_dimension 1.f #define hydro_dimension_inv 1.f -#define hydro_dimention_unit_sphere 2.f +#define hydro_dimension_unit_sphere 2.f +#define hydro_dimension_unit_sphere_inv 0.5f #else -#error "A problem dimensionality must be chosen in const.h !" +#error "A problem dimensionality must be chosen in config.h !" #endif @@ -201,6 +204,35 @@ invert_dimension_by_dimension_matrix(float A[3][3]) { #endif } +/** + * @brief Get the radius of a dimension sphere with the given volume + * + * @param volume Volume of the dimension sphere + * @return Radius of the dimension sphere + */ +__attribute__((always_inline)) INLINE static float get_radius_dimension_sphere( + float volume) { + +#if defined(HYDRO_DIMENSION_3D) + + return cbrtf(volume * hydro_dimension_unit_sphere_inv); + +#elif defined(HYDRO_DIMENSION_2D) + + return sqrtf(volume * hydro_dimension_unit_sphere_inv); + +#elif defined(HYDRO_DIMENSION_1D) + + return volume * hydro_dimension_unit_sphere_inv; + +#else + + error("The dimension is not defined !"); + return 0.f; + +#endif +} + /* ------------------------------------------------------------------------- */ #ifdef WITH_VECTORIZATION diff --git a/src/engine.c b/src/engine.c index 87f10f60b2e802d3ca8dac672125a507e87b9f77..fd6cb92bb09604dafac751160413932c19788469 100644 --- a/src/engine.c +++ b/src/engine.c @@ -882,6 +882,84 @@ void engine_repartition(struct engine *e) { #endif } +/** + * @brief Decide whether trigger a repartition the cells amongst the nodes. + * + * @param e The #engine. + */ +void engine_repartition_trigger(struct engine *e) { + +#ifdef WITH_MPI + + /* Do nothing if there have not been enough steps since the last + * repartition, don't want to repeat this too often or immediately after + * a repartition step. */ + if (e->step - e->last_repartition > 2) { + + /* Old style if trigger is >1 or this is the second step (want an early + * repartition following the initial repartition). */ + if (e->reparttype->trigger > 1 || e->step == 2) { + if (e->reparttype->trigger > 1) { + if (e->step % (int)e->reparttype->trigger == 2) e->forcerepart = 1; + } else { + e->forcerepart = 1; + } + + } else { + + /* Use cputimes from ranks to estimate the imbalance. */ + /* First check if we are going to skip this stage anyway, if so do that + * now. If is only worth checking the CPU loads when we have processed a + * significant number of all particles. */ + if ((e->updates > 1 && + e->updates >= e->total_nr_parts * e->reparttype->minfrac) || + (e->g_updates > 1 && + e->g_updates >= e->total_nr_gparts * e->reparttype->minfrac)) { + + /* Get CPU time used since the last call to this function. */ + double elapsed_cputime = + clocks_get_cputime_used() - e->cputime_last_step; + + /* Gather the elapsed CPU times from all ranks for the last step. */ + double elapsed_cputimes[e->nr_nodes]; + MPI_Gather(&elapsed_cputime, 1, MPI_DOUBLE, elapsed_cputimes, 1, + MPI_DOUBLE, 0, MPI_COMM_WORLD); + if (e->nodeID == 0) { + + /* Get the range and mean of cputimes. */ + double mintime = elapsed_cputimes[0]; + double maxtime = elapsed_cputimes[0]; + double sum = elapsed_cputimes[0]; + for (int k = 1; k < e->nr_nodes; k++) { + if (elapsed_cputimes[k] > maxtime) maxtime = elapsed_cputimes[k]; + if (elapsed_cputimes[k] < mintime) mintime = elapsed_cputimes[k]; + sum += elapsed_cputimes[k]; + } + double mean = sum / (double)e->nr_nodes; + + /* Are we out of balance? */ + if (((maxtime - mintime) / mean) > e->reparttype->trigger) { + if (e->verbose) + message("trigger fraction %.3f exceeds %.3f will repartition", + (maxtime - mintime) / mintime, e->reparttype->trigger); + e->forcerepart = 1; + } + } + + /* All nodes do this together. */ + MPI_Bcast(&e->forcerepart, 1, MPI_INT, 0, MPI_COMM_WORLD); + } + } + + /* Remember we did this. */ + if (e->forcerepart) e->last_repartition = e->step; + } + + /* We always reset CPU time for next check. */ + e->cputime_last_step = clocks_get_cputime_used(); +#endif +} + /** * @brief Add up/down gravity tasks to a cell hierarchy. * @@ -3041,7 +3119,9 @@ void engine_step(struct engine *e) { struct clocks_time time1, time2; clocks_gettime(&time1); +#ifdef SWIFT_DEBUG_TASKS e->tic_step = getticks(); +#endif if (e->nodeID == 0) { @@ -3069,9 +3149,9 @@ void engine_step(struct engine *e) { /* Prepare the tasks to be launched, rebuild or repartition if needed. */ engine_prepare(e); -/* Repartition the space amongst the nodes? */ -#if defined(WITH_MPI) && defined(HAVE_METIS) - if (e->step % 100 == 2) e->forcerepart = 1; +#ifdef WITH_MPI + /* Repartition the space amongst the nodes? */ + engine_repartition_trigger(e); #endif /* Are we drifting everything (a la Gadget/GIZMO) ? */ @@ -3144,9 +3224,12 @@ void engine_step(struct engine *e) { TIMER_TOC2(timer_step); clocks_gettime(&time2); - e->wallclock_time = (float)clocks_diff(&time1, &time2); + +#ifdef SWIFT_DEBUG_TASKS + /* Time in ticks at the end of this step. */ e->toc_step = getticks(); +#endif } /** @@ -3530,6 +3613,8 @@ void engine_unpin() { * @param nr_nodes The number of MPI ranks. * @param nodeID The MPI rank of this node. * @param nr_threads The number of threads per MPI rank. + * @param Ngas total number of gas particles in the simulation. + * @param Ndm total number of gravity particles in the simulation. * @param with_aff use processor affinity, if supported. * @param policy The queuing policy to use. * @param verbose Is this #engine talkative ? @@ -3544,8 +3629,8 @@ void engine_unpin() { */ void engine_init(struct engine *e, struct space *s, const struct swift_params *params, int nr_nodes, int nodeID, - int nr_threads, int with_aff, int policy, int verbose, - enum repartition_type reparttype, + int nr_threads, int Ngas, int Ndm, int with_aff, int policy, + int verbose, struct repartition *reparttype, const struct unit_system *internal_units, const struct phys_const *physical_constants, const struct hydro_props *hydro, @@ -3564,6 +3649,8 @@ void engine_init(struct engine *e, struct space *s, e->step = 0; e->nr_nodes = nr_nodes; e->nodeID = nodeID; + e->total_nr_parts = Ngas; + e->total_nr_gparts = Ndm; e->proxy_ind = NULL; e->nr_proxies = 0; e->forcerebuild = 1; @@ -3611,6 +3698,10 @@ void engine_init(struct engine *e, struct space *s, e->cooling_func = cooling_func; e->sourceterms = sourceterms; e->parameter_file = params; +#ifdef WITH_MPI + e->cputime_last_step = 0; + e->last_repartition = -1; +#endif engine_rank = nodeID; /* Make the space link back to the engine. */ diff --git a/src/engine.h b/src/engine.h index 80903ee78156c7e4efb2650e59e4fa832fecbfa3..a0e32ad15b79c364d13d19589f8462ff8705ee29 100644 --- a/src/engine.h +++ b/src/engine.h @@ -144,6 +144,9 @@ struct engine { /* Number of particles updated */ size_t updates, g_updates, s_updates; + /* Total numbers of particles in the system. */ + size_t total_nr_parts, total_nr_gparts; + /* The internal system of units */ const struct unit_system *internal_units; @@ -181,8 +184,18 @@ struct engine { struct proxy *proxies; int nr_proxies, *proxy_ind; +#ifdef SWIFT_DEBUG_TASKS /* Tic/toc at the start/end of a step. */ ticks tic_step, toc_step; +#endif + +#ifdef WITH_MPI + /* CPU time of the last step. */ + double cputime_last_step; + + /* Step of last repartition. */ + int last_repartition; +#endif /* Wallclock time of the last time-step */ float wallclock_time; @@ -192,7 +205,7 @@ struct engine { /* Force the engine to repartition ? */ int forcerepart; - enum repartition_type reparttype; + struct repartition *reparttype; /* Need to dump some statistics ? */ int save_stats; @@ -240,14 +253,14 @@ void engine_drift_all(struct engine *e); void engine_dump_snapshot(struct engine *e); void engine_init(struct engine *e, struct space *s, const struct swift_params *params, int nr_nodes, int nodeID, - int nr_threads, int with_aff, int policy, int verbose, - enum repartition_type reparttype, + int nr_threads, int Ngas, int Ndm, int with_aff, int policy, + int verbose, struct repartition *reparttype, const struct unit_system *internal_units, const struct phys_const *physical_constants, const struct hydro_props *hydro, const struct gravity_props *gravity, const struct external_potential *potential, - const struct cooling_function_data *cooling, + const struct cooling_function_data *cooling_func, struct sourceterms *sourceterms); void engine_launch(struct engine *e, int nr_runners); void engine_prepare(struct engine *e); @@ -262,6 +275,7 @@ void engine_exchange_strays(struct engine *e, size_t offset_parts, size_t *Nspart); void engine_rebuild(struct engine *e); void engine_repartition(struct engine *e); +void engine_repartition_trigger(struct engine *e); void engine_makeproxies(struct engine *e); void engine_redistribute(struct engine *e); void engine_print_policy(struct engine *e); diff --git a/src/equation_of_state.h b/src/equation_of_state.h index 5e570fc6343f11eb2c71720cfd51afe52161ff02..28c97c7b96b778c7bbb7bcbfb6ffe682ce54ba22 100644 --- a/src/equation_of_state.h +++ b/src/equation_of_state.h @@ -69,6 +69,21 @@ __attribute__((always_inline)) INLINE static float gas_pressure_from_entropy( return entropy * pow_gamma(density); } +/** + * @brief Returns the entropy given density and pressure. + * + * Computes \f$A = \frac{P}{\rho^\gamma}\f$. + * + * @param density The density \f$\rho\f$. + * @param pressure The pressure \f$P\f$. + * @return The entropy \f$A\f$. + */ +__attribute__((always_inline)) INLINE static float gas_entropy_from_pressure( + float density, float pressure) { + + return pressure * pow_minus_gamma(density); +} + /** * @brief Returns the sound speed given density and entropy * @@ -111,6 +126,20 @@ gas_pressure_from_internal_energy(float density, float u) { return hydro_gamma_minus_one * u * density; } +/** + * @brief Returns the internal energy given density and pressure. + * + * Computes \f$u = \frac{1}{\gamma - 1}\frac{P}{\rho}\f$. + * + * @param density The density \f$\rho\f$. + * @param pressure The pressure \f$P\f$. + * @return The internal energy \f$u\f$. + */ +__attribute__((always_inline)) INLINE static float +gas_internal_energy_from_pressure(float density, float pressure) { + return hydro_one_over_gamma_minus_one * pressure / density; +} + /** * @brief Returns the sound speed given density and internal energy * @@ -172,6 +201,22 @@ __attribute__((always_inline)) INLINE static float gas_pressure_from_entropy( return hydro_gamma_minus_one * const_isothermal_internal_energy * density; } +/** + * @brief Returns the entropy given density and pressure. + * + * Computes \f$A = \frac{P}{\rho^\gamma}\f$. + * + * @param density The density \f$\rho\f$. + * @param pressure The pressure \f$P\f$ (ignored). + * @return The entropy \f$A\f$. + */ +__attribute__((always_inline)) INLINE static float gas_entropy_from_pressure( + float density, float pressure) { + + return hydro_gamma_minus_one * const_isothermal_internal_energy * + pow_minus_gamma_minus_one(density); +} + /** * @brief Returns the sound speed given density and entropy * @@ -219,6 +264,20 @@ gas_pressure_from_internal_energy(float density, float u) { return hydro_gamma_minus_one * const_isothermal_internal_energy * density; } +/** + * @brief Returns the internal energy given density and pressure. + * + * Just returns the constant internal energy. + * + * @param density The density \f$\rho\f$ (ignored). + * @param pressure The pressure \f$P\f$ (ignored). + * @return The internal energy \f$u\f$ (which is constant). + */ +__attribute__((always_inline)) INLINE static float +gas_internal_energy_from_pressure(float density, float pressure) { + return const_isothermal_energy; +} + /** * @brief Returns the sound speed given density and internal energy * diff --git a/src/kernel_hydro.h b/src/kernel_hydro.h index de495533a966af2986fd788cff673f434f79ad7c..e791360c1a95533186e2588bc03fce90dc269442 100644 --- a/src/kernel_hydro.h +++ b/src/kernel_hydro.h @@ -230,7 +230,7 @@ static const float kernel_coeffs[(kernel_degree + 1) * (kernel_ivals + 1)] kernel_gamma_inv_dim) /* Kernel normalisation constant (volume term) */ -#define kernel_norm ((float)(hydro_dimention_unit_sphere * kernel_gamma_dim)) +#define kernel_norm ((float)(hydro_dimension_unit_sphere * kernel_gamma_dim)) /* ------------------------------------------------------------------------- */ diff --git a/src/parallel_io.c b/src/parallel_io.c index 89945b58d96f22c93699ebe78935cd00a0fc3d54..b857fd76a53738b19e5b26b8717881e71c424b6e 100644 --- a/src/parallel_io.c +++ b/src/parallel_io.c @@ -441,10 +441,17 @@ void read_ic_parallel(char* fileName, const struct unit_system* internal_units, N_total[ptype] = (numParticles[ptype]) + (numParticles_highWord[ptype] << 32); + /* Get the box size if not cubic */ dim[0] = boxSize[0]; dim[1] = (boxSize[1] < 0) ? boxSize[0] : boxSize[1]; dim[2] = (boxSize[2] < 0) ? boxSize[0] : boxSize[2]; + /* Change box size in the 1D and 2D case */ + if (hydro_dimension == 2) + dim[2] = min(dim[0], dim[1]); + else if (hydro_dimension == 1) + dim[2] = dim[1] = dim[0]; + /* message("Found %lld particles in a %speriodic box of size [%f %f %f].", */ /* N_total[0], (periodic ? "": "non-"), dim[0], */ /* dim[1], dim[2]); */ diff --git a/src/partition.c b/src/partition.c index 5a4d2f8d2e0bb4a3f1c8d1b3e7dc32293965c7dc..49dbf883e0dea3f00c93ca33ec8cb0248bbfbfaa 100644 --- a/src/partition.c +++ b/src/partition.c @@ -780,31 +780,31 @@ static void repart_vertex_metis(struct space *s, int nodeID, int nr_nodes) { * Note that at the end of this process all the cells will be re-distributed * across the nodes, but the particles themselves will not be. * - * @param reparttype the type of repartition to attempt, see the repart_type - *enum. + * @param reparttype #repartition struct * @param nodeID our nodeID. * @param nr_nodes the number of nodes. * @param s the space of cells holding our local particles. * @param tasks the completed tasks from the last engine step for our node. * @param nr_tasks the number of tasks. */ -void partition_repartition(enum repartition_type reparttype, int nodeID, +void partition_repartition(struct repartition *reparttype, int nodeID, int nr_nodes, struct space *s, struct task *tasks, int nr_tasks) { #if defined(WITH_MPI) && defined(HAVE_METIS) - if (reparttype == REPART_METIS_BOTH || reparttype == REPART_METIS_EDGE || - reparttype == REPART_METIS_VERTEX_EDGE) { + if (reparttype->type == REPART_METIS_BOTH || + reparttype->type == REPART_METIS_EDGE || + reparttype->type == REPART_METIS_VERTEX_EDGE) { int partweights; int bothweights; - if (reparttype == REPART_METIS_VERTEX_EDGE) + if (reparttype->type == REPART_METIS_VERTEX_EDGE) partweights = 1; else partweights = 0; - if (reparttype == REPART_METIS_BOTH) + if (reparttype->type == REPART_METIS_BOTH) bothweights = 1; else bothweights = 0; @@ -812,10 +812,14 @@ void partition_repartition(enum repartition_type reparttype, int nodeID, repart_edge_metis(partweights, bothweights, nodeID, nr_nodes, s, tasks, nr_tasks); - } else if (reparttype == REPART_METIS_VERTEX) { + } else if (reparttype->type == REPART_METIS_VERTEX) { repart_vertex_metis(s, nodeID, nr_nodes); + } else if (reparttype->type == REPART_NONE) { + + /* Doing nothing. */ + } else { error("Unknown repartition type"); } @@ -976,27 +980,26 @@ void partition_initial_partition(struct partition *initial_partition, /** * @brief Initialises the partition and re-partition scheme from the parameter - *file + * file * * @param partition The #partition scheme to initialise. - * @param reparttype The repartition scheme to initialise. + * @param repartition The #repartition scheme to initialise. * @param params The parsed parameter file. * @param nr_nodes The number of MPI nodes we are running on. */ void partition_init(struct partition *partition, - enum repartition_type *reparttype, + struct repartition *repartition, const struct swift_params *params, int nr_nodes) { #ifdef WITH_MPI /* Defaults make use of METIS if available */ #ifdef HAVE_METIS - char default_repart = 'b'; - ; - char default_part = 'm'; + const char *default_repart = "task_weights"; + const char *default_part = "simple_metis"; #else - char default_repart = 'n'; - char default_part = 'g'; + const char *default_repart = "none"; + const char *default_part = "grid"; #endif /* Set a default grid so that grid[0]*grid[1]*grid[2] == nr_nodes. */ @@ -1007,10 +1010,10 @@ void partition_init(struct partition *partition, &partition->grid[0]); /* Now let's check what the user wants as an initial domain. */ - const char part_type = parser_get_opt_param_char( - params, "DomainDecomposition:initial_type", default_part); - - switch (part_type) { + char part_type[20]; + parser_get_opt_param_string(params, "DomainDecomposition:initial_type", + part_type, default_part); + switch (part_type[0]) { case 'g': partition->type = INITPART_GRID; break; @@ -1018,24 +1021,28 @@ void partition_init(struct partition *partition, partition->type = INITPART_VECTORIZE; break; #ifdef HAVE_METIS - case 'm': + case 's': partition->type = INITPART_METIS_NOWEIGHT; break; case 'w': partition->type = INITPART_METIS_WEIGHT; break; default: - message("Invalid choice of initial partition type '%c'.", part_type); - error("Permitted values are: 'g','m','v' or 'w'."); + message("Invalid choice of initial partition type '%s'.", part_type); + error( + "Permitted values are: 'grid', 'simple_metis', 'weighted_metis'" + " or 'vectorized'"); #else default: - message("Invalid choice of initial partition type '%c'.", part_type); - error("Permitted values are: 'g' or 'v' when compiled without metis."); + message("Invalid choice of initial partition type '%s'.", part_type); + error( + "Permitted values are: 'grid' or 'vectorized' when compiled " + "without METIS."); #endif } /* In case of grid, read more parameters */ - if (part_type == 'g') { + if (part_type[0] == 'g') { partition->grid[0] = parser_get_opt_param_int( params, "DomainDecomposition:initial_grid_x", partition->grid[0]); partition->grid[1] = parser_get_opt_param_int( @@ -1045,36 +1052,54 @@ void partition_init(struct partition *partition, } /* Now let's check what the user wants as a repartition strategy */ - const char repart_type = parser_get_opt_param_char( - params, "DomainDecomposition:repartition_type", default_repart); + parser_get_opt_param_string(params, "DomainDecomposition:repartition_type", + part_type, default_repart); - switch (repart_type) { + switch (part_type[0]) { case 'n': - *reparttype = REPART_NONE; + repartition->type = REPART_NONE; break; #ifdef HAVE_METIS - case 'b': - *reparttype = REPART_METIS_BOTH; + case 't': + repartition->type = REPART_METIS_BOTH; break; case 'e': - *reparttype = REPART_METIS_EDGE; + repartition->type = REPART_METIS_EDGE; break; - case 'v': - *reparttype = REPART_METIS_VERTEX; + case 'p': + repartition->type = REPART_METIS_VERTEX; break; - case 'x': - *reparttype = REPART_METIS_VERTEX_EDGE; + case 'h': + repartition->type = REPART_METIS_VERTEX_EDGE; break; default: - message("Invalid choice of re-partition type '%c'.", repart_type); - error("Permitted values are: 'b','e','n', 'v' or 'x'."); + message("Invalid choice of re-partition type '%s'.", part_type); + error( + "Permitted values are: 'none', 'task_weights', 'particle_weights'," + "'edge_task_weights' or 'hybrid_weights'"); #else default: - message("Invalid choice of re-partition type '%c'.", repart_type); - error("Permitted values are: 'n' when compiled without metis."); + message("Invalid choice of re-partition type '%s'.", part_type); + error("Permitted values are: 'none' when compiled without METIS."); #endif } + /* Get the fraction CPU time difference between nodes (<1) or the number + * of steps between repartitions (>1). */ + repartition->trigger = + parser_get_opt_param_float(params, "DomainDecomposition:trigger", 0.05f); + if (repartition->trigger <= 0) + error("Invalid DomainDecomposition:trigger, must be greater than zero"); + + /* Fraction of particles that should be updated before a repartition + * based on CPU time is considered. */ + repartition->minfrac = + parser_get_opt_param_float(params, "DomainDecomposition:minfrac", 0.9f); + if (repartition->minfrac <= 0 || repartition->minfrac > 1) + error( + "Invalid DomainDecomposition:minfrac, must be greater than 0 and less " + "than equal to 1"); + #else error("SWIFT was not compiled with MPI support"); #endif diff --git a/src/partition.h b/src/partition.h index b2a132ed48e48573949d16291f72218990589158..03523b165a930b085224e458ac0dd8c8232a578d 100644 --- a/src/partition.h +++ b/src/partition.h @@ -39,6 +39,7 @@ struct partition { enum partition_type type; int grid[3]; }; + /* Repartition type to use. */ enum repartition_type { REPART_NONE = 0, @@ -48,10 +49,17 @@ enum repartition_type { REPART_METIS_VERTEX_EDGE }; +/* Repartition preferences. */ +struct repartition { + enum repartition_type type; + float trigger; + float minfrac; +}; + /* Simple descriptions of types for reports. */ extern const char *repartition_name[]; -void partition_repartition(enum repartition_type reparttype, int nodeID, +void partition_repartition(struct repartition *reparttype, int nodeID, int nr_nodes, struct space *s, struct task *tasks, int nr_tasks); void partition_initial_partition(struct partition *initial_partition, @@ -60,7 +68,7 @@ void partition_initial_partition(struct partition *initial_partition, int partition_space_to_space(double *oldh, double *oldcdim, int *oldnodeID, struct space *s); void partition_init(struct partition *partition, - enum repartition_type *reparttypestruct, + struct repartition *repartition, const struct swift_params *params, int nr_nodes); #endif /* SWIFT_PARTITION_H */ diff --git a/src/serial_io.c b/src/serial_io.c index 52c52ff24c186a04da3e3945aea6684abcd95476..a7e342f0a90fcf4c57f334526ff91b1923de4585 100644 --- a/src/serial_io.c +++ b/src/serial_io.c @@ -493,10 +493,17 @@ void read_ic_serial(char* fileName, const struct unit_system* internal_units, N_total[ptype] = (numParticles[ptype]) + (numParticles_highWord[ptype] << 32); + /* Get the box size if not cubic */ dim[0] = boxSize[0]; dim[1] = (boxSize[1] < 0) ? boxSize[0] : boxSize[1]; dim[2] = (boxSize[2] < 0) ? boxSize[0] : boxSize[2]; + /* Change box size in the 1D and 2D case */ + if (hydro_dimension == 2) + dim[2] = min(dim[0], dim[1]); + else if (hydro_dimension == 1) + dim[2] = dim[1] = dim[0]; + /* message("Found %lld particles in a %speriodic box of size [%f %f %f].", */ /* N_total, (periodic ? "": "non-"), dim[0], dim[1], dim[2]); */ diff --git a/src/single_io.c b/src/single_io.c index 85c1286f7f0d3769c30a79af411b03d1523aa292..0b091a5997504e5f5a4cc3b8af7ca06c994e993c 100644 --- a/src/single_io.c +++ b/src/single_io.c @@ -400,10 +400,17 @@ void read_ic_single(char* fileName, const struct unit_system* internal_units, N[ptype] = ((long long)numParticles[ptype]) + ((long long)numParticles_highWord[ptype] << 32); + /* Get the box size if not cubic */ dim[0] = boxSize[0]; dim[1] = (boxSize[1] < 0) ? boxSize[0] : boxSize[1]; dim[2] = (boxSize[2] < 0) ? boxSize[0] : boxSize[2]; + /* Change box size in the 1D and 2D case */ + if (hydro_dimension == 2) + dim[2] = min(dim[0], dim[1]); + else if (hydro_dimension == 1) + dim[2] = dim[1] = dim[0]; + /* message("Found %d particles in a %speriodic box of size [%f %f %f].", */ /* *N, (periodic ? "": "non-"), dim[0], dim[1], dim[2]); */