Commit cb0c811f authored by James Willis's avatar James Willis
Browse files

Merge branch 'master' into dopair-vectorisation-merge

parents 1b482dcc 4500d31f
......@@ -29,9 +29,9 @@ examples/*/*.txt
examples/*/used_parameters.yml
examples/*/*/*.xmf
examples/*/*/*.hdf5
examples/*/*/*.png
examples/*/*/*.txt
examples/*/*/used_parameters.yml
examples/*/*.png
tests/testPair
tests/brute_force_standard.dat
......
......@@ -105,8 +105,9 @@ if test "$enable_mpi" = "yes"; then
AC_PATH_PROG([MPIRUN],[mpirun],[notfound])
fi
if test "$MPIRUN" = "notfound"; then
# This may not be fatal (some systems do not allow mpirun on
# development nodes)., so push on.
AC_MSG_WARN([Cannot find mpirun command on PATH, thread support may not be correct])
enable_mpi="no"
else
# Special options we know about.
# Intel: -mt_mpi
......
......@@ -11,7 +11,7 @@ TimeIntegration:
time_begin: 0. # The starting time of the simulation (in internal units).
time_end: 5e-2 # The end time of the simulation (in internal units).
dt_min: 1e-7 # The minimal time-step size of the simulation (in internal units).
dt_max: 1e-4 # The maximal time-step size of the simulation (in internal units).
dt_max: 1e-5 # The maximal time-step size of the simulation (in internal units).
# Parameters governing the snapshots
Snapshots:
......
......@@ -88,7 +88,7 @@ file = h5py.File(fileName, 'w')
# Header
grp = file.create_group("/Header")
grp.attrs["BoxSize"] = [boxSize, 0.5, 0.1]
grp.attrs["BoxSize"] = [boxSize, 0.5, 1.0]
grp.attrs["NumPart_Total"] = [numPart, 0, 0, 0, 0, 0]
grp.attrs["NumPart_Total_HighWord"] = [0, 0, 0, 0, 0, 0]
grp.attrs["NumPart_ThisFile"] = [numPart, 0, 0, 0, 0, 0]
......
......@@ -348,7 +348,7 @@ int main(int argc, char *argv[]) {
#endif
/* Prepare the domain decomposition scheme */
enum repartition_type reparttype = REPART_NONE;
struct repartition reparttype;
#ifdef WITH_MPI
struct partition initial_partition;
partition_init(&initial_partition, &reparttype, params, nr_nodes);
......@@ -360,7 +360,7 @@ int main(int argc, char *argv[]) {
if (initial_partition.type == INITPART_GRID)
message("grid set to [ %i %i %i ].", initial_partition.grid[0],
initial_partition.grid[1], initial_partition.grid[2]);
message("Using %s repartitioning", repartition_name[reparttype]);
message("Using %s repartitioning", repartition_name[reparttype.type]);
}
#endif
......@@ -444,8 +444,8 @@ int main(int argc, char *argv[]) {
long long N_total[3] = {0, 0, 0};
#if defined(WITH_MPI)
long long N_long[3] = {Ngas, Ngpart, Nspart};
MPI_Reduce(&N_long, &N_total, 3, MPI_LONG_LONG_INT, MPI_SUM, 0,
MPI_COMM_WORLD);
MPI_Allreduce(&N_long, &N_total, 3, MPI_LONG_LONG_INT, MPI_SUM,
MPI_COMM_WORLD);
#else
N_total[0] = Ngas;
N_total[1] = Ngpart;
......@@ -527,10 +527,10 @@ int main(int argc, char *argv[]) {
/* Initialize the engine with the space and policies. */
if (myrank == 0) clocks_gettime(&tic);
struct engine e;
engine_init(&e, &s, params, nr_nodes, myrank, nr_threads, with_aff,
engine_policies, talking, reparttype, &us, &prog_const,
&hydro_properties, &gravity_properties, &potential, &cooling_func,
&sourceterms);
engine_init(&e, &s, params, nr_nodes, myrank, nr_threads, N_total[0],
N_total[1], with_aff, engine_policies, talking, &reparttype, &us,
&prog_const, &hydro_properties, &gravity_properties, &potential,
&cooling_func, &sourceterms);
if (myrank == 0) {
clocks_gettime(&toc);
message("engine_init took %.3f %s.", clocks_diff(&tic, &toc),
......
# Define the system of units to use internally.
# Define the system of units to use internally.
InternalUnitSystem:
UnitMass_in_cgs: 1 # Grams
UnitLength_in_cgs: 1 # Centimeters
......@@ -38,7 +38,7 @@ Snapshots:
Statistics:
delta_time: 1e-2 # Time between statistics output
energy_file_name: energy # (Optional) File name for energy output
timestep_file_name: timesteps # (Optional) File name for timing information output. Note: No underscores "_" allowed in file name
timestep_file_name: timesteps # (Optional) File name for timing information output. Note: No underscores "_" allowed in file name
# Parameters for the hydrodynamics scheme
SPH:
......@@ -51,11 +51,11 @@ SPH:
# Parameters for the self-gravity scheme
Gravity:
eta: 0.025 # Constant dimensionless multiplier for time integration.
eta: 0.025 # Constant dimensionless multiplier for time integration.
epsilon: 0.1 # Softening length (in internal units).
a_smooth: 1.25 # (Optional) Smoothing scale in top-level cell sizes to smooth the long-range forces over (this is the default value).
r_cut: 4.5 # (Optional) Cut-off in number of top-level cells beyond which no FMM forces are computed (this is the default value).
# Parameters related to the initial conditions
InitialConditions:
file_name: SedovBlast/sedov.hdf5 # The file to read
......@@ -67,14 +67,20 @@ InitialConditions:
# Parameters governing domain decomposition
DomainDecomposition:
initial_type: m # (Optional) The initial strategy ("g", "m", "w", or "v").
initial_grid_x: 10 # (Optional) Grid size if the "g" strategy is chosen.
initial_grid_y: 10
initial_grid_z: 10
repartition_type: b # (Optional) The re-decomposition strategy ("n", "b", "v", "e" or "x").
initial_type: simple_metis # (Optional) The initial decomposition strategy: "grid",
# "simple_metis", "weighted_metis", or "vectorized".
initial_grid_x: 10 # (Optional) Grid size if the "grid" strategy is chosen.
initial_grid_y: 10 # ""
initial_grid_z: 10 # ""
repartition_type: task_weights # (Optional) The re-decomposition strategy: "none", "task_weights", "particle_weights",
# "edge_task_weights" or "hybrid_weights".
trigger: 0.05 # (Optional) Fractional (<1) CPU time difference between MPI ranks required to trigger a
# new decomposition, or number of steps (>1) between decompositions
minfrac: 0.9 # (Optional) Fractional of all particles that should be updated in previous step when
# using CPU time trigger
# Parameters related to external potentials --------------------------------------------
# Point mass external potentials
PointMassPotential:
position_x: 50. # location of external point mass (internal units)
......@@ -91,7 +97,7 @@ IsothermalPotential:
vrot: 200. # Rotation speed of isothermal potential (internal units)
timestep_mult: 0.03 # Dimensionless pre-factor for the time-step condition
epsilon: 0.1 # Softening size (internal units)
# Disk-patch potential parameters
DiscPatchPotential:
surface_density: 10. # Surface density of the disc (internal units)
......
......@@ -244,3 +244,22 @@ const char *clocks_get_timesincestart() {
return buffer;
}
/**
* @brief return the cpu time used.
*
* Uses the times(2) function to access the user cpu times and returns the sum
* of these for the process tree, i.e. current process plus "waited-for"
* children. This may be pthread implementation specific as to what that
* exactly means. Note we do not include the system time as that includes
* spin times and we don't want to give credit for that.
*
* @result cpu time used in sysconf(_SC_CLK_TCK) ticks, usually 100/s not our
* usual ticks.
*/
double clocks_get_cputime_used() {
struct tms tmstic;
times(&tmstic);
return (double)(tmstic.tms_utime + tmstic.tms_cutime);
}
......@@ -19,6 +19,7 @@
#ifndef SWIFT_CLOCKS_H
#define SWIFT_CLOCKS_H
#include <sys/times.h>
#include <time.h>
#include "cycle.h"
......@@ -41,4 +42,6 @@ double clocks_from_ticks(ticks tics);
double clocks_diff_ticks(ticks tic, ticks toc);
const char *clocks_get_timesincestart();
double clocks_get_cputime_used();
#endif /* SWIFT_CLOCKS_H */
......@@ -39,23 +39,26 @@
#define hydro_dimension 3.f
#define hydro_dimension_inv 0.3333333333f
#define hydro_dimention_unit_sphere ((float)(4. * M_PI / 3.))
#define hydro_dimension_unit_sphere ((float)(4. * M_PI / 3.))
#define hydro_dimension_unit_sphere_inv ((float)(3. * M_1_PI / 4.))
#elif defined(HYDRO_DIMENSION_2D)
#define hydro_dimension 2.f
#define hydro_dimension_inv 0.5f
#define hydro_dimention_unit_sphere ((float)M_PI)
#define hydro_dimension_unit_sphere ((float)M_PI)
#define hydro_dimension_unit_sphere_inv ((float)M_1_PI)
#elif defined(HYDRO_DIMENSION_1D)
#define hydro_dimension 1.f
#define hydro_dimension_inv 1.f
#define hydro_dimention_unit_sphere 2.f
#define hydro_dimension_unit_sphere 2.f
#define hydro_dimension_unit_sphere_inv 0.5f
#else
#error "A problem dimensionality must be chosen in const.h !"
#error "A problem dimensionality must be chosen in config.h !"
#endif
......@@ -201,6 +204,35 @@ invert_dimension_by_dimension_matrix(float A[3][3]) {
#endif
}
/**
* @brief Get the radius of a dimension sphere with the given volume
*
* @param volume Volume of the dimension sphere
* @return Radius of the dimension sphere
*/
__attribute__((always_inline)) INLINE static float get_radius_dimension_sphere(
float volume) {
#if defined(HYDRO_DIMENSION_3D)
return cbrtf(volume * hydro_dimension_unit_sphere_inv);
#elif defined(HYDRO_DIMENSION_2D)
return sqrtf(volume * hydro_dimension_unit_sphere_inv);
#elif defined(HYDRO_DIMENSION_1D)
return volume * hydro_dimension_unit_sphere_inv;
#else
error("The dimension is not defined !");
return 0.f;
#endif
}
/* ------------------------------------------------------------------------- */
#ifdef WITH_VECTORIZATION
......
......@@ -882,6 +882,84 @@ void engine_repartition(struct engine *e) {
#endif
}
/**
* @brief Decide whether trigger a repartition the cells amongst the nodes.
*
* @param e The #engine.
*/
void engine_repartition_trigger(struct engine *e) {
#ifdef WITH_MPI
/* Do nothing if there have not been enough steps since the last
* repartition, don't want to repeat this too often or immediately after
* a repartition step. */
if (e->step - e->last_repartition > 2) {
/* Old style if trigger is >1 or this is the second step (want an early
* repartition following the initial repartition). */
if (e->reparttype->trigger > 1 || e->step == 2) {
if (e->reparttype->trigger > 1) {
if (e->step % (int)e->reparttype->trigger == 2) e->forcerepart = 1;
} else {
e->forcerepart = 1;
}
} else {
/* Use cputimes from ranks to estimate the imbalance. */
/* First check if we are going to skip this stage anyway, if so do that
* now. If is only worth checking the CPU loads when we have processed a
* significant number of all particles. */
if ((e->updates > 1 &&
e->updates >= e->total_nr_parts * e->reparttype->minfrac) ||
(e->g_updates > 1 &&
e->g_updates >= e->total_nr_gparts * e->reparttype->minfrac)) {
/* Get CPU time used since the last call to this function. */
double elapsed_cputime =
clocks_get_cputime_used() - e->cputime_last_step;
/* Gather the elapsed CPU times from all ranks for the last step. */
double elapsed_cputimes[e->nr_nodes];
MPI_Gather(&elapsed_cputime, 1, MPI_DOUBLE, elapsed_cputimes, 1,
MPI_DOUBLE, 0, MPI_COMM_WORLD);
if (e->nodeID == 0) {
/* Get the range and mean of cputimes. */
double mintime = elapsed_cputimes[0];
double maxtime = elapsed_cputimes[0];
double sum = elapsed_cputimes[0];
for (int k = 1; k < e->nr_nodes; k++) {
if (elapsed_cputimes[k] > maxtime) maxtime = elapsed_cputimes[k];
if (elapsed_cputimes[k] < mintime) mintime = elapsed_cputimes[k];
sum += elapsed_cputimes[k];
}
double mean = sum / (double)e->nr_nodes;
/* Are we out of balance? */
if (((maxtime - mintime) / mean) > e->reparttype->trigger) {
if (e->verbose)
message("trigger fraction %.3f exceeds %.3f will repartition",
(maxtime - mintime) / mintime, e->reparttype->trigger);
e->forcerepart = 1;
}
}
/* All nodes do this together. */
MPI_Bcast(&e->forcerepart, 1, MPI_INT, 0, MPI_COMM_WORLD);
}
}
/* Remember we did this. */
if (e->forcerepart) e->last_repartition = e->step;
}
/* We always reset CPU time for next check. */
e->cputime_last_step = clocks_get_cputime_used();
#endif
}
/**
* @brief Add up/down gravity tasks to a cell hierarchy.
*
......@@ -3041,7 +3119,9 @@ void engine_step(struct engine *e) {
struct clocks_time time1, time2;
clocks_gettime(&time1);
#ifdef SWIFT_DEBUG_TASKS
e->tic_step = getticks();
#endif
if (e->nodeID == 0) {
......@@ -3069,9 +3149,9 @@ void engine_step(struct engine *e) {
/* Prepare the tasks to be launched, rebuild or repartition if needed. */
engine_prepare(e);
/* Repartition the space amongst the nodes? */
#if defined(WITH_MPI) && defined(HAVE_METIS)
if (e->step % 100 == 2) e->forcerepart = 1;
#ifdef WITH_MPI
/* Repartition the space amongst the nodes? */
engine_repartition_trigger(e);
#endif
/* Are we drifting everything (a la Gadget/GIZMO) ? */
......@@ -3144,9 +3224,12 @@ void engine_step(struct engine *e) {
TIMER_TOC2(timer_step);
clocks_gettime(&time2);
e->wallclock_time = (float)clocks_diff(&time1, &time2);
#ifdef SWIFT_DEBUG_TASKS
/* Time in ticks at the end of this step. */
e->toc_step = getticks();
#endif
}
/**
......@@ -3530,6 +3613,8 @@ void engine_unpin() {
* @param nr_nodes The number of MPI ranks.
* @param nodeID The MPI rank of this node.
* @param nr_threads The number of threads per MPI rank.
* @param Ngas total number of gas particles in the simulation.
* @param Ndm total number of gravity particles in the simulation.
* @param with_aff use processor affinity, if supported.
* @param policy The queuing policy to use.
* @param verbose Is this #engine talkative ?
......@@ -3544,8 +3629,8 @@ void engine_unpin() {
*/
void engine_init(struct engine *e, struct space *s,
const struct swift_params *params, int nr_nodes, int nodeID,
int nr_threads, int with_aff, int policy, int verbose,
enum repartition_type reparttype,
int nr_threads, int Ngas, int Ndm, int with_aff, int policy,
int verbose, struct repartition *reparttype,
const struct unit_system *internal_units,
const struct phys_const *physical_constants,
const struct hydro_props *hydro,
......@@ -3564,6 +3649,8 @@ void engine_init(struct engine *e, struct space *s,
e->step = 0;
e->nr_nodes = nr_nodes;
e->nodeID = nodeID;
e->total_nr_parts = Ngas;
e->total_nr_gparts = Ndm;
e->proxy_ind = NULL;
e->nr_proxies = 0;
e->forcerebuild = 1;
......@@ -3611,6 +3698,10 @@ void engine_init(struct engine *e, struct space *s,
e->cooling_func = cooling_func;
e->sourceterms = sourceterms;
e->parameter_file = params;
#ifdef WITH_MPI
e->cputime_last_step = 0;
e->last_repartition = -1;
#endif
engine_rank = nodeID;
/* Make the space link back to the engine. */
......
......@@ -144,6 +144,9 @@ struct engine {
/* Number of particles updated */
size_t updates, g_updates, s_updates;
/* Total numbers of particles in the system. */
size_t total_nr_parts, total_nr_gparts;
/* The internal system of units */
const struct unit_system *internal_units;
......@@ -181,8 +184,18 @@ struct engine {
struct proxy *proxies;
int nr_proxies, *proxy_ind;
#ifdef SWIFT_DEBUG_TASKS
/* Tic/toc at the start/end of a step. */
ticks tic_step, toc_step;
#endif
#ifdef WITH_MPI
/* CPU time of the last step. */
double cputime_last_step;
/* Step of last repartition. */
int last_repartition;
#endif
/* Wallclock time of the last time-step */
float wallclock_time;
......@@ -192,7 +205,7 @@ struct engine {
/* Force the engine to repartition ? */
int forcerepart;
enum repartition_type reparttype;
struct repartition *reparttype;
/* Need to dump some statistics ? */
int save_stats;
......@@ -240,14 +253,14 @@ void engine_drift_all(struct engine *e);
void engine_dump_snapshot(struct engine *e);
void engine_init(struct engine *e, struct space *s,
const struct swift_params *params, int nr_nodes, int nodeID,
int nr_threads, int with_aff, int policy, int verbose,
enum repartition_type reparttype,
int nr_threads, int Ngas, int Ndm, int with_aff, int policy,
int verbose, struct repartition *reparttype,
const struct unit_system *internal_units,
const struct phys_const *physical_constants,
const struct hydro_props *hydro,
const struct gravity_props *gravity,
const struct external_potential *potential,
const struct cooling_function_data *cooling,
const struct cooling_function_data *cooling_func,
struct sourceterms *sourceterms);
void engine_launch(struct engine *e, int nr_runners);
void engine_prepare(struct engine *e);
......@@ -262,6 +275,7 @@ void engine_exchange_strays(struct engine *e, size_t offset_parts,
size_t *Nspart);
void engine_rebuild(struct engine *e);
void engine_repartition(struct engine *e);
void engine_repartition_trigger(struct engine *e);
void engine_makeproxies(struct engine *e);
void engine_redistribute(struct engine *e);
void engine_print_policy(struct engine *e);
......
......@@ -69,6 +69,21 @@ __attribute__((always_inline)) INLINE static float gas_pressure_from_entropy(
return entropy * pow_gamma(density);
}
/**
* @brief Returns the entropy given density and pressure.
*
* Computes \f$A = \frac{P}{\rho^\gamma}\f$.
*
* @param density The density \f$\rho\f$.
* @param pressure The pressure \f$P\f$.
* @return The entropy \f$A\f$.
*/
__attribute__((always_inline)) INLINE static float gas_entropy_from_pressure(
float density, float pressure) {
return pressure * pow_minus_gamma(density);
}
/**
* @brief Returns the sound speed given density and entropy
*
......@@ -111,6 +126,20 @@ gas_pressure_from_internal_energy(float density, float u) {
return hydro_gamma_minus_one * u * density;
}
/**
* @brief Returns the internal energy given density and pressure.
*
* Computes \f$u = \frac{1}{\gamma - 1}\frac{P}{\rho}\f$.
*
* @param density The density \f$\rho\f$.
* @param pressure The pressure \f$P\f$.
* @return The internal energy \f$u\f$.
*/
__attribute__((always_inline)) INLINE static float
gas_internal_energy_from_pressure(float density, float pressure) {
return hydro_one_over_gamma_minus_one * pressure / density;
}
/**
* @brief Returns the sound speed given density and internal energy
*
......@@ -172,6 +201,22 @@ __attribute__((always_inline)) INLINE static float gas_pressure_from_entropy(
return hydro_gamma_minus_one * const_isothermal_internal_energy * density;
}
/**
* @brief Returns the entropy given density and pressure.
*
* Computes \f$A = \frac{P}{\rho^\gamma}\f$.
*
* @param density The density \f$\rho\f$.
* @param pressure The pressure \f$P\f$ (ignored).
* @return The entropy \f$A\f$.
*/
__attribute__((always_inline)) INLINE static float gas_entropy_from_pressure(
float density, float pressure) {
return hydro_gamma_minus_one * const_isothermal_internal_energy *
pow_minus_gamma_minus_one(density);
}
/**
* @brief Returns the sound speed given density and entropy
*
......@@ -219,6 +264,20 @@ gas_pressure_from_internal_energy(float density, float u) {
return hydro_gamma_minus_one * const_isothermal_internal_energy * density;
}
/**
* @brief Returns the internal energy given density and pressure.
*
* Just returns the constant internal energy.
*
* @param density The density \f$\rho\f$ (ignored).
* @param pressure The pressure \f$P\f$ (ignored).
* @return The internal energy \f$u\f$ (which is constant).
*/
__attribute__((always_inline)) INLINE static float
gas_internal_energy_from_pressure(float density, float pressure) {
return const_isothermal_energy;
}
/**
* @brief Returns the sound speed given density and internal energy
*
......
......@@ -230,7 +230,7 @@ static const float kernel_coeffs[(kernel_degree + 1) * (kernel_ivals + 1)]
kernel_gamma_inv_dim)
/* Kernel normalisation constant (volume term) */
#define kernel_norm ((float)(hydro_dimention_unit_sphere * kernel_gamma_dim))
#define kernel_norm ((float)(hydro_dimension_unit_sphere * kernel_gamma_dim))
/* ------------------------------------------------------------------------- */
......
......@@ -441,10 +441,17 @@ void read_ic_parallel(char* fileName, const struct unit_system* internal_units,
N_total[ptype] =
(numParticles[ptype]) + (numParticles_highWord[ptype] << 32);
/* Get the box size if not cubic */
dim[0] = boxSize[0];
dim[1] = (boxSize[1] < 0) ? boxSize[0] : boxSize[1];
dim[2] = (boxSize[2] < 0) ? boxSize[0] : boxSize[2];
/* Change box size in the 1D and 2D case */
if (hydro_dimension == 2)
dim[2] = min(dim[0], dim[1]);
else if (hydro_dimension == 1)
dim[2] = dim[1] = dim[0];
/* message("Found %lld particles in a %speriodic box of size [%f %f %f].", */
/* N_total[0], (periodic ? "": "non-"), dim[0], */
/* dim[1], dim[2]); */
......
......@@ -780,31 +780,31 @@ static void repart_vertex_metis(struct space *s, int nodeID, int nr_nodes) {
* Note that at the end of this process all the cells will be re-distributed
* across the nodes, but the particles themselves will not be.
*
* @param reparttype the type of repartition to attempt, see the repart_type
*enum.
* @param reparttype #repartition struct
* @param nodeID our nodeID.
* @param nr_nodes the number of nodes.
* @param s the space of cells holding our local particles.
* @param tasks the completed tasks from the last engine step for our node.
* @param nr_tasks the number of tasks.
*/
void partition_repartition(enum repartition_type reparttype, int nodeID,
void partition_repartition(struct repartition *reparttype, int nodeID,
int nr_nodes, struct space *s, struct task *tasks,
int nr_tasks) {
#if defined(WITH_MPI) && defined(HAVE_METIS)
if (reparttype == REPART_METIS_BOTH || reparttype == REPART_METIS_EDGE ||
reparttype == REPART_METIS_VERTEX_EDGE) {
if (reparttype->type == REPART_METIS_BOTH ||
reparttype->type == REPART_METIS_EDGE ||
reparttype->type == REPART_METIS_VERTEX_EDGE) {
int partweights;
int bothweights;
if (reparttype == REPART_METIS_VERTEX_EDGE)
if (reparttype->type == REPART_METIS_VERTEX_EDGE)
partweights = 1;
else
partweights = 0;
if (reparttype == REPART_METIS_BOTH)