Commit 67e7b944 authored by Peter W. Draper
Browse files

Use a single global value for the current runtime, this should avoid issues...

Use a single global value for the current runtime, this should avoid issues with the exact timing of when the limit is exceeded
parent 9f675abe
......@@ -1296,7 +1296,7 @@ int main(int argc, char *argv[]) {
}
/* Did we exceed the maximal runtime? */
if (clocks_get_hours_since_start() > restart_max_hours_runtime) {
if (e.runtime > restart_max_hours_runtime) {
force_stop = 1;
message("Runtime limit reached, dumping restart files...");
if (resubmit_after_max_hours) resubmit = 1;
......
......@@ -56,6 +56,7 @@ struct mpicollectgroup1 {
long long total_nr_tasks;
float tasks_per_cell_max;
struct star_formation_history sfh;
float runtime;
};
/* Forward declarations. */
......@@ -125,6 +126,8 @@ void collectgroup1_apply(const struct collectgroup1 *grp1, struct engine *e) {
e->tasks_per_cell_max = grp1->tasks_per_cell_max;
star_formation_logger_add_to_accumulator(&e->sfh, &grp1->sfh);
e->runtime = grp1->runtime;
}
/**
......@@ -174,6 +177,7 @@ void collectgroup1_apply(const struct collectgroup1 *grp1, struct engine *e) {
* @param total_nr_tasks total number of tasks on rank.
* @param tasks_per_cell the used number of tasks per cell.
* @param sfh The star formation history logger
* @param runtime The runtime of this rank, in hours.
*/
void collectgroup1_init(
struct collectgroup1 *grp1, size_t updated, size_t g_updated,
......@@ -186,7 +190,7 @@ void collectgroup1_init(
integertime_t ti_black_holes_end_min, integertime_t ti_black_holes_end_max,
integertime_t ti_black_holes_beg_max, int forcerebuild,
long long total_nr_cells, long long total_nr_tasks, float tasks_per_cell,
const struct star_formation_history sfh) {
const struct star_formation_history sfh, float runtime) {
grp1->updated = updated;
grp1->g_updated = g_updated;
......@@ -213,6 +217,7 @@ void collectgroup1_init(
grp1->total_nr_tasks = total_nr_tasks;
grp1->tasks_per_cell_max = tasks_per_cell;
grp1->sfh = sfh;
grp1->runtime = runtime;
}
/**
......@@ -254,6 +259,7 @@ void collectgroup1_reduce(struct collectgroup1 *grp1) {
mpigrp11.total_nr_tasks = grp1->total_nr_tasks;
mpigrp11.tasks_per_cell_max = grp1->tasks_per_cell_max;
mpigrp11.sfh = grp1->sfh;
mpigrp11.runtime = grp1->runtime;
struct mpicollectgroup1 mpigrp12;
if (MPI_Allreduce(&mpigrp11, &mpigrp12, 1, mpicollectgroup1_type,
......@@ -286,6 +292,7 @@ void collectgroup1_reduce(struct collectgroup1 *grp1) {
grp1->total_nr_tasks = mpigrp12.total_nr_tasks;
grp1->tasks_per_cell_max = mpigrp12.tasks_per_cell_max;
grp1->sfh = mpigrp12.sfh;
grp1->runtime = mpigrp12.runtime;
#endif
}
......@@ -357,6 +364,9 @@ static void doreduce1(struct mpicollectgroup1 *mpigrp11,
/* Star formation history */
star_formation_logger_add(&mpigrp11->sfh, &mpigrp12->sfh);
/* Use the maximum runtime as the global runtime. */
mpigrp11->runtime = max(mpigrp11->runtime, mpigrp12->runtime);
}
/**
......
......@@ -60,6 +60,9 @@ struct collectgroup1 {
/* Maximum value of actual tasks per cell across all ranks. */
float tasks_per_cell_max;
/* Global runtime of application in hours. */
float runtime;
};
void collectgroup_init(void);
......@@ -75,7 +78,7 @@ void collectgroup1_init(
integertime_t ti_black_holes_end_min, integertime_t ti_black_holes_end_max,
integertime_t ti_black_holes_beg_max, int forcerebuild,
long long total_nr_cells, long long total_nr_tasks, float tasks_per_cell,
const struct star_formation_history sfh);
const struct star_formation_history sfh, float runtime);
void collectgroup1_reduce(struct collectgroup1 *grp1);
#ifdef WITH_MPI
void mpicollect_free_MPI_type(void);
......
......@@ -476,6 +476,9 @@ struct engine {
/* Maximum number of tasks needed for restarting. */
int restart_max_tasks;
/* The globally agreed runtime, in hours. */
float runtime;
/* Label of the run */
char run_name[PARSER_MAX_LINE_SIZE];
......
......@@ -44,6 +44,7 @@ struct end_of_step_data {
ti_black_holes_beg_max;
struct engine *e;
struct star_formation_history sfh;
float runtime;
};
/**
......@@ -455,6 +456,9 @@ void engine_collect_end_of_step(struct engine *e, int apply) {
data.ti_black_holes_end_max = 0, data.ti_black_holes_beg_max = 0;
data.e = e;
/* Need to use a consistent check of the hours since we started. */
data.runtime = clocks_get_hours_since_start();
/* Initialize the total SFH of the simulation to zero */
star_formation_logger_init(&data.sfh);
......@@ -478,9 +482,10 @@ void engine_collect_end_of_step(struct engine *e, int apply) {
data.ti_hydro_beg_max, data.ti_gravity_end_min, data.ti_gravity_end_max,
data.ti_gravity_beg_max, data.ti_stars_end_min, data.ti_stars_end_max,
data.ti_stars_beg_max, data.ti_black_holes_end_min,
data.ti_black_holes_end_max, data.ti_black_holes_beg_max, e->forcerebuild,
e->s->tot_cells, e->sched.nr_tasks,
(float)e->sched.nr_tasks / (float)e->s->tot_cells, data.sfh);
data.ti_black_holes_end_max, data.ti_black_holes_beg_max,
e->forcerebuild, e->s->tot_cells, e->sched.nr_tasks,
(float)e->sched.nr_tasks / (float)e->s->tot_cells, data.sfh,
data.runtime);
/* Aggregate collective data from the different nodes for this step. */
#ifdef WITH_MPI
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment