Commit f595959b authored by Matthieu Schaller
Browse files

Move the repartitioning decision to engine_step()

parent ea549fe0
......@@ -334,9 +334,9 @@ int main(int argc, char *argv[]) {
#endif
/* Prepare the domain decomposition scheme */
enum repartition_type reparttype = REPART_NONE;
#ifdef WITH_MPI
struct partition initial_partition;
enum repartition_type reparttype;
partition_init(&initial_partition, &reparttype, params, nr_nodes);
/* Let's report what we did */
......@@ -507,7 +507,7 @@ int main(int argc, char *argv[]) {
if (myrank == 0) clocks_gettime(&tic);
struct engine e;
engine_init(&e, &s, params, nr_nodes, myrank, nr_threads, with_aff,
engine_policies, talking, &us, &prog_const, &hydro_properties,
engine_policies, talking, reparttype, &us, &prog_const, &hydro_properties,
&potential, &cooling_func, &sourceterms);
if (myrank == 0) {
clocks_gettime(&toc);
......@@ -570,11 +570,6 @@ int main(int argc, char *argv[]) {
/* Main simulation loop */
for (int j = 0; !engine_is_done(&e) && e.step != nsteps; j++) {
/* Repartition the space amongst the nodes? */
#ifdef WITH_MPI
if (j % 100 == 2) e.forcerepart = reparttype;
#endif
/* Reset timers */
timers_reset(timers_mask_all);
......
......@@ -936,18 +936,36 @@ void cell_clean_links(struct cell *c, void *data) {
/**
 * @brief Checks that a cell is at the current point in time
 *
 * Calls error() if the cell, or any of its parts or g-parts, is not at
 * the given drift time. Cells that do not belong to this rank are ignored.
 *
 * @param c Cell to act upon
 * @param data The current time on the integer time-line (read as a
 *        pointer to an #integertime_t).
 */
void cell_check_drift_point(struct cell *c, void *data) {

#ifdef SWIFT_DEBUG_CHECKS

  const integertime_t ti_drift = *(integertime_t *)data;

  /* Only check local cells */
  if (c->nodeID != engine_rank) return;

  /* The cell itself must have been drifted to the requested time */
  if (c->ti_old != ti_drift)
    error("Cell in an incorrect time-zone! c->ti_old=%lld ti_drift=%lld",
          c->ti_old, ti_drift);

  /* ... and so must every particle it holds */
  for (int i = 0; i < c->count; ++i) {
    if (c->parts[i].ti_drift != ti_drift)
      error("part in an incorrect time-zone! p->ti_drift=%lld ti_drift=%lld",
            c->parts[i].ti_drift, ti_drift);
  }

  /* ... and every gravity particle */
  for (int i = 0; i < c->gcount; ++i) {
    if (c->gparts[i].ti_drift != ti_drift)
      error("g-part in an incorrect time-zone! gp->ti_drift=%lld ti_drift=%lld",
            c->gparts[i].ti_drift, ti_drift);
  }
#else
  error("Calling debugging code without debugging flag activated.");
#endif
}
/**
......@@ -1366,5 +1384,7 @@ void cell_check_timesteps(struct cell *c) {
if (c->parts[i].time_bin == 0)
error("Particle without assigned time-bin");
}
#else
error("Calling debugging code without debugging flag activated.");
#endif
}
......@@ -822,19 +822,18 @@ void engine_repartition(struct engine *e) {
fflush(stdout);
/* Check that all cells have been drifted to the current time */
space_check_drift_point(e->s, e->ti_current);
space_check_drift_point(e->s, e->ti_old);
#endif
/* Clear the repartition flag. */
enum repartition_type reparttype = e->forcerepart;
e->forcerepart = REPART_NONE;
e->forcerepart = 0;
/* Nothing to do if only using a single node. Also avoids METIS
* bug that doesn't handle this case well. */
if (e->nr_nodes == 1) return;
/* Do the repartitioning. */
partition_repartition(reparttype, e->nodeID, e->nr_nodes, e->s,
partition_repartition(e->reparttype, e->nodeID, e->nr_nodes, e->s,
e->sched.tasks, e->sched.nr_tasks);
/* Now comes the tricky part: Exchange particles between all nodes.
......@@ -2576,11 +2575,10 @@ void engine_rebuild(struct engine *e) {
/* Clear the forcerebuild flag, whatever it was. */
e->forcerebuild = 0;
#ifdef SWIFT_DEBUG_CHECKS
/* Check that all cells have been drifted to the current time.
* That can include cells that have not
* previously been active on this rank. */
space_check_drift_point(e->s, e->ti_old);
message("rebuild"); fflush(stdout);
#ifdef WITH_MPI
MPI_Barrier(MPI_COMM_WORLD);
#endif
/* Re-build the space. */
......@@ -2604,6 +2602,13 @@ void engine_rebuild(struct engine *e) {
/* Print the status of the system */
// if (e->verbose) engine_print_task_counts(e);
#ifdef SWIFT_DEBUG_CHECKS
/* Check that all cells have been drifted to the current time.
* That can include cells that have not
* previously been active on this rank. */
space_check_drift_point(e->s, e->ti_old);
#endif
if (e->verbose)
message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
clocks_getunit());
......@@ -2618,6 +2623,21 @@ void engine_prepare(struct engine *e) {
TIMER_TIC;
#ifdef SWIFT_DEBUG_CHECKS
if(e->forcerepart || e->forcerebuild) {
/* Check that all cells have been drifted to the current time.
* That can include cells that have not
* previously been active on this rank. */
space_check_drift_point(e->s, e->ti_old);
#endif
}
/* Do we need repartitioning ? */
if (e->forcerepart) engine_repartition(e);
/* Do we need rebuilding ? */
if (e->forcerebuild) engine_rebuild(e);
/* Unskip active tasks and check for rebuild */
engine_unskip(e);
......@@ -3010,7 +3030,7 @@ void engine_step(struct engine *e) {
e->tic_step = getticks();
message("snap=%d, rebuild=%d repart=%d", e->dump_snapshot, e->forcerebuild,
message("START snap=%d, rebuild=%d repart=%d", e->dump_snapshot, e->forcerebuild,
e->forcerepart);
/* Move forward in time */
......@@ -3024,7 +3044,7 @@ void engine_step(struct engine *e) {
if (e->nodeID == 0) {
/* Print some information to the screen */
printf(" %6d %14e %14e %10zu %10zu %10zu %21.3f\n", e->step, e->time,
printf(" %6d %lld %14e %14e %10zu %10zu %10zu %21.3f\n", e->step, e->ti_current, e->time,
e->timeStep, e->updates, e->g_updates, e->s_updates,
e->wallclock_time);
fflush(stdout);
......@@ -3035,15 +3055,14 @@ void engine_step(struct engine *e) {
fflush(e->file_timesteps);
}
/* Do we need repartitioning ? */
if (e->forcerepart != REPART_NONE) engine_repartition(e);
/* Do we need rebuilding ? */
if (e->forcerebuild) engine_rebuild(e);
/* Prepare the tasks to be launched. */
/* Prepare the tasks to be launched, rebuild or repartition if needed. */
engine_prepare(e);
/* Repartition the space amongst the nodes? */
#ifdef WITH_MPI
if (e->step % 100 == 2) e->forcerepart = 1;
#endif
/* Print the number of active tasks ? */
if (e->verbose) engine_print_task_counts(e);
......@@ -3067,16 +3086,19 @@ void engine_step(struct engine *e) {
e->forcerebuild = buff;
#endif
message("MIDDLE snap=%d, rebuild=%d repart=%d ti_current=%lld", e->dump_snapshot, e->forcerebuild,
e->forcerepart, e->ti_current);
/* Do we want a snapshot? */
if (e->ti_end_min >= e->ti_nextSnapshot && e->ti_nextSnapshot > 0)
e->dump_snapshot = 1;
/* Drift everybody (i.e. what has not yet been drifted) */
/* to the current time */
if (e->dump_snapshot || e->forcerebuild || e->forcerepart != REPART_NONE)
if (e->dump_snapshot || e->forcerebuild || e->forcerepart)
engine_drift_all(e);
message("snap=%d, rebuild=%d repart=%d", e->dump_snapshot, e->forcerebuild,
message("END snap=%d, rebuild=%d repart=%d", e->dump_snapshot, e->forcerebuild,
e->forcerepart);
/* Write a snapshot ? */
......@@ -3465,6 +3487,7 @@ void engine_unpin() {
void engine_init(struct engine *e, struct space *s,
const struct swift_params *params, int nr_nodes, int nodeID,
int nr_threads, int with_aff, int policy, int verbose,
enum repartition_type reparttype,
const struct UnitSystem *internal_units,
const struct phys_const *physical_constants,
const struct hydro_props *hydro,
......@@ -3485,7 +3508,8 @@ void engine_init(struct engine *e, struct space *s,
e->proxy_ind = NULL;
e->nr_proxies = 0;
e->forcerebuild = 1;
e->forcerepart = REPART_NONE;
e->forcerepart = 0;
e->reparttype = reparttype;
e->dump_snapshot = 0;
e->links = NULL;
e->nr_links = 0;
......
......@@ -185,7 +185,10 @@ struct engine {
/* Force the engine to rebuild? */
int forcerebuild;
enum repartition_type forcerepart;
/* Force the engine to repartition ? */
int forcerepart;
enum repartition_type reparttype;
/* Need to dump a snapshot ? */
int dump_snapshot;
......@@ -228,6 +231,7 @@ void engine_dump_snapshot(struct engine *e);
void engine_init(struct engine *e, struct space *s,
const struct swift_params *params, int nr_nodes, int nodeID,
int nr_threads, int with_aff, int policy, int verbose,
enum repartition_type reparttype,
const struct UnitSystem *internal_units,
const struct phys_const *physical_constants,
const struct hydro_props *hydro,
......
......@@ -251,7 +251,7 @@ void space_regrid(struct space *s, int verbose) {
const size_t nr_parts = s->nr_parts;
const ticks tic = getticks();
// const integertime_t ti_current = (s->e != NULL) ? s->e->ti_current : 0;
//const integertime_t ti_current = (s->e != NULL) ? s->e->ti_current : 0;
const integertime_t ti_old = (s->e != NULL) ? s->e->ti_old : 0;
/* Run through the cells and get the current h_max. */
......@@ -488,7 +488,7 @@ void space_rebuild(struct space *s, int verbose) {
size_t nr_gparts = s->nr_gparts;
size_t nr_sparts = s->nr_sparts;
struct cell *restrict cells_top = s->cells_top;
// const integertime_t ti_current = (s->e != NULL) ? s->e->ti_current : 0;
//const integertime_t ti_current = (s->e != NULL) ? s->e->ti_current : 0;
const integertime_t ti_old = (s->e != NULL) ? s->e->ti_old : 0;
/* Run through the particles and get their cell index. Allocates
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment