diff --git a/examples/main.c b/examples/main.c index 00afea75da422e750c715e35172459ef9eda7242..09311efb4c5cb9acc4a64648499863ca3ad70a34 100644 --- a/examples/main.c +++ b/examples/main.c @@ -334,9 +334,9 @@ int main(int argc, char *argv[]) { #endif /* Prepare the domain decomposition scheme */ + enum repartition_type reparttype = REPART_NONE; #ifdef WITH_MPI struct partition initial_partition; - enum repartition_type reparttype; partition_init(&initial_partition, &reparttype, params, nr_nodes); /* Let's report what we did */ @@ -507,7 +507,7 @@ int main(int argc, char *argv[]) { if (myrank == 0) clocks_gettime(&tic); struct engine e; engine_init(&e, &s, params, nr_nodes, myrank, nr_threads, with_aff, - engine_policies, talking, &us, &prog_const, &hydro_properties, + engine_policies, talking, reparttype, &us, &prog_const, &hydro_properties, &potential, &cooling_func, &sourceterms); if (myrank == 0) { clocks_gettime(&toc); @@ -570,11 +570,6 @@ int main(int argc, char *argv[]) { /* Main simulation loop */ for (int j = 0; !engine_is_done(&e) && e.step != nsteps; j++) { -/* Repartition the space amongst the nodes? */ -#ifdef WITH_MPI - if (j % 100 == 2) e.forcerepart = reparttype; -#endif - /* Reset timers */ timers_reset(timers_mask_all); diff --git a/src/cell.c b/src/cell.c index b0c60a9302a1191bad93c35df7cdd5cf74b2cef8..2289fe043749d97d1eb06b6999fad839640b495c 100644 --- a/src/cell.c +++ b/src/cell.c @@ -936,18 +936,36 @@ void cell_clean_links(struct cell *c, void *data) { /** * @brief Checks that a cell is at the current point in time * - * Calls error() if the cell is not at the current time. + * Calls error() if the cell is not at the current time. 
* * @param c Cell to act upon * @param data The current time on the integer time-line */ void cell_check_drift_point(struct cell *c, void *data) { +#ifdef SWIFT_DEBUG_CHECKS + const integertime_t ti_drift = *(integertime_t *)data; - if (c->ti_old != ti_drift && c->nodeID == engine_rank) + /* Only check local cells */ + if(c->nodeID != engine_rank) return; + + if (c->ti_old != ti_drift) error("Cell in an incorrect time-zone! c->ti_old=%lld ti_drift=%lld", c->ti_old, ti_drift); + + for(int i=0; i<c->count; ++i) + if(c->parts[i].ti_drift != ti_drift) + error("part in an incorrect time-zone! p->ti_drift=%lld ti_drift=%lld", + c->parts[i].ti_drift, ti_drift); + + for(int i=0; i<c->gcount; ++i) + if(c->gparts[i].ti_drift != ti_drift) + error("g-part in an incorrect time-zone! gp->ti_drift=%lld ti_drift=%lld", + c->gparts[i].ti_drift, ti_drift); +#else + error("Calling debugging code without debugging flag activated."); +#endif } /** @@ -1366,5 +1384,7 @@ void cell_check_timesteps(struct cell *c) { if (c->parts[i].time_bin == 0) error("Particle without assigned time-bin"); } +#else + error("Calling debugging code without debugging flag activated."); #endif } diff --git a/src/engine.c b/src/engine.c index 17730acc80d38b6a7a9f646ebe4fe8853c35c5fc..5336279ae01e076743398f85c16aff9e94b0f53e 100644 --- a/src/engine.c +++ b/src/engine.c @@ -822,19 +822,18 @@ void engine_repartition(struct engine *e) { fflush(stdout); /* Check that all cells have been drifted to the current time */ - space_check_drift_point(e->s, e->ti_current); + space_check_drift_point(e->s, e->ti_old); #endif /* Clear the repartition flag. */ - enum repartition_type reparttype = e->forcerepart; - e->forcerepart = REPART_NONE; + e->forcerepart = 0; /* Nothing to do if only using a single node. Also avoids METIS * bug that doesn't handle this case well. */ if (e->nr_nodes == 1) return; /* Do the repartitioning. 
*/ - partition_repartition(reparttype, e->nodeID, e->nr_nodes, e->s, + partition_repartition(e->reparttype, e->nodeID, e->nr_nodes, e->s, e->sched.tasks, e->sched.nr_tasks); /* Now comes the tricky part: Exchange particles between all nodes. @@ -2576,11 +2575,10 @@ void engine_rebuild(struct engine *e) { /* Clear the forcerebuild flag, whatever it was. */ e->forcerebuild = 0; -#ifdef SWIFT_DEBUG_CHECKS - /* Check that all cells have been drifted to the current time. - * That can include cells that have not - * previously been active on this rank. */ - space_check_drift_point(e->s, e->ti_old); + message("rebuild"); fflush(stdout); + +#ifdef WITH_MPI + MPI_Barrier(MPI_COMM_WORLD); #endif /* Re-build the space. */ @@ -2604,6 +2602,13 @@ void engine_rebuild(struct engine *e) { /* Print the status of the system */ // if (e->verbose) engine_print_task_counts(e); +#ifdef SWIFT_DEBUG_CHECKS + /* Check that all cells have been drifted to the current time. + * That can include cells that have not + * previously been active on this rank. */ + space_check_drift_point(e->s, e->ti_old); +#endif + if (e->verbose) message("took %.3f %s.", clocks_from_ticks(getticks() - tic), clocks_getunit()); @@ -2618,6 +2623,21 @@ void engine_prepare(struct engine *e) { TIMER_TIC; +#ifdef SWIFT_DEBUG_CHECKS + if(e->forcerepart || e->forcerebuild) { + /* Check that all cells have been drifted to the current time. + * That can include cells that have not + * previously been active on this rank. */ + space_check_drift_point(e->s, e->ti_old); + } +#endif + + /* Do we need repartitioning ? */ + if (e->forcerepart) engine_repartition(e); + + /* Do we need rebuilding ? 
*/ + if (e->forcerebuild) engine_rebuild(e); + /* Unskip active tasks and check for rebuild */ engine_unskip(e); @@ -3010,7 +3030,7 @@ void engine_step(struct engine *e) { e->tic_step = getticks(); - message("snap=%d, rebuild=%d repart=%d", e->dump_snapshot, e->forcerebuild, + message("START snap=%d, rebuild=%d repart=%d", e->dump_snapshot, e->forcerebuild, e->forcerepart); /* Move forward in time */ @@ -3024,7 +3044,7 @@ void engine_step(struct engine *e) { if (e->nodeID == 0) { /* Print some information to the screen */ - printf(" %6d %14e %14e %10zu %10zu %10zu %21.3f\n", e->step, e->time, + printf(" %6d %lld %14e %14e %10zu %10zu %10zu %21.3f\n", e->step, e->ti_current, e->time, e->timeStep, e->updates, e->g_updates, e->s_updates, e->wallclock_time); fflush(stdout); @@ -3035,15 +3055,14 @@ void engine_step(struct engine *e) { fflush(e->file_timesteps); } - /* Do we need repartitioning ? */ - if (e->forcerepart != REPART_NONE) engine_repartition(e); - - /* Do we need rebuilding ? */ - if (e->forcerebuild) engine_rebuild(e); - - /* Prepare the tasks to be launched. */ + /* Prepare the tasks to be launched, rebuild or repartition if needed. */ engine_prepare(e); + /* Repartition the space amongst the nodes? */ +#ifdef WITH_MPI + if (e->step % 100 == 2) e->forcerepart = 1; +#endif + /* Print the number of active tasks ? */ if (e->verbose) engine_print_task_counts(e); @@ -3067,16 +3086,19 @@ void engine_step(struct engine *e) { e->forcerebuild = buff; #endif + message("MIDDLE snap=%d, rebuild=%d repart=%d ti_current=%lld", e->dump_snapshot, e->forcerebuild, + e->forcerepart, e->ti_current); + /* Do we want a snapshot? */ if (e->ti_end_min >= e->ti_nextSnapshot && e->ti_nextSnapshot > 0) e->dump_snapshot = 1; /* Drift everybody (i.e. 
what has not yet been drifted) */ /* to the current time */ - if (e->dump_snapshot || e->forcerebuild || e->forcerepart != REPART_NONE) + if (e->dump_snapshot || e->forcerebuild || e->forcerepart) engine_drift_all(e); - message("snap=%d, rebuild=%d repart=%d", e->dump_snapshot, e->forcerebuild, + message("END snap=%d, rebuild=%d repart=%d", e->dump_snapshot, e->forcerebuild, e->forcerepart); /* Write a snapshot ? */ @@ -3465,6 +3487,7 @@ void engine_unpin() { void engine_init(struct engine *e, struct space *s, const struct swift_params *params, int nr_nodes, int nodeID, int nr_threads, int with_aff, int policy, int verbose, + enum repartition_type reparttype, const struct UnitSystem *internal_units, const struct phys_const *physical_constants, const struct hydro_props *hydro, @@ -3485,7 +3508,8 @@ void engine_init(struct engine *e, struct space *s, e->proxy_ind = NULL; e->nr_proxies = 0; e->forcerebuild = 1; - e->forcerepart = REPART_NONE; + e->forcerepart = 0; + e->reparttype = reparttype; e->dump_snapshot = 0; e->links = NULL; e->nr_links = 0; diff --git a/src/engine.h b/src/engine.h index 63d6fc0b993fdd303c97c70a1e2e07dada65db7a..9a9e4bf3ef6da01228757782efa4153fd796d892 100644 --- a/src/engine.h +++ b/src/engine.h @@ -185,7 +185,10 @@ struct engine { /* Force the engine to rebuild? */ int forcerebuild; - enum repartition_type forcerepart; + + /* Force the engine to repartition ? */ + int forcerepart; + enum repartition_type reparttype; /* Need to dump a snapshot ? 
*/ int dump_snapshot; @@ -228,6 +231,7 @@ void engine_dump_snapshot(struct engine *e); void engine_init(struct engine *e, struct space *s, const struct swift_params *params, int nr_nodes, int nodeID, int nr_threads, int with_aff, int policy, int verbose, + enum repartition_type reparttype, const struct UnitSystem *internal_units, const struct phys_const *physical_constants, const struct hydro_props *hydro, diff --git a/src/space.c b/src/space.c index 7cb3ade35ddd5f078304b556304d8a8c7fb34afe..fa3771d8433927a6c1887d8ea07f9664b489f42e 100644 --- a/src/space.c +++ b/src/space.c @@ -251,7 +251,7 @@ void space_regrid(struct space *s, int verbose) { const size_t nr_parts = s->nr_parts; const ticks tic = getticks(); - // const integertime_t ti_current = (s->e != NULL) ? s->e->ti_current : 0; + //const integertime_t ti_current = (s->e != NULL) ? s->e->ti_current : 0; const integertime_t ti_old = (s->e != NULL) ? s->e->ti_old : 0; /* Run through the cells and get the current h_max. */ @@ -488,7 +488,7 @@ void space_rebuild(struct space *s, int verbose) { size_t nr_gparts = s->nr_gparts; size_t nr_sparts = s->nr_sparts; struct cell *restrict cells_top = s->cells_top; - // const integertime_t ti_current = (s->e != NULL) ? s->e->ti_current : 0; + //const integertime_t ti_current = (s->e != NULL) ? s->e->ti_current : 0; const integertime_t ti_old = (s->e != NULL) ? s->e->ti_old : 0; /* Run through the particles and get their cell index. Allocates