diff --git a/examples/main.c b/examples/main.c index 5ae01a9280ac84c11513137cd5b26c9e8966f7b9..334fdd6a9dfed783f452cea3e32f924306a016c2 100644 --- a/examples/main.c +++ b/examples/main.c @@ -1088,96 +1088,9 @@ int main(int argc, char *argv[]) { #ifdef SWIFT_DEBUG_TASKS /* Dump the task data using the given frequency. */ if (dump_tasks && (dump_tasks == 1 || j % dump_tasks == 1)) { -#ifdef WITH_MPI - - /* Make sure output file is empty, only on one rank. */ - char dumpfile[35]; - snprintf(dumpfile, sizeof(dumpfile), "thread_info_MPI-step%d.dat", j + 1); - FILE *file_thread; - if (myrank == 0) { - file_thread = fopen(dumpfile, "w"); - fclose(file_thread); - } - MPI_Barrier(MPI_COMM_WORLD); - - for (int i = 0; i < nr_nodes; i++) { - - /* Rank 0 decides the index of writing node, this happens one-by-one. */ - int kk = i; - MPI_Bcast(&kk, 1, MPI_INT, 0, MPI_COMM_WORLD); - - if (i == myrank) { - - /* Open file and position at end. */ - file_thread = fopen(dumpfile, "a"); - - fprintf(file_thread, - " %03d 0 0 0 0 %lld %lld %lld %lld %lld 0 0 %lld\n", myrank, - e.tic_step, e.toc_step, e.updates, e.g_updates, e.s_updates, - cpufreq); - int count = 0; - for (int l = 0; l < e.sched.nr_tasks; l++) { - if (!e.sched.tasks[l].implicit && e.sched.tasks[l].toc != 0) { - fprintf(file_thread, - " %03i %i %i %i %i %lli %lli %i %i %i %i %lli %i\n", - myrank, e.sched.tasks[l].rid, e.sched.tasks[l].type, - e.sched.tasks[l].subtype, (e.sched.tasks[l].cj == NULL), - e.sched.tasks[l].tic, e.sched.tasks[l].toc, - (e.sched.tasks[l].ci != NULL) - ? e.sched.tasks[l].ci->hydro.count - : 0, - (e.sched.tasks[l].cj != NULL) - ? e.sched.tasks[l].cj->hydro.count - : 0, - (e.sched.tasks[l].ci != NULL) - ? e.sched.tasks[l].ci->grav.count - : 0, - (e.sched.tasks[l].cj != NULL) - ? e.sched.tasks[l].cj->grav.count - : 0, - e.sched.tasks[l].flags, e.sched.tasks[l].sid); - } - fflush(stdout); - count++; - } - fclose(file_thread); - } - - /* And we wait for all to synchronize. */ - MPI_Barrier(MPI_COMM_WORLD); - } - -#else - char dumpfile[32]; - snprintf(dumpfile, sizeof(dumpfile), "thread_info-step%d.dat", j + 1); - FILE *file_thread; - file_thread = fopen(dumpfile, "w"); - /* Add some information to help with the plots */ - fprintf(file_thread, " %d %d %d %d %lld %lld %lld %lld %lld %d %lld\n", - -2, -1, -1, 1, e.tic_step, e.toc_step, e.updates, e.g_updates, - e.s_updates, 0, cpufreq); - for (int l = 0; l < e.sched.nr_tasks; l++) { - if (!e.sched.tasks[l].implicit && e.sched.tasks[l].toc != 0) { - fprintf( - file_thread, " %i %i %i %i %lli %lli %i %i %i %i %i\n", - e.sched.tasks[l].rid, e.sched.tasks[l].type, - e.sched.tasks[l].subtype, (e.sched.tasks[l].cj == NULL), - e.sched.tasks[l].tic, e.sched.tasks[l].toc, - (e.sched.tasks[l].ci == NULL) ? 0 - : e.sched.tasks[l].ci->hydro.count, - (e.sched.tasks[l].cj == NULL) ? 0 - : e.sched.tasks[l].cj->hydro.count, - (e.sched.tasks[l].ci == NULL) ? 0 - : e.sched.tasks[l].ci->grav.count, - (e.sched.tasks[l].cj == NULL) ? 0 - : e.sched.tasks[l].cj->grav.count, - e.sched.tasks[l].sid); - } - } - fclose(file_thread); -#endif // WITH_MPI + task_dump_all(&e, j + 1); } -#endif // SWIFT_DEBUG_TASKS +#endif #ifdef SWIFT_DEBUG_THREADPOOL /* Dump the task data using the given frequency. */ diff --git a/src/task.c b/src/task.c index 9d4be3aaa5aaa074b19f7a22bddab99629ef9085..a3afc7861e4e723c84d5fac8d8bf8ae21f51ac8a 100644 --- a/src/task.c +++ b/src/task.c @@ -42,6 +42,7 @@ /* Local headers. */ #include "atomic.h" +#include "engine.h" #include "error.h" #include "inline.h" #include "lock.h" @@ -549,3 +550,109 @@ void task_create_mpi_comms(void) { } } #endif + +/** + * @brief dump all the tasks of all the known engines into a file for postprocessing. + * + * Dumps the information to a file "thread_info-stepn.dat" where n is the + * given step value, or "thread_info_MPI-stepn.dat", if we are running + * under MPI. Note if running under MPIU all the ranks are dumped into this + * one file, which has an additional field to identify the rank. + * + * @param e the #engine + * @param step the current step. + */ +void task_dump_all(struct engine *e, int step) { + +#ifdef SWIFT_DEBUG_TASKS + + /* Need this to convert ticks to seconds. */ + unsigned long long cpufreq = clocks_get_cpufreq(); + +#ifdef WITH_MPI + /* Make sure output file is empty, only on one rank. */ + char dumpfile[35]; + snprintf(dumpfile, sizeof(dumpfile), "thread_info_MPI-step%d.dat", step); + FILE *file_thread; + if (engine_rank == 0) { + file_thread = fopen(dumpfile, "w"); + fclose(file_thread); + } + MPI_Barrier(MPI_COMM_WORLD); + + for (int i = 0; i < e->nr_nodes; i++) { + + /* Rank 0 decides the index of the writing node, this happens + * one-by-one. */ + int kk = i; + MPI_Bcast(&kk, 1, MPI_INT, 0, MPI_COMM_WORLD); + + if (i == engine_rank) { + + /* Open file and position at end. */ + file_thread = fopen(dumpfile, "a"); + + /* Add some information to help with the plots and conversion of ticks to + * seconds. */ + fprintf(file_thread, " %03d 0 0 0 0 %lld %lld %lld %lld %lld 0 0 %lld\n", + engine_rank, e->tic_step, e->toc_step, e->updates, e->g_updates, + e->s_updates, cpufreq); + int count = 0; + for (int l = 0; l < e->sched.nr_tasks; l++) { + if (!e->sched.tasks[l].implicit && e->sched.tasks[l].toc != 0) { + fprintf( + file_thread, " %03i %i %i %i %i %lli %lli %i %i %i %i %lli %i\n", + engine_rank, e->sched.tasks[l].rid, e->sched.tasks[l].type, + e->sched.tasks[l].subtype, (e->sched.tasks[l].cj == NULL), + e->sched.tasks[l].tic, e->sched.tasks[l].toc, + (e->sched.tasks[l].ci != NULL) ? e->sched.tasks[l].ci->hydro.count + : 0, + (e->sched.tasks[l].cj != NULL) ? e->sched.tasks[l].cj->hydro.count + : 0, + (e->sched.tasks[l].ci != NULL) ? e->sched.tasks[l].ci->grav.count + : 0, + (e->sched.tasks[l].cj != NULL) ? e->sched.tasks[l].cj->grav.count + : 0, + e->sched.tasks[l].flags, e->sched.tasks[l].sid); + } + count++; + } + fclose(file_thread); + } + + /* And we wait for all to synchronize. */ + MPI_Barrier(MPI_COMM_WORLD); + } + +#else + /* Non-MPI, so just a single engine's worth of tasks to dump. */ + char dumpfile[32]; + snprintf(dumpfile, sizeof(dumpfile), "thread_info-step%d.dat", step); + FILE *file_thread; + file_thread = fopen(dumpfile, "w"); + + /* Add some information to help with the plots and conversion of ticks to + * seconds. */ + fprintf(file_thread, " %d %d %d %d %lld %lld %lld %lld %lld %d %lld\n", -2, + -1, -1, 1, e->tic_step, e->toc_step, e->updates, e->g_updates, + e->s_updates, 0, cpufreq); + for (int l = 0; l < e->sched.nr_tasks; l++) { + if (!e->sched.tasks[l].implicit && e->sched.tasks[l].toc != 0) { + fprintf( + file_thread, " %i %i %i %i %lli %lli %i %i %i %i %i\n", + e->sched.tasks[l].rid, e->sched.tasks[l].type, + e->sched.tasks[l].subtype, (e->sched.tasks[l].cj == NULL), + e->sched.tasks[l].tic, e->sched.tasks[l].toc, + (e->sched.tasks[l].ci == NULL) ? 0 + : e->sched.tasks[l].ci->hydro.count, + (e->sched.tasks[l].cj == NULL) ? 0 + : e->sched.tasks[l].cj->hydro.count, + (e->sched.tasks[l].ci == NULL) ? 0 : e->sched.tasks[l].ci->grav.count, + (e->sched.tasks[l].cj == NULL) ? 0 : e->sched.tasks[l].cj->grav.count, + e->sched.tasks[l].sid); + } + } + fclose(file_thread); +#endif // WITH_MPI +#endif // SWIFT_DEBUG_TASKS +} diff --git a/src/task.h b/src/task.h index 64b611534ed10e3c34b628e2c733a508e5d42a1c..4ba2635d6f0b199ce0faf23a6f1c823bfc223a6e 100644 --- a/src/task.h +++ b/src/task.h @@ -202,6 +202,7 @@ float task_overlap(const struct task *ta, const struct task *tb); int task_lock(struct task *t); void task_do_rewait(struct task *t); void task_print(const struct task *t); +void task_dump_all(struct engine *e, int step); #ifdef WITH_MPI void task_create_mpi_comms(void); #endif