diff --git a/src/scheduler.c b/src/scheduler.c
index 6d076d11461bc25f4d7c3ae90b66ec7df0085e7f..a4bc916757d4c1a34af3f034597b48bd00a28fb5 100644
--- a/src/scheduler.c
+++ b/src/scheduler.c
@@ -2275,61 +2275,28 @@ void scheduler_write_task_level(const struct scheduler *s) {
   free(count);
 }
 
 /**
- * @brief dump all the active queues of all the known schedulers into a file.
+ * @brief dump all the active queues of all the known schedulers into files.
  *
  * @param e the #scheduler
  */
 void scheduler_dump_queues(struct engine *e) {
 
   struct scheduler *s = &e->sched;
-
-  /* Open the file and write the header. */
   char dumpfile[35];
+
 #ifdef WITH_MPI
-  snprintf(dumpfile, sizeof(dumpfile), "queue_dump_MPI-step%d.dat", e->step);
+  /* Open a file per rank and write the header. One file per rank avoids MPI
+   * calls that could interact with other blocking ones. */
+  snprintf(dumpfile, sizeof(dumpfile), "queue_dump_MPI-step%d.dat_%d", e->step,
+           e->nodeID);
 #else
   snprintf(dumpfile, sizeof(dumpfile), "queue_dump-step%d.dat", e->step);
 #endif
-  FILE *file_thread = NULL;
-  if (engine_rank == 0) {
-    file_thread = fopen(dumpfile, "w");
-    fprintf(file_thread, "# rank queue index type subtype weight\n");
-  }
-
-#ifdef WITH_MPI
-
-  /* Make sure output file is closed and empty, then we reopen on each rank. */
-  if (engine_rank == 0) fclose(file_thread);
-  MPI_Barrier(MPI_COMM_WORLD);
-
-  for (int i = 0; i < e->nr_nodes; i++) {
-
-    /* Rank 0 decides the index of the writing node, this happens
-     * one-by-one. */
-    int kk = i;
-    MPI_Bcast(&kk, 1, MPI_INT, 0, MPI_COMM_WORLD);
-
-    if (i == engine_rank) {
-      /* Open file and position at end. */
-      file_thread = fopen(dumpfile, "a");
-
-      for (int l = 0; l < s->nr_queues; l++) {
-        queue_dump(engine_rank, l, file_thread, &s->queues[l]);
-      }
-      fclose(file_thread);
-    }
-
-    /* And we wait for all to synchronize. */
-    MPI_Barrier(MPI_COMM_WORLD);
-  }
-
-#else
-
-  /* Non-MPI, so just a single schedulers worth of queues to dump. */
+  FILE *file_thread = fopen(dumpfile, "w");
+  fprintf(file_thread, "# rank queue index type subtype weight\n");
   for (int l = 0; l < s->nr_queues; l++) {
     queue_dump(engine_rank, l, file_thread, &s->queues[l]);
   }
   fclose(file_thread);
-#endif  // WITH_MPI
 }
diff --git a/src/task.c b/src/task.c
index 1f91ce2fb926892edf9b8e3ff9cf457c6d6c03a8..30f6a9621916615d9aa6e9e315e30d222acd8f84 100644
--- a/src/task.c
+++ b/src/task.c
@@ -1174,15 +1174,15 @@ void task_dump_stats(const char *dumpfile, struct engine *e, int header,
 }
 
 /**
- * @brief dump all the active tasks of all the known engines into a file.
+ * @brief dump all the active tasks of all the known engines into files.
  *
- * Dumps the information to a file "task_dump-stepn.dat" where n is the given
- * step value, or "task_dump_MPI-stepn.dat", if we are running under MPI. Note
- * if running under MPI all the ranks are dumped into this one file, which has
- * an additional field to identify the rank. Very similar to task_dump_all()
- * except for the additional fields used in task debugging and we record tasks
- * that have not ran (i.e !skip, but toc == 0) and how many waits are still
- * active.
+ * Dumps the information into the file "task_dump-stepn.dat", where n is the
+ * given step value, or into files "task_dump_MPI-stepn.dat_rank" if we are
+ * running under MPI. Note that under MPI each rank is dumped into a separate
+ * file to avoid interaction with other MPI calls that may be blocking at the
+ * time. Very similar to task_dump_all() except for the additional fields used
+ * in task debugging and that we record tasks that have not run (i.e. !skip,
+ * but toc == 0) and how many waits are still active.
  *
  * @param e the #engine
  */
@@ -1190,98 +1190,48 @@ void task_dump_active(struct engine *e) {
 
   /* Need this to convert ticks to seconds. */
   unsigned long long cpufreq = clocks_get_cpufreq();
-
-#ifdef WITH_MPI
-  /* Make sure output file is empty, only on one rank. */
   char dumpfile[35];
-  snprintf(dumpfile, sizeof(dumpfile), "task_dump_MPI-step%d.dat", e->step);
-  FILE *file_thread;
-  if (engine_rank == 0) {
-    file_thread = fopen(dumpfile, "w");
-    fprintf(file_thread,
-            "# rank otherrank type subtype waits pair tic toc"
-            " ci.hydro.count cj.hydro.count ci.grav.count cj.grav.count"
-            " flags\n");
-    fclose(file_thread);
-  }
-  MPI_Barrier(MPI_COMM_WORLD);
-
-  for (int i = 0; i < e->nr_nodes; i++) {
-
-    /* Rank 0 decides the index of the writing node, this happens
-     * one-by-one. */
-    int kk = i;
-    MPI_Bcast(&kk, 1, MPI_INT, 0, MPI_COMM_WORLD);
-
-    if (i == engine_rank) {
-
-      /* Open file and position at end. */
-      file_thread = fopen(dumpfile, "a");
-
-      /* Add some information to help with the plots and conversion of ticks to
-       * seconds. */
-      fprintf(
-          file_thread, "%i 0 none none -1 0 %lld %lld %lld %lld %lld 0 %lld\n",
-          engine_rank, (long long int)e->tic_step, (long long int)e->toc_step,
-          e->updates, e->g_updates, e->s_updates, cpufreq);
-      int count = 0;
-      for (int l = 0; l < e->sched.nr_tasks; l++) {
-        struct task *t = &e->sched.tasks[l];
-
-        /* Not implicit and not skipped. */
-        if (!t->implicit && !t->skip) {
-
-          /* Get destination rank of MPI requests. */
-          int paired = (t->cj != NULL);
-          int otherrank = t->ci->nodeID;
-          if (paired) otherrank = t->cj->nodeID;
-
-          fprintf(file_thread, "%i %i %s %s %i %i %lli %lli %i %i %i %i %lli\n",
-                  engine_rank, otherrank, taskID_names[t->type],
-                  subtaskID_names[t->subtype], t->wait, paired,
-                  (long long int)t->tic, (long long int)t->toc,
-                  (t->ci != NULL) ? t->ci->hydro.count : 0,
-                  (t->cj != NULL) ? t->cj->hydro.count : 0,
-                  (t->ci != NULL) ? t->ci->grav.count : 0,
-                  (t->cj != NULL) ? t->cj->grav.count : 0, t->flags);
-        }
-        count++;
-      }
-      fclose(file_thread);
-    }
-
-    /* And we wait for all to synchronize. */
-    MPI_Barrier(MPI_COMM_WORLD);
-  }
+#ifdef WITH_MPI
+  snprintf(dumpfile, sizeof(dumpfile), "task_dump_MPI-step%d.dat_%d", e->step,
+           e->nodeID);
 #else
-  /* Non-MPI, so just a single engine's worth of tasks to dump. */
-  char dumpfile[32];
   snprintf(dumpfile, sizeof(dumpfile), "task_dump-step%d.dat", e->step);
-  FILE *file_thread;
-  file_thread = fopen(dumpfile, "w");
+#endif
+
+  FILE *file_thread = fopen(dumpfile, "w");
   fprintf(file_thread,
-          "#type subtype waits pair tic toc ci.hydro.count cj.hydro.count "
-          "ci.grav.count cj.grav.count\n");
+          "# rank otherrank type subtype waits pair tic toc"
+          " ci.hydro.count cj.hydro.count ci.grav.count cj.grav.count"
+          " flags\n");
 
   /* Add some information to help with the plots and conversion of ticks to
    * seconds. */
-  fprintf(file_thread, "none none -1 0, %lld %lld %lld %lld %lld %lld\n",
-          (unsigned long long)e->tic_step, (unsigned long long)e->toc_step,
+  fprintf(file_thread, "%i 0 none none -1 0 %lld %lld %lld %lld %lld 0 %lld\n",
+          engine_rank, (long long int)e->tic_step, (long long int)e->toc_step,
          e->updates, e->g_updates, e->s_updates, cpufreq);
 
+  int count = 0;
   for (int l = 0; l < e->sched.nr_tasks; l++) {
     struct task *t = &e->sched.tasks[l];
+
+    /* Not implicit and not skipped. */
     if (!t->implicit && !t->skip) {
-      fprintf(file_thread, "%s %s %i %i %lli %lli %i %i %i %i\n",
-              taskID_names[t->type], subtaskID_names[t->subtype], t->wait,
-              (t->cj == NULL), (unsigned long long)t->tic,
-              (unsigned long long)t->toc,
-              (t->ci == NULL) ? 0 : t->ci->hydro.count,
-              (t->cj == NULL) ? 0 : t->cj->hydro.count,
-              (t->ci == NULL) ? 0 : t->ci->grav.count,
-              (t->cj == NULL) ? 0 : t->cj->grav.count);
+
+      /* Get destination rank of MPI requests. */
+      int paired = (t->cj != NULL);
+      int otherrank = t->ci->nodeID;
+      if (paired) otherrank = t->cj->nodeID;
+
+      fprintf(file_thread, "%i %i %s %s %i %i %lli %lli %i %i %i %i %lli\n",
+              engine_rank, otherrank, taskID_names[t->type],
+              subtaskID_names[t->subtype], t->wait, paired,
+              (long long int)t->tic, (long long int)t->toc,
+              (t->ci != NULL) ? t->ci->hydro.count : 0,
+              (t->cj != NULL) ? t->cj->hydro.count : 0,
+              (t->ci != NULL) ? t->ci->grav.count : 0,
+              (t->cj != NULL) ? t->cj->grav.count : 0, t->flags);
     }
+    count++;
   }
   fclose(file_thread);
-#endif  // WITH_MPI
 }
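Both hunks make the same change: the old rank-serialized dump, in which rank 0 broadcast whose turn it was and every rank passed through an MPI_Barrier per writer, is replaced by one output file per rank, so the dump path issues no MPI calls at all and cannot deadlock against ranks that are sitting in unrelated blocking communication. Below is a minimal standalone sketch of that pattern, not SWIFT code: dump_per_rank() and the "dump-step%d.dat_%d" file name are hypothetical stand-ins for the engine structures used in the patch.

    #include <mpi.h>
    #include <stdio.h>

    /* Per-rank dump: each rank writes its own file, named with its rank,
     * so no barriers or broadcasts are needed and the dump cannot hang
     * waiting on ranks that are busy in blocking MPI calls. */
    static void dump_per_rank(int rank, int step) {
      char dumpfile[35];
      snprintf(dumpfile, sizeof(dumpfile), "dump-step%d.dat_%d", step, rank);
      FILE *f = fopen(dumpfile, "w");
      if (f == NULL) return;
      fprintf(f, "# rank value\n");
      fprintf(f, "%d 42\n", rank); /* Placeholder payload. */
      fclose(f);
    }

    int main(int argc, char *argv[]) {
      MPI_Init(&argc, &argv);
      int rank;
      MPI_Comm_rank(MPI_COMM_WORLD, &rank);

      /* Each rank can dump at any moment, independently of the others. */
      dump_per_rank(rank, /*step=*/0);

      MPI_Finalize();
      return 0;
    }

If a single file is still wanted, the per-rank files can simply be concatenated after the run; the rank column written on every data line preserves the provenance of each entry once the files are merged.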