diff --git a/configure.ac b/configure.ac index 6bbb800db73385cdbd6030e5ea38a548677318dc..795c9870356e1f24d7676760864f67ebaf7b6b74 100644 --- a/configure.ac +++ b/configure.ac @@ -380,6 +380,7 @@ fi # Check whether we have any of the ARM v8.1 tick timers AX_ASM_ARM_PMCCNTR AX_ASM_ARM_CNTVCT + # See if we want memuse reporting. AC_ARG_ENABLE([memuse-reports], [AS_HELP_STRING([--enable-memuse-reports], @@ -392,6 +393,18 @@ if test "$enable_memuse_reports" = "yes"; then AC_DEFINE([SWIFT_MEMUSE_REPORTS],1,[Enable memory usage reports]) fi +# Check if we want to make the dumper thread active. +AC_ARG_ENABLE([dumper], + [AS_HELP_STRING([--enable-dumper], + [Dump active tasks and memory use (if configured)@<:@yes/no@:>@] + )], + [enable_dumper="$enableval"], + [enable_dumper="no"] +) +if test "$enable_dumper" = "yes"; then + AC_DEFINE([SWIFT_DUMPER_THREAD],1,[Enable dumper thread]) +fi + # Define HAVE_POSIX_MEMALIGN if it works. AX_FUNC_POSIX_MEMALIGN diff --git a/src/engine.c b/src/engine.c index 5e2ed48cf550659c145ec340e0a2b40ae1bbf7e8..b8406314513f7bed8b9dac8c0a2647d683f8e73e 100644 --- a/src/engine.c +++ b/src/engine.c @@ -33,8 +33,11 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> +#include <sys/types.h> +#include <sys/stat.h> #include <unistd.h> + /* MPI headers. */ #ifdef WITH_MPI #include <mpi.h> @@ -4869,6 +4872,67 @@ void engine_unpin(void) { #endif } +#ifdef SWIFT_DUMPER_THREAD +/** + * @brief dumper thread action, checks got the existence of the .dump file + * every 5 seconds and does the dump if found. + * + * @param p the #engine + */ +static void *engine_dumper_poll(void *p) { + struct engine *e = (struct engine *)p; + while (1) { + if (access(".dump", F_OK) == 0) { + + /* OK, do our work. */ + message("Dumping engine tasks in step: %d", e->step); + task_dump_active(e); + +#ifdef SWIFT_MEMUSE_REPORTS + /* Dump the currently logged memory. */ + message("Dumping memory use report"); + memuse_log_dump_error(e->nodeID); +#endif + + /* Add more interesting diagnostics. */ + scheduler_dump_queues(e); + + /* Delete the file. */ + unlink(".dump"); + message("Dumping completed"); + fflush(stdout); + } + + /* Take a breath. */ + sleep(5); + } + return NULL; +} +#endif /* SWIFT_DUMPER_THREAD */ + +#ifdef SWIFT_DUMPER_THREAD +/** + * @brief creates the dumper thread. + * + * This watches for the creation of a ".dump" file in the current directory + * and if found dumps the current state of the tasks and memory use (if also + * configured). + * + * @param e the #engine + * + */ +static void engine_dumper_init(struct engine *e) { + pthread_t dumper; + + /* Make sure the .dump file is not present, that is bad when starting up. */ + struct stat buf; + if (stat(".dump", &buf) == 0) unlink(".dump"); + + /* Thread does not exit, so nothing to do but create it. */ + pthread_create(&dumper, NULL, &engine_dumper_poll, e); +} +#endif /* SWIFT_DUMPER_THREAD */ + /** * @brief init an engine struct with the necessary properties for the * simulation. @@ -5759,6 +5823,12 @@ void engine_config(int restart, int fof, struct engine *e, free(buf); #endif +#ifdef SWIFT_DUMPER_THREAD + + /* Start the dumper thread.*/ + engine_dumper_init(e); +#endif + /* Wait for the runner threads to be in place. */ swift_barrier_wait(&e->wait_barrier); } diff --git a/src/queue.c b/src/queue.c index 3a9919163a4fb9d146c055e58859a175858c17eb..95ade9027f5b3a2ba7ae5a75e7f6e5719ab34731 100644 --- a/src/queue.c +++ b/src/queue.c @@ -301,3 +301,34 @@ void queue_clean(struct queue *q) { free(q->tid); free(q->tid_incoming); } + + +/** + * @brief Dump a formatted list of tasks in the queue to the given file stream. + * + * @param nodeID the node id of this rank. + * @param index a number for this queue, added to the output. + * @param file the FILE stream, should opened for write. + * @param q The task #queue. + */ +void queue_dump(int nodeID, int index, FILE *file, struct queue *q) { + + swift_lock_type *qlock = &q->lock; + + /* Grab the queue lock. */ + if (lock_lock(qlock) != 0) error("Locking the qlock failed.\n"); + + /* Fill any tasks from the incoming DEQ. */ + queue_get_incoming(q); + + /* Loop over the queue entries. */ + for (int k = 0; k < q->count; k++) { + struct task *t = &q->tasks[q->tid[k]]; + + fprintf(file, "%d %d %d %s %s %.2f\n", nodeID, index, k, + taskID_names[t->type], subtaskID_names[t->subtype], t->weight); + } + + /* Release the task lock. */ + if (lock_unlock(qlock) != 0) error("Unlocking the qlock failed.\n"); +} diff --git a/src/queue.h b/src/queue.h index c85cf0cabe30a03d163e2564fdc216c19495761a..e3c7b6f7b2059df14c14813b34946df2a1935c74 100644 --- a/src/queue.h +++ b/src/queue.h @@ -67,4 +67,6 @@ void queue_init(struct queue *q, struct task *tasks); void queue_insert(struct queue *q, struct task *t); void queue_clean(struct queue *q); +void queue_dump(int nodeID, int index, FILE *file, struct queue *q); + #endif /* SWIFT_QUEUE_H */ diff --git a/src/scheduler.c b/src/scheduler.c index 4f4557c711660d9168ec8ce180eba210a94a79eb..fe9fb9a208ac6a1a0d816c9c42d921f697d0b345 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -2303,3 +2303,62 @@ void scheduler_write_task_level(const struct scheduler *s) { fclose(f); free(count); } +/** + * @brief dump all the active queues of all the known schedulers into a file. + * + * @param e the #scheduler + */ +void scheduler_dump_queues(struct engine *e) { + + struct scheduler *s = &e->sched; + + /* Open the file and write the header. */ + char dumpfile[35]; +#ifdef WITH_MPI + snprintf(dumpfile, sizeof(dumpfile), "queue_dump_MPI-step%d.dat", e->step); +#else + snprintf(dumpfile, sizeof(dumpfile), "queue_dump-step%d.dat", e->step); +#endif + FILE *file_thread = NULL; + if (engine_rank == 0) { + file_thread = fopen(dumpfile, "w"); + fprintf(file_thread, "# rank queue index type subtype weight\n"); + } + +#ifdef WITH_MPI + + /* Make sure output file is closed and empty, then we reopen on each rank. */ + if (engine_rank == 0) fclose(file_thread); + MPI_Barrier(MPI_COMM_WORLD); + + for (int i = 0; i < e->nr_nodes; i++) { + + /* Rank 0 decides the index of the writing node, this happens + * one-by-one. */ + int kk = i; + MPI_Bcast(&kk, 1, MPI_INT, 0, MPI_COMM_WORLD); + + if (i == engine_rank) { + + /* Open file and position at end. */ + file_thread = fopen(dumpfile, "a"); + + for (int l = 0; l < s->nr_queues; l++) { + queue_dump(engine_rank, l, file_thread, &s->queues[l]); + } + fclose(file_thread); + } + + /* And we wait for all to synchronize. */ + MPI_Barrier(MPI_COMM_WORLD); + } + +#else + + /* Non-MPI, so just a single schedulers worth of queues to dump. */ + for (int l = 0; l < s->nr_queues; l++) { + queue_dump(engine_rank, l, file_thread, &s->queues[l]); + } + fclose(file_thread); +#endif // WITH_MPI +} diff --git a/src/scheduler.h b/src/scheduler.h index ac9bc754db1ea03f91c0ce522dc325c18d25ec09..c12d2e98061b1d68a671d68ade9b9d879dfab651 100644 --- a/src/scheduler.h +++ b/src/scheduler.h @@ -201,5 +201,6 @@ void scheduler_clean(struct scheduler *s); void scheduler_free_tasks(struct scheduler *s); void scheduler_write_dependencies(struct scheduler *s, int verbose); void scheduler_write_task_level(const struct scheduler *s); +void scheduler_dump_queues(struct engine *e); #endif /* SWIFT_SCHEDULER_H */ diff --git a/src/task.c b/src/task.c index e4bc627def367ccf4984b08c7565f6bd466a9206..4a10e37ef432ec7be997e064b92d66ec5add3bd7 100644 --- a/src/task.c +++ b/src/task.c @@ -880,7 +880,7 @@ void task_create_mpi_comms(void) { * * Dumps the information to a file "thread_info-stepn.dat" where n is the * given step value, or "thread_info_MPI-stepn.dat", if we are running - * under MPI. Note if running under MPIU all the ranks are dumped into this + * under MPI. Note if running under MPI all the ranks are dumped into this * one file, which has an additional field to identify the rank. * * @param e the #engine @@ -1148,3 +1148,116 @@ void task_dump_stats(const char *dumpfile, struct engine *e, int header, } #endif } + +/** + * @brief dump all the active tasks of all the known engines into a file. + * + * Dumps the information to a file "task_dump-stepn.dat" where n is the given + * step value, or "task_dump_MPI-stepn.dat", if we are running under MPI. Note + * if running under MPI all the ranks are dumped into this one file, which has + * an additional field to identify the rank. Very similar to task_dump_all() + * except for the additional fields used in task debugging and we record tasks + * that have not ran (i.e !skip, but toc == 0) and how many waits are still + * active. + * + * @param e the #engine + */ +void task_dump_active(struct engine *e) { + + /* Need this to convert ticks to seconds. */ + unsigned long long cpufreq = clocks_get_cpufreq(); + +#ifdef WITH_MPI + /* Make sure output file is empty, only on one rank. */ + char dumpfile[35]; + snprintf(dumpfile, sizeof(dumpfile), "task_dump_MPI-step%d.dat", e->step); + FILE *file_thread; + if (engine_rank == 0) { + file_thread = fopen(dumpfile, "w"); + fprintf(file_thread, + "# rank otherrank type subtype waits pair tic toc" + " ci.hydro.count cj.hydro.count ci.grav.count cj.grav.count" + " flags\n"); + fclose(file_thread); + } + MPI_Barrier(MPI_COMM_WORLD); + + for (int i = 0; i < e->nr_nodes; i++) { + + /* Rank 0 decides the index of the writing node, this happens + * one-by-one. */ + int kk = i; + MPI_Bcast(&kk, 1, MPI_INT, 0, MPI_COMM_WORLD); + + if (i == engine_rank) { + + /* Open file and position at end. */ + file_thread = fopen(dumpfile, "a"); + + /* Add some information to help with the plots and conversion of ticks to + * seconds. */ + fprintf( + file_thread, "%i 0 none none -1 0 %lld %lld %lld %lld %lld 0 %lld\n", + engine_rank, (long long int)e->tic_step, (long long int)e->toc_step, + e->updates, e->g_updates, e->s_updates, cpufreq); + int count = 0; + for (int l = 0; l < e->sched.nr_tasks; l++) { + struct task *t = &e->sched.tasks[l]; + + /* Not implicit and not skipped. */ + if (!t->implicit && !t->skip) { + + /* Get destination rank of MPI requests. */ + int paired = (t->cj != NULL); + int otherrank = t->ci->nodeID; + if (paired) otherrank = t->cj->nodeID; + + fprintf(file_thread, "%i %i %s %s %i %i %lli %lli %i %i %i %i %lli\n", + engine_rank, otherrank, taskID_names[t->type], + subtaskID_names[t->subtype], t->wait, paired, + (long long int)t->tic, (long long int)t->toc, + (t->ci != NULL) ? t->ci->hydro.count : 0, + (t->cj != NULL) ? t->cj->hydro.count : 0, + (t->ci != NULL) ? t->ci->grav.count : 0, + (t->cj != NULL) ? t->cj->grav.count : 0, t->flags); + } + count++; + } + fclose(file_thread); + } + + /* And we wait for all to synchronize. */ + MPI_Barrier(MPI_COMM_WORLD); + } + +#else + /* Non-MPI, so just a single engine's worth of tasks to dump. */ + char dumpfile[32]; + snprintf(dumpfile, sizeof(dumpfile), "task_dump-step%d.dat", e->step); + FILE *file_thread; + file_thread = fopen(dumpfile, "w"); + fprintf(file_thread, + "#type subtype waits pair tic toc ci.hydro.count cj.hydro.count " + "ci.grav.count cj.grav.count\n"); + + /* Add some information to help with the plots and conversion of ticks to + * seconds. */ + fprintf(file_thread, "none none -1 0, %lld %lld %lld %lld %lld %lld\n", + (unsigned long long)e->tic_step, (unsigned long long)e->toc_step, + e->updates, e->g_updates, e->s_updates, cpufreq); + for (int l = 0; l < e->sched.nr_tasks; l++) { + struct task *t = &e->sched.tasks[l]; + if (!t->implicit && !t->skip) { + fprintf(file_thread, "%s %s %i %i %lli %lli %i %i %i %i\n", + taskID_names[t->type], subtaskID_names[t->subtype], t->wait, + (t->cj == NULL), (unsigned long long)t->tic, + (unsigned long long)t->toc, + (t->ci == NULL) ? 0 : t->ci->hydro.count, + (t->cj == NULL) ? 0 : t->cj->hydro.count, + (t->ci == NULL) ? 0 : t->ci->grav.count, + (t->cj == NULL) ? 0 : t->cj->grav.count); + } + } + fclose(file_thread); +#endif // WITH_MPI +} diff --git a/src/task.h b/src/task.h index 5dd4f9f994cd05838cb157a285e8d42616912deb..9a188e578d2e0d03c9d37af7ab54994abee2f4ac 100644 --- a/src/task.h +++ b/src/task.h @@ -242,6 +242,7 @@ void task_print(const struct task *t); void task_dump_all(struct engine *e, int step); void task_dump_stats(const char *dumpfile, struct engine *e, int header, int allranks); +void task_dump_active(struct engine *e); void task_get_full_name(int type, int subtype, char *name); void task_get_group_name(int type, int subtype, char *cluster);