diff --git a/configure.ac b/configure.ac
index 9f50ba4563cad932314a9f5e7e0bd8cf13b88b7c..e46d7a27d34c19c0d086cb6b33a341ba0fbd4ae2 100644
--- a/configure.ac
+++ b/configure.ac
@@ -407,6 +407,18 @@ if test "$enable_memuse_reports" = "yes"; then
    AC_DEFINE([SWIFT_MEMUSE_REPORTS],1,[Enable memory usage reports])
 fi
 
+# Check if we want to make the dumper thread active.
+AC_ARG_ENABLE([dumper],
+   [AS_HELP_STRING([--enable-dumper],
+     [Dump active tasks and memory use (if configured)@<:@yes/no@:>@]
+   )],
+   [enable_dumper="$enableval"],
+   [enable_dumper="no"]
+)
+if test "$enable_dumper" = "yes"; then
+   AC_DEFINE([SWIFT_DUMPER_THREAD],1,[Enable dumper thread])
+fi
+
 # See if we want mpi reporting.
 AC_ARG_ENABLE([mpiuse-reports],
    [AS_HELP_STRING([--enable-mpiuse-reports],
diff --git a/src/engine.c b/src/engine.c
index c1e294a442b7a78091ba0bf0af6e9f965a57d024..fc83a3f21c1958e5fc78885e856990576be12bf7 100644
--- a/src/engine.c
+++ b/src/engine.c
@@ -37,6 +37,7 @@
 #include <sys/types.h>
 #include <unistd.h>
 
+
 /* MPI headers. */
 #ifdef WITH_MPI
 
@@ -3383,6 +3384,67 @@ void engine_unpin(void) {
 #endif
 }
 
+#ifdef SWIFT_DUMPER_THREAD
+/**
+ * @brief dumper thread action, checks got the existence of the .dump file
+ * every 5 seconds and does the dump if found.
+ *
+ * @param p the #engine
+ */
+static void *engine_dumper_poll(void *p) {
+  struct engine *e = (struct engine *)p;
+  while (1) {
+    if (access(".dump", F_OK) == 0) {
+
+      /* OK, do our work. */
+      message("Dumping engine tasks in step: %d", e->step);
+      task_dump_active(e);
+
+#ifdef SWIFT_MEMUSE_REPORTS
+      /* Dump the currently logged memory. */
+      message("Dumping memory use report");
+      memuse_log_dump_error(e->nodeID);
+#endif
+
+      /* Add more interesting diagnostics. */
+      scheduler_dump_queues(e);
+
+      /* Delete the file. */
+      unlink(".dump");
+      message("Dumping completed");
+      fflush(stdout);
+    }
+
+    /* Take a breath. */
+    sleep(5);
+  }
+  return NULL;
+}
+#endif /* SWIFT_DUMPER_THREAD */
+
+#ifdef SWIFT_DUMPER_THREAD
+/**
+ * @brief creates the dumper thread.
+ *
+ * This watches for the creation of a ".dump" file in the current directory
+ * and if found dumps the current state of the tasks and memory use (if also
+ * configured).
+ *
+ * @param e the #engine
+ *
+ */
+static void engine_dumper_init(struct engine *e) {
+  pthread_t dumper;
+
+  /* Make sure the .dump file is not present, that is bad when starting up. */
+  struct stat buf;
+  if (stat(".dump", &buf) == 0) unlink(".dump");
+
+  /* Thread does not exit, so nothing to do but create it. */
+  pthread_create(&dumper, NULL, &engine_dumper_poll, e);
+}
+#endif /* SWIFT_DUMPER_THREAD */
+
 /**
  * @brief init an engine struct with the necessary properties for the
  *        simulation.
@@ -4283,6 +4345,12 @@ void engine_config(int restart, int fof, struct engine *e,
   }
 #endif
 
+#ifdef SWIFT_DUMPER_THREAD
+
+  /* Start the dumper thread.*/
+  engine_dumper_init(e);
+#endif
+
   /* Wait for the runner threads to be in place. */
   swift_barrier_wait(&e->wait_barrier);
 }
diff --git a/src/queue.c b/src/queue.c
index 3a9919163a4fb9d146c055e58859a175858c17eb..95ade9027f5b3a2ba7ae5a75e7f6e5719ab34731 100644
--- a/src/queue.c
+++ b/src/queue.c
@@ -301,3 +301,34 @@ void queue_clean(struct queue *q) {
   free(q->tid);
   free(q->tid_incoming);
 }
+
+
+/**
+ * @brief Dump a formatted list of tasks in the queue to the given file stream.
+ *
+ * @param nodeID the node id of this rank.
+ * @param index a number for this queue, added to the output.
+ * @param file the FILE stream, should opened for write.
+ * @param q The task #queue.
+ */
+void queue_dump(int nodeID, int index, FILE *file, struct queue *q) {
+
+  swift_lock_type *qlock = &q->lock;
+
+  /* Grab the queue lock. */
+  if (lock_lock(qlock) != 0) error("Locking the qlock failed.\n");
+
+  /* Fill any tasks from the incoming DEQ. */
+  queue_get_incoming(q);
+
+  /* Loop over the queue entries. */
+  for (int k = 0; k < q->count; k++) {
+    struct task *t = &q->tasks[q->tid[k]];
+
+    fprintf(file, "%d %d %d %s %s %.2f\n", nodeID, index, k,
+            taskID_names[t->type], subtaskID_names[t->subtype], t->weight);
+  }
+
+  /* Release the task lock. */
+  if (lock_unlock(qlock) != 0) error("Unlocking the qlock failed.\n");
+}
diff --git a/src/queue.h b/src/queue.h
index c85cf0cabe30a03d163e2564fdc216c19495761a..e3c7b6f7b2059df14c14813b34946df2a1935c74 100644
--- a/src/queue.h
+++ b/src/queue.h
@@ -67,4 +67,6 @@ void queue_init(struct queue *q, struct task *tasks);
 void queue_insert(struct queue *q, struct task *t);
 void queue_clean(struct queue *q);
 
+void queue_dump(int nodeID, int index, FILE *file, struct queue *q);
+
 #endif /* SWIFT_QUEUE_H */
diff --git a/src/scheduler.c b/src/scheduler.c
index 28482ae281cfd0ca4c6d1da1b5134ab77441d466..6d076d11461bc25f4d7c3ae90b66ec7df0085e7f 100644
--- a/src/scheduler.c
+++ b/src/scheduler.c
@@ -2274,3 +2274,62 @@ void scheduler_write_task_level(const struct scheduler *s) {
   fclose(f);
   free(count);
 }
+/**
+ * @brief dump all the active queues of all the known schedulers into a file.
+ *
+ * @param e the #scheduler
+ */
+void scheduler_dump_queues(struct engine *e) {
+
+  struct scheduler *s = &e->sched;
+
+  /* Open the file and write the header. */
+  char dumpfile[35];
+#ifdef WITH_MPI
+  snprintf(dumpfile, sizeof(dumpfile), "queue_dump_MPI-step%d.dat", e->step);
+#else
+  snprintf(dumpfile, sizeof(dumpfile), "queue_dump-step%d.dat", e->step);
+#endif
+  FILE *file_thread = NULL;
+  if (engine_rank == 0) {
+    file_thread = fopen(dumpfile, "w");
+    fprintf(file_thread, "# rank queue index type subtype weight\n");
+  }
+
+#ifdef WITH_MPI
+
+  /* Make sure output file is closed and empty, then we reopen on each rank. */
+  if (engine_rank == 0) fclose(file_thread);
+  MPI_Barrier(MPI_COMM_WORLD);
+
+  for (int i = 0; i < e->nr_nodes; i++) {
+
+    /* Rank 0 decides the index of the writing node, this happens
+     * one-by-one. */
+    int kk = i;
+    MPI_Bcast(&kk, 1, MPI_INT, 0, MPI_COMM_WORLD);
+
+    if (i == engine_rank) {
+
+      /* Open file and position at end. */
+      file_thread = fopen(dumpfile, "a");
+
+      for (int l = 0; l < s->nr_queues; l++) {
+        queue_dump(engine_rank, l, file_thread, &s->queues[l]);
+      }
+      fclose(file_thread);
+    }
+
+    /* And we wait for all to synchronize. */
+    MPI_Barrier(MPI_COMM_WORLD);
+  }
+
+#else
+
+  /* Non-MPI, so just a single schedulers worth of queues to dump. */
+  for (int l = 0; l < s->nr_queues; l++) {
+    queue_dump(engine_rank, l, file_thread, &s->queues[l]);
+  }
+  fclose(file_thread);
+#endif  // WITH_MPI
+}
diff --git a/src/scheduler.h b/src/scheduler.h
index 694a88017b863d29ad9d15a4ae7d5acac5cf3223..23cb39afbd7ac890d2d3c7ecacef3bffe01b0128 100644
--- a/src/scheduler.h
+++ b/src/scheduler.h
@@ -202,5 +202,6 @@ void scheduler_clean(struct scheduler *s);
 void scheduler_free_tasks(struct scheduler *s);
 void scheduler_write_dependencies(struct scheduler *s, int verbose);
 void scheduler_write_task_level(const struct scheduler *s);
+void scheduler_dump_queues(struct engine *e);
 
 #endif /* SWIFT_SCHEDULER_H */
diff --git a/src/task.c b/src/task.c
index 1ba295588769c70ad48a4f713a8ed96c3defbe01..1f91ce2fb926892edf9b8e3ff9cf457c6d6c03a8 100644
--- a/src/task.c
+++ b/src/task.c
@@ -905,7 +905,7 @@ void task_free_mpi_comms(void) {
  *
  * Dumps the information to a file "thread_info-stepn.dat" where n is the
  * given step value, or "thread_info_MPI-stepn.dat", if we are running
- * under MPI. Note if running under MPIU all the ranks are dumped into this
+ * under MPI. Note if running under MPI all the ranks are dumped into this
  * one file, which has an additional field to identify the rank.
  *
  * @param e the #engine
@@ -1172,3 +1172,116 @@ void task_dump_stats(const char *dumpfile, struct engine *e, int header,
   }
 #endif
 }
+
+/**
+ * @brief dump all the active tasks of all the known engines into a file.
+ *
+ * Dumps the information to a file "task_dump-stepn.dat" where n is the given
+ * step value, or "task_dump_MPI-stepn.dat", if we are running under MPI. Note
+ * if running under MPI all the ranks are dumped into this one file, which has
+ * an additional field to identify the rank. Very similar to task_dump_all()
+ * except for the additional fields used in task debugging and we record tasks
+ * that have not ran (i.e !skip, but toc == 0) and how many waits are still
+ * active.
+ *
+ * @param e the #engine
+ */
+void task_dump_active(struct engine *e) {
+
+  /* Need this to convert ticks to seconds. */
+  unsigned long long cpufreq = clocks_get_cpufreq();
+
+#ifdef WITH_MPI
+  /* Make sure output file is empty, only on one rank. */
+  char dumpfile[35];
+  snprintf(dumpfile, sizeof(dumpfile), "task_dump_MPI-step%d.dat", e->step);
+  FILE *file_thread;
+  if (engine_rank == 0) {
+    file_thread = fopen(dumpfile, "w");
+    fprintf(file_thread,
+            "# rank otherrank type subtype waits pair tic toc"
+            " ci.hydro.count cj.hydro.count ci.grav.count cj.grav.count"
+            " flags\n");
+    fclose(file_thread);
+  }
+  MPI_Barrier(MPI_COMM_WORLD);
+
+  for (int i = 0; i < e->nr_nodes; i++) {
+
+    /* Rank 0 decides the index of the writing node, this happens
+     * one-by-one. */
+    int kk = i;
+    MPI_Bcast(&kk, 1, MPI_INT, 0, MPI_COMM_WORLD);
+
+    if (i == engine_rank) {
+
+      /* Open file and position at end. */
+      file_thread = fopen(dumpfile, "a");
+
+      /* Add some information to help with the plots and conversion of ticks to
+       * seconds. */
+      fprintf(
+          file_thread, "%i 0 none none -1 0 %lld %lld %lld %lld %lld 0 %lld\n",
+          engine_rank, (long long int)e->tic_step, (long long int)e->toc_step,
+          e->updates, e->g_updates, e->s_updates, cpufreq);
+      int count = 0;
+      for (int l = 0; l < e->sched.nr_tasks; l++) {
+        struct task *t = &e->sched.tasks[l];
+
+        /* Not implicit and not skipped. */
+        if (!t->implicit && !t->skip) {
+
+          /* Get destination rank of MPI requests. */
+          int paired = (t->cj != NULL);
+          int otherrank = t->ci->nodeID;
+          if (paired) otherrank = t->cj->nodeID;
+
+          fprintf(file_thread, "%i %i %s %s %i %i %lli %lli %i %i %i %i %lli\n",
+                  engine_rank, otherrank, taskID_names[t->type],
+                  subtaskID_names[t->subtype], t->wait, paired,
+                  (long long int)t->tic, (long long int)t->toc,
+                  (t->ci != NULL) ? t->ci->hydro.count : 0,
+                  (t->cj != NULL) ? t->cj->hydro.count : 0,
+                  (t->ci != NULL) ? t->ci->grav.count : 0,
+                  (t->cj != NULL) ? t->cj->grav.count : 0, t->flags);
+        }
+        count++;
+      }
+      fclose(file_thread);
+    }
+
+    /* And we wait for all to synchronize. */
+    MPI_Barrier(MPI_COMM_WORLD);
+  }
+
+#else
+  /* Non-MPI, so just a single engine's worth of tasks to dump. */
+  char dumpfile[32];
+  snprintf(dumpfile, sizeof(dumpfile), "task_dump-step%d.dat", e->step);
+  FILE *file_thread;
+  file_thread = fopen(dumpfile, "w");
+  fprintf(file_thread,
+          "#type subtype waits pair tic toc ci.hydro.count cj.hydro.count "
+          "ci.grav.count cj.grav.count\n");
+
+  /* Add some information to help with the plots and conversion of ticks to
+   * seconds. */
+  fprintf(file_thread, "none none -1 0, %lld %lld %lld %lld %lld %lld\n",
+          (unsigned long long)e->tic_step, (unsigned long long)e->toc_step,
+          e->updates, e->g_updates, e->s_updates, cpufreq);
+  for (int l = 0; l < e->sched.nr_tasks; l++) {
+    struct task *t = &e->sched.tasks[l];
+    if (!t->implicit && !t->skip) {
+      fprintf(file_thread, "%s %s %i %i %lli %lli %i %i %i %i\n",
+              taskID_names[t->type], subtaskID_names[t->subtype], t->wait,
+              (t->cj == NULL), (unsigned long long)t->tic,
+              (unsigned long long)t->toc,
+              (t->ci == NULL) ? 0 : t->ci->hydro.count,
+              (t->cj == NULL) ? 0 : t->cj->hydro.count,
+              (t->ci == NULL) ? 0 : t->ci->grav.count,
+              (t->cj == NULL) ? 0 : t->cj->grav.count);
+    }
+  }
+  fclose(file_thread);
+#endif  // WITH_MPI
+}
diff --git a/src/task.h b/src/task.h
index d3024d4d5dfc93e9dcdee2346127265b102ea9c7..02e1c5ca385749c3dff347b44d8e1f0e984fadd0 100644
--- a/src/task.h
+++ b/src/task.h
@@ -239,6 +239,7 @@ void task_print(const struct task *t);
 void task_dump_all(struct engine *e, int step);
 void task_dump_stats(const char *dumpfile, struct engine *e, int header,
                      int allranks);
+void task_dump_active(struct engine *e);
 void task_get_full_name(int type, int subtype, char *name);
 void task_get_group_name(int type, int subtype, char *cluster);