diff --git a/.gitignore b/.gitignore
index 062378b9d28d9be3b1b937f7bed92675172e5275..88e311c84dc51a4e2dd4b987c96aa08204ded9ac 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,3 @@
 mpistalls
-mpistalls-timed
 *.o
 *~
diff --git a/Makefile b/Makefile
index 0e9fb5aabe26faf197833696d39c503a649721ce..830a22d6d2f71f513a2ab9567f4b0769c73f7c7a 100644
--- a/Makefile
+++ b/Makefile
@@ -1,11 +1,8 @@
-all: mpistalls mpistalls-timed
+all: mpistalls
 
 mpistalls: mpistalls.c mpiuse.c mpiuse.h atomic.h cycle.h clocks.h clocks.c
 	$(CC) -g -O0 -Wall -o mpistalls mpistalls.c mpiuse.c clocks.c -I/usr/include/mpi -lmpi -lpthread
 
-mpistalls-timed: mpistalls-timed.c mpiuse.c mpiuse.h atomic.h cycle.h clocks.h clocks.c
-	$(CC) -g -O0 -Wall -o mpistalls-timed mpistalls-timed.c mpiuse.c clocks.c -I/usr/include/mpi -lmpi -lpthread
-
 clean:
-	rm mpistalls mpistalls-timed
+	rm mpistalls
 
diff --git a/mpistalls-timed.c b/mpistalls-timed.c
deleted file mode 100644
index 28e31355ea13bca073ceaed4a2ff356546e82814..0000000000000000000000000000000000000000
--- a/mpistalls-timed.c
+++ /dev/null
@@ -1,392 +0,0 @@
-/**
- * Attempt to reproduce the asynchronous stalling that we see for medium
- * busy steps in SWIFT.
- *
- * Timed injection version.
- *
- * So we need to setup a multithreaded MPI program that performs asynchronous
- * exchanges of various data sizes and continuously checks the requests for
- * completion. Also need timers to record the time taken by all this...
- */
-#include <stdio.h>
-#include <mpi.h>
-#include <pthread.h>
-#include <stdlib.h>
-
-#include "atomic.h"
-#include "clocks.h"
-#include "error.h"
-#include "mpiuse.h"
-
-/* Global: Our rank for all to see. */
-int myrank = -1;
-
-/* Are we verbose. */
-static int verbose = 0;
-
-/* Attempt to keep original injection time differences. */
-static int usetics = 1;
-
-/* Integer types of send and recv tasks, must match log. */
-static const int task_type_send = 22;
-static const int task_type_recv = 23;
-
-/* Global communicators for each of the subtypes. */
-static const int task_subtype_count = 30; // Just some upper limit on subtype.
-static MPI_Comm subtypeMPI_comms[30];
-
-/* The local queues. */
-static struct mpiuse_log_entry **volatile reqs_queue;
-static int volatile ind_req = 0;
-static int volatile nr_reqs = 0;
-static int volatile injecting = 1;
-static struct mpiuse_log_entry **volatile recvs_queue;
-static int volatile nr_recvs = 0;
-static int volatile ind_recv = 0;
-static int volatile todo_recv = 0;
-static struct mpiuse_log_entry **volatile sends_queue;
-static int volatile nr_sends = 0;
-static int volatile ind_send = 0;
-static int volatile todo_send = 0;
-
-/* CPU frequency of the machine that created the MPI log. */
-// XXX need to store this in the data file.
-static double log_clocks_cpufreq = 2194844448.0;
-
-/**
- * @brief Injection thread, initiates MPI_Isend and MPI_Irecv requests.
- *
- * The requests are initiated in the time order of the original log and an
- * attempt to start these with the same interval gap is made if usetics is
- * set, otherwise we just do them as quickly as possible.
- */
-static void *inject_thread(void *arg) {
-
-  if (verbose)
-    message("%d: injection thread starts", *((int *)arg));
-  ticks starttics = getticks();
-
-  /* Ticks of our last attempt and ticks the first loop takes (usetics == 1). */
-  ticks basetic = reqs_queue[0]->tic;
-  ticks looptics = 0;
-
-  while (ind_req < nr_reqs) {
-    struct mpiuse_log_entry *log = reqs_queue[ind_req];
-
-    if (usetics) {
-      /* Expect time between this request and the previous one. */
-      ticks dt = log->tic - basetic;
-      basetic = log->tic;
-
-      /* We guess some time below which we should not attempt to wait,
-       * otherwise we'll start to overrun, and just inject the next call if we
-       * are below that (we time the ticks this loop takes without any waiting
-       * and use that). Otherwise we wait a while. Note we need to convert the
-       * ticks of the log file into nanoseconds, that requires the original
-       * CPU frequency. */
-      if (dt > looptics) {
-        struct timespec sleep;
-        sleep.tv_sec = 0;
-
-        /* Remember to be fair and remove the looptics, then convert to
-         * nanoseconds. */
-        double ns = (double)(dt - looptics) / log_clocks_cpufreq * 1.0e9;
-        if (ns < 1.0e9) {
-          sleep.tv_nsec = (long) ns;
-        } else {
-          /* Wait more than one second. Must be an error, but complain and
-           * continue.  */
-          sleep.tv_nsec = (long) 1.0e9;
-          message("wait greater than one second");
-        }
-        nanosleep(&sleep, NULL);
-      }
-    }
-
-    // Differences to SWIFT: MPI_BYTE might overflow, should use MPI_Type(?).
-    int err = 0;
-    if (log->type == task_type_send) {
-      err = MPI_Isend(log->data, log->size, MPI_BYTE, log->otherrank,
-                      log->tag, subtypeMPI_comms[log->subtype], &log->req);
-
-      /* Add a new send request. */
-      int ind = atomic_inc(&nr_sends);
-      sends_queue[ind] = log;
-      atomic_inc(&todo_send);
-
-    } else {
-      err = MPI_Irecv(log->data, log->size, MPI_BYTE, log->otherrank,
-                      log->tag, subtypeMPI_comms[log->subtype], &log->req);
-
-      /* Add a new recv request. */
-      int ind = atomic_inc(&nr_recvs);
-      recvs_queue[ind] = log;
-      atomic_inc(&todo_recv);
-    }
-    if (err != MPI_SUCCESS) error("Failed to activate send or recv");
-
-    ind_req++;
-
-    /* Set looptics on the first pass. Assumes MPI_Isend and MPI_Irecv are
-     * equally timed. */
-    if (looptics == 0 && usetics) {
-      looptics = starttics - getticks();
-      if (verbose)
-        message("injection loop took %.3f %s.", clocks_from_ticks(looptics),
-                clocks_getunit());
-    }
-  }
-
-  /* All done, thread exiting. */
-  if (verbose) {
-    message("%d injections completed, sends = %d, recvs = %d", ind_req,
-            nr_sends, nr_recvs);
-    message("remaining sends = %d, recvs = %d", todo_send, todo_recv);
-  }
-  message("took %.3f %s.", clocks_from_ticks(getticks() - starttics),
-          clocks_getunit());
-  atomic_dec(&injecting);
-  return NULL;
-}
-
-/**
- * @brief main loop to run over a queue of MPI requests and test for when they
- * complete. Returns the total amount of time spent in calls to MPI_Test and
- * the number of times it was called.
- *
- * @param logs the list of logs pointing to requests.
- * @param nr_logs pointer to the variable containing the current number of
- *                logs.
- * @param todos pointer to the variable containing the number of requests that
- *              are still active.
- * @param sum the total number of ticks spent in calls to MPI_Test.
- * @param ncalls the total number of calls to MPI_Test.
- * @param mint the minimum ticks an MPI_Test call took.
- * @param maxt the maximum ticks an MPI_Test call took.
- */
-static void queue_runner(struct mpiuse_log_entry **logs, int volatile *nr_logs,
-                         int volatile *todos, double *sum, int *ncalls,
-                         ticks *mint, ticks *maxt ) {
-
-  /* MPI_Test statistics. */
-  int lncalls = 0;
-  double lsum = 0.0;
-  ticks lmint = log_clocks_cpufreq;
-  ticks lmaxt = 0;
-
-  /* We loop while new requests are being injected and we still have requests
-   * to complete. */
-  while (injecting || (!injecting && *todos > 0)) {
-    int nlogs = *nr_logs;
-    for (int k = 0; k < nlogs; k++) {
-      struct mpiuse_log_entry *log = logs[k];
-      if (log != NULL) {
-        ticks tics = getticks();
-        int res;
-        MPI_Status stat;
-        int err = MPI_Test(&log->req, &res, &stat);
-        if (err != MPI_SUCCESS) {
-          error("MPI_Test call failed");
-        }
-        ticks dt = getticks() - tics;
-        lsum += (double)dt;
-        lncalls++;
-        if (dt < lmint) lmint = dt;
-        if (dt > lmaxt) lmaxt = dt;
-        if (res) {
-
-          /* Done, clean up. */
-          free(log->data);
-          logs[k] = NULL;
-          atomic_dec(todos);
-        }
-      }
-    }
-  }
-
-  /* All done. */
-  *sum = lsum;
-  *ncalls = lncalls;
-  *mint = lmint;
-  *maxt = lmaxt;
-  return;
-}
-
-/**
- * @brief Send thread, checks if MPI_Isend requests have completed.
- */
-static void *send_thread(void *arg) {
-
-  if (verbose)
-    message("%d: send thread starts (%d)", *((int *)arg), injecting);
-  ticks starttics = getticks();
-
-  int ncalls;
-  double sum;
-  ticks mint;
-  ticks maxt;
-  queue_runner(sends_queue, &nr_sends, &todo_send, &sum, &ncalls, &mint, &maxt);
-
-  message("%d MPI_Test calls took: %.3f, mean time %.3f, min time %.3f, max time %.3f (%s)",
-          ncalls, clocks_from_ticks(sum), clocks_from_ticks(sum/ncalls),
-          clocks_from_ticks(mint), clocks_from_ticks(maxt), clocks_getunit());
-  message("took %.3f %s.", clocks_from_ticks(getticks() - starttics),
-          clocks_getunit());
-
-  /* Thread exits. */
-  return NULL;
-}
-
-/**
- * @brief Recv thread, checks if MPI_Irecv requests have completed.
- */
-static void *recv_thread(void *arg) {
-
-  if (verbose)
-    message("%d: recv thread starts", *((int *)arg));
-  ticks starttics = getticks();
-
-  int ncalls;
-  double sum;
-  ticks mint;
-  ticks maxt;
-  queue_runner(recvs_queue, &nr_recvs, &todo_recv, &sum, &ncalls, &mint, &maxt);
-
-  message("%d MPI_Test calls took: %.3f, mean time %.3f, min time %.3f, max time %.3f (%s)",
-          ncalls, clocks_from_ticks(sum), clocks_from_ticks(sum/ncalls),
-          clocks_from_ticks(mint), clocks_from_ticks(maxt), clocks_getunit());
-  message("took %.3f %s.", clocks_from_ticks(getticks() - starttics),
-          clocks_getunit());
-
-  /* Thread exits. */
-  return NULL;
-}
-
-/**
- * @brief Comparison function for logged times.
- */
-static int cmp_logs(const void *p1, const void *p2) {
-  struct mpiuse_log_entry *l1 = *(struct mpiuse_log_entry **)p1;
-  struct mpiuse_log_entry *l2 = *(struct mpiuse_log_entry **)p2;
-
-  /* Large unsigned values, so take care. */
-  if (l1->tic > l2->tic)
-    return 1;
-  if (l1->tic < l2->tic)
-    return -1;
-  return 0;
-}
-
-/**
- * @brief Pick out the relevant logging data for our rank, i.e. all
- * activations of sends and recvs. We ignore the original completions.
- * The final list is sorted into increasing time of activation.
- */
-static void pick_logs(void) {
-  size_t nlogs = mpiuse_nr_logs();
-
-  /* Duplicate of logs. */
-  reqs_queue = (struct mpiuse_log_entry **)
-    malloc(sizeof(struct mpiuse_log_entry *) * nlogs);
-  nr_reqs= 0;
-  sends_queue = (struct mpiuse_log_entry **)
-    malloc(sizeof(struct mpiuse_log_entry *) * nlogs);
-  nr_sends= 0;
-  recvs_queue = (struct mpiuse_log_entry **)
-    malloc(sizeof(struct mpiuse_log_entry *) * nlogs);
-  nr_recvs = 0;
-
-  for (int k = 0; k < nlogs; k++) {
-    struct mpiuse_log_entry *log = mpiuse_get_log(k);
-    if (log->rank == myrank && log->activation) {
-      if (log->type == task_type_send || log->type == task_type_recv) {
-
-        /* Allocate space for data. */
-        log->data = calloc(log->size, 1);
-
-        /* And keep this log. */
-        reqs_queue[nr_reqs] = log;
-        nr_reqs++;
-
-      } else {
-        error("task type '%d' is not a known send or recv task", log->type);
-      }
-    }
-  }
-
-  /* Sort into increasing time. */
-  qsort(reqs_queue, nr_reqs, sizeof(struct mpiuse_log_entry *), cmp_logs);
-}
-
-
-/**
- * @brief main function.
- */
-int main(int argc, char *argv[]) {
-
-  /* First we read the SWIFT MPI logger output that defines the communcations
-   * we will undertake and the time differences between injections into the
-   * queues. */
-  mpiuse_log_restore("testdata/mpiuse_report-step1.dat");
-  int nranks = mpiuse_nr_ranks();
-
-  /* Initiate MPI. */
-  int prov = 0;
-  int res = MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &prov);
-  if (res != MPI_SUCCESS)
-    error("Call to MPI_Init_thread failed with error %i.", res);
-
-  int nr_nodes = 0;
-  res = MPI_Comm_size(MPI_COMM_WORLD, &nr_nodes);
-  if (res != MPI_SUCCESS)
-    error("MPI_Comm_size failed with error %i.", res);
-
-  /* This should match the expected size. */
-  if (nr_nodes != nranks)
-    error("The number of MPI ranks %d does not match the expected value %d",
-          nranks, nr_nodes);
-
-  res = MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
-  if (res != MPI_SUCCESS)
-    error("Call to MPI_Comm_rank failed with error %i.", res);
-
-  /* Create communicators for each subtype of the tasks. */
-  for (int i = 0; i < task_subtype_count; i++) {
-    MPI_Comm_dup(MPI_COMM_WORLD, &subtypeMPI_comms[i]);
-  }
-
-  /* Each rank requires its own queue, so extract them. */
-  pick_logs();
-
-  /* Time to start time. */
-  clocks_set_cpufreq(0);
-  if (myrank == 0) {
-    message("Start of MPI tests");
-    message("==================");
-  }
-
-  /* Make three threads, one for injecting tasks and two to check for
-   * completions of the sends and recv independently. */
-  pthread_t injectthread;
-  if (pthread_create(&injectthread, NULL, &inject_thread, &myrank) != 0)
-    error("Failed to create injection thread.");
-  pthread_t sendthread;
-  if (pthread_create(&sendthread, NULL, &send_thread, &myrank) != 0)
-    error("Failed to create send thread.");
-  pthread_t recvthread;
-  if (pthread_create(&recvthread, NULL, &recv_thread, &myrank) != 0)
-    error("Failed to create recv thread.");
-
-  /* Wait until all threads have exited and all MPI requests have completed. */
-  pthread_join(injectthread, NULL);
-  pthread_join(sendthread, NULL);
-  pthread_join(recvthread, NULL);
-
-  /* Shutdown MPI. */
-  res = MPI_Finalize();
-  if (res != MPI_SUCCESS)
-    error("call to MPI_Finalize failed with error %i.", res);
-  if (myrank == 0) message("Bye");
-
-  return 0;
-}
diff --git a/mpistalls.c b/mpistalls.c
index 11296a8dfdbc213325a4c0b6be231667512e1066..28e31355ea13bca073ceaed4a2ff356546e82814 100644
--- a/mpistalls.c
+++ b/mpistalls.c
@@ -2,6 +2,8 @@
  * Attempt to reproduce the asynchronous stalling that we see for medium
  * busy steps in SWIFT.
  *
+ * Timed injection version.
+ *
  * So we need to setup a multithreaded MPI program that performs asynchronous
  * exchanges of various data sizes and continuously checks the requests for
  * completion. Also need timers to record the time taken by all this...
@@ -12,12 +14,19 @@
 #include <stdlib.h>
+#include <time.h> /* nanosleep() and struct timespec */
 
 #include "atomic.h"
+#include "clocks.h"
 #include "error.h"
 #include "mpiuse.h"
 
 /* Global: Our rank for all to see. */
 int myrank = -1;
 
+/* Are we verbose? */
+static int verbose = 0;
+
+/* Attempt to keep original injection time differences. */
+static int usetics = 1;
+
 /* Integer types of send and recv tasks, must match log. */
 static const int task_type_send = 22;
 static const int task_type_recv = 23;
@@ -40,16 +49,61 @@ static int volatile nr_sends = 0;
 static int volatile ind_send = 0;
 static int volatile todo_send = 0;
 
-/* Injection thread, initiates MPI_Isend and MPI_Irecv requests at various
- * times. */
+/* CPU frequency of the machine that created the MPI log. */
+// XXX need to store this in the data file.
+static double log_clocks_cpufreq = 2194844448.0;
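+// For reference: at this frequency 2194844448 ticks in the log correspond
+// to one second of wall-clock time, i.e. seconds = dt / log_clocks_cpufreq.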
+
+/**
+ * @brief Injection thread, initiates MPI_Isend and MPI_Irecv requests.
+ *
+ * The requests are initiated in the time order of the original log and an
+ * attempt to start these with the same interval gap is made if usetics is
+ * set, otherwise we just do them as quickly as possible.
+ */
 static void *inject_thread(void *arg) {
-  message("%d: injection thread starts", *((int *)arg));
+
+  if (verbose)
+    message("%d: injection thread starts", *((int *)arg));
+  ticks starttics = getticks();
+
+  /* Ticks of our last attempt and ticks the first loop takes (usetics == 1). */
+  ticks basetic = reqs_queue[0]->tic;
+  ticks looptics = 0;
 
   while (ind_req < nr_reqs) {
     struct mpiuse_log_entry *log = reqs_queue[ind_req];
 
-    // Differences to SWIFT: MPI_BYTE might overflow, should use MPI_Type(?)
-    // injections not timed.
+    if (usetics) {
+      /* Expected time between this request and the previous one. */
+      ticks dt = log->tic - basetic;
+      basetic = log->tic;
+
+      /* We guess a time below which we should not attempt to wait,
+       * otherwise we start to overrun: the ticks one pass of this loop
+       * takes without any waiting (looptics). If the gap is smaller than
+       * that we just inject the next call, otherwise we sleep for the
+       * remainder. Note the ticks of the log file must be converted into
+       * nanoseconds, which requires the CPU frequency of the original
+       * machine. */
+      if (dt > looptics) {
+        struct timespec sleep;
+        sleep.tv_sec = 0;
+
+        /* Remember to be fair and remove the looptics, then convert to
+         * nanoseconds. */
+        double ns = (double)(dt - looptics) / log_clocks_cpufreq * 1.0e9;
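+        /* e.g. a surplus of ~2.2e6 ticks at this frequency converts to
+         * ~1.0e6 ns, so we would sleep for about a millisecond. */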
+        if (ns < 1.0e9) {
+          sleep.tv_nsec = (long) ns;
+        } else {
+          /* A wait of more than one second is most likely an error, so
+           * complain, but continue with the longest wait that nanosleep()
+           * accepts (tv_nsec must be less than 1.0e9). */
+          sleep.tv_nsec = 999999999L;
+          message("wait greater than one second");
+        }
+        nanosleep(&sleep, NULL);
+      }
+    }
+
+    // Differences to SWIFT: MPI_BYTE might overflow, should use MPI_Type(?).
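+    // (log->size is passed as the int count argument, so messages beyond
+    // INT_MAX bytes would need a different datatype or count.)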
     int err = 0;
     if (log->type == task_type_send) {
       err = MPI_Isend(log->data, log->size, MPI_BYTE, log->otherrank,
@@ -72,84 +126,145 @@ static void *inject_thread(void *arg) {
     if (err != MPI_SUCCESS) error("Failed to activate send or recv");
 
     ind_req++;
+
+    /* Set looptics on the first pass. Assumes MPI_Isend and MPI_Irecv are
+     * equally timed. */
+    if (looptics == 0 && usetics) {
+      looptics = getticks() - starttics;
+      if (verbose)
+        message("injection loop took %.3f %s.", clocks_from_ticks(looptics),
+                clocks_getunit());
+    }
+  }
+
+  /* All done, thread exiting. */
+  if (verbose) {
+    message("%d injections completed, sends = %d, recvs = %d", ind_req,
+            nr_sends, nr_recvs);
+    message("remaining sends = %d, recvs = %d", todo_send, todo_recv);
   }
-  message("%d injections completed, sends = %d, recvs = %d", ind_req,
-          nr_sends, nr_recvs);
-  message("remaining sends = %d, recvs = %d", todo_send, todo_recv);
+  message("took %.3f %s.", clocks_from_ticks(getticks() - starttics),
+          clocks_getunit());
   atomic_dec(&injecting);
   return NULL;
 }
 
-/* Send thread, checks if MPI_Isend requests have completed. */
-static void *send_thread(void *arg) {
-  message("%d: send thread starts (%d)", *((int *)arg), injecting);
-
-  int res;
-  MPI_Status stat;
-
-  // Need a test that only exits when requests are all inserted and we have
-  // emptied our queue. */
-  size_t attempts = 0;
-  while (injecting || (!injecting && todo_send > 0)) {
-    int nsends = nr_sends;
-    for (int k = 0; k < nsends; k++) {
-      struct mpiuse_log_entry *log = sends_queue[k];
+/**
+ * @brief Main loop to run over a queue of MPI requests and test for when
+ * they complete. The total time spent in calls to MPI_Test, the number of
+ * calls made and the minimum and maximum call times are returned through
+ * the pointer arguments.
+ *
+ * @param logs the list of logs pointing to requests.
+ * @param nr_logs pointer to the variable containing the current number of
+ *                logs.
+ * @param todos pointer to the variable containing the number of requests that
+ *              are still active.
+ * @param sum the total number of ticks spent in calls to MPI_Test.
+ * @param ncalls the total number of calls to MPI_Test.
+ * @param mint the minimum ticks an MPI_Test call took.
+ * @param maxt the maximum ticks an MPI_Test call took.
+ */
+static void queue_runner(struct mpiuse_log_entry **logs, int volatile *nr_logs,
+                         int volatile *todos, double *sum, int *ncalls,
+                         ticks *mint, ticks *maxt) {
+
+  /* MPI_Test statistics. */
+  int lncalls = 0;
+  double lsum = 0.0;
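+  /* Start the minimum at ~1s worth of ticks (reusing the logging CPU
+   * frequency as a conveniently large value); any single MPI_Test call
+   * should be far quicker than this. */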
+  ticks lmint = log_clocks_cpufreq;
+  ticks lmaxt = 0;
+
+  /* We loop while new requests are being injected or we still have
+   * requests to complete. */
+  while (injecting || *todos > 0) {
+    int nlogs = *nr_logs;
+    for (int k = 0; k < nlogs; k++) {
+      struct mpiuse_log_entry *log = logs[k];
       if (log != NULL) {
-        attempts++;
+        ticks tics = getticks();
+        int res;
+        MPI_Status stat;
         int err = MPI_Test(&log->req, &res, &stat);
         if (err != MPI_SUCCESS) {
           error("MPI_Test call failed");
         }
+        ticks dt = getticks() - tics;
+        lsum += (double)dt;
+        lncalls++;
+        if (dt < lmint) lmint = dt;
+        if (dt > lmaxt) lmaxt = dt;
         if (res) {
+
           /* Done, clean up. */
-          //message("MPI_Test successful");
           free(log->data);
-          sends_queue[k] = NULL;
-          atomic_dec(&todo_send);
+          logs[k] = NULL;
+          atomic_dec(todos);
         }
       }
     }
   }
-  message("sends completed, required %zd attempts (left: %d)", attempts,
-          todo_send);
+
+  /* All done. */
+  *sum = lsum;
+  *ncalls = lncalls;
+  *mint = lmint;
+  *maxt = lmaxt;
+  return;
+}
+
+/**
+ * @brief Send thread, checks if MPI_Isend requests have completed.
+ */
+static void *send_thread(void *arg) {
+
+  if (verbose)
+    message("%d: send thread starts (%d)", *((int *)arg), injecting);
+  ticks starttics = getticks();
+
+  int ncalls;
+  double sum;
+  ticks mint;
+  ticks maxt;
+  queue_runner(sends_queue, &nr_sends, &todo_send, &sum, &ncalls, &mint, &maxt);
+
+  message("%d MPI_Test calls took: %.3f, mean time %.3f, min time %.3f, max time %.3f (%s)",
+          ncalls, clocks_from_ticks(sum), clocks_from_ticks(sum/ncalls),
+          clocks_from_ticks(mint), clocks_from_ticks(maxt), clocks_getunit());
+  message("took %.3f %s.", clocks_from_ticks(getticks() - starttics),
+          clocks_getunit());
+
+  /* Thread exits. */
   return NULL;
 }
 
-/* Recv thread, checks if MPI_Irecv requests have completed. */
+/**
+ * @brief Recv thread, checks if MPI_Irecv requests have completed.
+ */
 static void *recv_thread(void *arg) {
-  message("%d: recv thread starts", *((int *)arg));
 
-  int res;
-  MPI_Status stat;
+  if (verbose)
+    message("%d: recv thread starts", *((int *)arg));
+  ticks starttics = getticks();
 
-  size_t attempts = 0;
-  while (injecting || (!injecting && todo_recv > 0)) {
-    int nrecvs = nr_recvs;
-    for (int k = 0; k < nrecvs; k++) {
-      struct mpiuse_log_entry *log = recvs_queue[k];
-      if (log != NULL) {
-        attempts++;
-        int err = MPI_Test(&log->req, &res, &stat);
-        if (err != MPI_SUCCESS) {
-          error("MPI_Test call failed");
-        }
-        if (res) {
-          /* Done, clean up. */
-          //message("MPI_Test successful");
-          free(log->data);
-          recvs_queue[k] = NULL;
-          atomic_dec(&todo_recv);
-        }
-      }
-    }
-  }
+  int ncalls;
+  double sum;
+  ticks mint;
+  ticks maxt;
+  queue_runner(recvs_queue, &nr_recvs, &todo_recv, &sum, &ncalls, &mint, &maxt);
 
-  message("recvs completed, required %zd attempts (left: %d)", attempts,
-          todo_recv);
+  message("%d MPI_Test calls took: %.3f, mean time %.3f, min time %.3f, max time %.3f (%s)",
+          ncalls, clocks_from_ticks(sum), clocks_from_ticks(sum/ncalls),
+          clocks_from_ticks(mint), clocks_from_ticks(maxt), clocks_getunit());
+  message("took %.3f %s.", clocks_from_ticks(getticks() - starttics),
+          clocks_getunit());
+
+  /* Thread exits. */
   return NULL;
 }
 
-/* Comparison function for logged times. */
+/**
+ * @brief Comparison function for logged times.
+ */
 static int cmp_logs(const void *p1, const void *p2) {
   struct mpiuse_log_entry *l1 = *(struct mpiuse_log_entry **)p1;
   struct mpiuse_log_entry *l2 = *(struct mpiuse_log_entry **)p2;
@@ -162,12 +277,15 @@ static int cmp_logs(const void *p1, const void *p2) {
   return 0;
 }
 
-/* Pick out the relevant logging data for our rank, i.e. all activations of
- * sends and recvs. */
+/**
+ * @brief Pick out the relevant logging data for our rank, i.e. all
+ * activations of sends and recvs. We ignore the original completions.
+ * The final list is sorted into increasing time of activation.
+ */
 static void pick_logs(void) {
   size_t nlogs = mpiuse_nr_logs();
 
-  /* Duplicate of logs. XXX could loop twice to reduce memory use if needed. */
+  /* Duplicate of logs. */
   reqs_queue = (struct mpiuse_log_entry **)
     malloc(sizeof(struct mpiuse_log_entry *) * nlogs);
   nr_reqs= 0;
@@ -200,6 +318,10 @@ static void pick_logs(void) {
   qsort(reqs_queue, nr_reqs, sizeof(struct mpiuse_log_entry *), cmp_logs);
 }
 
+/**
+ * @brief Main function.
+ */
 int main(int argc, char *argv[]) {
 
   /* First we read the SWIFT MPI logger output that defines the communcations
@@ -232,11 +354,17 @@ int main(int argc, char *argv[]) {
   for (int i = 0; i < task_subtype_count; i++) {
     MPI_Comm_dup(MPI_COMM_WORLD, &subtypeMPI_comms[i]);
   }
-  message("Starts");
 
   /* Each rank requires its own queue, so extract them. */
   pick_logs();
 
+  /* Time to start timing. */
+  clocks_set_cpufreq(0);
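+  /* A zero argument is assumed to make clocks.c estimate this machine's
+   * CPU frequency rather than use a fixed value. */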
+  if (myrank == 0) {
+    message("Start of MPI tests");
+    message("==================");
+  }
+
   /* Make three threads, one for injecting tasks and two to check for
    * completions of the sends and recv independently. */
   pthread_t injectthread;
@@ -258,6 +386,7 @@ int main(int argc, char *argv[]) {
   res = MPI_Finalize();
   if (res != MPI_SUCCESS)
     error("call to MPI_Finalize failed with error %i.", res);
+  if (myrank == 0) message("Bye");
 
   return 0;
 }