From d75d43fd0c766f4b2642b0ca23b562d8d06992fe Mon Sep 17 00:00:00 2001
From: "Peter W. Draper" <p.w.draper@durham.ac.uk>
Date: Wed, 25 Sep 2019 16:39:19 +0100
Subject: [PATCH] Use MPI_Waitall to see if that helps or not. It does neither

---
 README.md         |  5 ++++
 swiftmpistepsim.c | 74 ++++++++++++++++++++++++-----------------------
 2 files changed, 43 insertions(+), 36 deletions(-)

diff --git a/README.md b/README.md
index 2601f6e..beb0652 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,11 @@
 SWIFTmpistepsim
 ===============
 
+Variation: this branch uses an `MPI_Waitall` call to synchronously wait
+for all the available requests, rather than polling using `MPI_Test`.
+The result runs slightly slower, but the timings are roughly similar to
+those of the `MPI_Test` version.
+
 This project is a standalone part of [SWIFT](http://www.swiftsim.com) that
 aims to roughly simulate the MPI interactions that taking a single step of a
 SWIFT simulation makes. Making it possible to more easily see the performance
diff --git a/swiftmpistepsim.c b/swiftmpistepsim.c
index 394dea0..5df26b5 100644
--- a/swiftmpistepsim.c
+++ b/swiftmpistepsim.c
@@ -175,7 +175,7 @@ static void *inject_thread(void *arg) {
 
 /**
  * @brief main loop to run over a queue of MPI requests and test for when they
- * complete. Returns the total amount of time spent in calls to MPI_Test and
+ * complete. Returns the total amount of time spent in calls to MPI_Waitall and
  * the number of times it was called.
  *
  * @param logs the list of logs pointing to requests.
@@ -183,16 +183,16 @@ static void *inject_thread(void *arg) {
  *                logs.
  * @param todos pointer to the variable containing the number of requests that
  *              are still active.
- * @param sum the total number of ticks spent in calls to MPI_Test.
- * @param ncalls the total number of calls to MPI_Test.
- * @param mint the minimum ticks an MPI_Test call took.
- * @param maxt the maximum ticks an MPI_Test call took.
+ * @param sum the total number of ticks spent in calls to MPI_Waitall.
+ * @param ncalls the total number of calls to MPI_Waitall.
+ * @param mint the minimum ticks an MPI_Waitall call took.
+ * @param maxt the maximum ticks an MPI_Waitall call took.
  */
 static void queue_runner(struct mpiuse_log_entry **logs, int volatile *nr_logs,
                          int volatile *todos, double *sum, int *ncalls,
                          ticks *mint, ticks *maxt) {
 
-  /* Global MPI_Test statistics. */
+  /* Global MPI_Waitall statistics. */
   int lncalls = 0;
   double lsum = 0.0;
   ticks lmint = INT_MAX;
@@ -201,40 +201,42 @@ static void queue_runner(struct mpiuse_log_entry **logs, int volatile *nr_logs,
   /* We loop while new requests are being injected and we still have requests
    * to complete. */
   while (injecting || (!injecting && *todos > 0)) {
+
+    /* Make a list of active requests. */
     int nlogs = *nr_logs;
+    MPI_Request *reqs = (MPI_Request *)calloc(nlogs, sizeof(MPI_Request));
+    int *inds = (int *)calloc(nlogs, sizeof(int));
+    int nactlogs = 0;
     for (int k = 0; k < nlogs; k++) {
       struct mpiuse_log_entry *log = logs[k];
       if (log != NULL && !log->done) {
-        ticks tics = getticks();
-        int res;
-        MPI_Status stat;
-        int err = MPI_Test(&log->req, &res, &stat);
-        if (err != MPI_SUCCESS) {
-          error("MPI_Test call failed");
-        }
-
-        /* Increment etc. of statistics about time in MPI_Test. */
-        ticks dt = getticks() - tics;
-        log->tsum += (double)dt;
-        lsum += (double)dt;
-
-        log->nr_tests++;
-        lncalls++;
-
-        if (dt < log->tmin) log->tmin = dt;
-        if (dt > log->tmax) log->tmax = dt;
-        if (dt < lmint) lmint = dt;
-        if (dt > lmaxt) lmaxt = dt;
-
-        if (res) {
-          /* Done, clean up. */
-          log->done = 1;
-          log->endtic = getticks();
-          free(log->data);
-          atomic_dec(todos);
-        }
+          reqs[nactlogs] = logs[k]->req;
+          inds[nactlogs] = k;
+          nactlogs++;
       }
     }
+    if (nactlogs > 0) {
+      ticks tics = getticks();
+      int err = MPI_Waitall(nactlogs, reqs, MPI_STATUSES_IGNORE);
+      if (err != MPI_SUCCESS) error("MPI_Waitall call failed");
+
+      /* Done, clean up. */
+      for (int k = 0; k < nactlogs; k++) {
+        struct mpiuse_log_entry *log = logs[inds[k]];
+        log->done = 1;
+        log->endtic = getticks();
+        free(log->data);
+      }
+
+      atomic_sub(todos, nactlogs);
+      lncalls++;
+      ticks dt = getticks() - tics;
+      if (dt > lmaxt) lmaxt = dt;
+      if (dt < lmint) lmint = dt;
+      lsum += dt;
+    }
+    free(reqs);
+    free(inds);
   }
 
   /* All done. */
@@ -260,7 +262,7 @@ static void *send_thread(void *arg) {
   queue_runner(sends_queue, &nr_sends, &todo_send, &sum, &ncalls, &mint, &maxt);
 
   message(
-      "%d MPI_Test calls took: %.3f, mean time %.3f, min time %.3f, max time "
+      "%d MPI_Waitall calls took: %.3f, mean time %.3f, min time %.3f, max time "
       "%.3f (%s)",
       ncalls, clocks_from_ticks(sum), clocks_from_ticks(sum / ncalls),
       clocks_from_ticks(mint), clocks_from_ticks(maxt), clocks_getunit());
@@ -287,7 +289,7 @@ static void *recv_thread(void *arg) {
   queue_runner(recvs_queue, &nr_recvs, &todo_recv, &sum, &ncalls, &mint, &maxt);
 
   message(
-      "%d MPI_Test calls took: %.3f, mean time %.3f, min time %.3f, max time "
+      "%d MPI_Waitall calls took: %.3f, mean time %.3f, min time %.3f, max time "
       "%.3f (%s)",
       ncalls, clocks_from_ticks(sum), clocks_from_ticks(sum / ncalls),
       clocks_from_ticks(mint), clocks_from_ticks(maxt), clocks_getunit());
-- 
GitLab