From d0cc664a942601cbddcb01dc39bbae72fd62c0aa Mon Sep 17 00:00:00 2001
From: "Peter W. Draper" <p.w.draper@durham.ac.uk>
Date: Thu, 16 Apr 2020 13:08:17 +0100
Subject: [PATCH] Try to assign the communicators for the subtypes to
 different progress threads using the suggested MPI_Info hints. Still need
 to work out how to get thread-split to work; that would require that
 injections and completions happen on the same threads

---
 swiftmpistepsim.c | 31 +++++++++++++++++++++++--------
 1 file changed, 23 insertions(+), 8 deletions(-)
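
For reference, a minimal sketch of the info-hint mechanism this patch uses.
It assumes Intel MPI 2019 or later with the thread-split model enabled at
run time (e.g. I_MPI_THREAD_SPLIT=1); that environment is an assumption,
while the "thread_id" key is the one set below. Unrecognised info keys are
ignored by other MPI implementations, so the hint is harmless elsewhere.

    #include <mpi.h>

    int main(int argc, char *argv[]) {
      int provided;
      MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);

      /* Duplicate a communicator and hint which progress thread should
       * handle its traffic. */
      MPI_Comm comm;
      MPI_Comm_dup(MPI_COMM_WORLD, &comm);

      MPI_Info info;
      MPI_Info_create(&info);
      MPI_Info_set(info, "thread_id", "0");
      MPI_Comm_set_info(comm, info);
      MPI_Info_free(&info);

      /* ... all requests posted on comm should now be progressed by the
       * hinted thread ... */

      MPI_Comm_free(&comm);
      MPI_Finalize();
      return 0;
    }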

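The thread-split constraint mentioned in the subject means a request should
be completed by the same thread that injected it. A sketch of that pattern,
under the same assumptions as above (worker and worker_comm are illustrative
names, not from the simulator; run with two ranks):

    #include <mpi.h>
    #include <pthread.h>

    static MPI_Comm worker_comm; /* one communicator per worker thread */

    /* Inject a request and complete it on this same thread. */
    static void *worker(void *arg) {
      (void)arg;
      char payload[64] = {0};
      int rank;
      MPI_Comm_rank(worker_comm, &rank);

      MPI_Request req;
      if (rank == 0)
        MPI_Isend(payload, sizeof(payload), MPI_BYTE, 1, 0, worker_comm, &req);
      else
        MPI_Irecv(payload, sizeof(payload), MPI_BYTE, 0, 0, worker_comm, &req);

      /* Completion happens on the injecting thread, as thread-split needs. */
      MPI_Wait(&req, MPI_STATUS_IGNORE);
      return NULL;
    }

    int main(int argc, char *argv[]) {
      int provided;
      MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
      MPI_Comm_dup(MPI_COMM_WORLD, &worker_comm);

      pthread_t tid;
      pthread_create(&tid, NULL, worker, NULL);
      pthread_join(tid, NULL);

      MPI_Comm_free(&worker_comm);
      MPI_Finalize();
      return 0;
    }
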
diff --git a/swiftmpistepsim.c b/swiftmpistepsim.c
index bbfe53c..6e635a4 100644
--- a/swiftmpistepsim.c
+++ b/swiftmpistepsim.c
@@ -29,7 +29,8 @@
 #include "error.h"
 #include "mpiuse.h"
 
-/* Number of threads used to partition the requests. */
+/* Number of threads used to partition the requests. Should match the number
+ * of progress threads; each communicator should be assigned to one of them. */
 #define NTHREADS 2
 
 /* Global: Our rank for all to see. */
@@ -50,7 +51,7 @@ static const int task_type_recv = 23;
 
 /* Global communicators for each of the subtypes. */
 static const int task_subtype_count = 30;  // Just some upper limit on subtype.
-static MPI_Comm subtypeMPI_comms[NTHREADS][30];
+static MPI_Comm subtypeMPI_comms[30];
 
 /* The local queues. We need to partition these to keep the MPI communications
  * separate so we can use the MPI_THREAD_SPLIT option of the Intel 2020 MPI
@@ -172,7 +173,7 @@ static void *inject_thread(void *arg) {
 
       /* And send. */
       err = MPI_Isend(log->data, log->size, MPI_BYTE, log->otherrank, log->tag,
-                      subtypeMPI_comms[tid][log->subtype], &log->req);
+                      subtypeMPI_comms[log->subtype], &log->req);
 
       /* Add a new send request. */
       int ind = atomic_inc(&nr_sends[tid]);
@@ -184,7 +185,7 @@ static void *inject_thread(void *arg) {
       /* Ready to receive. */
       log->data = calloc(log->size, 1);
       err = MPI_Irecv(log->data, log->size, MPI_BYTE, log->otherrank, log->tag,
-                      subtypeMPI_comms[tid][log->subtype], &log->req);
+                      subtypeMPI_comms[log->subtype], &log->req);
 
       /* Add a new recv request. */
       int ind = atomic_inc(&nr_recvs[tid]);
@@ -488,6 +489,9 @@ int main(int argc, char *argv[]) {
   char *infile = argv[optind];
   char *logfile = argv[optind + 1];
 
+  /* Initialize the clock used for messages and timers. */
+  clocks_set_cpufreq(0);
+
   /* Now we read the SWIFT MPI logger output that defines the communications
    * we will undertake and the time differences between injections into the
    * queues. Note this has all ranks for a single step; SWIFT outputs one MPI
@@ -500,17 +504,28 @@ int main(int argc, char *argv[]) {
     error("The number of MPI ranks %d does not match the expected value %d",
           nranks, nr_nodes);
 
-  /* Create communicators for each subtype of the tasks and each thread. */
+  /* Create communicators for each subtype. These are associated with the
+   * threads somewhat arbitrarily; we should seek to balance the work. */
   for (int i = 0; i < task_subtype_count; i++) {
-    for (int j = 0; j < NTHREADS; j++) {
-      MPI_Comm_dup(MPI_COMM_WORLD, &subtypeMPI_comms[j][i]);
+    MPI_Comm_dup(MPI_COMM_WORLD, &subtypeMPI_comms[i]);
+    char str[8] = {0};
+    if (i == 11) {
+      sprintf(str, "%d", 0); // XXX fudge: force the xv subtype onto thread 0
+    } else {
+      sprintf(str, "%d", i % NTHREADS);
     }
+    MPI_Info info;
+    MPI_Info_create(&info);
+    MPI_Info_set(info, "thread_id", str);
+    MPI_Comm_set_info(subtypeMPI_comms[i], info);
+    message("subtype %d assigned to thread_id %s", i, str);
+    MPI_Info_free(&info);
   }
 
   /* Each rank requires its own queue, so extract them. */
   pick_logs();
 
-  /* Time to start time. Try to make it synchronous across the ranks. */
+  /* Try to make the start synchronous across the ranks and reset time to zero. */
   MPI_Barrier(MPI_COMM_WORLD);
   clocks_set_cpufreq(0);
   if (myrank == 0) {
-- 
GitLab