From d0cc664a942601cbddcb01dc39bbae72fd62c0aa Mon Sep 17 00:00:00 2001 From: "Peter W. Draper" <p.w.draper@durham.ac.uk> Date: Thu, 16 Apr 2020 13:08:17 +0100 Subject: [PATCH] Try to assign communicators for the subtypes to different progress threads using the suggested MPI_Info hints, still need to think about how to get thread-split to work, that would require that injections and completions happen using same threads --- swiftmpistepsim.c | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/swiftmpistepsim.c b/swiftmpistepsim.c index bbfe53c..6e635a4 100644 --- a/swiftmpistepsim.c +++ b/swiftmpistepsim.c @@ -29,7 +29,8 @@ #include "error.h" #include "mpiuse.h" -/* Number of threads used to partition the requests. */ +/* Number of threads used to partition the requests. Should match the number + * of progress threads and communicators should be assigned to one? */ #define NTHREADS 2 /* Global: Our rank for all to see. */ @@ -50,7 +51,7 @@ static const int task_type_recv = 23; /* Global communicators for each of the subtypes. */ static const int task_subtype_count = 30; // Just some upper limit on subtype. -static MPI_Comm subtypeMPI_comms[NTHREADS][30]; +static MPI_Comm subtypeMPI_comms[30]; /* The local queues. We need to partition these to keep the MPI communications * separate so we can use the MPI_THREAD_SPLIT option of the Intel 2020 MPI @@ -172,7 +173,7 @@ static void *inject_thread(void *arg) { /* And send. */ err = MPI_Isend(log->data, log->size, MPI_BYTE, log->otherrank, log->tag, - subtypeMPI_comms[tid][log->subtype], &log->req); + subtypeMPI_comms[log->subtype], &log->req); /* Add a new send request. */ int ind = atomic_inc(&nr_sends[tid]); @@ -184,7 +185,7 @@ static void *inject_thread(void *arg) { /* Ready to receive. */ log->data = calloc(log->size, 1); err = MPI_Irecv(log->data, log->size, MPI_BYTE, log->otherrank, log->tag, - subtypeMPI_comms[tid][log->subtype], &log->req); + subtypeMPI_comms[log->subtype], &log->req); /* Add a new recv request. */ int ind = atomic_inc(&nr_recvs[tid]); @@ -488,6 +489,9 @@ int main(int argc, char *argv[]) { char *infile = argv[optind]; char *logfile = argv[optind + 1]; + /* Initialize time for messages and timers. */ + clocks_set_cpufreq(0); + /* Now we read the SWIFT MPI logger output that defines the communcations * we will undertake and the time differences between injections into the * queues. Note this has all ranks for a single steps, SWIFT outputs one MPI @@ -500,17 +504,28 @@ int main(int argc, char *argv[]) { error("The number of MPI ranks %d does not match the expected value %d", nranks, nr_nodes); - /* Create communicators for each subtype of the tasks and each thread. */ + /* Create communicators for each subtype, these are associated with the + * threads somewhat randomly, we should seek to balance the work. */ for (int i = 0; i < task_subtype_count; i++) { - for (int j = 0; j < NTHREADS; j++) { - MPI_Comm_dup(MPI_COMM_WORLD, &subtypeMPI_comms[j][i]); + MPI_Comm_dup(MPI_COMM_WORLD, &subtypeMPI_comms[i]); + char str[8] = {0}; + if (i == 11) { + sprintf(str, "%d", 0); // XXX fudge for xv + } else { + sprintf(str, "%d", i % NTHREADS); } + MPI_Info info; + MPI_Info_create(&info); + MPI_Info_set(info, "thread_id", str); + MPI_Comm_set_info(subtypeMPI_comms[i], info); + message("subtype %d assigned to thread_id %s", i, str); + MPI_Info_free(&info); } /* Each rank requires its own queue, so extract them. */ pick_logs(); - /* Time to start time. Try to make it synchronous across the ranks. */ + /* Try to make it synchronous across the ranks and reset time to zero. */ MPI_Barrier(MPI_COMM_WORLD); clocks_set_cpufreq(0); if (myrank == 0) { -- GitLab