diff --git a/Makefile b/Makefile
index 91ba3eee1f3b6b43c28d86634d17144862eeb14f..1cf80bbabb57a5ccdea52ad3e93078ab013143ab 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,5 @@
-#CFLAGS = -g -O0 -Wall  -fsanitize=address -fno-omit-frame-pointer -fsanitize=undefined
-CFLAGS = -g -O0 -Wall
+#CFLAGS = -g -O0 -Wall -std=gnu11 -fsanitize=address -fno-omit-frame-pointer -fsanitize=undefined
+CFLAGS = -g -O0 -Wall -std=gnu11
 
 all: swiftmpistepsim swiftmpifakestepsim swiftmpirdmastepsim
 
diff --git a/swiftmpirdmastepsim.c b/swiftmpirdmastepsim.c
index 1d8bd26585e301cc8722c1e3c031314c0a79c60d..1c749d4eec0bfb69d35abede09f110b16e003615 100644
--- a/swiftmpirdmastepsim.c
+++ b/swiftmpirdmastepsim.c
@@ -96,8 +96,8 @@ static double messagescale = 1.0;
 static int datacheck = 0;
 
 /* Integer types of send and recv tasks, must match log. */
-static const int task_type_send = 22;
-static const int task_type_recv = 23;
+static const int task_type_send = 25;
+static const int task_type_recv = 26;
 
 /* Global ommunicators for each of the subtypes. */
 static MPI_Comm subtypeMPI_comms[task_subtype_count];
@@ -116,7 +116,6 @@ static size_t *ranktag_lists;
 /* The local send queue. */
 static struct mpiuse_log_entry **volatile send_queue;
 static int volatile nr_send = 0;
-static int volatile todo_send = 0;
 static int volatile injecting = 1;
 
 /* The local receive queue. */
@@ -126,7 +125,6 @@ static int volatile todo_recv = 0;
 
 /* The local requests queue. */
 static struct mpiuse_log_entry **volatile req_queue;
-static int volatile ind_req = 0;
 static int volatile nr_req = 0;
 static int volatile todo_req = 0;
 
@@ -210,7 +208,10 @@ static void *send_thread(void *arg) {
   ticks starttics = getticks();
 
   injecting = 1;
+  double tsum = 0.0;
   for (int k = 0; k < nr_send; k++) {
+    ticks tics = getticks();
+
     struct mpiuse_log_entry *log = send_queue[k];
     if (log == NULL) error("NULL send message queued (%d/%d)", k, nr_send);
 
@@ -257,21 +258,24 @@ static void *send_thread(void *arg) {
     }
 
     /* And start the send of data to other rank. */
-    int ret = MPI_Raccumulate(&dataptr[1], datasize - 1, MPI_BLOCKTYPE,
-                              log->otherrank, log->offset + 1, datasize - 1,
-                              MPI_BLOCKTYPE, MPI_REPLACE,
-                              mpi_window[log->subtype], &log->req);
-    if (ret != MPI_SUCCESS) mpi_error_message(ret, "Failed to accumulate data");
+    int ret = MPI_Rput(&dataptr[1], datasize - 1, MPI_BLOCKTYPE,
+                       log->otherrank, log->offset + 1, datasize - 1,
+                       MPI_BLOCKTYPE, mpi_window[log->subtype], &log->req);
+    if (ret != MPI_SUCCESS) mpi_error_message(ret, "Failed to put data");
 
     /* Add to the requests queue. */
     int ind = atomic_inc(&nr_req);
     req_queue[ind] = log;
     atomic_inc(&todo_req);
+
+    ticks dt = getticks() - tics;
+    tsum += (double)dt;
   }
 
   /* All done. */
   atomic_dec(&injecting);
 
+  message("sum = %f, mean = %f", clocks_from_ticks(tsum), clocks_from_ticks(tsum / nr_send));
   message("took %.3f %s.", clocks_from_ticks(getticks() - starttics),
           clocks_getunit());
 
@@ -298,29 +302,15 @@ static void *req_thread(void *arg) {
           error("MPI_Test call failed");
         }
         if (res) {
-
-          //  Start new epoch on window?
-          int ret =
-              MPI_Win_flush_local(log->otherrank, mpi_window[log->subtype]);
-          if (ret != MPI_SUCCESS)
-            mpi_error_message(ret, "MPI_Win_flush failed");
-
           /* Send the UNLOCKED message. */
           BLOCKTYPE newval[1];
           BLOCKTYPE oldval[1];
           newval[0] = UNLOCKED;
           oldval[0] = 0;
-          ret = MPI_Compare_and_swap(&newval[0], log->data, &oldval[0],
-                                     MPI_BLOCKTYPE, log->otherrank, log->offset,
-                                     mpi_window[log->subtype]);
-
-          if (ret != MPI_SUCCESS)
-            mpi_error_message(ret, "MPI_Compare_and_swap error");
-
-          /* And complete. */
-          ret = MPI_Win_flush_local(log->otherrank, mpi_window[log->subtype]);
-          if (ret != MPI_SUCCESS)
-            mpi_error_message(ret, "MPI_Win_flush failed");
+          int ret = MPI_Put(&newval[0], 1, MPI_BLOCKTYPE,
+                            log->otherrank, log->offset, 1,
+                            MPI_BLOCKTYPE, mpi_window[log->subtype]);
+          if (ret != MPI_SUCCESS) mpi_error_message(ret, "Failed to put unlock");
 
           /* Done. */
           log->done = 1;