diff --git a/swiftmpistepsim.c b/swiftmpistepsim.c
index f371fdf838875773894df514bbd5fdba6e34fe29..c0bf1d72f6c8dda58929653016920f3743498dad 100644
--- a/swiftmpistepsim.c
+++ b/swiftmpistepsim.c
@@ -38,6 +38,9 @@ static int verbose = 0;
 /* Attempt to keep original injection time differences. */
 static int usetics = 1;
 
+/* Scale to apply to the size of the messages we send. */
+static float messagescale = 1.0;
+
 /* Size of the messages we send. This overrides the logged values when not
  * zero . */
 static size_t messagesize = 0;
@@ -390,6 +393,9 @@ static void pick_logs(void) {
         /* Override size if needed. */
         if (messagesize > 0) log->size = messagesize;
 
+        /* Scale size in double precision; float rounds sizes above 2^24. */
+        log->size = (double)log->size * messagescale;
+
         /* And keep this log. */
         log->data = NULL;
         reqs_queue[nr_reqs] = log;
@@ -416,10 +422,12 @@ static void pick_logs(void) {
  * @brief usage help.
  */
 static void usage(char *argv[]) {
-  fprintf(stderr, "Usage: %s [-vf] SWIFT_mpiuse-log-file.dat logfile.dat\n",
+  fprintf(stderr, "Usage: %s [-vfdc:s:] SWIFT_mpiuse-log-file.dat logfile.dat\n",
           argv[0]);
   fprintf(stderr, " options: -v verbose, -f fast injections, "
-                  "-s message size (bytes)\n");
+          "-d fill messages and check values on receive, "
+          "-s <value> use fixed message of this size (bytes), "
+          "-c <value> scale factor for all messages\n");
   fflush(stderr);
 }
 
@@ -445,7 +453,7 @@ int main(int argc, char *argv[]) {
   /* Handle the command-line, we expect a mpiuse data file to read and various
    * options. */
   int opt;
-  while ((opt = getopt(argc, argv, "vfds:")) != -1) {
+  while ((opt = getopt(argc, argv, "vfdc:s:")) != -1) {
     switch (opt) {
       case 'd':
         datacheck = 1;
@@ -456,6 +464,9 @@ int main(int argc, char *argv[]) {
       case 'v':
         verbose = 1;
         break;
+      case 'c':
+        messagescale = atof(optarg);
+        break;
       case 's':
         messagesize = atoll(optarg);
         break;
@@ -496,11 +506,15 @@ int main(int argc, char *argv[]) {
   clocks_set_cpufreq(0);
   if (myrank == 0) {
     message("Start of MPI tests");
     message("==================");
     if (messagesize > 0) {
       message(" ");
       message("  Using fixed message size of %zd", messagesize);
     }
+    if (messagescale != 1.0f) {
+      message(" ");
+      message("  Using message scale of %f", messagescale);
+    }
     if (verbose) {
       if (!usetics) message("using fast untimed injections");
       if (datacheck)
diff --git a/testdata/doubledata.py b/testdata/doubledata.py
new file mode 100755
index 0000000000000000000000000000000000000000..a608930016e4ad9e8db20d407c7f44ed8a3aa4c0
--- /dev/null
+++ b/testdata/doubledata.py
@@ -0,0 +1,176 @@
+#!/usr/bin/env python
+"""
+Usage:
+    doubledata.py [options] input-log doubled-log
+
+Split pairs of send and recv log data entries to increase the number of
+messages that we send to represent the data of a step. Preserves the data
+volume, but not the tagging.
+
+This file is part of SWIFT.
+
+Copyright (C) 2019 Peter W. Draper (p.w.draper@durham.ac.uk)
+All Rights Reserved.
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published
+by the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>.
+"""
+
+import argparse
+import sys
+from operator import itemgetter
+
+#  Handle the command line.
+parser = argparse.ArgumentParser(description="Double MPI logs")
+
+parser.add_argument(
+    "input",
+    help="Output log from simulator")
+parser.add_argument(
+    "output",
+    help="Doubled log")
+parser.add_argument(
+    "-v",
+    "--verbose",
+    dest="verbose",
+    help="Verbose output",
+    default=False,
+    action="store_true",
+)
+args = parser.parse_args()
+infile = args.input
+outfile = args.output
+
+#  Indices for words in a line.
+sticcol = 0
+eticcol = 1
+dticcol = 2
+stepcol = 3
+rankcol = 4
+otherrankcol = 5
+typecol = 6
+itypecol = 7
+subtypecol = 8
+isubtypecol = 9
+activationcol = 10
+tagcol = 11
+sizecol = 12
+sumcol = 13
+
+# The plan is to pair the sends and receives across all ranks so that
+# we can split each pair into two and give everything new tags that also
+# match.
+
+#  Keyed lines.
+keysends = {}
+keyrecvs = {}
+
+#  Indexed lines.
+sends = []
+recvs = []
+nsends = 0
+nrecvs = 0
+
+#  Generate keys that are unique between send/recv pairs and gather the
+#  associated initiation lines.
+with open(infile, "r") as fp:
+    for line in fp:
+        if line[0] == '#':
+            continue
+        words = line.split()
+        if words[activationcol] == "0":
+            continue
+
+        if words[itypecol] == "22":
+            key = words[otherrankcol] + "/" + \
+                  words[rankcol] + "/" + \
+                  words[isubtypecol] + "/" + \
+                  words[tagcol] + "/" + \
+                  words[sizecol]
+            if not key in keysends:
+                keysends[key] = [nsends]
+            else:
+                keysends[key].append(nsends)
+            sends.append(words)
+            nsends = nsends + 1
+
+        elif words[itypecol] == "23":
+            key = words[rankcol] + "/" + \
+                  words[otherrankcol] + "/" + \
+                  words[isubtypecol] + "/" + \
+                  words[tagcol] + "/" + \
+                  words[sizecol]
+            if not key in keyrecvs:
+                keyrecvs[key] = [nrecvs]
+            else:
+                keyrecvs[key].append(nrecvs)
+            recvs.append(words)
+            nrecvs = nrecvs + 1
+
+print("# Read " + str(nsends) + " sends and " + str(nrecvs) + " recvs")
+
+# Now get the indices of the matches.
+msends = [None] * nsends
+for key in keysends:
+    if key not in keyrecvs:
+        print("# ERROR: missing recv key:  " + key)
+    elif len(keysends[key]) == 1 and len(keyrecvs[key]) == 1:
+        msends[keysends[key][0]] = keyrecvs[key][0]
+    else:
+        print("# ERROR: found  {} / {}  matches for key:  {}  should be 1/1".format(len(keysends[key]), len(keyrecvs[key]), key))
+
+# Reorder recvs to same order as sends. Every send and recv must be paired,
+# otherwise the reorder would index with None or silently drop recvs.
+if None in msends or nrecvs != nsends:
+    sys.exit("# ERROR: unmatched sends or recvs, cannot double this log")
+recvs = [recvs[i] for i in msends]
+
+# Now we can split and retag. XXX if you run out of tags need to
+# split by rank and then maybe itype and use tag count with these.
+splits = []
+ctag = 0
+for i in range(nsends):
+    swords = sends[i]
+    rwords = recvs[i]
+    total = int(float(swords[sizecol]))
+    half = total // 2
+
+    # First half reuses the original lines with a fresh tag.
+    ctag = ctag + 1
+    swords[tagcol] = rwords[tagcol] = str(ctag)
+    swords[sizecol] = rwords[sizecol] = str(half)
+    splits.append(swords)
+    splits.append(rwords)
+
+    # Second half is a copy that carries the remainder, so an odd size
+    # still adds up to the original data volume.
+    ctag = ctag + 1
+    cswords = list(swords)
+    crwords = list(rwords)
+    cswords[tagcol] = crwords[tagcol] = str(ctag)
+    cswords[sizecol] = crwords[sizecol] = str(total - half)
+    splits.append(cswords)
+    splits.append(crwords)
+
+# Sort by tic.
+splits = sorted(splits, key=lambda x: int(x[sticcol]))
+
+#  And output.
+with open(outfile, "w") as fp:
+    fp.write("# stic etic dtic step rank otherrank type itype subtype isubtype activation tag size sum\n")
+    for line in splits:
+        fp.write(" ".join(line) + "\n")
+
+print("# Finished")
+
+sys.exit(0)
diff --git a/testdata/pairdata.py b/testdata/pairdata.py
new file mode 100755
index 0000000000000000000000000000000000000000..c53ce22ca7691227188e6f128d3dd43e5f989ed3
--- /dev/null
+++ b/testdata/pairdata.py
@@ -0,0 +1,130 @@
+#!/usr/bin/env python
+"""
+Usage:
+    pairdata.py [options] output-log paired-log
+
+Combine pairs of send and recv log data entries to reduce the number of
+messages that we send to represent the data of a step. Uses a simple strategy
+identifying sends and recvs of the same ranks and type/subtypes that are close
+in time and combining them. We need to keep the tags and sizes matched across
+the ranks so we pick the tag of the first in time and sum the sizes.
+
+This file is part of SWIFT.
+
+Copyright (C) 2019 Peter W. Draper (p.w.draper@durham.ac.uk)
+All Rights Reserved.
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published
+by the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>.
+"""
+
+import argparse
+import sys
+from operator import itemgetter
+
+#  Handle the command line.
+parser = argparse.ArgumentParser(description="Pair MPI logs")
+
+parser.add_argument(
+    "input",
+    help="Output log from simulator")
+parser.add_argument(
+    "output",
+    help="Paired log")
+parser.add_argument(
+    "-v",
+    "--verbose",
+    dest="verbose",
+    help="Verbose output",
+    default=False,
+    action="store_true",
+)
+args = parser.parse_args()
+infile = args.input
+outfile = args.output
+
+#  Indices for words in a line.
+sticcol = 0
+eticcol = 1
+dticcol = 2
+stepcol = 3
+rankcol = 4
+otherrankcol = 5
+typecol = 6
+itypecol = 7
+subtypecol = 8
+isubtypecol = 9
+activationcol = 10
+tagcol = 11
+sizecol = 12
+sumcol = 13
+
+#  Indexed lines.
+lines = []
+nlines = 0
+
+#  Gather lines from the input log. We only want activation lines.
+with open(infile, "r") as fp:
+    for line in fp:
+        if line[0] == '#':
+            continue
+        words = line.split()
+        if words[activationcol] == "1":
+            lines.append(words)
+            nlines = nlines + 1
+
+#  Sort by tag as we need a stable join (sends and recvs are not ordered).
+lines = sorted(lines, key=lambda x: float(x[tagcol]))
+if args.verbose:
+    print("# Read  {}  activation logs from  {}".format(nlines, infile))
+
+# Now locate pairs.
+nmatches = 0
+nnlines = 0
+with open(outfile, "w") as fp:
+    for n in range(0, nlines):
+        nwords = lines[n]
+
+        # If not already used.
+        if nwords[rankcol] == "-1":
+            continue
+
+        # Check remaining lines for a match.
+        for m in range(n + 1, nlines):
+            mwords = lines[m]
+
+            if (nwords[rankcol] == mwords[rankcol] and
+                nwords[otherrankcol] == mwords[otherrankcol] and
+                nwords[itypecol] == mwords[itypecol] and
+                nwords[isubtypecol] == mwords[isubtypecol]):
+
+                #  Matching send or recv of same type to and from same
+                #  ranks. Use sum of sizes and min of tags and tics.
+                nwords[sizecol] = str(int(nwords[sizecol]) + int(mwords[sizecol]))
+                nwords[tagcol] = str(min(int(nwords[tagcol]), int(mwords[tagcol])))
+                nwords[sticcol] = str(min(int(nwords[sticcol]), int(mwords[sticcol])))
+
+                #  Don't use this other entry again. Will be skipped or
+                #  fail to match.
+                mwords[rankcol] = "-1"
+                nmatches = nmatches + 1
+                break
+
+        # And output.
+        fp.write(" ".join(nwords) + "\n")
+        nnlines = nnlines + 1
+
+if args.verbose:
+    print("# Matched  {}  wrote  {}  lines to  {}".format(nmatches, nnlines, outfile))
+
+sys.exit(0)