diff --git a/mpiuse.c b/mpiuse.c
index c194495b98c0f4f8c2fb06d52c3da45268c2f3d1..77bd4a1009172ede87c6f6623ba60934d1c39359 100644
--- a/mpiuse.c
+++ b/mpiuse.c
@@ -285,28 +285,77 @@ static double gauss_rand_upper(void) {
  * @param nr_nodes the number of ranks that will be used.
  * @param nr_logs the number of logs to generate per rank.
  * @param size bytes per message, unless random when this is the maximum
- *             and the minimum is 1 for uniform and 2.5 sigma for gaussian.
+ *             and the minimum is 1 for uniform; if using a gaussian
+ *             distribution the value is 2.5 sigma; for a CDF based
+ *             selection this is just a scale factor for the values.
  * @param random whether to use random sizes.
  * @param seed the random seed, use same for fixed sequences.
- * @param uniform whether to use a uniform distribution.
+ * @param uniform whether to use a uniform distribution, otherwise gaussian;
+ *                ignored when cdf is defined.
+ * @param cdf text file containing a normalized CDF to use as a basis for inverse
+ *            transform sampling of the randoms. NULL for no file.
  */
 void mpiuse_log_generate(int nr_nodes, int nr_logs, int size, int random,
-                         long int seed, int uniform) {
+                         long int seed, int uniform, const char *cdf) {
 
-  /* Each rank exchanges messages with all the others and each "log" has the
-   * same size. */
+  /* Only used for CDF, may need to increase these. */
+  int nvals = 0;
+  double imin[256], imax[256], value[256];
+
+  /* Note that each rank exchanges messages with all the others and each "log"
+   * has the same size. */
+  /* Set seed. */
   if (random) srand48(seed);
 
+  /* Check for CDF. This should be based on some real distribution, the format
+   * is the same as output from TOPCAT, i.e. space separated bin-low, bin-high
+   * and value columns. Note the value column should be normalised into the
+   * range 0 to 1 so that it maps into a uniform random distribution. */
+  if (cdf != NULL) {
+    FILE *infile = fopen(cdf, "r");
+    if (infile == NULL) error("Failed to open CDF file: %s", cdf);
+    char line[132];
+    while (!feof(infile)) {
+      if (fgets(line, 132, infile) != NULL) {
+        if (line[0] != '#') {
+          int nread = sscanf(line, "%lf %lf %lf", &imin[nvals], &imax[nvals],
+                             &value[nvals]);
+          if (nread == 3) nvals++;
+        }
+      }
+    }
+    fclose(infile);
+  }
+
+  /* Message tags increment across rank logs. */
   int tag = 1;
   for (int k = 0; k < nr_logs; k++) {
 
     /* Set size for this messages. */
     int logsize = size;
     if (random) {
-      if (uniform) {
+      if (cdf) {
+        /* CDF randoms. */
+        double rand = drand48();
+    
+        /* Binary search for containing bin for this rand. */
+        unsigned int lower = 0;
+        unsigned int upper = nvals;
+        unsigned int middle = 0;
+        while (lower < upper) {
+          middle = (upper + lower) / 2;
+          if (rand > value[middle])
+            lower = middle + 1;
+          else
+            upper = middle;
+        }
+        logsize = 0.5 * (imax[middle] + imin[middle]);
+
+      } else if (uniform) {
+        /* Uniform randoms in the range 1 to size. */
         logsize = (drand48() * (double)size) + 1;
       } else {
-        // Gaussian so no maximum, assume size is 2.5 sigma.
+        // Gaussian randoms so no maximum, assume size is 2.5 sigma.
         logsize = (gauss_rand_upper() * (double)size * 0.25) + 1;
       }
     }
diff --git a/mpiuse.h b/mpiuse.h
index feae1e6f7999a30fb79c9aa4b1ed9844a59a1fce..f621698be1f8cc8cf14a029e55654ce08dfcfacf 100644
--- a/mpiuse.h
+++ b/mpiuse.h
@@ -102,6 +102,6 @@ int mpiuse_nr_ranks(void);
 void mpiuse_dump_logs(int nranks, const char *logfile);
 
 void mpiuse_log_generate(int nr_nodes, int nr_logs, int size, int random,
-                         long int seed, int uniform);
+                         long int seed, int uniform, const char *cdf);
 
 #endif /* SWIFT_MPIUSE_H */
diff --git a/swiftmpifakestepsim.c b/swiftmpifakestepsim.c
index 7d991b752bbc1fb292414f65bf5e3bfd116d1d24..80b9e5e3ef046e67e41fcdf11cc52b3ed2c74d5c 100644
--- a/swiftmpifakestepsim.c
+++ b/swiftmpifakestepsim.c
@@ -38,8 +38,8 @@ static int verbose = 0;
 /* Set a data pattern and check we get this back, slow... */
 static int datacheck = 0;
 
-/* Fixed seed for pseudorandoms. */
-static long int seed = 1987654321;
+/* Default seed for pseudorandoms. */
+static long int default_seed = 1987654321;
 
 /* The local queues. */
 static struct mpiuse_log_entry **volatile reqs_queue;
@@ -339,11 +339,13 @@ static void pick_logs(void) {
  * @brief usage help.
  */
 static void usage(char *argv[]) {
-  fprintf(stderr, "Usage: %s [-vf] nr_messages logfile.dat\n",
+  fprintf(stderr, "Usage: %s [-vdsrgcx] nr_messages logfile.dat\n",
           argv[0]);
-  fprintf(stderr, " options: -v verbose, -d data check, -s size (bytes), "
-          "-r uniform random from 1 to size, "
-          "-r -g half gaussian random from 1 with 2.5 sigma size.\n");
+  fprintf(stderr, " options: -v verbose, -d data check, -s size (bytes/scale),\n"
+          "\t[-r uniform random from 1 to size |\n"
+          "\t-r -g half gaussian random from 1 with 2.5 sigma size |\n"
+          "\t-r -c <file> use cdf from file, size is a scale factor],\n"
+          "\t-x <seed> random seed\n");
   fflush(stderr);
 }
 
@@ -368,28 +370,36 @@ int main(int argc, char *argv[]) {
 
   /* Handle the command-line, we expect the number of messages to exchange per
    * rank an output log and some options, the interesting ones are a size and
-   * whether to use a random selection (with a fixed seed). */
+   * whether to use random selections of various kinds. */
   int size = 1024;
   int random = 0;
   int uniform = 1;
+  char *cdf = NULL;
   int opt;
-  while ((opt = getopt(argc, argv, "vds:rg")) != -1) {
+  unsigned int seed = default_seed;
+  while ((opt = getopt(argc, argv, "vds:rgx:c:")) != -1) {
     switch (opt) {
       case 'd':
         datacheck = 1;
         break;
+      case 'c':
+        cdf = optarg;
+        break;
+      case 'g':
+        uniform = 0;
+        break;
       case 's':
         size = atoi(optarg);
         break;
       case 'r':
         random = 1;
         break;
-      case 'g':
-        uniform = 0;
-        break;
       case 'v':
         verbose = 1;
         break;
+      case 'x':
+        seed = atol(optarg);
+        break;
       default:
         if (myrank == 0) usage(argv);
         return 1;
@@ -408,7 +418,12 @@ int main(int argc, char *argv[]) {
   /* Generate the fake logs for the exchanges. */
   if (myrank == 0) {
     if (random) {
-      if (uniform) {
+      if (cdf != NULL) {
+        message("Generating %d fake logs for %d ranks with randoms"
+                " based on cdf %s scaled by factor %d", nr_logs, nr_nodes,
+                cdf,size);
+
+      } else if (uniform) {
         message("Generating %d fake logs for %d ranks with random distribution"
                 " using size %d", nr_logs, nr_nodes, size);
       } else {
@@ -420,7 +435,7 @@ int main(int argc, char *argv[]) {
               nr_logs, nr_nodes, size);
     }
   }
-  mpiuse_log_generate(nr_nodes, nr_logs, size, random, seed, uniform);
+  mpiuse_log_generate(nr_nodes, nr_logs, size, random, seed, uniform, cdf);
   int nranks = mpiuse_nr_ranks();
 
   /* Each rank requires its own queue, so extract them. */