diff --git a/examples/analyse_dump_cells.py b/examples/analyse_dump_cells.py
new file mode 100755
index 0000000000000000000000000000000000000000..3140e799566c75fe494a75895db0f4a8dcff4e57
--- /dev/null
+++ b/examples/analyse_dump_cells.py
@@ -0,0 +1,125 @@
+#!/usr/bin/env python
+"""
+Usage:
+    analysedumpcells.py nx ny nx cell<1>.dat cell<2>.dat ...
+
+Analyses a number of output files created by calls to the dumpCells() debug
+function (presumably called in engine_step()) to output a list of active
+top-level cells, identifying those that are on the edges of the volumes being
+processed on by various nodes. The point is that these should be minimised to
+reduce the MPI communications.
+
+The nx, ny and nz arguments are the number of cells in the complete space,
+we need these so that we can wrap around the edge of space correctly.
+
+This file is part of SWIFT.
+Copyright (c) 2017 Peter W. Draper (p.w.draper@durham.ac.uk)
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published
+by the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>.
+"""
+import pylab as pl
+import numpy as np
+import sys
+import pandas
+
+xcol = 0
+ycol = 1
+zcol = 2
+xwcol = 3
+ywcol = 4
+zwcol = 5
+localcol = 18
+supercol = 15
+activecol = 16
+
+#  Command-line arguments.
+if len(sys.argv) < 5:
+    print "usage: ", sys.argv[0], " nx ny nz cell1.dat cell2.dat ..."
+    sys.exit(1)
+nx = int(sys.argv[1])
+ny = int(sys.argv[2])
+nz = int(sys.argv[3])
+
+print "# x y z onedge"
+allactives = []
+onedge = 0
+tcount = 0
+for i in range(4, len(sys.argv)):
+
+    #  Read the file.
+    data = pl.loadtxt(sys.argv[i])
+    #print data
+
+    #  Select cells that are on the current rank and are super cells.
+    rdata = data[data[:,localcol] == 1]
+    tdata = rdata[rdata[:,supercol] == 1]
+
+    #  Separation of the cells is in data.
+    xwidth = tdata[0,xwcol]
+    ywidth = tdata[0,ywcol]
+    zwidth = tdata[0,zwcol]
+
+    #  Fill space nx, ny,n nz with all toplevel cells and flag their active
+    #  state.
+    space = np.zeros((nx,ny,nz))
+    actives = []
+    for line in tdata:
+        ix = int(np.rint(line[xcol] / xwidth))
+        iy = int(np.rint(line[ycol] / ywidth))
+        iz = int(np.rint(line[zcol] / zwidth))
+        active = int(line[activecol])
+        space[ix,iy,iz] = 1 + active
+        tcount = tcount + 1
+        if active == 1:
+            actives.append([ix, iy, iz, line])
+    
+    #  Report all active cells and flag any without 26 neighbours. These are
+    #  on the edge of the partition volume and will have foreign neighbour
+    #  cells.
+    for active in actives:
+        count = 0
+        for ii in [-1, 0, 1]:
+            i = active[0] + ii
+            if i < 0:
+                i = i + nx
+            elif i >= nx:
+                i = i - nx
+
+            for jj in [-1, 0, 1]:
+                j = active[1] + jj
+                if j < 0:
+                    j = j + ny
+                elif j >= ny:
+                    j = j - ny
+
+                for kk in [-1, 0, 1]:
+                    k = active[2] + kk
+                    if k < 0:
+                        k = k + nz
+                    elif k >= nz:
+                        k = k - nz
+                    if space[i, j, k] > 0:
+                        count = count + 1
+        if count < 27:
+            onedge = onedge + 1
+            print active[3][0], active[3][1], active[3][2], 1
+        else:
+            print active[3][0], active[3][1], active[3][2], 0
+
+    allactives.extend(actives)
+
+print "# top cells: ", tcount, " active: ", len(allactives), " on edge: ", onedge
+
+sys.exit(0)
+
diff --git a/examples/process_cells b/examples/process_cells
index a9c72e4a092b41e7f49036937c6f8c53fc0339a8..eead4572387f2732cf0b86265f14d20039f73ae1 100755
--- a/examples/process_cells
+++ b/examples/process_cells
@@ -1,20 +1,27 @@
 #!/bin/bash
 #
 # Usage:
-#  process_cells nprocess
+#  process_cells nx ny nz nprocess
 #
 # Description:
-#  Process all the cell dumps in the current directory
+#  Process all the cell dumps in the current directory.
+
+#  Outputs file per rank with the active cells identified and marked as to
+#  whether they are near an edge or not. Note requires the numbers of cells
+#  per dimension of the space.
 #
-#  Outputs file per rank with the active cells identified and marked
-#  as to whether they are near an edge or not.
+#  Also outputs a graphic showing the fraction of active cells on edges
+#  for each step.
 
 #  Handle command-line
-if test "$1" = ""; then
-    echo "Usage: $0 nprocess"
+if test "$4" = ""; then
+    echo "Usage: $0 nx ny nz nprocess"
     exit 1
 fi
-NPROCS=$1
+NX=$1
+NY=$2
+NZ=$3
+NPROCS=$4
 
 #  Locate script.
 SCRIPTHOME=$(dirname "$0")
@@ -33,19 +40,25 @@ echo "Number of ranks = $ranks"
 #  Now construct a list of files ordered by rank, not step.
 files=$(ls cells_*.dat | sort -t "_" -k 3,3 -n | xargs -n 4)
 
+#  Need number of steps.
+nfiles=$(echo $files| wc -w)
+echo "Number of files = $nfiles"
+steps=$(( $nfiles / $ranks + 1 ))
+echo "Number of steps = $steps"
+
 #  And process them,
 echo "Processing cell dumps files..."
-echo $files | xargs -P $NPROCS -n 4 /bin/bash -c "${SCRIPTHOME}/process_cells_helper 20 20 20 \$0 \$1 \$2 \$3"
+#echo $files | xargs -P $NPROCS -n 4 /bin/bash -c "${SCRIPTHOME}/process_cells_helper $NX $NY $NZ \$0 \$1 \$2 \$3"
 
 #  Create summary.
 grep "top cells" step*-active-cells.dat | sort -h > active_cells.log
 
 #  And plot of active cells to edge cells.
-stilts plot2plane ifmt=ascii in=active_cells.log xmin=-0.1 xmax=1.1 ymin=-100 ymax=2200 grid=1 \
+stilts plot2plane ifmt=ascii in=active_cells.log xmin=-0.1 xmax=1.1 ymin=0 ymax=$steps grid=1 \
        legend=false xpix=600 ypix=500 xlabel="Edge cells/Active cells" ylabel="Step" \
-       layer1=mark x1="col9/1.0/col6" y1="index*7" size1=3 shading1=aux auxmap=rainbow \
+       layer1=mark x1="col9/1.0/col6" y1="index" size1=3 shading1=aux auxmap=rainbow \
        aux=col6 auxfunc=log auxlabel="Active cells" layer2=histogram x2="col9/1.0/col6" \
-       color2=grey binsize2=0.01 phase2=0.5 barform2=semi_steps weight2=30 thick2=1 \
+       color2=grey binsize2=0.01 phase2=0.5 barform2=semi_steps thick2=1 \
        out=active_cells.png
 
 exit
diff --git a/examples/process_cells_helper b/examples/process_cells_helper
index 7862e7def0cc2770566f935c90851881413404f7..a96bdba0de56e04c28ed6d8675c705241ce241d9 100755
--- a/examples/process_cells_helper
+++ b/examples/process_cells_helper
@@ -6,6 +6,7 @@
 SCRIPTHOME=$(dirname "$0")
 
 step=$(echo $4|sed 's,cells_\(.*\)_\(.*\).dat,\2,')
+echo "${SCRIPTHOME}/analyse_dump_cells.py $* > step${step}-active-cells.dat"
 ${SCRIPTHOME}/analyse_dump_cells.py $* > step${step}-active-cells.dat
 
 exit
diff --git a/src/debug.c b/src/debug.c
index 4e99108f509fa4c327e0d2a801920cd8fb374bc2..4c521397b12c555923f8cea8a4f0cdb4c4551b97 100644
--- a/src/debug.c
+++ b/src/debug.c
@@ -276,11 +276,13 @@ int checkCellhdxmax(const struct cell *c, int *depth) {
  * only.
  */
 static void dumpCells_map(struct cell *c, void *data) {
-  uintptr_t *ldata = (uintptr_t *)data;
+  size_t *ldata = (size_t *)data;
   FILE *file = (FILE *)ldata[0];
   struct engine *e = (struct engine *)ldata[1];
   float ntasks = c->nr_tasks;
-  int pactive = (int)ldata[2];
+  int active = (int)ldata[2];
+  int mpiactive = (int)ldata[3];
+  int pactive = (int)ldata[4];
 
 #if SWIFT_DEBUG_CHECKS
   /* The c->nr_tasks field does not include all the tasks. So let's check this
@@ -301,30 +303,44 @@ static void dumpCells_map(struct cell *c, void *data) {
   }
 #endif
 
-  /* Only locally active cells are dumped. */
+  /* Only cells with particles are dumped. */
   if (c->count > 0 || c->gcount > 0 || c->scount > 0) {
 
-    /* If requested we work out how many particles are active in this cell. */
-    int pactcount = 0;
-    if (pactive) {
-      const struct part *parts = c->parts;
-      for (int k = 0; k < c->count; k++)
-        if (part_is_active(&parts[k], e)) pactcount++;
-      struct gpart *gparts = c->gparts;
-      for (int k = 0; k < c->gcount; k++)
-        if (gpart_is_active(&gparts[k], e)) pactcount++;
-      struct spart *sparts = c->sparts;
-      for (int k = 0; k < c->scount; k++)
-        if (spart_is_active(&sparts[k], e)) pactcount++;
-    }
+/* In MPI mode we may only output cells with foreign partners.
+ * These define the edges of the partitions. */
 #if WITH_MPI
-    int sendto = (c->send_xv != NULL);
+    if (mpiactive)
+      mpiactive = (c->send_xv != NULL);
+    else
+      mpiactive = 1;
 #else
-    int sendto = 0;
+    mpiactive = 1;
 #endif
 
-    /* Local cells that are active and are super cells and have MPI tasks. */
-    if (c->nodeID == e->nodeID && cell_is_active(c, e) && (c->super == c) && sendto)
+    /* Active cells, otherwise all. */
+    if (active)
+      active = cell_is_active(c, e);
+    else
+      active = 1;
+
+    /* So output local super cells that are active and have MPI tasks as
+     * requested. */
+    if (c->nodeID == e->nodeID && (c->super == c) && active && mpiactive) {
+
+      /* If requested we work out how many particles are active in this cell. */
+      int pactcount = 0;
+      if (pactive) {
+        const struct part *parts = c->parts;
+        for (int k = 0; k < c->count; k++)
+          if (part_is_active(&parts[k], e)) pactcount++;
+        struct gpart *gparts = c->gparts;
+        for (int k = 0; k < c->gcount; k++)
+          if (gpart_is_active(&gparts[k], e)) pactcount++;
+        struct spart *sparts = c->sparts;
+        for (int k = 0; k < c->scount; k++)
+          if (spart_is_active(&sparts[k], e)) pactcount++;
+      }
+
       fprintf(file,
               "  %6.3f %6.3f %6.3f %6.3f %6.3f %6.3f %6d %6d %6d %6d %6d %6d "
               "%6.1f %20lld %6d %6d %6d %6d %6d\n",
@@ -333,7 +349,7 @@ static void dumpCells_map(struct cell *c, void *data) {
               c->depth, ntasks, c->ti_end_min, get_time_bin(c->ti_end_min),
               (c->super == c), cell_is_active(c, e), c->nodeID,
               c->nodeID == e->nodeID);
-
+    }
   }
 }
 
@@ -344,19 +360,21 @@ static void dumpCells_map(struct cell *c, void *data) {
  *
  * @param prefix base output filename, result is written to
  *               %prefix%_%rank%_%step%.dat
+ * @param active just output active cells.
+ * @param mpiactive just output MPI active cells, i.e. those with foreign cells.
  * @param pactive also output a count of active particles.
  * @param s the space holding the cells to dump.
  * @param rank node ID of MPI rank, or 0 if not relevant.
  * @param step the current engine step, or some unique integer.
  */
-void dumpCells(const char *prefix, int pactive, struct space *s, int rank,
-               int step) {
+void dumpCells(const char *prefix, int active, int mpiactive, int pactive,
+               struct space *s, int rank, int step) {
 
   FILE *file = NULL;
 
   /* Name of output file. */
   char fname[200];
-  sprintf(fname, "%s_%03d.dat", prefix, step);
+  sprintf(fname, "%s_%03d_%03d.dat", prefix, rank, step);
   file = fopen(fname, "w");
 
   /* Header. */
@@ -364,13 +382,15 @@ void dumpCells(const char *prefix, int pactive, struct space *s, int rank,
           "# %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s "
           "%20s %6s %6s %6s %6s %6s\n",
           "x", "y", "z", "xw", "yw", "zw", "step", "count", "gcount", "scount",
-          "actcount", "depth", "tasks", "ti_end_min", "timebin",
-          "issuper", "active", "rank", "local");
+          "actcount", "depth", "tasks", "ti_end_min", "timebin", "issuper",
+          "active", "rank", "local");
 
-  uintptr_t data[3];
+  size_t data[5];
   data[0] = (size_t)file;
   data[1] = (size_t)s->e;
-  data[2] = (size_t)pactive;
+  data[2] = (size_t)active;
+  data[3] = (size_t)mpiactive;
+  data[4] = (size_t)pactive;
   space_map_cells_pre(s, 1, dumpCells_map, &data);
   fclose(file);
 }
diff --git a/src/debug.h b/src/debug.h
index 5e5fb4819e990349b57db7fa6c0bfe9dacba9d21..2cff37fa6f6f03185dc769bb770fe3a98a424ce1 100644
--- a/src/debug.h
+++ b/src/debug.h
@@ -36,8 +36,8 @@ void printgParticle_single(struct gpart *gp);
 
 int checkSpacehmax(struct space *s);
 int checkCellhdxmax(const struct cell *c, int *depth);
-void dumpCells(const char *prefix, int pactive, struct space *s, int rank,
-               int step);
+void dumpCells(const char *prefix, int active, int mpiactive, int pactive,
+               struct space *s, int rank, int step);
 
 #ifdef HAVE_METIS
 #include "metis.h"
diff --git a/src/engine.c b/src/engine.c
index 869432417d848e68290a44ba4e63cc9b3dc75b9c..fdcf80abd69450ad2eabbcc05d90d248ca5efa7f 100644
--- a/src/engine.c
+++ b/src/engine.c
@@ -3750,6 +3750,9 @@ void engine_step(struct engine *e) {
   /* Print the number of active tasks ? */
   if (e->verbose) engine_print_task_counts(e);
 
+/* Dump local cells and active particle counts. */
+/* dumpCells("cells", 0, 0, 1, e->s, e->nodeID, e->step); */
+
 #ifdef SWIFT_DEBUG_CHECKS
   /* Check that we have the correct total mass in the top-level multipoles */
   size_t num_gpart_mpole = 0;
diff --git a/src/partition.c b/src/partition.c
index 3b916c8a906ea7bfe1ba546c5e3e7f65d27aa76b..567fcb38c030a0dec1f052329111b0cc6d82d8f7 100644
--- a/src/partition.c
+++ b/src/partition.c
@@ -528,7 +528,7 @@ static void repart_edge_metis(int partweights, int bothweights, int timebins,
     if (t->cost == 0) continue;
 
     /* Get the task weight based on costs. */
-    double w = (double) t->cost;
+    double w = (double)t->cost;
 
     /* Get the top-level cells involved. */
     struct cell *ci, *cj;
@@ -590,15 +590,17 @@ static void repart_edge_metis(int partweights, int bothweights, int timebins,
            * overflow int, so take care. */
           int dti = num_time_bins - get_time_bin(ci->ti_end_min);
           int dtj = num_time_bins - get_time_bin(cj->ti_end_min);
-          double dt = (double)(1<<dti) + (double)(1<<dtj);
+          double dt = (double)(1 << dti) + (double)(1 << dtj);
 
           /* ci */
           int kk;
-          for (kk = 26 * cid; inds[kk] != cjd; kk++);
+          for (kk = 26 * cid; inds[kk] != cjd; kk++)
+            ;
           weights_e[kk] += dt;
 
           /* cj */
-          for (kk = 26 * cjd; inds[kk] != cid; kk++);
+          for (kk = 26 * cjd; inds[kk] != cid; kk++)
+            ;
           weights_e[kk] += dt;
         } else {
 
@@ -606,11 +608,13 @@ static void repart_edge_metis(int partweights, int bothweights, int timebins,
 
           /* ci */
           int kk;
-          for (kk = 26 * cid; inds[kk] != cjd; kk++);
+          for (kk = 26 * cid; inds[kk] != cjd; kk++)
+            ;
           weights_e[kk] += w;
 
           /* cj */
-          for (kk = 26 * cjd; inds[kk] != cid; kk++);
+          for (kk = 26 * cjd; inds[kk] != cid; kk++)
+            ;
           weights_e[kk] += w;
         }
       }
@@ -624,14 +628,14 @@ static void repart_edge_metis(int partweights, int bothweights, int timebins,
   int res;
   if (bothweights) {
     if ((res = MPI_Reduce((nodeID == 0) ? MPI_IN_PLACE : weights_v, weights_v,
-                          nr_cells, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD))
-        != MPI_SUCCESS)
+                          nr_cells, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD)) !=
+        MPI_SUCCESS)
       mpi_error(res, "Failed to allreduce vertex weights.");
   }
 
   if ((res = MPI_Reduce((nodeID == 0) ? MPI_IN_PLACE : weights_e, weights_e,
-                        26 * nr_cells, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD))
-      != MPI_SUCCESS)
+                        26 * nr_cells, MPI_DOUBLE, MPI_SUM, 0,
+                        MPI_COMM_WORLD)) != MPI_SUCCESS)
     mpi_error(res, "Failed to allreduce edge weights.");
 
   /* Allocate cell list for the partition. */
@@ -642,8 +646,8 @@ static void repart_edge_metis(int partweights, int bothweights, int timebins,
   if (nodeID == 0) {
 
     /* We need to rescale the weights into the range of an integer for METIS
-     * really range of idx_t). Also we would like the range of vertex and
-     * edges weights to be simila r so they balance. */
+     * (really range of idx_t). Also we would like the range of vertex and
+     * edges weights to be similar so they balance. */
     double wminv = 0.0;
     double wmaxv = 0.0;
     if (bothweights) {
@@ -680,7 +684,6 @@ static void repart_edge_metis(int partweights, int bothweights, int timebins,
       for (int k = 0; k < nr_cells; k++) {
         weights_v[k] = (weights_v[k] - wminv) * wscalev + 1.0;
       }
-
     }
 
     /* Scale to the METIS range. */
@@ -905,8 +908,8 @@ void partition_initial_partition(struct partition *initial_partition,
       accumulate_counts(s, weights);
 
       /* Get all the counts from all the nodes. */
-      if (MPI_Allreduce(MPI_IN_PLACE, weights, s->nr_cells, MPI_DOUBLE,
-                        MPI_SUM, MPI_COMM_WORLD) != MPI_SUCCESS)
+      if (MPI_Allreduce(MPI_IN_PLACE, weights, s->nr_cells, MPI_DOUBLE, MPI_SUM,
+                        MPI_COMM_WORLD) != MPI_SUCCESS)
         error("Failed to allreduce particle cell weights.");
     }
 
@@ -1040,41 +1043,40 @@ void partition_init(struct partition *partition,
   parser_get_opt_param_string(params, "DomainDecomposition:repartition_type",
                               part_type, default_repart);
 
-
   if (strcmp("none/none", part_type) == 0) {
-      repartition->type = REPART_NONE;
+    repartition->type = REPART_NONE;
 
 #ifdef HAVE_METIS
   } else if (strcmp("costs/costs", part_type) == 0) {
-      repartition->type = REPART_METIS_VERTEX_COSTS_EDGE_COSTS;
+    repartition->type = REPART_METIS_VERTEX_COSTS_EDGE_COSTS;
 
   } else if (strcmp("counts/none", part_type) == 0) {
-      repartition->type = REPART_METIS_VERTEX_COUNTS;
+    repartition->type = REPART_METIS_VERTEX_COUNTS;
 
   } else if (strcmp("none/costs", part_type) == 0) {
-      repartition->type = REPART_METIS_EDGE_COSTS;
+    repartition->type = REPART_METIS_EDGE_COSTS;
 
   } else if (strcmp("counts/costs", part_type) == 0) {
-      repartition->type = REPART_METIS_VERTEX_COUNTS_EDGE_COSTS;
+    repartition->type = REPART_METIS_VERTEX_COUNTS_EDGE_COSTS;
 
   } else if (strcmp("costs/time", part_type) == 0) {
-      repartition->type = REPART_METIS_VERTEX_COSTS_EDGE_TIMEBINS;
+    repartition->type = REPART_METIS_VERTEX_COSTS_EDGE_TIMEBINS;
 
   } else if (strcmp("counts/time", part_type) == 0) {
-      repartition->type = REPART_METIS_VERTEX_COUNTS_EDGE_TIMEBINS;
+    repartition->type = REPART_METIS_VERTEX_COUNTS_EDGE_TIMEBINS;
 
   } else if (strcmp("none/time", part_type) == 0) {
-      repartition->type = REPART_METIS_EDGE_TIMEBINS;
+    repartition->type = REPART_METIS_EDGE_TIMEBINS;
   } else {
-      message("Invalid choice of re-partition type '%s'.", part_type);
-      error(
-            "Permitted values are: 'none/none', 'costs/costs',"
-            "'counts/none', 'none/costs', 'counts/costs', "
-            "'costs/time', 'counts/time' or 'none/time'");
+    message("Invalid choice of re-partition type '%s'.", part_type);
+    error(
+        "Permitted values are: 'none/none', 'costs/costs',"
+        "'counts/none', 'none/costs', 'counts/costs', "
+        "'costs/time', 'counts/time' or 'none/time'");
 #else
   } else {
-      message("Invalid choice of re-partition type '%s'.", part_type);
-      error("Permitted values are: 'none/none' when compiled without METIS.");
+    message("Invalid choice of re-partition type '%s'.", part_type);
+    error("Permitted values are: 'none/none' when compiled without METIS.");
 #endif
   }