diff --git a/examples/main.c b/examples/main.c index 32264f185f2d7cd7efb010f667ce74e486114c9d..a28faa628b856b3f1917331aecc6b95835c628b3 100644 --- a/examples/main.c +++ b/examples/main.c @@ -79,10 +79,25 @@ int main(int argc, char *argv[]) { char dumpfile[30]; float dt_max = 0.0f, dt_min = 0.0f; ticks tic; - int nr_nodes = 1, myrank = 0, grid[3] = {1, 1, 1}; + int nr_nodes = 1, myrank = 0; FILE *file_thread; int with_outputs = 1; +#ifdef WITH_MPI + struct partition initial_partition; + enum repartition_type reparttype = REPART_NONE; + + initial_partition.type = INITPART_GRID; + initial_partition.grid[0] = 1; + initial_partition.grid[1] = 1; + initial_partition.grid[2] = 1; +#ifdef HAVE_METIS + /* Defaults make use of METIS. */ + reparttype = REPART_METIS_BOTH; + initial_partition.type = INITPART_METIS_NOWEIGHT; +#endif +#endif + /* Choke on FP-exceptions. */ // feenableexcept( FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW ); @@ -107,9 +122,11 @@ int main(int argc, char *argv[]) { fflush(stdout); /* Set a default grid so that grid[0]*grid[1]*grid[2] == nr_nodes. */ - factor(nr_nodes, &grid[0], &grid[1]); - factor(nr_nodes / grid[1], &grid[0], &grid[2]); - factor(grid[0] * grid[1], &grid[1], &grid[0]); + factor(nr_nodes, &initial_partition.grid[0], &initial_partition.grid[1]); + factor(nr_nodes / initial_partition.grid[1], &initial_partition.grid[0], + &initial_partition.grid[2]); + factor(initial_partition.grid[0] * initial_partition.grid[1], + &initial_partition.grid[1], &initial_partition.grid[0]); #endif /* Greeting message */ @@ -137,7 +154,7 @@ int main(int argc, char *argv[]) { bzero(&s, sizeof(struct space)); /* Parse the options */ - while ((c = getopt(argc, argv, "a:c:d:e:f:g:m:oq:s:t:w:y:z:")) != -1) + while ((c = getopt(argc, argv, "a:c:d:e:f:m:oP:q:R:s:t:w:y:z:")) != -1) switch (c) { case 'a': if (sscanf(optarg, "%lf", &scaling) != 1) @@ -166,10 +183,6 @@ int main(int argc, char *argv[]) { case 'f': if (!strcpy(ICfileName, optarg)) error("Error parsing IC file name."); break; - case 'g': - if (sscanf(optarg, "%i %i %i", &grid[0], &grid[1], &grid[2]) != 3) - error("Error parsing grid."); - break; case 'm': if (sscanf(optarg, "%lf", &h_max) != 1) error("Error parsing h_max."); if (myrank == 0) message("maximum h set to %e.", h_max); @@ -178,10 +191,63 @@ int main(int argc, char *argv[]) { case 'o': with_outputs = 0; break; + case 'P': + /* Partition type is one of "g", "m", "w", or "v"; "g" can be + * followed by three numbers defining the grid. */ +#ifdef WITH_MPI + switch (optarg[0]) { + case 'g': + initial_partition.type = INITPART_GRID; + if (strlen(optarg) > 2) { + if (sscanf(optarg, "g %i %i %i", &initial_partition.grid[0], + &initial_partition.grid[1], + &initial_partition.grid[2]) != 3) + error("Error parsing grid."); + } + break; +#ifdef HAVE_METIS + case 'm': + initial_partition.type = INITPART_METIS_NOWEIGHT; + break; + case 'w': + initial_partition.type = INITPART_METIS_WEIGHT; + break; +#endif + case 'v': + initial_partition.type = INITPART_VECTORIZE; + break; + } +#endif + break; case 'q': if (sscanf(optarg, "%d", &nr_queues) != 1) error("Error parsing number of queues."); break; + case 'R': + /* Repartition type "n", "b", "v", "e" or "x". + * Note only none is available without METIS. 
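+         * Here "n" selects no repartitioning, "b" METIS with task-time vertex and
+         * edge weights, "v" METIS with particle-count vertex weights, "e" METIS
+         * with task-time edge weights, and "x" METIS with particle-count vertices
+         * plus task-time edges.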
*/ +#ifdef WITH_MPI + switch (optarg[0]) { + case 'n': + reparttype = REPART_NONE; + break; +#ifdef HAVE_METIS + case 'b': + reparttype = REPART_METIS_BOTH; + break; + case 'v': + reparttype = REPART_METIS_VERTEX; + break; + case 'e': + reparttype = REPART_METIS_EDGE; + break; + case 'x': + reparttype = REPART_METIS_VERTEX_EDGE; + break; +#endif + } +#endif + break; case 's': if (sscanf(optarg, "%lf %lf %lf", &shift[0], &shift[1], &shift[2]) != 3) error("Error parsing shift."); @@ -212,10 +278,15 @@ int main(int argc, char *argv[]) { break; } -#if defined(WITH_MPI) +#ifdef WITH_MPI if (myrank == 0) { message("Running with %i thread(s) per node.", nr_threads); - message("grid set to [ %i %i %i ].", grid[0], grid[1], grid[2]); + message("Using initial partition %s", + initial_partition_name[initial_partition.type]); + if (initial_partition.type == INITPART_GRID) + message("grid set to [ %i %i %i ].", initial_partition.grid[0], + initial_partition.grid[1], initial_partition.grid[2]); + message("Using %s repartitioning", repartition_name[reparttype]); if (nr_nodes == 1) { message("WARNING: you are running with one MPI rank."); @@ -349,7 +420,7 @@ int main(int argc, char *argv[]) { #ifdef WITH_MPI /* Split the space. */ - engine_split(&e, grid); + engine_split(&e, &initial_partition); engine_redistribute(&e); #endif @@ -400,8 +471,8 @@ int main(int argc, char *argv[]) { for (j = 0; e.time < time_end; j++) { /* Repartition the space amongst the nodes? */ -#if defined(WITH_MPI) && defined(HAVE_METIS) - if (j % 100 == 2) e.forcerepart = 1; +#ifdef WITH_MPI + if (j % 100 == 2) e.forcerepart = reparttype; #endif timers_reset(timers_mask_all); diff --git a/src/Makefile.am b/src/Makefile.am index 8cb27ef60ffaf884b1c0d7626b5d17d6a7a2a7fc..f4b293925bfcfdf989889db768d60e3de778c985 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -35,13 +35,13 @@ endif # List required headers include_HEADERS = space.h runner.h queue.h task.h lock.h cell.h part.h const.h \ engine.h swift.h serial_io.h timers.h debug.h scheduler.h proxy.h parallel_io.h \ - common_io.h single_io.h multipole.h map.h tools.h + common_io.h single_io.h multipole.h map.h tools.h partition.h # Common source files AM_SOURCES = space.c runner.c queue.c task.c cell.c engine.c \ serial_io.c timers.c debug.c scheduler.c proxy.c parallel_io.c \ units.c common_io.c single_io.c multipole.c version.c map.c \ - kernel.c tools.c part.c + kernel.c tools.c part.c partition.c # Include files for distribution, not installation. nobase_noinst_HEADERS = approx_math.h atomic.h cycle.h error.h inline.h kernel.h vector.h \ diff --git a/src/engine.c b/src/engine.c index 9c89080ef8fa5bf8d0b9c3c6c18f29f2a0d7c084..272d5c0c9ff07b98befed3c68810d4c909e78c57 100644 --- a/src/engine.c +++ b/src/engine.c @@ -1,6 +1,7 @@ /******************************************************************************* * This file is part of SWIFT. * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk) + * 2015 Peter W. Draper (p.w.draper@durham.ac.uk) * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published @@ -33,10 +34,6 @@ /* MPI headers. */ #ifdef WITH_MPI #include <mpi.h> -/* METIS headers only used when MPI is also available. 
*/ -#ifdef HAVE_METIS -#include <metis.h> -#endif #endif #ifdef HAVE_LIBNUMA @@ -55,6 +52,7 @@ #include "hydro.h" #include "minmax.h" #include "part.h" +#include "partition.h" #include "timers.h" const char *engine_policy_names[12] = { @@ -287,6 +285,7 @@ void engine_redistribute(struct engine *e) { #endif } + /** * @brief Repartition the cells amongst the nodes. * @@ -297,343 +296,17 @@ void engine_repartition(struct engine *e) { #if defined(WITH_MPI) && defined(HAVE_METIS) - int i, j, k, l, cid, cjd, ii, jj, kk, res; - idx_t *inds, *nodeIDs; - idx_t *weights_v = NULL, *weights_e = NULL; - struct space *s = e->s; - int nr_cells = s->nr_cells, my_cells = 0; - struct cell *cells = s->cells; - int ind[3], *cdim = s->cdim; - struct task *t, *tasks = e->sched.tasks; - struct cell *ci, *cj; - int nr_nodes = e->nr_nodes, nodeID = e->nodeID; - float wscale = 1e-3, vscale = 1e-3, wscale_buff; - idx_t wtot = 0; - idx_t wmax = 1e9 / e->nr_nodes; - idx_t wmin; - /* Clear the repartition flag. */ - e->forcerepart = 0; + enum repartition_type reparttype = e->forcerepart; + e->forcerepart = REPART_NONE; /* Nothing to do if only using a single node. Also avoids METIS * bug that doesn't handle this case well. */ - if (nr_nodes == 1) return; - - /* Allocate the inds and weights. */ - if ((inds = (idx_t *)malloc(sizeof(idx_t) * 26 *nr_cells)) == NULL || - (weights_v = (idx_t *)malloc(sizeof(idx_t) *nr_cells)) == NULL || - (weights_e = (idx_t *)malloc(sizeof(idx_t) * 26 *nr_cells)) == NULL || - (nodeIDs = (idx_t *)malloc(sizeof(idx_t) * nr_cells)) == NULL) - error("Failed to allocate inds and weights arrays."); - - /* Fill the inds array. */ - for (cid = 0; cid < nr_cells; cid++) { - ind[0] = cells[cid].loc[0] / s->cells[cid].h[0] + 0.5; - ind[1] = cells[cid].loc[1] / s->cells[cid].h[1] + 0.5; - ind[2] = cells[cid].loc[2] / s->cells[cid].h[2] + 0.5; - l = 0; - for (i = -1; i <= 1; i++) { - ii = ind[0] + i; - if (ii < 0) - ii += cdim[0]; - else if (ii >= cdim[0]) - ii -= cdim[0]; - for (j = -1; j <= 1; j++) { - jj = ind[1] + j; - if (jj < 0) - jj += cdim[1]; - else if (jj >= cdim[1]) - jj -= cdim[1]; - for (k = -1; k <= 1; k++) { - kk = ind[2] + k; - if (kk < 0) - kk += cdim[2]; - else if (kk >= cdim[2]) - kk -= cdim[2]; - if (i || j || k) { - inds[cid * 26 + l] = cell_getid(cdim, ii, jj, kk); - l += 1; - } - } - } - } - } - - /* Init the weights arrays. */ - bzero(weights_e, sizeof(idx_t) * 26 * nr_cells); - bzero(weights_v, sizeof(idx_t) * nr_cells); - - /* Loop over the tasks... */ - for (j = 0; j < e->sched.nr_tasks; j++) { - - /* Get a pointer to the kth task. */ - t = &tasks[j]; - - /* Skip un-interesting tasks. */ - if (t->type != task_type_self && t->type != task_type_pair && - t->type != task_type_sub && t->type != task_type_ghost && - t->type != task_type_drift && t->type != task_type_kick && - t->type != task_type_init) - continue; - - /* Get the task weight. */ - idx_t w = (t->toc - t->tic) * wscale; - if (w < 0) error("Bad task weight (%" SCIDX ").", w); - - /* Do we need to re-scale? */ - wtot += w; - while (wtot > wmax) { - wscale /= 2; - wtot /= 2; - w /= 2; - for (k = 0; k < 26 * nr_cells; k++) weights_e[k] *= 0.5; - for (k = 0; k < nr_cells; k++) weights_v[k] *= 0.5; - } - - /* Get the top-level cells involved. */ - for (ci = t->ci; ci->parent != NULL; ci = ci->parent) - ; - if (t->cj != NULL) - for (cj = t->cj; cj->parent != NULL; cj = cj->parent) - ; - else - cj = NULL; - - /* Get the cell IDs. */ - cid = ci - cells; - - /* Different weights for different tasks. 
*/ - if (t->type == task_type_ghost || t->type == task_type_drift || - t->type == task_type_kick) { + if (e->nr_nodes == 1) return; - /* Particle updates add only to vertex weight. */ - weights_v[cid] += w; - - } - - /* Self interaction? */ - else if ((t->type == task_type_self && ci->nodeID == nodeID) || - (t->type == task_type_sub && cj == NULL && ci->nodeID == nodeID)) { - - /* Self interactions add only to vertex weight. */ - weights_v[cid] += w; - - } - - /* Pair? */ - else if (t->type == task_type_pair || - (t->type == task_type_sub && cj != NULL)) { - - /* In-cell pair? */ - if (ci == cj) { - - /* Add weight to vertex for ci. */ - weights_v[cid] += w; - - } - - /* Distinct cells with local ci? */ - else if (ci->nodeID == nodeID) { - - /* Index of the jth cell. */ - cjd = cj - cells; - - /* Add half of weight to each cell. */ - if (ci->nodeID == nodeID) weights_v[cid] += 0.5 * w; - if (cj->nodeID == nodeID) weights_v[cjd] += 0.5 * w; - - /* Add Weight to edge. */ - for (k = 26 * cid; inds[k] != cjd; k++) - ; - weights_e[k] += w; - for (k = 26 * cjd; inds[k] != cid; k++) - ; - weights_e[k] += w; - } - } - } - - /* Get the minimum scaling and re-scale if necessary. */ - if ((res = MPI_Allreduce(&wscale, &wscale_buff, 1, MPI_FLOAT, MPI_MIN, - MPI_COMM_WORLD)) != MPI_SUCCESS) { - char buff[MPI_MAX_ERROR_STRING]; - MPI_Error_string(res, buff, &i); - error("Failed to allreduce the weight scales (%s).", buff); - } - if (wscale_buff != wscale) { - float scale = wscale_buff / wscale; - for (k = 0; k < 26 * nr_cells; k++) weights_e[k] *= scale; - for (k = 0; k < nr_cells; k++) weights_v[k] *= scale; - } - -/* Merge the weights arrays across all nodes. */ -#if IDXTYPEWIDTH == 32 - if ((res = MPI_Reduce((nodeID == 0) ? MPI_IN_PLACE : weights_v, weights_v, - nr_cells, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD)) != - MPI_SUCCESS) { -#else - if ((res = MPI_Reduce((nodeID == 0) ? MPI_IN_PLACE : weights_v, weights_v, - nr_cells, MPI_LONG_LONG_INT, MPI_SUM, 0, - MPI_COMM_WORLD)) != MPI_SUCCESS) { -#endif - char buff[MPI_MAX_ERROR_STRING]; - MPI_Error_string(res, buff, &i); - error("Failed to allreduce vertex weights (%s).", buff); - } -#if IDXTYPEWIDTH == 32 - if (MPI_Reduce((nodeID == 0) ? MPI_IN_PLACE : weights_e, weights_e, - 26 * nr_cells, MPI_INT, MPI_SUM, 0, - MPI_COMM_WORLD) != MPI_SUCCESS) -#else - if (MPI_Reduce((nodeID == 0) ? MPI_IN_PLACE : weights_e, weights_e, - 26 * nr_cells, MPI_LONG_LONG_INT, MPI_SUM, 0, - MPI_COMM_WORLD) != MPI_SUCCESS) -#endif - error("Failed to allreduce edge weights."); - - /* As of here, only one node needs to compute the partition. */ - if (nodeID == 0) { - - /* Final rescale of all weights to avoid a large range. Large ranges have - * been seen to cause an incomplete graph. */ - wmin = wmax; - wmax = 0.0; - for (k = 0; k < 26 * nr_cells; k++) { - wmax = weights_e[k] > wmax ? weights_e[k] : wmax; - wmin = weights_e[k] < wmin ? weights_e[k] : wmin; - } - if ((wmax - wmin) > engine_maxmetisweight) { - wscale = engine_maxmetisweight / (wmax - wmin); - for (k = 0; k < 26 * nr_cells; k++) { - weights_e[k] = (weights_e[k] - wmin) * wscale + 1; - } - for (k = 0; k < nr_cells; k++) { - weights_v[k] = (weights_v[k] - wmin) * wscale + 1; - } - } - - /* Check that the edge weights are fully symmetric. */ - /* for ( cid = 0 ; cid < nr_cells ; cid++ ) - for ( k = 0 ; k < 26 ; k++ ) { - cjd = inds[ cid*26 + k ]; - for ( j = 26*cjd ; inds[j] != cid ; j++ ); - if ( weights_e[ cid*26+k ] != weights_e[ j ] ) - error( "Unsymmetric edge weights detected (%i vs %i)." 
, - weights_e[ cid*26+k ] , weights_e[ j ] ); - } */ - /* int w_min = weights_e[0], w_max = weights_e[0], w_tot = weights_e[0]; - for ( k = 1 ; k < 26*nr_cells ; k++ ) { - w_tot += weights_e[k]; - if ( weights_e[k] < w_min ) - w_min = weights_e[k]; - else if ( weights_e[k] > w_max ) - w_max = weights_e[k]; - } - message( "edge weights in [ %i , %i ], tot=%i." , w_min , w_max , w_tot ); - w_min = weights_e[0], w_max = weights_e[0]; w_tot = weights_v[0]; - for ( k = 1 ; k < nr_cells ; k++ ) { - w_tot += weights_v[k]; - if ( weights_v[k] < w_min ) - w_min = weights_v[k]; - else if ( weights_v[k] > w_max ) - w_max = weights_v[k]; - } - message( "vertex weights in [ %i , %i ], tot=%i." , w_min , w_max , w_tot ); - */ - - /* Make sure there are no zero weights. */ - for (k = 0; k < 26 * nr_cells; k++) - if (weights_e[k] == 0) weights_e[k] = 1; - for (k = 0; k < nr_cells; k++) - if ((weights_v[k] *= vscale) == 0) weights_v[k] = 1; - - /* Allocate and fill the connection array. */ - idx_t *offsets; - if ((offsets = (idx_t *)malloc(sizeof(idx_t) * (nr_cells + 1))) == NULL) - error("Failed to allocate offsets buffer."); - offsets[0] = 0; - for (k = 0; k < nr_cells; k++) offsets[k + 1] = offsets[k] + 26; - - /* Set the METIS options. +1 to keep the GCC sanitizer happy. */ - idx_t options[METIS_NOPTIONS + 1]; - METIS_SetDefaultOptions(options); - options[METIS_OPTION_OBJTYPE] = METIS_OBJTYPE_CUT; - options[METIS_OPTION_NUMBERING] = 0; - options[METIS_OPTION_CONTIG] = 1; - options[METIS_OPTION_NCUTS] = 10; - options[METIS_OPTION_NITER] = 20; - // options[ METIS_OPTION_UFACTOR ] = 1; - - /* Set the initial partition, although this is probably ignored. */ - for (k = 0; k < nr_cells; k++) nodeIDs[k] = cells[k].nodeID; - - /* Call METIS. */ - idx_t one = 1, idx_nr_cells = nr_cells, idx_nr_nodes = nr_nodes; - idx_t objval; - - /* Dump graph in METIS format */ - /*dumpMETISGraph("metis_graph", idx_nr_cells, one, offsets, inds, - weights_v, NULL, weights_e);*/ - - if (METIS_PartGraphRecursive(&idx_nr_cells, &one, offsets, inds, weights_v, - NULL, weights_e, &idx_nr_nodes, NULL, NULL, - options, &objval, nodeIDs) != METIS_OK) - error("Call to METIS_PartGraphRecursive failed."); - - /* Dump the 3d array of cell IDs. */ - /* printf( "engine_repartition: nodeIDs = reshape( [" ); - for ( i = 0 ; i < cdim[0]*cdim[1]*cdim[2] ; i++ ) - printf( "%i " , (int)nodeIDs[ i ] ); - printf("] ,%i,%i,%i);\n",cdim[0],cdim[1],cdim[2]); */ - - /* Check that the nodeIDs are ok. */ - for (k = 0; k < nr_cells; k++) - if (nodeIDs[k] < 0 || nodeIDs[k] >= nr_nodes) - error("Got bad nodeID %" PRIDX " for cell %i.", nodeIDs[k], k); - - /* Check that the partition is complete and all nodes have some work. */ - int present[nr_nodes]; - int failed = 0; - for (i = 0; i < nr_nodes; i++) present[i] = 0; - for (i = 0; i < nr_cells; i++) present[nodeIDs[i]]++; - for (i = 0; i < nr_nodes; i++) { - if (!present[i]) { - failed = 1; - message("Node %d is not present after repartition", i); - } - } - - /* If partition failed continue with the current one, but make this - * clear. */ - if (failed) { - message( - "WARNING: METIS repartition has failed, continuing with " - "the current partition, load balance will not be optimal"); - for (k = 0; k < nr_cells; k++) nodeIDs[k] = cells[k].nodeID; - } - } - -/* Broadcast the result of the partition. 
*/ -#if IDXTYPEWIDTH == 32 - if (MPI_Bcast(nodeIDs, nr_cells, MPI_INT, 0, MPI_COMM_WORLD) != MPI_SUCCESS) - error("Failed to bcast the node IDs."); -#else - if (MPI_Bcast(nodeIDs, nr_cells, MPI_LONG_LONG_INT, 0, MPI_COMM_WORLD) != - MPI_SUCCESS) - error("Failed to bcast the node IDs."); -#endif - - /* Set the cell nodeIDs and clear any non-local parts. */ - for (k = 0; k < nr_cells; k++) { - cells[k].nodeID = nodeIDs[k]; - if (nodeIDs[k] == nodeID) my_cells += 1; - } - - /* Clean up. */ - free(inds); - free(weights_v); - free(weights_e); - free(nodeIDs); + /* Do the repartitioning. */ + partition_repartition(reparttype, e->nodeID, e->nr_nodes, e->s, + e->sched.tasks, e->sched.nr_tasks); /* Now comes the tricky part: Exchange particles between all nodes. This is done in two steps, first allreducing a matrix of @@ -1894,7 +1567,7 @@ void engine_step(struct engine *e) { // message("\nACCELERATION AND KICK\n"); /* Re-distribute the particles amongst the nodes? */ - if (e->forcerepart) engine_repartition(e); + if (e->forcerepart != REPART_NONE) engine_repartition(e); /* Prepare the space. */ engine_prepare(e); @@ -2054,34 +1727,19 @@ void engine_makeproxies(struct engine *e) { } /** - * @brief Split the underlying space according to the given grid. + * @brief Split the underlying space into regions and assign to separate nodes. * * @param e The #engine. - * @param grid The grid. + * @param initial_partition structure defining the cell partition technique */ -void engine_split(struct engine *e, int *grid) { +void engine_split(struct engine *e, struct partition *initial_partition) { #ifdef WITH_MPI - - int j, k; - int ind[3]; struct space *s = e->s; - struct cell *c; - /* If we've got the wrong number of nodes, fail. */ - if (e->nr_nodes != grid[0] * grid[1] * grid[2]) - error("Grid size does not match number of nodes."); - - /* Run through the cells and set their nodeID. */ - // message("s->dim = [%e,%e,%e]", s->dim[0], s->dim[1], s->dim[2]); - for (k = 0; k < s->nr_cells; k++) { - c = &s->cells[k]; - for (j = 0; j < 3; j++) ind[j] = c->loc[j] / s->dim[j] * grid[j]; - c->nodeID = ind[0] + grid[0] * (ind[1] + grid[1] * ind[2]); - // message("cell at [%e,%e,%e]: ind = [%i,%i,%i], nodeID = %i", c->loc[0], - // c->loc[1], c->loc[2], ind[0], ind[1], ind[2], c->nodeID); - } + /* Do the initial partition of the cells. */ + partition_initial_partition(initial_partition, e->nodeID, e->nr_nodes, s); /* Make the proxies. 
*/ engine_makeproxies(e); @@ -2104,9 +1762,6 @@ void engine_split(struct engine *e, int *grid) { free(s->xparts); s->parts = parts_new; s->xparts = xparts_new; - -#else - error("SWIFT was not compiled with MPI support."); #endif } @@ -2175,7 +1830,7 @@ void engine_init(struct engine *e, struct space *s, float dt, int nr_threads, int home = numa_node_of_cpu(sched_getcpu()), half = nr_cores / 2; bool done = false, swap_hyperthreads = hyperthreads_present(); - if (swap_hyperthreads && nodeID == 0) + if (swap_hyperthreads && nodeID == 0) message("prefer physical cores to hyperthreads"); while (!done) { @@ -2228,7 +1883,7 @@ void engine_init(struct engine *e, struct space *s, float dt, int nr_threads, e->proxy_ind = NULL; e->nr_proxies = 0; e->forcerebuild = 1; - e->forcerepart = 0; + e->forcerepart = REPART_NONE; e->links = NULL; e->nr_links = 0; e->timeBegin = timeBegin; diff --git a/src/engine.h b/src/engine.h index 38bdb262288f456d70c597f0d18dd4715dfdd2b8..13c7ec40612713d3771543150763da6924144c1a 100644 --- a/src/engine.h +++ b/src/engine.h @@ -38,6 +38,7 @@ #include "scheduler.h" #include "space.h" #include "task.h" +#include "partition.h" /* Some constants. */ enum engine_policy { @@ -62,7 +63,6 @@ extern const char *engine_policy_names[]; #define engine_maxproxies 64 #define engine_tasksreweight 10 -#define engine_maxmetisweight 10000.0f /* The rank of the engine as a global variable (for messages). */ extern int engine_rank; @@ -149,7 +149,8 @@ struct engine { float wallclock_time; /* Force the engine to rebuild? */ - int forcerebuild, forcerepart; + int forcerebuild; + enum repartition_type forcerepart; /* How many steps have we done with the same set of tasks? */ int tasks_age; @@ -177,7 +178,7 @@ void engine_print(struct engine *e); void engine_init_particles(struct engine *e); void engine_step(struct engine *e); void engine_maketasks(struct engine *e); -void engine_split(struct engine *e, int *grid); +void engine_split(struct engine *e, struct partition *initial_partition); int engine_exchange_strays(struct engine *e, int offset, int *ind, int N); void engine_rebuild(struct engine *e); void engine_repartition(struct engine *e); diff --git a/src/error.h b/src/error.h index 7e5c290203786bce33d244f0aa2dace621ec374d..14c6518efa99b4c5b2a5862619426f30d7f69b7b 100644 --- a/src/error.h +++ b/src/error.h @@ -2,6 +2,7 @@ * This file is part of SWIFT. * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk), * Matthieu Schaller (matthieu.schaller@durham.ac.uk). + * 2015 Peter W. Draper (p.w.draper@durham.ac.uk) * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published @@ -49,6 +50,34 @@ extern int engine_rank; } #endif +#ifdef WITH_MPI +/** + * @brief MPI error macro. Prints the message given in argument, + * followed by the MPI error string and aborts. + * + */ +#define mpi_error(res,s, ...) \ + { \ + fprintf(stderr, "[%03i] %s:%s():%i: " s "\n", engine_rank, __FILE__, \ + __FUNCTION__, __LINE__, ##__VA_ARGS__); \ + int len = 1024; \ + char buf[len]; \ + MPI_Error_string( res, buf, &len ); \ + fprintf(stderr, "%s\n\n", buf ); \ + MPI_Abort(MPI_COMM_WORLD, -1); \ + } + +#define mpi_error_string(res,s, ...) 
\ + { \ + fprintf(stderr, "[%03i] %s:%s():%i: " s "\n", engine_rank, __FILE__, \ + __FUNCTION__, __LINE__, ##__VA_ARGS__); \ + int len = 1024; \ + char buf[len]; \ + MPI_Error_string( res, buf, &len ); \ + fprintf(stderr, "%s\n\n", buf ); \ + } +#endif + /** * @brief Macro to print a localized message with variable arguments. * diff --git a/src/partition.c b/src/partition.c new file mode 100644 index 0000000000000000000000000000000000000000..f1fe2b2a8436994de13501dc4ce6714ec1d2e92d --- /dev/null +++ b/src/partition.c @@ -0,0 +1,955 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2016 Peter W. Draper (p.w.draper@durham.ac.uk) + * Pedro Gonnet (pedro.gonnet@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/** + * @file partition.c + * @brief file of various techniques for partitioning and repartitioning + * a grid of cells into geometrically connected regions and distributing + * these around a number of MPI nodes. + * + * Currently supported partitioning types: grid, vectorise and METIS. + */ + +/* Config parameters. */ +#include "../config.h" + +/* Standard headers. */ +#include <math.h> +#include <stdio.h> +#include <stdlib.h> +#include <strings.h> +#include <values.h> + +/* MPI headers. */ +#ifdef WITH_MPI +#include <mpi.h> +/* METIS headers only used when MPI is also available. */ +#ifdef HAVE_METIS +#include <metis.h> +#endif +#endif + +/* Local headers. */ +#include "const.h" +#include "debug.h" +#include "error.h" +#include "partition.h" +#include "space.h" + +/* Maximum weight used for METIS. */ +#define metis_maxweight 10000.0f + +/* Simple descriptions of initial partition types for reports. */ +const char *initial_partition_name[] = { + "gridded cells", + "vectorized point associated cells", + "METIS particle weighted cells", + "METIS unweighted cells" +}; + +/* Simple descriptions of repartition types for reports. */ +const char *repartition_name[] = { + "no", + "METIS edge and vertex time weighted cells", + "METIS particle count vertex weighted cells", + "METIS time edge weighted cells", + "METIS particle count vertex and time edge cells" +}; + +/* Local functions, if needed. */ +static int check_complete(struct space *s, int verbose, int nregions); + +/* Vectorisation support */ +/* ===================== */ + +#if defined(WITH_MPI) +/** + * @brief Pick a number of cell positions from a vectorised list. + * + * Vectorise the cell space and pick positions in it for the number of + * expected regions using a single step. Vectorisation is guaranteed + * to work, providing there are more cells than regions. + * + * @param s the space. 
+ * @param nregions the number of regions + * @param samplecells the list of sample cell positions, size of 3*nregions + */ +static void pick_vector(struct space *s, int nregions, int *samplecells) { + + /* Get length of space and divide up. */ + int length = s->cdim[0] * s->cdim[1] * s->cdim[2]; + if (nregions > length) { + error("Too few cells (%d) for this number of regions (%d)", length, + nregions); + } + + int step = length / nregions; + int n = 0; + int m = 0; + int l = 0; + + for (int i = 0; i < s->cdim[0]; i++) { + for (int j = 0; j < s->cdim[1]; j++) { + for (int k = 0; k < s->cdim[2]; k++) { + if (n == 0 && l < nregions) { + samplecells[m++] = i; + samplecells[m++] = j; + samplecells[m++] = k; + l++; + } + n++; + if (n == step) n = 0; + } + } + } +} +#endif + +#if defined(WITH_MPI) +/** + * @brief Partition the space. + * + * Using the sample positions as seeds pick cells that are geometrically + * closest and apply the partition to the space. + */ +static void split_vector(struct space *s, int nregions, int *samplecells) { + int n = 0; + for (int i = 0; i < s->cdim[0]; i++) { + for (int j = 0; j < s->cdim[1]; j++) { + for (int k = 0; k < s->cdim[2]; k++) { + int select = -1; + float rsqmax = FLT_MAX; + int m = 0; + for (int l = 0; l < nregions; l++) { + float dx = samplecells[m++] - i; + float dy = samplecells[m++] - j; + float dz = samplecells[m++] - k; + float rsq = (dx * dx + dy * dy + dz * dz); + if (rsq < rsqmax) { + rsqmax = rsq; + select = l; + } + } + s->cells[n++].nodeID = select; + } + } + } +} +#endif + +/* METIS support + * ============= + * + * METIS partitions using a multi-level k-way scheme. We support using this in + * a unweighted scheme, which works well and seems to be guaranteed, and a + * weighted by the number of particles scheme. Note METIS is optional. + * + * Repartitioning is based on METIS and uses weights determined from the times + * that cell tasks have taken. These weight the graph edges and vertices, or + * just the edges, with vertex weights from the particle counts or none. + */ + +#if defined(WITH_MPI) && defined(HAVE_METIS) +/** + * @brief Fill the METIS xadj and adjncy arrays defining the graph of cells + * in a space. + * + * See the METIS manual if you want to understand this format. The cell graph + * consists of all nodes as vertices with edges as the connections to all + * neighbours, so we have 26 per vertex. + * + * @param s the space of cells. + * @param adjncy the METIS adjncy array to fill, must be of size + * 26 * the number of cells in the space. + * @param xadj the METIS xadj array to fill, must be of size + * number of cells in space + 1. NULL for not used. + */ +static void graph_init_metis(struct space *s, idx_t *adjncy, idx_t *xadj) { + + /* Loop over all cells in the space. */ + int cid = 0; + for (int l = 0; l < s->cdim[0]; l++) { + for (int m = 0; m < s->cdim[1]; m++) { + for (int n = 0; n < s->cdim[2]; n++) { + + /* Visit all neighbours of this cell, wrapping space at edges. */ + int p = 0; + for (int i = -1; i <= 1; i++) { + int ii = l + i; + if (ii < 0) + ii += s->cdim[0]; + else if (ii >= s->cdim[0]) + ii -= s->cdim[0]; + for (int j = -1; j <= 1; j++) { + int jj = m + j; + if (jj < 0) + jj += s->cdim[1]; + else if (jj >= s->cdim[1]) + jj -= s->cdim[1]; + for (int k = -1; k <= 1; k++) { + int kk = n + k; + if (kk < 0) + kk += s->cdim[2]; + else if (kk >= s->cdim[2]) + kk -= s->cdim[2]; + + /* If not self, record id of neighbour. 
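+                   * Each cell records exactly 26 neighbour ids, so the xadj
+                   * offsets set below can simply advance by 26 per cell.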
*/ + if (i || j || k) { + adjncy[cid * 26 + p] = cell_getid(s->cdim, ii, jj, kk); + p++; + } + } + } + } + + /* Next cell. */ + cid++; + } + } + } + + /* If given set xadj. */ + if (xadj != NULL) { + xadj[0] = 0; + for (int k = 0; k < s->nr_cells; k++) xadj[k + 1] = xadj[k] + 26; + + } +} +#endif + +#if defined(WITH_MPI) && defined(HAVE_METIS) +/** + * @brief Accumulate the counts of particles per cell. + * + * @param s the space containing the cells. + * @param counts the number of particles per cell. Should be + * allocated as size s->nr_parts. + */ +static void accumulate_counts(struct space *s, int *counts) { + + struct part *parts = s->parts; + int *cdim = s->cdim; + double ih[3], dim[3]; + ih[0] = s->ih[0]; + ih[1] = s->ih[1]; + ih[2] = s->ih[2]; + dim[0] = s->dim[0]; + dim[1] = s->dim[1]; + dim[2] = s->dim[2]; + + bzero(counts, sizeof(int) * s->nr_cells); + + for (int k = 0; k < s->nr_parts; k++) { + for (int j = 0; j < 3; j++) { + if (parts[k].x[j] < 0.0) + parts[k].x[j] += dim[j]; + else if (parts[k].x[j] >= dim[j]) + parts[k].x[j] -= dim[j]; + } + const int cid = cell_getid(cdim, parts[k].x[0] * ih[0], + parts[k].x[1] * ih[1], parts[k].x[2] * ih[2]); + counts[cid]++; + } +} +#endif + +#if defined(WITH_MPI) && defined(HAVE_METIS) +/** + * @brief Apply METIS cell list partitioning to a cell structure. + * + * Uses the results of part_metis_pick to assign each cell's nodeID to the + * picked region index, thus partitioning the space into regions. + * + * @param s the space containing the cells to split into regions. + * @param nregions number of regions. + * @param celllist list of regions for each cell. + */ +static void split_metis(struct space *s, int nregions, int *celllist) { + + for (int i = 0; i < s->nr_cells; i++) + s->cells[i].nodeID = celllist[i]; +} +#endif + +#if defined(WITH_MPI) && defined(HAVE_METIS) +/** + * @brief Partition the given space into a number of connected regions. + * + * Split the space using METIS to derive a partitions using the + * given edge and vertex weights. If no weights are given then an + * unweighted partition is performed. + * + * @param s the space of cells to partition. + * @param nregions the number of regions required in the partition. + * @param vertexw weights for the cells, sizeof number of cells if used, + * NULL for unit weights. + * @param edgew weights for the graph edges between all cells, sizeof number + * of cells * 26 if used, NULL for unit weights. Need to be packed + * in CSR format, so same as adjncy array. + * @param celllist on exit this contains the ids of the selected regions, + * sizeof number of cells. + */ +static void pick_metis(struct space *s, int nregions, int *vertexw, int *edgew, + int *celllist) { + + /* Total number of cells. */ + int ncells = s->cdim[0] * s->cdim[1] * s->cdim[2]; + + /* Nothing much to do if only using a single partition. Also avoids METIS + * bug that doesn't handle this case well. */ + if (nregions == 1) { + for (int i = 0; i < ncells; i++) celllist[i] = 0; + return; + } + + /* Allocate weights and adjacency arrays . 
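+   * xadj needs one entry per cell plus one, adjncy and the edge weights need
+   * 26 entries per cell, and the vertex weights and regionid one per cell.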
*/ + idx_t *xadj; + if ((xadj = (idx_t *)malloc(sizeof(idx_t) * (ncells + 1))) == NULL) + error("Failed to allocate xadj buffer."); + idx_t *adjncy; + if ((adjncy = (idx_t *)malloc(sizeof(idx_t) * 26 * ncells)) == NULL) + error("Failed to allocate adjncy array."); + idx_t *weights_v = NULL; + if (vertexw != NULL) + if ((weights_v = (idx_t *)malloc(sizeof(idx_t) * ncells)) == NULL) + error("Failed to allocate vertex weights array"); + idx_t *weights_e = NULL; + if (edgew != NULL) + if ((weights_e = (idx_t *)malloc(26 * sizeof(idx_t) * ncells)) == NULL) + error("Failed to allocate edge weights array"); + idx_t *regionid; + if ((regionid = (idx_t *)malloc(sizeof(idx_t) * ncells)) == NULL) + error("Failed to allocate regionid array"); + + /* Define the cell graph. */ + graph_init_metis(s, adjncy, xadj); + + /* Init the vertex weights array. */ + if (vertexw != NULL) { + for (int k = 0; k < ncells; k++) { + if (vertexw[k] > 0) { + weights_v[k] = vertexw[k]; + } else { + weights_v[k] = 1; + } + } + } + + /* Init the edges weights array. */ + if (edgew != NULL) { + for (int k = 0; k < 26 * ncells; k++) { + if (edgew[k] > 0) { + weights_e[k] = edgew[k]; + } else { + weights_e[k] = 1; + } + } + } + + /* Set the METIS options. */ + idx_t options[METIS_NOPTIONS]; + METIS_SetDefaultOptions(options); + options[METIS_OPTION_OBJTYPE] = METIS_OBJTYPE_CUT; + options[METIS_OPTION_NUMBERING] = 0; + options[METIS_OPTION_CONTIG] = 1; + options[METIS_OPTION_NCUTS] = 10; + options[METIS_OPTION_NITER] = 20; + + /* Call METIS. */ + idx_t one = 1; + idx_t idx_ncells = ncells; + idx_t idx_nregions = nregions; + idx_t objval; + + /* Dump graph in METIS format */ + /* dumpMETISGraph("metis_graph", idx_ncells, one, xadj, adjncy, + * weights_v, weights_e, NULL); + */ + + if (METIS_PartGraphKway(&idx_ncells, &one, xadj, adjncy, weights_v, weights_e, + NULL, &idx_nregions, NULL, NULL, options, &objval, + regionid) != METIS_OK) + error("Call to METIS_PartGraphKway failed."); + + /* Check that the regionids are ok. */ + for (int k = 0; k < ncells; k++) + if (regionid[k] < 0 || regionid[k] >= nregions) + error("Got bad nodeID %" PRIDX " for cell %i.", regionid[k], k); + + /* Set the cell list to the region index. */ + for (int k = 0; k < ncells; k++) { + celllist[k] = regionid[k]; + } + + /* Clean up. */ + if (weights_v != NULL) free(weights_v); + if (weights_e != NULL) free(weights_e); + free(xadj); + free(adjncy); + free(regionid); + +} +#endif + +#if defined(WITH_MPI) && defined(HAVE_METIS) +/** + * @brief Repartition the cells amongst the nodes using task timings + * as edge weights and vertex weights also from task timings + * or particle cells counts. + * + * @param partweights whether particle counts will be used as vertex weights. + * @param bothweights whether vertex and edge weights will be used, otherwise + * only edge weights will be used. + * @param nodeID our nodeID. + * @param nr_nodes the number of nodes. + * @param s the space of cells holding our local particles. + * @param tasks the completed tasks from the last engine step for our node. + * @param nr_tasks the number of tasks. + */ +static void repart_edge_metis(int partweights, int bothweights, + int nodeID, int nr_nodes, struct space *s, + struct task *tasks, int nr_tasks) { + + + /* Create weight arrays using task ticks for vertices and edges (edges + * assume the same graph structure as used in the part_ calls). 
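+   * Vertex weights are taken from task times and/or particle counts, as
+   * selected by the bothweights and partweights flags; edge weights always
+   * come from the times of the pair and sub tasks that span two cells.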
*/ + int nr_cells = s->nr_cells; + struct cell *cells = s->cells; + float wscale = 1e-3, vscale = 1e-3, wscale_buff; + int wtot = 0; + int wmax = 1e9 / nr_nodes; + int wmin; + + /* Allocate and fill the adjncy indexing array defining the graph of + * cells. */ + idx_t *inds; + if ((inds = (idx_t *)malloc(sizeof(idx_t) * 26 * nr_cells)) == NULL) + error("Failed to allocate the inds array"); + graph_init_metis(s, inds, NULL); + + /* Allocate and init weights. */ + int *weights_v = NULL; + int *weights_e = NULL; + if (bothweights) { + if ((weights_v = (int *)malloc(sizeof(int) * nr_cells)) == NULL) + error("Failed to allocate vertex weights arrays."); + bzero(weights_v, sizeof(int) * nr_cells); + } + if ((weights_e = (int *)malloc(sizeof(int) * 26 * nr_cells)) == NULL) + error("Failed to allocate edge weights arrays."); + bzero(weights_e, sizeof(int) * 26 * nr_cells); + + /* Generate task weights for vertices. */ + int taskvweights = (bothweights && !partweights); + + /* Loop over the tasks... */ + for (int j = 0; j < nr_tasks; j++) { + /* Get a pointer to the kth task. */ + struct task *t = &tasks[j]; + + /* Skip un-interesting tasks. */ + if (t->type != task_type_self && t->type != task_type_pair && + t->type != task_type_sub && t->type != task_type_ghost && + t->type != task_type_drift && t->type != task_type_kick && + t->type != task_type_init) + continue; + + /* Get the task weight. */ + int w = (t->toc - t->tic) * wscale; + if (w < 0) error("Bad task weight (%d).", w); + + /* Do we need to re-scale? */ + wtot += w; + while (wtot > wmax) { + wscale /= 2; + wtot /= 2; + w /= 2; + for (int k = 0; k < 26 * nr_cells; k++) weights_e[k] *= 0.5; + if (taskvweights) + for (int k = 0; k < nr_cells; k++) weights_v[k] *= 0.5; + } + + /* Get the top-level cells involved. */ + struct cell *ci, *cj; + for (ci = t->ci; ci->parent != NULL; ci = ci->parent) + ; + if (t->cj != NULL) + for (cj = t->cj; cj->parent != NULL; cj = cj->parent) + ; + else + cj = NULL; + + /* Get the cell IDs. */ + int cid = ci - cells; + + /* Different weights for different tasks. */ + if (t->type == task_type_ghost || t->type == task_type_drift || + t->type == task_type_kick) { + /* Particle updates add only to vertex weight. */ + if (taskvweights) + weights_v[cid] += w; + + } + + /* Self interaction? */ + else if ((t->type == task_type_self && ci->nodeID == nodeID) || + (t->type == task_type_sub && cj == NULL && + ci->nodeID == nodeID)) { + /* Self interactions add only to vertex weight. */ + if (taskvweights) + weights_v[cid] += w; + + } + + /* Pair? */ + else if (t->type == task_type_pair || + (t->type == task_type_sub && cj != NULL)) { + /* In-cell pair? */ + if (ci == cj) { + /* Add weight to vertex for ci. */ + if (taskvweights) + weights_v[cid] += w; + + } + + /* Distinct cells with local ci? */ + else if (ci->nodeID == nodeID) { + /* Index of the jth cell. */ + int cjd = cj - cells; + + /* Add half of weight to each cell. */ + if (taskvweights) { + if (ci->nodeID == nodeID) weights_v[cid] += 0.5 * w; + if (cj->nodeID == nodeID) weights_v[cjd] += 0.5 * w; + } + + /* Add weights to edge. */ + int kk; + for (kk = 26 * cid; inds[kk] != cjd; kk++) + ; + weights_e[kk] += w; + for (kk = 26 * cjd; inds[kk] != cid; kk++) + ; + weights_e[kk] += w; + } + } + } + + /* Re-calculate the vertices if using particle counts. */ + if (partweights && bothweights) { + accumulate_counts(s, weights_v); + + /* Rescale to balance times. 
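+     * The particle counts are multiplied by the mean task weight
+     * (wtot / nr_tasks) so the vertex weights are comparable in magnitude to
+     * the time-based edge weights.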
*/ + float vwscale = (float)wtot / (float)nr_tasks; + for (int k = 0; k < nr_cells; k++) { + weights_v[k] *= vwscale; + } + } + + /* Get the minimum scaling and re-scale if necessary. */ + int res; + if ((res = MPI_Allreduce(&wscale, &wscale_buff, 1, MPI_FLOAT, MPI_MIN, + MPI_COMM_WORLD)) != MPI_SUCCESS) + mpi_error(res, "Failed to allreduce the weight scales."); + + if (wscale_buff != wscale) { + float scale = wscale_buff / wscale; + for (int k = 0; k < 26 * nr_cells; k++) weights_e[k] *= scale; + if (bothweights) + for (int k = 0; k < nr_cells; k++) weights_v[k] *= scale; + } + + /* Merge the weights arrays across all nodes. */ + if (bothweights) { + if ((res = MPI_Reduce((nodeID == 0) ? MPI_IN_PLACE : weights_v, weights_v, + nr_cells, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD)) != + MPI_SUCCESS) + mpi_error(res, "Failed to allreduce vertex weights."); + } + + if ((res = MPI_Reduce((nodeID == 0) ? MPI_IN_PLACE : weights_e, weights_e, + 26 * nr_cells, MPI_INT, MPI_SUM, 0, + MPI_COMM_WORLD)) != MPI_SUCCESS) + mpi_error(res, "Failed to allreduce edge weights."); + + /* Allocate cell list for the partition. */ + int *celllist = (int *)malloc(sizeof(int) * s->nr_cells); + if (celllist == NULL) error("Failed to allocate celllist"); + + /* As of here, only one node needs to compute the partition. */ + if (nodeID == 0) { + /* Final rescale of all weights to avoid a large range. Large ranges + * have been seen to cause an incomplete graph. */ + wmin = wmax; + wmax = 0; + for (int k = 0; k < 26 * nr_cells; k++) { + wmax = weights_e[k] > wmax ? weights_e[k] : wmax; + wmin = weights_e[k] < wmin ? weights_e[k] : wmin; + } + if (bothweights) { + for (int k = 0; k < nr_cells; k++) { + wmax = weights_v[k] > wmax ? weights_v[k] : wmax; + wmin = weights_v[k] < wmin ? weights_v[k] : wmin; + } + } + + if ((wmax - wmin) > metis_maxweight) { + wscale = metis_maxweight / (wmax - wmin); + for (int k = 0; k < 26 * nr_cells; k++) { + weights_e[k] = (weights_e[k] - wmin) * wscale + 1; + } + if (bothweights) { + for (int k = 0; k < nr_cells; k++) { + weights_v[k] = (weights_v[k] - wmin) * wscale + 1; + } + } + } + + /* Make sure there are no zero weights. */ + for (int k = 0; k < 26 * nr_cells; k++) + if (weights_e[k] == 0) weights_e[k] = 1; + if (bothweights) + for (int k = 0; k < nr_cells; k++) + if ((weights_v[k] *= vscale) == 0) weights_v[k] = 1; + + /* And partition, use both weights or not as requested. */ + if (bothweights) + pick_metis(s, nr_nodes, weights_v, weights_e, celllist); + else + pick_metis(s, nr_nodes, NULL, weights_e, celllist); + + /* Check that all cells have good values. */ + for (int k = 0; k < nr_cells; k++) + if (celllist[k] < 0 || celllist[k] >= nr_nodes) + error("Got bad nodeID %d for cell %i.", celllist[k], k); + + /* Check that the partition is complete and all nodes have some work. */ + int present[nr_nodes]; + int failed = 0; + for (int i = 0; i < nr_nodes; i++) present[i] = 0; + for (int i = 0; i < nr_cells; i++) present[celllist[i]]++; + for (int i = 0; i < nr_nodes; i++) { + if (!present[i]) { + failed = 1; + message("Node %d is not present after repartition", i); + } + } + + /* If partition failed continue with the current one, but make this + * clear. */ + if (failed) { + message("WARNING: METIS repartition has failed, continuing with " + "the current partition, load balance will not be optimal"); + for (int k = 0; k < nr_cells; k++) celllist[k] = cells[k].nodeID; + } + } + + /* Distribute the celllist partition and apply. 
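+   * Only rank 0 computed the partition, so broadcast the cell list and then
+   * have every rank apply the same cell-to-node assignment.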
*/ + if ((res = MPI_Bcast(celllist, s->nr_cells, MPI_INT, 0, MPI_COMM_WORLD)) != + MPI_SUCCESS) + mpi_error(res, "Failed to bcast the cell list"); + + /* And apply to our cells */ + split_metis(s, nr_nodes, celllist); + + /* Clean up. */ + if (bothweights) free(weights_v); + free(weights_e); + free(celllist); +} +#endif + +/** + * @brief Repartition the cells amongst the nodes using vertex weights + * + * @param s The space containing the local cells. + * @param nodeID our MPI node id. + * @param nr_nodes number of MPI nodes. + */ +#if defined(WITH_MPI) && defined(HAVE_METIS) +static void repart_vertex_metis(struct space *s, int nodeID, int nr_nodes) { + + /* Use particle counts as vertex weights. */ + /* Space for particles per cell counts, which will be used as weights. */ + int *weights = NULL; + if ((weights = (int *)malloc(sizeof(int) * s->nr_cells)) == NULL) + error("Failed to allocate weights buffer."); + + /* Check each particle and accumulate the counts per cell. */ + accumulate_counts(s, weights); + + /* Get all the counts from all the nodes. */ + int res; + if ((res = MPI_Allreduce(MPI_IN_PLACE, weights, s->nr_cells, MPI_INT, + MPI_SUM, MPI_COMM_WORLD)) != MPI_SUCCESS) + mpi_error(res, "Failed to allreduce particle cell weights."); + + /* Main node does the partition calculation. */ + int *celllist = (int *)malloc(sizeof(int) * s->nr_cells); + if (celllist == NULL) error("Failed to allocate celllist"); + + if (nodeID == 0) + pick_metis(s, nr_nodes, weights, NULL, celllist); + + /* Distribute the celllist partition and apply. */ + if ((res = MPI_Bcast(celllist, s->nr_cells, MPI_INT, 0, MPI_COMM_WORLD)) != + MPI_SUCCESS) + mpi_error(res, "Failed to bcast the cell list"); + + /* And apply to our cells */ + split_metis(s, nr_nodes, celllist); + + free(weights); + free(celllist); +} +#endif + + +/** + * @brief Repartition the space using the given repartition type. + * + * Note that at the end of this process all the cells will be re-distributed + * across the nodes, but the particles themselves will not be. + * + * @param reparttype the type of repartition to attempt, see the repart_type enum. + * @param nodeID our nodeID. + * @param nr_nodes the number of nodes. + * @param s the space of cells holding our local particles. + * @param tasks the completed tasks from the last engine step for our node. + * @param nr_tasks the number of tasks. + */ +void partition_repartition(enum repartition_type reparttype, int nodeID, + int nr_nodes, struct space *s, struct task *tasks, + int nr_tasks) { + +#if defined(WITH_MPI) && defined(HAVE_METIS) + + if (reparttype == REPART_METIS_BOTH || reparttype == REPART_METIS_EDGE || + reparttype == REPART_METIS_VERTEX_EDGE) { + + int partweights; + int bothweights; + if (reparttype == REPART_METIS_VERTEX_EDGE) + partweights = 1; + else + partweights = 0; + + if (reparttype == REPART_METIS_BOTH) + bothweights = 1; + else + bothweights = 0; + + repart_edge_metis(partweights, bothweights, nodeID, nr_nodes, s, tasks, + nr_tasks); + + } else if (reparttype == REPART_METIS_VERTEX) { + + repart_vertex_metis(s, nodeID, nr_nodes); + + } else { + error("Unknown repartition type"); + } +#else + error("SWIFT was not compiled with METIS support."); +#endif +} + +/** + * @brief Initial partition of space cells. + * + * Cells are assigned to a node on the basis of various schemes, all of which + * should attempt to distribute them in geometrically close regions to + * minimise the movement of particles. 
+ * + * Note that the partition type is a suggestion and will be ignored if that + * scheme fails. In that case we fallback to a vectorised scheme, that is + * guaranteed to work provided we have more cells than nodes. + * + * @param initial_partition the type of partitioning to try. + * @param nodeID our nodeID. + * @param nr_nodes the number of nodes. + * @param s the space of cells. + */ +void partition_initial_partition(struct partition *initial_partition, + int nodeID, int nr_nodes, struct space *s) { + + /* Geometric grid partitioning. */ + if (initial_partition->type == INITPART_GRID) { + int j, k; + int ind[3]; + struct cell *c; + + /* If we've got the wrong number of nodes, fail. */ + if (nr_nodes != initial_partition->grid[0] * initial_partition->grid[1] + * initial_partition->grid[2]) + error("Grid size does not match number of nodes."); + + /* Run through the cells and set their nodeID. */ + // message("s->dim = [%e,%e,%e]", s->dim[0], s->dim[1], s->dim[2]); + for (k = 0; k < s->nr_cells; k++) { + c = &s->cells[k]; + for (j = 0; j < 3; j++) + ind[j] = c->loc[j] / s->dim[j] * initial_partition->grid[j]; + c->nodeID = ind[0] + initial_partition->grid[0] * + (ind[1] + initial_partition->grid[1] * ind[2]); + // message("cell at [%e,%e,%e]: ind = [%i,%i,%i], nodeID = %i", c->loc[0], + // c->loc[1], c->loc[2], ind[0], ind[1], ind[2], c->nodeID); + } + + /* The grid technique can fail, so check for this before proceeding. */ + if (!check_complete(s, (nodeID == 0), nr_nodes)) { + if (nodeID == 0) + message("Grid initial partition failed, using a vectorised partition"); + initial_partition->type = INITPART_VECTORIZE; + partition_initial_partition(initial_partition, nodeID, nr_nodes, s); + return; + } + + } else if (initial_partition->type == INITPART_METIS_WEIGHT || + initial_partition->type == INITPART_METIS_NOWEIGHT) { +#if defined(WITH_MPI) && defined(HAVE_METIS) + /* Simple k-way partition selected by METIS using cell particle counts as + * weights or not. Should be best when starting with a inhomogeneous dist. + */ + + /* Space for particles per cell counts, which will be used as weights or + * not. */ + int *weights = NULL; + if (initial_partition->type == INITPART_METIS_WEIGHT) { + if ((weights = (int *)malloc(sizeof(int) * s->nr_cells)) == NULL) + error("Failed to allocate weights buffer."); + bzero(weights, sizeof(int) * s->nr_cells); + + /* Check each particle and accumilate the counts per cell. */ + struct part *parts = s->parts; + int *cdim = s->cdim; + double ih[3], dim[3]; + ih[0] = s->ih[0]; + ih[1] = s->ih[1]; + ih[2] = s->ih[2]; + dim[0] = s->dim[0]; + dim[1] = s->dim[1]; + dim[2] = s->dim[2]; + for (int k = 0; k < s->nr_parts; k++) { + for (int j = 0; j < 3; j++) { + if (parts[k].x[j] < 0.0) + parts[k].x[j] += dim[j]; + else if (parts[k].x[j] >= dim[j]) + parts[k].x[j] -= dim[j]; + } + const int cid = + cell_getid(cdim, parts[k].x[0] * ih[0], parts[k].x[1] * ih[1], + parts[k].x[2] * ih[2]); + weights[cid]++; + } + + /* Get all the counts from all the nodes. */ + if (MPI_Allreduce(MPI_IN_PLACE, weights, s->nr_cells, MPI_INT, MPI_SUM, + MPI_COMM_WORLD) != MPI_SUCCESS) + error("Failed to allreduce particle cell weights."); + + } + + /* Main node does the partition calculation. */ + int *celllist = (int *)malloc(sizeof(int) * s->nr_cells); + if (celllist == NULL) error("Failed to allocate celllist"); + if (nodeID == 0) + pick_metis(s, nr_nodes, weights, NULL, celllist); + + /* Distribute the celllist partition and apply. 
*/ + int res = MPI_Bcast(celllist, s->nr_cells, MPI_INT, 0, MPI_COMM_WORLD); + if (res != MPI_SUCCESS) mpi_error(res, "Failed to bcast the cell list"); + + /* And apply to our cells */ + split_metis(s, nr_nodes, celllist); + + /* It's not known if this can fail, but check for this before + * proceeding. */ + if (!check_complete(s, (nodeID == 0), nr_nodes)) { + if (nodeID == 0) + message("METIS initial partition failed, using a vectorised partition"); + initial_partition->type = INITPART_VECTORIZE; + partition_initial_partition(initial_partition, nodeID, nr_nodes, s); + } + + if (weights != NULL) free(weights); + free(celllist); +#else + error("SWIFT was not compiled with METIS support"); +#endif + + } else if (initial_partition->type == INITPART_VECTORIZE) { + +#if defined(WITH_MPI) + /* Vectorised selection, guaranteed to work for samples less than the + * number of cells, but not very clumpy in the selection of regions. */ + int *samplecells = (int *)malloc(sizeof(int) * nr_nodes * 3); + if (samplecells == NULL) error("Failed to allocate samplecells"); + + if (nodeID == 0) { + pick_vector(s, nr_nodes, samplecells); + } + + /* Share the samplecells around all the nodes. */ + int res = + MPI_Bcast(samplecells, nr_nodes * 3, MPI_INT, 0, MPI_COMM_WORLD); + if (res != MPI_SUCCESS) + mpi_error(res, "Failed to bcast the partition sample cells."); + + /* And apply to our cells */ + split_vector(s, nr_nodes, samplecells); + free(samplecells); +#else + error("SWIFT was not compiled with MPI support"); +#endif + } +} + +/* General support */ +/* =============== */ + +/** + * @brief Check if all regions have been assigned a node in the + * cells of a space. + * + * @param s the space containing the cells to check. + * @param nregions number of regions expected. + * @param verbose if true report the missing regions. + * @return true if all regions have been found, false otherwise. + */ +static int check_complete(struct space *s, int verbose, int nregions) { + + int *present = (int *)malloc(sizeof(int) * nregions); + if (present == NULL) error("Failed to allocate present array"); + + int failed = 0; + for (int i = 0; i < nregions; i++) present[i] = 0; + for (int i = 0; i < s->nr_cells; i++) { + present[s->cells[i].nodeID]++; + } + for (int i = 0; i < nregions; i++) { + if (!present[i]) { + failed = 1; + if (verbose) message("Region %d is not present in partition", i); + } + } + free(present); + return (!failed); +} diff --git a/src/partition.h b/src/partition.h new file mode 100644 index 0000000000000000000000000000000000000000..492af04ac9ef017ceddabb410ee53496e64bb018 --- /dev/null +++ b/src/partition.h @@ -0,0 +1,59 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2015 Peter W. Draper (p.w.draper@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ * + ******************************************************************************/ +#ifndef SWIFT_PARTITION_H +#define SWIFT_PARTITION_H + +#include "space.h" +#include "task.h" + +/* Initial partitioning types. */ +enum partition_type { + INITPART_GRID = 0, + INITPART_VECTORIZE, + INITPART_METIS_WEIGHT, + INITPART_METIS_NOWEIGHT +}; + +/* Simple descriptions of types for reports. */ +extern const char *initial_partition_name[]; + +/* The selected initial partition type and any related metadata. */ +struct partition { + enum partition_type type; + int grid[3]; +}; +/* Repartition type to use. */ +enum repartition_type { + REPART_NONE = 0, + REPART_METIS_BOTH, + REPART_METIS_VERTEX, + REPART_METIS_EDGE, + REPART_METIS_VERTEX_EDGE +}; + +/* Simple descriptions of types for reports. */ +extern const char *repartition_name[]; + +void partition_repartition(enum repartition_type reparttype, int nodeID, + int nr_nodes, struct space *s, + struct task *tasks, int nr_tasks); +void partition_initial_partition(struct partition *initial_partition, + int nodeID, int nr_nodes, struct space *s); + +#endif /* SWIFT_PARTITION_H */ diff --git a/src/swift.h b/src/swift.h index 9cd7ac65da305a995fb48ba9c931cb8a9be782e6..03960e83bf75488b3456327157288160e995951b 100644 --- a/src/swift.h +++ b/src/swift.h @@ -48,6 +48,7 @@ #include "timers.h" #include "units.h" #include "tools.h" +#include "partition.h" #include "version.h" #endif /* SWIFT_SWIFT_H */