diff --git a/.gitignore b/.gitignore
index 8137ea759b24b3f4ec9909a460da4bcb47b0a1ac..9bae25ebff81d077253fd8f1227aad98545d28a0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -25,15 +25,23 @@ examples/swift_mindt
 examples/swift_mindt_mpi
 examples/swift_mpi
 
-tests/testVectorize
-tests/brute_force.dat
-tests/swift_dopair.dat
+tests/testPair
+tests/brute_force_standard.dat
+tests/swift_dopair_standard.dat
+tests/brute_force_perturbed.dat
+tests/swift_dopair_perturbed.dat
+tests/test27cells
+tests/brute_force_27_standard.dat
+tests/swift_dopair_27_standard.dat
+tests/brute_force_27_perturbed.dat
+tests/swift_dopair_27_perturbed.dat
 tests/testGreetings
 tests/testReading
 tests/input.hdf5
 tests/testSingle
 tests/testTimeIntegration
 tests/testSPHStep
+tests/testParser
 
 theory/latex/swift.pdf
 
diff --git a/examples/main.c b/examples/main.c
index 121439e5afa1f9916d1702873c9432a86c598df6..e68f9e33f8c5c8c59eb6a9d63c040188cb982de3 100644
--- a/examples/main.c
+++ b/examples/main.c
@@ -1,4 +1,3 @@
-
 /*******************************************************************************
  * This file is part of SWIFT.
  * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk),
@@ -68,7 +67,6 @@
  * @brief Main routine that loads a few particles and generates some output.
  *
  */
-
 int main(int argc, char *argv[]) {
 
   int c, icount, periodic = 1;
@@ -92,7 +90,8 @@ int main(int argc, char *argv[]) {
   int nr_nodes = 1, myrank = 0;
   FILE *file_thread;
   int with_outputs = 1;
-  int with_gravity = 0;
+  int with_external_gravity = 0;
+  int with_self_gravity = 0;
   int engine_policies = 0;
   int verbose = 0, talking = 0;
   unsigned long long cpufreq = 0;
@@ -171,7 +170,7 @@ int main(int argc, char *argv[]) {
   bzero(&s, sizeof(struct space));
 
   /* Parse the options */
-  while ((c = getopt(argc, argv, "a:c:d:e:f:gh:m:oP:q:R:s:t:v:w:y:z:")) != -1)
+  while ((c = getopt(argc, argv, "a:c:d:e:f:gGh:m:oP:q:R:s:t:v:w:y:z:")) != -1)
     switch (c) {
       case 'a':
         if (sscanf(optarg, "%lf", &scaling) != 1)
@@ -201,7 +200,10 @@ int main(int argc, char *argv[]) {
         if (!strcpy(ICfileName, optarg)) error("Error parsing IC file name.");
         break;
       case 'g':
-        with_gravity = 1;
+        with_external_gravity = 1;
+        break;
+      case 'G':
+        with_self_gravity = 1;
         break;
       case 'h':
         if (sscanf(optarg, "%llu", &cpufreq) != 1)
@@ -361,10 +363,6 @@ int main(int argc, char *argv[]) {
     message("CPU frequency used for tick conversion: %llu Hz", cpufreq);
   }
 
-  /* Check we have sensible time step bounds */
-  if (dt_min > dt_max)
-    error("Minimal time step size must be large than maximal time step size ");
-
   /* Check whether an IC file has been provided */
   if (strcmp(ICfileName, "") == 0)
     error("An IC file name must be provided via the option -f");
@@ -375,7 +373,7 @@ int main(int argc, char *argv[]) {
 #if defined(WITH_MPI)
 #if defined(HAVE_PARALLEL_HDF5)
   read_ic_parallel(ICfileName, dim, &parts, &gparts, &Ngas, &Ngpart, &periodic,
-		   myrank, nr_nodes, MPI_COMM_WORLD, MPI_INFO_NULL);
+                   myrank, nr_nodes, MPI_COMM_WORLD, MPI_INFO_NULL);
 #else
   read_ic_serial(ICfileName, dim, &parts, &gparts, &Ngas, &Ngpart, &periodic,
                  myrank, nr_nodes, MPI_COMM_WORLD, MPI_INFO_NULL);
@@ -406,10 +404,10 @@ int main(int argc, char *argv[]) {
 #endif
 
   /* MATTHIEU: Temporary fix to preserve master */
-  if (!with_gravity) {
+  if (!with_external_gravity && !with_self_gravity) {
     free(gparts);
-    for(size_t k = 0; k < Ngas; ++k)
-      parts[k].gpart = NULL;
+    gparts = NULL;
+    for (size_t k = 0; k < Ngas; ++k) parts[k].gpart = NULL;
     Ngpart = 0;
 #if defined(WITH_MPI)
     N_long[0] = Ngas;
@@ -482,7 +480,8 @@ int main(int argc, char *argv[]) {
 
   /* Construct the engine policy */
   engine_policies = ENGINE_POLICY | engine_policy_steal | engine_policy_hydro;
-  if (with_gravity) engine_policies |= engine_policy_external_gravity;
+  if (with_external_gravity) engine_policies |= engine_policy_external_gravity;
+  if (with_self_gravity) engine_policies |= engine_policy_self_gravity;
 
   /* Initialize the engine with this space. */
   if (myrank == 0) clocks_gettime(&tic);
@@ -560,8 +559,8 @@ int main(int argc, char *argv[]) {
   /* Legend */
   if (myrank == 0)
     printf(
-        "# Step  Time  time-step  Number of updates    CPU Wall-clock time "
-        "[%s]\n",
+        "# Step  Time  time-step  Number of updates  Number of updates "
+        "CPU Wall-clock time [%s]\n",
         clocks_getunit());
 
   /* Let loose a runner on the space. */
diff --git a/src/Makefile.am b/src/Makefile.am
index f44d47819672d10445fd969fe2ff20dbcb49463b..15c05a2a00d33ad86e7144b4a8e377252a2eedce 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -35,13 +35,13 @@ endif
 # List required headers
 include_HEADERS = space.h runner.h queue.h task.h lock.h cell.h part.h const.h \
     engine.h swift.h serial_io.h timers.h debug.h scheduler.h proxy.h parallel_io.h \
-    common_io.h single_io.h multipole.h map.h tools.h partition.h clocks.h
+    common_io.h single_io.h multipole.h map.h tools.h partition.h clocks.h parser.h
 
 # Common source files
 AM_SOURCES = space.c runner.c queue.c task.c cell.c engine.c \
     serial_io.c timers.c debug.c scheduler.c proxy.c parallel_io.c \
     units.c common_io.c single_io.c multipole.c version.c map.c \
-    kernel.c tools.c part.c partition.c clocks.c
+    kernel.c tools.c part.c partition.c clocks.c parser.c
 
 # Include files for distribution, not installation.
 nobase_noinst_HEADERS = approx_math.h atomic.h cycle.h error.h inline.h kernel.h vector.h \
diff --git a/src/cell.c b/src/cell.c
index 68f4fcf7f9967b7ad42d8597190e82a5baaaf951..b2a27fb99038cfe71af7abeb83af911868bc9d87 100644
--- a/src/cell.c
+++ b/src/cell.c
@@ -45,6 +45,7 @@
 /* Local headers. */
 #include "atomic.h"
 #include "error.h"
+#include "gravity.h"
 #include "hydro.h"
 #include "space.h"
 #include "timers.h"
@@ -89,14 +90,18 @@ int cell_unpack(struct pcell *pc, struct cell *c, struct space *s) {
   c->ti_end_min = pc->ti_end_min;
   c->ti_end_max = pc->ti_end_max;
   c->count = pc->count;
+  c->gcount = pc->gcount;
   c->tag = pc->tag;
 
-  /* Fill the progeny recursively, depth-first. */
+  /* Number of new cells created. */
   int count = 1;
+
+  /* Fill the progeny recursively, depth-first. */
   for (int k = 0; k < 8; k++)
     if (pc->progeny[k] >= 0) {
       struct cell *temp = space_getcell(s);
       temp->count = 0;
+      temp->gcount = 0;
       temp->loc[0] = c->loc[0];
       temp->loc[1] = c->loc[1];
       temp->loc[2] = c->loc[2];
@@ -122,7 +127,7 @@ int cell_unpack(struct pcell *pc, struct cell *c, struct space *s) {
 }
 
 /**
- * @brief Link the cells recursively to the given part array.
+ * @brief Link the cells recursively to the given #part array.
  *
  * @param c The #cell.
  * @param parts The #part array.
@@ -130,7 +135,7 @@ int cell_unpack(struct pcell *pc, struct cell *c, struct space *s) {
  * @return The number of particles linked.
  */
 
-int cell_link(struct cell *c, struct part *parts) {
+int cell_link_parts(struct cell *c, struct part *parts) {
 
   c->parts = parts;
 
@@ -139,14 +144,40 @@ int cell_link(struct cell *c, struct part *parts) {
     int offset = 0;
     for (int k = 0; k < 8; k++) {
       if (c->progeny[k] != NULL)
-        offset += cell_link(c->progeny[k], &parts[offset]);
+        offset += cell_link_parts(c->progeny[k], &parts[offset]);
     }
   }
 
-  /* Return the total number of unpacked cells. */
+  /* Return the total number of linked particles. */
   return c->count;
 }
 
+/**
+ * @brief Link the cells recursively to the given #gpart array.
+ *
+ * @param c The #cell.
+ * @param gparts The #gpart array.
+ *
+ * @return The number of particles linked.
+ */
+
+int cell_link_gparts(struct cell *c, struct gpart *gparts) {
+
+  c->gparts = gparts;
+
+  /* Fill the progeny recursively, depth-first. */
+  if (c->split) {
+    int offset = 0;
+    for (int k = 0; k < 8; k++) {
+      if (c->progeny[k] != NULL)
+        offset += cell_link_gparts(c->progeny[k], &gparts[offset]);
+    }
+  }
+
+  /* Return the total number of linked particles. */
+  return c->gcount;
+}
+
 /**
  * @brief Pack the data of the given cell and all it's sub-cells.
  *
@@ -164,6 +195,7 @@ int cell_pack(struct cell *c, struct pcell *pc) {
   pc->ti_end_min = c->ti_end_min;
   pc->ti_end_max = c->ti_end_max;
   pc->count = c->count;
+  pc->gcount = c->gcount;
   c->tag = pc->tag = atomic_inc(&cell_next_tag) % cell_max_tag;
 
   /* Fill in the progeny, depth-first recursion. */
@@ -579,6 +611,27 @@ void cell_init_parts(struct cell *c, void *data) {
   c->ti_end_max = 0;
 }
 
+/**
+ * @brief Initialises all g-particles to a valid state even if the ICs were
+ *stupid
+ *
+ * @param c Cell to act upon
+ * @param data Unused parameter
+ */
+void cell_init_gparts(struct cell *c, void *data) {
+
+  struct gpart *gp = c->gparts;
+  const int gcount = c->gcount;
+
+  for (int i = 0; i < gcount; ++i) {
+    gp[i].ti_begin = 0;
+    gp[i].ti_end = 0;
+    gravity_first_init_gpart(&gp[i]);
+  }
+  c->ti_end_min = 0;
+  c->ti_end_max = 0;
+}
+
 /**
  * @brief Converts hydro quantities to a valid state after the initial density
  *calculation
diff --git a/src/cell.h b/src/cell.h
index 25e9342cab8948310ad58e28b13fd53282e18c28..607073660946fac1acda5cea512ed8adb4aa5218 100644
--- a/src/cell.h
+++ b/src/cell.h
@@ -44,7 +44,7 @@ struct pcell {
   int ti_end_min, ti_end_max;
 
   /* Number of particles in this cell. */
-  int count;
+  int count, gcount;
 
   /* tag used for MPI communication. */
   int tag;
@@ -144,7 +144,7 @@ struct cell {
   double mass, e_pot, e_int, e_kin;
 
   /* Number of particles updated in this cell. */
-  int updated;
+  int updated, g_updated;
 
   /* Linking pointer for "memory management". */
   struct cell *next;
@@ -178,8 +178,10 @@ void cell_gunlocktree(struct cell *c);
 int cell_pack(struct cell *c, struct pcell *pc);
 int cell_unpack(struct pcell *pc, struct cell *c, struct space *s);
 int cell_getsize(struct cell *c);
-int cell_link(struct cell *c, struct part *parts);
+int cell_link_parts(struct cell *c, struct part *parts);
+int cell_link_gparts(struct cell *c, struct gpart *gparts);
 void cell_init_parts(struct cell *c, void *data);
+void cell_init_gparts(struct cell *c, void *data);
 void cell_convert_hydro(struct cell *c, void *data);
 void cell_clean_links(struct cell *c, void *data);
 
diff --git a/src/common_io.c b/src/common_io.c
index 5fb2d9513ec2acc0cd8d389a226b14d427e02539..6183effe9ce392ab930c581cbd118f025bbce773 100644
--- a/src/common_io.c
+++ b/src/common_io.c
@@ -502,7 +502,7 @@ void writeXMFline(FILE* xmfFile, char* fileName, char* partTypeGroupName,
  * @param gparts The array of #gpart freshly read in.
  * @param Ndm The number of DM particles read in.
  */
-void prepare_dm_gparts(struct gpart* gparts, size_t Ndm) {
+void prepare_dm_gparts(struct gpart* const gparts, size_t Ndm) {
 
   /* Let's give all these gparts a negative id */
   for (size_t i = 0; i < Ndm; ++i) {
@@ -527,8 +527,9 @@ void prepare_dm_gparts(struct gpart* gparts, size_t Ndm) {
  * @param Ngas The number of gas particles read in.
  * @param Ndm The number of DM particles read in.
  */
-void duplicate_hydro_gparts(struct part* parts, struct gpart* gparts,
-                            size_t Ngas, size_t Ndm) {
+void duplicate_hydro_gparts(struct part* const parts,
+                            struct gpart* const gparts, size_t Ngas,
+                            size_t Ndm) {
 
   for (size_t i = 0; i < Ngas; ++i) {
 
@@ -557,16 +558,19 @@ void duplicate_hydro_gparts(struct part* parts, struct gpart* gparts,
  * @param dmparts The array of #gpart containg DM particles to be filled.
  * @param Ndm The number of DM particles.
  */
-void collect_dm_gparts(struct gpart* gparts, size_t Ntot, struct gpart* dmparts,
-                       size_t Ndm) {
+void collect_dm_gparts(const struct gpart* const gparts, size_t Ntot,
+                       struct gpart* const dmparts, size_t Ndm) {
 
   size_t count = 0;
 
   /* Loop over all gparts */
   for (size_t i = 0; i < Ntot; ++i) {
 
+    /* message("i=%zd count=%zd id=%lld part=%p", i, count, gparts[i].id,
+     * gparts[i].part); */
+
     /* And collect the DM ones */
-    if (gparts[i].id < 0) {
+    if (gparts[i].id < 0LL) {
       memcpy(&dmparts[count], &gparts[i], sizeof(struct gpart));
       dmparts[count].id = -dmparts[count].id;
       count++;
diff --git a/src/common_io.h b/src/common_io.h
index 4ad0c6fb754c4288a0c731e2b1e2392998719d52..961f40e63d771e5e06ade525301caf59aae0bceb 100644
--- a/src/common_io.h
+++ b/src/common_io.h
@@ -78,11 +78,12 @@ extern const char* particle_type_names[];
 hid_t hdf5Type(enum DATA_TYPE type);
 size_t sizeOfType(enum DATA_TYPE type);
 
-void collect_dm_gparts(struct gpart* gparts, size_t Ntot, struct gpart* dmparts,
-                       size_t Ndm);
-void prepare_dm_gparts(struct gpart* gparts, size_t Ndm);
-void duplicate_hydro_gparts(struct part* parts, struct gpart* gparts,
-                            size_t Ngas, size_t Ndm);
+void collect_dm_gparts(const struct gpart* const gparts, size_t Ntot,
+                       struct gpart* const dmparts, size_t Ndm);
+void prepare_dm_gparts(struct gpart* const gparts, size_t Ndm);
+void duplicate_hydro_gparts(struct part* const parts,
+                            struct gpart* const gparts, size_t Ngas,
+                            size_t Ndm);
 
 void readAttribute(hid_t grp, char* name, enum DATA_TYPE type, void* data);
 
diff --git a/src/engine.c b/src/engine.c
index 05198b5dd9435713e894e3353d19b85abed1d6e3..1b196c3c13e116d6b24a5c735af9112fdb98458f 100644
--- a/src/engine.c
+++ b/src/engine.c
@@ -157,39 +157,56 @@ void engine_make_ghost_tasks(struct engine *e, struct cell *c,
  * @brief Redistribute the particles amongst the nodes according
  *      to their cell's node IDs.
  *
+ * The strategy here is as follows:
+ * 1) Each node counts the number of particles it has to send to each other
+ * node.
+ * 2) The number of particles of each type is then exchanged.
+ * 3) The particles to send are placed in a temporary buffer in which the
+ * part-gpart links are preserved.
+ * 4) Each node allocates enough space for the new particles.
+ * 5) (Asynchronous) communications are issued to transfer the data.
+ *
+ *
  * @param e The #engine.
  */
-
 void engine_redistribute(struct engine *e) {
 
 #ifdef WITH_MPI
 
-  int nr_nodes = e->nr_nodes, nodeID = e->nodeID;
+  const int nr_nodes = e->nr_nodes;
+  const int nodeID = e->nodeID;
   struct space *s = e->s;
-  int my_cells = 0;
-  int *cdim = s->cdim;
   struct cell *cells = s->cells;
-  int nr_cells = s->nr_cells;
+  const int nr_cells = s->nr_cells;
+  const int *cdim = s->cdim;
+  const double ih[3] = {s->ih[0], s->ih[1], s->ih[2]};
+  const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]};
+  struct part *parts = s->parts;
+  struct xpart *xparts = s->xparts;
+  struct gpart *gparts = s->gparts;
   ticks tic = getticks();
 
-  /* Start by sorting the particles according to their nodes and
-     getting the counts. The counts array is indexed as
-     count[from * nr_nodes + to]. */
-  int *counts;
-  size_t *dest;
-  double ih[3], dim[3];
-  ih[0] = s->ih[0];
-  ih[1] = s->ih[1];
-  ih[2] = s->ih[2];
-  dim[0] = s->dim[0];
-  dim[1] = s->dim[1];
-  dim[2] = s->dim[2];
-  if ((counts = (int *)malloc(sizeof(int) *nr_nodes *nr_nodes)) == NULL ||
-      (dest = (size_t *)malloc(sizeof(size_t) * s->nr_parts)) == NULL)
-    error("Failed to allocate count and dest buffers.");
+  /* Allocate temporary arrays to store the counts of particles to be sent
+     and the destination of each particle */
+  int *counts, *g_counts;
+  if ((counts = (int *)malloc(sizeof(int) * nr_nodes * nr_nodes)) == NULL)
+    error("Failed to allocate count temporary buffer.");
+  if ((g_counts = (int *)malloc(sizeof(int) * nr_nodes * nr_nodes)) == NULL)
+    error("Failed to allocate gcount temporary buffer.");
   bzero(counts, sizeof(int) * nr_nodes * nr_nodes);
-  struct part *parts = s->parts;
+  bzero(g_counts, sizeof(int) * nr_nodes * nr_nodes);
+
+  // Allocate the destination index arrays.
+  int *dest, *g_dest;
+  if ((dest = (int *)malloc(sizeof(int) * s->nr_parts)) == NULL)
+    error("Failed to allocate dest temporary buffer.");
+  if ((g_dest = (int *)malloc(sizeof(int) * s->nr_gparts)) == NULL)
+    error("Failed to allocate g_dest temporary buffer.");
+
+  /* Get destination of each particle */
   for (size_t k = 0; k < s->nr_parts; k++) {
+
+    /* Periodic boundary conditions */
     for (int j = 0; j < 3; j++) {
       if (parts[k].x[j] < 0.0)
         parts[k].x[j] += dim[j];
@@ -202,36 +219,121 @@ void engine_redistribute(struct engine *e) {
        error("Bad cell id %i for part %i at [%.3e,%.3e,%.3e].",
              cid, k, parts[k].x[0], parts[k].x[1], parts[k].x[2]); */
     dest[k] = cells[cid].nodeID;
+
+    /* The counts array is indexed as count[from * nr_nodes + to]. */
     counts[nodeID * nr_nodes + dest[k]] += 1;
   }
+
+  /* Sort the particles according to their cell index. */
   space_parts_sort(s, dest, s->nr_parts, 0, nr_nodes - 1, e->verbose);
 
+  /* We need to re-link the gpart partners of parts. */
+  int current_dest = dest[0];
+  size_t count_this_dest = 0;
+  for (size_t k = 0; k < s->nr_parts; ++k) {
+    if (s->parts[k].gpart != NULL) {
+
+      /* As the addresses will be invalidated by the communications, we will */
+      /* instead store the absolute index from the start of the sub-array */
+      /* of particles to be sent to a given node. */
+      /* Recall that gparts without partners have a negative id. */
+      /* We will restore the pointers on the receiving node later on. */
+      if (dest[k] != current_dest) {
+        current_dest = dest[k];
+        count_this_dest = 0;
+      }
+
+      /* Debug */
+      /* if(s->parts[k].gpart->id < 0) */
+      /* 	error("Trying to link a partnerless gpart !"); */
+
+      s->parts[k].gpart->id = count_this_dest;
+      count_this_dest++;
+    }
+  }
+
+  /* Get destination of each g-particle */
+  for (size_t k = 0; k < s->nr_gparts; k++) {
+
+    /* Periodic boundary conditions */
+    for (int j = 0; j < 3; j++) {
+      if (gparts[k].x[j] < 0.0)
+        gparts[k].x[j] += dim[j];
+      else if (gparts[k].x[j] >= dim[j])
+        gparts[k].x[j] -= dim[j];
+    }
+    const int cid = cell_getid(cdim, gparts[k].x[0] * ih[0],
+                               gparts[k].x[1] * ih[1], gparts[k].x[2] * ih[2]);
+    /* if (cid < 0 || cid >= s->nr_cells)
+       error("Bad cell id %i for part %i at [%.3e,%.3e,%.3e].",
+             cid, k, g_parts[k].x[0], g_parts[k].x[1], g_parts[k].x[2]); */
+    g_dest[k] = cells[cid].nodeID;
+
+    /* The counts array is indexed as count[from * nr_nodes + to]. */
+    g_counts[nodeID * nr_nodes + g_dest[k]] += 1;
+  }
+
+  /* Sort the gparticles according to their cell index. */
+  space_gparts_sort(gparts, g_dest, s->nr_gparts, 0, nr_nodes - 1);
+
   /* Get all the counts from all the nodes. */
   if (MPI_Allreduce(MPI_IN_PLACE, counts, nr_nodes * nr_nodes, MPI_INT, MPI_SUM,
                     MPI_COMM_WORLD) != MPI_SUCCESS)
     error("Failed to allreduce particle transfer counts.");
 
-  /* Get the new number of parts for this node, be generous in allocating. */
-  size_t nr_parts = 0;
+  /* Get all the g_counts from all the nodes. */
+  if (MPI_Allreduce(MPI_IN_PLACE, g_counts, nr_nodes * nr_nodes, MPI_INT,
+                    MPI_SUM, MPI_COMM_WORLD) != MPI_SUCCESS)
+    error("Failed to allreduce gparticle transfer counts.");
+
+  /* Each node knows how many parts and gparts will be transferred to every
+     other node. We can start preparing to receive data */
+
+  /* Get the new number of parts and gparts for this node */
+  size_t nr_parts = 0, nr_gparts = 0;
   for (int k = 0; k < nr_nodes; k++) nr_parts += counts[k * nr_nodes + nodeID];
+  for (int k = 0; k < nr_nodes; k++)
+    nr_gparts += g_counts[k * nr_nodes + nodeID];
+
+  /* Allocate the new arrays with some extra margin */
   struct part *parts_new = NULL;
-  struct xpart *xparts_new = NULL, *xparts = s->xparts;
+  struct xpart *xparts_new = NULL;
+  struct gpart *gparts_new = NULL;
   if (posix_memalign((void **)&parts_new, part_align,
-                     sizeof(struct part) * nr_parts * 1.2) != 0 ||
-      posix_memalign((void **)&xparts_new, part_align,
-                     sizeof(struct xpart) * nr_parts * 1.2) != 0)
+                     sizeof(struct part) * nr_parts *
+                         engine_redistribute_alloc_margin) != 0)
     error("Failed to allocate new part data.");
-
-  /* Emit the sends and recvs for the particle data. */
+  if (posix_memalign((void **)&xparts_new, xpart_align,
+                     sizeof(struct xpart) * nr_parts *
+                         engine_redistribute_alloc_margin) != 0)
+    error("Failed to allocate new xpart data.");
+  if (posix_memalign((void **)&gparts_new, gpart_align,
+                     sizeof(struct gpart) * nr_gparts *
+                         engine_redistribute_alloc_margin) != 0)
+    error("Failed to allocate new gpart data.");
+
+  /* Prepare MPI requests for the asynchronous communications */
   MPI_Request *reqs;
-  if ((reqs = (MPI_Request *)malloc(sizeof(MPI_Request) * 4 * nr_nodes)) ==
+  if ((reqs = (MPI_Request *)malloc(sizeof(MPI_Request) * 6 * nr_nodes)) ==
       NULL)
     error("Failed to allocate MPI request list.");
-  for (int k = 0; k < 4 * nr_nodes; k++) reqs[k] = MPI_REQUEST_NULL;
-  for (size_t offset_send = 0, offset_recv = 0, k = 0; k < nr_nodes; k++) {
-    int ind_send = nodeID * nr_nodes + k;
-    int ind_recv = k * nr_nodes + nodeID;
+  for (int k = 0; k < 6 * nr_nodes; k++) reqs[k] = MPI_REQUEST_NULL;
+
+  /* Emit the sends and recvs for the particle and gparticle data. */
+  size_t offset_send = 0, offset_recv = 0;
+  size_t g_offset_send = 0, g_offset_recv = 0;
+  for (int k = 0; k < nr_nodes; k++) {
+
+    /* Indices in the count arrays of the node of interest */
+    const int ind_send = nodeID * nr_nodes + k;
+    const int ind_recv = k * nr_nodes + nodeID;
+
+    /* Are we sending any part/xpart ? */
     if (counts[ind_send] > 0) {
+
+      /* message("Sending %d part to node %d", counts[ind_send], k); */
+
+      /* If the send is to the same node, just copy */
       if (k == nodeID) {
         memcpy(&parts_new[offset_recv], &s->parts[offset_send],
                sizeof(struct part) * counts[ind_recv]);
@@ -239,36 +341,73 @@ void engine_redistribute(struct engine *e) {
                sizeof(struct xpart) * counts[ind_recv]);
         offset_send += counts[ind_send];
         offset_recv += counts[ind_recv];
+
+        /* Else, emit some communications */
       } else {
-        if (MPI_Isend(&s->parts[offset_send], counts[ind_send],
-                      e->part_mpi_type, k, 2 * ind_send + 0, MPI_COMM_WORLD,
-                      &reqs[4 * k]) != MPI_SUCCESS)
-          error("Failed to isend parts to node %zi.", k);
-        if (MPI_Isend(&s->xparts[offset_send], counts[ind_send],
-                      e->xpart_mpi_type, k, 2 * ind_send + 1, MPI_COMM_WORLD,
-                      &reqs[4 * k + 1]) != MPI_SUCCESS)
-          error("Failed to isend xparts to node %zi.", k);
+        if (MPI_Isend(&s->parts[offset_send], counts[ind_send], part_mpi_type,
+                      k, 3 * ind_send + 0, MPI_COMM_WORLD,
+                      &reqs[6 * k]) != MPI_SUCCESS)
+          error("Failed to isend parts to node %i.", k);
+        if (MPI_Isend(&s->xparts[offset_send], counts[ind_send], xpart_mpi_type,
+                      k, 3 * ind_send + 1, MPI_COMM_WORLD,
+                      &reqs[6 * k + 1]) != MPI_SUCCESS)
+          error("Failed to isend xparts to node %i.", k);
         offset_send += counts[ind_send];
       }
     }
+
+    /* Are we sending any gpart ? */
+    if (g_counts[ind_send] > 0) {
+
+      /* message("Sending %d gpart to node %d", g_counts[ind_send], k); */
+
+      /* If the send is to the same node, just copy */
+      if (k == nodeID) {
+        memcpy(&gparts_new[g_offset_recv], &s->gparts[g_offset_send],
+               sizeof(struct gpart) * g_counts[ind_recv]);
+        g_offset_send += g_counts[ind_send];
+        g_offset_recv += g_counts[ind_recv];
+
+        /* Else, emit some communications */
+      } else {
+        if (MPI_Isend(&s->gparts[g_offset_send], g_counts[ind_send],
+                      gpart_mpi_type, k, 3 * ind_send + 2, MPI_COMM_WORLD,
+                      &reqs[6 * k + 2]) != MPI_SUCCESS)
+          error("Failed to isend gparts to node %i.", k);
+        g_offset_send += g_counts[ind_send];
+      }
+    }
+
+    /* Now emit the corresponding Irecv() */
+
+    /* Are we receiving any part/xpart from this node ? */
     if (k != nodeID && counts[ind_recv] > 0) {
-      if (MPI_Irecv(&parts_new[offset_recv], counts[ind_recv], e->part_mpi_type,
-                    k, 2 * ind_recv + 0, MPI_COMM_WORLD,
-                    &reqs[4 * k + 2]) != MPI_SUCCESS)
-        error("Failed to emit irecv of parts from node %zi.", k);
-      if (MPI_Irecv(&xparts_new[offset_recv], counts[ind_recv],
-                    e->xpart_mpi_type, k, 2 * ind_recv + 1, MPI_COMM_WORLD,
-                    &reqs[4 * k + 3]) != MPI_SUCCESS)
-        error("Failed to emit irecv of parts from node %zi.", k);
+      if (MPI_Irecv(&parts_new[offset_recv], counts[ind_recv], part_mpi_type, k,
+                    3 * ind_recv + 0, MPI_COMM_WORLD,
+                    &reqs[6 * k + 3]) != MPI_SUCCESS)
+        error("Failed to emit irecv of parts from node %i.", k);
+      if (MPI_Irecv(&xparts_new[offset_recv], counts[ind_recv], xpart_mpi_type,
+                    k, 3 * ind_recv + 1, MPI_COMM_WORLD,
+                    &reqs[6 * k + 4]) != MPI_SUCCESS)
+        error("Failed to emit irecv of xparts from node %i.", k);
       offset_recv += counts[ind_recv];
     }
+
+    /* Are we receiving any gpart from this node ? */
+    if (k != nodeID && g_counts[ind_recv] > 0) {
+      if (MPI_Irecv(&gparts_new[g_offset_recv], g_counts[ind_recv],
+                    gpart_mpi_type, k, 3 * ind_recv + 2, MPI_COMM_WORLD,
+                    &reqs[6 * k + 5]) != MPI_SUCCESS)
+        error("Failed to emit irecv of gparts from node %i.", k);
+      g_offset_recv += g_counts[ind_recv];
+    }
   }
 
   /* Wait for all the sends and recvs to tumble in. */
-  MPI_Status stats[4 * nr_nodes];
+  MPI_Status stats[6 * nr_nodes];
   int res;
-  if ((res = MPI_Waitall(4 * nr_nodes, reqs, stats)) != MPI_SUCCESS) {
-    for (int k = 0; k < 4 * nr_nodes; k++) {
+  if ((res = MPI_Waitall(6 * nr_nodes, reqs, stats)) != MPI_SUCCESS) {
+    for (int k = 0; k < 6 * nr_nodes; k++) {
       char buff[MPI_MAX_ERROR_STRING];
       int res;
       MPI_Error_string(stats[k].MPI_ERROR, buff, &res);
@@ -277,33 +416,90 @@ void engine_redistribute(struct engine *e) {
     error("Failed during waitall for part data.");
   }
 
+  /* We now need to restore the part<->gpart links */
+  size_t offset_parts = 0, offset_gparts = 0;
+  for (int node = 0; node < nr_nodes; ++node) {
+
+    const int ind_recv = node * nr_nodes + nodeID;
+    const size_t count_parts = counts[ind_recv];
+    const size_t count_gparts = g_counts[ind_recv];
+
+    /* Loop over the gparts received from that node */
+    for (size_t k = offset_gparts; k < offset_gparts + count_gparts; ++k) {
+
+      /* Does this gpart have a partner ? */
+      if (gparts_new[k].id >= 0) {
+
+        const size_t partner_index = offset_parts + gparts_new[k].id;
+
+        /* Re-link */
+        gparts_new[k].part = &parts_new[partner_index];
+        gparts_new[k].part->gpart = &gparts_new[k];
+      }
+    }
+
+    offset_parts += count_parts;
+    offset_gparts += count_gparts;
+  }
+
   /* Verify that all parts are in the right place. */
   /* for ( int k = 0 ; k < nr_parts ; k++ ) {
-      int cid = cell_getid( cdim , parts_new[k].x[0]*ih[0], parts_new[k].x[1]*ih[1], parts_new[k].x[2]*ih[2] );
+      int cid = cell_getid( cdim , parts_new[k].x[0]*ih[0],
+    parts_new[k].x[1]*ih[1], parts_new[k].x[2]*ih[2] );
       if ( cells[ cid ].nodeID != nodeID )
-          error( "Received particle (%i) that does not belong here (nodeID=%i).", k , cells[ cid ].nodeID );
+          error( "Received particle (%i) that does not belong here
+    (nodeID=%i).", k , cells[ cid ].nodeID );
     } */
 
+  /* Verify that the links are correct */
+  /* MATTHIEU: To be commented out once we are happy */
+  for (size_t k = 0; k < nr_gparts; ++k) {
+
+    if (gparts_new[k].id > 0) {
+
+      if (gparts_new[k].part->gpart != &gparts_new[k])
+        error("Linking problem !");
+
+      if (gparts_new[k].x[0] != gparts_new[k].part->x[0] ||
+          gparts_new[k].x[1] != gparts_new[k].part->x[1] ||
+          gparts_new[k].x[2] != gparts_new[k].part->x[2])
+        error("Linked particles are not at the same position !");
+    }
+  }
+  for (size_t k = 0; k < nr_parts; ++k) {
+
+    if (parts_new[k].gpart != NULL) {
+
+      if (parts_new[k].gpart->part != &parts_new[k]) error("Linking problem !");
+    }
+  }
+
   /* Set the new part data, free the old. */
   free(parts);
   free(xparts);
+  free(gparts);
   s->parts = parts_new;
   s->xparts = xparts_new;
+  s->gparts = gparts_new;
   s->nr_parts = nr_parts;
-  s->size_parts = 1.2 * nr_parts;
+  s->nr_gparts = nr_gparts;
+  s->size_parts = engine_redistribute_alloc_margin * nr_parts;
+  s->size_gparts = engine_redistribute_alloc_margin * nr_gparts;
 
-  /* Be verbose about what just happened. */
-  for (int k = 0; k < nr_cells; k++)
-    if (cells[k].nodeID == nodeID) my_cells += 1;
-  if (e->verbose)
-    message("node %i now has %zi parts in %i cells.", nodeID, nr_parts,
-            my_cells);
-
-  /* Clean up other stuff. */
+  /* Clean up the temporary stuff. */
   free(reqs);
   free(counts);
   free(dest);
 
+  /* Be verbose about what just happened. */
+  if (e->verbose) {
+    int my_cells = 0;
+    for (int k = 0; k < nr_cells; k++)
+      if (cells[k].nodeID == nodeID) my_cells += 1;
+    message("node %i now has %zi parts and %zi gparts in %i cells.", nodeID,
+            nr_parts, nr_gparts, my_cells);
+  }
+
   if (e->verbose)
     message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
             clocks_getunit());
@@ -565,31 +761,40 @@ void engine_exchange_cells(struct engine *e) {
 
   /* Count the number of particles we need to import and re-allocate
      the buffer if needed. */
-  int count_in = 0;
+  int count_parts_in = 0, count_gparts_in = 0;
   for (int k = 0; k < nr_proxies; k++)
-    for (int j = 0; j < e->proxies[k].nr_cells_in; j++)
-      count_in += e->proxies[k].cells_in[j]->count;
-  if (count_in > s->size_parts_foreign) {
+    for (int j = 0; j < e->proxies[k].nr_cells_in; j++) {
+      count_parts_in += e->proxies[k].cells_in[j]->count;
+      count_gparts_in += e->proxies[k].cells_in[j]->gcount;
+    }
+  if (count_parts_in > s->size_parts_foreign) {
     if (s->parts_foreign != NULL) free(s->parts_foreign);
-    s->size_parts_foreign = 1.1 * count_in;
+    s->size_parts_foreign = 1.1 * count_parts_in;
     if (posix_memalign((void **)&s->parts_foreign, part_align,
                        sizeof(struct part) * s->size_parts_foreign) != 0)
       error("Failed to allocate foreign part data.");
   }
+  if (count_gparts_in > s->size_gparts_foreign) {
+    if (s->gparts_foreign != NULL) free(s->gparts_foreign);
+    s->size_gparts_foreign = 1.1 * count_gparts_in;
+    if (posix_memalign((void **)&s->gparts_foreign, gpart_align,
+                       sizeof(struct gpart) * s->size_gparts_foreign) != 0)
+      error("Failed to allocate foreign gpart data.");
+  }
 
   /* Unpack the cells and link to the particle data. */
   struct part *parts = s->parts_foreign;
+  struct gpart *gparts = s->gparts_foreign;
   for (int k = 0; k < nr_proxies; k++) {
     for (int j = 0; j < e->proxies[k].nr_cells_in; j++) {
-      cell_link(e->proxies[k].cells_in[j], parts);
+      cell_link_parts(e->proxies[k].cells_in[j], parts);
+      cell_link_gparts(e->proxies[k].cells_in[j], gparts);
       parts = &parts[e->proxies[k].cells_in[j]->count];
+      gparts = &gparts[e->proxies[k].cells_in[j]->gcount];
     }
   }
   s->nr_parts_foreign = parts - s->parts_foreign;
-
-  /* Is the parts buffer large enough? */
-  if (s->nr_parts_foreign > s->size_parts_foreign)
-    error("Foreign parts buffer too small.");
+  s->nr_gparts_foreign = gparts - s->gparts_foreign;
 
   /* Free the pcell buffer. */
   free(pcells);
@@ -607,16 +812,24 @@ void engine_exchange_cells(struct engine *e) {
  * @brief Exchange straying parts with other nodes.
  *
  * @param e The #engine.
- * @param offset The index in the parts array as of which the foreign parts
- *reside.
- * @param ind The ID of the foreign #cell.
- * @param N The number of stray parts.
+ * @param offset_parts The index in the parts array as of which the foreign
+ *        parts reside.
+ * @param ind_part The foreign #cell ID of each part.
+ * @param Npart The number of stray parts, contains the number of parts received
+ *        on return.
+ * @param offset_gparts The index in the gparts array as of which the foreign
+ *        parts reside.
+ * @param ind_gpart The foreign #cell ID of each gpart.
+ * @param Ngpart The number of stray gparts, contains the number of gparts
+ *        received on return.
  *
- * @return The number of arrived parts copied to parts and xparts.
+ * Note that this function does not mess-up the linkage between parts and
+ * gparts, i.e. the received particles have correct linkeage.
  */
 
-int engine_exchange_strays(struct engine *e, int offset, size_t *ind,
-                           size_t N) {
+void engine_exchange_strays(struct engine *e, size_t offset_parts,
+                            int *ind_part, size_t *Npart, size_t offset_gparts,
+                            int *ind_gpart, size_t *Ngpart) {
 
 #ifdef WITH_MPI
 
@@ -626,25 +839,49 @@ int engine_exchange_strays(struct engine *e, int offset, size_t *ind,
   /* Re-set the proxies. */
   for (int k = 0; k < e->nr_proxies; k++) e->proxies[k].nr_parts_out = 0;
 
-  /* Put the parts into the corresponding proxies. */
-  for (size_t k = 0; k < N; k++) {
-    const int node_id = e->s->cells[ind[k]].nodeID;
+  /* Put the parts and gparts into the corresponding proxies. */
+  for (size_t k = 0; k < *Npart; k++) {
+    /* Get the target node and proxy ID. */
+    const int node_id = e->s->cells[ind_part[k]].nodeID;
     if (node_id < 0 || node_id >= e->nr_nodes)
       error("Bad node ID %i.", node_id);
     const int pid = e->proxy_ind[node_id];
-    if (pid < 0)
+    if (pid < 0) {
       error(
           "Do not have a proxy for the requested nodeID %i for part with "
           "id=%llu, x=[%e,%e,%e].",
-          node_id, s->parts[offset + k].id, s->parts[offset + k].x[0],
-          s->parts[offset + k].x[1], s->parts[offset + k].x[2]);
-    proxy_parts_load(&e->proxies[pid], &s->parts[offset + k],
-                     &s->xparts[offset + k], 1);
+          node_id, s->parts[offset_parts + k].id,
+          s->parts[offset_parts + k].x[0], s->parts[offset_parts + k].x[1],
+          s->parts[offset_parts + k].x[2]);
+    }
+
+    /* Re-link the associated gpart with the buffer offset of the part. */
+    if (s->parts[offset_parts + k].gpart != NULL) {
+      s->parts[offset_parts + k].gpart->id = e->proxies[pid].nr_parts_in;
+    }
+
+    /* Load the part and xpart into the proxy. */
+    proxy_parts_load(&e->proxies[pid], &s->parts[offset_parts + k],
+                     &s->xparts[offset_parts + k], 1);
+  }
+  for (size_t k = 0; k < *Ngpart; k++) {
+    const int node_id = e->s->cells[ind_gpart[k]].nodeID;
+    if (node_id < 0 || node_id >= e->nr_nodes)
+      error("Bad node ID %i.", node_id);
+    const int pid = e->proxy_ind[node_id];
+    if (pid < 0)
+      error(
+          "Do not have a proxy for the requested nodeID %i for part with "
+          "id=%lli, x=[%e,%e,%e].",
+          node_id, s->gparts[offset_parts + k].id,
+          s->gparts[offset_gparts + k].x[0], s->gparts[offset_parts + k].x[1],
+          s->gparts[offset_gparts + k].x[2]);
+    proxy_gparts_load(&e->proxies[pid], &s->gparts[offset_gparts + k], 1);
   }
 
   /* Launch the proxies. */
-  MPI_Request reqs_in[2 * engine_maxproxies];
-  MPI_Request reqs_out[2 * engine_maxproxies];
+  MPI_Request reqs_in[3 * engine_maxproxies];
+  MPI_Request reqs_out[3 * engine_maxproxies];
   for (int k = 0; k < e->nr_proxies; k++) {
     proxy_parts_exch1(&e->proxies[k]);
     reqs_in[k] = e->proxies[k].req_parts_count_in;
@@ -668,11 +905,18 @@ int engine_exchange_strays(struct engine *e, int offset, size_t *ind,
 
   /* Count the total number of incoming particles and make sure we have
      enough space to accommodate them. */
-  size_t count_in = 0;
-  for (int k = 0; k < e->nr_proxies; k++) count_in += e->proxies[k].nr_parts_in;
-  if (e->verbose) message("sent out %zi particles, got %zi back.", N, count_in);
-  if (offset + count_in > s->size_parts) {
-    s->size_parts = (offset + count_in) * 1.05;
+  int count_parts_in = 0;
+  int count_gparts_in = 0;
+  for (int k = 0; k < e->nr_proxies; k++) {
+    count_parts_in += e->proxies[k].nr_parts_in;
+    count_gparts_in += e->proxies[k].nr_gparts_in;
+  }
+  if (e->verbose) {
+    message("sent out %zi/%zi parts/gparts, got %i/%i back.", *Npart, *Ngpart,
+            count_parts_in, count_gparts_in);
+  }
+  if (offset_parts + count_parts_in > s->size_parts) {
+    s->size_parts = (offset_parts + count_parts_in) * engine_parts_size_grow;
     struct part *parts_new = NULL;
     struct xpart *xparts_new = NULL;
     if (posix_memalign((void **)&parts_new, part_align,
@@ -680,37 +924,61 @@ int engine_exchange_strays(struct engine *e, int offset, size_t *ind,
         posix_memalign((void **)&xparts_new, part_align,
                        sizeof(struct xpart) * s->size_parts) != 0)
       error("Failed to allocate new part data.");
-    memcpy(parts_new, s->parts, sizeof(struct part) * offset);
-    memcpy(xparts_new, s->xparts, sizeof(struct xpart) * offset);
+    memcpy(parts_new, s->parts, sizeof(struct part) * offset_parts);
+    memcpy(xparts_new, s->xparts, sizeof(struct xpart) * offset_parts);
     free(s->parts);
     free(s->xparts);
     s->parts = parts_new;
     s->xparts = xparts_new;
   }
+  if (offset_gparts + count_gparts_in > s->size_gparts) {
+    s->size_gparts = (offset_gparts + count_gparts_in) * engine_parts_size_grow;
+    struct gpart *gparts_new = NULL;
+    if (posix_memalign((void **)&gparts_new, gpart_align,
+                       sizeof(struct gpart) * s->size_gparts) != 0)
+      error("Failed to allocate new gpart data.");
+    memcpy(gparts_new, s->gparts, sizeof(struct gpart) * offset_gparts);
+    free(s->gparts);
+    s->gparts = gparts_new;
+  }
 
   /* Collect the requests for the particle data from the proxies. */
   int nr_in = 0, nr_out = 0;
   for (int k = 0; k < e->nr_proxies; k++) {
     if (e->proxies[k].nr_parts_in > 0) {
-      reqs_in[2 * k] = e->proxies[k].req_parts_in;
-      reqs_in[2 * k + 1] = e->proxies[k].req_xparts_in;
+      reqs_in[3 * k] = e->proxies[k].req_parts_in;
+      reqs_in[3 * k + 1] = e->proxies[k].req_xparts_in;
+      nr_in += 2;
+    } else {
+      reqs_in[3 * k] = reqs_in[3 * k + 1] = MPI_REQUEST_NULL;
+    }
+    if (e->proxies[k].nr_gparts_in > 0) {
+      reqs_in[3 * k + 2] = e->proxies[k].req_gparts_in;
       nr_in += 1;
-    } else
-      reqs_in[2 * k] = reqs_in[2 * k + 1] = MPI_REQUEST_NULL;
+    } else {
+      reqs_in[3 * k + 2] = MPI_REQUEST_NULL;
+    }
     if (e->proxies[k].nr_parts_out > 0) {
-      reqs_out[2 * k] = e->proxies[k].req_parts_out;
-      reqs_out[2 * k + 1] = e->proxies[k].req_xparts_out;
+      reqs_out[3 * k] = e->proxies[k].req_parts_out;
+      reqs_out[3 * k + 1] = e->proxies[k].req_xparts_out;
+      nr_out += 2;
+    } else {
+      reqs_out[3 * k] = reqs_out[3 * k + 1] = MPI_REQUEST_NULL;
+    }
+    if (e->proxies[k].nr_gparts_out > 0) {
+      reqs_out[3 * k + 2] = e->proxies[k].req_gparts_out;
       nr_out += 1;
-    } else
-      reqs_out[2 * k] = reqs_out[2 * k + 1] = MPI_REQUEST_NULL;
+    } else {
+      reqs_out[3 * k + 2] = MPI_REQUEST_NULL;
+    }
   }
 
   /* Wait for each part array to come in and collect the new
      parts from the proxies. */
-  size_t count = 0;
-  for (int k = 0; k < 2 * (nr_in + nr_out); k++) {
-    int err = 0, pid = MPI_UNDEFINED;
-    if ((err = MPI_Waitany(2 * e->nr_proxies, reqs_in, &pid,
+  int count_parts = 0, count_gparts = 0;
+  for (int k = 0; k < nr_in; k++) {
+    int err, pid;
+    if ((err = MPI_Waitany(3 * e->nr_proxies, reqs_in, &pid,
                            MPI_STATUS_IGNORE)) != MPI_SUCCESS) {
       char buff[MPI_MAX_ERROR_STRING];
       int res;
@@ -718,26 +986,46 @@ int engine_exchange_strays(struct engine *e, int offset, size_t *ind,
       error("MPI_Waitany failed (%s).", buff);
     }
     if (pid == MPI_UNDEFINED) break;
-    // message( "request from proxy %i has arrived." , pid );
-    if (reqs_in[pid & ~1] == MPI_REQUEST_NULL &&
-        reqs_in[pid | 1] == MPI_REQUEST_NULL) {
+    // message( "request from proxy %i has arrived." , pid / 3 );
+    pid = 3 * (pid / 3);
+
+    /* If all the requests for a given proxy have arrived... */
+    if (reqs_in[pid + 0] == MPI_REQUEST_NULL &&
+        reqs_in[pid + 1] == MPI_REQUEST_NULL &&
+        reqs_in[pid + 2] == MPI_REQUEST_NULL) {
+      /* Copy the particle data to the part/xpart/gpart arrays. */
       struct proxy *p = &e->proxies[pid >> 1];
-      memcpy(&s->parts[offset + count], p->parts_in,
+      memcpy(&s->parts[offset_parts + count_parts], p->parts_in,
              sizeof(struct part) * p->nr_parts_in);
-      memcpy(&s->xparts[offset + count], p->xparts_in,
+      memcpy(&s->xparts[offset_parts + count_parts], p->xparts_in,
              sizeof(struct xpart) * p->nr_parts_in);
+      memcpy(&s->gparts[offset_gparts + count_gparts], p->gparts_in,
+             sizeof(struct gpart) * p->nr_gparts_in);
       /* for (int k = offset; k < offset + count; k++)
          message(
             "received particle %lli, x=[%.3e %.3e %.3e], h=%.3e, from node %i.",
             s->parts[k].id, s->parts[k].x[0], s->parts[k].x[1],
             s->parts[k].x[2], s->parts[k].h, p->nodeID); */
-      count += p->nr_parts_in;
+
+      /* Re-link the gparts. */
+      for (int k = 0; k < p->nr_gparts_in; k++) {
+        struct gpart *gp = &s->gparts[offset_gparts + count_gparts + k];
+        if (gp->id >= 0) {
+          struct part *p = &s->parts[offset_gparts + count_parts + gp->id];
+          gp->part = p;
+          p->gpart = gp;
+        }
+      }
+
+      /* Advance the counters. */
+      count_parts += p->nr_parts_in;
+      count_gparts += p->nr_gparts_in;
     }
   }
 
   /* Wait for all the sends to have finished too. */
   if (nr_out > 0)
-    if (MPI_Waitall(2 * e->nr_proxies, reqs_out, MPI_STATUSES_IGNORE) !=
+    if (MPI_Waitall(3 * e->nr_proxies, reqs_out, MPI_STATUSES_IGNORE) !=
         MPI_SUCCESS)
       error("MPI_Waitall on sends failed.");
 
@@ -746,11 +1034,11 @@ int engine_exchange_strays(struct engine *e, int offset, size_t *ind,
             clocks_getunit());
 
   /* Return the number of harvested parts. */
-  return count;
+  *Npart = count_parts;
+  *Ngpart = count_gparts;
 
 #else
   error("SWIFT was not compiled with MPI support.");
-  return 0;
 #endif
 }
 
@@ -1288,6 +1576,7 @@ int engine_marktasks(struct engine *e) {
       else if (t->type == task_type_kick) {
         t->skip = (t->ci->ti_end_min > ti_end);
         t->ci->updated = 0;
+        t->ci->g_updated = 0;
       }
 
       /* Drift? */
@@ -1344,6 +1633,7 @@ void engine_print_task_counts(struct engine *e) {
   printf(" skipped=%i ]\n", counts[task_type_count]);
   fflush(stdout);
   message("nr_parts = %zi.", e->s->nr_parts);
+  message("nr_gparts = %zi.", e->s->nr_gparts);
 }
 
 /**
@@ -1473,7 +1763,7 @@ void engine_collect_kick(struct cell *c) {
   if (c->kick != NULL) return;
 
   /* Counters for the different quantities. */
-  int updated = 0;
+  int updated = 0, g_updated = 0;
   double e_kin = 0.0, e_int = 0.0, e_pot = 0.0;
   float mom[3] = {0.0f, 0.0f, 0.0f}, ang[3] = {0.0f, 0.0f, 0.0f};
   int ti_end_min = max_nr_timesteps, ti_end_max = 0;
@@ -1496,6 +1786,7 @@ void engine_collect_kick(struct cell *c) {
         ti_end_min = min(ti_end_min, cp->ti_end_min);
         ti_end_max = max(ti_end_max, cp->ti_end_max);
         updated += cp->updated;
+        g_updated += cp->g_updated;
         e_kin += cp->e_kin;
         e_int += cp->e_int;
         e_pot += cp->e_pot;
@@ -1513,6 +1804,7 @@ void engine_collect_kick(struct cell *c) {
   c->ti_end_min = ti_end_min;
   c->ti_end_max = ti_end_max;
   c->updated = updated;
+  c->g_updated = g_updated;
   c->e_kin = e_kin;
   c->e_int = e_int;
   c->e_pot = e_pot;
@@ -1576,7 +1868,15 @@ void engine_init_particles(struct engine *e) {
 
   /* Make sure all particles are ready to go */
   /* i.e. clean-up any stupid state in the ICs */
-  space_map_cells_pre(s, 1, cell_init_parts, NULL);
+  if ((e->policy & engine_policy_hydro) == engine_policy_hydro) {
+    space_map_cells_pre(s, 1, cell_init_parts, NULL);
+  }
+  if (((e->policy & engine_policy_self_gravity) ==
+       engine_policy_self_gravity) ||
+      ((e->policy & engine_policy_external_gravity) ==
+       engine_policy_external_gravity)) {
+    space_map_cells_pre(s, 1, cell_init_gparts, NULL);
+  }
 
   engine_prepare(e);
 
@@ -1643,7 +1943,7 @@ void engine_init_particles(struct engine *e) {
  */
 void engine_step(struct engine *e) {
 
-  int updates = 0;
+  int updates = 0, g_updates = 0;
   int ti_end_min = max_nr_timesteps, ti_end_max = 0;
   double e_pot = 0.0, e_int = 0.0, e_kin = 0.0;
   float mom[3] = {0.0, 0.0, 0.0};
@@ -1670,6 +1970,7 @@ void engine_step(struct engine *e) {
       e_int += c->e_int;
       e_pot += c->e_pot;
       updates += c->updated;
+      g_updates += c->g_updated;
       mom[0] += c->mom[0];
       mom[1] += c->mom[1];
       mom[2] += c->mom[2];
@@ -1695,18 +1996,20 @@ void engine_step(struct engine *e) {
     ti_end_max = in_i[0];
   }
   {
-    double in_d[4], out_d[4];
+    double in_d[5], out_d[5];
     out_d[0] = updates;
-    out_d[1] = e_kin;
-    out_d[2] = e_int;
-    out_d[3] = e_pot;
-    if (MPI_Allreduce(out_d, in_d, 4, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD) !=
+    out_d[1] = g_updates;
+    out_d[2] = e_kin;
+    out_d[3] = e_int;
+    out_d[4] = e_pot;
+    if (MPI_Allreduce(out_d, in_d, 5, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD) !=
         MPI_SUCCESS)
       error("Failed to aggregate energies.");
     updates = in_d[0];
-    e_kin = in_d[1];
-    e_int = in_d[2];
-    e_pot = in_d[3];
+    g_updates = in_d[1];
+    e_kin = in_d[2];
+    e_int = in_d[3];
+    e_pot = in_d[4];
   }
 #endif
 
@@ -1731,8 +2034,8 @@ void engine_step(struct engine *e) {
   if (e->nodeID == 0) {
 
     /* Print some information to the screen */
-    printf("%d %e %e %d %.3f\n", e->step, e->time, e->timeStep, updates,
-           e->wallclock_time);
+    printf("%d %e %e %d %d %.3f\n", e->step, e->time, e->timeStep, updates,
+           g_updates, e->wallclock_time);
     fflush(stdout);
 
     /* Write some energy statistics */
@@ -1934,7 +2237,7 @@ void engine_split(struct engine *e, struct partition *initial_partition) {
   engine_makeproxies(e);
 
   /* Re-allocate the local parts. */
-  if (e->nodeID == 0)
+  if (e->verbose)
     message("Re-allocating parts array from %zi to %zi.", s->size_parts,
             (size_t)(s->nr_parts * 1.2));
   s->size_parts = s->nr_parts * 1.2;
@@ -1942,7 +2245,7 @@ void engine_split(struct engine *e, struct partition *initial_partition) {
   struct xpart *xparts_new = NULL;
   if (posix_memalign((void **)&parts_new, part_align,
                      sizeof(struct part) * s->size_parts) != 0 ||
-      posix_memalign((void **)&xparts_new, part_align,
+      posix_memalign((void **)&xparts_new, xpart_align,
                      sizeof(struct xpart) * s->size_parts) != 0)
     error("Failed to allocate new part data.");
   memcpy(parts_new, s->parts, sizeof(struct part) * s->nr_parts);
@@ -1951,6 +2254,50 @@ void engine_split(struct engine *e, struct partition *initial_partition) {
   free(s->xparts);
   s->parts = parts_new;
   s->xparts = xparts_new;
+
+  /* Re-link the gparts. */
+  for (size_t k = 0; k < s->nr_parts; k++)
+    if (s->parts[k].gpart != NULL) s->parts[k].gpart->part = &s->parts[k];
+
+  /* Re-allocate the local gparts. */
+  if (e->verbose)
+    message("Re-allocating gparts array from %zi to %zi.", s->size_gparts,
+            (size_t)(s->nr_gparts * 1.2));
+  s->size_gparts = s->nr_gparts * 1.2;
+  struct gpart *gparts_new = NULL;
+  if (posix_memalign((void **)&gparts_new, gpart_align,
+                     sizeof(struct gpart) * s->size_gparts) != 0)
+    error("Failed to allocate new gpart data.");
+  memcpy(gparts_new, s->gparts, sizeof(struct gpart) * s->nr_gparts);
+  free(s->gparts);
+  s->gparts = gparts_new;
+
+  /* Re-link the parts. */
+  for (size_t k = 0; k < s->nr_gparts; k++)
+    if (s->gparts[k].id > 0) s->gparts[k].part->gpart = &s->gparts[k];
+
+  /* Verify that the links are correct */
+  /* MATTHIEU: To be commented out once we are happy */
+  for (size_t k = 0; k < s->nr_gparts; ++k) {
+
+    if (s->gparts[k].id > 0) {
+
+      if (s->gparts[k].part->gpart != &s->gparts[k]) error("Linking problem !");
+
+      if (s->gparts[k].x[0] != s->gparts[k].part->x[0] ||
+          s->gparts[k].x[1] != s->gparts[k].part->x[1] ||
+          s->gparts[k].x[2] != s->gparts[k].part->x[2])
+        error("Linked particles are not at the same position !");
+    }
+  }
+  for (size_t k = 0; k < s->nr_parts; ++k) {
+
+    if (s->parts[k].gpart != NULL) {
+
+      if (s->parts[k].gpart->part != &s->parts[k]) error("Linking problem !");
+    }
+  }
+
 #else
   error("SWIFT was not compiled with MPI support.");
 #endif
@@ -2126,6 +2473,7 @@ void engine_init(struct engine *e, struct space *s, float dt, int nr_threads,
 
   /* Print information about the hydro scheme */
   if (e->nodeID == 0) message("Hydrodynamic scheme: %s", SPH_IMPLEMENTATION);
+  if (e->nodeID == 0) message("Hydrodynamic kernel: %s", kernel_name);
 
   /* Check we have sensible time bounds */
   if (timeBegin >= timeEnd)
@@ -2134,10 +2482,12 @@ void engine_init(struct engine *e, struct space *s, float dt, int nr_threads,
         "(t_beg = %e)",
         timeEnd, timeBegin);
 
-  /* Check we have sensible time step bounds */
+  /* Check we have sensible time-step values */
   if (e->dt_min > e->dt_max)
     error(
-        "Minimal time step size must be smaller than maximal time step size ");
+        "Minimal time-step size (%e) must be smaller than maximal time-step "
+        "size (%e)",
+        e->dt_min, e->dt_max);
 
   /* Deal with timestep */
   e->timeBase = (timeEnd - timeBegin) / max_nr_timesteps;
@@ -2183,8 +2533,7 @@ void engine_init(struct engine *e, struct space *s, float dt, int nr_threads,
 
 /* Construct types for MPI communications */
 #ifdef WITH_MPI
-  part_create_mpi_type(&e->part_mpi_type);
-  xpart_create_mpi_type(&e->xpart_mpi_type);
+  part_create_mpi_types();
 #endif
 
   /* First of all, init the barrier and lock it. */
diff --git a/src/engine.h b/src/engine.h
index 741ae1f553494e435394f529606b4cb794b0e3d2..4d1860b9eed0203bf9bf75711ec6e6549d837fe7 100644
--- a/src/engine.h
+++ b/src/engine.h
@@ -62,6 +62,8 @@ extern const char *engine_policy_names[];
 #define engine_maxtaskspercell 96
 #define engine_maxproxies 64
 #define engine_tasksreweight 10
+#define engine_parts_size_grow 1.05
+#define engine_redistribute_alloc_margin 1.2
 
 /* The rank of the engine as a global variable (for messages). */
 extern int engine_rank;
@@ -160,12 +162,6 @@ struct engine {
 
   /* Are we talkative ? */
   int verbose;
-
-#ifdef WITH_MPI
-  /* MPI data type for the particle transfers */
-  MPI_Datatype part_mpi_type;
-  MPI_Datatype xpart_mpi_type;
-#endif
 };
 
 /* Function prototypes. */
@@ -182,7 +178,9 @@ void engine_init_particles(struct engine *e);
 void engine_step(struct engine *e);
 void engine_maketasks(struct engine *e);
 void engine_split(struct engine *e, struct partition *initial_partition);
-int engine_exchange_strays(struct engine *e, int offset, size_t *ind, size_t N);
+void engine_exchange_strays(struct engine *e, size_t offset_parts,
+                            int *ind_part, size_t *Npart, size_t offset_gparts,
+                            int *ind_gpart, size_t *Ngpart);
 void engine_rebuild(struct engine *e);
 void engine_repartition(struct engine *e);
 void engine_makeproxies(struct engine *e);
diff --git a/src/gravity/Default/gravity.h b/src/gravity/Default/gravity.h
index 4458e4012c5ea343295a97903214f732f5b8a5a7..8fcfe6490c539732b885a9c102f011aebb0aaeff 100644
--- a/src/gravity/Default/gravity.h
+++ b/src/gravity/Default/gravity.h
@@ -22,35 +22,43 @@
 /**
  * @brief Computes the gravity time-step of a given particle
  *
- * @param p Pointer to the particle data
- * @param xp Pointer to the extended particle data
+ * @param gp Pointer to the g-particle data
  *
  */
 
-__attribute__((always_inline)) INLINE static float gravity_compute_timestep(
-    struct part* p, struct xpart* xp) {
+__attribute__((always_inline))
+    INLINE static float gravity_compute_timestep(struct gpart* gp) {
 
   /* Currently no limit is imposed */
   return FLT_MAX;
 }
 
-__attribute__((always_inline)) INLINE static void gravity_init_gpart(struct gpart* g) {
-  
-  /* zero gravity acceleration */
-  g->a_grav_external[0] = 0.f;
-  g->a_grav_external[1] = 0.f;
-  g->a_grav_external[2] = 0.f;
-
-}
-
+/**
+ * @brief Initialises the g-particles for the first time
+ *
+ * This function is called only once just after the ICs have been
+ * read in to do some conversions.
+ *
+ * @param gp The particle to act upon
+ */
+__attribute__((always_inline))
+    INLINE static void gravity_first_init_gpart(struct gpart* gp) {}
 
+/**
+ * @brief Prepares a g-particle for the gravity calculation
+ *
+ * Zeroes all the relevant arrays in preparation for the sums taking place in
+ * the variaous tasks
+ *
+ * @param gp The particle to act upon
+ */
+__attribute__((always_inline))
+    INLINE static void gravity_init_part(struct gpart* gp) {
 
-__attribute__((always_inline)) INLINE static float external_gravity_compute_timestep(struct gpart* g) {
-  float dtmin = FLT_MAX;
-#ifdef EXTERNAL_POTENTIAL_POINTMASS
-  dtmin = fminf(dtmin, external_gravity_pointmass_timestep(g));
-#endif
-  return dtmin;
+  /* Zero the acceleration */
+  gp->a_grav[0] = 0.f;
+  gp->a_grav[1] = 0.f;
+  gp->a_grav[2] = 0.f;
 }
 
 __attribute__((always_inline)) INLINE static void external_gravity(struct gpart *g)
@@ -59,4 +67,12 @@ __attribute__((always_inline)) INLINE static void external_gravity(struct gpart
   external_gravity_pointmass(g);
 #endif
 
-}
+/**
+ * @brief Kick the additional variables
+ *
+ * @param gp The particle to act upon
+ * @param dt The time-step for this kick
+ * @param half_dt The half time-step for this kick
+ */
+__attribute__((always_inline)) INLINE static void gravity_kick_extra(
+    struct gpart* gp, float dt, float half_dt) {}
diff --git a/src/gravity/Default/gravity_debug.h b/src/gravity/Default/gravity_debug.h
index 98e0c40a5700b4da70f27fb0955592bb5d2287c3..654745bfeb70dddba772af9e23797713376377a7 100644
--- a/src/gravity/Default/gravity_debug.h
+++ b/src/gravity/Default/gravity_debug.h
@@ -24,5 +24,5 @@ __attribute__((always_inline))
       "v_full=[%.3e,%.3e,%.3e] \n a=[%.3e,%.3e,%.3e],\n "
       "mass=%.3e t_begin=%d, t_end=%d\n",
       p->x[0], p->x[1], p->x[2], p->v_full[0], p->v_full[1], p->v_full[2],
-      p->a[0], p->a[1], p->a[2], p->mass, p->ti_begin, p->ti_end);
+      p->a_grav[0], p->a_grav[1], p->a_grav[2], p->mass, p->ti_begin, p->ti_end);
 }
diff --git a/src/gravity/Default/gravity_iact.h b/src/gravity/Default/gravity_iact.h
index 56da9039ff0d19825f880f765e598fcd62bff516..a9924dab275dbcf9e689dbb3a3f6627b9a2471e7 100644
--- a/src/gravity/Default/gravity_iact.h
+++ b/src/gravity/Default/gravity_iact.h
@@ -50,8 +50,8 @@ __attribute__((always_inline)) INLINE static void runner_iact_grav(
   /* Aggregate the accelerations. */
   for (k = 0; k < 3; k++) {
     w = acc * dx[k];
-    pi->a[k] -= w * mj;
-    pj->a[k] += w * mi;
+    pi->a_grav[k] -= w * mj;
+    pj->a_grav[k] += w * mi;
   }
 }
 
@@ -101,8 +101,8 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_grav(
     ai.v = w.v * mj.v;
     aj.v = w.v * mi.v;
     for (j = 0; j < VEC_SIZE; j++) {
-      pi[j]->a[k] -= ai.f[j];
-      pj[j]->a[k] += aj.f[j];
+      pi[j]->a_grav[k] -= ai.f[j];
+      pj[j]->a_grav[k] += aj.f[j];
     }
   }
 
diff --git a/src/gravity/Default/gravity_part.h b/src/gravity/Default/gravity_part.h
index 9e096d5138aa75ca6de946f2d1140500b7d6c355..47aadd19585a71d04443403245cc76e28f46d80b 100644
--- a/src/gravity/Default/gravity_part.h
+++ b/src/gravity/Default/gravity_part.h
@@ -29,7 +29,7 @@ struct gpart {
   float v_full[3];
 
   /* Particle acceleration. */
-  float a[3];
+  float a_grav[3];
 
   /* Gravity acceleration */
   float a_grav_external[3];
diff --git a/src/hydro/Gadget2/hydro_iact.h b/src/hydro/Gadget2/hydro_iact.h
index d31b6be383b80a2698b63d27308f6fee9b23518f..09f796a8f37a9c015135f4aab3f821c2e862bdc9 100644
--- a/src/hydro/Gadget2/hydro_iact.h
+++ b/src/hydro/Gadget2/hydro_iact.h
@@ -93,8 +93,8 @@ __attribute__((always_inline)) INLINE static void runner_iact_density(
   dv[2] = pi->v[2] - pj->v[2];
   const float dvdr = dv[0] * dx[0] + dv[1] * dx[1] + dv[2] * dx[2];
 
-  pi->div_v += faci * dvdr;
-  pj->div_v += facj * dvdr;
+  pi->div_v -= faci * dvdr;
+  pj->div_v -= facj * dvdr;
 
   /* Compute dv cross r */
   curlvr[0] = dv[1] * dx[2] - dv[2] * dx[1];
@@ -211,10 +211,10 @@ __attribute__((always_inline)) INLINE static void runner_iact_force(
   /* Balsara term */
   const float balsara_i =
       fabsf(pi->div_v) /
-      (fabsf(pi->div_v) + pi->force.curl_v + 0.0001 * ci / fac_mu / hi);
+      (fabsf(pi->div_v) + pi->force.curl_v + 0.0001f * ci / fac_mu / hi);
   const float balsara_j =
       fabsf(pj->div_v) /
-      (fabsf(pj->div_v) + pj->force.curl_v + 0.0001 * cj / fac_mu / hj);
+      (fabsf(pj->div_v) + pj->force.curl_v + 0.0001f * cj / fac_mu / hj);
 
   /* Are the particles moving towards each others ? */
   const float omega_ij = fminf(dvdr, 0.f);
@@ -309,10 +309,10 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force(
   /* Balsara term */
   const float balsara_i =
       fabsf(pi->div_v) /
-      (fabsf(pi->div_v) + pi->force.curl_v + 0.0001 * ci / fac_mu / hi);
+      (fabsf(pi->div_v) + pi->force.curl_v + 0.0001f * ci / fac_mu / hi);
   const float balsara_j =
       fabsf(pj->div_v) /
-      (fabsf(pj->div_v) + pj->force.curl_v + 0.0001 * cj / fac_mu / hj);
+      (fabsf(pj->div_v) + pj->force.curl_v + 0.0001f * cj / fac_mu / hj);
 
   /* Are the particles moving towards each others ? */
   const float omega_ij = fminf(dvdr, 0.f);
diff --git a/src/hydro/Minimal/hydro_iact.h b/src/hydro/Minimal/hydro_iact.h
index 6afb9d8d38a4fc7f1d38b7286720ddb7f3c51ab4..b3b81a9a0dfe41e7bfafe51050d6f7cf7157e31c 100644
--- a/src/hydro/Minimal/hydro_iact.h
+++ b/src/hydro/Minimal/hydro_iact.h
@@ -16,8 +16,8 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  *
  ******************************************************************************/
-#ifndef SWIFT_RUNNER_IACT_H
-#define SWIFT_RUNNER_IACT_H
+#ifndef SWIFT_RUNNER_IACT_MINIMAL_H
+#define SWIFT_RUNNER_IACT_MINIMAL_H
 
 /* Includes. */
 #include "const.h"
@@ -38,33 +38,31 @@
 __attribute__((always_inline)) INLINE static void runner_iact_density(
     float r2, float *dx, float hi, float hj, struct part *pi, struct part *pj) {
 
-  float r = sqrtf(r2);
-  float xi, xj;
-  float h_inv;
   float wi, wj, wi_dx, wj_dx;
-  float mi, mj;
+
+  const float r = sqrtf(r2);
 
   /* Get the masses. */
-  mi = pi->mass;
-  mj = pj->mass;
+  const float mi = pi->mass;
+  const float mj = pj->mass;
 
   /* Compute density of pi. */
-  h_inv = 1.0 / hi;
-  xi = r * h_inv;
+  const float hi_inv = 1.f / hi;
+  const float xi = r * hi_inv;
   kernel_deval(xi, &wi, &wi_dx);
 
   pi->rho += mj * wi;
-  pi->rho_dh -= mj * (3.0 * wi + xi * wi_dx);
+  pi->rho_dh -= mj * (3.f * wi + xi * wi_dx);
   pi->density.wcount += wi;
   pi->density.wcount_dh -= xi * wi_dx;
 
   /* Compute density of pj. */
-  h_inv = 1.f / hj;
-  xj = r * h_inv;
+  const float hj_inv = 1.f / hj;
+  const float xj = r * hj_inv;
   kernel_deval(xj, &wj, &wj_dx);
 
   pj->rho += mi * wj;
-  pj->rho_dh -= mi * (3.0 * wj + xj * wj_dx);
+  pj->rho_dh -= mi * (3.f * wj + xj * wj_dx);
   pj->density.wcount += wj;
   pj->density.wcount_dh -= xj * wj_dx;
 }
@@ -76,24 +74,20 @@ __attribute__((always_inline)) INLINE static void runner_iact_density(
 __attribute__((always_inline)) INLINE static void runner_iact_nonsym_density(
     float r2, float *dx, float hi, float hj, struct part *pi, struct part *pj) {
 
-  float r;
-  float xi;
-  float h_inv;
   float wi, wi_dx;
-  float mj;
 
   /* Get the masses. */
-  mj = pj->mass;
+  const float mj = pj->mass;
 
   /* Get r and r inverse. */
-  r = sqrtf(r2);
+  const float r = sqrtf(r2);
 
-  h_inv = 1.f / hi;
-  xi = r * h_inv;
+  const float h_inv = 1.f / hi;
+  const float xi = r * h_inv;
   kernel_deval(xi, &wi, &wi_dx);
 
   pi->rho += mj * wi;
-  pi->rho_dh -= mj * (3.0 * wi + xi * wi_dx);
+  pi->rho_dh -= mj * (3.f * wi + xi * wi_dx);
   pi->density.wcount += wi;
   pi->density.wcount_dh -= xi * wi_dx;
 }
@@ -148,7 +142,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_force(
   /* Compute sound speeds */
   const float ci = sqrtf(const_hydro_gamma * pressurei / rhoi);
   const float cj = sqrtf(const_hydro_gamma * pressurej / rhoj);
-  float v_sig = ci + cj + 3.f * omega_ij;
+  const float v_sig = ci + cj + 3.f * omega_ij;
 
   /* SPH acceleration term */
   const float sph_term = (P_over_rho_i * wi_dr + P_over_rho_j * wj_dr) * r_inv;
@@ -225,7 +219,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force(
   /* Compute sound speeds */
   const float ci = sqrtf(const_hydro_gamma * pressurei / rhoi);
   const float cj = sqrtf(const_hydro_gamma * pressurej / rhoj);
-  float v_sig = ci + cj + 3.f * omega_ij;
+  const float v_sig = ci + cj + 3.f * omega_ij;
 
   /* SPH acceleration term */
   const float sph_term = (P_over_rho_i * wi_dr + P_over_rho_j * wj_dr) * r_inv;
@@ -245,4 +239,4 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force(
   pi->force.v_sig = fmaxf(pi->force.v_sig, v_sig);
 }
 
-#endif /* SWIFT_RUNNER_IACT_H */
+#endif /* SWIFT_RUNNER_IACT_MINIMAL_H */
diff --git a/src/multipole.h b/src/multipole.h
index 91ba6df965ce9d3b088d538411b7f0a8555ba0e4..b7c20ddff5c3f1afc00af501a53b9659c8728ce8 100644
--- a/src/multipole.h
+++ b/src/multipole.h
@@ -127,7 +127,7 @@ __attribute__((always_inline)) INLINE static void multipole_iact_mp(
 
 /* Compute the forces on both multipoles. */
 #if multipole_order == 1
-  for (k = 0; k < 3; k++) p->a[k] += dx[k] * acc;
+  for (k = 0; k < 3; k++) p->a_grav[k] += dx[k] * acc;
 #else
 #error( "Multipoles of order %i not yet implemented." , multipole_order )
 #endif
diff --git a/src/parser.c b/src/parser.c
new file mode 100644
index 0000000000000000000000000000000000000000..06dc819842d54d952704e4e0c40ebec5b561f691
--- /dev/null
+++ b/src/parser.c
@@ -0,0 +1,265 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2016 James Willis (james.s.willis@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+/* Config parameters. */
+#include "../config.h"
+
+/* Some standard headers. */
+/* Needs to be included so that strtok returns char * instead of a int *. */
+#include <string.h>
+#include <stdlib.h>
+
+/* This object's header. */
+#include "parser.h"
+
+/* Local headers. */
+#include "error.h"
+
+/* Private functions. */
+static int count_char(char *str, char val);
+static void parse_line(FILE *fp, struct swift_params *params);
+
+/**
+ * @brief Reads an input file and stores each parameter in a structure.
+ *
+ * @param file_name Name of file to be read
+ * @param params Structure to be populated from file
+ */
+
+void parser_read_file(const char *file_name, struct swift_params *params) {
+
+  FILE *fp;
+
+  params->count = 0;
+
+  /* Open file for reading */
+  fp = fopen(file_name, "r");
+
+  if (fp == NULL) {
+    error("Error opening parameter file: %s", file_name);
+  }
+
+  /* Read until the end of the file is reached.*/
+  while (!feof(fp)) {
+    parse_line(fp, params);
+  }
+
+  fclose(fp);
+}
+
+/**
+ * @brief Counts the number of times a specific character appears in a string.
+ *
+ * @param str String to be checked
+ * @param val Character to be counted
+ */
+
+static int count_char(char *str, char val) {
+
+  int count = 0;
+
+  /* Check if the line contains the character */
+  while (*str) {
+    if (*str++ == val) ++count;
+  }
+
+  return count;
+}
+
+/**
+ * @brief Parses a line from a file and stores any parameters in a structure.
+ *
+ * @param fp File pointer to file to be read
+ * @param params Structure to be populated from file
+ *
+ */
+
+static void parse_line(FILE *fp, struct swift_params *params) {
+
+  char line[PARSER_MAX_LINE_SIZE];
+  char trim_line[PARSER_MAX_LINE_SIZE];
+
+  /* Read a line of the file */
+  if (fgets(line, PARSER_MAX_LINE_SIZE, fp) != NULL) {
+
+    char *token;
+    /* Remove comments */
+    token = strtok(line, PARSER_COMMENT_CHAR);
+    strcpy(trim_line, token);
+
+    /* Check if the line contains a value */
+    if (strchr(trim_line, PARSER_VALUE_CHAR)) {
+      /* Check for more than one parameter on the same line. */
+      if (count_char(trim_line, PARSER_VALUE_CHAR) > 1) {
+        error("Found more than one parameter in '%s', only one allowed.", line);
+      } else {
+        /* Take first token as the parameter name. */
+        token = strtok(trim_line, PARSER_VALUE_STRING);
+        strcpy(params->data[params->count].name, token);
+
+        /* Take second token as the parameter value. */
+        token = strtok(NULL, " #\n");
+        strcpy(params->data[params->count++].value, token);
+      }
+    }
+  }
+}
+
+/**
+ * @brief Retrieve integer parameter from structure.
+ *
+ * @param params Structure that holds the parameters
+ * @param name Name of the parameter to be found
+ * @param retParam Value of the parameter found
+ *
+ */
+
+void parser_get_param_int(struct swift_params *params, char *name,
+                          int *retParam) {
+
+  char str[128];
+
+  for (int i = 0; i < params->count; i++) {
+
+    /*strcmp returns 0 if both strings are the same.*/
+    if (!strcmp(name, params->data[i].name)) {
+
+      /* Check that exactly one number is parsed. */
+      if (sscanf(params->data[i].value, "%d%s", retParam, str) != 1) {
+        error(
+            "Tried parsing int '%s' but found '%s' with illegal integer "
+            "characters '%s'.",
+            params->data[i].name, params->data[i].value, str);
+      }
+
+      return;
+    }
+  }
+
+  message("Cannot find '%s' in the structure.", name);
+}
+
+/**
+ * @brief Retrieve float parameter from structure.
+ *
+ * @param params Structure that holds the parameters
+ * @param name Name of the parameter to be found
+ * @param retParam Value of the parameter found
+ *
+ */
+
+void parser_get_param_float(struct swift_params *params, char *name,
+                            float *retParam) {
+
+  char str[128];
+
+  for (int i = 0; i < params->count; i++) {
+
+    /*strcmp returns 0 if both strings are the same.*/
+    if (!strcmp(name, params->data[i].name)) {
+
+      /* Check that exactly one number is parsed. */
+      if (sscanf(params->data[i].value, "%f%s", retParam, str) != 1) {
+        error(
+            "Tried parsing float '%s' but found '%s' with illegal float "
+            "characters '%s'.",
+            params->data[i].name, params->data[i].value, str);
+      }
+
+      return;
+    }
+  }
+
+  message("Cannot find '%s' in the structure.", name);
+}
+
+/**
+ * @brief Retrieve double parameter from structure.
+ *
+ * @param params Structure that holds the parameters
+ * @param name Name of the parameter to be found
+ * @param retParam Value of the parameter found
+ *
+ */
+
+void parser_get_param_double(struct swift_params *params, char *name,
+                             double *retParam) {
+
+  char str[128];
+
+  for (int i = 0; i < params->count; i++) {
+
+    /*strcmp returns 0 if both strings are the same.*/
+    if (!strcmp(name, params->data[i].name)) {
+
+      /* Check that exactly one number is parsed. */
+      if (sscanf(params->data[i].value, "%lf", retParam) != 1) {
+        error(
+            "Tried parsing double '%s' but found '%s' with illegal double "
+            "characters '%s'.",
+            params->data[i].name, params->data[i].value, str);
+      }
+
+      return;
+    }
+  }
+
+  message("Cannot find '%s' in the structure.", name);
+}
+
+/**
+ * @brief Retrieve string parameter from structure.
+ *
+ * @param params Structure that holds the parameters
+ * @param name Name of the parameter to be found
+ * @param retParam Value of the parameter found
+ *
+ */
+
+void parser_get_param_string(struct swift_params *params, char *name,
+                             char *retParam) {
+
+  for (int i = 0; i < params->count; i++) {
+
+    /*strcmp returns 0 if both strings are the same.*/
+    if (!strcmp(name, params->data[i].name)) {
+      strcpy(retParam, params->data[i].value);
+      return;
+    }
+  }
+}
+
+/**
+ * @brief Prints the contents of the parameter structure.
+ *
+ * @param params Structure that holds the parameters
+ *
+ */
+
+void parser_print_params(struct swift_params *params) {
+
+  printf("\n--------------------------\n");
+  printf("|  SWIFT Parameter File  |\n");
+  printf("--------------------------\n");
+
+  for (int i = 0; i < params->count; i++) {
+    printf("Parameter name: %s\n", params->data[i].name);
+    printf("Parameter value: %s\n", params->data[i].value);
+  }
+}
diff --git a/src/parser.h b/src/parser.h
new file mode 100644
index 0000000000000000000000000000000000000000..2fb4148944cd423da016341744cb6d58e222182e
--- /dev/null
+++ b/src/parser.h
@@ -0,0 +1,54 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2016 James Willis (james.s.willis@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+#ifndef SWIFT_PARSER_H
+#define SWIFT_PARSER_H
+
+#include <stdio.h>
+
+#define PARSER_MAX_LINE_SIZE 128
+#define PARSER_MAX_NO_OF_PARAMS 512
+
+#define PARSER_COMMENT_CHAR "#"
+#define PARSER_VALUE_CHAR ':'
+#define PARSER_VALUE_STRING ":"
+#define PARSER_END_OF_FILE "..."
+
+struct parameter {
+  char name[PARSER_MAX_LINE_SIZE];
+  char value[PARSER_MAX_LINE_SIZE];
+};
+
+struct swift_params {
+  struct parameter data[PARSER_MAX_NO_OF_PARAMS];
+  int count;
+};
+
+/* Public API. */
+void parser_read_file(const char *file_name, struct swift_params *params);
+void parser_print_params(struct swift_params *params);
+void parser_get_param_int(struct swift_params *params, char *name,
+                          int *retParam);
+void parser_get_param_float(struct swift_params *params, char *name,
+                            float *retParam);
+void parser_get_param_double(struct swift_params *params, char *name,
+                             double *retParam);
+void parser_get_param_string(struct swift_params *params, char *name,
+                             char *retParam);
+
+#endif /* SWIFT_PARSER_H */
diff --git a/src/part.c b/src/part.c
index 6a99325ef23a7062fafb387fa3f3bd6b2203d057..b89abdde40fe8c7a57d1e9ac9e18fece83ba1f21 100644
--- a/src/part.c
+++ b/src/part.c
@@ -26,33 +26,21 @@
 #endif
 
 /* This object's header. */
+#include "error.h"
 #include "part.h"
 
 #ifdef WITH_MPI
-/**
- * @brief Registers and returns an MPI type for the particles
- *
- * @param part_type The type container
- */
-void part_create_mpi_type(MPI_Datatype* part_type) {
-
-  /* This is not the recommended way of doing this.
-     One should define the structure field by field
-     But as long as we don't do serialization via MPI-IO
-     we don't really care.
-     Also we would have to modify this function everytime something
-     is added to the part structure. */
-  MPI_Type_contiguous(sizeof(struct part) / sizeof(unsigned char), MPI_BYTE,
-                      part_type);
-  MPI_Type_commit(part_type);
-}
+/* MPI data type for the particle transfers */
+MPI_Datatype part_mpi_type;
+MPI_Datatype xpart_mpi_type;
+MPI_Datatype gpart_mpi_type;
+#endif
 
+#ifdef WITH_MPI
 /**
- * @brief Registers and returns an MPI type for the xparticles
- *
- * @param xpart_type The type container
+ * @brief Registers MPI particle types.
  */
-void xpart_create_mpi_type(MPI_Datatype* xpart_type) {
+void part_create_mpi_types() {
 
   /* This is not the recommended way of doing this.
      One should define the structure field by field
@@ -60,9 +48,20 @@ void xpart_create_mpi_type(MPI_Datatype* xpart_type) {
      we don't really care.
      Also we would have to modify this function everytime something
      is added to the part structure. */
-  MPI_Type_contiguous(sizeof(struct xpart) / sizeof(unsigned char), MPI_BYTE,
-                      xpart_type);
-  MPI_Type_commit(xpart_type);
+  if (MPI_Type_contiguous(sizeof(struct part) / sizeof(unsigned char), MPI_BYTE,
+                          &part_mpi_type) != MPI_SUCCESS ||
+      MPI_Type_commit(&part_mpi_type) != MPI_SUCCESS) {
+    error("Failed to create MPI type for parts.");
+  }
+  if (MPI_Type_contiguous(sizeof(struct xpart) / sizeof(unsigned char),
+                          MPI_BYTE, &xpart_mpi_type) != MPI_SUCCESS ||
+      MPI_Type_commit(&xpart_mpi_type) != MPI_SUCCESS) {
+    error("Failed to create MPI type for xparts.");
+  }
+  if (MPI_Type_contiguous(sizeof(struct gpart) / sizeof(unsigned char),
+                          MPI_BYTE, &gpart_mpi_type) != MPI_SUCCESS ||
+      MPI_Type_commit(&gpart_mpi_type) != MPI_SUCCESS) {
+    error("Failed to create MPI type for gparts.");
+  }
 }
-
 #endif
diff --git a/src/part.h b/src/part.h
index 2117840204dbc262faf0501b3c45a167a092d6de..e76a367448a032dc6fafd982f7c6e62decf15de1 100644
--- a/src/part.h
+++ b/src/part.h
@@ -35,8 +35,8 @@
 
 /* Some constants. */
 #define part_align 64
-#define gpart_align 32
 #define xpart_align 32
+#define gpart_align 32
 
 /* Import the right particle definition */
 #if defined(MINIMAL_SPH)
@@ -54,8 +54,12 @@
 #include "./gravity/Default/gravity_part.h"
 
 #ifdef WITH_MPI
-void part_create_mpi_type(MPI_Datatype* part_type);
-void xpart_create_mpi_type(MPI_Datatype* xpart_type);
+/* MPI data type for the particle transfers */
+extern MPI_Datatype part_mpi_type;
+extern MPI_Datatype xpart_mpi_type;
+extern MPI_Datatype gpart_mpi_type;
+
+void part_create_mpi_types();
 #endif
 
 #endif /* SWIFT_PART_H */
diff --git a/src/proxy.c b/src/proxy.c
index f58847988946c347367a0f172048f8cc96f26914..02263a5653bdcdd2d1bf0a86523ed1a599d4bf21 100644
--- a/src/proxy.c
+++ b/src/proxy.c
@@ -189,20 +189,21 @@ void proxy_parts_exch1(struct proxy *p) {
 #ifdef WITH_MPI
 
   /* Send the number of particles. */
-  if (MPI_Isend(&p->nr_parts_out, 1, MPI_INT, p->nodeID,
+  p->buff_out[0] = p->nr_parts_out;
+  p->buff_out[1] = p->nr_gparts_out;
+  if (MPI_Isend(p->buff_out, 2, MPI_INT, p->nodeID,
                 p->mynodeID * proxy_tag_shift + proxy_tag_count, MPI_COMM_WORLD,
                 &p->req_parts_count_out) != MPI_SUCCESS)
     error("Failed to isend nr of parts.");
-  // message( "isent particle count (%i) from node %i to node %i." ,
-  // p->nr_parts_out , p->mynodeID , p->nodeID ); fflush(stdout);
+  /* message( "isent particle counts [%i, %i] from node %i to node %i." ,
+  p->buff_out[0], p->buff_out[1], p->mynodeID , p->nodeID ); fflush(stdout); */
 
   /* Send the particle buffers. */
   if (p->nr_parts_out > 0) {
-    if (MPI_Isend(p->parts_out, sizeof(struct part) * p->nr_parts_out, MPI_BYTE,
-                  p->nodeID, p->mynodeID * proxy_tag_shift + proxy_tag_parts,
+    if (MPI_Isend(p->parts_out, p->nr_parts_out, part_mpi_type, p->nodeID,
+                  p->mynodeID * proxy_tag_shift + proxy_tag_parts,
                   MPI_COMM_WORLD, &p->req_parts_out) != MPI_SUCCESS ||
-        MPI_Isend(p->xparts_out, sizeof(struct xpart) * p->nr_parts_out,
-                  MPI_BYTE, p->nodeID,
+        MPI_Isend(p->xparts_out, p->nr_parts_out, xpart_mpi_type, p->nodeID,
                   p->mynodeID * proxy_tag_shift + proxy_tag_xparts,
                   MPI_COMM_WORLD, &p->req_xparts_out) != MPI_SUCCESS)
       error("Failed to isend part data.");
@@ -213,14 +214,20 @@ void proxy_parts_exch1(struct proxy *p) {
               p->parts_out[k].id, p->parts_out[k].x[0], p->parts_out[k].x[1],
               p->parts_out[k].x[2], p->parts_out[k].h, p->nodeID);*/
   }
+  if (p->nr_gparts_out > 0) {
+    if (MPI_Isend(p->gparts_out, p->nr_gparts_out, gpart_mpi_type, p->nodeID,
+                  p->mynodeID * proxy_tag_shift + proxy_tag_gparts,
+                  MPI_COMM_WORLD, &p->req_gparts_out) != MPI_SUCCESS)
+      error("Failed to isend part data.");
+    // message( "isent gpart data (%i) to node %i." , p->nr_parts_out ,
+    // p->nodeID ); fflush(stdout);
+  }
 
   /* Receive the number of particles. */
-  if (MPI_Irecv(&p->nr_parts_in, 1, MPI_INT, p->nodeID,
+  if (MPI_Irecv(p->buff_in, 2, MPI_INT, p->nodeID,
                 p->nodeID * proxy_tag_shift + proxy_tag_count, MPI_COMM_WORLD,
                 &p->req_parts_count_in) != MPI_SUCCESS)
     error("Failed to irecv nr of parts.");
-// message( "irecv particle count on node %i from node %i." , p->mynodeID ,
-// p->nodeID ); fflush(stdout);
 
 #else
   error("SWIFT was not compiled with MPI support.");
@@ -231,6 +238,10 @@ void proxy_parts_exch2(struct proxy *p) {
 
 #ifdef WITH_MPI
 
+  /* Unpack the incomming parts counts. */
+  p->nr_parts_in = p->buff_in[0];
+  p->nr_gparts_in = p->buff_in[1];
+
   /* Is there enough space in the buffer? */
   if (p->nr_parts_in > p->size_parts_in) {
     do {
@@ -244,19 +255,36 @@ void proxy_parts_exch2(struct proxy *p) {
                                                p->size_parts_in)) == NULL)
       error("Failed to re-allocate parts_in buffers.");
   }
+  if (p->nr_gparts_in > p->size_gparts_in) {
+    do {
+      p->size_gparts_in *= proxy_buffgrow;
+    } while (p->nr_gparts_in > p->size_gparts_in);
+    free(p->gparts_in);
+    if ((p->gparts_in = (struct gpart *)malloc(sizeof(struct gpart) *
+                                               p->size_gparts_in)) == NULL)
+      error("Failed to re-allocate gparts_in buffers.");
+  }
 
   /* Receive the particle buffers. */
   if (p->nr_parts_in > 0) {
-    if (MPI_Irecv(p->parts_in, sizeof(struct part) * p->nr_parts_in, MPI_BYTE,
-                  p->nodeID, p->nodeID * proxy_tag_shift + proxy_tag_parts,
-                  MPI_COMM_WORLD, &p->req_parts_in) != MPI_SUCCESS ||
-        MPI_Irecv(p->xparts_in, sizeof(struct xpart) * p->nr_parts_in, MPI_BYTE,
-                  p->nodeID, p->nodeID * proxy_tag_shift + proxy_tag_xparts,
+    if (MPI_Irecv(p->parts_in, p->nr_parts_in, part_mpi_type, p->nodeID,
+                  p->nodeID * proxy_tag_shift + proxy_tag_parts, MPI_COMM_WORLD,
+                  &p->req_parts_in) != MPI_SUCCESS ||
+        MPI_Irecv(p->xparts_in, p->nr_parts_in, xpart_mpi_type, p->nodeID,
+                  p->nodeID * proxy_tag_shift + proxy_tag_xparts,
                   MPI_COMM_WORLD, &p->req_xparts_in) != MPI_SUCCESS)
       error("Failed to irecv part data.");
     // message( "irecv particle data (%i) from node %i." , p->nr_parts_in ,
     // p->nodeID ); fflush(stdout);
   }
+  if (p->nr_gparts_in > 0) {
+    if (MPI_Irecv(p->gparts_in, p->nr_gparts_in, gpart_mpi_type, p->nodeID,
+                  p->nodeID * proxy_tag_shift + proxy_tag_gparts,
+                  MPI_COMM_WORLD, &p->req_gparts_in) != MPI_SUCCESS)
+      error("Failed to irecv gpart data.");
+    // message( "irecv gpart data (%i) from node %i." , p->nr_gparts_in ,
+    // p->nodeID ); fflush(stdout);
+  }
 
 #else
   error("SWIFT was not compiled with MPI support.");
@@ -303,6 +331,37 @@ void proxy_parts_load(struct proxy *p, const struct part *parts,
   p->nr_parts_out += N;
 }
 
+/**
+ * @brief Load parts onto a proxy for exchange.
+ *
+ * @param p The #proxy.
+ * @param gparts Pointer to an array of #gpart to send.
+ * @param N The number of parts.
+ */
+
+void proxy_gparts_load(struct proxy *p, const struct gpart *gparts, int N) {
+
+  /* Is there enough space in the buffer? */
+  if (p->nr_gparts_out + N > p->size_gparts_out) {
+    do {
+      p->size_gparts_out *= proxy_buffgrow;
+    } while (p->nr_gparts_out + N > p->size_gparts_out);
+    struct gpart *tp;
+    if ((tp = (struct gpart *)malloc(sizeof(struct gpart) *
+                                     p->size_gparts_out)) == NULL)
+      error("Failed to re-allocate gparts_out buffers.");
+    memcpy(tp, p->gparts_out, sizeof(struct gpart) * p->nr_gparts_out);
+    free(p->gparts_out);
+    p->gparts_out = tp;
+  }
+
+  /* Copy the parts and xparts data to the buffer. */
+  memcpy(&p->gparts_out[p->nr_gparts_out], gparts, sizeof(struct gpart) * N);
+
+  /* Increase the counters. */
+  p->nr_gparts_out += N;
+}
+
 /**
  * @brief Initialize the given proxy.
  *
@@ -352,4 +411,20 @@ void proxy_init(struct proxy *p, int mynodeID, int nodeID) {
       error("Failed to allocate parts_out buffers.");
   }
   p->nr_parts_out = 0;
+
+  /* Allocate the gpart send and receive buffers, if needed. */
+  if (p->gparts_in == NULL) {
+    p->size_gparts_in = proxy_buffinit;
+    if ((p->gparts_in = (struct gpart *)malloc(sizeof(struct gpart) *
+                                               p->size_gparts_in)) == NULL)
+      error("Failed to allocate gparts_in buffers.");
+  }
+  p->nr_gparts_in = 0;
+  if (p->gparts_out == NULL) {
+    p->size_gparts_out = proxy_buffinit;
+    if ((p->gparts_out = (struct gpart *)malloc(sizeof(struct gpart) *
+                                                p->size_gparts_out)) == NULL)
+      error("Failed to allocate gparts_out buffers.");
+  }
+  p->nr_gparts_out = 0;
 }
diff --git a/src/proxy.h b/src/proxy.h
index 1a0272dd6c948df0130f733bb573477eeff1d0db..5a747187e05a78a109ce4523ebb3c9d5fe2ad717 100644
--- a/src/proxy.h
+++ b/src/proxy.h
@@ -32,7 +32,8 @@
 #define proxy_tag_count 0
 #define proxy_tag_parts 1
 #define proxy_tag_xparts 2
-#define proxy_tag_cells 3
+#define proxy_tag_gparts 3
+#define proxy_tag_cells 4
 
 /* Data structure for the proxy. */
 struct proxy {
@@ -53,14 +54,21 @@ struct proxy {
   /* The parts and xparts buffers for input and output. */
   struct part *parts_in, *parts_out;
   struct xpart *xparts_in, *xparts_out;
+  struct gpart *gparts_in, *gparts_out;
   int size_parts_in, size_parts_out;
   int nr_parts_in, nr_parts_out;
+  int size_gparts_in, size_gparts_out;
+  int nr_gparts_in, nr_gparts_out;
+
+  /* Buffer to hold the incomming/outgoing particle counts. */
+  int buff_out[2], buff_in[2];
 
 /* MPI request handles. */
 #ifdef WITH_MPI
   MPI_Request req_parts_count_out, req_parts_count_in;
   MPI_Request req_parts_out, req_parts_in;
   MPI_Request req_xparts_out, req_xparts_in;
+  MPI_Request req_gparts_out, req_gparts_in;
   MPI_Request req_cells_count_out, req_cells_count_in;
   MPI_Request req_cells_out, req_cells_in;
 #endif
@@ -70,6 +78,7 @@ struct proxy {
 void proxy_init(struct proxy *p, int mynodeID, int nodeID);
 void proxy_parts_load(struct proxy *p, const struct part *parts,
                       const struct xpart *xparts, int N);
+void proxy_gparts_load(struct proxy *p, const struct gpart *gparts, int N);
 void proxy_parts_exch1(struct proxy *p);
 void proxy_parts_exch2(struct proxy *p);
 void proxy_addcell_in(struct proxy *p, struct cell *c);
diff --git a/src/runner.c b/src/runner.c
index cd4ed426f3318bc5aa981d9fa07ca4d17bf1ebfc..3341f3213261ff8d81625e9dc2103148b169bd32 100644
--- a/src/runner.c
+++ b/src/runner.c
@@ -171,7 +171,7 @@ void runner_dograv_external(struct runner *r, struct cell *c) {
 
 
 /**
- * @Brief Sort the entries in ascending order using QuickSort.
+ * @brief Sort the entries in ascending order using QuickSort.
  *
  * @param sort The entries
  * @param N The number of entries.
@@ -559,9 +559,10 @@ void runner_dogsort(struct runner *r, struct cell *c, int flags, int clock) {
 
 void runner_doinit(struct runner *r, struct cell *c, int timer) {
 
-  struct part *p, *parts = c->parts;
-  struct gpart *g, *gparts = c->gparts;
-  const int count = c->count, gcount = c->gcount;
+  struct part *const parts = c->parts;
+  struct gpart *const gparts = c->gparts;
+  const int count = c->count;
+  const int gcount = c->gcount;
   const int ti_current = r->e->ti_current;
 
   TIMER_TIC;
@@ -577,25 +578,25 @@ void runner_doinit(struct runner *r, struct cell *c, int timer) {
     for (int i = 0; i < count; i++) {
 
       /* Get a direct pointer on the part. */
-      p = &parts[i];
+      struct part *const p = &parts[i];
 
       if (p->ti_end <= ti_current) {
 
         /* Get ready for a density calculation */
         hydro_init_part(p);
       }
-	 }
+    }
 
-    /* Loop over the parts in this cell. */
+    /* Loop over the gparts in this cell. */
     for (int i = 0; i < gcount; i++) {
 
       /* Get a direct pointer on the part. */
-      g = &gparts[i];
+      struct gpart *const gp = &gparts[i];
 
-      if (g->ti_end <= ti_current) {
+      if (gp->ti_end <= ti_current) {
 
         /* Get ready for a density calculation */
-        gravity_init_gpart(g);
+        gravity_init_part(gp);
       }
     }
   }
@@ -753,7 +754,7 @@ void runner_doghost(struct runner *r, struct cell *c) {
 }
 
 /**
- * @brief Drift particles forward in time
+ * @brief Drift particles and g-particles forward in time
  *
  * @param r The runner thread.
  * @param c The cell.
@@ -765,14 +766,12 @@ void runner_dodrift(struct runner *r, struct cell *c, int timer) {
   const int nr_gparts = c->gcount;
   const double timeBase = r->e->timeBase;
   const double dt = (r->e->ti_current - r->e->ti_old) * timeBase;
-  const float ti_old = r->e->ti_old;
-  const float ti_current = r->e->ti_current;
-  struct part *restrict p, *restrict parts = c->parts;
-  struct xpart *restrict xp, *restrict xparts = c->xparts;
-  struct gpart *restrict g,
-  *restrict gparts = c->gparts;
+  const int ti_old = r->e->ti_old;
+  const int ti_current = r->e->ti_current;
+  struct part *const parts = c->parts;
+  struct xpart *const xparts = c->xparts;
+  struct gpart *const gparts = c->gparts;
   float dx_max = 0.f, dx2_max = 0.f, h_max = 0.f;
-  float w;
 
   TIMER_TIC
 #ifdef TASK_VERBOSE
@@ -782,12 +781,24 @@ void runner_dodrift(struct runner *r, struct cell *c, int timer) {
   /* No children? */
   if (!c->split) {
 
-    /* Loop over all the particles in the cell */
+    /* Loop over all the g-particles in the cell */
+    for (int k = 0; k < nr_gparts; ++k) {
+
+      /* Get a handle on the gpart. */
+      struct gpart *const gp = &gparts[k];
+
+      /* Drift... */
+      gp->x[0] += gp->v_full[0] * dt;
+      gp->x[1] += gp->v_full[1] * dt;
+      gp->x[2] += gp->v_full[2] * dt;
+    }
+
+    /* Loop over all the particles in the cell (more work for these !) */
     for (int k = 0; k < nr_parts; k++) {
 
       /* Get a handle on the part. */
-      p = &parts[k];
-      xp = &xparts[k];
+      struct part *const p = &parts[k];
+      struct xpart *const xp = &xparts[k];
 
       /* Useful quantity */
       const float h_inv = 1.0f / p->h;
@@ -803,18 +814,18 @@ void runner_dodrift(struct runner *r, struct cell *c, int timer) {
       p->v[2] += p->a_hydro[2] * dt;
 
       /* Predict smoothing length */
-      w = p->h_dt * h_inv * dt;
-      if (fabsf(w) < 0.2f)
-        p->h *= approx_expf(w); /* 4th order expansion of exp(w) */
+      const float w1 = p->h_dt * h_inv * dt;
+      if (fabsf(w1) < 0.2f)
+        p->h *= approx_expf(w1); /* 4th order expansion of exp(w) */
       else
-        p->h *= expf(w);
+        p->h *= expf(w1);
 
       /* Predict density */
-      w = -3.0f * p->h_dt * h_inv * dt;
-      if (fabsf(w) < 0.2f)
-        p->rho *= approx_expf(w); /* 4th order expansion of exp(w) */
+      const float w2 = -3.0f * p->h_dt * h_inv * dt;
+      if (fabsf(w2) < 0.2f)
+        p->rho *= approx_expf(w2); /* 4th order expansion of exp(w) */
       else
-        p->rho *= expf(w);
+        p->rho *= expf(w2);
 
       /* Predict the values of the extra fields */
       hydro_predict_extra(p, xp, ti_old, ti_current, timeBase);
@@ -829,17 +840,6 @@ void runner_dodrift(struct runner *r, struct cell *c, int timer) {
       h_max = fmaxf(p->h, h_max);
     }
 
-    /* Loop over all gparts in the cell */
-    for (int k = 0; k < nr_gparts; k++) {
-      g = &gparts[k];
-      if (g->id == 0)
-        message(" dt= %f tx= %f tv=%f \n",dt, g->tx, g->tv);
-      g->x[0] += g->v_full[0] * dt;
-      g->x[1] += g->v_full[1] * dt;
-      g->x[2] += g->v_full[2] * dt;
-      g->tx   += dt;
-    }
-
     /* Now, get the maximal particle motion from its square */
     dx_max = sqrtf(dx2_max);
   }
@@ -882,21 +882,17 @@ void runner_dokick(struct runner *r, struct cell *c, int timer) {
   const double timeBase_inv = 1.0 / r->e->timeBase;
   const int count = c->count;
   const int gcount = c->gcount;
+  struct part *const parts = c->parts;
+  struct xpart *const xparts = c->xparts;
+  struct gpart *const gparts = c->gparts;
   const int is_fixdt =
       (r->e->policy & engine_policy_fixdt) == engine_policy_fixdt;
 
-  int new_dti;
-  int dti_timeline;
-
-  int updated = 0;
+  int updated = 0, g_updated = 0;
   int ti_end_min = max_nr_timesteps, ti_end_max = 0;
   double e_kin = 0.0, e_int = 0.0, e_pot = 0.0, mass = 0.0;
   float mom[3] = {0.0f, 0.0f, 0.0f};
   float ang[3] = {0.0f, 0.0f, 0.0f};
-  float x[3], v_full[3];
-  struct part *restrict p, *restrict parts = c->parts;
-  struct xpart *restrict xp, *restrict xparts = c->xparts;
-  struct gpart *restrict g, *restrict gparts = c->gparts;
 
   TIMER_TIC
 
@@ -907,17 +903,79 @@ void runner_dokick(struct runner *r, struct cell *c, int timer) {
   /* No children? */
   if (!c->split) {
 
+    /* Loop over the g-particles and kick the active ones. */
+    for (int k = 0; k < gcount; k++) {
+
+      /* Get a handle on the part. */
+      struct gpart *const gp = &gparts[k];
+
+      /* If the g-particle has no counterpart and needs to be kicked */
+      if (gp->id < 0 && (is_fixdt || gp->ti_end <= ti_current)) {
+
+        /* First, finish the force calculation */
+        gravity_end_force(gp);
+
+        /* Now we are ready to compute the next time-step size */
+        int new_dti;
+
+        if (is_fixdt) {
+
+          /* Now we have a time step, proceed with the kick */
+          new_dti = global_dt_max * timeBase_inv;
+
+        } else {
+
+          /* Compute the next timestep (gravity condition) */
+          float new_dt = gravity_compute_timestep(gp);
+
+          /* Limit timestep within the allowed range */
+          new_dt = fminf(new_dt, global_dt_max);
+          new_dt = fmaxf(new_dt, global_dt_min);
+
+          /* Convert to integer time */
+          new_dti = new_dt * timeBase_inv;
+
+          /* Recover the current timestep */
+          const int current_dti = gp->ti_end - gp->ti_begin;
+
+          /* Limit timestep increase */
+          if (current_dti > 0) new_dti = min(new_dti, 2 * current_dti);
+
+          /* Put this timestep on the time line */
+          int dti_timeline = max_nr_timesteps;
+          while (new_dti < dti_timeline) dti_timeline /= 2;
+
+          /* Now we have a time step, proceed with the kick */
+          new_dti = dti_timeline;
+        }
+
+        /* Compute the time step for this kick */
+        const int ti_start = (gp->ti_begin + gp->ti_end) / 2;
+        const int ti_end = gp->ti_end + new_dti / 2;
+        const double dt = (ti_end - ti_start) * timeBase;
+        const double half_dt = (ti_end - gp->ti_end) * timeBase;
+
+        /* Kick particles in momentum space */
+        gp->v_full[0] += gp->a_grav[0] * dt;
+        gp->v_full[1] += gp->a_grav[1] * dt;
+        gp->v_full[2] += gp->a_grav[2] * dt;
+
+        /* Extra kick work */
+        gravity_kick_extra(gp, dt, half_dt);
+
+        /* Number of updated g-particles */
+        g_updated++;
+      }
+    }
+
+    /* Now do the hydro ones... */
+
     /* Loop over the particles and kick the active ones. */
     for (int k = 0; k < count; k++) {
 
       /* Get a handle on the part. */
-      p = &parts[k];
-      xp = &xparts[k];
-
-      const float m = p->mass;
-      x[0] = p->x[0];
-      x[1] = p->x[1];
-      x[2] = p->x[2];
+      struct part *const p = &parts[k];
+      struct xpart *const xp = &xparts[k];
 
       /* If particle needs to be kicked */
       if (is_fixdt || p->ti_end <= ti_current) {
@@ -927,8 +985,10 @@ void runner_dokick(struct runner *r, struct cell *c, int timer) {
 
         /* And do the same of the extra variable */
         hydro_end_force(p);
+        if (p->gpart != NULL) gravity_end_force(p->gpart);
 
         /* Now we are ready to compute the next time-step size */
+        int new_dti;
 
         if (is_fixdt) {
 
@@ -937,9 +997,13 @@ void runner_dokick(struct runner *r, struct cell *c, int timer) {
 
         } else {
 
-          /* Compute the next timestep */
+          /* Compute the next timestep (hydro condition) */
           const float new_dt_hydro = hydro_compute_timestep(p, xp);
-          const float new_dt_grav = gravity_compute_timestep(p, xp);
+
+          /* Compute the next timestep (gravity condition) */
+          float new_dt_grav = FLT_MAX;
+          if (p->gpart != NULL)
+            new_dt_grav = gravity_compute_timestep(p->gpart);
 
           float new_dt = fminf(new_dt_hydro, new_dt_grav);
 
@@ -964,7 +1028,7 @@ void runner_dokick(struct runner *r, struct cell *c, int timer) {
           if (current_dti > 0) new_dti = min(new_dti, 2 * current_dti);
 
           /* Put this timestep on the time line */
-          dti_timeline = max_nr_timesteps;
+          int dti_timeline = max_nr_timesteps;
           while (new_dti < dti_timeline) dti_timeline /= 2;
 
           /* Now we have a time step, proceed with the kick */
@@ -974,34 +1038,51 @@ void runner_dokick(struct runner *r, struct cell *c, int timer) {
         /* Compute the time step for this kick */
         const int ti_start = (p->ti_begin + p->ti_end) / 2;
         const int ti_end = p->ti_end + new_dti / 2;
-        const float dt = (ti_end - ti_start) * timeBase;
-        const float half_dt = (ti_end - p->ti_end) * timeBase;
+        const double dt = (ti_end - ti_start) * timeBase;
+        const double half_dt = (ti_end - p->ti_end) * timeBase;
 
         /* Move particle forward in time */
         p->ti_begin = p->ti_end;
         p->ti_end = p->ti_begin + new_dti;
 
+        /* Get the acceleration */
+        float a_tot[3] = {p->a_hydro[0], p->a_hydro[1], p->a_hydro[2]};
+        if (p->gpart != NULL) {
+          a_tot[0] += p->gpart->a_grav[0];
+          a_tot[1] += p->gpart->a_grav[1];
+          a_tot[1] += p->gpart->a_grav[2];
+        }
+
         /* Kick particles in momentum space */
-        xp->v_full[0] += p->a_hydro[0] * dt;
-        xp->v_full[1] += p->a_hydro[1] * dt;
-        xp->v_full[2] += p->a_hydro[2] * dt;
+        xp->v_full[0] += a_tot[0] * dt;
+        xp->v_full[1] += a_tot[1] * dt;
+        xp->v_full[2] += a_tot[2] * dt;
+
+        if (p->gpart != NULL) {
+          p->gpart->v_full[0] = xp->v_full[0];
+          p->gpart->v_full[1] = xp->v_full[1];
+          p->gpart->v_full[2] = xp->v_full[2];
+        }
 
-        p->v[0] = xp->v_full[0] - half_dt * p->a_hydro[0];
-        p->v[1] = xp->v_full[1] - half_dt * p->a_hydro[1];
-        p->v[2] = xp->v_full[2] - half_dt * p->a_hydro[2];
+        /* Go back by half-step for the hydro velocity */
+        p->v[0] = xp->v_full[0] - half_dt * a_tot[0];
+        p->v[1] = xp->v_full[1] - half_dt * a_tot[1];
+        p->v[2] = xp->v_full[2] - half_dt * a_tot[2];
 
         /* Extra kick work */
         hydro_kick_extra(p, xp, dt, half_dt);
+        if (p->gpart != NULL) gravity_kick_extra(p->gpart, dt, half_dt);
 
         /* Number of updated particles */
         updated++;
+        if (p->gpart != NULL) g_updated++;
       }
 
       /* Now collect quantities for statistics */
 
-      v_full[0] = xp->v_full[0];
-      v_full[1] = xp->v_full[1];
-      v_full[2] = xp->v_full[2];
+      const double x[3] = {p->x[0], p->x[1], p->x[2]};
+      const float v_full[3] = {xp->v_full[0], xp->v_full[1], xp->v_full[2]};
+      const float m = p->mass;
 
       /* Collect mass */
       mass += m;
@@ -1026,75 +1107,7 @@ void runner_dokick(struct runner *r, struct cell *c, int timer) {
       ti_end_min = min(p->ti_end, ti_end_min);
       ti_end_max = max(p->ti_end, ti_end_max);
     }
-    /* For the moment we loop of gpart particles comppletely independently - this should be changed */
-
-    for(int k = 0; k < gcount; k++) {
-      g = &gparts[k];
-
-      x[0] = g->x[0];
-      x[1] = g->x[1];
-      x[2] = g->x[2];
-
-      /* If particle needs to be kicked */
-      if (is_fixdt || g->ti_end <= ti_current) {
-
-        if (is_fixdt) {
-
-          /* Now we have a time step, proceed with the kick */
-          new_dti = global_dt_max * timeBase_inv;
-
-        }
-        else {
-          /* need to calculate gravity step - todo */
-			 const float new_dt_external =  external_gravity_compute_timestep(g);
-			 
-          float new_dt = fminf(new_dt_external, global_dt_max);
-          new_dt = fmaxf(new_dt_external, global_dt_min);
-
-          /* Convert to integer time */
-          new_dti = new_dt * timeBase_inv;
-
-			 /* Recover the current timestep */
-          const int current_dti = g->ti_end - g->ti_begin;
-
-          /* Limit timestep increase */
-          if (current_dti > 0) new_dti = min(new_dti, 2 * current_dti);
-
-          /* Put this timestep on the time line */
-          dti_timeline = max_nr_timesteps;
-          while (new_dti < dti_timeline) dti_timeline /= 2;
-
-          /* Now we have a time step, proceed with the kick */
-          new_dti = dti_timeline;
-       }
-
-        /* Compute the time step for this kick */
-        const int ti_start = (g->ti_begin + g->ti_end) / 2;
-        const int ti_end = g->ti_end + new_dti / 2;
-        const float dt = (ti_end - ti_start) * timeBase;
-
-        /* Move particle forward in time */
-        g->ti_begin = g->ti_end;
-        g->ti_end   = g->ti_begin + new_dti;
-
-        if(g->id == 0)
-          message(" dt= %f tx= %f tv=%f \n",dt, g->tx, g->tv);
-
-        /* Kick particles in momentum space */
-        g->v_full[0] += g->a_grav_external[0] * dt;
-        g->v_full[1] += g->a_grav_external[1] * dt;
-        g->v_full[2] += g->a_grav_external[2] * dt;
-        g->tv        += dt;
-
-        /* Minimal time for next end of time-step */
-        ti_end_min = min(g->ti_end, ti_end_min);
-        ti_end_max = max(g->ti_end, ti_end_max);
 
-        /* Number of updated particles */
-        updated++;
-
-      }
-    }
   }
 
   /* Otherwise, aggregate data from children. */
@@ -1103,13 +1116,14 @@ void runner_dokick(struct runner *r, struct cell *c, int timer) {
     /* Loop over the progeny. */
     for (int k = 0; k < 8; k++)
       if (c->progeny[k] != NULL) {
-        struct cell *cp = c->progeny[k];
+        struct cell *const cp = c->progeny[k];
 
         /* Recurse */
         runner_dokick(r, cp, 0);
 
         /* And aggregate */
         updated += cp->updated;
+        g_updated += cp->g_updated;
         e_kin += cp->e_kin;
         e_int += cp->e_int;
         e_pot += cp->e_pot;
@@ -1127,6 +1141,7 @@ void runner_dokick(struct runner *r, struct cell *c, int timer) {
 
   /* Store the values. */
   c->updated = updated;
+  c->g_updated = g_updated;
   c->e_kin = e_kin;
   c->e_int = e_int;
   c->e_pot = e_pot;
diff --git a/src/runner_doiact.h b/src/runner_doiact.h
index cf5d56e94169b44e6cd2974a3422a0bc5e4610ac..de339db6133fcc829bdc6ee0ce9e537b68982422 100644
--- a/src/runner_doiact.h
+++ b/src/runner_doiact.h
@@ -1235,7 +1235,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) {
 #else
 
           /* Does pi need to be updated too? */
-          if (pi->dt <= dt_step) {
+          if (pi->ti_end <= ti_current) {
 
             /* Add this interaction to the symmetric queue. */
             r2q2[icount2] = r2;
diff --git a/src/runner_doiact_grav.h b/src/runner_doiact_grav.h
index f374339da75e31b39a5295fcd8bbc23c34d8d67d..02626295a49f314fef840bc044a476f5c9cf332d 100644
--- a/src/runner_doiact_grav.h
+++ b/src/runner_doiact_grav.h
@@ -267,9 +267,9 @@ void runner_dograv_down(struct runner *r, struct cell *c) {
     /* Apply the multipole acceleration to all gparts. */
     for (int k = 0; k < c->gcount; k++) {
       struct gpart *p = &c->gparts[k];
-      p->a[0] += m->a[0];
-      p->a[1] += m->a[1];
-      p->a[2] += m->a[2];
+      p->a_grav[0] += m->a[0];
+      p->a_grav[1] += m->a[1];
+      p->a_grav[2] += m->a[2];
     }
   }
 }
@@ -594,5 +594,4 @@ void runner_dosub_grav(struct runner *r, struct cell *ci, struct cell *cj,
 
   if (gettimer) TIMER_TOC(timer_dosub_grav);
 }
-
 #endif /* SWIFT_RUNNER_DOIACT_GRAV_H */
diff --git a/src/scheduler.c b/src/scheduler.c
index c7267197ad67abdd7983e8ea6c819016d31faf63..38a1cd8c663307e0c0378d8bec2e0cd3d8f37fa8 100644
--- a/src/scheduler.c
+++ b/src/scheduler.c
@@ -1096,7 +1096,7 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) {
         break;
       case task_type_recv:
 #ifdef WITH_MPI
-        err = MPI_Irecv(t->ci->parts, t->ci->count, s->part_mpi_type,
+        err = MPI_Irecv(t->ci->parts, t->ci->count, part_mpi_type,
                         t->ci->nodeID, t->flags, MPI_COMM_WORLD, &t->req);
         if (err != MPI_SUCCESS) {
           mpi_error(err, "Failed to emit irecv for particle data.");
@@ -1111,7 +1111,7 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) {
         break;
       case task_type_send:
 #ifdef WITH_MPI
-        err = MPI_Isend(t->ci->parts, t->ci->count, s->part_mpi_type,
+        err = MPI_Isend(t->ci->parts, t->ci->count, part_mpi_type,
                         t->cj->nodeID, t->flags, MPI_COMM_WORLD, &t->req);
         if (err != MPI_SUCCESS) {
           mpi_error(err, "Failed to emit isend for particle data.");
@@ -1352,12 +1352,6 @@ void scheduler_init(struct scheduler *s, struct space *space, int nr_tasks,
   s->tasks = NULL;
   s->tasks_ind = NULL;
   scheduler_reset(s, nr_tasks);
-
-/* Construct types for MPI communications */
-#ifdef WITH_MPI
-  part_create_mpi_type(&s->part_mpi_type);
-  xpart_create_mpi_type(&s->xpart_mpi_type);
-#endif
 }
 
 /**
diff --git a/src/scheduler.h b/src/scheduler.h
index 9c2a5bd7307817bd536930200ad08e7e37502e08..64c694aea295c13810a20b626055fc6c15eb0af8 100644
--- a/src/scheduler.h
+++ b/src/scheduler.h
@@ -100,12 +100,6 @@ struct scheduler {
 
   /* The node we are working on. */
   int nodeID;
-
-#ifdef WITH_MPI
-  /* MPI data type for the particle transfers */
-  MPI_Datatype part_mpi_type;
-  MPI_Datatype xpart_mpi_type;
-#endif
 };
 
 /* Function prototypes. */
diff --git a/src/space.c b/src/space.c
index 1db5f6bc238364943de3ce951713cd9efa2fc929..353bab9eda5b775deb60623c721aad88998d1f58 100644
--- a/src/space.c
+++ b/src/space.c
@@ -312,7 +312,7 @@ void space_regrid(struct space *s, double cell_max, int verbose) {
 
 void space_rebuild(struct space *s, double cell_max, int verbose) {
 
-  ticks tic = getticks();
+  const ticks tic = getticks();
 
   /* Be verbose about this. */
   message( "re)building space..." ); fflush(stdout);
@@ -324,23 +324,15 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
   int nr_gparts = s->nr_gparts;
   struct cell *restrict cells = s->cells;
 
-  double ih[3], dim[3];
-  int cdim[3];
-  ih[0] = s->ih[0];
-  ih[1] = s->ih[1];
-  ih[2] = s->ih[2];
-  dim[0] = s->dim[0];
-  dim[1] = s->dim[1];
-  dim[2] = s->dim[2];
-  cdim[0] = s->cdim[0];
-  cdim[1] = s->cdim[1];
-  cdim[2] = s->cdim[2];
+  const double ih[3] = {s->ih[0], s->ih[1], s->ih[2]};
+  const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]};
+  const int cdim[3] = {s->cdim[0], s->cdim[1], s->cdim[2]};
 
   /* Run through the particles and get their cell index. */
   // tic = getticks();
   const size_t ind_size = s->size_parts;
-  size_t *ind;
-  if ((ind = (size_t *)malloc(sizeof(size_t) * ind_size)) == NULL)
+  int *ind;
+  if ((ind = (int *)malloc(sizeof(int) * ind_size)) == NULL)
     error("Failed to allocate temporary particle indices.");
   for (int k = 0; k < nr_parts; k++) {
     struct part *restrict p = &s->parts[k];
@@ -353,37 +345,91 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
         cell_getid(cdim, p->x[0] * ih[0], p->x[1] * ih[1], p->x[2] * ih[2]);
       cells[ind[k]].count++;
   }
+  // message( "getting particle indices took %.3f %s." ,
+  // clocks_from_ticks(getticks() - tic), clocks_getunit()):
+
+  /* Run through the gravity particles and get their cell index. */
+  // tic = getticks();
+  const size_t gind_size = s->size_gparts;
+  int *gind;
+  if ((gind = (int *)malloc(sizeof(int) * gind_size)) == NULL)
+    error("Failed to allocate temporary g-particle indices.");
+  for (int k = 0; k < nr_gparts; k++) {
+    struct gpart *restrict gp = &s->gparts[k];
+    for (int j = 0; j < 3; j++)
+      if (gp->x[j] < 0.0)
+        gp->x[j] += dim[j];
+      else if (gp->x[j] >= dim[j])
+        gp->x[j] -= dim[j];
+    gind[k] =
+        cell_getid(cdim, gp->x[0] * ih[0], gp->x[1] * ih[1], gp->x[2] * ih[2]);
+    cells[gind[k]].gcount++;
+  }
 // message( "getting particle indices took %.3f %s." ,
-// clocks_from_ticks(getticks() - tic), clocks_getunit()):
+// clocks_from_ticks(getticks() - tic), clocks_getunit());
 
 #ifdef WITH_MPI
   /* Move non-local parts to the end of the list. */
-  const int nodeID = s->e->nodeID;
+  const int local_nodeID = s->e->nodeID;
   for (int k = 0; k < nr_parts; k++)
-    if (cells[ind[k]].nodeID != nodeID) {
+    if (cells[ind[k]].nodeID != local_nodeID) {
       cells[ind[k]].count -= 1;
       nr_parts -= 1;
-      struct part tp = s->parts[k];
+      const struct part tp = s->parts[k];
       s->parts[k] = s->parts[nr_parts];
       s->parts[nr_parts] = tp;
-      struct xpart txp = s->xparts[k];
+      if (s->parts[k].gpart != NULL) {
+        s->parts[k].gpart->part = &s->parts[k];
+      }
+      if (s->parts[nr_parts].gpart != NULL) {
+        s->parts[nr_parts].gpart->part = &s->parts[nr_parts];
+      }
+      const struct xpart txp = s->xparts[k];
       s->xparts[k] = s->xparts[nr_parts];
       s->xparts[nr_parts] = txp;
-      int t = ind[k];
+      const int t = ind[k];
       ind[k] = ind[nr_parts];
       ind[nr_parts] = t;
     }
 
+  /* Move non-local gparts to the end of the list. */
+  for (int k = 0; k < nr_gparts; k++)
+    if (cells[gind[k]].nodeID != local_nodeID) {
+      cells[gind[k]].gcount -= 1;
+      nr_gparts -= 1;
+      const struct gpart tp = s->gparts[k];
+      s->gparts[k] = s->gparts[nr_gparts];
+      s->gparts[nr_gparts] = tp;
+      if (s->gparts[k].id > 0) {
+        s->gparts[k].part->gpart = &s->gparts[k];
+      }
+      if (s->gparts[nr_gparts].id > 0) {
+        s->gparts[nr_gparts].part->gpart = &s->gparts[nr_gparts];
+      }
+      const int t = gind[k];
+      gind[k] = gind[nr_gparts];
+      gind[nr_gparts] = t;
+    }
+
   /* Exchange the strays, note that this potentially re-allocates
      the parts arrays. */
-  s->nr_parts =
-      nr_parts + engine_exchange_strays(s->e, nr_parts, &ind[nr_parts],
-                                        s->nr_parts - nr_parts);
+  /* TODO: This function also exchanges gparts, but this is shorted-out
+     until they are fully implemented. */
+  size_t nr_parts_exchanged = s->nr_parts - nr_parts;
+  size_t nr_gparts_exchanged = s->nr_gparts - nr_gparts;
+  engine_exchange_strays(s->e, nr_parts, &ind[nr_parts], &nr_parts_exchanged,
+                         nr_gparts, &gind[nr_gparts], &nr_gparts_exchanged);
+
+  /* Add post-processing, i.e. re-linking/creating of gparts here. */
+
+  /* Set the new particle counts. */
+  s->nr_parts = nr_parts + nr_parts_exchanged;
+  s->nr_gparts = nr_gparts + nr_gparts_exchanged;
 
   /* Re-allocate the index array if needed.. */
   if (s->nr_parts > ind_size) {
-    size_t *ind_new;
-    if ((ind_new = (size_t *)malloc(sizeof(size_t) * s->nr_parts)) == NULL)
+    int *ind_new;
+    if ((ind_new = (int *)malloc(sizeof(int) * s->nr_parts)) == NULL)
       error("Failed to allocate temporary particle indices.");
     memcpy(ind_new, ind, sizeof(size_t) * nr_parts);
     free(ind);
@@ -392,7 +438,7 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
 
   /* Assign each particle to its cell. */
   for (int k = nr_parts; k < s->nr_parts; k++) {
-    struct part *p = &s->parts[k];
+    const struct part *const p = &s->parts[k];
     ind[k] =
         cell_getid(cdim, p->x[0] * ih[0], p->x[1] * ih[1], p->x[2] * ih[2]);
     cells[ind[k]].count += 1;
@@ -423,65 +469,24 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
   /* We no longer need the indices as of here. */
   free(ind);
 
-  /* Run through the gravity particles and get their cell index. */
-  // tic = getticks();
-  const size_t gind_size = s->size_gparts;
-  size_t *gind;
-  if ((gind = (size_t *)malloc(sizeof(size_t) * gind_size)) == NULL)
-    error("Failed to allocate temporary g-particle indices.");
-  for (int k = 0; k < nr_gparts; k++) {
-    struct gpart *gp = &s->gparts[k];
-    for (int j = 0; j < 3; j++)
-      if (gp->x[j] < 0.0)
-        gp->x[j] += dim[j];
-      else if (gp->x[j] >= dim[j])
-        gp->x[j] -= dim[j];
-    gind[k] =
-        cell_getid(cdim, gp->x[0] * ih[0], gp->x[1] * ih[1], gp->x[2] * ih[2]);
-    cells[gind[k]].gcount++;
-  }
-// message( "getting particle indices took %.3f %s." ,
-// clocks_from_ticks(getticks() - tic), clocks_getunit());
-
 #ifdef WITH_MPI
 
-  /* Move non-local gparts to the end of the list. */
-  for (int k = 0; k < nr_gparts; k++)
-    if (cells[ind[k]].nodeID != nodeID) {
-      cells[ind[k]].gcount -= 1;
-      nr_gparts -= 1;
-      struct gpart tp = s->gparts[k];
-      s->gparts[k] = s->gparts[nr_gparts];
-      s->gparts[nr_gparts] = tp;
-      int t = ind[k];
-      ind[k] = ind[nr_gparts];
-      ind[nr_gparts] = t;
-    }
-
-  /* Exchange the strays, note that this potentially re-allocates
-     the parts arrays. */
-  // s->nr_gparts =
-  //    nr_gparts + engine_exchange_strays(s->e, nr_gparts, &ind[nr_gparts],
-  //                                        s->nr_gparts - nr_gparts);
-  if (nr_gparts > 0)
-    error("Need to implement the exchange of strays for the gparts");
-
   /* Re-allocate the index array if needed.. */
   if (s->nr_gparts > gind_size) {
-    size_t *gind_new;
-    if ((gind_new = (size_t *)malloc(sizeof(size_t) * s->nr_gparts)) == NULL)
+    int *gind_new;
+    if ((gind_new = (int *)malloc(sizeof(int) * s->nr_gparts)) == NULL)
       error("Failed to allocate temporary g-particle indices.");
-    memcpy(gind_new, gind, sizeof(size_t) * nr_gparts);
+    memcpy(gind_new, gind, sizeof(int) * nr_gparts);
     free(gind);
     gind = gind_new;
   }
 
   /* Assign each particle to its cell. */
   for (int k = nr_gparts; k < s->nr_gparts; k++) {
-    struct gpart *p = &s->gparts[k];
+    const struct gpart *const p = &s->gparts[k];
     gind[k] =
         cell_getid(cdim, p->x[0] * ih[0], p->x[1] * ih[1], p->x[2] * ih[2]);
-    cells[gind[k]].count += 1;
+    cells[gind[k]].gcount += 1;
     /* if ( cells[ ind[k] ].nodeID != nodeID )
         error( "Received part that does not belong to me (nodeID=%i)." , cells[
        ind[k] ].nodeID ); */
@@ -501,6 +506,28 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
   /* We no longer need the indices as of here. */
   free(gind);
 
+  /* Verify that the links are correct */
+  /* MATTHIEU: To be commented out once we are happy */
+  for (size_t k = 0; k < nr_gparts; ++k) {
+
+    if (s->gparts[k].id > 0) {
+
+      if (s->gparts[k].part->gpart != &s->gparts[k]) error("Linking problem !");
+
+      if (s->gparts[k].x[0] != s->gparts[k].part->x[0] ||
+          s->gparts[k].x[1] != s->gparts[k].part->x[1] ||
+          s->gparts[k].x[2] != s->gparts[k].part->x[2])
+        error("Linked particles are not at the same position !");
+    }
+  }
+  for (size_t k = 0; k < nr_parts; ++k) {
+
+    if (s->parts[k].gpart != NULL) {
+
+      if (s->parts[k].gpart->part != &s->parts[k]) error("Linking problem !");
+    }
+  }
+
   /* Hook the cells up to the parts. */
   // tic = getticks();
   struct part *finger = s->parts;
@@ -536,7 +563,7 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
  */
 void space_split(struct space *s, struct cell *cells, int verbose) {
 
-  ticks tic = getticks();
+  const ticks tic = getticks();
 
   for (int k = 0; k < s->nr_cells; k++)
     scheduler_addtask(&s->e->sched, task_type_split_cell, task_subtype_none, k,
@@ -560,7 +587,7 @@ void space_split(struct space *s, struct cell *cells, int verbose) {
  * @param verbose Are we talkative ?
  */
 
-void space_parts_sort(struct space *s, size_t *ind, size_t N, int min, int max,
+void space_parts_sort(struct space *s, int *ind, size_t N, int min, int max,
                       int verbose) {
 
   ticks tic = getticks();
@@ -608,7 +635,7 @@ void space_parts_sort(struct space *s, size_t *ind, size_t N, int min, int max,
 void space_do_parts_sort() {
 
   /* Pointers to the sorting data. */
-  size_t *ind = space_sort_struct.ind;
+  int *ind = space_sort_struct.ind;
   struct part *parts = space_sort_struct.parts;
   struct xpart *xparts = space_sort_struct.xparts;
 
@@ -730,7 +757,7 @@ void space_do_parts_sort() {
   } /* main loop. */
 }
 
-void space_gparts_sort(struct gpart *gparts, size_t *ind, size_t N, int min,
+void space_gparts_sort(struct gpart *gparts, int *ind, size_t N, int min,
                        int max) {
 
   struct qstack {
diff --git a/src/space.h b/src/space.h
index 91485ff7e2ebe9da8ab927748589ae9f71320803..e761595838ae78b0d8a67cca676cfa59f3f700f6 100644
--- a/src/space.h
+++ b/src/space.h
@@ -64,9 +64,6 @@ struct space {
   /* The minimum and maximum cutoff radii. */
   double h_max, cell_min;
 
-  /* Current time step for particles. */
-  float dt_step;
-
   /* Current maximum displacement for particles. */
   float dx_max;
 
@@ -106,6 +103,8 @@ struct space {
   /* Buffers for parts that we will receive from foreign cells. */
   struct part *parts_foreign;
   size_t nr_parts_foreign, size_parts_foreign;
+  struct gpart *gparts_foreign;
+  size_t nr_gparts_foreign, size_gparts_foreign;
 };
 
 /* Interval stack necessary for parallel particle sorting. */
@@ -117,7 +116,7 @@ struct qstack {
 struct parallel_sort {
   struct part *parts;
   struct xpart *xparts;
-  size_t *ind;
+  int *ind;
   struct qstack *stack;
   unsigned int stack_size;
   volatile unsigned int first, last, waiting;
@@ -125,9 +124,9 @@ struct parallel_sort {
 extern struct parallel_sort space_sort_struct;
 
 /* function prototypes. */
-void space_parts_sort(struct space *s, size_t *ind, size_t N, int min, int max,
+void space_parts_sort(struct space *s, int *ind, size_t N, int min, int max,
                       int verbose);
-void space_gparts_sort(struct gpart *gparts, size_t *ind, size_t N, int min,
+void space_gparts_sort(struct gpart *gparts, int *ind, size_t N, int min,
                        int max);
 struct cell *space_getcell(struct space *s);
 int space_getsid(struct space *s, struct cell **ci, struct cell **cj,
diff --git a/src/swift.h b/src/swift.h
index 9ab090dccd195ff4927d3e614e446b36d273f824..e568a28c888295affc9ec45b6d059d34f5b4bf04 100644
--- a/src/swift.h
+++ b/src/swift.h
@@ -27,7 +27,6 @@
 #include "cell.h"
 #include "clocks.h"
 #include "const.h"
-#include "const.h"
 #include "cycle.h"
 #include "debug.h"
 #include "engine.h"
@@ -38,7 +37,9 @@
 #include "map.h"
 #include "multipole.h"
 #include "parallel_io.h"
+#include "parser.h"
 #include "part.h"
+#include "partition.h"
 #include "queue.h"
 #include "runner.h"
 #include "scheduler.h"
@@ -47,9 +48,8 @@
 #include "space.h"
 #include "task.h"
 #include "timers.h"
-#include "units.h"
 #include "tools.h"
-#include "partition.h"
+#include "units.h"
 #include "version.h"
 
 #endif /* SWIFT_SWIFT_H */
diff --git a/src/tools.c b/src/tools.c
index 5feba7759f730faea1f38ceb9835f2076bc37a56..d25b7401a1e0515c650333b41193d54b5e155d39 100644
--- a/src/tools.c
+++ b/src/tools.c
@@ -236,6 +236,53 @@ void pairs_all_density(struct runner *r, struct cell *ci, struct cell *cj) {
   }
 }
 
+void self_all_density(struct runner *r, struct cell *ci) {
+  float r2, hi, hj, hig2, hjg2, dxi[3];  //, dxj[3];
+  struct part *pi, *pj;
+
+  /* Implements a double-for loop and checks every interaction */
+  for (int i = 0; i < ci->count; ++i) {
+
+    pi = &ci->parts[i];
+    hi = pi->h;
+    hig2 = hi * hi * kernel_gamma2;
+
+    for (int j = i + 1; j < ci->count; ++j) {
+
+      pj = &ci->parts[j];
+      hj = pj->h;
+      hjg2 = hj * hj * kernel_gamma2;
+
+      if (pi == pj) continue;
+
+      /* Pairwise distance */
+      r2 = 0.0f;
+      for (int k = 0; k < 3; k++) {
+        dxi[k] = ci->parts[i].x[k] - ci->parts[j].x[k];
+        r2 += dxi[k] * dxi[k];
+      }
+
+      /* Hit or miss? */
+      if (r2 < hig2) {
+
+        /* Interact */
+        runner_iact_nonsym_density(r2, dxi, hi, hj, pi, pj);
+      }
+
+      /* Hit or miss? */
+      if (r2 < hjg2) {
+
+        dxi[0] = -dxi[0];
+        dxi[1] = -dxi[1];
+        dxi[2] = -dxi[2];
+
+        /* Interact */
+        runner_iact_nonsym_density(r2, dxi, hj, hi, pj, pi);
+      }
+    }
+  }
+}
+
 void pairs_single_grav(double *dim, long long int pid,
                        struct gpart *__restrict__ parts, int N, int periodic) {
 
@@ -253,9 +300,9 @@ void pairs_single_grav(double *dim, long long int pid,
       break;
   if (k == N) error("Part not found.");
   pi = parts[k];
-  pi.a[0] = 0.0f;
-  pi.a[1] = 0.0f;
-  pi.a[2] = 0.0f;
+  pi.a_grav[0] = 0.0f;
+  pi.a_grav[1] = 0.0f;
+  pi.a_grav[2] = 0.0f;
 
   /* Loop over all particle pairs. */
   for (k = 0; k < N; k++) {
@@ -273,15 +320,15 @@ void pairs_single_grav(double *dim, long long int pid,
     }
     r2 = fdx[0] * fdx[0] + fdx[1] * fdx[1] + fdx[2] * fdx[2];
     runner_iact_grav(r2, fdx, &pi, &pj);
-    a[0] += pi.a[0];
-    a[1] += pi.a[1];
-    a[2] += pi.a[2];
-    aabs[0] += fabsf(pi.a[0]);
-    aabs[1] += fabsf(pi.a[1]);
-    aabs[2] += fabsf(pi.a[2]);
-    pi.a[0] = 0.0f;
-    pi.a[1] = 0.0f;
-    pi.a[2] = 0.0f;
+    a[0] += pi.a_grav[0];
+    a[1] += pi.a_grav[1];
+    a[2] += pi.a_grav[2];
+    aabs[0] += fabsf(pi.a_grav[0]);
+    aabs[1] += fabsf(pi.a_grav[1]);
+    aabs[2] += fabsf(pi.a_grav[2]);
+    pi.a_grav[0] = 0.0f;
+    pi.a_grav[1] = 0.0f;
+    pi.a_grav[2] = 0.0f;
   }
 
   /* Dump the result. */
diff --git a/src/tools.h b/src/tools.h
index 59646291bda46a7dd0f5a34e158e3e0a6f21d3ca..ccffc77ceb8a967fd40c3737651ba75d529eee0f 100644
--- a/src/tools.h
+++ b/src/tools.h
@@ -33,6 +33,7 @@ void pairs_single_density(double *dim, long long int pid,
                           struct part *__restrict__ parts, int N, int periodic);
 
 void pairs_all_density(struct runner *r, struct cell *ci, struct cell *cj);
+void self_all_density(struct runner *r, struct cell *ci);
 
 void pairs_n2(double *dim, struct part *__restrict__ parts, int N,
               int periodic);
diff --git a/tests/Makefile.am b/tests/Makefile.am
index f0bfbefd3c7f4591134d1707c4ac9bf63278e855..d66282059d874f345437d779d59ec3edb08e47cb 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -21,10 +21,12 @@ AM_CFLAGS = -I../src $(HDF5_CPPFLAGS) -DTIMER
 AM_LDFLAGS = ../src/.libs/libswiftsim.a $(HDF5_LDFLAGS) $(HDF5_LIBS)
 
 # List of programs and scripts to run in the test suite
-TESTS = testGreetings testReading.sh testSingle testTimeIntegration
+TESTS = testGreetings testReading.sh testSingle testPair.sh testPairPerturbed.sh \
+	test27cells.sh test27cellsPerturbed.sh testParser.sh
 
 # List of test programs to compile
-check_PROGRAMS = testGreetings testReading testSingle testTimeIntegration testSPHStep testVectorize
+check_PROGRAMS = testGreetings testReading testSingle testTimeIntegration \
+		 testSPHStep testPair test27cells testParser
 
 # Sources for the individual programs
 testGreetings_SOURCES = testGreetings.c
@@ -37,7 +39,13 @@ testSPHStep_SOURCES = testSPHStep.c
 
 testSingle_SOURCES = testSingle.c
 
-testVectorize_SOURCES = testVectorize.c
+testPair_SOURCES = testPair.c
+
+test27cells_SOURCES = test27cells.c
+
+testParser_SOURCES = testParser.c
 
 # Files necessary for distribution
-EXTRA_DIST = testReading.sh makeInput.py
+EXTRA_DIST = testReading.sh makeInput.py testPair.sh testPairPerturbed.sh \
+	     test27cells.sh test27cellsPerturbed.sh tolerance.dat testParser.sh \ 
+	     testParserInput.yaml
diff --git a/tests/difffloat.py b/tests/difffloat.py
new file mode 100644
index 0000000000000000000000000000000000000000..bbb7c95a1e77e04bbe21bec6dc6c5d529cd77c70
--- /dev/null
+++ b/tests/difffloat.py
@@ -0,0 +1,103 @@
+###############################################################################
+ # This file is part of SWIFT.
+ # Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ # 
+ # This program is free software: you can redistribute it and/or modify
+ # it under the terms of the GNU Lesser General Public License as published
+ # by the Free Software Foundation, either version 3 of the License, or
+ # (at your option) any later version.
+ # 
+ # This program is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ # GNU General Public License for more details.
+ # 
+ # You should have received a copy of the GNU Lesser General Public License
+ # along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ # 
+ ##############################################################################
+
+from numpy import *
+import sys
+
+abs_tol = 1e-7
+rel_tol = 1e-7
+
+# Compares the content of two ASCII tables of floats line by line and
+# reports all differences beyond the given tolerances
+# Comparisons are done both in absolute and relative terms
+
+# Individual tolerances for each column can be provided in a file
+
+file1 = sys.argv[1]
+file2 = sys.argv[2]
+fileTol = ""
+
+if len(sys.argv) == 4:
+    fileTol = sys.argv[3]
+
+data1 = loadtxt(file1)
+data2 = loadtxt(file2)
+if fileTol != "":
+    dataTol = loadtxt(fileTol)
+    n_linesTol = shape(dataTol)[0]
+    n_columnsTol = shape(dataTol)[1]
+
+
+if shape(data1) != shape(data2):
+    print "Non-matching array sizes in the files", file1, "and", file2, "."
+    sys.exit(1)
+
+n_lines = shape(data1)[0]
+n_columns = shape(data1)[1]
+
+if fileTol != "":
+    if n_linesTol != 2:
+        print "Incorrect number of lines in tolerance file '%s'."%fileTol
+    if n_columnsTol != n_columns:
+        print "Incorrect number of columns in tolerance file '%s'."%fileTol
+
+if fileTol == "":
+    print "Absolute difference tolerance:", abs_tol
+    print "Relative difference tolerance:", rel_tol
+    absTol = ones(n_columns) * abs_tol
+    relTol = ones(n_columns) * rel_tol
+else:
+    print "Tolerances read from file"
+    absTol = dataTol[0,:]
+    relTol = dataTol[1,:]
+
+error = False
+for i in range(n_lines):
+    for j in range(n_columns):
+
+        abs_diff = abs(data1[i,j] - data2[i,j])
+
+        sum = abs(data1[i,j] + data2[i,j])
+        if sum > 0:
+            rel_diff = abs(data1[i,j] - data2[i,j]) / sum
+        else:
+            rel_diff = 0.
+
+        if( abs_diff > absTol[j]):
+            print "Absolute difference larger than tolerance (%e) on line %d, column %d:"%(absTol[j], i,j)
+            print "%10s:           a = %e"%("File 1", data1[i,j])
+            print "%10s:           b = %e"%("File 2", data2[i,j])
+            print "%10s:       |a-b| = %e"%("Difference", abs_diff)
+            print ""
+            error = True
+
+        if( rel_diff > relTol[j]):
+            print "Relative difference larger than tolerance (%e) on line %d, column %d:"%(relTol[j], i,j)
+            print "%10s:           a = %e"%("File 1", data1[i,j])
+            print "%10s:           b = %e"%("File 2", data2[i,j])
+            print "%10s: |a-b|/|a+b| = %e"%("Difference", rel_diff)
+            print ""
+            error = True
+
+
+if error:
+    exit(1)
+else:
+    print "No differences found"
+    exit(0)
diff --git a/tests/test27cells.c b/tests/test27cells.c
new file mode 100644
index 0000000000000000000000000000000000000000..74c38996a81056b10633bf2bbf18cc7cff7e8f0d
--- /dev/null
+++ b/tests/test27cells.c
@@ -0,0 +1,367 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (C) 2015 Matthieu Schaller (matthieu.schaller@durham.ac.uk).
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+#include <fenv.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <unistd.h>
+#include "swift.h"
+
+/**
+ * Returns a random number (uniformly distributed) in [a,b[
+ */
+double random_uniform(double a, double b) {
+  return (rand() / (double)RAND_MAX) * (b - a) + a;
+}
+
+/* n is both particles per axis and box size:
+ * particles are generated on a mesh with unit spacing
+ */
+struct cell *make_cell(size_t n, double *offset, double size, double h,
+                       double density, long long *partId, double pert) {
+  const size_t count = n * n * n;
+  const double volume = size * size * size;
+  struct cell *cell = malloc(sizeof(struct cell));
+  bzero(cell, sizeof(struct cell));
+
+  if (posix_memalign((void **)&cell->parts, part_align,
+                     count * sizeof(struct part)) != 0) {
+    error("couldn't allocate particles, no. of particles: %d", (int)count);
+  }
+  bzero(cell->parts, count * sizeof(struct part));
+
+  /* Construct the parts */
+  struct part *part = cell->parts;
+  for (size_t x = 0; x < n; ++x) {
+    for (size_t y = 0; y < n; ++y) {
+      for (size_t z = 0; z < n; ++z) {
+        part->x[0] =
+            offset[0] +
+            size * (x + 0.5 + random_uniform(-0.5, 0.5) * pert) / (float)n;
+        part->x[1] =
+            offset[1] +
+            size * (y + 0.5 + random_uniform(-0.5, 0.5) * pert) / (float)n;
+        part->x[2] =
+            offset[2] +
+            size * (z + 0.5 + random_uniform(-0.5, 0.5) * pert) / (float)n;
+        // part->v[0] = part->x[0] - 1.5;
+        // part->v[1] = part->x[1] - 1.5;
+        // part->v[2] = part->x[2] - 1.5;
+        part->v[0] = random_uniform(-0.05, 0.05);
+        part->v[1] = random_uniform(-0.05, 0.05);
+        part->v[2] = random_uniform(-0.05, 0.05);
+        part->h = size * h / (float)n;
+        part->id = ++(*partId);
+        part->mass = density * volume / count;
+        part->ti_begin = 0;
+        part->ti_end = 1;
+        ++part;
+      }
+    }
+  }
+
+  /* Cell properties */
+  cell->split = 0;
+  cell->h_max = h;
+  cell->count = count;
+  cell->dx_max = 0.;
+  cell->h[0] = size;
+  cell->h[1] = size;
+  cell->h[2] = size;
+  cell->loc[0] = offset[0];
+  cell->loc[1] = offset[1];
+  cell->loc[2] = offset[2];
+
+  cell->ti_end_min = 1;
+  cell->ti_end_max = 1;
+
+  cell->sorted = 0;
+  cell->sort = NULL;
+  cell->sortsize = 0;
+  runner_dosort(NULL, cell, 0x1FFF, 0);
+
+  return cell;
+}
+
+void clean_up(struct cell *ci) {
+  free(ci->parts);
+  free(ci->sort);
+  free(ci);
+}
+
+/**
+ * @brief Initializes all particles field to be ready for a density calculation
+ */
+void zero_particle_fields(struct cell *c) {
+
+  for (size_t pid = 0; pid < c->count; pid++) {
+    c->parts[pid].rho = 0.f;
+    c->parts[pid].rho_dh = 0.f;
+    hydro_init_part(&c->parts[pid]);
+  }
+}
+
+/**
+ * @brief Ends the loop by adding the appropriate coefficients
+ */
+void end_calculation(struct cell *c) {
+
+  for (size_t pid = 0; pid < c->count; pid++) {
+    hydro_end_density(&c->parts[pid], 1);
+  }
+}
+
+/**
+ * @brief Dump all the particles to a file
+ */
+void dump_particle_fields(char *fileName, struct cell *main_cell,
+                          struct cell **cells) {
+
+  FILE *file = fopen(fileName, "w");
+
+  /* Write header */
+  fprintf(file,
+          "# %4s %10s %10s %10s %10s %10s %10s %13s %13s %13s %13s %13s "
+          "%13s %13s %13s\n",
+          "ID", "pos_x", "pos_y", "pos_z", "v_x", "v_y", "v_z", "rho", "rho_dh",
+          "wcount", "wcount_dh", "div_v", "curl_vx", "curl_vy", "curl_vz");
+
+  fprintf(file, "# Main cell --------------------------------------------\n");
+
+  /* Write main cell */
+  for (size_t pid = 0; pid < main_cell->count; pid++) {
+    fprintf(file,
+            "%6llu %10f %10f %10f %10f %10f %10f %13e %13e %13e %13e %13e "
+            "%13e %13e %13e\n",
+            main_cell->parts[pid].id, main_cell->parts[pid].x[0],
+            main_cell->parts[pid].x[1], main_cell->parts[pid].x[2],
+            main_cell->parts[pid].v[0], main_cell->parts[pid].v[1],
+            main_cell->parts[pid].v[2], main_cell->parts[pid].rho,
+            main_cell->parts[pid].rho_dh, main_cell->parts[pid].density.wcount,
+            main_cell->parts[pid].density.wcount_dh,
+#ifdef GADGET2_SPH
+            main_cell->parts[pid].div_v, main_cell->parts[pid].density.rot_v[0],
+            main_cell->parts[pid].density.rot_v[1],
+            main_cell->parts[pid].density.rot_v[2]
+#else
+            0., 0., 0., 0.
+#endif
+            );
+  }
+
+  /* Write all other cells */
+  for (int i = 0; i < 3; ++i) {
+    for (int j = 0; j < 3; ++j) {
+      for (int k = 0; k < 3; ++k) {
+
+        struct cell *cj = cells[i * 9 + j * 3 + k];
+        if (cj == main_cell) continue;
+
+        fprintf(file,
+                "# Offset: [%2d %2d %2d] -----------------------------------\n",
+                i - 1, j - 1, k - 1);
+
+        for (size_t pjd = 0; pjd < cj->count; pjd++) {
+          fprintf(
+              file,
+              "%6llu %10f %10f %10f %10f %10f %10f %13e %13e %13e %13e %13e "
+              "%13e %13e %13e\n",
+              cj->parts[pjd].id, cj->parts[pjd].x[0], cj->parts[pjd].x[1],
+              cj->parts[pjd].x[2], cj->parts[pjd].v[0], cj->parts[pjd].v[1],
+              cj->parts[pjd].v[2], cj->parts[pjd].rho, cj->parts[pjd].rho_dh,
+              cj->parts[pjd].density.wcount, cj->parts[pjd].density.wcount_dh,
+#ifdef GADGET2_SPH
+              cj->parts[pjd].div_v, cj->parts[pjd].density.rot_v[0],
+              cj->parts[pjd].density.rot_v[1], cj->parts[pjd].density.rot_v[2]
+#else
+              0., 0., 0., 0.
+#endif
+              );
+        }
+      }
+    }
+  }
+  fclose(file);
+}
+
+/* Just a forward declaration... */
+void runner_dopair1_density(struct runner *r, struct cell *ci, struct cell *cj);
+void runner_doself1_density(struct runner *r, struct cell *ci);
+
+/* And go... */
+int main(int argc, char *argv[]) {
+
+  size_t runs = 0, particles = 0;
+  double h = 1.12575, size = 1., rho = 1.;
+  double perturbation = 0.;
+  char outputFileNameExtension[200] = "";
+  char outputFileName[200] = "";
+
+  /* Initialize CPU frequency, this also starts time. */
+  unsigned long long cpufreq = 0;
+  clocks_set_cpufreq(cpufreq);
+
+  /* Get some randomness going */
+  srand(0);
+
+  char c;
+  while ((c = getopt(argc, argv, "m:s:h:p:r:t:d:f:")) != -1) {
+    switch (c) {
+      case 'h':
+        sscanf(optarg, "%lf", &h);
+        break;
+      case 's':
+        sscanf(optarg, "%lf", &size);
+        break;
+      case 'p':
+        sscanf(optarg, "%zu", &particles);
+        break;
+      case 'r':
+        sscanf(optarg, "%zu", &runs);
+        break;
+      case 'd':
+        sscanf(optarg, "%lf", &perturbation);
+        break;
+      case 'm':
+        sscanf(optarg, "%lf", &rho);
+        break;
+      case 'f':
+        strcpy(outputFileNameExtension, optarg);
+        break;
+      case '?':
+        error("Unknown option.");
+        break;
+    }
+  }
+
+  if (h < 0 || particles == 0 || runs == 0) {
+    printf(
+        "\nUsage: %s -p PARTICLES_PER_AXIS -r NUMBER_OF_RUNS [OPTIONS...]\n"
+        "\nGenerates a cell pair, filled with particles on a Cartesian grid."
+        "\nThese are then interacted using runner_dopair1_density."
+        "\n\nOptions:"
+        "\n-h DISTANCE=1.1255 - Smoothing length"
+        "\n-m rho             - Physical density in the cell"
+        "\n-s size            - Physical size of the cell"
+        "\n-d pert            - Perturbation to apply to the particles [0,1["
+        "\n-f fileName        - Part of the file name used to save the dumps\n",
+        argv[0]);
+    exit(1);
+  }
+
+  /* Help users... */
+  message("Smoothing length: h = %f", h);
+  message("Neighbour target: N = %f", kernel_nwneigh);
+
+  /* Build the infrastructure */
+  struct space space;
+  space.periodic = 0;
+  space.h_max = h;
+
+  struct engine engine;
+  engine.s = &space;
+  engine.time = 0.1f;
+  engine.ti_current = 1;
+
+  struct runner runner;
+  runner.e = &engine;
+
+  /* Construct some cells */
+  struct cell *cells[27];
+  struct cell *main_cell;
+  static long long partId = 0;
+  for (int i = 0; i < 3; ++i) {
+    for (int j = 0; j < 3; ++j) {
+      for (int k = 0; k < 3; ++k) {
+
+        double offset[3] = {i * size, j * size, k * size};
+        cells[i * 9 + j * 3 + k] =
+            make_cell(particles, offset, size, h, rho, &partId, perturbation);
+      }
+    }
+  }
+
+  /* Store the main cell for future use */
+  main_cell = cells[13];
+
+  ticks time = 0;
+  for (size_t i = 0; i < runs; ++i) {
+
+    /* Zero the fields */
+    for (int j = 0; j < 27; ++j) zero_particle_fields(cells[j]);
+
+    const ticks tic = getticks();
+
+    /* Run all the pairs */
+    for (int j = 0; j < 27; ++j)
+      if (cells[j] != main_cell)
+        runner_dopair1_density(&runner, main_cell, cells[j]);
+
+    /* And now the self-interaction */
+    runner_doself1_density(&runner, main_cell);
+
+    const ticks toc = getticks();
+    time += toc - tic;
+
+    /* Let's get physical ! */
+    end_calculation(main_cell);
+
+    /* Dump if necessary */
+    if (i % 50 == 0) {
+      sprintf(outputFileName, "swift_dopair_27_%s.dat",
+              outputFileNameExtension);
+      dump_particle_fields(outputFileName, main_cell, cells);
+    }
+  }
+
+  /* Output timing */
+  message("SWIFT calculation took       : %15lli ticks.", time / runs);
+
+  /* Now perform a brute-force version for accuracy tests */
+
+  /* Zero the fields */
+  for (int i = 0; i < 27; ++i) zero_particle_fields(cells[i]);
+
+  const ticks tic = getticks();
+
+  /* Run all the brute-force pairs */
+  for (int j = 0; j < 27; ++j)
+    if (cells[j] != main_cell) pairs_all_density(&runner, main_cell, cells[j]);
+
+  /* And now the self-interaction */
+  self_all_density(&runner, main_cell);
+
+  const ticks toc = getticks();
+
+  /* Let's get physical ! */
+  end_calculation(main_cell);
+
+  /* Dump */
+  sprintf(outputFileName, "brute_force_27_%s.dat", outputFileNameExtension);
+  dump_particle_fields(outputFileName, main_cell, cells);
+
+  /* Output timing */
+  message("Brute force calculation took : %15lli ticks.", toc - tic);
+
+  /* Clean things to make the sanitizer happy ... */
+  for (int i = 0; i < 27; ++i) clean_up(cells[i]);
+
+  return 0;
+}
diff --git a/tests/test27cells.sh b/tests/test27cells.sh
new file mode 100755
index 0000000000000000000000000000000000000000..09d2513bd3ef404c7bf434948af7f10306c98ede
--- /dev/null
+++ b/tests/test27cells.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+rm brute_force_27_standard.dat swift_dopair_27_standard.dat
+
+./test27cells -p 6 -r 1 -d 0 -f standard
+
+python difffloat.py brute_force_27_standard.dat swift_dopair_27_standard.dat tolerance.dat
+
+exit $?
diff --git a/tests/test27cellsPerturbed.sh b/tests/test27cellsPerturbed.sh
new file mode 100755
index 0000000000000000000000000000000000000000..73d2933984d38f7dcc992f07ec2e016f3544b636
--- /dev/null
+++ b/tests/test27cellsPerturbed.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+rm brute_force_27_perturbed.dat swift_dopair_27_perturbed.dat
+
+./test27cells -p 6 -r 1 -d 0.1 -f perturbed
+
+python difffloat.py brute_force_27_perturbed.dat swift_dopair_27_perturbed.dat tolerance.dat
+
+exit $?
diff --git a/tests/testPair.c b/tests/testPair.c
new file mode 100644
index 0000000000000000000000000000000000000000..23ce4eb3de460f4e17b7b6f81cb39a628f3d100f
--- /dev/null
+++ b/tests/testPair.c
@@ -0,0 +1,305 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (C) 2015 Matthieu Schaller (matthieu.schaller@durham.ac.uk).
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+#include <fenv.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <unistd.h>
+#include "swift.h"
+
+/**
+ * Returns a random number (uniformly distributed) in [a,b[
+ */
+double random_uniform(double a, double b) {
+  return (rand() / (double)RAND_MAX) * (b - a) + a;
+}
+
+/* n is both particles per axis and box size:
+ * particles are generated on a mesh with unit spacing
+ */
+struct cell *make_cell(size_t n, double *offset, double size, double h,
+                       double density, unsigned long long *partId,
+                       double pert) {
+  const size_t count = n * n * n;
+  const double volume = size * size * size;
+  struct cell *cell = malloc(sizeof(struct cell));
+  bzero(cell, sizeof(struct cell));
+
+  if (posix_memalign((void **)&cell->parts, part_align,
+                     count * sizeof(struct part)) != 0) {
+    error("couldn't allocate particles, no. of particles: %d", (int)count);
+  }
+  bzero(cell->parts, count * sizeof(struct part));
+
+  /* Construct the parts */
+  struct part *part = cell->parts;
+  for (size_t x = 0; x < n; ++x) {
+    for (size_t y = 0; y < n; ++y) {
+      for (size_t z = 0; z < n; ++z) {
+        part->x[0] =
+            offset[0] +
+            size * (x + 0.5 + random_uniform(-0.5, 0.5) * pert) / (float)n;
+        part->x[1] =
+            offset[1] +
+            size * (y + 0.5 + random_uniform(-0.5, 0.5) * pert) / (float)n;
+        part->x[2] =
+            offset[2] +
+            size * (z + 0.5 + random_uniform(-0.5, 0.5) * pert) / (float)n;
+        // part->v[0] = part->x[0] - 1.5;
+        // part->v[1] = part->x[1] - 1.5;
+        // part->v[2] = part->x[2] - 1.5;
+        part->v[0] = random_uniform(-0.05, 0.05);
+        part->v[1] = random_uniform(-0.05, 0.05);
+        part->v[2] = random_uniform(-0.05, 0.05);
+        part->h = size * h / (float)n;
+        part->id = ++(*partId);
+        part->mass = density * volume / count;
+        part->ti_begin = 0;
+        part->ti_end = 1;
+        ++part;
+      }
+    }
+  }
+
+  /* Cell properties */
+  cell->split = 0;
+  cell->h_max = h;
+  cell->count = count;
+  cell->dx_max = 0.;
+  cell->h[0] = n;
+  cell->h[1] = n;
+  cell->h[2] = n;
+  cell->loc[0] = offset[0];
+  cell->loc[1] = offset[1];
+  cell->loc[2] = offset[2];
+
+  cell->ti_end_min = 1;
+  cell->ti_end_max = 1;
+
+  cell->sorted = 0;
+  cell->sort = NULL;
+  cell->sortsize = 0;
+  runner_dosort(NULL, cell, 0x1FFF, 0);
+
+  return cell;
+}
+
+void clean_up(struct cell *ci) {
+  free(ci->parts);
+  free(ci->sort);
+  free(ci);
+}
+
+/**
+ * @brief Initializes all particles field to be ready for a density calculation
+ */
+void zero_particle_fields(struct cell *c) {
+
+  for (size_t pid = 0; pid < c->count; pid++) {
+    c->parts[pid].rho = 0.f;
+    c->parts[pid].rho_dh = 0.f;
+    hydro_init_part(&c->parts[pid]);
+  }
+}
+
+/**
+ * @brief Dump all the particles to a file
+ */
+void dump_particle_fields(char *fileName, struct cell *ci, struct cell *cj) {
+
+  FILE *file = fopen(fileName, "w");
+
+  /* Write header */
+  fprintf(file,
+          "# %4s %10s %10s %10s %10s %10s %10s %13s %13s %13s %13s %13s "
+          "%13s %13s %13s\n",
+          "ID", "pos_x", "pos_y", "pos_z", "v_x", "v_y", "v_z", "rho", "rho_dh",
+          "wcount", "wcount_dh", "div_v", "curl_vx", "curl_vy", "curl_vz");
+
+  fprintf(file, "# ci --------------------------------------------\n");
+
+  for (size_t pid = 0; pid < ci->count; pid++) {
+    fprintf(file,
+            "%6llu %10f %10f %10f %10f %10f %10f %13e %13e %13e %13e %13e "
+            "%13e %13e %13e\n",
+            ci->parts[pid].id, ci->parts[pid].x[0], ci->parts[pid].x[1],
+            ci->parts[pid].x[2], ci->parts[pid].v[0], ci->parts[pid].v[1],
+            ci->parts[pid].v[2], ci->parts[pid].rho, ci->parts[pid].rho_dh,
+            ci->parts[pid].density.wcount, ci->parts[pid].density.wcount_dh,
+#ifdef GADGET2_SPH
+            ci->parts[pid].div_v, ci->parts[pid].density.rot_v[0],
+            ci->parts[pid].density.rot_v[1], ci->parts[pid].density.rot_v[2]
+#else
+            0., 0., 0., 0.
+#endif
+            );
+  }
+
+  fprintf(file, "# cj --------------------------------------------\n");
+
+  for (size_t pjd = 0; pjd < cj->count; pjd++) {
+    fprintf(file,
+            "%6llu %10f %10f %10f %10f %10f %10f %13e %13e %13e %13e %13e "
+            "%13e %13e %13e\n",
+            cj->parts[pjd].id, cj->parts[pjd].x[0], cj->parts[pjd].x[1],
+            cj->parts[pjd].x[2], cj->parts[pjd].v[0], cj->parts[pjd].v[1],
+            cj->parts[pjd].v[2], cj->parts[pjd].rho, cj->parts[pjd].rho_dh,
+            cj->parts[pjd].density.wcount, cj->parts[pjd].density.wcount_dh,
+#ifdef GADGET2_SPH
+            cj->parts[pjd].div_v, cj->parts[pjd].density.rot_v[0],
+            cj->parts[pjd].density.rot_v[1], cj->parts[pjd].density.rot_v[2]
+#else
+            0., 0., 0., 0.
+#endif
+            );
+  }
+
+  fclose(file);
+}
+
+/* Just a forward declaration... */
+void runner_dopair1_density(struct runner *r, struct cell *ci, struct cell *cj);
+
+int main(int argc, char *argv[]) {
+  size_t particles = 0, runs = 0, volume, type = 0;
+  double offset[3] = {0, 0, 0}, h = 1.1255, size = 1., rho = 1.;
+  double perturbation = 0.1;
+  struct cell *ci, *cj;
+  struct space space;
+  struct engine engine;
+  struct runner runner;
+  char c;
+  static unsigned long long partId = 0;
+  char outputFileNameExtension[200] = "";
+  char outputFileName[200] = "";
+  ticks tic, toc, time;
+
+  /* Initialize CPU frequency, this also starts time. */
+  unsigned long long cpufreq = 0;
+  clocks_set_cpufreq(cpufreq);
+
+  srand(0);
+
+  while ((c = getopt(argc, argv, "h:p:r:t:d:f:")) != -1) {
+    switch (c) {
+      case 'h':
+        sscanf(optarg, "%lf", &h);
+        break;
+      case 'p':
+        sscanf(optarg, "%zu", &particles);
+        break;
+      case 'r':
+        sscanf(optarg, "%zu", &runs);
+        break;
+      case 't':
+        sscanf(optarg, "%zu", &type);
+        break;
+      case 'd':
+        sscanf(optarg, "%lf", &perturbation);
+        break;
+      case 'f':
+        strcpy(outputFileNameExtension, optarg);
+        break;
+      case '?':
+        error("Unknown option.");
+        break;
+    }
+  }
+
+  if (h < 0 || particles == 0 || runs == 0 || type > 2) {
+    printf(
+        "\nUsage: %s -p PARTICLES_PER_AXIS -r NUMBER_OF_RUNS [OPTIONS...]\n"
+        "\nGenerates a cell pair, filled with particles on a Cartesian grid."
+        "\nThese are then interacted using runner_dopair1_density."
+        "\n\nOptions:"
+        "\n-t TYPE=0          - cells share face (0), edge (1) or corner (2)"
+        "\n-h DISTANCE=1.1255 - smoothing length"
+        "\n-d pert            - perturbation to apply to the particles [0,1["
+        "\n-f fileName        - part of the file name used to save the dumps\n",
+        argv[0]);
+    exit(1);
+  }
+
+  space.periodic = 0;
+  space.h_max = h;
+
+  engine.s = &space;
+  engine.time = 0.1f;
+  engine.ti_current = 1;
+  runner.e = &engine;
+
+  volume = particles * particles * particles;
+  message("particles: %zu B\npositions: 0 B", 2 * volume * sizeof(struct part));
+
+  ci = make_cell(particles, offset, size, h, rho, &partId, perturbation);
+  for (size_t i = 0; i < type + 1; ++i) offset[i] = 1.;
+  cj = make_cell(particles, offset, size, h, rho, &partId, perturbation);
+
+  time = 0;
+  for (size_t i = 0; i < runs; ++i) {
+
+    /* Zero the fields */
+    zero_particle_fields(ci);
+    zero_particle_fields(cj);
+
+    tic = getticks();
+
+    /* Run the test */
+    runner_dopair1_density(&runner, ci, cj);
+
+    toc = getticks();
+    time += toc - tic;
+
+    /* Dump if necessary */
+    if (i % 50 == 0) {
+      sprintf(outputFileName, "swift_dopair_%s.dat", outputFileNameExtension);
+      dump_particle_fields(outputFileName, ci, cj);
+    }
+  }
+
+  /* Output timing */
+  message("SWIFT calculation took       %lli ticks.", time / runs);
+
+  /* Now perform a brute-force version for accuracy tests */
+
+  /* Zero the fields */
+  zero_particle_fields(ci);
+  zero_particle_fields(cj);
+
+  tic = getticks();
+
+  /* Run the brute-force test */
+  pairs_all_density(&runner, ci, cj);
+
+  toc = getticks();
+
+  /* Dump */
+  sprintf(outputFileName, "brute_force_%s.dat", outputFileNameExtension);
+  dump_particle_fields(outputFileName, ci, cj);
+
+  /* Output timing */
+  message("Brute force calculation took %lli ticks.", toc - tic);
+
+  /* Clean things to make the sanitizer happy ... */
+  clean_up(ci);
+  clean_up(cj);
+
+  return 0;
+}
diff --git a/tests/testPair.sh b/tests/testPair.sh
new file mode 100755
index 0000000000000000000000000000000000000000..f6f505e56a2c7a5c3cff0ec04bd871278634193c
--- /dev/null
+++ b/tests/testPair.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+rm brute_force_standard.dat swift_dopair_standard.dat
+
+./testPair -p 6 -r 1 -d 0 -f standard
+
+python difffloat.py brute_force_standard.dat swift_dopair_standard.dat tolerance.dat
+
+exit $?
diff --git a/tests/testPairPerturbed.sh b/tests/testPairPerturbed.sh
new file mode 100755
index 0000000000000000000000000000000000000000..544ba1b032da8426c065dcfb2ce3ee554c5e76a1
--- /dev/null
+++ b/tests/testPairPerturbed.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+rm brute_force_perturbed.dat swift_dopair_perturbed.dat
+
+./testPair -p 6 -r 1 -d 0.1 -f perturbed
+
+python difffloat.py brute_force_perturbed.dat swift_dopair_perturbed.dat tolerance.dat
+
+exit $?
diff --git a/tests/testParser.c b/tests/testParser.c
new file mode 100644
index 0000000000000000000000000000000000000000..a4b8789fca056fef659bca78eae9d0effb2ceb66
--- /dev/null
+++ b/tests/testParser.c
@@ -0,0 +1,67 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (C) 2016 James Willis (james.s.willis@durham.ac.uk).
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+#include "parser.h"
+#include <assert.h>
+#include <string.h>
+#include <math.h>
+
+int main(int argc, char *argv[]) {
+
+  const char *input_file = argv[1];
+
+  /* Create a structure to read file into. */
+  struct swift_params param_file;
+
+  /* Create variables that will be set from the parameter file. */
+  int no_of_threads = 0;
+  int no_of_time_steps = 0;
+  float max_h = 0.0f;
+  double start_time = 0.0;
+  char ic_file[PARSER_MAX_LINE_SIZE];
+
+  /* Read the parameter file. */
+  parser_read_file(input_file, &param_file);
+
+  /* Print the contents of the structure. */
+  parser_print_params(&param_file);
+
+  /* Retrieve parameters and store them in variables defined above.
+   * Have to specify the name of the parameter as it appears in the
+   * input file: testParserInput.yaml.*/
+  parser_get_param_int(&param_file, "no_of_threads", &no_of_threads);
+  parser_get_param_int(&param_file, "no_of_time_steps", &no_of_time_steps);
+  parser_get_param_float(&param_file, "max_h", &max_h);
+  parser_get_param_double(&param_file, "start_time", &start_time);
+  parser_get_param_string(&param_file, "ic_file", ic_file);
+
+  /* Print the variables to check their values are correct. */
+  printf(
+      "no_of_threads: %d, no_of_time_steps: %d, max_h: %f, start_time: %lf, "
+      "ic_file: %s\n",
+      no_of_threads, no_of_time_steps, max_h, start_time, ic_file);
+
+  assert(no_of_threads == 16);
+  assert(no_of_time_steps == 10);
+  assert(fabs(max_h - 1.1255) < 0.00001);
+  assert(fabs(start_time - 1.23456789) < 0.00001);
+  assert(strcmp(ic_file, "ic_file.ini") == 0); /*strcmp returns 0 if correct.*/
+
+  return 0;
+}
diff --git a/tests/testParser.sh b/tests/testParser.sh
new file mode 100755
index 0000000000000000000000000000000000000000..3dad7f386f792ff2beb6e94eb093bad4085023a4
--- /dev/null
+++ b/tests/testParser.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+
+./testParser testParserInput.yaml
diff --git a/tests/testParserInput.yaml b/tests/testParserInput.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d695e6a8ddd327e31224f36a6e34767ea8d36408
--- /dev/null
+++ b/tests/testParserInput.yaml
@@ -0,0 +1,9 @@
+---
+no_of_threads:      16 # The number of threads that will be used. 
+no_of_time_steps:   10
+max_h:              1.1255
+start_time:         1.23456789
+#Input file
+ic_file:            ic_file.ini
+
+...
diff --git a/tests/testReading.c b/tests/testReading.c
index d2a2a766171a85ace486914f0f39a987d9d8c3d3..9dda4c7bad75d35a8a93e0c2acb0619409a91afd 100644
--- a/tests/testReading.c
+++ b/tests/testReading.c
@@ -22,7 +22,7 @@
 
 int main() {
 
-  int Ngas = -1, Ngpart = -1;
+  size_t Ngas = 0, Ngpart = 0;
   int periodic = -1;
   int i, j, k, n;
   double dim[3];
diff --git a/tests/testSPHStep.c b/tests/testSPHStep.c
index 984b8ea867250d0bda1bc14d2600279a27321b2c..223078ecb637e64d94e37cdf8c0f60a86bdd5ff7 100644
--- a/tests/testSPHStep.c
+++ b/tests/testSPHStep.c
@@ -77,6 +77,10 @@ struct cell *make_cell(size_t N, float cellSize, int offset[3], int id_offset) {
 
 #ifdef DEFAULT_SPH
 
+/* Just a forward declaration... */
+void runner_doself1_density(struct runner *r, struct cell *ci);
+void runner_doself2_force(struct runner *r, struct cell *ci);
+
 /* Run a full time step integration for one cell */
 int main() {
 
@@ -132,7 +136,7 @@ int main() {
 
   /* Initialise the particles */
   for (j = 0; j < 27; ++j) {
-    runner_doinit(&r, cells[j]);
+    runner_doinit(&r, cells[j], 0);
   }
 
   /* Compute density */
@@ -145,7 +149,7 @@ int main() {
   runner_doself2_force(&r, ci);
   runner_dokick(&r, ci, 1);
 
-  message("t_end=%f", p->t_end);
+  message("ti_end=%d", p->ti_end);
 
   free(ci->parts);
   free(ci->xparts);
diff --git a/tests/testSingle.c b/tests/testSingle.c
index c85b77ff1c5b2285c33fa7787bbd53deab463039..8771fba0c1912905d3936562fa9dad0223d89220 100644
--- a/tests/testSingle.c
+++ b/tests/testSingle.c
@@ -91,8 +91,8 @@ int main(int argc, char *argv[]) {
   p2.force.POrho2 = p2.u * (const_hydro_gamma - 1.0f) / p2.rho;
 
   /* Dump a header. */
-  printParticle_single(&p1);
-  printParticle_single(&p2);
+  //printParticle_single(&p1, NULL);
+  //printParticle_single(&p2, NULL);
   printf("# r a_1 udt_1 a_2 udt_2\n");
 
   /* Loop over the different radii. */
@@ -103,9 +103,9 @@ int main(int argc, char *argv[]) {
     r2 = dx[0] * dx[0];
 
     /* Clear the particle fields. */
-    p1.a[0] = 0.0f;
+    p1.a_hydro[0] = 0.0f;
     p1.force.u_dt = 0.0f;
-    p2.a[0] = 0.0f;
+    p2.a_hydro[0] = 0.0f;
     p2.force.u_dt = 0.0f;
 
     /* Interact the particles. */
@@ -130,8 +130,8 @@ int main(int argc, char *argv[]) {
 
     /* Output the results. */
     printf(
-        "%.3e %.3e %.3e %.3e %.3e %.3e %.3e %.3e %.3e %.3e\n", -dx[0], p1.a[0],
-        p1.a[1], p1.a[2], p1.force.u_dt,
+        "%.3e %.3e %.3e %.3e %.3e %.3e %.3e %.3e %.3e %.3e\n", -dx[0],
+        p1.a_hydro[0], p1.a_hydro[1], p1.a_hydro[2], p1.force.u_dt,
         /// -dx[0] , p1.rho , p1.density.wcount , p2.rho , p2.density.wcount ,
         w, dwdx, gradw[0], gradw[1], gradw[2]);
 
diff --git a/tests/testVectorize.c b/tests/testVectorize.c
deleted file mode 100644
index a18b6e8af5ac3f7b94bd7be3bdf8fd21e49681ff..0000000000000000000000000000000000000000
--- a/tests/testVectorize.c
+++ /dev/null
@@ -1,212 +0,0 @@
-#include <fenv.h>
-#include <stdlib.h>
-#include <string.h>
-#include <stdio.h>
-#include <unistd.h>
-#include "swift.h"
-
-/* n is both particles per axis and box size:
- * particles are generated on a mesh with unit spacing
- */
-struct cell *make_cell(size_t n, double *offset, double h,
-                       unsigned long long *partId) {
-  size_t count = n * n * n;
-  struct cell *cell = malloc(sizeof *cell);
-  struct part *part;
-  size_t x, y, z, size;
-
-  size = count * sizeof(struct part);
-  if (posix_memalign((void **)&cell->parts, part_align, size) != 0) {
-    error("couldn't allocate particles, no. of particles: %d", (int)count);
-  }
-
-  part = cell->parts;
-  for (x = 0; x < n; ++x) {
-    for (y = 0; y < n; ++y) {
-      for (z = 0; z < n; ++z) {
-        // Add .5 for symmetry: 0.5, 1.5, 2.5 vs. 0, 1, 2
-        part->x[0] = x + offset[0] + 0.5;
-        part->x[1] = y + offset[1] + 0.5;
-        part->x[2] = z + offset[2] + 0.5;
-        part->v[0] = 1.0f;
-        part->v[1] = 1.0f;
-        part->v[2] = 1.0f;
-        part->h = h;
-        part->id = ++(*partId);
-        part->mass = 1.0f;
-        part->ti_begin = 0;
-        part->ti_end = 1;
-        ++part;
-      }
-    }
-  }
-
-  cell->split = 0;
-  cell->h_max = h;
-  cell->count = count;
-  cell->dx_max = 1.;
-  cell->h[0] = n;
-  cell->h[1] = n;
-  cell->h[2] = n;
-
-  cell->sort = malloc(13 * count * sizeof *cell->sort);
-  runner_dosort(NULL, cell, 0x1FFF, 0);
-
-  return cell;
-}
-
-void clean_up(struct cell *ci) {
-  free(ci->parts);
-  free(ci->sort);
-  free(ci);
-}
-
-/**
- * @brief Initializes all particles field to be ready for a density calculation
- */
-void zero_particle_fields(struct cell *c) {
-
-  for (size_t pid = 0; pid < c->count; pid++) {
-    c->parts[pid].rho = 0.f;
-    c->parts[pid].rho_dh = 0.f;
-    hydro_init_part(&c->parts[pid]);
-  }
-}
-
-/**
- * @brief Dump all the particles to a file
- */
-void dump_particle_fields(char *fileName, struct cell *ci, struct cell *cj) {
-
-  FILE *file = fopen(fileName, "w");
-
-  fprintf(file,
-          "# ID  rho  rho_dh  wcount  wcount_dh  div_v  curl_v:[x y z]\n");
-
-  for (size_t pid = 0; pid < ci->count; pid++) {
-    fprintf(file, "%6llu %f %f %f %f %f %f %f %f\n", ci->parts[pid].id,
-            ci->parts[pid].rho, ci->parts[pid].rho_dh,
-            ci->parts[pid].density.wcount, ci->parts[pid].density.wcount_dh,
-            ci->parts[pid].div_v, ci->parts[pid].density.rot_v[0],
-            ci->parts[pid].density.rot_v[1], ci->parts[pid].density.rot_v[2]);
-  }
-
-  fprintf(file, "# -----------------------------------\n");
-
-  for (size_t pjd = 0; pjd < cj->count; pjd++) {
-    fprintf(file, "%6llu %f %f %f %f %f %f %f %f\n", cj->parts[pjd].id,
-            cj->parts[pjd].rho, cj->parts[pjd].rho_dh,
-            cj->parts[pjd].density.wcount, cj->parts[pjd].density.wcount_dh,
-            cj->parts[pjd].div_v, cj->parts[pjd].density.rot_v[0],
-            cj->parts[pjd].density.rot_v[1], cj->parts[pjd].density.rot_v[2]);
-  }
-
-  fclose(file);
-}
-
-/* Just a forward declaration... */
-void runner_dopair1_density(struct runner *r, struct cell *ci, struct cell *cj);
-
-int main(int argc, char *argv[]) {
-  size_t particles = 0, runs = 0, volume, type = 0;
-  double offset[3] = {0, 0, 0}, h = 1.1255;  // * DIM/PARTS_PER_AXIS == * 1
-  struct cell *ci, *cj;
-  struct space space;
-  struct engine engine;
-  struct runner runner;
-  char c;
-  static unsigned long long partId = 0;
-  ticks tic, toc, time;
-
-  while ((c = getopt(argc, argv, "h:p:r:t:")) != -1) {
-    switch (c) {
-      case 'h':
-        sscanf(optarg, "%lf", &h);
-        break;
-      case 'p':
-        sscanf(optarg, "%zu", &particles);
-        break;
-      case 'r':
-        sscanf(optarg, "%zu", &runs);
-        break;
-      case 't':
-        sscanf(optarg, "%zu", &type);
-        break;
-    }
-  }
-
-  if (h < 0 || particles == 0 || runs == 0 || type > 2) {
-    printf(
-        "\nUsage: %s -p PARTICLES_PER_AXIS -r NUMBER_OF_RUNS [OPTIONS...]\n"
-        "\nGenerates a cell pair, filled with particles on a Cartesian grid."
-        "\nThese are then interacted using runner_dopair1_density."
-        "\n\nOptions:"
-        "\n-t TYPE=0          - cells share face (0), edge (1) or corner (2)"
-        "\n-h DISTANCE=1.1255 - smoothing length\n",
-        argv[0]);
-    exit(1);
-  }
-
-  volume = particles * particles * particles;
-  message("particles: %zu B\npositions: 0 B", 2 * volume * sizeof(struct part));
-
-  ci = make_cell(particles, offset, h, &partId);
-  for (size_t i = 0; i < type + 1; ++i) offset[i] = particles;
-  cj = make_cell(particles, offset, h, &partId);
-
-  for (int i = 0; i < 3; ++i) {
-    space.h_max = h;
-    space.dt_step = 0.1;
-  }
-
-  engine.s = &space;
-  engine.time = 0.1f;
-  runner.e = &engine;
-
-  time = 0;
-  for (size_t i = 0; i < runs; ++i) {
-
-    /* Zero the fields */
-    zero_particle_fields(ci);
-    zero_particle_fields(cj);
-
-    tic = getticks();
-
-    /* Run the test */
-    runner_dopair1_density(&runner, ci, cj);
-
-    toc = getticks();
-    time += toc - tic;
-
-    /* Dump if necessary */
-    if (i % 50 == 0) dump_particle_fields("swift_dopair.dat", ci, cj);
-  }
-
-  /* Output timing */
-  message("SWIFT calculation took       %lli ticks.", time / runs);
-
-  /* Now perform a brute-force version for accuracy tests */
-
-  /* Zero the fields */
-  zero_particle_fields(ci);
-  zero_particle_fields(cj);
-
-  tic = getticks();
-
-  /* Run the test */
-  pairs_all_density(&runner, ci, cj);
-
-  toc = getticks();
-
-  /* Dump */
-  dump_particle_fields("brute_force.dat", ci, cj);
-
-  /* Output timing */
-  message("Brute force calculation took %lli ticks.", toc - tic);
-
-  /* Clean things to make the sanitizer happy ... */
-  clean_up(ci);
-  clean_up(cj);
-
-  return 0;
-}
diff --git a/tests/tolerance.dat b/tests/tolerance.dat
new file mode 100644
index 0000000000000000000000000000000000000000..48de4383eab6214812183be25d3036a324ccbc27
--- /dev/null
+++ b/tests/tolerance.dat
@@ -0,0 +1,3 @@
+#   ID      pos_x      pos_y      pos_z        v_x        v_y        v_z           rho        rho_dh        wcount     wcount_dh         div_v       curl_vx       curl_vy       curl_vz
+    0	    1e-6       1e-6	  1e-6 	       1e-6 	  1e-6	     1e-6	   1e-5	      1e-5	    2e-5       3e-4		 1e-5	     1e-5	   1e-5		 1e-5
+    0	    1e-6       1e-6	  1e-6 	       1e-6 	  1e-6	     1e-6	   1e-5	      1.2e-5	    1e-5       1e-5		 1e-4	     1e-4	   1e-4		 1e-4