diff --git a/doc/Doxyfile.in b/doc/Doxyfile.in
index 0df1f91194b6d1e7e98cb1b75be7d3eaaca7fc32..0193760d3114aecab91f0c2ad27a9c1dd77dec9a 100644
--- a/doc/Doxyfile.in
+++ b/doc/Doxyfile.in
@@ -1988,6 +1988,9 @@ INCLUDE_FILE_PATTERNS  =
 # This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
 
 PREDEFINED             = "__attribute__(x)= "
+PREDEFINED             += HAVE_HDF5
+PREDEFINED             += WITH_MPI
+PREDEFINED             += WITH_VECTORIZATION
 
 # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this
 # tag can be used to specify a list of macro names that should be expanded. The
diff --git a/src/collectgroup.c b/src/collectgroup.c
index 0b4ddc405772a45a1e444ef48b65fcb7d37a248f..b7e5486b59a2ec5e47b7b864071a2bb1e5ce1850 100644
--- a/src/collectgroup.c
+++ b/src/collectgroup.c
@@ -170,7 +170,7 @@ static void doreduce1(struct mpicollectgroup1 *mpigrp11,
 }
 
 /**
- * @brief MPI reduce operator for #mpicollectgroup structures.
+ * @brief MPI reduce operator for #mpicollectgroup1 structures.
  */
 static void mpicollectgroup1_reduce(void *in, void *inout, int *len,
                                     MPI_Datatype *datatype) {
diff --git a/src/runner.c b/src/runner.c
index 9be0a9ee2ce23888d04346679ccb36fdc6f13a02..36074144e14ec31741ee5964d8e2b98c9673192b 100644
--- a/src/runner.c
+++ b/src/runner.c
@@ -1818,13 +1818,8 @@ void *runner_main(void *data) {
           break;
 
         case task_type_pair:
-          if (t->subtype == task_subtype_density) {
-#if defined(WITH_VECTORIZATION) && defined(GADGET2_SPH)
-            runner_dopair1_density_vec(r, ci, cj);
-#else
-            runner_dopair1_density(r, ci, cj);
-#endif
-          }
+          if (t->subtype == task_subtype_density)
+            runner_dopair1_branch_density(r, ci, cj);
 #ifdef EXTRA_HYDRO_LOOP
           else if (t->subtype == task_subtype_gradient)
             runner_dopair1_gradient(r, ci, cj);
diff --git a/src/runner_doiact.h b/src/runner_doiact.h
index f4513ca75b994c51a74011f0da4ee6863a625968..a2abd9458f8b490497fe0b9b33c2179b283d639d 100644
--- a/src/runner_doiact.h
+++ b/src/runner_doiact.h
@@ -884,8 +884,11 @@ void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci,
  * @param r The #runner.
  * @param ci The first #cell.
  * @param cj The second #cell.
+ * @param sid The direction of the pair
+ * @param shift The shift vector to apply to the particles in ci.
  */
-void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) {
+void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid,
+             const double *shift) {
 
   const struct engine *restrict e = r->e;
 
@@ -900,22 +903,6 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) {
 
   TIMER_TIC;
 
-  /* Anything to do here? */
-  if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return;
-
-  if (!cell_are_part_drifted(ci, e) || !cell_are_part_drifted(cj, e))
-    error("Interacting undrifted cells.");
-
-  /* Get the sort ID. */
-  double shift[3] = {0.0, 0.0, 0.0};
-  const int sid = space_getsid(e->s, &ci, &cj, shift);
-
-  /* Have the cells been sorted? */
-  if (!(ci->sorted & (1 << sid)) || ci->dx_max_sort > space_maxreldx * ci->dmin)
-    runner_do_sort(r, ci, (1 << sid), 1);
-  if (!(cj->sorted & (1 << sid)) || cj->dx_max_sort > space_maxreldx * cj->dmin)
-    runner_do_sort(r, cj, (1 << sid), 1);
-
   /* Get the cutoff shift. */
   double rshift = 0.0;
   for (int k = 0; k < 3; k++) rshift += shift[k] * runner_shift[sid][k];
@@ -1116,6 +1103,51 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) {
   TIMER_TOC(TIMER_DOPAIR);
 }
 
+/**
+ * @brief Determine which version of DOPAIR1 needs to be called depending on the
+ * orientation of the cells or whether DOPAIR1 needs to be called at all.
+ *
+ * @param r #runner
+ * @param ci #cell ci
+ * @param cj #cell cj
+ *
+ */
+void DOPAIR1_BRANCH(struct runner *r, struct cell *ci, struct cell *cj) {
+
+  const struct engine *restrict e = r->e;
+
+  /* Anything to do here? */
+  if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return;
+
+  /* Check that cells are drifted. */
+  if (!cell_are_part_drifted(ci, e) || !cell_are_part_drifted(cj, e))
+    error("Interacting undrifted cells.");
+
+  /* Get the sort ID. */
+  double shift[3] = {0.0, 0.0, 0.0};
+  const int sid = space_getsid(e->s, &ci, &cj, shift);
+
+  /* Have the cells been sorted? */
+  if (!(ci->sorted & (1 << sid)) || ci->dx_max_sort > space_maxreldx * ci->dmin)
+    runner_do_sort(r, ci, (1 << sid), 1);
+  if (!(cj->sorted & (1 << sid)) || cj->dx_max_sort > space_maxreldx * cj->dmin)
+    runner_do_sort(r, cj, (1 << sid), 1);
+
+  /* Have the cells been sorted? */
+  if (!(ci->sorted & (1 << sid)) || !(cj->sorted & (1 << sid)))
+    error("Trying to interact unsorted cells.");
+
+#if defined(WITH_VECTORIZATION) && defined(GADGET2_SPH) && \
+    (DOPAIR1_BRANCH == runner_dopair1_density_branch)
+  if (!sort_is_corner(sid))
+    runner_dopair1_density_vec(r, ci, cj, sid, shift);
+  else
+    DOPAIR1(r, ci, cj, sid, shift);
+#else
+  DOPAIR1(r, ci, cj, sid, shift);
+#endif
+}
+
 /**
  * @brief Compute the interactions between a cell pair (symmetric)
  *
@@ -2290,13 +2322,8 @@ void DOSUB_PAIR1(struct runner *r, struct cell *ci, struct cell *cj, int sid,
         cj->dx_max_sort > cj->dmin * space_maxreldx)
       runner_do_sort(r, cj, (1 << sid), 1);
 
-/* Compute the interactions. */
-#if (DOPAIR1 == runner_dopair1_density) && defined(WITH_VECTORIZATION) && \
-    defined(GADGET2_SPH)
-    runner_dopair1_density_vec(r, ci, cj);
-#else
-    DOPAIR1(r, ci, cj);
-#endif
+    /* Compute the interactions. */
+    DOPAIR1_BRANCH(r, ci, cj);
   }
 
   if (gettimer) TIMER_TOC(TIMER_DOSUB_PAIR);
diff --git a/src/runner_doiact_vec.c b/src/runner_doiact_vec.c
index a302817f7fec4089f1046eec5cf7ff09aadd25a6..b05a150047d7b15de97e0b1f5ea9308bfb7f60f7 100644
--- a/src/runner_doiact_vec.c
+++ b/src/runner_doiact_vec.c
@@ -20,13 +20,12 @@
 /* Config parameters. */
 #include "../config.h"
 
-#include "swift.h"
-
-#include "active.h"
-
 /* This object's header. */
 #include "runner_doiact_vec.h"
 
+/* Local headers. */
+#include "active.h"
+
 #ifdef WITH_VECTORIZATION
 /**
  * @brief Compute the vector remainder interactions from the secondary cache.
@@ -262,40 +261,40 @@ __attribute__((always_inline)) INLINE static void storeInteractions(
   }
 }
 
-/* @brief Populates the arrays max_di and max_dj with the maximum distances of
+/**
+ * @brief Populates the arrays max_di and max_dj with the maximum distances of
  * particles into their neighbouring cells. Also finds the first pi that
  * interacts with any particle in cj and the last pj that interacts with any
  * particle in ci.
+ *
  * @param ci #cell pointer to ci
  * @param cj #cell pointer to cj
  * @param sort_i #entry array for particle distance in ci
  * @param sort_j #entry array for particle distance in cj
- * @param ci_cache #cache for cell ci
- * @param cj_cache #cache for cell cj
  * @param dx_max maximum particle movement allowed in cell
  * @param rshift cutoff shift
+ * @param hi_max Maximal smoothing length in cell ci
+ * @param hj_max Maximal smoothing length in cell cj
+ * @param di_max Maximal position on the axis that can interact in cell ci
+ * @param dj_min Minimal position on the axis that can interact in cell ci
  * @param max_di array to hold the maximum distances of pi particles into cell
  * cj
  * @param max_dj array to hold the maximum distances of pj particles into cell
  * cj
  * @param init_pi first pi to interact with a pj particle
  * @param init_pj last pj to interact with a pi particle
+ * @param e The #engine.
  */
 __attribute__((always_inline)) INLINE static void populate_max_d_no_cache(
     const struct cell *ci, const struct cell *cj,
     const struct entry *restrict sort_i, const struct entry *restrict sort_j,
-    const float dx_max, const float rshift, float *max_di, float *max_dj,
-    int *init_pi, int *init_pj, const struct engine *e) {
+    const float dx_max, const float rshift, const double hi_max,
+    const double hj_max, const double di_max, const double dj_min,
+    float *max_di, float *max_dj, int *init_pi, int *init_pj,
+    const struct engine *e) {
 
-  struct part *restrict parts_i = ci->parts;
-  struct part *restrict parts_j = cj->parts;
-  struct part *p = &parts_i[sort_i[0].i];
-
-  float h, d;
-
-  /* Get the distance of the last pi and the first pj on the sorted axis.*/
-  const float di_max = sort_i[ci->count - 1].d - rshift;
-  const float dj_min = sort_j[0].d;
+  const struct part *restrict parts_i = ci->parts;
+  const struct part *restrict parts_j = cj->parts;
 
   int first_pi = 0, last_pj = cj->count - 1;
 
@@ -304,19 +303,19 @@ __attribute__((always_inline)) INLINE static void populate_max_d_no_cache(
   /* Populate max_di with distances. */
   int active_id = ci->count - 1;
   for (int k = ci->count - 1; k >= 0; k--) {
-    p = &parts_i[sort_i[k].i];
-    h = p->h;
-    d = sort_i[k].d + h * kernel_gamma + dx_max - rshift;
+    const struct part *pi = &parts_i[sort_i[k].i];
+    const float d = sort_i[k].d + dx_max;
 
-    max_di[k] = d;
+    // max_di[k] = d + h * kernel_gamma - rshift;
+    max_di[k] = d + hi_max;
 
     /* If the particle is out of range set the index to
      * the last active particle within range. */
-    if (d < dj_min) {
+    if (d + hi_max < dj_min) {
       first_pi = active_id;
       break;
     } else {
-      if (part_is_active(p, e)) active_id = k;
+      if (part_is_active(pi, e)) active_id = k;
     }
   }
 
@@ -329,19 +328,20 @@ __attribute__((always_inline)) INLINE static void populate_max_d_no_cache(
   /* Populate max_dj with distances. */
   active_id = 0;
   for (int k = 0; k < cj->count; k++) {
-    p = &parts_j[sort_j[k].i];
-    h = p->h;
-    d = sort_j[k].d - h * kernel_gamma - dx_max - rshift;
+    const struct part *pj = &parts_j[sort_j[k].i];
+    const float d = sort_j[k].d - dx_max;
 
-    max_dj[k] = d;
+    /*TODO: don't think rshift should be taken off here, waiting on Pedro. */
+    // max_dj[k] = d - h * kernel_gamma - rshift;
+    max_dj[k] = d - hj_max;
 
     /* If the particle is out of range set the index to
      * the last active particle within range. */
-    if (d > di_max) {
+    if (d - hj_max > di_max) {
       last_pj = active_id;
       break;
     } else {
-      if (part_is_active(p, e)) active_id = k;
+      if (part_is_active(pj, e)) active_id = k;
     }
   }
 
@@ -611,9 +611,12 @@ __attribute__((always_inline)) INLINE void runner_doself1_density_vec(
  * @param r The #runner.
  * @param ci The first #cell.
  * @param cj The second #cell.
+ * @param sid The direction of the pair
+ * @param shift The shift vector to apply to the particles in ci.
  */
 void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
-                                struct cell *cj) {
+                                struct cell *cj, const int sid,
+                                const double *shift) {
 
 #ifdef WITH_VECTORIZATION
   const struct engine *restrict e = r->e;
@@ -622,22 +625,6 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
 
   TIMER_TIC;
 
-  /* Anything to do here? */
-  if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return;
-
-  if (!cell_are_part_drifted(ci, e) || !cell_are_part_drifted(cj, e))
-    error("Interacting undrifted cells.");
-
-  /* Get the sort ID. */
-  double shift[3] = {0.0, 0.0, 0.0};
-  const int sid = space_getsid(e->s, &ci, &cj, shift);
-
-  /* Have the cells been sorted? */
-  if (!(ci->sorted & (1 << sid)) || ci->dx_max_sort > space_maxreldx * ci->dmin)
-    runner_do_sort(r, ci, (1 << sid), 1);
-  if (!(cj->sorted & (1 << sid)) || cj->dx_max_sort > space_maxreldx * cj->dmin)
-    runner_do_sort(r, cj, (1 << sid), 1);
-
   /* Get the cutoff shift. */
   double rshift = 0.0;
   for (int k = 0; k < 3; k++) rshift += shift[k] * runner_shift[sid][k];
@@ -726,8 +713,9 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
   /* Find particles maximum distance into cj, max_di[] and ci, max_dj[]. */
   /* Also find the first pi that interacts with any particle in cj and the last
    * pj that interacts with any particle in ci. */
-  populate_max_d_no_cache(ci, cj, sort_i, sort_j, dx_max, rshift, max_di,
-                          max_dj, &first_pi, &last_pj, e);
+  populate_max_d_no_cache(ci, cj, sort_i, sort_j, dx_max, rshift, hi_max,
+                          hj_max, di_max, dj_min, max_di, max_dj, &first_pi,
+                          &last_pj, e);
 
   /* Find the maximum index into cj that is required by a particle in ci. */
   /* Find the maximum index into ci that is required by a particle in cj. */
@@ -770,13 +758,22 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
 
   if (cell_is_active(ci, e)) {
 
-    /* Loop over the parts in ci. */
+    /* Loop over the parts in ci until nothing is within range in cj. */
     for (int pid = count_i - 1; pid >= first_pi_loop && max_ind_j >= 0; pid--) {
 
       /* Get a hold of the ith part in ci. */
       struct part *restrict pi = &parts_i[sort_i[pid].i];
       if (!part_is_active(pi, e)) continue;
 
+      /* Set the cache index. */
+      int ci_cache_idx = pid - first_pi_align;
+
+      /* Skip this particle if no particle in cj is within range of it. */
+      const float hi = ci_cache->h[ci_cache_idx];
+      const double di_test =
+          sort_i[pid].d + hi * kernel_gamma + dx_max - rshift;
+      if (di_test < dj_min) continue;
+
       /* Determine the exit iteration of the interaction loop. */
       dj = sort_j[max_ind_j].d;
       while (max_ind_j > 0 && max_di[pid] < dj) {
@@ -786,10 +783,6 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
       }
       int exit_iteration = max_ind_j + 1;
 
-      /* Set the cache index. */
-      int ci_cache_idx = pid - first_pi_align;
-
-      const float hi = ci_cache->h[ci_cache_idx];
       const float hig2 = hi * hi * kernel_gamma2;
 
       vector pix, piy, piz;
@@ -903,13 +896,24 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
   }
 
   if (cell_is_active(cj, e)) {
-    /* Loop over the parts in cj. */
+
+    /* Loop over the parts in cj until nothing is within range in ci. */
     for (int pjd = 0; pjd <= last_pj_loop && max_ind_i < count_i; pjd++) {
 
       /* Get a hold of the jth part in cj. */
       struct part *restrict pj = &parts_j[sort_j[pjd].i];
       if (!part_is_active(pj, e)) continue;
 
+      /* Set the cache index. */
+      int cj_cache_idx = pjd;
+
+      /*TODO: rshift term. */
+      /* Skip this particle if no particle in ci is within range of it. */
+      const float hj = cj_cache->h[cj_cache_idx];
+      const double dj_test =
+          sort_j[pjd].d - hj * kernel_gamma - dx_max - rshift;
+      if (dj_test > di_max) continue;
+
       /* Determine the exit iteration of the interaction loop. */
       di = sort_i[max_ind_i].d;
       while (max_ind_i < count_i - 1 && max_dj[pjd] > di) {
@@ -919,10 +923,6 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
       }
       int exit_iteration = max_ind_i;
 
-      /* Set the cache index. */
-      int cj_cache_idx = pjd;
-
-      const float hj = cj_cache->h[cj_cache_idx];
       const float hjg2 = hj * hj * kernel_gamma2;
 
       vector pjx, pjy, pjz;
diff --git a/src/runner_doiact_vec.h b/src/runner_doiact_vec.h
index e252083ae743248a5f23d1772c2e770c5e1c6c14..13c88cebaa47c5051b4119033a055e49a9a1079c 100644
--- a/src/runner_doiact_vec.h
+++ b/src/runner_doiact_vec.h
@@ -35,8 +35,8 @@
 
 /* Function prototypes. */
 void runner_doself1_density_vec(struct runner *r, struct cell *restrict c);
-void runner_doself1_density_vec_2(struct runner *r, struct cell *restrict c);
 void runner_dopair1_density_vec(struct runner *r, struct cell *restrict ci,
-                                struct cell *restrict cj);
+                                struct cell *restrict cj, const int sid,
+                                const double *shift);
 
 #endif /* SWIFT_RUNNER_VEC_H */
diff --git a/src/serial_io.c b/src/serial_io.c
index a7e342f0a90fcf4c57f334526ff91b1923de4585..58237b49c331c34639eb5ae7ce79fd5272f73057 100644
--- a/src/serial_io.c
+++ b/src/serial_io.c
@@ -59,19 +59,15 @@
  * @brief Reads a data array from a given HDF5 group.
  *
  * @param grp The group from which to read.
- * @param name The name of the array to read.
- * @param type The #DATA_TYPE of the attribute.
- * @param N The number of particles.
- * @param dim The dimension of the data (1 for scalar, 3 for vector)
- * @param part_c A (char*) pointer on the first occurrence of the field of
- *interest in the parts array
- * @param partSize The size in bytes of the particle structure.
- * @param importance If COMPULSORY, the data must be present in the IC file. If
- *OPTIONAL, the array will be zeroed when the data is not present.
+ * @param props The #io_props of the field to read
+ * @param N The number of particles to read on this rank.
+ * @param N_total The total number of particles on all ranks.
+ * @param offset The offset position where this rank starts reading.
+ * @param internal_units The #unit_system used internally
+ * @param ic_units The #unit_system used in the ICs
  *
  * @todo A better version using HDF5 hyper-slabs to read the file directly into
- *the part array
- * will be written once the structures have been stabilized.
+ * the part array will be written once the structures have been stabilized.
  */
 void readArray(hid_t grp, const struct io_props props, size_t N,
                long long N_total, long long offset,
@@ -274,16 +270,17 @@ void prepareArray(struct engine* e, hid_t grp, char* fileName, FILE* xmfFile,
  * @param fileName The name of the file in which the data is written
  * @param xmfFile The FILE used to write the XMF description
  * @param partTypeGroupName The name of the group containing the particles in
- *the HDF5 file.
- * @param name The name of the array to write.
- * @param type The #DATA_TYPE of the array.
+ * the HDF5 file.
+ * @param props The #io_props of the field to read
  * @param N The number of particles to write.
- * @param dim The dimension of the data (1 for scalar, 3 for vector)
- * @param part_c A (char*) pointer on the first occurrence of the field of
- *interest in the parts array
- * @param partSize The size in bytes of the particle structure.
- * @param us The unit_system currently in use
- * @param convFactor The UnitConversionFactor for this arrayo
+ * @param N_total The total number of particles on all ranks.
+ * @param offset The offset position where this rank starts writing.
+ * @param mpi_rank The MPI rank of this node
+ * @param internal_units The #unit_system used internally
+ * @param snapshot_units The #unit_system used in the snapshots
+ *
+ * @todo A better version using HDF5 hyper-slabs to write the file directly from
+ * the part array will be written once the structures have been stabilized.
  */
 void writeArray(struct engine* e, hid_t grp, char* fileName, FILE* xmfFile,
                 char* partTypeGroupName, const struct io_props props, size_t N,
diff --git a/src/single_io.c b/src/single_io.c
index 0b091a5997504e5f5a4cc3b8af7ca06c994e993c..2de42f8a170620c983f7992771f2bd03da80b5a7 100644
--- a/src/single_io.c
+++ b/src/single_io.c
@@ -64,8 +64,7 @@
  * @param ic_units The #unit_system used in the ICs
  *
  * @todo A better version using HDF5 hyper-slabs to read the file directly into
- *the part array
- * will be written once the structures have been stabilized.
+ * the part array will be written once the structures have been stabilized.
  */
 void readArray(hid_t h_grp, const struct io_props prop, size_t N,
                const struct unit_system* internal_units,
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 3b3da04dcf17f10a3bd60451813467f7961315bc..b356086b54b6f4f07c8bedafe1fccb543121871e 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -97,6 +97,6 @@ EXTRA_DIST = testReading.sh makeInput.py testPair.sh testPairPerturbed.sh \
 	     test27cells.sh test27cellsPerturbed.sh testParser.sh testPeriodicBC.sh \
 	     testPeriodicBCPerturbed.sh test125cells.sh test125cellsPerturbed.sh testParserInput.yaml \
 	     difffloat.py tolerance_125_normal.dat tolerance_125_perturbed.dat \
-             tolerance_27_normal.dat tolerance_27_perturbed.dat \
+             tolerance_27_normal.dat tolerance_27_perturbed.dat tolerance_27_perturbed_h.dat \
 	     tolerance_pair_normal.dat tolerance_pair_perturbed.dat \
 	     fft_params.yml tolerance_periodic_BC_normal.dat tolerance_periodic_BC_perturbed.dat
diff --git a/tests/difffloat.py b/tests/difffloat.py
index 0bdc706a1c44ee6c42c54ad37e93f634742e06bc..8c8713a8d2968005a86a9c1e7fdce34bf3e624f7 100644
--- a/tests/difffloat.py
+++ b/tests/difffloat.py
@@ -46,7 +46,13 @@ if len(sys.argv) == 6:
     ignoreSmallRhoDh = int(sys.argv[5])
 else:
     ignoreSmallRhoDh = 0
-    
+
+# Get the particle properties being compared from the header.
+with open(file1, 'r') as f:
+  line = f.readline()
+  if 'ID' in line:
+    part_props = line.split()[1:]
+
 data1 = loadtxt(file1)
 data2 = loadtxt(file2)
 if fileTol != "":
@@ -100,7 +106,7 @@ for i in range(n_lines_to_check):
             rel_diff = 0.
 
         if( abs_diff > 1.1*absTol[j]):
-            print "Absolute difference larger than tolerance (%e) for particle %d, column %d:"%(absTol[j], i,j)
+            print "Absolute difference larger than tolerance (%e) for particle %d, column %s:"%(absTol[j], data1[i,0], part_props[j])
             print "%10s:           a = %e"%("File 1", data1[i,j])
             print "%10s:           b = %e"%("File 2", data2[i,j])
             print "%10s:       |a-b| = %e"%("Difference", abs_diff)
@@ -113,7 +119,7 @@ for i in range(n_lines_to_check):
         if ignoreSmallRhoDh and j == 8 and abs(data1[i,j]) < 2e-4: continue
         
         if( rel_diff > 1.1*relTol[j]):
-            print "Relative difference larger than tolerance (%e) for particle %d, column %d:"%(relTol[j], i,j)
+            print "Relative difference larger than tolerance (%e) for particle %d, column %s:"%(relTol[j], data1[i,0], part_props[j])
             print "%10s:           a = %e"%("File 1", data1[i,j])
             print "%10s:           b = %e"%("File 2", data2[i,j])
             print "%10s: |a-b|/|a+b| = %e"%("Difference", rel_diff)
diff --git a/tests/test125cells.c b/tests/test125cells.c
index e4c73b5e75df56436d277d719b3b83a179924a6f..9c113f98c89e6213d8c01fcd4515da076787e93e 100644
--- a/tests/test125cells.c
+++ b/tests/test125cells.c
@@ -432,6 +432,8 @@ void dump_particle_fields(char *fileName, struct cell *main_cell,
 
 /* Just a forward declaration... */
 void runner_dopair1_density(struct runner *r, struct cell *ci, struct cell *cj);
+void runner_dopair1_branch_density(struct runner *r, struct cell *ci,
+                                   struct cell *cj);
 void runner_doself1_density(struct runner *r, struct cell *ci);
 void runner_dopair2_force(struct runner *r, struct cell *ci, struct cell *cj);
 void runner_doself2_force(struct runner *r, struct cell *ci);
@@ -621,6 +623,14 @@ int main(int argc, char *argv[]) {
 /* Do the density calculation */
 #if !(defined(MINIMAL_SPH) && defined(WITH_VECTORIZATION))
 
+    /* Initialise the particle cache. */
+#ifdef WITH_VECTORIZATION
+    runner.ci_cache.count = 0;
+    cache_init(&runner.ci_cache, 512);
+    runner.cj_cache.count = 0;
+    cache_init(&runner.cj_cache, 512);
+#endif
+
     /* Run all the pairs (only once !)*/
     for (int i = 0; i < 5; i++) {
       for (int j = 0; j < 5; j++) {
@@ -643,7 +653,7 @@ int main(int argc, char *argv[]) {
 
                 struct cell *cj = cells[iii * 25 + jjj * 5 + kkk];
 
-                if (cj > ci) runner_dopair1_density(&runner, ci, cj);
+                if (cj > ci) runner_dopair1_branch_density(&runner, ci, cj);
               }
             }
           }
diff --git a/tests/test27cells.c b/tests/test27cells.c
index a0f541d17100a13079580aabbef065fa5adbd5e1..fd9d62756ad0e8461f60b2e452c6ca86135a8e89 100644
--- a/tests/test27cells.c
+++ b/tests/test27cells.c
@@ -30,11 +30,9 @@
 /* Local headers. */
 #include "swift.h"
 
-#define ACC_THRESHOLD 1e-5
-
 #if defined(WITH_VECTORIZATION)
 #define DOSELF1 runner_doself1_density_vec
-#define DOPAIR1 runner_dopair1_density_vec
+#define DOPAIR1 runner_dopair1_branch_density
 #define DOSELF1_NAME "runner_doself1_density_vec"
 #define DOPAIR1_NAME "runner_dopair1_density_vec"
 #endif
@@ -45,7 +43,7 @@
 #endif
 
 #ifndef DOPAIR1
-#define DOPAIR1 runner_dopair1_density
+#define DOPAIR1 runner_dopair1_branch_density
 #define DOPAIR1_NAME "runner_dopair1_density"
 #endif
 
@@ -64,18 +62,20 @@ enum velocity_types {
  * @param offset The position of the cell offset from (0,0,0).
  * @param size The cell size.
  * @param h The smoothing length of the particles in units of the inter-particle
- *separation.
+ * separation.
  * @param density The density of the fluid.
  * @param partId The running counter of IDs.
  * @param pert The perturbation to apply to the particles in the cell in units
- *of the inter-particle separation.
+ * of the inter-particle separation.
  * @param vel The type of velocity field (0, random, divergent, rotating)
+ * @param h_pert The perturbation to apply to the smoothing length.
  */
 struct cell *make_cell(size_t n, double *offset, double size, double h,
                        double density, long long *partId, double pert,
-                       enum velocity_types vel) {
+                       enum velocity_types vel, double h_pert) {
   const size_t count = n * n * n;
   const double volume = size * size * size;
+  float h_max = 0.f;
   struct cell *cell = malloc(sizeof(struct cell));
   bzero(cell, sizeof(struct cell));
 
@@ -121,7 +121,11 @@ struct cell *make_cell(size_t n, double *offset, double size, double h,
             part->v[2] = 0.f;
             break;
         }
-        part->h = size * h / (float)n;
+        if (h_pert)
+          part->h = size * h * random_uniform(1.f, 1.1f) / (float)n;
+        else
+          part->h = size * h / (float)n;
+        h_max = fmaxf(h_max, part->h);
         part->id = ++(*partId);
 
 #if defined(GIZMO_SPH) || defined(SHADOWFAX_SPH)
@@ -156,7 +160,7 @@ struct cell *make_cell(size_t n, double *offset, double size, double h,
 
   /* Cell properties */
   cell->split = 0;
-  cell->h_max = h;
+  cell->h_max = h_max;
   cell->count = count;
   cell->dx_max_part = 0.;
   cell->dx_max_sort = 0.;
@@ -288,33 +292,11 @@ void dump_particle_fields(char *fileName, struct cell *main_cell,
   fclose(file);
 }
 
-/**
- * @brief Compares the vectorised result against
- * the serial result of the interaction.
- *
- * @param serial_parts Particle array that has been interacted serially
- * @param vec_parts Particle array to be interacted using vectors
- * @param count No. of particles that have been interacted
- * @param threshold Level of accuracy needed
- *
- * @return Non-zero value if difference found, 0 otherwise
- */
-int check_results(struct part *serial_parts, struct part *vec_parts, int count,
-                  double threshold) {
-  int result = 0;
-
-  for (int i = 0; i < count; i++)
-    result += compare_particles(serial_parts[i], vec_parts[i], threshold);
-
-  return result;
-}
-
 /* Just a forward declaration... */
 void runner_doself1_density(struct runner *r, struct cell *ci);
 void runner_doself1_density_vec(struct runner *r, struct cell *ci);
-void runner_dopair1_density(struct runner *r, struct cell *ci, struct cell *cj);
-void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
-                                struct cell *cj);
+void runner_dopair1_branch_density(struct runner *r, struct cell *ci,
+                                   struct cell *cj);
 
 /* And go... */
 int main(int argc, char *argv[]) {
@@ -322,8 +304,7 @@ int main(int argc, char *argv[]) {
   engine_pin();
   size_t runs = 0, particles = 0;
   double h = 1.23485, size = 1., rho = 1.;
-  double perturbation = 0.;
-  double threshold = ACC_THRESHOLD;
+  double perturbation = 0., h_pert = 0.;
   char outputFileNameExtension[200] = "";
   char outputFileName[200] = "";
   enum velocity_types vel = velocity_zero;
@@ -339,11 +320,14 @@ int main(int argc, char *argv[]) {
   srand(0);
 
   char c;
-  while ((c = getopt(argc, argv, "m:s:h:n:r:t:d:f:v:a:")) != -1) {
+  while ((c = getopt(argc, argv, "m:s:h:p:n:r:t:d:f:v:")) != -1) {
     switch (c) {
       case 'h':
         sscanf(optarg, "%lf", &h);
         break;
+      case 'p':
+        sscanf(optarg, "%lf", &h_pert);
+        break;
       case 's':
         sscanf(optarg, "%lf", &size);
         break;
@@ -365,9 +349,6 @@ int main(int argc, char *argv[]) {
       case 'v':
         sscanf(optarg, "%d", (int *)&vel);
         break;
-      case 'a':
-        sscanf(optarg, "%lf", &threshold);
-        break;
       case '?':
         error("Unknown option.");
         break;
@@ -382,6 +363,7 @@ int main(int argc, char *argv[]) {
         "runner_doself1_density()."
         "\n\nOptions:"
         "\n-h DISTANCE=1.2348 - Smoothing length in units of <x>"
+        "\n-p                 - Random fractional change in h, h=h*random(1,p)"
         "\n-m rho             - Physical density in the cell"
         "\n-s size            - Physical size of the cell"
         "\n-d pert            - Perturbation to apply to the particles [0,1["
@@ -435,8 +417,9 @@ int main(int argc, char *argv[]) {
     for (int j = 0; j < 3; ++j) {
       for (int k = 0; k < 3; ++k) {
         double offset[3] = {i * size, j * size, k * size};
-        cells[i * 9 + j * 3 + k] = make_cell(particles, offset, size, h, rho,
-                                             &partId, perturbation, vel);
+        cells[i * 9 + j * 3 + k] =
+            make_cell(particles, offset, size, h, rho, &partId, perturbation,
+                      vel, h_pert);
 
         runner_do_drift_part(&runner, cells[i * 9 + j * 3 + k], 0);
 
@@ -504,10 +487,6 @@ int main(int argc, char *argv[]) {
     }
   }
 
-  /* Store the vectorised particle results. */
-  struct part vec_parts[main_cell->count];
-  for (int i = 0; i < main_cell->count; i++) vec_parts[i] = main_cell->parts[i];
-
   /* Output timing */
   ticks corner_time = timings[0] + timings[2] + timings[6] + timings[8] +
                       timings[18] + timings[20] + timings[24] + timings[26];
@@ -552,10 +531,6 @@ int main(int argc, char *argv[]) {
   sprintf(outputFileName, "brute_force_27_%s.dat", outputFileNameExtension);
   dump_particle_fields(outputFileName, main_cell, cells);
 
-  /* Check serial results against the vectorised results. */
-  if (check_results(main_cell->parts, vec_parts, main_cell->count, threshold))
-    message("Differences found...");
-
   /* Output timing */
   message("Brute force calculation took : %15lli ticks.", toc - tic);
 
diff --git a/tests/test27cells.sh.in b/tests/test27cells.sh.in
index 4312ce55e13097d4ae40c289b9c5caa885ff37cc..a313a594c10c80598852a92e7920ac846e15ba2d 100755
--- a/tests/test27cells.sh.in
+++ b/tests/test27cells.sh.in
@@ -1,13 +1,14 @@
 #!/bin/bash
 
+# Test for particles with the same smoothing length
 for v in {0..3}
 do
     echo ""
 	
     rm -f brute_force_27_standard.dat swift_dopair_27_standard.dat
 
-    echo "Running ./test27cells -n 6 -r 1 -d 0 -f standard -v $v -a 1e-4"
-    ./test27cells -n 6 -r 1 -d 0 -f standard -v $v -a 1e-4
+    echo "Running ./test27cells -n 6 -r 1 -d 0 -f standard -v $v"
+    ./test27cells -n 6 -r 1 -d 0 -f standard -v $v
 
     if [ -e brute_force_27_standard.dat ]
     then
@@ -27,4 +28,32 @@ do
     
 done
 
+# Test for particles with random smoothing lengths
+for v in {0..3}
+do
+    echo ""
+	
+    rm -f brute_force_27_standard.dat swift_dopair_27_standard.dat
+
+    echo "Running ./test27cells -n 6 -r 1 -d 0 -f standard -v $v -p 1.1"
+    ./test27cells -n 6 -r 1 -d 0 -f standard -v $v -p 1.1
+
+    if [ -e brute_force_27_standard.dat ]
+    then
+	if python @srcdir@/difffloat.py brute_force_27_standard.dat swift_dopair_27_standard.dat @srcdir@/tolerance_27_perturbed_h.dat 6
+	then
+	    echo "Accuracy test passed"
+	else
+	    echo "Accuracy test failed"
+	    exit 1
+	fi
+    else
+	echo "Error Missing test output file"
+	exit 1
+    fi
+
+    echo "------------"
+    
+done
+
 exit $?
diff --git a/tests/test27cellsPerturbed.sh.in b/tests/test27cellsPerturbed.sh.in
index 2f2e1db76346ca8f0ea4c2365ee349e232a1ce53..bbee027bda407c25f40cbf797e2bfcdc6a4827e4 100755
--- a/tests/test27cellsPerturbed.sh.in
+++ b/tests/test27cellsPerturbed.sh.in
@@ -1,13 +1,14 @@
 #!/bin/bash
 
+# Test for particles with the same smoothing length
 for v in {0..3}
 do
     echo ""
 
     rm -f brute_force_27_perturbed.dat swift_dopair_27_perturbed.dat
 
-    echo "Running ./test27cells -n 6 -r 1 -d 0.1 -f perturbed -v $v -a 5e-4"
-    ./test27cells -n 6 -r 1 -d 0.1 -f perturbed -v $v -a 5e-4
+    echo "Running ./test27cells -n 6 -r 1 -d 0.1 -f perturbed -v $v"
+    ./test27cells -n 6 -r 1 -d 0.1 -f perturbed -v $v
 
     if [ -e brute_force_27_perturbed.dat ]
     then
@@ -27,4 +28,32 @@ do
 
 done
 
+# Test for particles with random smoothing lengths
+for v in {0..3}
+do
+    echo ""
+
+    rm -f brute_force_27_perturbed.dat swift_dopair_27_perturbed.dat
+
+    echo "Running ./test27cells -n 6 -r 1 -d 0.1 -f perturbed -v $v -p 1.1"
+    ./test27cells -n 6 -r 1 -d 0.1 -f perturbed -v $v -p 1.1
+
+    if [ -e brute_force_27_perturbed.dat ]
+    then
+	if python @srcdir@/difffloat.py brute_force_27_perturbed.dat swift_dopair_27_perturbed.dat @srcdir@/tolerance_27_perturbed_h.dat 6 1
+	then
+	    echo "Accuracy test passed"
+	else
+	    echo "Accuracy test failed"
+	    exit 1
+	fi
+    else
+	echo "Error Missing test output file"
+	exit 1
+    fi
+
+    echo "------------"
+
+done
+
 exit $?
diff --git a/tests/testPair.c b/tests/testPair.c
index 92987d2fdb625fec6e186a280837f145787f599b..2ca2092cf1bb41c4910ee2639c22677e2fb1dbd2 100644
--- a/tests/testPair.c
+++ b/tests/testPair.c
@@ -16,14 +16,36 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  *
  ******************************************************************************/
+#include "../config.h"
 
+/* Some standard headers. */
 #include <fenv.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
+
+/* Local headers. */
 #include "swift.h"
 
+#if defined(WITH_VECTORIZATION)
+#define DOSELF1 runner_doself1_density_vec
+#define DOPAIR1 runner_dopair1_branch_density
+#define DOSELF1_NAME "runner_doself1_density_vec"
+#define DOPAIR1_NAME "runner_dopair1_density_vec"
+#endif
+
+#ifndef DOSELF1
+#define DOSELF1 runner_doself1_density
+#define DOSELF1_NAME "runner_doself1_density"
+#endif
+
+#ifndef DOPAIR1
+#define DOPAIR1 runner_dopair1_branch_density
+#define DOPAIR1_NAME "runner_dopair1_density"
+#endif
+
+
 /* n is both particles per axis and box size:
  * particles are generated on a mesh with unit spacing
  */
@@ -187,6 +209,9 @@ void dump_particle_fields(char *fileName, struct cell *ci, struct cell *cj) {
 
 /* Just a forward declaration... */
 void runner_dopair1_density(struct runner *r, struct cell *ci, struct cell *cj);
+void runner_doself1_density_vec(struct runner *r, struct cell *ci);
+void runner_dopair1_branch_density(struct runner *r, struct cell *ci,
+                                   struct cell *cj);
 
 int main(int argc, char *argv[]) {
   size_t particles = 0, runs = 0, volume, type = 0;
@@ -206,6 +231,9 @@ int main(int argc, char *argv[]) {
   unsigned long long cpufreq = 0;
   clocks_set_cpufreq(cpufreq);
 
+  /* Choke on FP-exceptions */
+  feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW);
+
   srand(0);
 
   while ((c = getopt(argc, argv, "h:p:r:t:d:f:")) != -1) {
@@ -276,7 +304,7 @@ int main(int argc, char *argv[]) {
 
 #if defined(DEFAULT_SPH) || !defined(WITH_VECTORIZATION)
     /* Run the test */
-    runner_dopair1_density(&runner, ci, cj);
+    DOPAIR1(&runner, ci, cj);
 #endif
 
     toc = getticks();
diff --git a/tests/tolerance_125_perturbed.dat b/tests/tolerance_125_perturbed.dat
index 04e642b28cb3729cb81f8183c3e69595ac651876..a45a419c1b484be226591ddf5ce16c8ef328e9a5 100644
--- a/tests/tolerance_125_perturbed.dat
+++ b/tests/tolerance_125_perturbed.dat
@@ -1,3 +1,3 @@
 #   ID    pos_x    pos_y    pos_z      v_x      v_y      v_z        h      rho    div_v        S        u        P        c      a_x      a_y      a_z     h_dt    v_sig    dS/dt    du/dt
     0	  1e-4	   1e-4	    1e-4       1e-4	1e-4	 1e-4	    1e-4   1e-4	  1e-4	       1e-4	1e-4	 1e-4	  1e-4	 1e-4	  1e-4	   1e-4	   1e-4	   1e-4	    1e-4     1e-4
-    0	  1e-4	   1e-4	    1e-4       1e-4	1e-4	 1e-4	    1e-4   1e-4	  1e-4	       1e-4	1e-4	 1e-4	  1e-4	 5e-3	  5e-3	   5e-3	   1e-4	   1e-4	    1e-4     1e-4
+    0	  1e-4	   1e-4	    1e-4       1e-4	1e-4	 1e-4	    1e-4   1e-4	  1e-4	       1e-4	1e-4	 1e-4	  1e-4	 1e-2	  1e-2	   1e-2	   1e-4	   1e-4	    1e-4     1e-4
diff --git a/tests/tolerance_27_perturbed.dat b/tests/tolerance_27_perturbed.dat
index 9c6ee8c77cc6d53e67db9dbb86be197d49149b10..cd31b4c5aa25059e5abc012ba3d84a1c88a2c746 100644
--- a/tests/tolerance_27_perturbed.dat
+++ b/tests/tolerance_27_perturbed.dat
@@ -1,3 +1,3 @@
 #   ID      pos_x      pos_y      pos_z        v_x        v_y        v_z           rho        rho_dh        wcount     wcount_dh         div_v       curl_vx       curl_vy       curl_vz
-    0	    1e-6       1e-6	  1e-6 	       1e-6 	  1e-6	     1e-6	   1.2e-6     1e-4	    5e-5       2e-3		 4e-6	     3e-6	   3e-6		 3e-6
-    0	    1e-6       1e-6	  1e-6 	       1e-6 	  1e-6	     1e-6	   1e-6	      2e-3	    1e-5       1e-4		 4e-5	     2e-3	   2e-3	 	 2e-3
+    0	    1e-6       1e-6	  1e-6 	       1e-6 	  1e-6	     1e-6	   2e-6       1e-4	    2e-4       3e-3		 1e-5	     3e-6	   3e-6		 7e-6
+    0	    1e-6       1e-6	  1e-6 	       1e-6 	  1e-6	     1e-6	   1e-6	      2e-3	    1e-5       1e-4		 6e-5	     2e-3	   2e-3	 	 2e-3
diff --git a/tests/tolerance_27_perturbed_h.dat b/tests/tolerance_27_perturbed_h.dat
new file mode 100644
index 0000000000000000000000000000000000000000..43521a94c4339adbcf43f2e46ae578a101c23bc6
--- /dev/null
+++ b/tests/tolerance_27_perturbed_h.dat
@@ -0,0 +1,3 @@
+#   ID      pos_x      pos_y      pos_z        v_x        v_y        v_z           rho        rho_dh        wcount     wcount_dh         div_v       curl_vx       curl_vy       curl_vz
+    0	    1e-6       1e-6	  1e-6 	       1e-6 	  1e-6	     1e-6	   2e-6       1e-4	    2e-4       4e-3		 1e-5	     3e-6	   3e-6		 7e-6
+    0	    1e-6       1e-6	  1e-6 	       1e-6 	  1e-6	     1e-6	   1e-6	      1.4e-2	    1e-5       1e-4		 2.5e-4	     3e-3	   3e-3	 	 3e-3