diff --git a/src/runner_doiact.h b/src/runner_doiact.h
index 7fe19af853601e49de553a55ee205c654167d67e..c603d4ae4927183ad492244138e4217f833e1fea 100644
--- a/src/runner_doiact.h
+++ b/src/runner_doiact.h
@@ -1,6 +1,7 @@
 /*******************************************************************************
  * This file is part of SWIFT.
  * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
+ *               2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk)
  *
  * This program is free software: you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published
@@ -77,6 +78,12 @@
 #define _IACT(f) PASTE(runner_iact, f)
 #define IACT _IACT(FUNCTION)
 
+#define _IACT_NONSYM_VEC(f) PASTE(runner_iact_nonsym_vec, f)
+#define IACT_NONSYM_VEC _IACT_NONSYM_VEC(FUNCTION)
+
+#define _IACT_VEC(f) PASTE(runner_iact_vec, f)
+#define IACT_VEC _IACT_VEC(FUNCTION)
+
 #define _TIMER_DOSELF(f) PASTE(timer_doself, f)
 #define TIMER_DOSELF _TIMER_DOSELF(FUNCTION)
 
@@ -95,12 +102,6 @@
 #define _TIMER_DOPAIR_SUBSET(f) PASTE(timer_dopair_subset, f)
 #define TIMER_DOPAIR_SUBSET _TIMER_DOPAIR_SUBSET(FUNCTION)
 
-#define _IACT_NONSYM_VEC(f) PASTE(runner_iact_nonsym_vec, f)
-#define IACT_NONSYM_VEC _IACT_NONSYM_VEC(FUNCTION)
-
-#define _IACT_VEC(f) PASTE(runner_iact_vec, f)
-#define IACT_VEC _IACT_VEC(FUNCTION)
-
 /**
  * @brief Compute the interactions between a cell pair.
  *
@@ -108,18 +109,14 @@
  * @param ci The first #cell.
  * @param cj The second #cell.
  */
-
 void DOPAIR_NAIVE(struct runner *r, struct cell *restrict ci,
                   struct cell *restrict cj) {
 
-  struct engine *e = r->e;
-  int pid, pjd, k, count_i = ci->count, count_j = cj->count;
-  double shift[3] = {0.0, 0.0, 0.0};
-  struct part *restrict parts_i = ci->parts, *restrict parts_j = cj->parts;
-  struct part *restrict pi, *restrict pj;
-  double pix[3];
-  float dx[3], hi, hig2, r2;
+  const struct engine *e = r->e;
   const int ti_current = e->ti_current;
+
+  error("Don't use in actual runs ! Slow code !");
+
 #ifdef VECTORIZE
   int icount = 0;
   float r2q[VEC_SIZE] __attribute__((aligned(16)));
@@ -128,44 +125,47 @@ void DOPAIR_NAIVE(struct runner *r, struct cell *restrict ci,
   float dxq[3 * VEC_SIZE] __attribute__((aligned(16)));
   struct part *piq[VEC_SIZE], *pjq[VEC_SIZE];
 #endif
+
   TIMER_TIC
 
   /* Anything to do here? */
   if (ci->ti_end_min > ti_current && cj->ti_end_min > ti_current) return;
 
+  const int count_i = ci->count;
+  const int count_j = cj->count;
+  struct part *restrict parts_i = ci->parts;
+  struct part *restrict parts_j = cj->parts;
+
   /* Get the relative distance between the pairs, wrapping. */
-  for (k = 0; k < 3; k++) {
+  double shift[3] = {0.0, 0.0, 0.0};
+  for (int k = 0; k < 3; k++) {
     if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2)
       shift[k] = e->s->dim[k];
     else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2)
       shift[k] = -e->s->dim[k];
   }
 
-  /* printf( "runner_dopair_naive: doing pair [ %g %g %g ]/[ %g %g %g ] with
-  %i/%i parts and shift = [ %g %g %g ].\n" ,
-      ci->loc[0] , ci->loc[1] , ci->loc[2] , cj->loc[0] , cj->loc[1] ,
-  cj->loc[2] ,
-      ci->count , cj->count , shift[0] , shift[1] , shift[2] ); fflush(stdout);
-  tic = getticks(); */
-
   /* Loop over the parts in ci. */
-  for (pid = 0; pid < count_i; pid++) {
+  for (int pid = 0; pid < count_i; pid++) {
 
     /* Get a hold of the ith part in ci. */
-    pi = &parts_i[pid];
-    for (k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k];
-    hi = pi->h;
-    hig2 = hi * hi * kernel_gamma2;
+    struct part *restrict pi = &parts_i[pid];
+    const float hi = pi->h;
+
+    double pix[3];
+    for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k];
+    const float hig2 = hi * hi * kernel_gamma2;
 
     /* Loop over the parts in cj. */
-    for (pjd = 0; pjd < count_j; pjd++) {
+    for (int pjd = 0; pjd < count_j; pjd++) {
 
       /* Get a pointer to the jth particle. */
-      pj = &parts_j[pjd];
+      struct part *restrict pj = &parts_j[pjd];
 
       /* Compute the pairwise distance. */
-      r2 = 0.0f;
-      for (k = 0; k < 3; k++) {
+      float r2 = 0.0f;
+      float dx[3];
+      for (int k = 0; k < 3; k++) {
         dx[k] = pix[k] - pj->x[k];
         r2 += dx[k] * dx[k];
       }
@@ -206,7 +206,7 @@ void DOPAIR_NAIVE(struct runner *r, struct cell *restrict ci,
 #ifdef VECTORIZE
   /* Pick up any leftovers. */
   if (icount > 0)
-    for (k = 0; k < icount; k++)
+    for (int k = 0; k < icount; k++)
       IACT(r2q[k], &dxq[3 * k], hiq[k], hjq[k], piq[k], pjq[k]);
 #endif
 
@@ -215,12 +215,10 @@ void DOPAIR_NAIVE(struct runner *r, struct cell *restrict ci,
 
 void DOSELF_NAIVE(struct runner *r, struct cell *restrict c) {
 
-  int pid, pjd, k, count = c->count;
-  struct part *restrict parts = c->parts;
-  struct part *restrict pi, *restrict pj;
-  double pix[3] = {0.0, 0.0, 0.0};
-  float dx[3], hi, hig2, r2;
   const int ti_current = r->e->ti_current;
+
+  error("Don't use in actual runs ! Slow code !");
+
 #ifdef VECTORIZE
   int icount = 0;
   float r2q[VEC_SIZE] __attribute__((aligned(16)));
@@ -229,38 +227,34 @@ void DOSELF_NAIVE(struct runner *r, struct cell *restrict c) {
   float dxq[3 * VEC_SIZE] __attribute__((aligned(16)));
   struct part *piq[VEC_SIZE], *pjq[VEC_SIZE];
 #endif
+
   TIMER_TIC
 
   /* Anything to do here? */
   if (c->ti_end_min > ti_current) return;
 
-  /* printf( "runner_dopair_naive: doing pair [ %g %g %g ]/[ %g %g %g ] with
-  %i/%i parts and shift = [ %g %g %g ].\n" ,
-      ci->loc[0] , ci->loc[1] , ci->loc[2] , cj->loc[0] , cj->loc[1] ,
-  cj->loc[2] ,
-      ci->count , cj->count , shift[0] , shift[1] , shift[2] ); fflush(stdout);
-  tic = getticks(); */
+  const int count = c->count;
+  struct part *restrict parts = c->parts;
 
   /* Loop over the parts in ci. */
-  for (pid = 0; pid < count; pid++) {
+  for (int pid = 0; pid < count; pid++) {
 
     /* Get a hold of the ith part in ci. */
-    pi = &parts[pid];
-    pix[0] = pi->x[0];
-    pix[1] = pi->x[1];
-    pix[2] = pi->x[2];
-    hi = pi->h;
-    hig2 = hi * hi * kernel_gamma2;
+    struct part *restrict pi = &parts[pid];
+    const double pix[3] = {pi->x[0], pi->x[1], pi->x[2]};
+    const float hi = pi->h;
+    const float hig2 = hi * hi * kernel_gamma2;
 
     /* Loop over the parts in cj. */
-    for (pjd = pid + 1; pjd < count; pjd++) {
+    for (int pjd = pid + 1; pjd < count; pjd++) {
 
       /* Get a pointer to the jth particle. */
-      pj = &parts[pjd];
+      struct part *restrict pj = &parts[pjd];
 
       /* Compute the pairwise distance. */
-      r2 = 0.0f;
-      for (k = 0; k < 3; k++) {
+      float r2 = 0.0f;
+      float dx[3];
+      for (int k = 0; k < 3; k++) {
         dx[k] = pix[k] - pj->x[k];
         r2 += dx[k] * dx[k];
       }
@@ -301,7 +295,7 @@ void DOSELF_NAIVE(struct runner *r, struct cell *restrict c) {
 #ifdef VECTORIZE
   /* Pick up any leftovers. */
   if (icount > 0)
-    for (k = 0; k < icount; k++)
+    for (int k = 0; k < icount; k++)
       IACT(r2q[k], &dxq[3 * k], hiq[k], hjq[k], piq[k], pjq[k]);
 #endif
 
@@ -319,18 +313,121 @@ void DOSELF_NAIVE(struct runner *r, struct cell *restrict c) {
  * @param count The number of particles in @c ind.
  * @param cj The second #cell.
  */
+void DOPAIR_SUBSET_NAIVE(struct runner *r, struct cell *restrict ci,
+                         struct part *restrict parts_i, int *restrict ind,
+                         int count, struct cell *restrict cj) {
+
+  struct engine *e = r->e;
+
+  error("Don't use in actual runs ! Slow code !");
+
+#ifdef VECTORIZE
+  int icount = 0;
+  float r2q[VEC_SIZE] __attribute__((aligned(16)));
+  float hiq[VEC_SIZE] __attribute__((aligned(16)));
+  float hjq[VEC_SIZE] __attribute__((aligned(16)));
+  float dxq[3 * VEC_SIZE] __attribute__((aligned(16)));
+  struct part *piq[VEC_SIZE], *pjq[VEC_SIZE];
+#endif
+
+  TIMER_TIC
+
+  const int count_j = cj->count;
+  struct part *restrict parts_j = cj->parts;
+
+  /* Get the relative distance between the pairs, wrapping. */
+  double shift[3] = {0.0, 0.0, 0.0};
+  for (int k = 0; k < 3; k++) {
+    if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2)
+      shift[k] = e->s->dim[k];
+    else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2)
+      shift[k] = -e->s->dim[k];
+  }
+
+  /* Loop over the parts_i. */
+  for (int pid = 0; pid < count; pid++) {
+
+    /* Get a hold of the ith part in ci. */
+    struct part *restrict pi = &parts_i[ind[pid]];
+    double pix[3];
+    for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k];
+    const float hi = pi->h;
+    const float hig2 = hi * hi * kernel_gamma2;
+
+    /* Loop over the parts in cj. */
+    for (int pjd = 0; pjd < count_j; pjd++) {
+
+      /* Get a pointer to the jth particle. */
+      struct part *restrict pj = &parts_j[pjd];
+
+      /* Compute the pairwise distance. */
+      float r2 = 0.0f;
+      float dx[3];
+      for (int k = 0; k < 3; k++) {
+        dx[k] = pix[k] - pj->x[k];
+        r2 += dx[k] * dx[k];
+      }
 
+      /* Hit or miss? */
+      if (r2 < hig2) {
+
+#ifndef VECTORIZE
+
+        IACT_NONSYM(r2, dx, hi, pj->h, pi, pj);
+
+#else
+
+        /* Add this interaction to the queue. */
+        r2q[icount] = r2;
+        dxq[3 * icount + 0] = dx[0];
+        dxq[3 * icount + 1] = dx[1];
+        dxq[3 * icount + 2] = dx[2];
+        hiq[icount] = hi;
+        hjq[icount] = pj->h;
+        piq[icount] = pi;
+        pjq[icount] = pj;
+        icount += 1;
+
+        /* Flush? */
+        if (icount == VEC_SIZE) {
+          IACT_NONSYM_VEC(r2q, dxq, hiq, hjq, piq, pjq);
+          icount = 0;
+        }
+
+#endif
+      }
+
+    } /* loop over the parts in cj. */
+
+  } /* loop over the parts in ci. */
+
+#ifdef VECTORIZE
+  /* Pick up any leftovers. */
+  if (icount > 0)
+    for (int k = 0; k < icount; k++)
+      IACT_NONSYM(r2q[k], &dxq[3 * k], hiq[k], hjq[k], piq[k], pjq[k]);
+#endif
+
+  TIMER_TOC(timer_dopair_subset);
+}
+
+/**
+ * @brief Compute the interactions between a cell pair, but only for the
+ *      given indices in ci.
+ *
+ * @param r The #runner.
+ * @param ci The first #cell.
+ * @param parts_i The #part to interact with @c cj.
+ * @param ind The list of indices of particles in @c ci to interact with.
+ * @param count The number of particles in @c ind.
+ * @param cj The second #cell.
+ */
 void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci,
                    struct part *restrict parts_i, int *restrict ind, int count,
                    struct cell *restrict cj) {
 
   struct engine *e = r->e;
-  int pid, pjd, sid, k, count_j = cj->count, flipped;
-  double shift[3] = {0.0, 0.0, 0.0};
-  struct part *restrict pi, *restrict pj, *restrict parts_j = cj->parts;
-  double pix[3];
-  float dx[3], hi, hig2, r2, di, dxj;
-  struct entry *sort_j;
+
 #ifdef VECTORIZE
   int icount = 0;
   float r2q[VEC_SIZE] __attribute__((aligned(16)));
@@ -339,10 +436,15 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci,
   float dxq[3 * VEC_SIZE] __attribute__((aligned(16)));
   struct part *piq[VEC_SIZE], *pjq[VEC_SIZE];
 #endif
+
   TIMER_TIC
 
+  const int count_j = cj->count;
+  struct part *restrict parts_j = cj->parts;
+
   /* Get the relative distance between the pairs, wrapping. */
-  for (k = 0; k < 3; k++) {
+  double shift[3] = {0.0, 0.0, 0.0};
+  for (int k = 0; k < 3; k++) {
     if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2)
       shift[k] = e->s->dim[k];
     else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2)
@@ -350,52 +452,50 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci,
   }
 
   /* Get the sorting index. */
-  for (sid = 0, k = 0; k < 3; k++)
+  int sid = 0;
+  for (int k = 0; k < 3; k++)
     sid = 3 * sid + ((cj->loc[k] - ci->loc[k] + shift[k] < 0)
                          ? 0
                          : (cj->loc[k] - ci->loc[k] + shift[k] > 0) ? 2 : 1);
 
   /* Switch the cells around? */
-  flipped = runner_flip[sid];
+  const int flipped = runner_flip[sid];
   sid = sortlistID[sid];
 
   /* Have the cells been sorted? */
   if (!(cj->sorted & (1 << sid))) error("Trying to interact unsorted cells.");
 
-  /* printf( "runner_dopair_naive: doing pair [ %g %g %g ]/[ %g %g %g ] with
-  %i/%i parts and shift = [ %g %g %g ].\n" ,
-      ci->loc[0] , ci->loc[1] , ci->loc[2] , cj->loc[0] , cj->loc[1] ,
-  cj->loc[2] ,
-      ci->count , cj->count , shift[0] , shift[1] , shift[2] ); fflush(stdout);
-  tic = getticks(); */
-
   /* Pick-out the sorted lists. */
-  sort_j = &cj->sort[sid * (cj->count + 1)];
-  dxj = cj->dx_max;
+  const struct entry *restrict sort_j = &cj->sort[sid * (cj->count + 1)];
+  const float dxj = cj->dx_max;
 
   /* Parts are on the left? */
   if (!flipped) {
 
     /* Loop over the parts_i. */
-    for (pid = 0; pid < count; pid++) {
+    for (int pid = 0; pid < count; pid++) {
 
       /* Get a hold of the ith part in ci. */
-      pi = &parts_i[ind[pid]];
-      for (k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k];
-      hi = pi->h;
-      hig2 = hi * hi * kernel_gamma2;
-      di = hi * kernel_gamma + dxj + pix[0] * runner_shift[sid][0] +
-           pix[1] * runner_shift[sid][1] + pix[2] * runner_shift[sid][2];
+      struct part *restrict pi = &parts_i[ind[pid]];
+      double pix[3];
+      for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k];
+
+      const float hi = pi->h;
+      const float hig2 = hi * hi * kernel_gamma2;
+      const float di = hi * kernel_gamma + dxj + pix[0] * runner_shift[sid][0] +
+                       pix[1] * runner_shift[sid][1] +
+                       pix[2] * runner_shift[sid][2];
 
       /* Loop over the parts in cj. */
-      for (pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) {
+      for (int pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) {
 
         /* Get a pointer to the jth particle. */
-        pj = &parts_j[sort_j[pjd].i];
+        struct part *restrict pj = &parts_j[sort_j[pjd].i];
 
         /* Compute the pairwise distance. */
-        r2 = 0.0f;
-        for (k = 0; k < 3; k++) {
+        float r2 = 0.0f;
+        float dx[3];
+        for (int k = 0; k < 3; k++) {
           dx[k] = pix[k] - pj->x[k];
           r2 += dx[k] * dx[k];
         }
@@ -439,25 +539,28 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci,
   else {
 
     /* Loop over the parts_i. */
-    for (pid = 0; pid < count; pid++) {
+    for (int pid = 0; pid < count; pid++) {
 
       /* Get a hold of the ith part in ci. */
-      pi = &parts_i[ind[pid]];
-      for (k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k];
-      hi = pi->h;
-      hig2 = hi * hi * kernel_gamma2;
-      di = -hi * kernel_gamma - dxj + pix[0] * runner_shift[sid][0] +
-           pix[1] * runner_shift[sid][1] + pix[2] * runner_shift[sid][2];
+      struct part *restrict pi = &parts_i[ind[pid]];
+      double pix[3];
+      for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k];
+      const float hi = pi->h;
+      const float hig2 = hi * hi * kernel_gamma2;
+      const float di =
+          -hi * kernel_gamma - dxj + pix[0] * runner_shift[sid][0] +
+          pix[1] * runner_shift[sid][1] + pix[2] * runner_shift[sid][2];
 
       /* Loop over the parts in cj. */
-      for (pjd = count_j - 1; pjd >= 0 && di < sort_j[pjd].d; pjd--) {
+      for (int pjd = count_j - 1; pjd >= 0 && di < sort_j[pjd].d; pjd--) {
 
         /* Get a pointer to the jth particle. */
-        pj = &parts_j[sort_j[pjd].i];
+        struct part *restrict pj = &parts_j[sort_j[pjd].i];
 
         /* Compute the pairwise distance. */
-        r2 = 0.0f;
-        for (k = 0; k < 3; k++) {
+        float r2 = 0.0f;
+        float dx[3];
+        for (int k = 0; k < 3; k++) {
           dx[k] = pix[k] - pj->x[k];
           r2 += dx[k] * dx[k];
         }
@@ -499,119 +602,7 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci,
 #ifdef VECTORIZE
   /* Pick up any leftovers. */
   if (icount > 0)
-    for (k = 0; k < icount; k++)
-      IACT_NONSYM(r2q[k], &dxq[3 * k], hiq[k], hjq[k], piq[k], pjq[k]);
-#endif
-
-  TIMER_TOC(timer_dopair_subset);
-}
-
-/**
- * @brief Compute the interactions between a cell pair, but only for the
- *      given indices in ci.
- *
- * @param r The #runner.
- * @param ci The first #cell.
- * @param parts_i The #part to interact with @c cj.
- * @param ind The list of indices of particles in @c ci to interact with.
- * @param count The number of particles in @c ind.
- * @param cj The second #cell.
- */
-
-void DOPAIR_SUBSET_NAIVE(struct runner *r, struct cell *restrict ci,
-                         struct part *restrict parts_i, int *restrict ind,
-                         int count, struct cell *restrict cj) {
-
-  struct engine *e = r->e;
-  int pid, pjd, k, count_j = cj->count;
-  double shift[3] = {0.0, 0.0, 0.0};
-  struct part *restrict pi, *restrict pj, *restrict parts_j = cj->parts;
-  double pix[3];
-  float dx[3], hi, hig2, r2;
-#ifdef VECTORIZE
-  int icount = 0;
-  float r2q[VEC_SIZE] __attribute__((aligned(16)));
-  float hiq[VEC_SIZE] __attribute__((aligned(16)));
-  float hjq[VEC_SIZE] __attribute__((aligned(16)));
-  float dxq[3 * VEC_SIZE] __attribute__((aligned(16)));
-  struct part *piq[VEC_SIZE], *pjq[VEC_SIZE];
-#endif
-  TIMER_TIC
-
-  /* Get the relative distance between the pairs, wrapping. */
-  for (k = 0; k < 3; k++) {
-    if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2)
-      shift[k] = e->s->dim[k];
-    else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2)
-      shift[k] = -e->s->dim[k];
-  }
-
-  /* printf( "runner_dopair_naive: doing pair [ %g %g %g ]/[ %g %g %g ] with
-  %i/%i parts and shift = [ %g %g %g ].\n" ,
-      ci->loc[0] , ci->loc[1] , ci->loc[2] , cj->loc[0] , cj->loc[1] ,
-  cj->loc[2] ,
-      ci->count , cj->count , shift[0] , shift[1] , shift[2] ); fflush(stdout);
-  tic = getticks(); */
-
-  /* Loop over the parts_i. */
-  for (pid = 0; pid < count; pid++) {
-
-    /* Get a hold of the ith part in ci. */
-    pi = &parts_i[ind[pid]];
-    for (k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k];
-    hi = pi->h;
-    hig2 = hi * hi * kernel_gamma2;
-
-    /* Loop over the parts in cj. */
-    for (pjd = 0; pjd < count_j; pjd++) {
-
-      /* Get a pointer to the jth particle. */
-      pj = &parts_j[pjd];
-
-      /* Compute the pairwise distance. */
-      r2 = 0.0f;
-      for (k = 0; k < 3; k++) {
-        dx[k] = pix[k] - pj->x[k];
-        r2 += dx[k] * dx[k];
-      }
-
-      /* Hit or miss? */
-      if (r2 < hig2) {
-
-#ifndef VECTORIZE
-
-        IACT_NONSYM(r2, dx, hi, pj->h, pi, pj);
-
-#else
-
-        /* Add this interaction to the queue. */
-        r2q[icount] = r2;
-        dxq[3 * icount + 0] = dx[0];
-        dxq[3 * icount + 1] = dx[1];
-        dxq[3 * icount + 2] = dx[2];
-        hiq[icount] = hi;
-        hjq[icount] = pj->h;
-        piq[icount] = pi;
-        pjq[icount] = pj;
-        icount += 1;
-
-        /* Flush? */
-        if (icount == VEC_SIZE) {
-          IACT_NONSYM_VEC(r2q, dxq, hiq, hjq, piq, pjq);
-          icount = 0;
-        }
-
-#endif
-      }
-
-    } /* loop over the parts in cj. */
-
-  } /* loop over the parts in ci. */
-
-#ifdef VECTORIZE
-  /* Pick up any leftovers. */
-  if (icount > 0)
-    for (k = 0; k < icount; k++)
+    for (int k = 0; k < icount; k++)
       IACT_NONSYM(r2q[k], &dxq[3 * k], hiq[k], hjq[k], piq[k], pjq[k]);
 #endif
 
@@ -628,15 +619,9 @@ void DOPAIR_SUBSET_NAIVE(struct runner *r, struct cell *restrict ci,
  * @param ind The list of indices of particles in @c ci to interact with.
  * @param count The number of particles in @c ind.
  */
-
 void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci,
                    struct part *restrict parts, int *restrict ind, int count) {
 
-  int pid, pjd, k, count_i = ci->count;
-  struct part *restrict parts_j = ci->parts;
-  struct part *restrict pi, *restrict pj;
-  double pix[3] = {0.0, 0.0, 0.0};
-  float dx[3], hi, hig2, r2;
 #ifdef VECTORIZE
   int icount = 0;
   float r2q[VEC_SIZE] __attribute__((aligned(16)));
@@ -645,35 +630,31 @@ void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci,
   float dxq[3 * VEC_SIZE] __attribute__((aligned(16)));
   struct part *piq[VEC_SIZE], *pjq[VEC_SIZE];
 #endif
+
   TIMER_TIC
 
-  /* printf( "runner_dopair_naive: doing pair [ %g %g %g ]/[ %g %g %g ] with
-  %i/%i parts and shift = [ %g %g %g ].\n" ,
-      ci->loc[0] , ci->loc[1] , ci->loc[2] , cj->loc[0] , cj->loc[1] ,
-  cj->loc[2] ,
-      ci->count , cj->count , shift[0] , shift[1] , shift[2] ); fflush(stdout);
-  tic = getticks(); */
+  const int count_i = ci->count;
+  struct part *restrict parts_j = ci->parts;
 
   /* Loop over the parts in ci. */
-  for (pid = 0; pid < count; pid++) {
+  for (int pid = 0; pid < count; pid++) {
 
     /* Get a hold of the ith part in ci. */
-    pi = &parts[ind[pid]];
-    pix[0] = pi->x[0];
-    pix[1] = pi->x[1];
-    pix[2] = pi->x[2];
-    hi = pi->h;
-    hig2 = hi * hi * kernel_gamma2;
+    struct part *restrict pi = &parts[ind[pid]];
+    const double pix[3] = {pi->x[0], pi->x[1], pi->x[2]};
+    const float hi = pi->h;
+    const float hig2 = hi * hi * kernel_gamma2;
 
     /* Loop over the parts in cj. */
-    for (pjd = 0; pjd < count_i; pjd++) {
+    for (int pjd = 0; pjd < count_i; pjd++) {
 
       /* Get a pointer to the jth particle. */
-      pj = &parts_j[pjd];
+      struct part *restrict pj = &parts_j[pjd];
 
       /* Compute the pairwise distance. */
-      r2 = 0.0f;
-      for (k = 0; k < 3; k++) {
+      float r2 = 0.0f;
+      float dx[3];
+      for (int k = 0; k < 3; k++) {
         dx[k] = pix[k] - pj->x[k];
         r2 += dx[k] * dx[k];
       }
@@ -714,7 +695,7 @@ void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci,
 #ifdef VECTORIZE
   /* Pick up any leftovers. */
   if (icount > 0)
-    for (k = 0; k < icount; k++)
+    for (int k = 0; k < icount; k++)
       IACT_NONSYM(r2q[k], &dxq[3 * k], hiq[k], hjq[k], piq[k], pjq[k]);
 #endif
 
@@ -722,26 +703,17 @@ void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci,
 }
 
 /**
- * @brief Compute the interactions between a cell pair.
+ * @brief Compute the interactions between a cell pair (non-symmetric).
  *
  * @param r The #runner.
  * @param ci The first #cell.
  * @param cj The second #cell.
  */
-
 void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) {
 
   struct engine *restrict e = r->e;
-  int pid, pjd, k, sid;
-  double rshift, shift[3] = {0.0, 0.0, 0.0};
-  struct entry *restrict sort_i, *restrict sort_j;
-  struct part *restrict pi, *restrict pj, *restrict parts_i, *restrict parts_j;
-  double pix[3], pjx[3], di, dj;
-  float dx[3], hi, hig2, hj, hjg2, r2, dx_max;
-  double hi_max, hj_max;
-  double di_max, dj_min;
-  int count_i, count_j;
   const int ti_current = e->ti_current;
+
 #ifdef VECTORIZE
   int icount = 0;
   float r2q[VEC_SIZE] __attribute__((aligned(16)));
@@ -750,60 +722,64 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) {
   float dxq[3 * VEC_SIZE] __attribute__((aligned(16)));
   struct part *piq[VEC_SIZE], *pjq[VEC_SIZE];
 #endif
+
   TIMER_TIC
 
   /* Anything to do here? */
   if (ci->ti_end_min > ti_current && cj->ti_end_min > ti_current) return;
 
   /* Get the sort ID. */
-  sid = space_getsid(e->s, &ci, &cj, shift);
+  double shift[3] = {0.0, 0.0, 0.0};
+  const int sid = space_getsid(e->s, &ci, &cj, shift);
 
   /* Have the cells been sorted? */
   if (!(ci->sorted & (1 << sid)) || !(cj->sorted & (1 << sid)))
     error("Trying to interact unsorted cells.");
 
   /* Get the cutoff shift. */
-  for (rshift = 0.0, k = 0; k < 3; k++)
-    rshift += shift[k] * runner_shift[sid][k];
+  double rshift = 0.0;
+  for (int k = 0; k < 3; k++) rshift += shift[k] * runner_shift[sid][k];
 
   /* Pick-out the sorted lists. */
-  sort_i = &ci->sort[sid * (ci->count + 1)];
-  sort_j = &cj->sort[sid * (cj->count + 1)];
+  const struct entry *restrict sort_i = &ci->sort[sid * (ci->count + 1)];
+  const struct entry *restrict sort_j = &cj->sort[sid * (cj->count + 1)];
 
   /* Get some other useful values. */
-  hi_max = ci->h_max * kernel_gamma - rshift;
-  hj_max = cj->h_max * kernel_gamma;
-  count_i = ci->count;
-  count_j = cj->count;
-  parts_i = ci->parts;
-  parts_j = cj->parts;
-  di_max = sort_i[count_i - 1].d - rshift;
-  dj_min = sort_j[0].d;
-  dx_max = (ci->dx_max + cj->dx_max);
+  const double hi_max = ci->h_max * kernel_gamma - rshift;
+  const double hj_max = cj->h_max * kernel_gamma;
+  const int count_i = ci->count;
+  const int count_j = cj->count;
+  struct part *restrict parts_i = ci->parts;
+  struct part *restrict parts_j = cj->parts;
+  const double di_max = sort_i[count_i - 1].d - rshift;
+  const double dj_min = sort_j[0].d;
+  const float dx_max = (ci->dx_max + cj->dx_max);
 
   /* Loop over the parts in ci. */
-  for (pid = count_i - 1; pid >= 0 && sort_i[pid].d + hi_max + dx_max > dj_min;
-       pid--) {
+  for (int pid = count_i - 1;
+       pid >= 0 && sort_i[pid].d + hi_max + dx_max > dj_min; pid--) {
 
     /* Get a hold of the ith part in ci. */
-    pi = &parts_i[sort_i[pid].i];
+    struct part *restrict pi = &parts_i[sort_i[pid].i];
     if (pi->ti_end > ti_current) continue;
-    hi = pi->h;
-    di = sort_i[pid].d + hi * kernel_gamma + dx_max - rshift;
+    const float hi = pi->h;
+    const double di = sort_i[pid].d + hi * kernel_gamma + dx_max - rshift;
     if (di < dj_min) continue;
 
-    hig2 = hi * hi * kernel_gamma2;
-    for (k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k];
+    double pix[3];
+    for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k];
+    const float hig2 = hi * hi * kernel_gamma2;
 
     /* Loop over the parts in cj. */
-    for (pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) {
+    for (int pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) {
 
       /* Get a pointer to the jth particle. */
-      pj = &parts_j[sort_j[pjd].i];
+      struct part *restrict pj = &parts_j[sort_j[pjd].i];
 
       /* Compute the pairwise distance. */
-      r2 = 0.0f;
-      for (k = 0; k < 3; k++) {
+      float r2 = 0.0f;
+      float dx[3];
+      for (int k = 0; k < 3; k++) {
         dx[k] = pix[k] - pj->x[k];
         r2 += dx[k] * dx[k];
       }
@@ -841,33 +817,31 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) {
 
   } /* loop over the parts in ci. */
 
-  /* printf( "runner_dopair: first half took %.3f %s...\n" ,
-  clocks_from_ticks(getticks() - tic), clocks_getunit());
-  tic = getticks(); */
-
   /* Loop over the parts in cj. */
-  for (pjd = 0; pjd < count_j && sort_j[pjd].d - hj_max - dx_max < di_max;
+  for (int pjd = 0; pjd < count_j && sort_j[pjd].d - hj_max - dx_max < di_max;
        pjd++) {
 
     /* Get a hold of the jth part in cj. */
-    pj = &parts_j[sort_j[pjd].i];
+    struct part *restrict pj = &parts_j[sort_j[pjd].i];
     if (pj->ti_end > ti_current) continue;
-    hj = pj->h;
-    dj = sort_j[pjd].d - hj * kernel_gamma - dx_max - rshift;
+    const float hj = pj->h;
+    const double dj = sort_j[pjd].d - hj * kernel_gamma - dx_max - rshift;
     if (dj > di_max) continue;
 
-    for (k = 0; k < 3; k++) pjx[k] = pj->x[k] + shift[k];
-    hjg2 = hj * hj * kernel_gamma2;
+    double pjx[3];
+    for (int k = 0; k < 3; k++) pjx[k] = pj->x[k] + shift[k];
+    const float hjg2 = hj * hj * kernel_gamma2;
 
     /* Loop over the parts in ci. */
-    for (pid = count_i - 1; pid >= 0 && sort_i[pid].d > dj; pid--) {
+    for (int pid = count_i - 1; pid >= 0 && sort_i[pid].d > dj; pid--) {
 
       /* Get a pointer to the jth particle. */
-      pi = &parts_i[sort_i[pid].i];
+      struct part *restrict pi = &parts_i[sort_i[pid].i];
 
       /* Compute the pairwise distance. */
-      r2 = 0.0f;
-      for (k = 0; k < 3; k++) {
+      float r2 = 0.0f;
+      float dx[3];
+      for (int k = 0; k < 3; k++) {
         dx[k] = pjx[k] - pi->x[k];
         r2 += dx[k] * dx[k];
       }
@@ -908,28 +882,25 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) {
 #ifdef VECTORIZE
   /* Pick up any leftovers. */
   if (icount > 0)
-    for (k = 0; k < icount; k++)
+    for (int k = 0; k < icount; k++)
       IACT_NONSYM(r2q[k], &dxq[3 * k], hiq[k], hjq[k], piq[k], pjq[k]);
 #endif
 
   TIMER_TOC(TIMER_DOPAIR);
 }
 
+/**
+ * @brief Compute the interactions between a cell pair (symmetric)
+ *
+ * @param r The #runner.
+ * @param ci The first #cell.
+ * @param cj The second #cell.
+ */
 void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) {
 
   struct engine *restrict e = r->e;
-  int pid, pjd, k, sid;
-  double rshift, shift[3] = {0.0, 0.0, 0.0};
-  struct entry *sort_i, *sort_j;
-  struct entry *sortdt_i = NULL, *sortdt_j = NULL;
-  int countdt_i = 0, countdt_j = 0;
-  struct part *restrict pi, *restrict pj, *restrict parts_i, *restrict parts_j;
-  double pix[3], pjx[3], di, dj;
-  float dx[3], hi, hig2, hj, hjg2, r2, dx_max;
-  double hi_max, hj_max;
-  double di_max, dj_min;
-  int count_i, count_j;
   const int ti_current = e->ti_current;
+
 #ifdef VECTORIZE
   int icount1 = 0;
   float r2q1[VEC_SIZE] __attribute__((aligned(16)));
@@ -944,38 +915,42 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) {
   float dxq2[3 * VEC_SIZE] __attribute__((aligned(16)));
   struct part *piq2[VEC_SIZE], *pjq2[VEC_SIZE];
 #endif
+
   TIMER_TIC
 
   /* Anything to do here? */
   if (ci->ti_end_min > ti_current && cj->ti_end_min > ti_current) return;
 
   /* Get the shift ID. */
-  sid = space_getsid(e->s, &ci, &cj, shift);
+  double shift[3] = {0.0, 0.0, 0.0};
+  const int sid = space_getsid(e->s, &ci, &cj, shift);
 
   /* Have the cells been sorted? */
   if (!(ci->sorted & (1 << sid)) || !(cj->sorted & (1 << sid)))
     error("Trying to interact unsorted cells.");
 
   /* Get the cutoff shift. */
-  for (rshift = 0.0, k = 0; k < 3; k++)
-    rshift += shift[k] * runner_shift[sid][k];
+  double rshift = 0.0;
+  for (int k = 0; k < 3; k++) rshift += shift[k] * runner_shift[sid][k];
 
   /* Pick-out the sorted lists. */
-  sort_i = &ci->sort[sid * (ci->count + 1)];
-  sort_j = &cj->sort[sid * (cj->count + 1)];
+  struct entry *restrict sort_i = &ci->sort[sid * (ci->count + 1)];
+  struct entry *restrict sort_j = &cj->sort[sid * (cj->count + 1)];
 
   /* Get some other useful values. */
-  hi_max = ci->h_max * kernel_gamma - rshift;
-  hj_max = cj->h_max * kernel_gamma;
-  count_i = ci->count;
-  count_j = cj->count;
-  parts_i = ci->parts;
-  parts_j = cj->parts;
-  di_max = sort_i[count_i - 1].d - rshift;
-  dj_min = sort_j[0].d;
-  dx_max = (ci->dx_max + cj->dx_max);
+  const double hi_max = ci->h_max * kernel_gamma - rshift;
+  const double hj_max = cj->h_max * kernel_gamma;
+  const int count_i = ci->count;
+  const int count_j = cj->count;
+  struct part *restrict parts_i = ci->parts;
+  struct part *restrict parts_j = cj->parts;
+  const double di_max = sort_i[count_i - 1].d - rshift;
+  const double dj_min = sort_j[0].d;
+  const double dx_max = (ci->dx_max + cj->dx_max);
 
   /* Collect the number of parts left and right below dt. */
+  int countdt_i = 0, countdt_j = 0;
+  struct entry *restrict sortdt_i = NULL, *restrict sortdt_j = NULL;
   if (ci->ti_end_max <= ti_current) {
     sortdt_i = sort_i;
     countdt_i = count_i;
@@ -983,7 +958,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) {
     if ((sortdt_i = (struct entry *)alloca(sizeof(struct entry) * count_i)) ==
         NULL)
       error("Failed to allocate dt sortlists.");
-    for (k = 0; k < count_i; k++)
+    for (int k = 0; k < count_i; k++)
       if (parts_i[sort_i[k].i].ti_end <= ti_current) {
         sortdt_i[countdt_i] = sort_i[k];
         countdt_i += 1;
@@ -996,7 +971,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) {
     if ((sortdt_j = (struct entry *)alloca(sizeof(struct entry) * count_j)) ==
         NULL)
       error("Failed to allocate dt sortlists.");
-    for (k = 0; k < count_j; k++)
+    for (int k = 0; k < count_j; k++)
       if (parts_j[sort_j[k].i].ti_end <= ti_current) {
         sortdt_j[countdt_j] = sort_j[k];
         countdt_j += 1;
@@ -1004,31 +979,33 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) {
   }
 
   /* Loop over the parts in ci. */
-  for (pid = count_i - 1; pid >= 0 && sort_i[pid].d + hi_max + dx_max > dj_min;
-       pid--) {
+  for (int pid = count_i - 1;
+       pid >= 0 && sort_i[pid].d + hi_max + dx_max > dj_min; pid--) {
 
     /* Get a hold of the ith part in ci. */
-    pi = &parts_i[sort_i[pid].i];
-    hi = pi->h;
-    di = sort_i[pid].d + hi * kernel_gamma + dx_max - rshift;
+    struct part *restrict pi = &parts_i[sort_i[pid].i];
+    const float hi = pi->h;
+    const double di = sort_i[pid].d + hi * kernel_gamma + dx_max - rshift;
     if (di < dj_min) continue;
 
-    hig2 = hi * hi * kernel_gamma2;
-    for (k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k];
+    double pix[3];
+    for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k];
+    const float hig2 = hi * hi * kernel_gamma2;
 
     /* Look at valid dt parts only? */
     if (pi->ti_end > ti_current) {
 
       /* Loop over the parts in cj within dt. */
-      for (pjd = 0; pjd < countdt_j && sortdt_j[pjd].d < di; pjd++) {
+      for (int pjd = 0; pjd < countdt_j && sortdt_j[pjd].d < di; pjd++) {
 
         /* Get a pointer to the jth particle. */
-        pj = &parts_j[sortdt_j[pjd].i];
-        hj = pj->h;
+        struct part *restrict pj = &parts_j[sortdt_j[pjd].i];
+        const float hj = pj->h;
 
         /* Compute the pairwise distance. */
-        r2 = 0.0f;
-        for (k = 0; k < 3; k++) {
+        float r2 = 0.0f;
+        float dx[3];
+        for (int k = 0; k < 3; k++) {
           dx[k] = pj->x[k] - pix[k];
           r2 += dx[k] * dx[k];
         }
@@ -1070,15 +1047,16 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) {
     else {
 
       /* Loop over the parts in cj. */
-      for (pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) {
+      for (int pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) {
 
         /* Get a pointer to the jth particle. */
-        pj = &parts_j[sort_j[pjd].i];
-        hj = pj->h;
+        struct part *restrict pj = &parts_j[sort_j[pjd].i];
+        const float hj = pj->h;
 
         /* Compute the pairwise distance. */
-        r2 = 0.0f;
-        for (k = 0; k < 3; k++) {
+        float r2 = 0.0f;
+        float dx[3];
+        for (int k = 0; k < 3; k++) {
           dx[k] = pix[k] - pj->x[k];
           r2 += dx[k] * dx[k];
         }
@@ -1144,36 +1122,34 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) {
 
   } /* loop over the parts in ci. */
 
-  /* printf( "runner_dopair: first half took %.3f %s...\n" ,
-  clocks_from_ticks(getticks() - tic), clocks_getunit());
-  tic = getticks(); */
-
   /* Loop over the parts in cj. */
-  for (pjd = 0; pjd < count_j && sort_j[pjd].d - hj_max - dx_max < di_max;
+  for (int pjd = 0; pjd < count_j && sort_j[pjd].d - hj_max - dx_max < di_max;
        pjd++) {
 
     /* Get a hold of the jth part in cj. */
-    pj = &parts_j[sort_j[pjd].i];
-    hj = pj->h;
-    dj = sort_j[pjd].d - hj * kernel_gamma - dx_max - rshift;
+    struct part *restrict pj = &parts_j[sort_j[pjd].i];
+    const float hj = pj->h;
+    const double dj = sort_j[pjd].d - hj * kernel_gamma - dx_max - rshift;
     if (dj > di_max) continue;
 
-    for (k = 0; k < 3; k++) pjx[k] = pj->x[k] + shift[k];
-    hjg2 = hj * hj * kernel_gamma2;
+    double pjx[3];
+    for (int k = 0; k < 3; k++) pjx[k] = pj->x[k] + shift[k];
+    const float hjg2 = hj * hj * kernel_gamma2;
 
     /* Is this particle outside the dt? */
     if (pj->ti_end > ti_current) {
 
       /* Loop over the parts in ci. */
-      for (pid = countdt_i - 1; pid >= 0 && sortdt_i[pid].d > dj; pid--) {
+      for (int pid = countdt_i - 1; pid >= 0 && sortdt_i[pid].d > dj; pid--) {
 
         /* Get a pointer to the jth particle. */
-        pi = &parts_i[sortdt_i[pid].i];
-        hi = pi->h;
+        struct part *restrict pi = &parts_i[sortdt_i[pid].i];
+        const float hi = pi->h;
 
         /* Compute the pairwise distance. */
-        r2 = 0.0f;
-        for (k = 0; k < 3; k++) {
+        float r2 = 0.0f;
+        float dx[3];
+        for (int k = 0; k < 3; k++) {
           dx[k] = pi->x[k] - pjx[k];
           r2 += dx[k] * dx[k];
         }
@@ -1214,15 +1190,16 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) {
     else {
 
       /* Loop over the parts in ci. */
-      for (pid = count_i - 1; pid >= 0 && sort_i[pid].d > dj; pid--) {
+      for (int pid = count_i - 1; pid >= 0 && sort_i[pid].d > dj; pid--) {
 
         /* Get a pointer to the jth particle. */
-        pi = &parts_i[sort_i[pid].i];
-        hi = pi->h;
+        struct part *restrict pi = &parts_i[sort_i[pid].i];
+        const float hi = pi->h;
 
         /* Compute the pairwise distance. */
-        r2 = 0.0f;
-        for (k = 0; k < 3; k++) {
+        float r2 = 0.0f;
+        float dx[3];
+        for (int k = 0; k < 3; k++) {
           dx[k] = pjx[k] - pi->x[k];
           r2 += dx[k] * dx[k];
         }
@@ -1291,10 +1268,10 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) {
 #ifdef VECTORIZE
   /* Pick up any leftovers. */
   if (icount1 > 0)
-    for (k = 0; k < icount1; k++)
+    for (int k = 0; k < icount1; k++)
       IACT_NONSYM(r2q1[k], &dxq1[3 * k], hiq1[k], hjq1[k], piq1[k], pjq1[k]);
   if (icount2 > 0)
-    for (k = 0; k < icount2; k++)
+    for (int k = 0; k < icount2; k++)
       IACT(r2q2[k], &dxq2[3 * k], hiq2[k], hjq2[k], piq2[k], pjq2[k]);
 #endif
 
@@ -1302,20 +1279,15 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) {
 }
 
 /**
- * @brief Compute the cell self-interaction.
+ * @brief Compute the cell self-interaction (non-symmetric).
  *
  * @param r The #runner.
  * @param c The #cell.
  */
-
 void DOSELF1(struct runner *r, struct cell *restrict c) {
 
-  int k, pid, pjd, count = c->count;
-  double pix[3];
-  float dx[3], hi, hj, hig2, r2;
-  struct part *restrict parts = c->parts, *restrict pi, *restrict pj;
   const int ti_current = r->e->ti_current;
-  int firstdt = 0, countdt = 0, *indt = NULL, doj;
+
 #ifdef VECTORIZE
   int icount1 = 0;
   float r2q1[VEC_SIZE] __attribute__((aligned(16)));
@@ -1332,43 +1304,49 @@ void DOSELF1(struct runner *r, struct cell *restrict c) {
 #endif
   TIMER_TIC
 
-  /* Set up indt if needed. */
-  if (c->ti_end_min > ti_current)
-    return;
-  else if (c->ti_end_max > ti_current) {
-    if ((indt = (int *)alloca(sizeof(int) * count)) == NULL)
-      error("Failed to allocate indt.");
-    for (k = 0; k < count; k++)
-      if (parts[k].ti_end <= ti_current) {
-        indt[countdt] = k;
-        countdt += 1;
-      }
-  }
+  if (c->ti_end_min > ti_current) return;
+  if (c->ti_end_max < ti_current) error("Cell in an impossible time-zone");
+
+  struct part *restrict parts = c->parts;
+  const int count = c->count;
+
+  /* Set up indt. */
+  int *indt = NULL;
+  int countdt = 0, firstdt = 0;
+  if ((indt = (int *)alloca(sizeof(int) * count)) == NULL)
+    error("Failed to allocate indt.");
+  for (int k = 0; k < count; k++)
+    if (parts[k].ti_end <= ti_current) {
+      indt[countdt] = k;
+      countdt += 1;
+    }
 
   /* Loop over the particles in the cell. */
-  for (pid = 0; pid < count; pid++) {
+  for (int pid = 0; pid < count; pid++) {
 
     /* Get a pointer to the ith particle. */
-    pi = &parts[pid];
+    struct part *restrict pi = &parts[pid];
 
     /* Get the particle position and radius. */
-    for (k = 0; k < 3; k++) pix[k] = pi->x[k];
-    hi = pi->h;
-    hig2 = hi * hi * kernel_gamma2;
+    double pix[3];
+    for (int k = 0; k < 3; k++) pix[k] = pi->x[k];
+    const float hi = pi->h;
+    const float hig2 = hi * hi * kernel_gamma2;
 
     /* Is the ith particle inactive? */
     if (pi->ti_end > ti_current) {
 
       /* Loop over the other particles .*/
-      for (pjd = firstdt; pjd < countdt; pjd++) {
+      for (int pjd = firstdt; pjd < countdt; pjd++) {
 
         /* Get a pointer to the jth particle. */
-        pj = &parts[indt[pjd]];
-        hj = pj->h;
+        struct part *restrict pj = &parts[indt[pjd]];
+        const float hj = pj->h;
 
         /* Compute the pairwise distance. */
-        r2 = 0.0f;
-        for (k = 0; k < 3; k++) {
+        float r2 = 0.0f;
+        float dx[3];
+        for (int k = 0; k < 3; k++) {
           dx[k] = pj->x[k] - pix[k];
           r2 += dx[k] * dx[k];
         }
@@ -1413,19 +1391,21 @@ void DOSELF1(struct runner *r, struct cell *restrict c) {
       firstdt += 1;
 
       /* Loop over the other particles .*/
-      for (pjd = pid + 1; pjd < count; pjd++) {
+      for (int pjd = pid + 1; pjd < count; pjd++) {
 
         /* Get a pointer to the jth particle. */
-        pj = &parts[pjd];
-        hj = pj->h;
+        struct part *restrict pj = &parts[pjd];
+        const float hj = pj->h;
 
         /* Compute the pairwise distance. */
-        r2 = 0.0f;
-        for (k = 0; k < 3; k++) {
+        float r2 = 0.0f;
+        float dx[3];
+        for (int k = 0; k < 3; k++) {
           dx[k] = pix[k] - pj->x[k];
           r2 += dx[k] * dx[k];
         }
-        doj = (pj->ti_end <= ti_current) && (r2 < hj * hj * kernel_gamma2);
+        const int doj =
+            (pj->ti_end <= ti_current) && (r2 < hj * hj * kernel_gamma2);
 
         /* Hit or miss? */
         if (r2 < hig2 || doj) {
@@ -1516,24 +1496,26 @@ void DOSELF1(struct runner *r, struct cell *restrict c) {
 #ifdef VECTORIZE
   /* Pick up any leftovers. */
   if (icount1 > 0)
-    for (k = 0; k < icount1; k++)
+    for (int k = 0; k < icount1; k++)
       IACT_NONSYM(r2q1[k], &dxq1[3 * k], hiq1[k], hjq1[k], piq1[k], pjq1[k]);
   if (icount2 > 0)
-    for (k = 0; k < icount2; k++)
+    for (int k = 0; k < icount2; k++)
       IACT(r2q2[k], &dxq2[3 * k], hiq2[k], hjq2[k], piq2[k], pjq2[k]);
 #endif
 
   TIMER_TOC(TIMER_DOSELF);
 }
 
+/**
+ * @brief Compute the cell self-interaction (symmetric).
+ *
+ * @param r The #runner.
+ * @param c The #cell.
+ */
 void DOSELF2(struct runner *r, struct cell *restrict c) {
 
-  int k, pid, pjd, count = c->count;
-  double pix[3];
-  float dx[3], hi, hj, hig2, r2;
-  struct part *restrict parts = c->parts, *restrict pi, *restrict pj;
   const int ti_current = r->e->ti_current;
-  int firstdt = 0, countdt = 0, *indt = NULL;
+
 #ifdef VECTORIZE
   int icount1 = 0;
   float r2q1[VEC_SIZE] __attribute__((aligned(16)));
@@ -1550,43 +1532,49 @@ void DOSELF2(struct runner *r, struct cell *restrict c) {
 #endif
   TIMER_TIC
 
-  /* Set up indt if needed. */
-  if (c->ti_end_min > ti_current)
-    return;
-  else if (c->ti_end_max > ti_current) {
-    if ((indt = (int *)alloca(sizeof(int) * count)) == NULL)
-      error("Failed to allocate indt.");
-    for (k = 0; k < count; k++)
-      if (parts[k].ti_end <= ti_current) {
-        indt[countdt] = k;
-        countdt += 1;
-      }
-  }
+  if (c->ti_end_min > ti_current) return;
+  if (c->ti_end_max < ti_current) error("Cell in an impossible time-zone");
+
+  struct part *restrict parts = c->parts;
+  const int count = c->count;
+
+  /* Set up indt. */
+  int *indt = NULL;
+  int countdt = 0, firstdt = 0;
+  if ((indt = (int *)alloca(sizeof(int) * count)) == NULL)
+    error("Failed to allocate indt.");
+  for (int k = 0; k < count; k++)
+    if (parts[k].ti_end <= ti_current) {
+      indt[countdt] = k;
+      countdt += 1;
+    }
 
   /* Loop over the particles in the cell. */
-  for (pid = 0; pid < count; pid++) {
+  for (int pid = 0; pid < count; pid++) {
 
     /* Get a pointer to the ith particle. */
-    pi = &parts[pid];
+    struct part *restrict pi = &parts[pid];
 
     /* Get the particle position and radius. */
-    for (k = 0; k < 3; k++) pix[k] = pi->x[k];
-    hi = pi->h;
-    hig2 = hi * hi * kernel_gamma2;
+    double pix[3];
+    for (int k = 0; k < 3; k++) pix[k] = pi->x[k];
+    const float hi = pi->h;
+    const float hig2 = hi * hi * kernel_gamma2;
 
     /* Is the ith particle not active? */
     if (pi->ti_end > ti_current) {
 
       /* Loop over the other particles .*/
-      for (pjd = firstdt; pjd < countdt; pjd++) {
+      for (int pjd = firstdt; pjd < countdt; pjd++) {
 
         /* Get a pointer to the jth particle. */
-        pj = &parts[indt[pjd]];
-        hj = pj->h;
+        struct part *restrict pj = &parts[indt[pjd]];
+        const float hj = pj->h;
 
         /* Compute the pairwise distance. */
-        r2 = 0.0f;
-        for (k = 0; k < 3; k++) {
+        float r2 = 0.0f;
+        float dx[3];
+        for (int k = 0; k < 3; k++) {
           dx[k] = pj->x[k] - pix[k];
           r2 += dx[k] * dx[k];
         }
@@ -1631,15 +1619,16 @@ void DOSELF2(struct runner *r, struct cell *restrict c) {
       firstdt += 1;
 
       /* Loop over the other particles .*/
-      for (pjd = pid + 1; pjd < count; pjd++) {
+      for (int pjd = pid + 1; pjd < count; pjd++) {
 
         /* Get a pointer to the jth particle. */
-        pj = &parts[pjd];
-        hj = pj->h;
+        struct part *restrict pj = &parts[pjd];
+        const float hj = pj->h;
 
         /* Compute the pairwise distance. */
-        r2 = 0.0f;
-        for (k = 0; k < 3; k++) {
+        float r2 = 0.0f;
+        float dx[3];
+        for (int k = 0; k < 3; k++) {
           dx[k] = pix[k] - pj->x[k];
           r2 += dx[k] * dx[k];
         }
@@ -1708,10 +1697,10 @@ void DOSELF2(struct runner *r, struct cell *restrict c) {
 #ifdef VECTORIZE
   /* Pick up any leftovers. */
   if (icount1 > 0)
-    for (k = 0; k < icount1; k++)
+    for (int k = 0; k < icount1; k++)
       IACT_NONSYM(r2q1[k], &dxq1[3 * k], hiq1[k], hjq1[k], piq1[k], pjq1[k]);
   if (icount2 > 0)
-    for (k = 0; k < icount2; k++)
+    for (int k = 0; k < icount2; k++)
       IACT(r2q2[k], &dxq2[3 * k], hiq2[k], hjq2[k], piq2[k], pjq2[k]);
 #endif
 
@@ -2290,17 +2279,14 @@ void DOSUB_SELF2(struct runner *r, struct cell *ci, int gettimer) {
 void DOSUB_SUBSET(struct runner *r, struct cell *ci, struct part *parts,
                   int *ind, int count, struct cell *cj, int sid, int gettimer) {
 
-  int j, k;
-  double shift[3];
-  float h;
   struct space *s = r->e->s;
-  struct cell *sub = NULL;
   const int ti_current = r->e->ti_current;
 
   TIMER_TIC
 
   /* Find out in which sub-cell of ci the parts are. */
-  for (k = 0; k < 8; k++)
+  struct cell *sub = NULL;
+  for (int k = 0; k < 8; k++)
     if (ci->progeny[k] != NULL) {
       // if ( parts[ ind[ 0 ] ].x[0] >= ci->progeny[k]->loc[0] &&
       //      parts[ ind[ 0 ] ].x[0] <= ci->progeny[k]->loc[0] +
@@ -2326,7 +2312,7 @@ void DOSUB_SUBSET(struct runner *r, struct cell *ci, struct part *parts,
 
       /* Loop over all progeny. */
       DOSUB_SUBSET(r, sub, parts, ind, count, NULL, -1, 0);
-      for (j = 0; j < 8; j++)
+      for (int j = 0; j < 8; j++)
         if (ci->progeny[j] != sub && ci->progeny[j] != NULL)
           DOSUB_SUBSET(r, sub, parts, ind, count, ci->progeny[j], -1, 0);
 
@@ -2342,7 +2328,7 @@ void DOSUB_SUBSET(struct runner *r, struct cell *ci, struct part *parts,
   else {
 
     /* Get the cell dimensions. */
-    h = fmin(ci->h[0], fmin(ci->h[1], ci->h[2]));
+    const float h = fmin(ci->h[0], fmin(ci->h[1], ci->h[2]));
 
     /* Recurse? */
     if (ci->split && cj->split &&
@@ -2350,6 +2336,7 @@ void DOSUB_SUBSET(struct runner *r, struct cell *ci, struct part *parts,
             h / 2) {
 
       /* Get the type of pair if not specified explicitly. */
+      double shift[3] = {0.0, 0.0, 0.0};
       sid = space_getsid(s, &ci, &cj, shift);
 
       /* Different types of flags. */
@@ -2858,7 +2845,8 @@ void DOSUB_SUBSET(struct runner *r, struct cell *ci, struct part *parts,
     else if (ci->ti_end_min <= ti_current || cj->ti_end_min <= ti_current) {
 
       /* Get the relative distance between the pairs, wrapping. */
-      for (k = 0; k < 3; k++) {
+      double shift[3] = {0.0, 0.0, 0.0};
+      for (int k = 0; k < 3; k++) {
         if (cj->loc[k] - ci->loc[k] < -s->dim[k] / 2)
           shift[k] = s->dim[k];
         else if (cj->loc[k] - ci->loc[k] > s->dim[k] / 2)
@@ -2866,7 +2854,8 @@ void DOSUB_SUBSET(struct runner *r, struct cell *ci, struct part *parts,
       }
 
       /* Get the sorting index. */
-      for (sid = 0, k = 0; k < 3; k++)
+      int sid = 0;
+      for (int k = 0; k < 3; k++)
         sid =
             3 * sid + ((cj->loc[k] - ci->loc[k] + shift[k] < 0)
                            ? 0