diff --git a/examples/Makefile.am b/examples/Makefile.am
index 15fceb236057d21ba878cb09da39aca5bb70b692..735817c24cc52786e4c562e46e3619fb4a9a2e34 100644
--- a/examples/Makefile.am
+++ b/examples/Makefile.am
@@ -1,4 +1,3 @@
-
 # This file is part of SWIFT.
 # Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk),
 #                    Matthieu Schaller (matthieu.schaller@durham.ac.uk).
diff --git a/examples/main.c b/examples/main.c
index 6422ccceef0a5f5c5f3b5acce9ba60b957986aac..fa0ac2567ac253568903ce8c47b2c9c38fee3704 100644
--- a/examples/main.c
+++ b/examples/main.c
@@ -93,7 +93,6 @@ void print_help_message() {
  * @brief Main routine that loads a few particles and generates some output.
  *
  */
-
 int main(int argc, char *argv[]) {
 
   struct clocks_time tic, toc;
@@ -482,10 +481,8 @@ int main(int argc, char *argv[]) {
     if (j % 100 == 2) e.forcerepart = reparttype;
 #endif
 
+    /* Reset timers */
     timers_reset(timers_mask_all);
-#ifdef COUNTER
-    for (k = 0; k < runner_counter_count; k++) runner_counter[k] = 0;
-#endif
 
     /* Take a step. */
     engine_step(&e);
diff --git a/src/approx_math.h b/src/approx_math.h
index ef93ea63c383c74caa3eaff65446962872389a35..cbca602b3fafcc5044b0939b2207b8f9d50a7446 100644
--- a/src/approx_math.h
+++ b/src/approx_math.h
@@ -19,6 +19,8 @@
 #ifndef SWIFT_APPROX_MATH_H
 #define SWIFT_APPROX_MATH_H
 
+#include "inline.h"
+
 /**
  * @brief Approximate version of expf(x) using a 4th order Taylor expansion
  *
diff --git a/src/cell.h b/src/cell.h
index 65665c9dcfcd652cc688f652008044d618b10790..ad39354c6ced334c8c6dfa8420dc6c7f649009a3 100644
--- a/src/cell.h
+++ b/src/cell.h
@@ -27,8 +27,9 @@
 #include "lock.h"
 #include "multipole.h"
 #include "part.h"
+#include "task.h"
 
-/* Forward declaration of space, needed for cell_unpack. */
+/* Avoid cyclic inclusions */
 struct space;
 
 /* Max tag size set to 2^29 to take into account some MPI implementations
diff --git a/src/common_io.c b/src/common_io.c
index 4399c83f7ebd2bb5355b7df9cc1fdecd301e36aa..03d5618cd0cafe5ff6067cf72107baacffb5d85a 100644
--- a/src/common_io.c
+++ b/src/common_io.c
@@ -43,6 +43,8 @@
 #include "const.h"
 #include "error.h"
 #include "kernel_hydro.h"
+#include "part.h"
+#include "units.h"
 #include "version.h"
 
 const char* particle_type_names[NUM_PARTICLE_TYPES] = {
diff --git a/src/common_io.h b/src/common_io.h
index ed1c96801c904d9952c7b3ca995b847afdc3fb43..fa6811a26671a2ed85c12ada7bb380094f55795d 100644
--- a/src/common_io.h
+++ b/src/common_io.h
@@ -23,13 +23,11 @@
 /* Config parameters. */
 #include "../config.h"
 
-/* Includes. */
-#include "kernel_hydro.h"
+#if defined(HAVE_HDF5)
+
 #include "part.h"
 #include "units.h"
 
-#if defined(HAVE_HDF5)
-
 /**
  * @brief The different types of data used in the GADGET IC files.
  *
@@ -107,6 +105,6 @@ void writeXMFline(FILE* xmfFile, char* fileName, char* partTypeGroupName,
 void writeCodeDescription(hid_t h_file);
 void writeUnitSystem(hid_t h_file, struct UnitSystem* us);
 
-#endif
+#endif /* defined HDF5 */
 
 #endif /* SWIFT_COMMON_IO_H */
diff --git a/src/debug.c b/src/debug.c
index a3647915d96a9456cb6d8177d144dab56fd0b97e..77238ab135e3f27b8c2c43b0ec18e7745493b138 100644
--- a/src/debug.c
+++ b/src/debug.c
@@ -20,11 +20,19 @@
  *
  ******************************************************************************/
 
+/* Config parameters. */
+#include "../config.h"
+
+/* Some standard headers. */
 #include <stdio.h>
 
+/* This object's header. */
+#include "debug.h"
+
+/* Local includes. */
 #include "config.h"
 #include "const.h"
-#include "debug.h"
+#include "inline.h"
 #include "part.h"
 
 /* Import the right hydro definition */
@@ -51,7 +59,6 @@
  *
  * (Should be used for debugging only as it runs in O(N).)
  */
-
 void printParticle(struct part *parts, struct xpart *xparts, long long int id,
                    size_t N) {
 
@@ -69,6 +76,17 @@ void printParticle(struct part *parts, struct xpart *xparts, long long int id,
   if (!found) printf("## Particles[???] id=%lld not found\n", id);
 }
 
+/**
+ * @brief Looks for the g-particle with the given id and prints its information
+ * to
+ * the standard output.
+ *
+ * @param gparts The array of g-particles.
+ * @param id The id too look for.
+ * @param N The size of the array of g-particles.
+ *
+ * (Should be used for debugging only as it runs in O(N).)
+ */
 void printgParticle(struct gpart *gparts, long long int id, size_t N) {
 
   int found = 0;
@@ -95,9 +113,7 @@ void printgParticle(struct gpart *gparts, long long int id, size_t N) {
  *
  * @param p The particle to print
  * @param xp The extended data ot the particle to print
- *
  */
-
 void printParticle_single(struct part *p, struct xpart *xp) {
 
   printf("## Particle: id=%lld ", p->id);
@@ -105,6 +121,18 @@ void printParticle_single(struct part *p, struct xpart *xp) {
   printf("\n");
 }
 
+/**
+ * @brief Prints the details of a given particle to stdout
+ *
+ * @param gp The g-particle to print
+ */
+void printgParticle_single(struct gpart *gp) {
+
+  printf("## g-Particle: id=%lld ", gp->id);
+  gravity_debug_particle(gp);
+  printf("\n");
+}
+
 #ifdef HAVE_METIS
 
 /**
diff --git a/src/debug.h b/src/debug.h
index fba5cb68680472715025f9fee39a5bf06abacf81..13be15adb867e8bafe95e2900f68caaa36192510 100644
--- a/src/debug.h
+++ b/src/debug.h
@@ -19,14 +19,15 @@
 #ifndef SWIFT_DEBUG_H
 #define SWIFT_DEBUG_H
 
-/* Includes. */
-#include "cell.h"
-#include "part.h"
+struct part;
+struct gpart;
+struct xpart;
 
 void printParticle(struct part *parts, struct xpart *xparts, long long int id,
                    size_t N);
 void printgParticle(struct gpart *parts, long long int id, size_t N);
 void printParticle_single(struct part *p, struct xpart *xp);
+void printgParticle_single(struct gpart *gp);
 
 #ifdef HAVE_METIS
 #include "metis.h"
diff --git a/src/engine.c b/src/engine.c
index aa65f45678fae1e21276422b69f1c76cd7e9eaef..bc2bf839c9a8108bbedaa7de1f9b138348374ffa 100644
--- a/src/engine.c
+++ b/src/engine.c
@@ -59,9 +59,12 @@
 #include "parallel_io.h"
 #include "part.h"
 #include "partition.h"
+#include "proxy.h"
+#include "runner.h"
 #include "serial_io.h"
 #include "single_io.h"
 #include "timers.h"
+#include "units.h"
 
 const char *engine_policy_names[13] = {"none",
                                        "rand",
diff --git a/src/engine.h b/src/engine.h
index 6c17de54ba62a95c4a5feb3c126390a5b5877d05..d1bfecc568355a9cf5aa57590ccba1df81b05b8a 100644
--- a/src/engine.h
+++ b/src/engine.h
@@ -37,13 +37,10 @@
 #include <stdio.h>
 
 /* Includes. */
-#include "hydro_properties.h"
-#include "lock.h"
+#include "clocks.h"
 #include "parser.h"
 #include "partition.h"
-#include "physical_constants.h"
 #include "potentials.h"
-#include "proxy.h"
 #include "runner.h"
 #include "scheduler.h"
 #include "space.h"
diff --git a/src/hydro/Default/hydro_iact.h b/src/hydro/Default/hydro_iact.h
index 4d651b61bd934388414d54fa813820111d26e682..0a577931b5e0ca67ab07dfe414a548da66e82cdd 100644
--- a/src/hydro/Default/hydro_iact.h
+++ b/src/hydro/Default/hydro_iact.h
@@ -20,12 +20,6 @@
 #ifndef SWIFT_RUNNER_IACT_H
 #define SWIFT_RUNNER_IACT_H
 
-/* Includes. */
-#include "const.h"
-#include "kernel_hydro.h"
-#include "part.h"
-#include "vector.h"
-
 /**
  * @brief SPH interaction functions following the Gadget-2 version of SPH.
  *
@@ -44,7 +38,6 @@
 /**
  * @brief Density loop
  */
-
 __attribute__((always_inline)) INLINE static void runner_iact_density(
     float r2, float *dx, float hi, float hj, struct part *pi, struct part *pj) {
 
@@ -218,7 +211,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_density(
 /**
  * @brief Density loop (non-symmetric version)
  */
-
 __attribute__((always_inline)) INLINE static void runner_iact_nonsym_density(
     float r2, float *dx, float hi, float hj, struct part *pi, struct part *pj) {
 
@@ -267,7 +259,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_density(
 /**
  * @brief Density loop (non-symmetric vectorized version)
  */
-
 __attribute__((always_inline)) INLINE static void
 runner_iact_nonsym_vec_density(float *R2, float *Dx, float *Hi, float *Hj,
                                struct part **pi, struct part **pj) {
@@ -360,7 +351,6 @@ runner_iact_nonsym_vec_density(float *R2, float *Dx, float *Hi, float *Hj,
 /**
  * @brief Force loop
  */
-
 __attribute__((always_inline)) INLINE static void runner_iact_force(
     float r2, float *dx, float hi, float hj, struct part *pi, struct part *pj) {
 
@@ -456,7 +446,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_force(
 /**
  * @brief Force loop (Vectorized version)
  */
-
 __attribute__((always_inline)) INLINE static void runner_iact_vec_force(
     float *R2, float *Dx, float *Hi, float *Hj, struct part **pi,
     struct part **pj) {
@@ -675,7 +664,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force(
 /**
  * @brief Force loop (non-symmetric version)
  */
-
 __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force(
     float r2, float *dx, float hi, float hj, struct part *pi, struct part *pj) {
 
@@ -766,7 +754,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force(
 /**
  * @brief Force loop (Vectorized non-symmetric version)
  */
-
 __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force(
     float *R2, float *Dx, float *Hi, float *Hj, struct part **pi,
     struct part **pj) {
diff --git a/src/hydro/Gadget2/hydro_iact.h b/src/hydro/Gadget2/hydro_iact.h
index 8fcae293280e55bb838edf3c243f4e322914216f..8738b4be09931df4c938f1dff3adeed11468dcfc 100644
--- a/src/hydro/Gadget2/hydro_iact.h
+++ b/src/hydro/Gadget2/hydro_iact.h
@@ -20,12 +20,6 @@
 #ifndef SWIFT_RUNNER_IACT_LEGACY_H
 #define SWIFT_RUNNER_IACT_LEGACY_H
 
-/* Includes. */
-#include "const.h"
-#include "kernel_hydro.h"
-#include "part.h"
-#include "vector.h"
-
 /**
  * @brief SPH interaction functions following the Gadget-2 version of SPH.
  *
@@ -42,7 +36,6 @@
 /**
  * @brief Density loop
  */
-
 __attribute__((always_inline)) INLINE static void runner_iact_density(
     float r2, float *dx, float hi, float hj, struct part *pi, struct part *pj) {
 
@@ -113,7 +106,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_density(
 /**
  * @brief Density loop (non-symmetric version)
  */
-
 __attribute__((always_inline)) INLINE static void runner_iact_nonsym_density(
     float r2, float *dx, float hi, float hj, struct part *pi, struct part *pj) {
 
@@ -162,7 +154,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_density(
 /**
  * @brief Force loop
  */
-
 __attribute__((always_inline)) INLINE static void runner_iact_force(
     float r2, float *dx, float hi, float hj, struct part *pi, struct part *pj) {
 
@@ -260,7 +251,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_force(
 /**
  * @brief Force loop (non-symmetric version)
  */
-
 __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force(
     float r2, float *dx, float hi, float hj, struct part *pi, struct part *pj) {
 
diff --git a/src/hydro/Minimal/hydro_iact.h b/src/hydro/Minimal/hydro_iact.h
index f453d8fa1c495472af27ccf98356a3dab0894e98..c9da185b8a29eafe2a58420ae5de3a05ff043225 100644
--- a/src/hydro/Minimal/hydro_iact.h
+++ b/src/hydro/Minimal/hydro_iact.h
@@ -19,12 +19,6 @@
 #ifndef SWIFT_RUNNER_IACT_MINIMAL_H
 #define SWIFT_RUNNER_IACT_MINIMAL_H
 
-/* Includes. */
-#include "const.h"
-#include "kernel_hydro.h"
-#include "part.h"
-#include "vector.h"
-
 /**
  * @brief Minimal conservative implementation of SPH
  *
@@ -70,7 +64,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_density(
 /**
  * @brief Density loop (non-symmetric version)
  */
-
 __attribute__((always_inline)) INLINE static void runner_iact_nonsym_density(
     float r2, float *dx, float hi, float hj, struct part *pi, struct part *pj) {
 
@@ -95,7 +88,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_density(
 /**
  * @brief Force loop
  */
-
 __attribute__((always_inline)) INLINE static void runner_iact_force(
     float r2, float *dx, float hi, float hj, struct part *pi, struct part *pj) {
 
@@ -168,7 +160,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_force(
 /**
  * @brief Force loop (non-symmetric version)
  */
-
 __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force(
     float r2, float *dx, float hi, float hj, struct part *pi, struct part *pj) {
 
diff --git a/src/lock.h b/src/lock.h
index 5eb97de15f81d17bad45e85226cb0881d24482ba..ca7f01ee029cd1c57ed8fd0f3237ea54cb43e9a7 100644
--- a/src/lock.h
+++ b/src/lock.h
@@ -24,7 +24,6 @@
 
 /* Includes. */
 #include "atomic.h"
-#include "inline.h"
 
 #ifdef PTHREAD_SPINLOCK
 #include <pthread.h>
diff --git a/src/parallel_io.c b/src/parallel_io.c
index e779b56a85da3db72ea860fb336fa42037fbcd0e..c5cac1cb5efc6e533e599867e39cdd7c7b2c87fa 100644
--- a/src/parallel_io.c
+++ b/src/parallel_io.c
@@ -37,7 +37,11 @@
 
 /* Local includes. */
 #include "common_io.h"
+#include "engine.h"
 #include "error.h"
+#include "kernel_hydro.h"
+#include "part.h"
+#include "units.h"
 
 /**
  * @brief Reads a data array from a given HDF5 group.
diff --git a/src/parallel_io.h b/src/parallel_io.h
index 970ad8c41dcbc2df3a85b178f836e16926147788..26757cb679e475d6acd2ce3c408135dfe5e49081 100644
--- a/src/parallel_io.h
+++ b/src/parallel_io.h
@@ -19,6 +19,9 @@
 #ifndef SWIFT_PARALLEL_IO_H
 #define SWIFT_PARALLEL_IO_H
 
+/* Config parameters. */
+#include "../config.h"
+
 /* MPI headers. */
 #ifdef WITH_MPI
 #include <mpi.h>
diff --git a/src/part.h b/src/part.h
index 5d4c9c88a1acadea3d23a3df618c04da389fb61d..e99be6e51a9bd74dd9eec8f590e80989e83ec2e1 100644
--- a/src/part.h
+++ b/src/part.h
@@ -22,9 +22,6 @@
 /* Config parameters. */
 #include "../config.h"
 
-/* Some standard headers. */
-#include <stdlib.h>
-
 /* MPI headers. */
 #ifdef WITH_MPI
 #include <mpi.h>
@@ -38,7 +35,7 @@
 #define xpart_align 32
 #define gpart_align 32
 
-/* Import the right particle definition */
+/* Import the right hydro particle definition */
 #if defined(MINIMAL_SPH)
 #include "./hydro/Minimal/hydro_part.h"
 #elif defined(GADGET2_SPH)
@@ -49,6 +46,7 @@
 #error "Invalid choice of SPH variant"
 #endif
 
+/* Import the right gravity particle definition */
 #include "./gravity/Default/gravity_part.h"
 
 #ifdef WITH_MPI
diff --git a/src/runner.c b/src/runner.c
index addeebda93a4d3f4b031f6fdc1f9eb856adc0a32..d38b52ce38f6fc16d97fc642b3f4d6890d9387d2 100644
--- a/src/runner.c
+++ b/src/runner.c
@@ -40,6 +40,7 @@
 /* Local headers. */
 #include "approx_math.h"
 #include "atomic.h"
+#include "cell.h"
 #include "const.h"
 #include "debug.h"
 #include "drift.h"
@@ -135,7 +136,6 @@ void runner_do_grav_external(struct runner *r, struct cell *c, int timer) {
  * @param sort The entries
  * @param N The number of entries.
  */
-
 void runner_do_sort_ascending(struct entry *sort, int N) {
 
   struct {
@@ -217,7 +217,6 @@ void runner_do_sort_ascending(struct entry *sort, int N) {
  * @param clock Flag indicating whether to record the timing or not, needed
  *      for recursive calls.
  */
-
 void runner_do_sort(struct runner *r, struct cell *c, int flags, int clock) {
 
   struct entry *finger;
@@ -423,7 +422,6 @@ void runner_do_init(struct runner *r, struct cell *c, int timer) {
  * @param r The runner thread.
  * @param c The cell.
  */
-
 void runner_do_ghost(struct runner *r, struct cell *c) {
 
   struct part *p, *parts = c->parts;
diff --git a/src/runner.h b/src/runner.h
index 758b6cf57dbc1a46b6fc068a23615f3b28c8707e..6838b959955c4e54e208b8d2d16339e7fdb1740f 100644
--- a/src/runner.h
+++ b/src/runner.h
@@ -23,13 +23,12 @@
 #ifndef SWIFT_RUNNER_H
 #define SWIFT_RUNNER_H
 
-/* Includes. */
-#include "cell.h"
-#include "inline.h"
-
 extern const double runner_shift[13][3];
 extern const char runner_flip[27];
 
+struct cell;
+struct engine;
+
 /* A struct representing a runner's thread and its data. */
 struct runner {
 
diff --git a/src/runner_doiact.h b/src/runner_doiact.h
index bf16cb8f82f0d1d9a0d9b9d955831ada152f4307..4da83b940d55460c4bf8d2f650534aedf94dbd5d 100644
--- a/src/runner_doiact.h
+++ b/src/runner_doiact.h
@@ -1,6 +1,7 @@
 /*******************************************************************************
  * This file is part of SWIFT.
  * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
+ *               2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk)
  *
  * This program is free software: you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published
@@ -17,10 +18,6 @@
  *
  ******************************************************************************/
 
-/* Includes. */
-#include "cell.h"
-#include "part.h"
-
 /* Before including this file, define FUNCTION, which is the
    name of the interaction function. This creates the interaction functions
    runner_dopair_FUNCTION, runner_dopair_FUNCTION_naive, runner_doself_FUNCTION,
@@ -77,6 +74,12 @@
 #define _IACT(f) PASTE(runner_iact, f)
 #define IACT _IACT(FUNCTION)
 
+#define _IACT_NONSYM_VEC(f) PASTE(runner_iact_nonsym_vec, f)
+#define IACT_NONSYM_VEC _IACT_NONSYM_VEC(FUNCTION)
+
+#define _IACT_VEC(f) PASTE(runner_iact_vec, f)
+#define IACT_VEC _IACT_VEC(FUNCTION)
+
 #define _TIMER_DOSELF(f) PASTE(timer_doself, f)
 #define TIMER_DOSELF _TIMER_DOSELF(FUNCTION)
 
@@ -95,12 +98,6 @@
 #define _TIMER_DOPAIR_SUBSET(f) PASTE(timer_dopair_subset, f)
 #define TIMER_DOPAIR_SUBSET _TIMER_DOPAIR_SUBSET(FUNCTION)
 
-#define _IACT_NONSYM_VEC(f) PASTE(runner_iact_nonsym_vec, f)
-#define IACT_NONSYM_VEC _IACT_NONSYM_VEC(FUNCTION)
-
-#define _IACT_VEC(f) PASTE(runner_iact_vec, f)
-#define IACT_VEC _IACT_VEC(FUNCTION)
-
 /**
  * @brief Compute the interactions between a cell pair.
  *
@@ -108,18 +105,14 @@
  * @param ci The first #cell.
  * @param cj The second #cell.
  */
-
 void DOPAIR_NAIVE(struct runner *r, struct cell *restrict ci,
                   struct cell *restrict cj) {
 
-  struct engine *e = r->e;
-  int pid, pjd, k, count_i = ci->count, count_j = cj->count;
-  double shift[3] = {0.0, 0.0, 0.0};
-  struct part *restrict parts_i = ci->parts, *restrict parts_j = cj->parts;
-  struct part *restrict pi, *restrict pj;
-  double pix[3];
-  float dx[3], hi, hig2, r2;
+  const struct engine *e = r->e;
   const int ti_current = e->ti_current;
+
+  error("Don't use in actual runs ! Slow code !");
+
 #ifdef VECTORIZE
   int icount = 0;
   float r2q[VEC_SIZE] __attribute__((aligned(16)));
@@ -128,44 +121,47 @@ void DOPAIR_NAIVE(struct runner *r, struct cell *restrict ci,
   float dxq[3 * VEC_SIZE] __attribute__((aligned(16)));
   struct part *piq[VEC_SIZE], *pjq[VEC_SIZE];
 #endif
+
   TIMER_TIC
 
   /* Anything to do here? */
   if (ci->ti_end_min > ti_current && cj->ti_end_min > ti_current) return;
 
+  const int count_i = ci->count;
+  const int count_j = cj->count;
+  struct part *restrict parts_i = ci->parts;
+  struct part *restrict parts_j = cj->parts;
+
   /* Get the relative distance between the pairs, wrapping. */
-  for (k = 0; k < 3; k++) {
+  double shift[3] = {0.0, 0.0, 0.0};
+  for (int k = 0; k < 3; k++) {
     if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2)
       shift[k] = e->s->dim[k];
     else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2)
       shift[k] = -e->s->dim[k];
   }
 
-  /* printf( "runner_dopair_naive: doing pair [ %g %g %g ]/[ %g %g %g ] with
-  %i/%i parts and shift = [ %g %g %g ].\n" ,
-      ci->loc[0] , ci->loc[1] , ci->loc[2] , cj->loc[0] , cj->loc[1] ,
-  cj->loc[2] ,
-      ci->count , cj->count , shift[0] , shift[1] , shift[2] ); fflush(stdout);
-  tic = getticks(); */
-
   /* Loop over the parts in ci. */
-  for (pid = 0; pid < count_i; pid++) {
+  for (int pid = 0; pid < count_i; pid++) {
 
     /* Get a hold of the ith part in ci. */
-    pi = &parts_i[pid];
-    for (k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k];
-    hi = pi->h;
-    hig2 = hi * hi * kernel_gamma2;
+    struct part *restrict pi = &parts_i[pid];
+    const float hi = pi->h;
+
+    double pix[3];
+    for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k];
+    const float hig2 = hi * hi * kernel_gamma2;
 
     /* Loop over the parts in cj. */
-    for (pjd = 0; pjd < count_j; pjd++) {
+    for (int pjd = 0; pjd < count_j; pjd++) {
 
       /* Get a pointer to the jth particle. */
-      pj = &parts_j[pjd];
+      struct part *restrict pj = &parts_j[pjd];
 
       /* Compute the pairwise distance. */
-      r2 = 0.0f;
-      for (k = 0; k < 3; k++) {
+      float r2 = 0.0f;
+      float dx[3];
+      for (int k = 0; k < 3; k++) {
         dx[k] = pix[k] - pj->x[k];
         r2 += dx[k] * dx[k];
       }
@@ -206,7 +202,7 @@ void DOPAIR_NAIVE(struct runner *r, struct cell *restrict ci,
 #ifdef VECTORIZE
   /* Pick up any leftovers. */
   if (icount > 0)
-    for (k = 0; k < icount; k++)
+    for (int k = 0; k < icount; k++)
       IACT(r2q[k], &dxq[3 * k], hiq[k], hjq[k], piq[k], pjq[k]);
 #endif
 
@@ -215,12 +211,10 @@ void DOPAIR_NAIVE(struct runner *r, struct cell *restrict ci,
 
 void DOSELF_NAIVE(struct runner *r, struct cell *restrict c) {
 
-  int pid, pjd, k, count = c->count;
-  struct part *restrict parts = c->parts;
-  struct part *restrict pi, *restrict pj;
-  double pix[3] = {0.0, 0.0, 0.0};
-  float dx[3], hi, hig2, r2;
   const int ti_current = r->e->ti_current;
+
+  error("Don't use in actual runs ! Slow code !");
+
 #ifdef VECTORIZE
   int icount = 0;
   float r2q[VEC_SIZE] __attribute__((aligned(16)));
@@ -229,38 +223,34 @@ void DOSELF_NAIVE(struct runner *r, struct cell *restrict c) {
   float dxq[3 * VEC_SIZE] __attribute__((aligned(16)));
   struct part *piq[VEC_SIZE], *pjq[VEC_SIZE];
 #endif
+
   TIMER_TIC
 
   /* Anything to do here? */
   if (c->ti_end_min > ti_current) return;
 
-  /* printf( "runner_dopair_naive: doing pair [ %g %g %g ]/[ %g %g %g ] with
-  %i/%i parts and shift = [ %g %g %g ].\n" ,
-      ci->loc[0] , ci->loc[1] , ci->loc[2] , cj->loc[0] , cj->loc[1] ,
-  cj->loc[2] ,
-      ci->count , cj->count , shift[0] , shift[1] , shift[2] ); fflush(stdout);
-  tic = getticks(); */
+  const int count = c->count;
+  struct part *restrict parts = c->parts;
 
   /* Loop over the parts in ci. */
-  for (pid = 0; pid < count; pid++) {
+  for (int pid = 0; pid < count; pid++) {
 
     /* Get a hold of the ith part in ci. */
-    pi = &parts[pid];
-    pix[0] = pi->x[0];
-    pix[1] = pi->x[1];
-    pix[2] = pi->x[2];
-    hi = pi->h;
-    hig2 = hi * hi * kernel_gamma2;
+    struct part *restrict pi = &parts[pid];
+    const double pix[3] = {pi->x[0], pi->x[1], pi->x[2]};
+    const float hi = pi->h;
+    const float hig2 = hi * hi * kernel_gamma2;
 
     /* Loop over the parts in cj. */
-    for (pjd = pid + 1; pjd < count; pjd++) {
+    for (int pjd = pid + 1; pjd < count; pjd++) {
 
       /* Get a pointer to the jth particle. */
-      pj = &parts[pjd];
+      struct part *restrict pj = &parts[pjd];
 
       /* Compute the pairwise distance. */
-      r2 = 0.0f;
-      for (k = 0; k < 3; k++) {
+      float r2 = 0.0f;
+      float dx[3];
+      for (int k = 0; k < 3; k++) {
         dx[k] = pix[k] - pj->x[k];
         r2 += dx[k] * dx[k];
       }
@@ -301,7 +291,7 @@ void DOSELF_NAIVE(struct runner *r, struct cell *restrict c) {
 #ifdef VECTORIZE
   /* Pick up any leftovers. */
   if (icount > 0)
-    for (k = 0; k < icount; k++)
+    for (int k = 0; k < icount; k++)
       IACT(r2q[k], &dxq[3 * k], hiq[k], hjq[k], piq[k], pjq[k]);
 #endif
 
@@ -319,18 +309,121 @@ void DOSELF_NAIVE(struct runner *r, struct cell *restrict c) {
  * @param count The number of particles in @c ind.
  * @param cj The second #cell.
  */
+void DOPAIR_SUBSET_NAIVE(struct runner *r, struct cell *restrict ci,
+                         struct part *restrict parts_i, int *restrict ind,
+                         int count, struct cell *restrict cj) {
+
+  struct engine *e = r->e;
+
+  error("Don't use in actual runs ! Slow code !");
+
+#ifdef VECTORIZE
+  int icount = 0;
+  float r2q[VEC_SIZE] __attribute__((aligned(16)));
+  float hiq[VEC_SIZE] __attribute__((aligned(16)));
+  float hjq[VEC_SIZE] __attribute__((aligned(16)));
+  float dxq[3 * VEC_SIZE] __attribute__((aligned(16)));
+  struct part *piq[VEC_SIZE], *pjq[VEC_SIZE];
+#endif
+
+  TIMER_TIC
+
+  const int count_j = cj->count;
+  struct part *restrict parts_j = cj->parts;
+
+  /* Get the relative distance between the pairs, wrapping. */
+  double shift[3] = {0.0, 0.0, 0.0};
+  for (int k = 0; k < 3; k++) {
+    if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2)
+      shift[k] = e->s->dim[k];
+    else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2)
+      shift[k] = -e->s->dim[k];
+  }
+
+  /* Loop over the parts_i. */
+  for (int pid = 0; pid < count; pid++) {
+
+    /* Get a hold of the ith part in ci. */
+    struct part *restrict pi = &parts_i[ind[pid]];
+    double pix[3];
+    for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k];
+    const float hi = pi->h;
+    const float hig2 = hi * hi * kernel_gamma2;
+
+    /* Loop over the parts in cj. */
+    for (int pjd = 0; pjd < count_j; pjd++) {
+
+      /* Get a pointer to the jth particle. */
+      struct part *restrict pj = &parts_j[pjd];
+
+      /* Compute the pairwise distance. */
+      float r2 = 0.0f;
+      float dx[3];
+      for (int k = 0; k < 3; k++) {
+        dx[k] = pix[k] - pj->x[k];
+        r2 += dx[k] * dx[k];
+      }
+
+      /* Hit or miss? */
+      if (r2 < hig2) {
+
+#ifndef VECTORIZE
+
+        IACT_NONSYM(r2, dx, hi, pj->h, pi, pj);
+
+#else
+
+        /* Add this interaction to the queue. */
+        r2q[icount] = r2;
+        dxq[3 * icount + 0] = dx[0];
+        dxq[3 * icount + 1] = dx[1];
+        dxq[3 * icount + 2] = dx[2];
+        hiq[icount] = hi;
+        hjq[icount] = pj->h;
+        piq[icount] = pi;
+        pjq[icount] = pj;
+        icount += 1;
+
+        /* Flush? */
+        if (icount == VEC_SIZE) {
+          IACT_NONSYM_VEC(r2q, dxq, hiq, hjq, piq, pjq);
+          icount = 0;
+        }
+
+#endif
+      }
+
+    } /* loop over the parts in cj. */
+
+  } /* loop over the parts in ci. */
 
+#ifdef VECTORIZE
+  /* Pick up any leftovers. */
+  if (icount > 0)
+    for (int k = 0; k < icount; k++)
+      IACT_NONSYM(r2q[k], &dxq[3 * k], hiq[k], hjq[k], piq[k], pjq[k]);
+#endif
+
+  TIMER_TOC(timer_dopair_subset);
+}
+
+/**
+ * @brief Compute the interactions between a cell pair, but only for the
+ *      given indices in ci.
+ *
+ * @param r The #runner.
+ * @param ci The first #cell.
+ * @param parts_i The #part to interact with @c cj.
+ * @param ind The list of indices of particles in @c ci to interact with.
+ * @param count The number of particles in @c ind.
+ * @param cj The second #cell.
+ */
 void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci,
                    struct part *restrict parts_i, int *restrict ind, int count,
                    struct cell *restrict cj) {
 
   struct engine *e = r->e;
-  int pid, pjd, sid, k, count_j = cj->count, flipped;
-  double shift[3] = {0.0, 0.0, 0.0};
-  struct part *restrict pi, *restrict pj, *restrict parts_j = cj->parts;
-  double pix[3];
-  float dx[3], hi, hig2, r2, di, dxj;
-  struct entry *sort_j;
+
 #ifdef VECTORIZE
   int icount = 0;
   float r2q[VEC_SIZE] __attribute__((aligned(16)));
@@ -339,10 +432,15 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci,
   float dxq[3 * VEC_SIZE] __attribute__((aligned(16)));
   struct part *piq[VEC_SIZE], *pjq[VEC_SIZE];
 #endif
+
   TIMER_TIC
 
+  const int count_j = cj->count;
+  struct part *restrict parts_j = cj->parts;
+
   /* Get the relative distance between the pairs, wrapping. */
-  for (k = 0; k < 3; k++) {
+  double shift[3] = {0.0, 0.0, 0.0};
+  for (int k = 0; k < 3; k++) {
     if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2)
       shift[k] = e->s->dim[k];
     else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2)
@@ -350,52 +448,50 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci,
   }
 
   /* Get the sorting index. */
-  for (sid = 0, k = 0; k < 3; k++)
+  int sid = 0;
+  for (int k = 0; k < 3; k++)
     sid = 3 * sid + ((cj->loc[k] - ci->loc[k] + shift[k] < 0)
                          ? 0
                          : (cj->loc[k] - ci->loc[k] + shift[k] > 0) ? 2 : 1);
 
   /* Switch the cells around? */
-  flipped = runner_flip[sid];
+  const int flipped = runner_flip[sid];
   sid = sortlistID[sid];
 
   /* Have the cells been sorted? */
   if (!(cj->sorted & (1 << sid))) error("Trying to interact unsorted cells.");
 
-  /* printf( "runner_dopair_naive: doing pair [ %g %g %g ]/[ %g %g %g ] with
-  %i/%i parts and shift = [ %g %g %g ].\n" ,
-      ci->loc[0] , ci->loc[1] , ci->loc[2] , cj->loc[0] , cj->loc[1] ,
-  cj->loc[2] ,
-      ci->count , cj->count , shift[0] , shift[1] , shift[2] ); fflush(stdout);
-  tic = getticks(); */
-
   /* Pick-out the sorted lists. */
-  sort_j = &cj->sort[sid * (cj->count + 1)];
-  dxj = cj->dx_max;
+  const struct entry *restrict sort_j = &cj->sort[sid * (cj->count + 1)];
+  const float dxj = cj->dx_max;
 
   /* Parts are on the left? */
   if (!flipped) {
 
     /* Loop over the parts_i. */
-    for (pid = 0; pid < count; pid++) {
+    for (int pid = 0; pid < count; pid++) {
 
       /* Get a hold of the ith part in ci. */
-      pi = &parts_i[ind[pid]];
-      for (k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k];
-      hi = pi->h;
-      hig2 = hi * hi * kernel_gamma2;
-      di = hi * kernel_gamma + dxj + pix[0] * runner_shift[sid][0] +
-           pix[1] * runner_shift[sid][1] + pix[2] * runner_shift[sid][2];
+      struct part *restrict pi = &parts_i[ind[pid]];
+      double pix[3];
+      for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k];
+
+      const float hi = pi->h;
+      const float hig2 = hi * hi * kernel_gamma2;
+      const float di = hi * kernel_gamma + dxj + pix[0] * runner_shift[sid][0] +
+                       pix[1] * runner_shift[sid][1] +
+                       pix[2] * runner_shift[sid][2];
 
       /* Loop over the parts in cj. */
-      for (pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) {
+      for (int pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) {
 
         /* Get a pointer to the jth particle. */
-        pj = &parts_j[sort_j[pjd].i];
+        struct part *restrict pj = &parts_j[sort_j[pjd].i];
 
         /* Compute the pairwise distance. */
-        r2 = 0.0f;
-        for (k = 0; k < 3; k++) {
+        float r2 = 0.0f;
+        float dx[3];
+        for (int k = 0; k < 3; k++) {
           dx[k] = pix[k] - pj->x[k];
           r2 += dx[k] * dx[k];
         }
@@ -439,25 +535,28 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci,
   else {
 
     /* Loop over the parts_i. */
-    for (pid = 0; pid < count; pid++) {
+    for (int pid = 0; pid < count; pid++) {
 
       /* Get a hold of the ith part in ci. */
-      pi = &parts_i[ind[pid]];
-      for (k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k];
-      hi = pi->h;
-      hig2 = hi * hi * kernel_gamma2;
-      di = -hi * kernel_gamma - dxj + pix[0] * runner_shift[sid][0] +
-           pix[1] * runner_shift[sid][1] + pix[2] * runner_shift[sid][2];
+      struct part *restrict pi = &parts_i[ind[pid]];
+      double pix[3];
+      for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k];
+      const float hi = pi->h;
+      const float hig2 = hi * hi * kernel_gamma2;
+      const float di =
+          -hi * kernel_gamma - dxj + pix[0] * runner_shift[sid][0] +
+          pix[1] * runner_shift[sid][1] + pix[2] * runner_shift[sid][2];
 
       /* Loop over the parts in cj. */
-      for (pjd = count_j - 1; pjd >= 0 && di < sort_j[pjd].d; pjd--) {
+      for (int pjd = count_j - 1; pjd >= 0 && di < sort_j[pjd].d; pjd--) {
 
         /* Get a pointer to the jth particle. */
-        pj = &parts_j[sort_j[pjd].i];
+        struct part *restrict pj = &parts_j[sort_j[pjd].i];
 
         /* Compute the pairwise distance. */
-        r2 = 0.0f;
-        for (k = 0; k < 3; k++) {
+        float r2 = 0.0f;
+        float dx[3];
+        for (int k = 0; k < 3; k++) {
           dx[k] = pix[k] - pj->x[k];
           r2 += dx[k] * dx[k];
         }
@@ -499,119 +598,7 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci,
 #ifdef VECTORIZE
   /* Pick up any leftovers. */
   if (icount > 0)
-    for (k = 0; k < icount; k++)
-      IACT_NONSYM(r2q[k], &dxq[3 * k], hiq[k], hjq[k], piq[k], pjq[k]);
-#endif
-
-  TIMER_TOC(timer_dopair_subset);
-}
-
-/**
- * @brief Compute the interactions between a cell pair, but only for the
- *      given indices in ci.
- *
- * @param r The #runner.
- * @param ci The first #cell.
- * @param parts_i The #part to interact with @c cj.
- * @param ind The list of indices of particles in @c ci to interact with.
- * @param count The number of particles in @c ind.
- * @param cj The second #cell.
- */
-
-void DOPAIR_SUBSET_NAIVE(struct runner *r, struct cell *restrict ci,
-                         struct part *restrict parts_i, int *restrict ind,
-                         int count, struct cell *restrict cj) {
-
-  struct engine *e = r->e;
-  int pid, pjd, k, count_j = cj->count;
-  double shift[3] = {0.0, 0.0, 0.0};
-  struct part *restrict pi, *restrict pj, *restrict parts_j = cj->parts;
-  double pix[3];
-  float dx[3], hi, hig2, r2;
-#ifdef VECTORIZE
-  int icount = 0;
-  float r2q[VEC_SIZE] __attribute__((aligned(16)));
-  float hiq[VEC_SIZE] __attribute__((aligned(16)));
-  float hjq[VEC_SIZE] __attribute__((aligned(16)));
-  float dxq[3 * VEC_SIZE] __attribute__((aligned(16)));
-  struct part *piq[VEC_SIZE], *pjq[VEC_SIZE];
-#endif
-  TIMER_TIC
-
-  /* Get the relative distance between the pairs, wrapping. */
-  for (k = 0; k < 3; k++) {
-    if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2)
-      shift[k] = e->s->dim[k];
-    else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2)
-      shift[k] = -e->s->dim[k];
-  }
-
-  /* printf( "runner_dopair_naive: doing pair [ %g %g %g ]/[ %g %g %g ] with
-  %i/%i parts and shift = [ %g %g %g ].\n" ,
-      ci->loc[0] , ci->loc[1] , ci->loc[2] , cj->loc[0] , cj->loc[1] ,
-  cj->loc[2] ,
-      ci->count , cj->count , shift[0] , shift[1] , shift[2] ); fflush(stdout);
-  tic = getticks(); */
-
-  /* Loop over the parts_i. */
-  for (pid = 0; pid < count; pid++) {
-
-    /* Get a hold of the ith part in ci. */
-    pi = &parts_i[ind[pid]];
-    for (k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k];
-    hi = pi->h;
-    hig2 = hi * hi * kernel_gamma2;
-
-    /* Loop over the parts in cj. */
-    for (pjd = 0; pjd < count_j; pjd++) {
-
-      /* Get a pointer to the jth particle. */
-      pj = &parts_j[pjd];
-
-      /* Compute the pairwise distance. */
-      r2 = 0.0f;
-      for (k = 0; k < 3; k++) {
-        dx[k] = pix[k] - pj->x[k];
-        r2 += dx[k] * dx[k];
-      }
-
-      /* Hit or miss? */
-      if (r2 < hig2) {
-
-#ifndef VECTORIZE
-
-        IACT_NONSYM(r2, dx, hi, pj->h, pi, pj);
-
-#else
-
-        /* Add this interaction to the queue. */
-        r2q[icount] = r2;
-        dxq[3 * icount + 0] = dx[0];
-        dxq[3 * icount + 1] = dx[1];
-        dxq[3 * icount + 2] = dx[2];
-        hiq[icount] = hi;
-        hjq[icount] = pj->h;
-        piq[icount] = pi;
-        pjq[icount] = pj;
-        icount += 1;
-
-        /* Flush? */
-        if (icount == VEC_SIZE) {
-          IACT_NONSYM_VEC(r2q, dxq, hiq, hjq, piq, pjq);
-          icount = 0;
-        }
-
-#endif
-      }
-
-    } /* loop over the parts in cj. */
-
-  } /* loop over the parts in ci. */
-
-#ifdef VECTORIZE
-  /* Pick up any leftovers. */
-  if (icount > 0)
-    for (k = 0; k < icount; k++)
+    for (int k = 0; k < icount; k++)
       IACT_NONSYM(r2q[k], &dxq[3 * k], hiq[k], hjq[k], piq[k], pjq[k]);
 #endif
 
@@ -628,15 +615,9 @@ void DOPAIR_SUBSET_NAIVE(struct runner *r, struct cell *restrict ci,
  * @param ind The list of indices of particles in @c ci to interact with.
  * @param count The number of particles in @c ind.
  */
-
 void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci,
                    struct part *restrict parts, int *restrict ind, int count) {
 
-  int pid, pjd, k, count_i = ci->count;
-  struct part *restrict parts_j = ci->parts;
-  struct part *restrict pi, *restrict pj;
-  double pix[3] = {0.0, 0.0, 0.0};
-  float dx[3], hi, hig2, r2;
 #ifdef VECTORIZE
   int icount = 0;
   float r2q[VEC_SIZE] __attribute__((aligned(16)));
@@ -645,35 +626,31 @@ void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci,
   float dxq[3 * VEC_SIZE] __attribute__((aligned(16)));
   struct part *piq[VEC_SIZE], *pjq[VEC_SIZE];
 #endif
+
   TIMER_TIC
 
-  /* printf( "runner_dopair_naive: doing pair [ %g %g %g ]/[ %g %g %g ] with
-  %i/%i parts and shift = [ %g %g %g ].\n" ,
-      ci->loc[0] , ci->loc[1] , ci->loc[2] , cj->loc[0] , cj->loc[1] ,
-  cj->loc[2] ,
-      ci->count , cj->count , shift[0] , shift[1] , shift[2] ); fflush(stdout);
-  tic = getticks(); */
+  const int count_i = ci->count;
+  struct part *restrict parts_j = ci->parts;
 
   /* Loop over the parts in ci. */
-  for (pid = 0; pid < count; pid++) {
+  for (int pid = 0; pid < count; pid++) {
 
     /* Get a hold of the ith part in ci. */
-    pi = &parts[ind[pid]];
-    pix[0] = pi->x[0];
-    pix[1] = pi->x[1];
-    pix[2] = pi->x[2];
-    hi = pi->h;
-    hig2 = hi * hi * kernel_gamma2;
+    struct part *restrict pi = &parts[ind[pid]];
+    const double pix[3] = {pi->x[0], pi->x[1], pi->x[2]};
+    const float hi = pi->h;
+    const float hig2 = hi * hi * kernel_gamma2;
 
     /* Loop over the parts in cj. */
-    for (pjd = 0; pjd < count_i; pjd++) {
+    for (int pjd = 0; pjd < count_i; pjd++) {
 
       /* Get a pointer to the jth particle. */
-      pj = &parts_j[pjd];
+      struct part *restrict pj = &parts_j[pjd];
 
       /* Compute the pairwise distance. */
-      r2 = 0.0f;
-      for (k = 0; k < 3; k++) {
+      float r2 = 0.0f;
+      float dx[3];
+      for (int k = 0; k < 3; k++) {
         dx[k] = pix[k] - pj->x[k];
         r2 += dx[k] * dx[k];
       }
@@ -714,7 +691,7 @@ void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci,
 #ifdef VECTORIZE
   /* Pick up any leftovers. */
   if (icount > 0)
-    for (k = 0; k < icount; k++)
+    for (int k = 0; k < icount; k++)
       IACT_NONSYM(r2q[k], &dxq[3 * k], hiq[k], hjq[k], piq[k], pjq[k]);
 #endif
 
@@ -722,26 +699,17 @@ void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci,
 }
 
 /**
- * @brief Compute the interactions between a cell pair.
+ * @brief Compute the interactions between a cell pair (non-symmetric).
  *
  * @param r The #runner.
  * @param ci The first #cell.
  * @param cj The second #cell.
  */
-
 void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) {
 
   struct engine *restrict e = r->e;
-  int pid, pjd, k, sid;
-  double rshift, shift[3] = {0.0, 0.0, 0.0};
-  struct entry *restrict sort_i, *restrict sort_j;
-  struct part *restrict pi, *restrict pj, *restrict parts_i, *restrict parts_j;
-  double pix[3], pjx[3], di, dj;
-  float dx[3], hi, hig2, hj, hjg2, r2, dx_max;
-  double hi_max, hj_max;
-  double di_max, dj_min;
-  int count_i, count_j;
   const int ti_current = e->ti_current;
+
 #ifdef VECTORIZE
   int icount = 0;
   float r2q[VEC_SIZE] __attribute__((aligned(16)));
@@ -750,60 +718,64 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) {
   float dxq[3 * VEC_SIZE] __attribute__((aligned(16)));
   struct part *piq[VEC_SIZE], *pjq[VEC_SIZE];
 #endif
+
   TIMER_TIC
 
   /* Anything to do here? */
   if (ci->ti_end_min > ti_current && cj->ti_end_min > ti_current) return;
 
   /* Get the sort ID. */
-  sid = space_getsid(e->s, &ci, &cj, shift);
+  double shift[3] = {0.0, 0.0, 0.0};
+  const int sid = space_getsid(e->s, &ci, &cj, shift);
 
   /* Have the cells been sorted? */
   if (!(ci->sorted & (1 << sid)) || !(cj->sorted & (1 << sid)))
     error("Trying to interact unsorted cells.");
 
   /* Get the cutoff shift. */
-  for (rshift = 0.0, k = 0; k < 3; k++)
-    rshift += shift[k] * runner_shift[sid][k];
+  double rshift = 0.0;
+  for (int k = 0; k < 3; k++) rshift += shift[k] * runner_shift[sid][k];
 
   /* Pick-out the sorted lists. */
-  sort_i = &ci->sort[sid * (ci->count + 1)];
-  sort_j = &cj->sort[sid * (cj->count + 1)];
+  const struct entry *restrict sort_i = &ci->sort[sid * (ci->count + 1)];
+  const struct entry *restrict sort_j = &cj->sort[sid * (cj->count + 1)];
 
   /* Get some other useful values. */
-  hi_max = ci->h_max * kernel_gamma - rshift;
-  hj_max = cj->h_max * kernel_gamma;
-  count_i = ci->count;
-  count_j = cj->count;
-  parts_i = ci->parts;
-  parts_j = cj->parts;
-  di_max = sort_i[count_i - 1].d - rshift;
-  dj_min = sort_j[0].d;
-  dx_max = (ci->dx_max + cj->dx_max);
+  const double hi_max = ci->h_max * kernel_gamma - rshift;
+  const double hj_max = cj->h_max * kernel_gamma;
+  const int count_i = ci->count;
+  const int count_j = cj->count;
+  struct part *restrict parts_i = ci->parts;
+  struct part *restrict parts_j = cj->parts;
+  const double di_max = sort_i[count_i - 1].d - rshift;
+  const double dj_min = sort_j[0].d;
+  const float dx_max = (ci->dx_max + cj->dx_max);
 
   /* Loop over the parts in ci. */
-  for (pid = count_i - 1; pid >= 0 && sort_i[pid].d + hi_max + dx_max > dj_min;
-       pid--) {
+  for (int pid = count_i - 1;
+       pid >= 0 && sort_i[pid].d + hi_max + dx_max > dj_min; pid--) {
 
     /* Get a hold of the ith part in ci. */
-    pi = &parts_i[sort_i[pid].i];
+    struct part *restrict pi = &parts_i[sort_i[pid].i];
     if (pi->ti_end > ti_current) continue;
-    hi = pi->h;
-    di = sort_i[pid].d + hi * kernel_gamma + dx_max - rshift;
+    const float hi = pi->h;
+    const double di = sort_i[pid].d + hi * kernel_gamma + dx_max - rshift;
     if (di < dj_min) continue;
 
-    hig2 = hi * hi * kernel_gamma2;
-    for (k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k];
+    double pix[3];
+    for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k];
+    const float hig2 = hi * hi * kernel_gamma2;
 
     /* Loop over the parts in cj. */
-    for (pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) {
+    for (int pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) {
 
       /* Get a pointer to the jth particle. */
-      pj = &parts_j[sort_j[pjd].i];
+      struct part *restrict pj = &parts_j[sort_j[pjd].i];
 
       /* Compute the pairwise distance. */
-      r2 = 0.0f;
-      for (k = 0; k < 3; k++) {
+      float r2 = 0.0f;
+      float dx[3];
+      for (int k = 0; k < 3; k++) {
         dx[k] = pix[k] - pj->x[k];
         r2 += dx[k] * dx[k];
       }
@@ -841,33 +813,31 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) {
 
   } /* loop over the parts in ci. */
 
-  /* printf( "runner_dopair: first half took %.3f %s...\n" ,
-  clocks_from_ticks(getticks() - tic), clocks_getunit());
-  tic = getticks(); */
-
   /* Loop over the parts in cj. */
-  for (pjd = 0; pjd < count_j && sort_j[pjd].d - hj_max - dx_max < di_max;
+  for (int pjd = 0; pjd < count_j && sort_j[pjd].d - hj_max - dx_max < di_max;
        pjd++) {
 
     /* Get a hold of the jth part in cj. */
-    pj = &parts_j[sort_j[pjd].i];
+    struct part *restrict pj = &parts_j[sort_j[pjd].i];
     if (pj->ti_end > ti_current) continue;
-    hj = pj->h;
-    dj = sort_j[pjd].d - hj * kernel_gamma - dx_max - rshift;
+    const float hj = pj->h;
+    const double dj = sort_j[pjd].d - hj * kernel_gamma - dx_max - rshift;
     if (dj > di_max) continue;
 
-    for (k = 0; k < 3; k++) pjx[k] = pj->x[k] + shift[k];
-    hjg2 = hj * hj * kernel_gamma2;
+    double pjx[3];
+    for (int k = 0; k < 3; k++) pjx[k] = pj->x[k] + shift[k];
+    const float hjg2 = hj * hj * kernel_gamma2;
 
     /* Loop over the parts in ci. */
-    for (pid = count_i - 1; pid >= 0 && sort_i[pid].d > dj; pid--) {
+    for (int pid = count_i - 1; pid >= 0 && sort_i[pid].d > dj; pid--) {
 
       /* Get a pointer to the jth particle. */
-      pi = &parts_i[sort_i[pid].i];
+      struct part *restrict pi = &parts_i[sort_i[pid].i];
 
       /* Compute the pairwise distance. */
-      r2 = 0.0f;
-      for (k = 0; k < 3; k++) {
+      float r2 = 0.0f;
+      float dx[3];
+      for (int k = 0; k < 3; k++) {
         dx[k] = pjx[k] - pi->x[k];
         r2 += dx[k] * dx[k];
       }
@@ -908,28 +878,25 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) {
 #ifdef VECTORIZE
   /* Pick up any leftovers. */
   if (icount > 0)
-    for (k = 0; k < icount; k++)
+    for (int k = 0; k < icount; k++)
       IACT_NONSYM(r2q[k], &dxq[3 * k], hiq[k], hjq[k], piq[k], pjq[k]);
 #endif
 
   TIMER_TOC(TIMER_DOPAIR);
 }
 
+/**
+ * @brief Compute the interactions between a cell pair (symmetric)
+ *
+ * @param r The #runner.
+ * @param ci The first #cell.
+ * @param cj The second #cell.
+ */
 void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) {
 
   struct engine *restrict e = r->e;
-  int pid, pjd, k, sid;
-  double rshift, shift[3] = {0.0, 0.0, 0.0};
-  struct entry *sort_i, *sort_j;
-  struct entry *sortdt_i = NULL, *sortdt_j = NULL;
-  int countdt_i = 0, countdt_j = 0;
-  struct part *restrict pi, *restrict pj, *restrict parts_i, *restrict parts_j;
-  double pix[3], pjx[3], di, dj;
-  float dx[3], hi, hig2, hj, hjg2, r2, dx_max;
-  double hi_max, hj_max;
-  double di_max, dj_min;
-  int count_i, count_j;
   const int ti_current = e->ti_current;
+
 #ifdef VECTORIZE
   int icount1 = 0;
   float r2q1[VEC_SIZE] __attribute__((aligned(16)));
@@ -944,38 +911,42 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) {
   float dxq2[3 * VEC_SIZE] __attribute__((aligned(16)));
   struct part *piq2[VEC_SIZE], *pjq2[VEC_SIZE];
 #endif
+
   TIMER_TIC
 
   /* Anything to do here? */
   if (ci->ti_end_min > ti_current && cj->ti_end_min > ti_current) return;
 
   /* Get the shift ID. */
-  sid = space_getsid(e->s, &ci, &cj, shift);
+  double shift[3] = {0.0, 0.0, 0.0};
+  const int sid = space_getsid(e->s, &ci, &cj, shift);
 
   /* Have the cells been sorted? */
   if (!(ci->sorted & (1 << sid)) || !(cj->sorted & (1 << sid)))
     error("Trying to interact unsorted cells.");
 
   /* Get the cutoff shift. */
-  for (rshift = 0.0, k = 0; k < 3; k++)
-    rshift += shift[k] * runner_shift[sid][k];
+  double rshift = 0.0;
+  for (int k = 0; k < 3; k++) rshift += shift[k] * runner_shift[sid][k];
 
   /* Pick-out the sorted lists. */
-  sort_i = &ci->sort[sid * (ci->count + 1)];
-  sort_j = &cj->sort[sid * (cj->count + 1)];
+  struct entry *restrict sort_i = &ci->sort[sid * (ci->count + 1)];
+  struct entry *restrict sort_j = &cj->sort[sid * (cj->count + 1)];
 
   /* Get some other useful values. */
-  hi_max = ci->h_max * kernel_gamma - rshift;
-  hj_max = cj->h_max * kernel_gamma;
-  count_i = ci->count;
-  count_j = cj->count;
-  parts_i = ci->parts;
-  parts_j = cj->parts;
-  di_max = sort_i[count_i - 1].d - rshift;
-  dj_min = sort_j[0].d;
-  dx_max = (ci->dx_max + cj->dx_max);
+  const double hi_max = ci->h_max * kernel_gamma - rshift;
+  const double hj_max = cj->h_max * kernel_gamma;
+  const int count_i = ci->count;
+  const int count_j = cj->count;
+  struct part *restrict parts_i = ci->parts;
+  struct part *restrict parts_j = cj->parts;
+  const double di_max = sort_i[count_i - 1].d - rshift;
+  const double dj_min = sort_j[0].d;
+  const double dx_max = (ci->dx_max + cj->dx_max);
 
   /* Collect the number of parts left and right below dt. */
+  int countdt_i = 0, countdt_j = 0;
+  struct entry *restrict sortdt_i = NULL, *restrict sortdt_j = NULL;
   if (ci->ti_end_max <= ti_current) {
     sortdt_i = sort_i;
     countdt_i = count_i;
@@ -983,7 +954,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) {
     if ((sortdt_i = (struct entry *)alloca(sizeof(struct entry) * count_i)) ==
         NULL)
       error("Failed to allocate dt sortlists.");
-    for (k = 0; k < count_i; k++)
+    for (int k = 0; k < count_i; k++)
       if (parts_i[sort_i[k].i].ti_end <= ti_current) {
         sortdt_i[countdt_i] = sort_i[k];
         countdt_i += 1;
@@ -996,7 +967,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) {
     if ((sortdt_j = (struct entry *)alloca(sizeof(struct entry) * count_j)) ==
         NULL)
       error("Failed to allocate dt sortlists.");
-    for (k = 0; k < count_j; k++)
+    for (int k = 0; k < count_j; k++)
       if (parts_j[sort_j[k].i].ti_end <= ti_current) {
         sortdt_j[countdt_j] = sort_j[k];
         countdt_j += 1;
@@ -1004,31 +975,33 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) {
   }
 
   /* Loop over the parts in ci. */
-  for (pid = count_i - 1; pid >= 0 && sort_i[pid].d + hi_max + dx_max > dj_min;
-       pid--) {
+  for (int pid = count_i - 1;
+       pid >= 0 && sort_i[pid].d + hi_max + dx_max > dj_min; pid--) {
 
     /* Get a hold of the ith part in ci. */
-    pi = &parts_i[sort_i[pid].i];
-    hi = pi->h;
-    di = sort_i[pid].d + hi * kernel_gamma + dx_max - rshift;
+    struct part *restrict pi = &parts_i[sort_i[pid].i];
+    const float hi = pi->h;
+    const double di = sort_i[pid].d + hi * kernel_gamma + dx_max - rshift;
     if (di < dj_min) continue;
 
-    hig2 = hi * hi * kernel_gamma2;
-    for (k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k];
+    double pix[3];
+    for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k];
+    const float hig2 = hi * hi * kernel_gamma2;
 
     /* Look at valid dt parts only? */
     if (pi->ti_end > ti_current) {
 
       /* Loop over the parts in cj within dt. */
-      for (pjd = 0; pjd < countdt_j && sortdt_j[pjd].d < di; pjd++) {
+      for (int pjd = 0; pjd < countdt_j && sortdt_j[pjd].d < di; pjd++) {
 
         /* Get a pointer to the jth particle. */
-        pj = &parts_j[sortdt_j[pjd].i];
-        hj = pj->h;
+        struct part *restrict pj = &parts_j[sortdt_j[pjd].i];
+        const float hj = pj->h;
 
         /* Compute the pairwise distance. */
-        r2 = 0.0f;
-        for (k = 0; k < 3; k++) {
+        float r2 = 0.0f;
+        float dx[3];
+        for (int k = 0; k < 3; k++) {
           dx[k] = pj->x[k] - pix[k];
           r2 += dx[k] * dx[k];
         }
@@ -1070,15 +1043,16 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) {
     else {
 
       /* Loop over the parts in cj. */
-      for (pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) {
+      for (int pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) {
 
         /* Get a pointer to the jth particle. */
-        pj = &parts_j[sort_j[pjd].i];
-        hj = pj->h;
+        struct part *restrict pj = &parts_j[sort_j[pjd].i];
+        const float hj = pj->h;
 
         /* Compute the pairwise distance. */
-        r2 = 0.0f;
-        for (k = 0; k < 3; k++) {
+        float r2 = 0.0f;
+        float dx[3];
+        for (int k = 0; k < 3; k++) {
           dx[k] = pix[k] - pj->x[k];
           r2 += dx[k] * dx[k];
         }
@@ -1144,36 +1118,34 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) {
 
   } /* loop over the parts in ci. */
 
-  /* printf( "runner_dopair: first half took %.3f %s...\n" ,
-  clocks_from_ticks(getticks() - tic), clocks_getunit());
-  tic = getticks(); */
-
   /* Loop over the parts in cj. */
-  for (pjd = 0; pjd < count_j && sort_j[pjd].d - hj_max - dx_max < di_max;
+  for (int pjd = 0; pjd < count_j && sort_j[pjd].d - hj_max - dx_max < di_max;
        pjd++) {
 
     /* Get a hold of the jth part in cj. */
-    pj = &parts_j[sort_j[pjd].i];
-    hj = pj->h;
-    dj = sort_j[pjd].d - hj * kernel_gamma - dx_max - rshift;
+    struct part *restrict pj = &parts_j[sort_j[pjd].i];
+    const float hj = pj->h;
+    const double dj = sort_j[pjd].d - hj * kernel_gamma - dx_max - rshift;
     if (dj > di_max) continue;
 
-    for (k = 0; k < 3; k++) pjx[k] = pj->x[k] + shift[k];
-    hjg2 = hj * hj * kernel_gamma2;
+    double pjx[3];
+    for (int k = 0; k < 3; k++) pjx[k] = pj->x[k] + shift[k];
+    const float hjg2 = hj * hj * kernel_gamma2;
 
     /* Is this particle outside the dt? */
     if (pj->ti_end > ti_current) {
 
       /* Loop over the parts in ci. */
-      for (pid = countdt_i - 1; pid >= 0 && sortdt_i[pid].d > dj; pid--) {
+      for (int pid = countdt_i - 1; pid >= 0 && sortdt_i[pid].d > dj; pid--) {
 
         /* Get a pointer to the jth particle. */
-        pi = &parts_i[sortdt_i[pid].i];
-        hi = pi->h;
+        struct part *restrict pi = &parts_i[sortdt_i[pid].i];
+        const float hi = pi->h;
 
         /* Compute the pairwise distance. */
-        r2 = 0.0f;
-        for (k = 0; k < 3; k++) {
+        float r2 = 0.0f;
+        float dx[3];
+        for (int k = 0; k < 3; k++) {
           dx[k] = pi->x[k] - pjx[k];
           r2 += dx[k] * dx[k];
         }
@@ -1214,15 +1186,16 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) {
     else {
 
       /* Loop over the parts in ci. */
-      for (pid = count_i - 1; pid >= 0 && sort_i[pid].d > dj; pid--) {
+      for (int pid = count_i - 1; pid >= 0 && sort_i[pid].d > dj; pid--) {
 
         /* Get a pointer to the jth particle. */
-        pi = &parts_i[sort_i[pid].i];
-        hi = pi->h;
+        struct part *restrict pi = &parts_i[sort_i[pid].i];
+        const float hi = pi->h;
 
         /* Compute the pairwise distance. */
-        r2 = 0.0f;
-        for (k = 0; k < 3; k++) {
+        float r2 = 0.0f;
+        float dx[3];
+        for (int k = 0; k < 3; k++) {
           dx[k] = pjx[k] - pi->x[k];
           r2 += dx[k] * dx[k];
         }
@@ -1291,10 +1264,10 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) {
 #ifdef VECTORIZE
   /* Pick up any leftovers. */
   if (icount1 > 0)
-    for (k = 0; k < icount1; k++)
+    for (int k = 0; k < icount1; k++)
       IACT_NONSYM(r2q1[k], &dxq1[3 * k], hiq1[k], hjq1[k], piq1[k], pjq1[k]);
   if (icount2 > 0)
-    for (k = 0; k < icount2; k++)
+    for (int k = 0; k < icount2; k++)
       IACT(r2q2[k], &dxq2[3 * k], hiq2[k], hjq2[k], piq2[k], pjq2[k]);
 #endif
 
@@ -1302,20 +1275,15 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) {
 }
 
 /**
- * @brief Compute the cell self-interaction.
+ * @brief Compute the cell self-interaction (non-symmetric).
  *
  * @param r The #runner.
  * @param c The #cell.
  */
-
 void DOSELF1(struct runner *r, struct cell *restrict c) {
 
-  int k, pid, pjd, count = c->count;
-  double pix[3];
-  float dx[3], hi, hj, hig2, r2;
-  struct part *restrict parts = c->parts, *restrict pi, *restrict pj;
   const int ti_current = r->e->ti_current;
-  int firstdt = 0, countdt = 0, *indt = NULL, doj;
+
 #ifdef VECTORIZE
   int icount1 = 0;
   float r2q1[VEC_SIZE] __attribute__((aligned(16)));
@@ -1332,43 +1300,49 @@ void DOSELF1(struct runner *r, struct cell *restrict c) {
 #endif
   TIMER_TIC
 
-  /* Set up indt if needed. */
-  if (c->ti_end_min > ti_current)
-    return;
-  else if (c->ti_end_max > ti_current) {
-    if ((indt = (int *)alloca(sizeof(int) * count)) == NULL)
-      error("Failed to allocate indt.");
-    for (k = 0; k < count; k++)
-      if (parts[k].ti_end <= ti_current) {
-        indt[countdt] = k;
-        countdt += 1;
-      }
-  }
+  if (c->ti_end_min > ti_current) return;
+  if (c->ti_end_max < ti_current) error("Cell in an impossible time-zone");
+
+  struct part *restrict parts = c->parts;
+  const int count = c->count;
+
+  /* Set up indt. */
+  int *indt = NULL;
+  int countdt = 0, firstdt = 0;
+  if ((indt = (int *)alloca(sizeof(int) * count)) == NULL)
+    error("Failed to allocate indt.");
+  for (int k = 0; k < count; k++)
+    if (parts[k].ti_end <= ti_current) {
+      indt[countdt] = k;
+      countdt += 1;
+    }
 
   /* Loop over the particles in the cell. */
-  for (pid = 0; pid < count; pid++) {
+  for (int pid = 0; pid < count; pid++) {
 
     /* Get a pointer to the ith particle. */
-    pi = &parts[pid];
+    struct part *restrict pi = &parts[pid];
 
     /* Get the particle position and radius. */
-    for (k = 0; k < 3; k++) pix[k] = pi->x[k];
-    hi = pi->h;
-    hig2 = hi * hi * kernel_gamma2;
+    double pix[3];
+    for (int k = 0; k < 3; k++) pix[k] = pi->x[k];
+    const float hi = pi->h;
+    const float hig2 = hi * hi * kernel_gamma2;
 
     /* Is the ith particle inactive? */
     if (pi->ti_end > ti_current) {
 
       /* Loop over the other particles .*/
-      for (pjd = firstdt; pjd < countdt; pjd++) {
+      for (int pjd = firstdt; pjd < countdt; pjd++) {
 
         /* Get a pointer to the jth particle. */
-        pj = &parts[indt[pjd]];
-        hj = pj->h;
+        struct part *restrict pj = &parts[indt[pjd]];
+        const float hj = pj->h;
 
         /* Compute the pairwise distance. */
-        r2 = 0.0f;
-        for (k = 0; k < 3; k++) {
+        float r2 = 0.0f;
+        float dx[3];
+        for (int k = 0; k < 3; k++) {
           dx[k] = pj->x[k] - pix[k];
           r2 += dx[k] * dx[k];
         }
@@ -1413,19 +1387,21 @@ void DOSELF1(struct runner *r, struct cell *restrict c) {
       firstdt += 1;
 
       /* Loop over the other particles .*/
-      for (pjd = pid + 1; pjd < count; pjd++) {
+      for (int pjd = pid + 1; pjd < count; pjd++) {
 
         /* Get a pointer to the jth particle. */
-        pj = &parts[pjd];
-        hj = pj->h;
+        struct part *restrict pj = &parts[pjd];
+        const float hj = pj->h;
 
         /* Compute the pairwise distance. */
-        r2 = 0.0f;
-        for (k = 0; k < 3; k++) {
+        float r2 = 0.0f;
+        float dx[3];
+        for (int k = 0; k < 3; k++) {
           dx[k] = pix[k] - pj->x[k];
           r2 += dx[k] * dx[k];
         }
-        doj = (pj->ti_end <= ti_current) && (r2 < hj * hj * kernel_gamma2);
+        const int doj =
+            (pj->ti_end <= ti_current) && (r2 < hj * hj * kernel_gamma2);
 
         /* Hit or miss? */
         if (r2 < hig2 || doj) {
@@ -1516,24 +1492,26 @@ void DOSELF1(struct runner *r, struct cell *restrict c) {
 #ifdef VECTORIZE
   /* Pick up any leftovers. */
   if (icount1 > 0)
-    for (k = 0; k < icount1; k++)
+    for (int k = 0; k < icount1; k++)
       IACT_NONSYM(r2q1[k], &dxq1[3 * k], hiq1[k], hjq1[k], piq1[k], pjq1[k]);
   if (icount2 > 0)
-    for (k = 0; k < icount2; k++)
+    for (int k = 0; k < icount2; k++)
       IACT(r2q2[k], &dxq2[3 * k], hiq2[k], hjq2[k], piq2[k], pjq2[k]);
 #endif
 
   TIMER_TOC(TIMER_DOSELF);
 }
 
+/**
+ * @brief Compute the cell self-interaction (symmetric).
+ *
+ * @param r The #runner.
+ * @param c The #cell.
+ */
 void DOSELF2(struct runner *r, struct cell *restrict c) {
 
-  int k, pid, pjd, count = c->count;
-  double pix[3];
-  float dx[3], hi, hj, hig2, r2;
-  struct part *restrict parts = c->parts, *restrict pi, *restrict pj;
   const int ti_current = r->e->ti_current;
-  int firstdt = 0, countdt = 0, *indt = NULL;
+
 #ifdef VECTORIZE
   int icount1 = 0;
   float r2q1[VEC_SIZE] __attribute__((aligned(16)));
@@ -1550,43 +1528,49 @@ void DOSELF2(struct runner *r, struct cell *restrict c) {
 #endif
   TIMER_TIC
 
-  /* Set up indt if needed. */
-  if (c->ti_end_min > ti_current)
-    return;
-  else if (c->ti_end_max > ti_current) {
-    if ((indt = (int *)alloca(sizeof(int) * count)) == NULL)
-      error("Failed to allocate indt.");
-    for (k = 0; k < count; k++)
-      if (parts[k].ti_end <= ti_current) {
-        indt[countdt] = k;
-        countdt += 1;
-      }
-  }
+  if (c->ti_end_min > ti_current) return;
+  if (c->ti_end_max < ti_current) error("Cell in an impossible time-zone");
+
+  struct part *restrict parts = c->parts;
+  const int count = c->count;
+
+  /* Set up indt. */
+  int *indt = NULL;
+  int countdt = 0, firstdt = 0;
+  if ((indt = (int *)alloca(sizeof(int) * count)) == NULL)
+    error("Failed to allocate indt.");
+  for (int k = 0; k < count; k++)
+    if (parts[k].ti_end <= ti_current) {
+      indt[countdt] = k;
+      countdt += 1;
+    }
 
   /* Loop over the particles in the cell. */
-  for (pid = 0; pid < count; pid++) {
+  for (int pid = 0; pid < count; pid++) {
 
     /* Get a pointer to the ith particle. */
-    pi = &parts[pid];
+    struct part *restrict pi = &parts[pid];
 
     /* Get the particle position and radius. */
-    for (k = 0; k < 3; k++) pix[k] = pi->x[k];
-    hi = pi->h;
-    hig2 = hi * hi * kernel_gamma2;
+    double pix[3];
+    for (int k = 0; k < 3; k++) pix[k] = pi->x[k];
+    const float hi = pi->h;
+    const float hig2 = hi * hi * kernel_gamma2;
 
     /* Is the ith particle not active? */
     if (pi->ti_end > ti_current) {
 
       /* Loop over the other particles .*/
-      for (pjd = firstdt; pjd < countdt; pjd++) {
+      for (int pjd = firstdt; pjd < countdt; pjd++) {
 
         /* Get a pointer to the jth particle. */
-        pj = &parts[indt[pjd]];
-        hj = pj->h;
+        struct part *restrict pj = &parts[indt[pjd]];
+        const float hj = pj->h;
 
         /* Compute the pairwise distance. */
-        r2 = 0.0f;
-        for (k = 0; k < 3; k++) {
+        float r2 = 0.0f;
+        float dx[3];
+        for (int k = 0; k < 3; k++) {
           dx[k] = pj->x[k] - pix[k];
           r2 += dx[k] * dx[k];
         }
@@ -1631,15 +1615,16 @@ void DOSELF2(struct runner *r, struct cell *restrict c) {
       firstdt += 1;
 
       /* Loop over the other particles .*/
-      for (pjd = pid + 1; pjd < count; pjd++) {
+      for (int pjd = pid + 1; pjd < count; pjd++) {
 
         /* Get a pointer to the jth particle. */
-        pj = &parts[pjd];
-        hj = pj->h;
+        struct part *restrict pj = &parts[pjd];
+        const float hj = pj->h;
 
         /* Compute the pairwise distance. */
-        r2 = 0.0f;
-        for (k = 0; k < 3; k++) {
+        float r2 = 0.0f;
+        float dx[3];
+        for (int k = 0; k < 3; k++) {
           dx[k] = pix[k] - pj->x[k];
           r2 += dx[k] * dx[k];
         }
@@ -1708,10 +1693,10 @@ void DOSELF2(struct runner *r, struct cell *restrict c) {
 #ifdef VECTORIZE
   /* Pick up any leftovers. */
   if (icount1 > 0)
-    for (k = 0; k < icount1; k++)
+    for (int k = 0; k < icount1; k++)
       IACT_NONSYM(r2q1[k], &dxq1[3 * k], hiq1[k], hjq1[k], piq1[k], pjq1[k]);
   if (icount2 > 0)
-    for (k = 0; k < icount2; k++)
+    for (int k = 0; k < icount2; k++)
       IACT(r2q2[k], &dxq2[3 * k], hiq2[k], hjq2[k], piq2[k], pjq2[k]);
 #endif
 
@@ -2290,17 +2275,14 @@ void DOSUB_SELF2(struct runner *r, struct cell *ci, int gettimer) {
 void DOSUB_SUBSET(struct runner *r, struct cell *ci, struct part *parts,
                   int *ind, int count, struct cell *cj, int sid, int gettimer) {
 
-  int j, k;
-  double shift[3];
-  float h;
   struct space *s = r->e->s;
-  struct cell *sub = NULL;
   const int ti_current = r->e->ti_current;
 
   TIMER_TIC
 
   /* Find out in which sub-cell of ci the parts are. */
-  for (k = 0; k < 8; k++)
+  struct cell *sub = NULL;
+  for (int k = 0; k < 8; k++)
     if (ci->progeny[k] != NULL) {
       // if ( parts[ ind[ 0 ] ].x[0] >= ci->progeny[k]->loc[0] &&
       //      parts[ ind[ 0 ] ].x[0] <= ci->progeny[k]->loc[0] +
@@ -2326,7 +2308,7 @@ void DOSUB_SUBSET(struct runner *r, struct cell *ci, struct part *parts,
 
       /* Loop over all progeny. */
       DOSUB_SUBSET(r, sub, parts, ind, count, NULL, -1, 0);
-      for (j = 0; j < 8; j++)
+      for (int j = 0; j < 8; j++)
         if (ci->progeny[j] != sub && ci->progeny[j] != NULL)
           DOSUB_SUBSET(r, sub, parts, ind, count, ci->progeny[j], -1, 0);
 
@@ -2342,7 +2324,7 @@ void DOSUB_SUBSET(struct runner *r, struct cell *ci, struct part *parts,
   else {
 
     /* Get the cell dimensions. */
-    h = fmin(ci->h[0], fmin(ci->h[1], ci->h[2]));
+    const float h = fmin(ci->h[0], fmin(ci->h[1], ci->h[2]));
 
     /* Recurse? */
     if (ci->split && cj->split &&
@@ -2350,6 +2332,7 @@ void DOSUB_SUBSET(struct runner *r, struct cell *ci, struct part *parts,
             h / 2) {
 
       /* Get the type of pair if not specified explicitly. */
+      double shift[3] = {0.0, 0.0, 0.0};
       sid = space_getsid(s, &ci, &cj, shift);
 
       /* Different types of flags. */
@@ -2858,7 +2841,8 @@ void DOSUB_SUBSET(struct runner *r, struct cell *ci, struct part *parts,
     else if (ci->ti_end_min <= ti_current || cj->ti_end_min <= ti_current) {
 
       /* Get the relative distance between the pairs, wrapping. */
-      for (k = 0; k < 3; k++) {
+      double shift[3] = {0.0, 0.0, 0.0};
+      for (int k = 0; k < 3; k++) {
         if (cj->loc[k] - ci->loc[k] < -s->dim[k] / 2)
           shift[k] = s->dim[k];
         else if (cj->loc[k] - ci->loc[k] > s->dim[k] / 2)
@@ -2866,7 +2850,8 @@ void DOSUB_SUBSET(struct runner *r, struct cell *ci, struct part *parts,
       }
 
       /* Get the sorting index. */
-      for (sid = 0, k = 0; k < 3; k++)
+      int sid = 0;
+      for (int k = 0; k < 3; k++)
         sid =
             3 * sid + ((cj->loc[k] - ci->loc[k] + shift[k] < 0)
                            ? 0
diff --git a/src/runner_doiact_grav.h b/src/runner_doiact_grav.h
index 13e6fe68ca223fa95a307fd063208e1f5d5efa6c..e3788dfa1123584c913bca6baa6fc2db6698e6d0 100644
--- a/src/runner_doiact_grav.h
+++ b/src/runner_doiact_grav.h
@@ -19,11 +19,6 @@
 #ifndef SWIFT_RUNNER_DOIACT_GRAV_H
 #define SWIFT_RUNNER_DOIACT_GRAV_H
 
-/* Includes. */
-#include "cell.h"
-#include "clocks.h"
-#include "part.h"
-
 /**
  * @brief Compute the sorted gravity interactions between a cell pair.
  *
diff --git a/src/scheduler.c b/src/scheduler.c
index 496df93adf5a656460f7b39904a3cc58ac3e5caa..8f833dad5085c675f129cb1061fa354f236ec9bc 100644
--- a/src/scheduler.c
+++ b/src/scheduler.c
@@ -44,6 +44,9 @@
 #include "error.h"
 #include "intrinsics.h"
 #include "kernel_hydro.h"
+#include "queue.h"
+#include "space.h"
+#include "task.h"
 #include "timers.h"
 
 /**
diff --git a/src/scheduler.h b/src/scheduler.h
index a867f7bc36cc8865d28d751565663641d92aa7fb..62af23152e7e2d2bc68e0cc4d7122f4dd0483aa1 100644
--- a/src/scheduler.h
+++ b/src/scheduler.h
@@ -35,7 +35,6 @@
 #include "cell.h"
 #include "lock.h"
 #include "queue.h"
-#include "space.h"
 #include "task.h"
 
 /* Some constants. */
diff --git a/src/serial_io.c b/src/serial_io.c
index 5abd3ebc28672d68c4135efe5753dc4713c2d3c6..7e78276dc83430655b4ea4de2fb7425e71e07966 100644
--- a/src/serial_io.c
+++ b/src/serial_io.c
@@ -26,22 +26,22 @@
 /* Some standard headers. */
 #include <hdf5.h>
 #include <math.h>
+#include <mpi.h>
 #include <stddef.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 
-/* MPI headers. */
-#ifdef WITH_MPI
-#include <mpi.h>
-#endif
-
 /* This object's header. */
 #include "serial_io.h"
 
 /* Local includes. */
 #include "common_io.h"
+#include "engine.h"
 #include "error.h"
+#include "kernel_hydro.h"
+#include "part.h"
+#include "units.h"
 
 /*-----------------------------------------------------------------------------
  * Routines reading an IC file
diff --git a/src/serial_io.h b/src/serial_io.h
index b7ed6eb62d823829a473f828696c291e552effa3..6b64624772214494a43316d3a8aa3910c7238dc8 100644
--- a/src/serial_io.h
+++ b/src/serial_io.h
@@ -19,6 +19,9 @@
 #ifndef SWIFT_SERIAL_IO_H
 #define SWIFT_SERIAL_IO_H
 
+/* Config parameters. */
+#include "../config.h"
+
 /* MPI headers. */
 #ifdef WITH_MPI
 #include <mpi.h>
diff --git a/src/single_io.c b/src/single_io.c
index fb3bf4368feed9892b098228d85c99f0bc3e724b..3f65aae0b5d495670f2b4862e466ec849f997d63 100644
--- a/src/single_io.c
+++ b/src/single_io.c
@@ -36,8 +36,11 @@
 
 /* Local includes. */
 #include "common_io.h"
-#include "const.h"
+#include "engine.h"
 #include "error.h"
+#include "kernel_hydro.h"
+#include "part.h"
+#include "units.h"
 
 /*-----------------------------------------------------------------------------
  * Routines reading an IC file
diff --git a/src/single_io.h b/src/single_io.h
index adfc5b43941b2c0d69d9ce0924164aff56864a23..d2c87655e1c91b92e8ccf2aa50d2e0bf98f13482 100644
--- a/src/single_io.h
+++ b/src/single_io.h
@@ -19,6 +19,9 @@
 #ifndef SWIFT_SINGLE_IO_H
 #define SWIFT_SINGLE_IO_H
 
+/* Config parameters. */
+#include "../config.h"
+
 /* Includes. */
 #include "engine.h"
 #include "part.h"
diff --git a/src/space.h b/src/space.h
index d53c0f2a5784ef25654309741b4455e3dbcc3e0c..5cfb2cb8368ed60586aad62e0f753aab17bf55d1 100644
--- a/src/space.h
+++ b/src/space.h
@@ -23,16 +23,18 @@
 #ifndef SWIFT_SPACE_H
 #define SWIFT_SPACE_H
 
-/* Includes. */
+/* Config parameters. */
+#include "../config.h"
+
+/* Some standard headers. */
 #include <stddef.h>
 
-/* Local includes. */
+/* Includes. */
 #include "cell.h"
+#include "lock.h"
 #include "parser.h"
 #include "part.h"
-
-/* Forward-declare the engine to avoid cyclic includes. */
-struct engine;
+#include "space.h"
 
 /* Some constants. */
 #define space_maxdepth 10
@@ -158,4 +160,5 @@ void space_do_split(struct space *s, struct cell *c);
 void space_do_parts_sort();
 void space_do_gparts_sort();
 void space_link_cleanup(struct space *s);
+
 #endif /* SWIFT_SPACE_H */
diff --git a/src/tools.c b/src/tools.c
index 0363100331ad5b298279e9ebadcf87eab5f9e896..1a2b794f688047183827e5c2ed6ba80ba1339080 100644
--- a/src/tools.c
+++ b/src/tools.c
@@ -19,16 +19,25 @@
  *
  ******************************************************************************/
 
+/* Config parameters. */
+#include "../config.h"
+
+/* Some standard headers. */
 #include <math.h>
 #include <stddef.h>
 #include <stdio.h>
 #include <stdlib.h>
 
+/* This object's header. */
+#include "tools.h"
+
+/* Local includes. */
 #include "cell.h"
 #include "error.h"
+#include "gravity.h"
+#include "hydro.h"
 #include "part.h"
-#include "swift.h"
-#include "tools.h"
+#include "runner.h"
 
 /**
  *  Factorize a given integer, attempts to keep larger pair of factors.
@@ -56,9 +65,7 @@ void factor(int value, int *f1, int *f2) {
  * @param N The number of parts.
  * @param periodic Periodic boundary conditions flag.
  */
-
-void pairs_n2(double *dim, struct part *__restrict__ parts, int N,
-              int periodic) {
+void pairs_n2(double *dim, struct part *restrict parts, int N, int periodic) {
   int i, j, k, count = 0;
   // int mj, mk;
   // double maxratio = 1.0;
@@ -121,8 +128,7 @@ void pairs_n2(double *dim, struct part *__restrict__ parts, int N,
 }
 
 void pairs_single_density(double *dim, long long int pid,
-                          struct part *__restrict__ parts, int N,
-                          int periodic) {
+                          struct part *restrict parts, int N, int periodic) {
   int i, k;
   // int mj, mk;
   // double maxratio = 1.0;
@@ -271,7 +277,7 @@ void self_all_density(struct runner *r, struct cell *ci) {
 }
 
 void pairs_single_grav(double *dim, long long int pid,
-                       struct gpart *__restrict__ parts, int N, int periodic) {
+                       struct gpart *restrict parts, int N, int periodic) {
   int i, k;
   // int mj, mk;
   // double maxratio = 1.0;
@@ -328,7 +334,6 @@ void pairs_single_grav(double *dim, long long int pid,
  *
  * @param N number of intervals in [0,1].
  */
-
 void density_dump(int N) {
   int k;
   float r2[4] = {0.0f, 0.0f, 0.0f, 0.0f}, hi[4], hj[4];
@@ -363,10 +368,8 @@ void density_dump(int N) {
 /**
  * @brief Compute the force on a single particle brute-force.
  */
-
 void engine_single_density(double *dim, long long int pid,
-                           struct part *__restrict__ parts, int N,
-                           int periodic) {
+                           struct part *restrict parts, int N, int periodic) {
   int i, k;
   double r2, dx[3];
   float fdx[3], ih;
@@ -412,7 +415,7 @@ void engine_single_density(double *dim, long long int pid,
 }
 
 void engine_single_force(double *dim, long long int pid,
-                         struct part *__restrict__ parts, int N, int periodic) {
+                         struct part *restrict parts, int N, int periodic) {
   int i, k;
   double r2, dx[3];
   float fdx[3];
diff --git a/src/tools.h b/src/tools.h
index 5f9f41d033ab03983bde3bb37e87f8a39d2deecd..2a4462f274d8e9acd1f2d8e79996ad92c630d404 100644
--- a/src/tools.h
+++ b/src/tools.h
@@ -22,21 +22,25 @@
 #ifndef SWIFT_TOOL_H
 #define SWIFT_TOOL_H
 
+/* Config parameters. */
+#include "../config.h"
+
+/* Includes. */
 #include "cell.h"
+#include "part.h"
 #include "runner.h"
 
 void factor(int value, int *f1, int *f2);
 void density_dump(int N);
 void pairs_single_grav(double *dim, long long int pid,
-                       struct gpart *__restrict__ parts, int N, int periodic);
+                       struct gpart *restrict parts, int N, int periodic);
 void pairs_single_density(double *dim, long long int pid,
-                          struct part *__restrict__ parts, int N, int periodic);
+                          struct part *restrict parts, int N, int periodic);
 
 void pairs_all_density(struct runner *r, struct cell *ci, struct cell *cj);
 void self_all_density(struct runner *r, struct cell *ci);
 
-void pairs_n2(double *dim, struct part *__restrict__ parts, int N,
-              int periodic);
+void pairs_n2(double *dim, struct part *restrict parts, int N, int periodic);
 
 double random_uniform(double a, double b);
 void shuffle_particles(struct part *parts, const int count);