diff --git a/examples/plot_tasks.py b/examples/plot_tasks.py
index e070bdb565d6d8305b7974137292910dd720afe2..f2d0aa95d1f35f30476e1989349a07be8d9e5b0a 100755
--- a/examples/plot_tasks.py
+++ b/examples/plot_tasks.py
@@ -55,8 +55,8 @@ PLOT_PARAMS = {"axes.labelsize": 10,
 pl.rcParams.update(PLOT_PARAMS)
 
 #  Tasks and subtypes. Indexed as in tasks.h.
-TASKTYPES = ["none", "sort", "self", "pair", "sub", "init", "ghost", "drift",
-             "kick", "kick_fixdt", "send", "recv", "grav_pp", "grav_mm",
+TASKTYPES = ["none", "sort", "self", "pair", "sub_self", "sub_pair", "init", "ghost",
+             "drift", "kick", "kick_fixdt", "send", "recv", "grav_pp", "grav_mm",
              "grav_up", "grav_down", "grav_external", "part_sort", "gpart_sort",
              "split_cell", "rewait", "count"]
 
@@ -64,7 +64,8 @@ TASKCOLOURS = {"none": "black",
                "sort": "lightblue",
                "self": "greenyellow",
                "pair": "navy",
-               "sub": "hotpink",
+               "sub_self": "greenyellow",
+               "sub_pair": "navy",
                "init": "indigo",
                "ghost": "cyan",
                "drift": "maroon",
diff --git a/examples/plot_tasks_MPI.py b/examples/plot_tasks_MPI.py
index 44705a0164034f8a22f1f3aa860a64d3e7656d2e..9a92faf9417c9a302831eb8cb2f4471eb672d59c 100755
--- a/examples/plot_tasks_MPI.py
+++ b/examples/plot_tasks_MPI.py
@@ -61,8 +61,8 @@ PLOT_PARAMS = {"axes.labelsize": 10,
 pl.rcParams.update(PLOT_PARAMS)
 
 #  Tasks and subtypes. Indexed as in tasks.h.
-TASKTYPES = ["none", "sort", "self", "pair", "sub", "init", "ghost", "drift",
-             "kick", "kick_fixdt", "send", "recv", "grav_pp", "grav_mm",
+TASKTYPES = ["none", "sort", "self", "pair", "sub_self", "sub_pair", "init", "ghost",
+             "drift", "kick", "kick_fixdt", "send", "recv", "grav_pp", "grav_mm",
              "grav_up", "grav_down", "grav_external", "part_sort", "gpart_sort",
              "split_cell", "rewait", "count"]
 
@@ -70,7 +70,8 @@ TASKCOLOURS = {"none": "black",
                "sort": "lightblue",
                "self": "greenyellow",
                "pair": "navy",
-               "sub": "hotpink",
+               "sub_self": "greenyellow",
+               "sub_pair": "navy",
                "init": "indigo",
                "ghost": "cyan",
                "drift": "maroon",
diff --git a/src/Makefile.am b/src/Makefile.am
index 295f9efa5a79be493c0f27bdf21fad03817343be..a1ea5b0c875ee3ca813e6c6a55c8cbe92bbf001b 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -20,7 +20,7 @@
 AM_CFLAGS = -DTIMER $(HDF5_CPPFLAGS)
 
 # Assign a "safe" version number
-AM_LDFLAGS = $(HDF5_LDFLAGS) $(FFTW_LIBS) -version-info 0:0:0 # -fsanitize=address
+AM_LDFLAGS = $(HDF5_LDFLAGS) $(FFTW_LIBS) -version-info 0:0:0
 
 # The git command, if available.
 GIT_CMD = @GIT_CMD@
@@ -70,7 +70,7 @@ libswiftsim_la_SOURCES = $(AM_SOURCES)
 # Sources and flags for MPI library
 libswiftsim_mpi_la_SOURCES = $(AM_SOURCES)
 libswiftsim_mpi_la_CFLAGS = $(AM_CFLAGS) -DWITH_MPI $(METIS_INCS)
-libswiftsim_mpi_la_LDFLAGS = $(AM_LDFLAGS) $(METIS_LIBS)
+libswiftsim_mpi_la_LDFLAGS = $(AM_LDFLAGS) -DWITH_MPI $(METIS_LIBS)
 libswiftsim_mpi_la_SHORTNAME = mpi
 
 
diff --git a/src/engine.c b/src/engine.c
index f0878af71bac0360fa4d3fe8c8dacc6883165db7..c18f9012de44571b5a9f2177b920bb5a720b8e07 100644
--- a/src/engine.c
+++ b/src/engine.c
@@ -1246,38 +1246,25 @@ void engine_count_and_link_tasks(struct engine *e) {
         t->cj->density = engine_addlink(e, t->cj->density, t);
         atomic_inc(&t->cj->nr_density);
       }
-    } else if (t->type == task_type_sub) {
+    } else if (t->type == task_type_sub_self) {
       atomic_inc(&t->ci->nr_tasks);
-      if (t->cj != NULL) atomic_inc(&t->cj->nr_tasks);
       if (t->subtype == task_subtype_density) {
         t->ci->density = engine_addlink(e, t->ci->density, t);
         atomic_inc(&t->ci->nr_density);
-        if (t->cj != NULL) {
-          t->cj->density = engine_addlink(e, t->cj->density, t);
-          atomic_inc(&t->cj->nr_density);
-        }
+      }
+    } else if (t->type == task_type_sub_pair) {
+      atomic_inc(&t->ci->nr_tasks);
+      atomic_inc(&t->cj->nr_tasks);
+      if (t->subtype == task_subtype_density) {
+        t->ci->density = engine_addlink(e, t->ci->density, t);
+        atomic_inc(&t->ci->nr_density);
+        t->cj->density = engine_addlink(e, t->cj->density, t);
+        atomic_inc(&t->cj->nr_density);
       }
     }
   }
 }
 
-/**
- * @brief Creates the dependency network for the gravity tasks of a given cell.
- *
- * @param sched The #scheduler.
- * @param gravity The gravity task to link.
- * @param c The cell.
- */
-static inline void engine_make_gravity_dependencies(struct scheduler *sched,
-                                                    struct task *gravity,
-                                                    struct cell *c) {
-
-  /* init --> gravity --> kick */
-  /* grav_up --> gravity ( --> kick) */
-  scheduler_addunlock(sched, c->super->init, gravity);
-  scheduler_addunlock(sched, c->super->grav_up, gravity);
-  scheduler_addunlock(sched, gravity, c->super->kick);
-}
 
 /**
  * @brief Creates all the task dependencies for the gravity
@@ -1445,29 +1432,47 @@ void engine_make_extra_hydroloop_tasks(struct engine *e) {
       }
     }
 
-    /* Otherwise, sub interaction? */
-    else if (t->type == task_type_sub && t->subtype == task_subtype_density) {
+    /* Otherwise, sub-self interaction? */
+    else if (t->type == task_type_sub_self &&
+             t->subtype == task_subtype_density) {
 
       /* Start by constructing the task for the second hydro loop */
       struct task *t2 =
-          scheduler_addtask(sched, task_type_sub, task_subtype_force, t->flags,
-                            0, t->ci, t->cj, 0);
+          scheduler_addtask(sched, task_type_sub_self, task_subtype_force,
+                            t->flags, 0, t->ci, t->cj, 0);
 
-      /* Add the link between the new loop and both cells */
+      /* Add the link between the new loop and the cell */
       t->ci->force = engine_addlink(e, t->ci->force, t2);
       atomic_inc(&t->ci->nr_force);
-      if (t->cj != NULL) {
-        t->cj->force = engine_addlink(e, t->cj->force, t2);
-        atomic_inc(&t->cj->nr_force);
+
+      /* Now, build all the dependencies for the hydro for the cells */
+      /* that are local and are not descendant of the same super-cells */
+      if (t->ci->nodeID == nodeID) {
+        engine_make_hydro_loops_dependencies(sched, t, t2, t->ci);
       }
+    }
+
+    /* Otherwise, sub-pair interaction? */
+    else if (t->type == task_type_sub_pair &&
+             t->subtype == task_subtype_density) {
+
+      /* Start by constructing the task for the second hydro loop */
+      struct task *t2 =
+          scheduler_addtask(sched, task_type_sub_pair, task_subtype_force,
+                            t->flags, 0, t->ci, t->cj, 0);
+
+      /* Add the link between the new loop and both cells */
+      t->ci->force = engine_addlink(e, t->ci->force, t2);
+      atomic_inc(&t->ci->nr_force);
+      t->cj->force = engine_addlink(e, t->cj->force, t2);
+      atomic_inc(&t->cj->nr_force);
 
       /* Now, build all the dependencies for the hydro for the cells */
       /* that are local and are not descendant of the same super-cells */
       if (t->ci->nodeID == nodeID) {
         engine_make_hydro_loops_dependencies(sched, t, t2, t->ci);
       }
-      if (t->cj != NULL && t->cj->nodeID == nodeID &&
-          t->ci->super != t->cj->super) {
+      if (t->cj->nodeID == nodeID && t->ci->super != t->cj->super) {
         engine_make_hydro_loops_dependencies(sched, t, t2, t->cj);
       }
     }
@@ -1646,8 +1651,7 @@ int engine_marktasks(struct engine *e) {
       struct task *t = &tasks[ind[k]];
 
       /* Pair? */
-      if (t->type == task_type_pair ||
-          (t->type == task_type_sub && t->cj != NULL)) {
+      if (t->type == task_type_pair || t->type == task_type_sub_pair) {
 
         /* Local pointers. */
         const struct cell *ci = t->ci;
@@ -1691,15 +1695,14 @@ int engine_marktasks(struct engine *e) {
 
       /* Single-cell task? */
       else if (t->type == task_type_self || t->type == task_type_ghost ||
-               (t->type == task_type_sub && t->cj == NULL)) {
+               t->type == task_type_sub_self) {
 
         /* Set this task's skip. */
         t->skip = (t->ci->ti_end_min > ti_end);
       }
 
       /* Pair? */
-      else if (t->type == task_type_pair ||
-               (t->type == task_type_sub && t->cj != NULL)) {
+      else if (t->type == task_type_pair || t->type == task_type_sub_pair) {
 
         /* Local pointers. */
         const struct cell *ci = t->ci;
@@ -2213,7 +2216,8 @@ void engine_init_particles(struct engine *e) {
 
     mask |= 1 << task_type_self;
     mask |= 1 << task_type_pair;
-    mask |= 1 << task_type_sub;
+    mask |= 1 << task_type_sub_self;
+    mask |= 1 << task_type_sub_pair;
     mask |= 1 << task_type_ghost;
 
     submask |= 1 << task_subtype_density;
@@ -2228,7 +2232,8 @@ void engine_init_particles(struct engine *e) {
     mask |= 1 << task_type_grav_fft;
     mask |= 1 << task_type_self;
     mask |= 1 << task_type_pair;
-    mask |= 1 << task_type_sub;
+    mask |= 1 << task_type_sub_self;
+    mask |= 1 << task_type_sub_pair;
 
     submask |= 1 << task_subtype_grav;
   }
@@ -2351,7 +2356,8 @@ void engine_step(struct engine *e) {
 
     mask |= 1 << task_type_self;
     mask |= 1 << task_type_pair;
-    mask |= 1 << task_type_sub;
+    mask |= 1 << task_type_sub_self;
+    mask |= 1 << task_type_sub_pair;
     mask |= 1 << task_type_ghost;
 
     submask |= 1 << task_subtype_density;
@@ -2367,7 +2373,8 @@ void engine_step(struct engine *e) {
     mask |= 1 << task_type_grav_fft;
     mask |= 1 << task_type_self;
     mask |= 1 << task_type_pair;
-    mask |= 1 << task_type_sub;
+    mask |= 1 << task_type_sub_self;
+    mask |= 1 << task_type_sub_pair;
 
     submask |= 1 << task_subtype_grav;
   }
diff --git a/src/lock.h b/src/lock.h
index 735058e15f09d31396bc97d6906c5853fed17db9..5eb97de15f81d17bad45e85226cb0881d24482ba 100644
--- a/src/lock.h
+++ b/src/lock.h
@@ -23,6 +23,7 @@
 #include <pthread.h>
 
 /* Includes. */
+#include "atomic.h"
 #include "inline.h"
 
 #ifdef PTHREAD_SPINLOCK
@@ -48,14 +49,14 @@
 #define lock_init(l) (*(l) = 0)
 #define lock_destroy(l) 0
 INLINE static int lock_lock(volatile int *l) {
-  while (__sync_val_compare_and_swap(l, 0, 1) != 0)
+  while (atomic_cas(l, 0, 1) != 0)
     ;
   // while( *l );
   return 0;
 }
-#define lock_trylock(l) ((*(l)) ? 1 : __sync_val_compare_and_swap(l, 0, 1))
-#define lock_unlock(l) (__sync_val_compare_and_swap(l, 1, 0) != 1)
-#define lock_unlock_blind(l) __sync_val_compare_and_swap(l, 1, 0)
+#define lock_trylock(l) ((*(l)) ? 1 : atomic_cas(l, 0, 1))
+#define lock_unlock(l) (atomic_cas(l, 1, 0) != 1)
+#define lock_unlock_blind(l) atomic_cas(l, 1, 0)
 #endif
 
 #endif /* SWIFT_LOCK_H */
diff --git a/src/map.c b/src/map.c
index cb911c36f30cfb19d82868f0333b3ff705bab8e9..f4f9ac7cfa7141606b578739517b66e951e65eab 100644
--- a/src/map.c
+++ b/src/map.c
@@ -35,7 +35,6 @@
 /**
  * @brief Mapping function to draw a specific cell (gnuplot).
  */
-
 void map_cells_plot(struct cell *c, void *data) {
 
   int depth = *(int *)data;
@@ -89,20 +88,17 @@ void map_cells_plot(struct cell *c, void *data) {
 /**
  * @brief Mapping function for checking if each part is in its box.
  */
+void map_check(struct part *p, struct cell *c, void *data) {
 
-/* void map_check ( struct part *p , struct cell *c , void *data ) {
-
-    if ( p->x[0] < c->loc[0] || p->x[0] > c->loc[0]+c->h[0] ||
-         p->x[0] < c->loc[0] || p->x[0] > c->loc[0]+c->h[0] ||
-         p->x[0] < c->loc[0] || p->x[0] > c->loc[0]+c->h[0] )
-        printf( "map_check: particle %i is outside of its box.\n" , p->id );
-
-    } */
+  if (p->x[0] < c->loc[0] || p->x[0] > c->loc[0] + c->h[0] ||
+      p->x[0] < c->loc[0] || p->x[0] > c->loc[0] + c->h[0] ||
+      p->x[0] < c->loc[0] || p->x[0] > c->loc[0] + c->h[0])
+    printf("map_check: particle %lld is outside of its box.\n", p->id);
+}
 
 /**
  * @brief Mapping function for neighbour count.
  */
-
 void map_cellcheck(struct cell *c, void *data) {
 
   int *count = (int *)data;
@@ -142,7 +138,6 @@ void map_cellcheck(struct cell *c, void *data) {
 /**
  * @brief Mapping function for maxdepth cell count.
  */
-
 void map_maxdepth(struct cell *c, void *data) {
 
   int maxdepth = ((int *)data)[0];
@@ -156,7 +151,6 @@ void map_maxdepth(struct cell *c, void *data) {
 /**
  * @brief Mapping function for neighbour count.
  */
-
 void map_count(struct part *p, struct cell *c, void *data) {
 
   double *wcount = (double *)data;
@@ -165,7 +159,6 @@ void map_count(struct part *p, struct cell *c, void *data) {
 
   *wcount += p->density.wcount;
 }
-
 void map_wcount_min(struct part *p, struct cell *c, void *data) {
 
   struct part **p2 = (struct part **)data;
@@ -197,7 +190,6 @@ void map_h_max(struct part *p, struct cell *c, void *data) {
 /**
  * @brief Mapping function for neighbour count.
  */
-
 void map_icount(struct part *p, struct cell *c, void *data) {
 
   // int *count = (int *)data;
@@ -210,7 +202,6 @@ void map_icount(struct part *p, struct cell *c, void *data) {
 /**
  * @brief Mapping function to print the particle position.
  */
-
 void map_dump(struct part *p, struct cell *c, void *data) {
 
   double *shift = (double *)data;
diff --git a/src/partition.c b/src/partition.c
index b79dfd6b81d727b6e53f9e0906c7c59eee6607f1..dbec9f7ebd976a9779d4a54ee536e0e4eef331b2 100644
--- a/src/partition.c
+++ b/src/partition.c
@@ -453,9 +453,9 @@ static void repart_edge_metis(int partweights, int bothweights, int nodeID,
 
     /* Skip un-interesting tasks. */
     if (t->type != task_type_self && t->type != task_type_pair &&
-        t->type != task_type_sub && t->type != task_type_ghost &&
-        t->type != task_type_drift && t->type != task_type_kick &&
-        t->type != task_type_init)
+        t->type != task_type_sub_self && t->type != task_type_sub_self &&
+        t->type != task_type_ghost && t->type != task_type_drift &&
+        t->type != task_type_kick && t->type != task_type_init)
       continue;
 
     /* Get the task weight. */
@@ -496,7 +496,8 @@ static void repart_edge_metis(int partweights, int bothweights, int nodeID,
 
     /* Self interaction? */
     else if ((t->type == task_type_self && ci->nodeID == nodeID) ||
-             (t->type == task_type_sub && cj == NULL && ci->nodeID == nodeID)) {
+             (t->type == task_type_sub_self && cj == NULL &&
+              ci->nodeID == nodeID)) {
       /* Self interactions add only to vertex weight. */
       if (taskvweights) weights_v[cid] += w;
 
@@ -504,7 +505,7 @@ static void repart_edge_metis(int partweights, int bothweights, int nodeID,
 
     /* Pair? */
     else if (t->type == task_type_pair ||
-             (t->type == task_type_sub && cj != NULL)) {
+             (t->type == task_type_sub_pair)) {
       /* In-cell pair? */
       if (ci == cj) {
         /* Add weight to vertex for ci. */
diff --git a/src/queue.c b/src/queue.c
index 57aa5701d9cc7bbf34d7f1ea54066da093fc99d1..9883d77e66421eda6093331e0c9a8f6ac0155ded 100644
--- a/src/queue.c
+++ b/src/queue.c
@@ -38,15 +38,11 @@
 #include "const.h"
 #include "error.h"
 
-/* The counters. */
-int queue_counter[queue_counter_count];
-
 /**
  * @brief Enqueue all tasks in the incoming DEQ.
  *
  * @param q The #queue, assumed to be locked.
  */
-
 void queue_get_incoming(struct queue *q) {
 
   int *tid = q->tid;
@@ -101,7 +97,6 @@ void queue_get_incoming(struct queue *q) {
  * @param q The #queue.
  * @param t The #task.
  */
-
 void queue_insert(struct queue *q, struct task *t) {
   /* Get an index in the DEQ. */
   const int ind = atomic_inc(&q->last_incoming) % queue_incoming_size;
@@ -133,7 +128,6 @@ void queue_insert(struct queue *q, struct task *t) {
  * @param q The #queue.
  * @param tasks List of tasks to which the queue indices refer to.
  */
-
 void queue_init(struct queue *q, struct task *tasks) {
 
   /* Allocate the task list if needed. */
@@ -169,7 +163,6 @@ void queue_init(struct queue *q, struct task *tasks) {
  * @param prev The previous #task extracted from this #queue.
  * @param blocking Block until access to the queue is granted.
  */
-
 struct task *queue_gettask(struct queue *q, const struct task *prev,
                            int blocking) {
 
diff --git a/src/runner.c b/src/runner.c
index bc4fab5b603b9ddfa9a71dcce88da57cbe862a6f..ecbabb83cfccdd207b850eb4430e7f23a0719f41 100644
--- a/src/runner.c
+++ b/src/runner.c
@@ -558,8 +558,13 @@ void runner_do_ghost(struct runner *r, struct cell *c) {
 
           }
 
-          /* Otherwise, sub interaction? */
-          else if (l->t->type == task_type_sub) {
+          /* Otherwise, sub-self interaction? */
+          else if (l->t->type == task_type_sub_self)
+            runner_dosub_subset_density(r, finger, parts, pid, count, NULL, -1,
+                                        1);
+
+          /* Otherwise, sub-pair interaction? */
+          else if (l->t->type == task_type_sub_pair) {
 
             /* Left or right? */
             if (l->t->ci == finger)
@@ -1077,15 +1082,19 @@ void *runner_main(void *data) {
         case task_type_sort:
           runner_do_sort(r, ci, t->flags, 1);
           break;
-        case task_type_sub:
+        case task_type_sub_self:
           if (t->subtype == task_subtype_density)
-            runner_dosub1_density(r, ci, cj, t->flags, 1);
+            runner_dosub_self1_density(r, ci, 1);
           else if (t->subtype == task_subtype_force)
-            runner_dosub2_force(r, ci, cj, t->flags, 1);
-          else if (t->subtype == task_subtype_grav)
-            runner_dosub_grav(r, ci, cj, 1);
-          else if (t->subtype == task_subtype_grav)
-            runner_dosub_grav(r, ci, cj, 1);
+            runner_dosub_self2_force(r, ci, 1);
+          else
+            error("Unknown task subtype.");
+          break;
+        case task_type_sub_pair:
+          if (t->subtype == task_subtype_density)
+            runner_dosub_pair1_density(r, ci, cj, t->flags, 1);
+          else if (t->subtype == task_subtype_force)
+            runner_dosub_pair2_force(r, ci, cj, t->flags, 1);
           else
             error("Unknown task subtype.");
           break;
diff --git a/src/runner_doiact.h b/src/runner_doiact.h
index e0c13e4e0adb101d9a8975651f24225551277f43..7fe19af853601e49de553a55ee205c654167d67e 100644
--- a/src/runner_doiact.h
+++ b/src/runner_doiact.h
@@ -1,4 +1,3 @@
-
 /*******************************************************************************
  * This file is part of SWIFT.
  * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
@@ -57,11 +56,17 @@
 #define _DOSELF_SUBSET(f) PASTE(runner_doself_subset, f)
 #define DOSELF_SUBSET _DOSELF_SUBSET(FUNCTION)
 
-#define _DOSUB1(f) PASTE(runner_dosub1, f)
-#define DOSUB1 _DOSUB1(FUNCTION)
+#define _DOSUB_SELF1(f) PASTE(runner_dosub_self1, f)
+#define DOSUB_SELF1 _DOSUB_SELF1(FUNCTION)
+
+#define _DOSUB_PAIR1(f) PASTE(runner_dosub_pair1, f)
+#define DOSUB_PAIR1 _DOSUB_PAIR1(FUNCTION)
+
+#define _DOSUB_SELF2(f) PASTE(runner_dosub_self2, f)
+#define DOSUB_SELF2 _DOSUB_SELF2(FUNCTION)
 
-#define _DOSUB2(f) PASTE(runner_dosub2, f)
-#define DOSUB2 _DOSUB2(FUNCTION)
+#define _DOSUB_PAIR2(f) PASTE(runner_dosub_pair2, f)
+#define DOSUB_PAIR2 _DOSUB_PAIR2(FUNCTION)
 
 #define _DOSUB_SUBSET(f) PASTE(runner_dosub_subset, f)
 #define DOSUB_SUBSET _DOSUB_SUBSET(FUNCTION)
@@ -78,8 +83,11 @@
 #define _TIMER_DOPAIR(f) PASTE(timer_dopair, f)
 #define TIMER_DOPAIR _TIMER_DOPAIR(FUNCTION)
 
-#define _TIMER_DOSUB(f) PASTE(timer_dosub, f)
-#define TIMER_DOSUB _TIMER_DOSUB(FUNCTION)
+#define _TIMER_DOSUB_SELF(f) PASTE(timer_dosub_self, f)
+#define TIMER_DOSUB_SELF _TIMER_DOSUB_SELF(FUNCTION)
+
+#define _TIMER_DOSUB_PAIR(f) PASTE(timer_dosub_pair, f)
+#define TIMER_DOSUB_PAIR _TIMER_DOSUB_PAIR(FUNCTION)
 
 #define _TIMER_DOSELF_SUBSET(f) PASTE(timer_doself_subset, f)
 #define TIMER_DOSELF_SUBSET _TIMER_DOSELF_SUBSET(FUNCTION)
@@ -1711,7 +1719,7 @@ void DOSELF2(struct runner *r, struct cell *restrict c) {
 }
 
 /**
- * @brief Compute grouped sub-cell interactions
+ * @brief Compute grouped sub-cell interactions for pairs
  *
  * @param r The #runner.
  * @param ci The first #cell.
@@ -1722,543 +1730,561 @@ void DOSELF2(struct runner *r, struct cell *restrict c) {
  * @todo Hard-code the sid on the recursive calls to avoid the
  * redundant computations to find the sid on-the-fly.
  */
+void DOSUB_PAIR1(struct runner *r, struct cell *ci, struct cell *cj, int sid,
+                 int gettimer) {
 
-void DOSUB1(struct runner *r, struct cell *ci, struct cell *cj, int sid,
-            int gettimer) {
-
-  int j = 0, k;
-  double shift[3];
-  float h;
   struct space *s = r->e->s;
   const int ti_current = r->e->ti_current;
 
   TIMER_TIC
 
-  /* Is this a single cell? */
-  if (cj == NULL) {
-
-    /* Should we even bother? */
-    if (ci->ti_end_min > ti_current) return;
+  /* Should we even bother? */
+  if (ci->ti_end_min > ti_current && cj->ti_end_min > ti_current) return;
 
-    /* Recurse? */
-    if (ci->split) {
+  /* Get the cell dimensions. */
+  const float h = fmin(ci->h[0], fmin(ci->h[1], ci->h[2]));
 
-      /* Loop over all progeny. */
-      for (k = 0; k < 8; k++)
-        if (ci->progeny[k] != NULL) {
-          DOSUB1(r, ci->progeny[k], NULL, -1, 0);
-          for (j = k + 1; j < 8; j++)
-            if (ci->progeny[j] != NULL)
-              DOSUB1(r, ci->progeny[k], ci->progeny[j], -1, 0);
-        }
+  /* Get the type of pair if not specified explicitly. */
+  // if ( sid < 0 )
+  double shift[3];
+  sid = space_getsid(s, &ci, &cj, shift);
 
-    }
+  /* Recurse? */
+  if (ci->split && cj->split &&
+      fmaxf(ci->h_max, cj->h_max) * kernel_gamma + ci->dx_max + cj->dx_max <
+          h / 2) {
 
-    /* Otherwise, compute self-interaction. */
-    else
-      DOSELF1(r, ci);
+    /* Different types of flags. */
+    switch (sid) {
 
-  } /* self-interaction. */
+      /* Regular sub-cell interactions of a single cell. */
+      case 0: /* (  1 ,  1 ,  1 ) */
+        if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[7], cj->progeny[0], -1, 0);
+        break;
 
-  /* Otherwise, it's a pair interaction. */
-  else {
+      case 1: /* (  1 ,  1 ,  0 ) */
+        if (ci->progeny[6] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[6], cj->progeny[0], -1, 0);
+        if (ci->progeny[6] != NULL && cj->progeny[1] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[6], cj->progeny[1], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[7], cj->progeny[0], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[1] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[7], cj->progeny[1], -1, 0);
+        break;
 
-    /* Should we even bother? */
-    if (ci->ti_end_min > ti_current && cj->ti_end_min > ti_current) return;
+      case 2: /* (  1 ,  1 , -1 ) */
+        if (ci->progeny[6] != NULL && cj->progeny[1] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[6], cj->progeny[1], -1, 0);
+        break;
 
-    /* Get the cell dimensions. */
-    h = fmin(ci->h[0], fmin(ci->h[1], ci->h[2]));
+      case 3: /* (  1 ,  0 ,  1 ) */
+        if (ci->progeny[5] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[5], cj->progeny[0], -1, 0);
+        if (ci->progeny[5] != NULL && cj->progeny[2] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[5], cj->progeny[2], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[7], cj->progeny[0], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[2] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[7], cj->progeny[2], -1, 0);
+        break;
 
-    /* Get the type of pair if not specified explicitly. */
-    // if ( sid < 0 )
-    sid = space_getsid(s, &ci, &cj, shift);
+      case 4: /* (  1 ,  0 ,  0 ) */
+        if (ci->progeny[4] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[4], cj->progeny[0], -1, 0);
+        if (ci->progeny[4] != NULL && cj->progeny[1] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[4], cj->progeny[1], -1, 0);
+        if (ci->progeny[4] != NULL && cj->progeny[2] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[4], cj->progeny[2], -1, 0);
+        if (ci->progeny[4] != NULL && cj->progeny[3] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[4], cj->progeny[3], -1, 0);
+        if (ci->progeny[5] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[5], cj->progeny[0], -1, 0);
+        if (ci->progeny[5] != NULL && cj->progeny[1] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[5], cj->progeny[1], -1, 0);
+        if (ci->progeny[5] != NULL && cj->progeny[2] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[5], cj->progeny[2], -1, 0);
+        if (ci->progeny[5] != NULL && cj->progeny[3] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[5], cj->progeny[3], -1, 0);
+        if (ci->progeny[6] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[6], cj->progeny[0], -1, 0);
+        if (ci->progeny[6] != NULL && cj->progeny[1] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[6], cj->progeny[1], -1, 0);
+        if (ci->progeny[6] != NULL && cj->progeny[2] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[6], cj->progeny[2], -1, 0);
+        if (ci->progeny[6] != NULL && cj->progeny[3] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[6], cj->progeny[3], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[7], cj->progeny[0], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[1] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[7], cj->progeny[1], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[2] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[7], cj->progeny[2], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[3] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[7], cj->progeny[3], -1, 0);
+        break;
 
-    /* Recurse? */
-    if (ci->split && cj->split &&
-        fmaxf(ci->h_max, cj->h_max) * kernel_gamma + ci->dx_max + cj->dx_max <
-            h / 2) {
+      case 5: /* (  1 ,  0 , -1 ) */
+        if (ci->progeny[4] != NULL && cj->progeny[1] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[4], cj->progeny[1], -1, 0);
+        if (ci->progeny[4] != NULL && cj->progeny[3] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[4], cj->progeny[3], -1, 0);
+        if (ci->progeny[6] != NULL && cj->progeny[1] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[6], cj->progeny[1], -1, 0);
+        if (ci->progeny[6] != NULL && cj->progeny[3] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[6], cj->progeny[3], -1, 0);
+        break;
 
-      /* Different types of flags. */
-      switch (sid) {
+      case 6: /* (  1 , -1 ,  1 ) */
+        if (ci->progeny[5] != NULL && cj->progeny[2] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[5], cj->progeny[2], -1, 0);
+        break;
 
-        /* Regular sub-cell interactions of a single cell. */
-        case 0: /* (  1 ,  1 ,  1 ) */
-          if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
-            DOSUB1(r, ci->progeny[7], cj->progeny[0], -1, 0);
-          break;
+      case 7: /* (  1 , -1 ,  0 ) */
+        if (ci->progeny[4] != NULL && cj->progeny[2] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[4], cj->progeny[2], -1, 0);
+        if (ci->progeny[4] != NULL && cj->progeny[3] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[4], cj->progeny[3], -1, 0);
+        if (ci->progeny[5] != NULL && cj->progeny[2] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[5], cj->progeny[2], -1, 0);
+        if (ci->progeny[5] != NULL && cj->progeny[3] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[5], cj->progeny[3], -1, 0);
+        break;
 
-        case 1: /* (  1 ,  1 ,  0 ) */
-          if (ci->progeny[6] != NULL && cj->progeny[0] != NULL)
-            DOSUB1(r, ci->progeny[6], cj->progeny[0], -1, 0);
-          if (ci->progeny[6] != NULL && cj->progeny[1] != NULL)
-            DOSUB1(r, ci->progeny[6], cj->progeny[1], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
-            DOSUB1(r, ci->progeny[7], cj->progeny[0], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[1] != NULL)
-            DOSUB1(r, ci->progeny[7], cj->progeny[1], -1, 0);
-          break;
+      case 8: /* (  1 , -1 , -1 ) */
+        if (ci->progeny[4] != NULL && cj->progeny[3] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[4], cj->progeny[3], -1, 0);
+        break;
 
-        case 2: /* (  1 ,  1 , -1 ) */
-          if (ci->progeny[6] != NULL && cj->progeny[1] != NULL)
-            DOSUB1(r, ci->progeny[6], cj->progeny[1], -1, 0);
-          break;
+      case 9: /* (  0 ,  1 ,  1 ) */
+        if (ci->progeny[3] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[3], cj->progeny[0], -1, 0);
+        if (ci->progeny[3] != NULL && cj->progeny[4] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[3], cj->progeny[4], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[7], cj->progeny[0], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[4] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[7], cj->progeny[4], -1, 0);
+        break;
 
-        case 3: /* (  1 ,  0 ,  1 ) */
-          if (ci->progeny[5] != NULL && cj->progeny[0] != NULL)
-            DOSUB1(r, ci->progeny[5], cj->progeny[0], -1, 0);
-          if (ci->progeny[5] != NULL && cj->progeny[2] != NULL)
-            DOSUB1(r, ci->progeny[5], cj->progeny[2], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
-            DOSUB1(r, ci->progeny[7], cj->progeny[0], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[2] != NULL)
-            DOSUB1(r, ci->progeny[7], cj->progeny[2], -1, 0);
-          break;
+      case 10: /* (  0 ,  1 ,  0 ) */
+        if (ci->progeny[2] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[2], cj->progeny[0], -1, 0);
+        if (ci->progeny[2] != NULL && cj->progeny[1] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[2], cj->progeny[1], -1, 0);
+        if (ci->progeny[2] != NULL && cj->progeny[4] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[2], cj->progeny[4], -1, 0);
+        if (ci->progeny[2] != NULL && cj->progeny[5] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[2], cj->progeny[5], -1, 0);
+        if (ci->progeny[3] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[3], cj->progeny[0], -1, 0);
+        if (ci->progeny[3] != NULL && cj->progeny[1] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[3], cj->progeny[1], -1, 0);
+        if (ci->progeny[3] != NULL && cj->progeny[4] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[3], cj->progeny[4], -1, 0);
+        if (ci->progeny[3] != NULL && cj->progeny[5] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[3], cj->progeny[5], -1, 0);
+        if (ci->progeny[6] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[6], cj->progeny[0], -1, 0);
+        if (ci->progeny[6] != NULL && cj->progeny[1] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[6], cj->progeny[1], -1, 0);
+        if (ci->progeny[6] != NULL && cj->progeny[4] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[6], cj->progeny[4], -1, 0);
+        if (ci->progeny[6] != NULL && cj->progeny[5] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[6], cj->progeny[5], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[7], cj->progeny[0], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[1] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[7], cj->progeny[1], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[4] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[7], cj->progeny[4], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[5] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[7], cj->progeny[5], -1, 0);
+        break;
 
-        case 4: /* (  1 ,  0 ,  0 ) */
-          if (ci->progeny[4] != NULL && cj->progeny[0] != NULL)
-            DOSUB1(r, ci->progeny[4], cj->progeny[0], -1, 0);
-          if (ci->progeny[4] != NULL && cj->progeny[1] != NULL)
-            DOSUB1(r, ci->progeny[4], cj->progeny[1], -1, 0);
-          if (ci->progeny[4] != NULL && cj->progeny[2] != NULL)
-            DOSUB1(r, ci->progeny[4], cj->progeny[2], -1, 0);
-          if (ci->progeny[4] != NULL && cj->progeny[3] != NULL)
-            DOSUB1(r, ci->progeny[4], cj->progeny[3], -1, 0);
-          if (ci->progeny[5] != NULL && cj->progeny[0] != NULL)
-            DOSUB1(r, ci->progeny[5], cj->progeny[0], -1, 0);
-          if (ci->progeny[5] != NULL && cj->progeny[1] != NULL)
-            DOSUB1(r, ci->progeny[5], cj->progeny[1], -1, 0);
-          if (ci->progeny[5] != NULL && cj->progeny[2] != NULL)
-            DOSUB1(r, ci->progeny[5], cj->progeny[2], -1, 0);
-          if (ci->progeny[5] != NULL && cj->progeny[3] != NULL)
-            DOSUB1(r, ci->progeny[5], cj->progeny[3], -1, 0);
-          if (ci->progeny[6] != NULL && cj->progeny[0] != NULL)
-            DOSUB1(r, ci->progeny[6], cj->progeny[0], -1, 0);
-          if (ci->progeny[6] != NULL && cj->progeny[1] != NULL)
-            DOSUB1(r, ci->progeny[6], cj->progeny[1], -1, 0);
-          if (ci->progeny[6] != NULL && cj->progeny[2] != NULL)
-            DOSUB1(r, ci->progeny[6], cj->progeny[2], -1, 0);
-          if (ci->progeny[6] != NULL && cj->progeny[3] != NULL)
-            DOSUB1(r, ci->progeny[6], cj->progeny[3], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
-            DOSUB1(r, ci->progeny[7], cj->progeny[0], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[1] != NULL)
-            DOSUB1(r, ci->progeny[7], cj->progeny[1], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[2] != NULL)
-            DOSUB1(r, ci->progeny[7], cj->progeny[2], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[3] != NULL)
-            DOSUB1(r, ci->progeny[7], cj->progeny[3], -1, 0);
-          break;
+      case 11: /* (  0 ,  1 , -1 ) */
+        if (ci->progeny[2] != NULL && cj->progeny[1] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[2], cj->progeny[1], -1, 0);
+        if (ci->progeny[2] != NULL && cj->progeny[5] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[2], cj->progeny[5], -1, 0);
+        if (ci->progeny[6] != NULL && cj->progeny[1] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[6], cj->progeny[1], -1, 0);
+        if (ci->progeny[6] != NULL && cj->progeny[5] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[6], cj->progeny[5], -1, 0);
+        break;
 
-        case 5: /* (  1 ,  0 , -1 ) */
-          if (ci->progeny[4] != NULL && cj->progeny[1] != NULL)
-            DOSUB1(r, ci->progeny[4], cj->progeny[1], -1, 0);
-          if (ci->progeny[4] != NULL && cj->progeny[3] != NULL)
-            DOSUB1(r, ci->progeny[4], cj->progeny[3], -1, 0);
-          if (ci->progeny[6] != NULL && cj->progeny[1] != NULL)
-            DOSUB1(r, ci->progeny[6], cj->progeny[1], -1, 0);
-          if (ci->progeny[6] != NULL && cj->progeny[3] != NULL)
-            DOSUB1(r, ci->progeny[6], cj->progeny[3], -1, 0);
-          break;
+      case 12: /* (  0 ,  0 ,  1 ) */
+        if (ci->progeny[1] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[1], cj->progeny[0], -1, 0);
+        if (ci->progeny[1] != NULL && cj->progeny[2] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[1], cj->progeny[2], -1, 0);
+        if (ci->progeny[1] != NULL && cj->progeny[4] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[1], cj->progeny[4], -1, 0);
+        if (ci->progeny[1] != NULL && cj->progeny[6] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[1], cj->progeny[6], -1, 0);
+        if (ci->progeny[3] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[3], cj->progeny[0], -1, 0);
+        if (ci->progeny[3] != NULL && cj->progeny[2] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[3], cj->progeny[2], -1, 0);
+        if (ci->progeny[3] != NULL && cj->progeny[4] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[3], cj->progeny[4], -1, 0);
+        if (ci->progeny[3] != NULL && cj->progeny[6] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[3], cj->progeny[6], -1, 0);
+        if (ci->progeny[5] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[5], cj->progeny[0], -1, 0);
+        if (ci->progeny[5] != NULL && cj->progeny[2] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[5], cj->progeny[2], -1, 0);
+        if (ci->progeny[5] != NULL && cj->progeny[4] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[5], cj->progeny[4], -1, 0);
+        if (ci->progeny[5] != NULL && cj->progeny[6] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[5], cj->progeny[6], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[7], cj->progeny[0], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[2] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[7], cj->progeny[2], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[4] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[7], cj->progeny[4], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[6] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[7], cj->progeny[6], -1, 0);
+        break;
+    }
 
-        case 6: /* (  1 , -1 ,  1 ) */
-          if (ci->progeny[5] != NULL && cj->progeny[2] != NULL)
-            DOSUB1(r, ci->progeny[5], cj->progeny[2], -1, 0);
-          break;
+  }
 
-        case 7: /* (  1 , -1 ,  0 ) */
-          if (ci->progeny[4] != NULL && cj->progeny[2] != NULL)
-            DOSUB1(r, ci->progeny[4], cj->progeny[2], -1, 0);
-          if (ci->progeny[4] != NULL && cj->progeny[3] != NULL)
-            DOSUB1(r, ci->progeny[4], cj->progeny[3], -1, 0);
-          if (ci->progeny[5] != NULL && cj->progeny[2] != NULL)
-            DOSUB1(r, ci->progeny[5], cj->progeny[2], -1, 0);
-          if (ci->progeny[5] != NULL && cj->progeny[3] != NULL)
-            DOSUB1(r, ci->progeny[5], cj->progeny[3], -1, 0);
-          break;
+  /* Otherwise, compute the pair directly. */
+  else if (ci->ti_end_min <= ti_current || cj->ti_end_min <= ti_current) {
 
-        case 8: /* (  1 , -1 , -1 ) */
-          if (ci->progeny[4] != NULL && cj->progeny[3] != NULL)
-            DOSUB1(r, ci->progeny[4], cj->progeny[3], -1, 0);
-          break;
+    /* Do any of the cells need to be sorted first? */
+    if (!(ci->sorted & (1 << sid))) runner_do_sort(r, ci, (1 << sid), 1);
+    if (!(cj->sorted & (1 << sid))) runner_do_sort(r, cj, (1 << sid), 1);
 
-        case 9: /* (  0 ,  1 ,  1 ) */
-          if (ci->progeny[3] != NULL && cj->progeny[0] != NULL)
-            DOSUB1(r, ci->progeny[3], cj->progeny[0], -1, 0);
-          if (ci->progeny[3] != NULL && cj->progeny[4] != NULL)
-            DOSUB1(r, ci->progeny[3], cj->progeny[4], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
-            DOSUB1(r, ci->progeny[7], cj->progeny[0], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[4] != NULL)
-            DOSUB1(r, ci->progeny[7], cj->progeny[4], -1, 0);
-          break;
+    /* Compute the interactions. */
+    DOPAIR1(r, ci, cj);
+  }
 
-        case 10: /* (  0 ,  1 ,  0 ) */
-          if (ci->progeny[2] != NULL && cj->progeny[0] != NULL)
-            DOSUB1(r, ci->progeny[2], cj->progeny[0], -1, 0);
-          if (ci->progeny[2] != NULL && cj->progeny[1] != NULL)
-            DOSUB1(r, ci->progeny[2], cj->progeny[1], -1, 0);
-          if (ci->progeny[2] != NULL && cj->progeny[4] != NULL)
-            DOSUB1(r, ci->progeny[2], cj->progeny[4], -1, 0);
-          if (ci->progeny[2] != NULL && cj->progeny[5] != NULL)
-            DOSUB1(r, ci->progeny[2], cj->progeny[5], -1, 0);
-          if (ci->progeny[3] != NULL && cj->progeny[0] != NULL)
-            DOSUB1(r, ci->progeny[3], cj->progeny[0], -1, 0);
-          if (ci->progeny[3] != NULL && cj->progeny[1] != NULL)
-            DOSUB1(r, ci->progeny[3], cj->progeny[1], -1, 0);
-          if (ci->progeny[3] != NULL && cj->progeny[4] != NULL)
-            DOSUB1(r, ci->progeny[3], cj->progeny[4], -1, 0);
-          if (ci->progeny[3] != NULL && cj->progeny[5] != NULL)
-            DOSUB1(r, ci->progeny[3], cj->progeny[5], -1, 0);
-          if (ci->progeny[6] != NULL && cj->progeny[0] != NULL)
-            DOSUB1(r, ci->progeny[6], cj->progeny[0], -1, 0);
-          if (ci->progeny[6] != NULL && cj->progeny[1] != NULL)
-            DOSUB1(r, ci->progeny[6], cj->progeny[1], -1, 0);
-          if (ci->progeny[6] != NULL && cj->progeny[4] != NULL)
-            DOSUB1(r, ci->progeny[6], cj->progeny[4], -1, 0);
-          if (ci->progeny[6] != NULL && cj->progeny[5] != NULL)
-            DOSUB1(r, ci->progeny[6], cj->progeny[5], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
-            DOSUB1(r, ci->progeny[7], cj->progeny[0], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[1] != NULL)
-            DOSUB1(r, ci->progeny[7], cj->progeny[1], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[4] != NULL)
-            DOSUB1(r, ci->progeny[7], cj->progeny[4], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[5] != NULL)
-            DOSUB1(r, ci->progeny[7], cj->progeny[5], -1, 0);
-          break;
+  if (gettimer) TIMER_TOC(TIMER_DOSUB_PAIR);
+}
 
-        case 11: /* (  0 ,  1 , -1 ) */
-          if (ci->progeny[2] != NULL && cj->progeny[1] != NULL)
-            DOSUB1(r, ci->progeny[2], cj->progeny[1], -1, 0);
-          if (ci->progeny[2] != NULL && cj->progeny[5] != NULL)
-            DOSUB1(r, ci->progeny[2], cj->progeny[5], -1, 0);
-          if (ci->progeny[6] != NULL && cj->progeny[1] != NULL)
-            DOSUB1(r, ci->progeny[6], cj->progeny[1], -1, 0);
-          if (ci->progeny[6] != NULL && cj->progeny[5] != NULL)
-            DOSUB1(r, ci->progeny[6], cj->progeny[5], -1, 0);
-          break;
+/**
+ * @brief Compute grouped sub-cell interactions for self tasks
+ *
+ * @param r The #runner.
+ * @param ci The first #cell.
+ * @param gettimer Do we have a timer ?
+ */
+void DOSUB_SELF1(struct runner *r, struct cell *ci, int gettimer) {
 
-        case 12: /* (  0 ,  0 ,  1 ) */
-          if (ci->progeny[1] != NULL && cj->progeny[0] != NULL)
-            DOSUB1(r, ci->progeny[1], cj->progeny[0], -1, 0);
-          if (ci->progeny[1] != NULL && cj->progeny[2] != NULL)
-            DOSUB1(r, ci->progeny[1], cj->progeny[2], -1, 0);
-          if (ci->progeny[1] != NULL && cj->progeny[4] != NULL)
-            DOSUB1(r, ci->progeny[1], cj->progeny[4], -1, 0);
-          if (ci->progeny[1] != NULL && cj->progeny[6] != NULL)
-            DOSUB1(r, ci->progeny[1], cj->progeny[6], -1, 0);
-          if (ci->progeny[3] != NULL && cj->progeny[0] != NULL)
-            DOSUB1(r, ci->progeny[3], cj->progeny[0], -1, 0);
-          if (ci->progeny[3] != NULL && cj->progeny[2] != NULL)
-            DOSUB1(r, ci->progeny[3], cj->progeny[2], -1, 0);
-          if (ci->progeny[3] != NULL && cj->progeny[4] != NULL)
-            DOSUB1(r, ci->progeny[3], cj->progeny[4], -1, 0);
-          if (ci->progeny[3] != NULL && cj->progeny[6] != NULL)
-            DOSUB1(r, ci->progeny[3], cj->progeny[6], -1, 0);
-          if (ci->progeny[5] != NULL && cj->progeny[0] != NULL)
-            DOSUB1(r, ci->progeny[5], cj->progeny[0], -1, 0);
-          if (ci->progeny[5] != NULL && cj->progeny[2] != NULL)
-            DOSUB1(r, ci->progeny[5], cj->progeny[2], -1, 0);
-          if (ci->progeny[5] != NULL && cj->progeny[4] != NULL)
-            DOSUB1(r, ci->progeny[5], cj->progeny[4], -1, 0);
-          if (ci->progeny[5] != NULL && cj->progeny[6] != NULL)
-            DOSUB1(r, ci->progeny[5], cj->progeny[6], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
-            DOSUB1(r, ci->progeny[7], cj->progeny[0], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[2] != NULL)
-            DOSUB1(r, ci->progeny[7], cj->progeny[2], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[4] != NULL)
-            DOSUB1(r, ci->progeny[7], cj->progeny[4], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[6] != NULL)
-            DOSUB1(r, ci->progeny[7], cj->progeny[6], -1, 0);
-          break;
-      }
+  const int ti_current = r->e->ti_current;
 
-    }
+  TIMER_TIC
 
-    /* Otherwise, compute the pair directly. */
-    else if (ci->ti_end_min <= ti_current || cj->ti_end_min <= ti_current) {
+  /* Should we even bother? */
+  if (ci->ti_end_min > ti_current) return;
 
-      /* Do any of the cells need to be sorted first? */
-      if (!(ci->sorted & (1 << sid))) runner_do_sort(r, ci, (1 << sid), 1);
-      if (!(cj->sorted & (1 << sid))) runner_do_sort(r, cj, (1 << sid), 1);
+  /* Recurse? */
+  if (ci->split) {
 
-      /* Compute the interactions. */
-      DOPAIR1(r, ci, cj);
-    }
+    /* Loop over all progeny. */
+    for (int k = 0; k < 8; k++)
+      if (ci->progeny[k] != NULL) {
+        DOSUB_SELF1(r, ci->progeny[k], 0);
+        for (int j = k + 1; j < 8; j++)
+          if (ci->progeny[j] != NULL)
+            DOSUB_PAIR1(r, ci->progeny[k], ci->progeny[j], -1, 0);
+      }
+  }
 
-  } /* otherwise, pair interaction. */
+  /* Otherwise, compute self-interaction. */
+  else
+    DOSELF1(r, ci);
 
-  if (gettimer) TIMER_TOC(TIMER_DOSUB);
+  if (gettimer) TIMER_TOC(TIMER_DOSUB_SELF);
 }
 
-void DOSUB2(struct runner *r, struct cell *ci, struct cell *cj, int sid,
-            int gettimer) {
+/**
+ * @brief Compute grouped sub-cell interactions for pairs (symmetric case)
+ *
+ * @param r The #runner.
+ * @param ci The first #cell.
+ * @param cj The second #cell.
+ * @param sid The direction linking the cells
+ * @param gettimer Do we have a timer ?
+ *
+ * @todo Hard-code the sid on the recursive calls to avoid the
+ * redundant computations to find the sid on-the-fly.
+ */
+void DOSUB_PAIR2(struct runner *r, struct cell *ci, struct cell *cj, int sid,
+                 int gettimer) {
 
-  int j, k;
-  double shift[3];
-  float h;
   struct space *s = r->e->s;
   const int ti_current = r->e->ti_current;
 
   TIMER_TIC
 
-  /* Is this a single cell? */
-  if (cj == NULL) {
+  /* Should we even bother? */
+  if (ci->ti_end_min > ti_current && cj->ti_end_min > ti_current) return;
 
-    /* Should we even bother? */
-    if (ci->ti_end_min > ti_current) return;
+  /* Get the cell dimensions. */
+  const float h = fmin(ci->h[0], fmin(ci->h[1], ci->h[2]));
 
-    /* Recurse? */
-    if (ci->split) {
+  /* Get the type of pair if not specified explicitly. */
+  // if ( sid < 0 )
+  double shift[3];
+  sid = space_getsid(s, &ci, &cj, shift);
 
-      /* Loop over all progeny. */
-      for (k = 0; k < 8; k++)
-        if (ci->progeny[k] != NULL) {
-          DOSUB2(r, ci->progeny[k], NULL, -1, 0);
-          for (j = k + 1; j < 8; j++)
-            if (ci->progeny[j] != NULL)
-              DOSUB2(r, ci->progeny[k], ci->progeny[j], -1, 0);
-        }
+  /* Recurse? */
+  if (ci->split && cj->split &&
+      fmaxf(ci->h_max, cj->h_max) * kernel_gamma + ci->dx_max + cj->dx_max <
+          h / 2) {
 
-    }
+    /* Different types of flags. */
+    switch (sid) {
 
-    /* Otherwise, compute self-interaction. */
-    else
-      DOSELF2(r, ci);
+      /* Regular sub-cell interactions of a single cell. */
+      case 0: /* (  1 ,  1 ,  1 ) */
+        if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[7], cj->progeny[0], -1, 0);
+        break;
 
-  } /* self-interaction. */
+      case 1: /* (  1 ,  1 ,  0 ) */
+        if (ci->progeny[6] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[6], cj->progeny[0], -1, 0);
+        if (ci->progeny[6] != NULL && cj->progeny[1] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[6], cj->progeny[1], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[7], cj->progeny[0], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[1] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[7], cj->progeny[1], -1, 0);
+        break;
 
-  /* Otherwise, it's a pair interaction. */
-  else {
+      case 2: /* (  1 ,  1 , -1 ) */
+        if (ci->progeny[6] != NULL && cj->progeny[1] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[6], cj->progeny[1], -1, 0);
+        break;
 
-    /* Should we even bother? */
-    if (ci->ti_end_min > ti_current && cj->ti_end_min > ti_current) return;
+      case 3: /* (  1 ,  0 ,  1 ) */
+        if (ci->progeny[5] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[5], cj->progeny[0], -1, 0);
+        if (ci->progeny[5] != NULL && cj->progeny[2] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[5], cj->progeny[2], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[7], cj->progeny[0], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[2] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[7], cj->progeny[2], -1, 0);
+        break;
 
-    /* Get the cell dimensions. */
-    h = fmin(ci->h[0], fmin(ci->h[1], ci->h[2]));
+      case 4: /* (  1 ,  0 ,  0 ) */
+        if (ci->progeny[4] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[4], cj->progeny[0], -1, 0);
+        if (ci->progeny[4] != NULL && cj->progeny[1] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[4], cj->progeny[1], -1, 0);
+        if (ci->progeny[4] != NULL && cj->progeny[2] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[4], cj->progeny[2], -1, 0);
+        if (ci->progeny[4] != NULL && cj->progeny[3] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[4], cj->progeny[3], -1, 0);
+        if (ci->progeny[5] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[5], cj->progeny[0], -1, 0);
+        if (ci->progeny[5] != NULL && cj->progeny[1] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[5], cj->progeny[1], -1, 0);
+        if (ci->progeny[5] != NULL && cj->progeny[2] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[5], cj->progeny[2], -1, 0);
+        if (ci->progeny[5] != NULL && cj->progeny[3] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[5], cj->progeny[3], -1, 0);
+        if (ci->progeny[6] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[6], cj->progeny[0], -1, 0);
+        if (ci->progeny[6] != NULL && cj->progeny[1] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[6], cj->progeny[1], -1, 0);
+        if (ci->progeny[6] != NULL && cj->progeny[2] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[6], cj->progeny[2], -1, 0);
+        if (ci->progeny[6] != NULL && cj->progeny[3] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[6], cj->progeny[3], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[7], cj->progeny[0], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[1] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[7], cj->progeny[1], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[2] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[7], cj->progeny[2], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[3] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[7], cj->progeny[3], -1, 0);
+        break;
 
-    /* Get the type of pair if not specified explicitly. */
-    // if ( sid < 0 )
-    sid = space_getsid(s, &ci, &cj, shift);
+      case 5: /* (  1 ,  0 , -1 ) */
+        if (ci->progeny[4] != NULL && cj->progeny[1] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[4], cj->progeny[1], -1, 0);
+        if (ci->progeny[4] != NULL && cj->progeny[3] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[4], cj->progeny[3], -1, 0);
+        if (ci->progeny[6] != NULL && cj->progeny[1] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[6], cj->progeny[1], -1, 0);
+        if (ci->progeny[6] != NULL && cj->progeny[3] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[6], cj->progeny[3], -1, 0);
+        break;
 
-    /* Recurse? */
-    if (ci->split && cj->split &&
-        fmaxf(ci->h_max, cj->h_max) * kernel_gamma + ci->dx_max + cj->dx_max <
-            h / 2) {
+      case 6: /* (  1 , -1 ,  1 ) */
+        if (ci->progeny[5] != NULL && cj->progeny[2] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[5], cj->progeny[2], -1, 0);
+        break;
 
-      /* Different types of flags. */
-      switch (sid) {
+      case 7: /* (  1 , -1 ,  0 ) */
+        if (ci->progeny[4] != NULL && cj->progeny[2] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[4], cj->progeny[2], -1, 0);
+        if (ci->progeny[4] != NULL && cj->progeny[3] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[4], cj->progeny[3], -1, 0);
+        if (ci->progeny[5] != NULL && cj->progeny[2] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[5], cj->progeny[2], -1, 0);
+        if (ci->progeny[5] != NULL && cj->progeny[3] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[5], cj->progeny[3], -1, 0);
+        break;
 
-        /* Regular sub-cell interactions of a single cell. */
-        case 0: /* (  1 ,  1 ,  1 ) */
-          if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
-            DOSUB2(r, ci->progeny[7], cj->progeny[0], -1, 0);
-          break;
+      case 8: /* (  1 , -1 , -1 ) */
+        if (ci->progeny[4] != NULL && cj->progeny[3] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[4], cj->progeny[3], -1, 0);
+        break;
 
-        case 1: /* (  1 ,  1 ,  0 ) */
-          if (ci->progeny[6] != NULL && cj->progeny[0] != NULL)
-            DOSUB2(r, ci->progeny[6], cj->progeny[0], -1, 0);
-          if (ci->progeny[6] != NULL && cj->progeny[1] != NULL)
-            DOSUB2(r, ci->progeny[6], cj->progeny[1], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
-            DOSUB2(r, ci->progeny[7], cj->progeny[0], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[1] != NULL)
-            DOSUB2(r, ci->progeny[7], cj->progeny[1], -1, 0);
-          break;
+      case 9: /* (  0 ,  1 ,  1 ) */
+        if (ci->progeny[3] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[3], cj->progeny[0], -1, 0);
+        if (ci->progeny[3] != NULL && cj->progeny[4] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[3], cj->progeny[4], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[7], cj->progeny[0], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[4] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[7], cj->progeny[4], -1, 0);
+        break;
 
-        case 2: /* (  1 ,  1 , -1 ) */
-          if (ci->progeny[6] != NULL && cj->progeny[1] != NULL)
-            DOSUB2(r, ci->progeny[6], cj->progeny[1], -1, 0);
-          break;
+      case 10: /* (  0 ,  1 ,  0 ) */
+        if (ci->progeny[2] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[2], cj->progeny[0], -1, 0);
+        if (ci->progeny[2] != NULL && cj->progeny[1] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[2], cj->progeny[1], -1, 0);
+        if (ci->progeny[2] != NULL && cj->progeny[4] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[2], cj->progeny[4], -1, 0);
+        if (ci->progeny[2] != NULL && cj->progeny[5] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[2], cj->progeny[5], -1, 0);
+        if (ci->progeny[3] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[3], cj->progeny[0], -1, 0);
+        if (ci->progeny[3] != NULL && cj->progeny[1] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[3], cj->progeny[1], -1, 0);
+        if (ci->progeny[3] != NULL && cj->progeny[4] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[3], cj->progeny[4], -1, 0);
+        if (ci->progeny[3] != NULL && cj->progeny[5] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[3], cj->progeny[5], -1, 0);
+        if (ci->progeny[6] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[6], cj->progeny[0], -1, 0);
+        if (ci->progeny[6] != NULL && cj->progeny[1] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[6], cj->progeny[1], -1, 0);
+        if (ci->progeny[6] != NULL && cj->progeny[4] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[6], cj->progeny[4], -1, 0);
+        if (ci->progeny[6] != NULL && cj->progeny[5] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[6], cj->progeny[5], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[7], cj->progeny[0], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[1] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[7], cj->progeny[1], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[4] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[7], cj->progeny[4], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[5] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[7], cj->progeny[5], -1, 0);
+        break;
 
-        case 3: /* (  1 ,  0 ,  1 ) */
-          if (ci->progeny[5] != NULL && cj->progeny[0] != NULL)
-            DOSUB2(r, ci->progeny[5], cj->progeny[0], -1, 0);
-          if (ci->progeny[5] != NULL && cj->progeny[2] != NULL)
-            DOSUB2(r, ci->progeny[5], cj->progeny[2], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
-            DOSUB2(r, ci->progeny[7], cj->progeny[0], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[2] != NULL)
-            DOSUB2(r, ci->progeny[7], cj->progeny[2], -1, 0);
-          break;
+      case 11: /* (  0 ,  1 , -1 ) */
+        if (ci->progeny[2] != NULL && cj->progeny[1] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[2], cj->progeny[1], -1, 0);
+        if (ci->progeny[2] != NULL && cj->progeny[5] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[2], cj->progeny[5], -1, 0);
+        if (ci->progeny[6] != NULL && cj->progeny[1] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[6], cj->progeny[1], -1, 0);
+        if (ci->progeny[6] != NULL && cj->progeny[5] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[6], cj->progeny[5], -1, 0);
+        break;
 
-        case 4: /* (  1 ,  0 ,  0 ) */
-          if (ci->progeny[4] != NULL && cj->progeny[0] != NULL)
-            DOSUB2(r, ci->progeny[4], cj->progeny[0], -1, 0);
-          if (ci->progeny[4] != NULL && cj->progeny[1] != NULL)
-            DOSUB2(r, ci->progeny[4], cj->progeny[1], -1, 0);
-          if (ci->progeny[4] != NULL && cj->progeny[2] != NULL)
-            DOSUB2(r, ci->progeny[4], cj->progeny[2], -1, 0);
-          if (ci->progeny[4] != NULL && cj->progeny[3] != NULL)
-            DOSUB2(r, ci->progeny[4], cj->progeny[3], -1, 0);
-          if (ci->progeny[5] != NULL && cj->progeny[0] != NULL)
-            DOSUB2(r, ci->progeny[5], cj->progeny[0], -1, 0);
-          if (ci->progeny[5] != NULL && cj->progeny[1] != NULL)
-            DOSUB2(r, ci->progeny[5], cj->progeny[1], -1, 0);
-          if (ci->progeny[5] != NULL && cj->progeny[2] != NULL)
-            DOSUB2(r, ci->progeny[5], cj->progeny[2], -1, 0);
-          if (ci->progeny[5] != NULL && cj->progeny[3] != NULL)
-            DOSUB2(r, ci->progeny[5], cj->progeny[3], -1, 0);
-          if (ci->progeny[6] != NULL && cj->progeny[0] != NULL)
-            DOSUB2(r, ci->progeny[6], cj->progeny[0], -1, 0);
-          if (ci->progeny[6] != NULL && cj->progeny[1] != NULL)
-            DOSUB2(r, ci->progeny[6], cj->progeny[1], -1, 0);
-          if (ci->progeny[6] != NULL && cj->progeny[2] != NULL)
-            DOSUB2(r, ci->progeny[6], cj->progeny[2], -1, 0);
-          if (ci->progeny[6] != NULL && cj->progeny[3] != NULL)
-            DOSUB2(r, ci->progeny[6], cj->progeny[3], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
-            DOSUB2(r, ci->progeny[7], cj->progeny[0], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[1] != NULL)
-            DOSUB2(r, ci->progeny[7], cj->progeny[1], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[2] != NULL)
-            DOSUB2(r, ci->progeny[7], cj->progeny[2], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[3] != NULL)
-            DOSUB2(r, ci->progeny[7], cj->progeny[3], -1, 0);
-          break;
+      case 12: /* (  0 ,  0 ,  1 ) */
+        if (ci->progeny[1] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[1], cj->progeny[0], -1, 0);
+        if (ci->progeny[1] != NULL && cj->progeny[2] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[1], cj->progeny[2], -1, 0);
+        if (ci->progeny[1] != NULL && cj->progeny[4] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[1], cj->progeny[4], -1, 0);
+        if (ci->progeny[1] != NULL && cj->progeny[6] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[1], cj->progeny[6], -1, 0);
+        if (ci->progeny[3] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[3], cj->progeny[0], -1, 0);
+        if (ci->progeny[3] != NULL && cj->progeny[2] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[3], cj->progeny[2], -1, 0);
+        if (ci->progeny[3] != NULL && cj->progeny[4] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[3], cj->progeny[4], -1, 0);
+        if (ci->progeny[3] != NULL && cj->progeny[6] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[3], cj->progeny[6], -1, 0);
+        if (ci->progeny[5] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[5], cj->progeny[0], -1, 0);
+        if (ci->progeny[5] != NULL && cj->progeny[2] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[5], cj->progeny[2], -1, 0);
+        if (ci->progeny[5] != NULL && cj->progeny[4] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[5], cj->progeny[4], -1, 0);
+        if (ci->progeny[5] != NULL && cj->progeny[6] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[5], cj->progeny[6], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[7], cj->progeny[0], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[2] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[7], cj->progeny[2], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[4] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[7], cj->progeny[4], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[6] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[7], cj->progeny[6], -1, 0);
+        break;
+    }
 
-        case 5: /* (  1 ,  0 , -1 ) */
-          if (ci->progeny[4] != NULL && cj->progeny[1] != NULL)
-            DOSUB2(r, ci->progeny[4], cj->progeny[1], -1, 0);
-          if (ci->progeny[4] != NULL && cj->progeny[3] != NULL)
-            DOSUB2(r, ci->progeny[4], cj->progeny[3], -1, 0);
-          if (ci->progeny[6] != NULL && cj->progeny[1] != NULL)
-            DOSUB2(r, ci->progeny[6], cj->progeny[1], -1, 0);
-          if (ci->progeny[6] != NULL && cj->progeny[3] != NULL)
-            DOSUB2(r, ci->progeny[6], cj->progeny[3], -1, 0);
-          break;
+  }
 
-        case 6: /* (  1 , -1 ,  1 ) */
-          if (ci->progeny[5] != NULL && cj->progeny[2] != NULL)
-            DOSUB2(r, ci->progeny[5], cj->progeny[2], -1, 0);
-          break;
+  /* Otherwise, compute the pair directly. */
+  else if (ci->ti_end_min <= ti_current || cj->ti_end_min <= ti_current) {
 
-        case 7: /* (  1 , -1 ,  0 ) */
-          if (ci->progeny[4] != NULL && cj->progeny[2] != NULL)
-            DOSUB2(r, ci->progeny[4], cj->progeny[2], -1, 0);
-          if (ci->progeny[4] != NULL && cj->progeny[3] != NULL)
-            DOSUB2(r, ci->progeny[4], cj->progeny[3], -1, 0);
-          if (ci->progeny[5] != NULL && cj->progeny[2] != NULL)
-            DOSUB2(r, ci->progeny[5], cj->progeny[2], -1, 0);
-          if (ci->progeny[5] != NULL && cj->progeny[3] != NULL)
-            DOSUB2(r, ci->progeny[5], cj->progeny[3], -1, 0);
-          break;
+    /* Do any of the cells need to be sorted first? */
+    if (!(ci->sorted & (1 << sid))) runner_do_sort(r, ci, (1 << sid), 1);
+    if (!(cj->sorted & (1 << sid))) runner_do_sort(r, cj, (1 << sid), 1);
 
-        case 8: /* (  1 , -1 , -1 ) */
-          if (ci->progeny[4] != NULL && cj->progeny[3] != NULL)
-            DOSUB2(r, ci->progeny[4], cj->progeny[3], -1, 0);
-          break;
+    /* Compute the interactions. */
+    DOPAIR2(r, ci, cj);
+  }
 
-        case 9: /* (  0 ,  1 ,  1 ) */
-          if (ci->progeny[3] != NULL && cj->progeny[0] != NULL)
-            DOSUB2(r, ci->progeny[3], cj->progeny[0], -1, 0);
-          if (ci->progeny[3] != NULL && cj->progeny[4] != NULL)
-            DOSUB2(r, ci->progeny[3], cj->progeny[4], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
-            DOSUB2(r, ci->progeny[7], cj->progeny[0], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[4] != NULL)
-            DOSUB2(r, ci->progeny[7], cj->progeny[4], -1, 0);
-          break;
+  if (gettimer) TIMER_TOC(TIMER_DOSUB_PAIR);
+}
 
-        case 10: /* (  0 ,  1 ,  0 ) */
-          if (ci->progeny[2] != NULL && cj->progeny[0] != NULL)
-            DOSUB2(r, ci->progeny[2], cj->progeny[0], -1, 0);
-          if (ci->progeny[2] != NULL && cj->progeny[1] != NULL)
-            DOSUB2(r, ci->progeny[2], cj->progeny[1], -1, 0);
-          if (ci->progeny[2] != NULL && cj->progeny[4] != NULL)
-            DOSUB2(r, ci->progeny[2], cj->progeny[4], -1, 0);
-          if (ci->progeny[2] != NULL && cj->progeny[5] != NULL)
-            DOSUB2(r, ci->progeny[2], cj->progeny[5], -1, 0);
-          if (ci->progeny[3] != NULL && cj->progeny[0] != NULL)
-            DOSUB2(r, ci->progeny[3], cj->progeny[0], -1, 0);
-          if (ci->progeny[3] != NULL && cj->progeny[1] != NULL)
-            DOSUB2(r, ci->progeny[3], cj->progeny[1], -1, 0);
-          if (ci->progeny[3] != NULL && cj->progeny[4] != NULL)
-            DOSUB2(r, ci->progeny[3], cj->progeny[4], -1, 0);
-          if (ci->progeny[3] != NULL && cj->progeny[5] != NULL)
-            DOSUB2(r, ci->progeny[3], cj->progeny[5], -1, 0);
-          if (ci->progeny[6] != NULL && cj->progeny[0] != NULL)
-            DOSUB2(r, ci->progeny[6], cj->progeny[0], -1, 0);
-          if (ci->progeny[6] != NULL && cj->progeny[1] != NULL)
-            DOSUB2(r, ci->progeny[6], cj->progeny[1], -1, 0);
-          if (ci->progeny[6] != NULL && cj->progeny[4] != NULL)
-            DOSUB2(r, ci->progeny[6], cj->progeny[4], -1, 0);
-          if (ci->progeny[6] != NULL && cj->progeny[5] != NULL)
-            DOSUB2(r, ci->progeny[6], cj->progeny[5], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
-            DOSUB2(r, ci->progeny[7], cj->progeny[0], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[1] != NULL)
-            DOSUB2(r, ci->progeny[7], cj->progeny[1], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[4] != NULL)
-            DOSUB2(r, ci->progeny[7], cj->progeny[4], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[5] != NULL)
-            DOSUB2(r, ci->progeny[7], cj->progeny[5], -1, 0);
-          break;
+/**
+ * @brief Compute grouped sub-cell interactions for self tasks (symmetric case)
+ *
+ * @param r The #runner.
+ * @param ci The first #cell.
+ * @param gettimer Do we have a timer ?
+ */
+void DOSUB_SELF2(struct runner *r, struct cell *ci, int gettimer) {
 
-        case 11: /* (  0 ,  1 , -1 ) */
-          if (ci->progeny[2] != NULL && cj->progeny[1] != NULL)
-            DOSUB2(r, ci->progeny[2], cj->progeny[1], -1, 0);
-          if (ci->progeny[2] != NULL && cj->progeny[5] != NULL)
-            DOSUB2(r, ci->progeny[2], cj->progeny[5], -1, 0);
-          if (ci->progeny[6] != NULL && cj->progeny[1] != NULL)
-            DOSUB2(r, ci->progeny[6], cj->progeny[1], -1, 0);
-          if (ci->progeny[6] != NULL && cj->progeny[5] != NULL)
-            DOSUB2(r, ci->progeny[6], cj->progeny[5], -1, 0);
-          break;
+  const int ti_current = r->e->ti_current;
 
-        case 12: /* (  0 ,  0 ,  1 ) */
-          if (ci->progeny[1] != NULL && cj->progeny[0] != NULL)
-            DOSUB2(r, ci->progeny[1], cj->progeny[0], -1, 0);
-          if (ci->progeny[1] != NULL && cj->progeny[2] != NULL)
-            DOSUB2(r, ci->progeny[1], cj->progeny[2], -1, 0);
-          if (ci->progeny[1] != NULL && cj->progeny[4] != NULL)
-            DOSUB2(r, ci->progeny[1], cj->progeny[4], -1, 0);
-          if (ci->progeny[1] != NULL && cj->progeny[6] != NULL)
-            DOSUB2(r, ci->progeny[1], cj->progeny[6], -1, 0);
-          if (ci->progeny[3] != NULL && cj->progeny[0] != NULL)
-            DOSUB2(r, ci->progeny[3], cj->progeny[0], -1, 0);
-          if (ci->progeny[3] != NULL && cj->progeny[2] != NULL)
-            DOSUB2(r, ci->progeny[3], cj->progeny[2], -1, 0);
-          if (ci->progeny[3] != NULL && cj->progeny[4] != NULL)
-            DOSUB2(r, ci->progeny[3], cj->progeny[4], -1, 0);
-          if (ci->progeny[3] != NULL && cj->progeny[6] != NULL)
-            DOSUB2(r, ci->progeny[3], cj->progeny[6], -1, 0);
-          if (ci->progeny[5] != NULL && cj->progeny[0] != NULL)
-            DOSUB2(r, ci->progeny[5], cj->progeny[0], -1, 0);
-          if (ci->progeny[5] != NULL && cj->progeny[2] != NULL)
-            DOSUB2(r, ci->progeny[5], cj->progeny[2], -1, 0);
-          if (ci->progeny[5] != NULL && cj->progeny[4] != NULL)
-            DOSUB2(r, ci->progeny[5], cj->progeny[4], -1, 0);
-          if (ci->progeny[5] != NULL && cj->progeny[6] != NULL)
-            DOSUB2(r, ci->progeny[5], cj->progeny[6], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
-            DOSUB2(r, ci->progeny[7], cj->progeny[0], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[2] != NULL)
-            DOSUB2(r, ci->progeny[7], cj->progeny[2], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[4] != NULL)
-            DOSUB2(r, ci->progeny[7], cj->progeny[4], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[6] != NULL)
-            DOSUB2(r, ci->progeny[7], cj->progeny[6], -1, 0);
-          break;
-      }
+  TIMER_TIC
 
-    }
+  /* Should we even bother? */
+  if (ci->ti_end_min > ti_current) return;
 
-    /* Otherwise, compute the pair directly. */
-    else if (ci->ti_end_min <= ti_current || cj->ti_end_min <= ti_current) {
+  /* Recurse? */
+  if (ci->split) {
 
-      /* Do any of the cells need to be sorted first? */
-      if (!(ci->sorted & (1 << sid))) runner_do_sort(r, ci, (1 << sid), 1);
-      if (!(cj->sorted & (1 << sid))) runner_do_sort(r, cj, (1 << sid), 1);
+    /* Loop over all progeny. */
+    for (int k = 0; k < 8; k++)
+      if (ci->progeny[k] != NULL) {
+        DOSUB_SELF2(r, ci->progeny[k], 0);
+        for (int j = k + 1; j < 8; j++)
+          if (ci->progeny[j] != NULL)
+            DOSUB_PAIR2(r, ci->progeny[k], ci->progeny[j], -1, 0);
+      }
 
-      /* Compute the interactions. */
-      DOPAIR2(r, ci, cj);
-    }
+  }
 
-  } /* otherwise, pair interaction. */
+  /* Otherwise, compute self-interaction. */
+  else
+    DOSELF2(r, ci);
 
-  if (gettimer) TIMER_TOC(TIMER_DOSUB);
+  if (gettimer) TIMER_TOC(TIMER_DOSUB_SELF);
 }
 
 void DOSUB_SUBSET(struct runner *r, struct cell *ci, struct part *parts,
@@ -2856,5 +2882,5 @@ void DOSUB_SUBSET(struct runner *r, struct cell *ci, struct part *parts,
 
   } /* otherwise, pair interaction. */
 
-  if (gettimer) TIMER_TOC(TIMER_DOSUB);
+  if (gettimer) TIMER_TOC(TIMER_DOSUB_PAIR);
 }
diff --git a/src/scheduler.c b/src/scheduler.c
index 3549e107cb63a01b561dc5c5fa26129b24c4b348..037c098da21fbcc597e9e5f38674e1e397d2ee4e 100644
--- a/src/scheduler.c
+++ b/src/scheduler.c
@@ -174,7 +174,7 @@ void scheduler_splittasks(struct scheduler *s) {
                                 ci->gcount * ci->gcount < space_subsize)) {
 
           /* convert to a self-subtask. */
-          t->type = task_type_sub;
+          t->type = task_type_sub_self;
 
         }
 
@@ -235,7 +235,7 @@ void scheduler_splittasks(struct scheduler *s) {
             sid != 0 && sid != 2 && sid != 6 && sid != 8) {
 
           /* Make this task a sub task. */
-          t->type = task_type_sub;
+          t->type = task_type_sub_pair;
 
         }
 
@@ -784,23 +784,23 @@ void scheduler_reweight(struct scheduler *s) {
             t->weight +=
                 2 * wscale * t->ci->count * t->cj->count * sid_scale[t->flags];
           break;
-        case task_type_sub:
-          if (t->cj != NULL) {
-            if (t->ci->nodeID != nodeID || t->cj->nodeID != nodeID) {
-              if (t->flags < 0)
-                t->weight += 3 * wscale * t->ci->count * t->cj->count;
-              else
-                t->weight += 3 * wscale * t->ci->count * t->cj->count *
-                             sid_scale[t->flags];
-            } else {
-              if (t->flags < 0)
-                t->weight += 2 * wscale * t->ci->count * t->cj->count;
-              else
-                t->weight += 2 * wscale * t->ci->count * t->cj->count *
-                             sid_scale[t->flags];
-            }
-          } else
-            t->weight += 1 * wscale * t->ci->count * t->ci->count;
+        case task_type_sub_pair:
+          if (t->ci->nodeID != nodeID || t->cj->nodeID != nodeID) {
+            if (t->flags < 0)
+              t->weight += 3 * wscale * t->ci->count * t->cj->count;
+            else
+              t->weight += 3 * wscale * t->ci->count * t->cj->count *
+                           sid_scale[t->flags];
+          } else {
+            if (t->flags < 0)
+              t->weight += 2 * wscale * t->ci->count * t->cj->count;
+            else
+              t->weight += 2 * wscale * t->ci->count * t->cj->count *
+                           sid_scale[t->flags];
+          }
+          break;
+        case task_type_sub_self:
+          t->weight += 1 * wscale * t->ci->count * t->ci->count;
           break;
         case task_type_ghost:
           if (t->ci == t->ci->super) t->weight += wscale * t->ci->count;
@@ -967,6 +967,7 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) {
        any pre-processing needed. */
     switch (t->type) {
       case task_type_self:
+      case task_type_sub_self:
       case task_type_sort:
       case task_type_ghost:
       case task_type_kick:
@@ -975,11 +976,10 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) {
         qid = t->ci->super->owner;
         break;
       case task_type_pair:
-      case task_type_sub:
+      case task_type_sub_pair:
         qid = t->ci->super->owner;
-        if (t->cj != NULL &&
-            (qid < 0 ||
-             s->queues[qid].count > s->queues[t->cj->super->owner].count))
+        if (qid < 0 ||
+            s->queues[qid].count > s->queues[t->cj->super->owner].count)
           qid = t->cj->super->owner;
         break;
       case task_type_recv:
diff --git a/src/task.c b/src/task.c
index 68e3f4ee91542d0610485ccd8823ccc1e2a6ab25..8cbbc1bd909fcc65e51679c6f2445ea76e7104a1 100644
--- a/src/task.c
+++ b/src/task.c
@@ -123,6 +123,7 @@ void task_unlock(struct task *t) {
       break;
 
     case task_type_self:
+    case task_type_sub_self:
       if (subtype == task_subtype_grav) {
         cell_gunlocktree(ci);
       } else {
@@ -131,6 +132,7 @@ void task_unlock(struct task *t) {
       break;
 
     case task_type_pair:
+    case task_type_sub_pair:
       if (subtype == task_subtype_grav) {
         cell_gunlocktree(ci);
         cell_gunlocktree(cj);
@@ -140,15 +142,6 @@ void task_unlock(struct task *t) {
       }
       break;
 
-    case task_type_sub:
-      if (subtype == task_subtype_grav) {
-        cell_gunlocktree(ci);
-        if (cj != NULL) cell_gunlocktree(cj);
-      } else {
-        cell_unlocktree(ci);
-        if (cj != NULL) cell_unlocktree(cj);
-      }
-      break;
 
     case task_type_grav_mm:
       cell_gunlocktree(ci);
@@ -198,6 +191,7 @@ int task_lock(struct task *t) {
       break;
 
     case task_type_self:
+    case task_type_sub_self:
       if (subtype == task_subtype_grav) {
         if (cell_glocktree(ci) != 0) return 0;
       } else {
@@ -206,6 +200,7 @@ int task_lock(struct task *t) {
       break;
 
     case task_type_pair:
+    case task_type_sub_pair:
       if (subtype == task_subtype_grav) {
         if (ci->ghold || cj->ghold) return 0;
         if (cell_glocktree(ci) != 0) return 0;
@@ -223,37 +218,6 @@ int task_lock(struct task *t) {
       }
       break;
 
-    case task_type_sub:
-      if (cj == NULL) { /* sub-self */
-
-        if (subtype == task_subtype_grav) {
-          if (cell_glocktree(ci) != 0) return 0;
-        } else {
-          if (cell_locktree(ci) != 0) return 0;
-        }
-
-      } else { /* Sub-pair */
-        if (subtype == task_subtype_grav) {
-          if (ci->ghold || cj->ghold) return 0;
-          if (cell_glocktree(ci) != 0) return 0;
-          if (cell_glocktree(cj) != 0) {
-            cell_gunlocktree(ci);
-            return 0;
-          }
-        } else {
-          if (ci->hold || cj->hold) return 0;
-          if (cell_locktree(ci) != 0) return 0;
-          if (cell_locktree(cj) != 0) {
-            cell_unlocktree(ci);
-            return 0;
-          }
-        }
-      }
-      break;
-
-    case task_type_grav_mm:
-      if (cell_glocktree(ci) != 0) return 0;
-
     default:
       break;
   }
@@ -330,47 +294,6 @@ void task_rmunlock_blind(struct task *ta, struct task *tb) {
   lock_unlock_blind(&ta->lock);
 }
 
-/**
- * @brief Add an unlock_task to the given task.
- *
- * @param ta The unlocking #task.
- * @param tb The #task that will be unlocked.
- */
-void task_addunlock(struct task *ta, struct task *tb) {
-
-  error("Use sched_addunlock instead.");
-
-  /* Add the lock atomically. */
-  ta->unlock_tasks[atomic_inc(&ta->nr_unlock_tasks)] = tb;
-
-  /* Check a posteriori if we did not overshoot. */
-  if (ta->nr_unlock_tasks > task_maxunlock)
-    error("Too many unlock_tasks in task.");
-}
-
-void task_addunlock_old(struct task *ta, struct task *tb) {
-
-  int k;
-
-  lock_lock(&ta->lock);
-
-  /* Check if ta already unlocks tb. */
-  for (k = 0; k < ta->nr_unlock_tasks; k++)
-    if (ta->unlock_tasks[k] == tb) {
-      error("Duplicate unlock.");
-      lock_unlock_blind(&ta->lock);
-      return;
-    }
-
-  if (ta->nr_unlock_tasks == task_maxunlock)
-    error("Too many unlock_tasks in task.");
-
-  ta->unlock_tasks[ta->nr_unlock_tasks] = tb;
-  ta->nr_unlock_tasks += 1;
-
-  lock_unlock_blind(&ta->lock);
-}
-
 /**
  * @brief Prints the list of tasks contained in a given mask
  *
diff --git a/src/task.h b/src/task.h
index 9f3429518d1f8a162551d31ee594c459ca0c5796..b2ee2bd6d766cd2c2603f36a3ca960e8d253ffe1 100644
--- a/src/task.h
+++ b/src/task.h
@@ -37,7 +37,8 @@ enum task_types {
   task_type_sort,
   task_type_self,
   task_type_pair,
-  task_type_sub,
+  task_type_sub_self,
+  task_type_sub_pair,
   task_type_init,
   task_type_ghost,
   task_type_drift,
@@ -97,7 +98,6 @@ struct task {
 void task_rmunlock(struct task *ta, struct task *tb);
 void task_rmunlock_blind(struct task *ta, struct task *tb);
 void task_cleanunlock(struct task *t, int type);
-void task_addunlock(struct task *ta, struct task *tb);
 void task_unlock(struct task *t);
 float task_overlap(const struct task *ta, const struct task *tb);
 int task_lock(struct task *t);
diff --git a/src/timers.h b/src/timers.h
index 92b685ebe9b11d49c4703e5837d35cffdca81c4d..c48961b39737f23eb936d7283f76651d33892991 100644
--- a/src/timers.h
+++ b/src/timers.h
@@ -23,6 +23,7 @@
 #define SWIFT_TIMERS_H
 
 /* Includes. */
+#include "atomic.h"
 #include "cycle.h"
 #include "inline.h"
 
@@ -41,9 +42,12 @@ enum {
   timer_dopair_force,
   timer_dopair_grav,
   timer_dograv_external,
-  timer_dosub_density,
-  timer_dosub_force,
-  timer_dosub_grav,
+  timer_dosub_self_density,
+  timer_dosub_self_force,
+  timer_dosub_self_grav,
+  timer_dosub_pair_density,
+  timer_dosub_pair_force,
+  timer_dosub_pair_grav,
   timer_dopair_subset,
   timer_do_ghost,
   timer_dorecv_cell,
@@ -71,7 +75,7 @@ extern ticks timers[timer_count];
 #define TIMER_TOC2(t) timers_toc(t, tic2)
 INLINE static ticks timers_toc(int t, ticks tic) {
   ticks d = (getticks() - tic);
-  __sync_add_and_fetch(&timers[t], d);
+  atomic_add(&timers[t], d);
   return d;
 }
 #else