diff --git a/examples/plot_tasks.py b/examples/plot_tasks.py
index e070bdb565d6d8305b7974137292910dd720afe2..f2d0aa95d1f35f30476e1989349a07be8d9e5b0a 100755
--- a/examples/plot_tasks.py
+++ b/examples/plot_tasks.py
@@ -55,8 +55,8 @@ PLOT_PARAMS = {"axes.labelsize": 10,
 pl.rcParams.update(PLOT_PARAMS)
 
 #  Tasks and subtypes. Indexed as in tasks.h.
-TASKTYPES = ["none", "sort", "self", "pair", "sub", "init", "ghost", "drift",
-             "kick", "kick_fixdt", "send", "recv", "grav_pp", "grav_mm",
+TASKTYPES = ["none", "sort", "self", "pair", "sub_self", "sub_pair", "init", "ghost",
+             "drift", "kick", "kick_fixdt", "send", "recv", "grav_pp", "grav_mm",
              "grav_up", "grav_down", "grav_external", "part_sort", "gpart_sort",
              "split_cell", "rewait", "count"]
 
@@ -64,7 +64,8 @@ TASKCOLOURS = {"none": "black",
                "sort": "lightblue",
                "self": "greenyellow",
                "pair": "navy",
-               "sub": "hotpink",
+               "sub_self": "greenyellow",
+               "sub_pair": "navy",
                "init": "indigo",
                "ghost": "cyan",
                "drift": "maroon",
diff --git a/examples/plot_tasks_MPI.py b/examples/plot_tasks_MPI.py
index 44705a0164034f8a22f1f3aa860a64d3e7656d2e..9a92faf9417c9a302831eb8cb2f4471eb672d59c 100755
--- a/examples/plot_tasks_MPI.py
+++ b/examples/plot_tasks_MPI.py
@@ -61,8 +61,8 @@ PLOT_PARAMS = {"axes.labelsize": 10,
 pl.rcParams.update(PLOT_PARAMS)
 
 #  Tasks and subtypes. Indexed as in tasks.h.
-TASKTYPES = ["none", "sort", "self", "pair", "sub", "init", "ghost", "drift",
-             "kick", "kick_fixdt", "send", "recv", "grav_pp", "grav_mm",
+TASKTYPES = ["none", "sort", "self", "pair", "sub_self", "sub_pair", "init", "ghost",
+             "drift", "kick", "kick_fixdt", "send", "recv", "grav_pp", "grav_mm",
              "grav_up", "grav_down", "grav_external", "part_sort", "gpart_sort",
              "split_cell", "rewait", "count"]
 
@@ -70,7 +70,8 @@ TASKCOLOURS = {"none": "black",
                "sort": "lightblue",
                "self": "greenyellow",
                "pair": "navy",
-               "sub": "hotpink",
+               "sub_self": "greenyellow",
+               "sub_pair": "navy",
                "init": "indigo",
                "ghost": "cyan",
                "drift": "maroon",
diff --git a/src/Makefile.am b/src/Makefile.am
index 26eb7f636912e69b369476ecb30eda565b1362b8..21b5ef25ca21202caaa9107bfbf09e62aa66011d 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -1,4 +1,3 @@
-
 # This file is part of SWIFT.
 # Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk),
 #                    Matthieu Schaller (matthieu.schaller@durham.ac.uk).
@@ -17,10 +16,10 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
 # Add the debug flag to the whole thing
-AM_CFLAGS = -DTIMER -DCOUNTER $(HDF5_CPPFLAGS)
+AM_CFLAGS = -DTIMER $(HDF5_CPPFLAGS)
 
 # Assign a "safe" version number
-AM_LDFLAGS = $(LAPACK_LIBS) $(BLAS_LIBS) $(HDF5_LDFLAGS) -version-info 0:0:0 # -fsanitize=address
+AM_LDFLAGS = $(HDF5_LDFLAGS) -version-info 0:0:0
 
 # The git command, if available.
 GIT_CMD = @GIT_CMD@
@@ -70,6 +69,7 @@ libswiftsim_la_SOURCES = $(AM_SOURCES)
 # Sources and flags for MPI library
 libswiftsim_mpi_la_SOURCES = $(AM_SOURCES)
 libswiftsim_mpi_la_CFLAGS = $(AM_CFLAGS) -DWITH_MPI $(METIS_INCS)
+libswiftsim_mpi_la_LDFLAGS = $(AM_LDFLAGS) -DWITH_MPI $(METIS_LIBS)
 libswiftsim_mpi_la_SHORTNAME = mpi
 
 
diff --git a/src/engine.c b/src/engine.c
index 3d33d7400e6f1529f43d9c654f8b947cafb9ee48..aa65f45678fae1e21276422b69f1c76cd7e9eaef 100644
--- a/src/engine.c
+++ b/src/engine.c
@@ -1200,30 +1200,22 @@ void engine_count_and_link_tasks(struct engine *e) {
         t->cj->density = engine_addlink(e, t->cj->density, t);
         atomic_inc(&t->cj->nr_density);
       }
-    } else if (t->type == task_type_sub) {
+    } else if (t->type == task_type_sub_self) {
       atomic_inc(&t->ci->nr_tasks);
-      if (t->cj != NULL) atomic_inc(&t->cj->nr_tasks);
       if (t->subtype == task_subtype_density) {
         t->ci->density = engine_addlink(e, t->ci->density, t);
         atomic_inc(&t->ci->nr_density);
-        if (t->cj != NULL) {
-          t->cj->density = engine_addlink(e, t->cj->density, t);
-          atomic_inc(&t->cj->nr_density);
-        }
+      }
+    } else if (t->type == task_type_sub_pair) {
+      atomic_inc(&t->ci->nr_tasks);
+      atomic_inc(&t->cj->nr_tasks);
+      if (t->subtype == task_subtype_density) {
+        t->ci->density = engine_addlink(e, t->ci->density, t);
+        atomic_inc(&t->ci->nr_density);
+        t->cj->density = engine_addlink(e, t->cj->density, t);
+        atomic_inc(&t->cj->nr_density);
       }
     }
-
-    /* /\* Link gravity multipole tasks to the up/down tasks. *\/ */
-    /* if (t->type == task_type_grav_mm || */
-    /*     (t->type == task_type_sub && t->subtype == task_subtype_grav)) { */
-    /*   atomic_inc(&t->ci->nr_tasks); */
-    /*   scheduler_addunlock(sched, t->ci->grav_up, t); */
-    /*   scheduler_addunlock(sched, t, t->ci->grav_down); */
-    /*   if (t->cj != NULL && t->ci->grav_up != t->cj->grav_up) { */
-    /*     scheduler_addunlock(sched, t->cj->grav_up, t); */
-    /*     scheduler_addunlock(sched, t, t->cj->grav_down); */
-    /*   } */
-    /* } */
   }
 }
 
@@ -1311,37 +1303,51 @@ void engine_make_extra_hydroloop_tasks(struct engine *e) {
       }
     }
 
-    /* Otherwise, sub interaction? */
-    else if (t->type == task_type_sub && t->subtype == task_subtype_density) {
+    /* Otherwise, sub-self interaction? */
+    else if (t->type == task_type_sub_self &&
+             t->subtype == task_subtype_density) {
 
       /* Start by constructing the task for the second hydro loop */
       struct task *t2 =
-          scheduler_addtask(sched, task_type_sub, task_subtype_force, t->flags,
-                            0, t->ci, t->cj, 0);
+          scheduler_addtask(sched, task_type_sub_self, task_subtype_force,
+                            t->flags, 0, t->ci, t->cj, 0);
 
-      /* Add the link between the new loop and both cells */
+      /* Add the link between the new loop and the cell */
       t->ci->force = engine_addlink(e, t->ci->force, t2);
       atomic_inc(&t->ci->nr_force);
-      if (t->cj != NULL) {
-        t->cj->force = engine_addlink(e, t->cj->force, t2);
-        atomic_inc(&t->cj->nr_force);
+
+      /* Now, build all the dependencies for the hydro for the cells */
+      /* that are local and are not descendant of the same super-cells */
+      if (t->ci->nodeID == nodeID) {
+        engine_make_hydro_loops_dependencies(sched, t, t2, t->ci);
       }
+    }
+
+    /* Otherwise, sub-pair interaction? */
+    else if (t->type == task_type_sub_pair &&
+             t->subtype == task_subtype_density) {
+
+      /* Start by constructing the task for the second hydro loop */
+      struct task *t2 =
+          scheduler_addtask(sched, task_type_sub_pair, task_subtype_force,
+                            t->flags, 0, t->ci, t->cj, 0);
+
+      /* Add the link between the new loop and both cells */
+      t->ci->force = engine_addlink(e, t->ci->force, t2);
+      atomic_inc(&t->ci->nr_force);
+      t->cj->force = engine_addlink(e, t->cj->force, t2);
+      atomic_inc(&t->cj->nr_force);
 
       /* Now, build all the dependencies for the hydro for the cells */
       /* that are local and are not descendant of the same super-cells */
       if (t->ci->nodeID == nodeID) {
         engine_make_hydro_loops_dependencies(sched, t, t2, t->ci);
       }
-      if (t->cj != NULL && t->cj->nodeID == nodeID &&
-          t->ci->super != t->cj->super) {
+      if (t->cj->nodeID == nodeID && t->ci->super != t->cj->super) {
         engine_make_hydro_loops_dependencies(sched, t, t2, t->cj);
       }
     }
 
-    /* /\* Kick tasks should rely on the grav_down tasks of their cell. *\/ */
-    /* else if (t->type == task_type_kick && t->ci->grav_down != NULL) */
-    /*   scheduler_addunlock(sched, t->ci->grav_down, t); */
-
     /* External gravity tasks should depend on init and unlock the kick */
     else if (t->type == task_type_grav_external) {
       scheduler_addunlock(sched, t->ci->init, t);
@@ -1544,8 +1550,7 @@ int engine_marktasks(struct engine *e) {
       struct task *t = &tasks[ind[k]];
 
       /* Pair? */
-      if (t->type == task_type_pair ||
-          (t->type == task_type_sub && t->cj != NULL)) {
+      if (t->type == task_type_pair || t->type == task_type_sub_pair) {
 
         /* Local pointers. */
         const struct cell *ci = t->ci;
@@ -1589,15 +1594,14 @@ int engine_marktasks(struct engine *e) {
 
       /* Single-cell task? */
       else if (t->type == task_type_self || t->type == task_type_ghost ||
-               (t->type == task_type_sub && t->cj == NULL)) {
+               t->type == task_type_sub_self) {
 
         /* Set this task's skip. */
         t->skip = (t->ci->ti_end_min > ti_end);
       }
 
       /* Pair? */
-      else if (t->type == task_type_pair ||
-               (t->type == task_type_sub && t->cj != NULL)) {
+      else if (t->type == task_type_pair || t->type == task_type_sub_pair) {
 
         /* Local pointers. */
         const struct cell *ci = t->ci;
@@ -2109,7 +2113,8 @@ void engine_init_particles(struct engine *e) {
 
     mask |= 1 << task_type_self;
     mask |= 1 << task_type_pair;
-    mask |= 1 << task_type_sub;
+    mask |= 1 << task_type_sub_self;
+    mask |= 1 << task_type_sub_pair;
     mask |= 1 << task_type_ghost;
 
     submask |= 1 << task_subtype_density;
@@ -2238,7 +2243,8 @@ void engine_step(struct engine *e) {
 
     mask |= 1 << task_type_self;
     mask |= 1 << task_type_pair;
-    mask |= 1 << task_type_sub;
+    mask |= 1 << task_type_sub_self;
+    mask |= 1 << task_type_sub_pair;
     mask |= 1 << task_type_ghost;
 
     submask |= 1 << task_subtype_density;
diff --git a/src/partition.c b/src/partition.c
index b79dfd6b81d727b6e53f9e0906c7c59eee6607f1..dbec9f7ebd976a9779d4a54ee536e0e4eef331b2 100644
--- a/src/partition.c
+++ b/src/partition.c
@@ -453,9 +453,9 @@ static void repart_edge_metis(int partweights, int bothweights, int nodeID,
 
     /* Skip un-interesting tasks. */
     if (t->type != task_type_self && t->type != task_type_pair &&
-        t->type != task_type_sub && t->type != task_type_ghost &&
-        t->type != task_type_drift && t->type != task_type_kick &&
-        t->type != task_type_init)
+        t->type != task_type_sub_self && t->type != task_type_sub_self &&
+        t->type != task_type_ghost && t->type != task_type_drift &&
+        t->type != task_type_kick && t->type != task_type_init)
       continue;
 
     /* Get the task weight. */
@@ -496,7 +496,8 @@ static void repart_edge_metis(int partweights, int bothweights, int nodeID,
 
     /* Self interaction? */
     else if ((t->type == task_type_self && ci->nodeID == nodeID) ||
-             (t->type == task_type_sub && cj == NULL && ci->nodeID == nodeID)) {
+             (t->type == task_type_sub_self && cj == NULL &&
+              ci->nodeID == nodeID)) {
       /* Self interactions add only to vertex weight. */
       if (taskvweights) weights_v[cid] += w;
 
@@ -504,7 +505,7 @@ static void repart_edge_metis(int partweights, int bothweights, int nodeID,
 
     /* Pair? */
     else if (t->type == task_type_pair ||
-             (t->type == task_type_sub && cj != NULL)) {
+             (t->type == task_type_sub_pair)) {
       /* In-cell pair? */
       if (ci == cj) {
         /* Add weight to vertex for ci. */
diff --git a/src/queue.c b/src/queue.c
index a62ded2561fc683bf2f44abcf1110b0fb7eebd0c..ab414bba72b71edf76cd2c0a316ad52d6fee9044 100644
--- a/src/queue.c
+++ b/src/queue.c
@@ -38,16 +38,6 @@
 #include "const.h"
 #include "error.h"
 
-/* Counter macros. */
-#ifdef COUNTER
-#define COUNT(c) (__sync_add_and_fetch(&queue_counter[c], 1))
-#else
-#define COUNT(c)
-#endif
-
-/* The counters. */
-int queue_counter[queue_counter_count];
-
 /**
  * @brief Enqueue all tasks in the incoming DEQ.
  *
diff --git a/src/runner.c b/src/runner.c
index 75c3723473cd391c8631435e5a320c356f119340..addeebda93a4d3f4b031f6fdc1f9eb856adc0a32 100644
--- a/src/runner.c
+++ b/src/runner.c
@@ -86,9 +86,6 @@ const char runner_flip[27] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
 #define FUNCTION force
 #include "runner_doiact.h"
 
-/* Import the gravity loop functions. */
-#include "runner_doiact_grav.h"
-
 /**
  * @brief Calculate gravity acceleration from external potential
  *
@@ -98,8 +95,8 @@ const char runner_flip[27] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
  */
 void runner_do_grav_external(struct runner *r, struct cell *c, int timer) {
 
-  struct gpart *g, *gparts = c->gparts;
-  int i, k, gcount = c->gcount;
+  struct gpart *restrict gparts = c->gparts;
+  const int gcount = c->gcount;
   const int ti_current = r->e->ti_current;
   const struct external_potential *potential = r->e->external_potential;
   const struct phys_const *constants = r->e->physical_constants;
@@ -108,7 +105,7 @@ void runner_do_grav_external(struct runner *r, struct cell *c, int timer) {
 
   /* Recurse? */
   if (c->split) {
-    for (k = 0; k < 8; k++)
+    for (int k = 0; k < 8; k++)
       if (c->progeny[k] != NULL) runner_do_grav_external(r, c->progeny[k], 0);
     return;
   }
@@ -118,10 +115,10 @@ void runner_do_grav_external(struct runner *r, struct cell *c, int timer) {
 #endif
 
   /* Loop over the parts in this cell. */
-  for (i = 0; i < gcount; i++) {
+  for (int i = 0; i < gcount; i++) {
 
     /* Get a direct pointer on the part. */
-    g = &gparts[i];
+    struct gpart *const g = &gparts[i];
 
     /* Is this part within the time step? */
     if (g->ti_end <= ti_current) {
@@ -553,8 +550,13 @@ void runner_do_ghost(struct runner *r, struct cell *c) {
 
           }
 
-          /* Otherwise, sub interaction? */
-          else if (l->t->type == task_type_sub) {
+          /* Otherwise, sub-self interaction? */
+          else if (l->t->type == task_type_sub_self)
+            runner_dosub_subset_density(r, finger, parts, pid, count, NULL, -1,
+                                        1);
+
+          /* Otherwise, sub-pair interaction? */
+          else if (l->t->type == task_type_sub_pair) {
 
             /* Left or right? */
             if (l->t->ci == finger)
@@ -1066,13 +1068,19 @@ void *runner_main(void *data) {
         case task_type_sort:
           runner_do_sort(r, ci, t->flags, 1);
           break;
-        case task_type_sub:
+        case task_type_sub_self:
+          if (t->subtype == task_subtype_density)
+            runner_dosub_self1_density(r, ci, 1);
+          else if (t->subtype == task_subtype_force)
+            runner_dosub_self2_force(r, ci, 1);
+          else
+            error("Unknown task subtype.");
+          break;
+        case task_type_sub_pair:
           if (t->subtype == task_subtype_density)
-            runner_dosub1_density(r, ci, cj, t->flags, 1);
+            runner_dosub_pair1_density(r, ci, cj, t->flags, 1);
           else if (t->subtype == task_subtype_force)
-            runner_dosub2_force(r, ci, cj, t->flags, 1);
-          else if (t->subtype == task_subtype_grav)
-            runner_dosub_grav(r, ci, cj, 1);
+            runner_dosub_pair2_force(r, ci, cj, t->flags, 1);
           else
             error("Unknown task subtype.");
           break;
@@ -1096,21 +1104,6 @@ void *runner_main(void *data) {
         case task_type_recv:
           runner_do_recv_cell(r, ci, 1);
           break;
-        case task_type_grav_pp:
-          if (t->cj == NULL)
-            runner_doself_grav(r, t->ci);
-          else
-            runner_dopair_grav(r, t->ci, t->cj);
-          break;
-        case task_type_grav_mm:
-          runner_dograv_mm(r, t->ci, t->cj);
-          break;
-        case task_type_grav_up:
-          runner_dograv_up(r, t->ci);
-          break;
-        case task_type_grav_down:
-          runner_dograv_down(r, t->ci);
-          break;
         case task_type_grav_external:
           runner_do_grav_external(r, t->ci, 1);
           break;
diff --git a/src/runner_doiact.h b/src/runner_doiact.h
index e0c13e4e0adb101d9a8975651f24225551277f43..7fe19af853601e49de553a55ee205c654167d67e 100644
--- a/src/runner_doiact.h
+++ b/src/runner_doiact.h
@@ -1,4 +1,3 @@
-
 /*******************************************************************************
  * This file is part of SWIFT.
  * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
@@ -57,11 +56,17 @@
 #define _DOSELF_SUBSET(f) PASTE(runner_doself_subset, f)
 #define DOSELF_SUBSET _DOSELF_SUBSET(FUNCTION)
 
-#define _DOSUB1(f) PASTE(runner_dosub1, f)
-#define DOSUB1 _DOSUB1(FUNCTION)
+#define _DOSUB_SELF1(f) PASTE(runner_dosub_self1, f)
+#define DOSUB_SELF1 _DOSUB_SELF1(FUNCTION)
+
+#define _DOSUB_PAIR1(f) PASTE(runner_dosub_pair1, f)
+#define DOSUB_PAIR1 _DOSUB_PAIR1(FUNCTION)
+
+#define _DOSUB_SELF2(f) PASTE(runner_dosub_self2, f)
+#define DOSUB_SELF2 _DOSUB_SELF2(FUNCTION)
 
-#define _DOSUB2(f) PASTE(runner_dosub2, f)
-#define DOSUB2 _DOSUB2(FUNCTION)
+#define _DOSUB_PAIR2(f) PASTE(runner_dosub_pair2, f)
+#define DOSUB_PAIR2 _DOSUB_PAIR2(FUNCTION)
 
 #define _DOSUB_SUBSET(f) PASTE(runner_dosub_subset, f)
 #define DOSUB_SUBSET _DOSUB_SUBSET(FUNCTION)
@@ -78,8 +83,11 @@
 #define _TIMER_DOPAIR(f) PASTE(timer_dopair, f)
 #define TIMER_DOPAIR _TIMER_DOPAIR(FUNCTION)
 
-#define _TIMER_DOSUB(f) PASTE(timer_dosub, f)
-#define TIMER_DOSUB _TIMER_DOSUB(FUNCTION)
+#define _TIMER_DOSUB_SELF(f) PASTE(timer_dosub_self, f)
+#define TIMER_DOSUB_SELF _TIMER_DOSUB_SELF(FUNCTION)
+
+#define _TIMER_DOSUB_PAIR(f) PASTE(timer_dosub_pair, f)
+#define TIMER_DOSUB_PAIR _TIMER_DOSUB_PAIR(FUNCTION)
 
 #define _TIMER_DOSELF_SUBSET(f) PASTE(timer_doself_subset, f)
 #define TIMER_DOSELF_SUBSET _TIMER_DOSELF_SUBSET(FUNCTION)
@@ -1711,7 +1719,7 @@ void DOSELF2(struct runner *r, struct cell *restrict c) {
 }
 
 /**
- * @brief Compute grouped sub-cell interactions
+ * @brief Compute grouped sub-cell interactions for pairs
  *
  * @param r The #runner.
  * @param ci The first #cell.
@@ -1722,543 +1730,561 @@ void DOSELF2(struct runner *r, struct cell *restrict c) {
  * @todo Hard-code the sid on the recursive calls to avoid the
  * redundant computations to find the sid on-the-fly.
  */
+void DOSUB_PAIR1(struct runner *r, struct cell *ci, struct cell *cj, int sid,
+                 int gettimer) {
 
-void DOSUB1(struct runner *r, struct cell *ci, struct cell *cj, int sid,
-            int gettimer) {
-
-  int j = 0, k;
-  double shift[3];
-  float h;
   struct space *s = r->e->s;
   const int ti_current = r->e->ti_current;
 
   TIMER_TIC
 
-  /* Is this a single cell? */
-  if (cj == NULL) {
-
-    /* Should we even bother? */
-    if (ci->ti_end_min > ti_current) return;
+  /* Should we even bother? */
+  if (ci->ti_end_min > ti_current && cj->ti_end_min > ti_current) return;
 
-    /* Recurse? */
-    if (ci->split) {
+  /* Get the cell dimensions. */
+  const float h = fmin(ci->h[0], fmin(ci->h[1], ci->h[2]));
 
-      /* Loop over all progeny. */
-      for (k = 0; k < 8; k++)
-        if (ci->progeny[k] != NULL) {
-          DOSUB1(r, ci->progeny[k], NULL, -1, 0);
-          for (j = k + 1; j < 8; j++)
-            if (ci->progeny[j] != NULL)
-              DOSUB1(r, ci->progeny[k], ci->progeny[j], -1, 0);
-        }
+  /* Get the type of pair if not specified explicitly. */
+  // if ( sid < 0 )
+  double shift[3];
+  sid = space_getsid(s, &ci, &cj, shift);
 
-    }
+  /* Recurse? */
+  if (ci->split && cj->split &&
+      fmaxf(ci->h_max, cj->h_max) * kernel_gamma + ci->dx_max + cj->dx_max <
+          h / 2) {
 
-    /* Otherwise, compute self-interaction. */
-    else
-      DOSELF1(r, ci);
+    /* Different types of flags. */
+    switch (sid) {
 
-  } /* self-interaction. */
+      /* Regular sub-cell interactions of a single cell. */
+      case 0: /* (  1 ,  1 ,  1 ) */
+        if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[7], cj->progeny[0], -1, 0);
+        break;
 
-  /* Otherwise, it's a pair interaction. */
-  else {
+      case 1: /* (  1 ,  1 ,  0 ) */
+        if (ci->progeny[6] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[6], cj->progeny[0], -1, 0);
+        if (ci->progeny[6] != NULL && cj->progeny[1] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[6], cj->progeny[1], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[7], cj->progeny[0], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[1] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[7], cj->progeny[1], -1, 0);
+        break;
 
-    /* Should we even bother? */
-    if (ci->ti_end_min > ti_current && cj->ti_end_min > ti_current) return;
+      case 2: /* (  1 ,  1 , -1 ) */
+        if (ci->progeny[6] != NULL && cj->progeny[1] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[6], cj->progeny[1], -1, 0);
+        break;
 
-    /* Get the cell dimensions. */
-    h = fmin(ci->h[0], fmin(ci->h[1], ci->h[2]));
+      case 3: /* (  1 ,  0 ,  1 ) */
+        if (ci->progeny[5] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[5], cj->progeny[0], -1, 0);
+        if (ci->progeny[5] != NULL && cj->progeny[2] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[5], cj->progeny[2], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[7], cj->progeny[0], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[2] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[7], cj->progeny[2], -1, 0);
+        break;
 
-    /* Get the type of pair if not specified explicitly. */
-    // if ( sid < 0 )
-    sid = space_getsid(s, &ci, &cj, shift);
+      case 4: /* (  1 ,  0 ,  0 ) */
+        if (ci->progeny[4] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[4], cj->progeny[0], -1, 0);
+        if (ci->progeny[4] != NULL && cj->progeny[1] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[4], cj->progeny[1], -1, 0);
+        if (ci->progeny[4] != NULL && cj->progeny[2] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[4], cj->progeny[2], -1, 0);
+        if (ci->progeny[4] != NULL && cj->progeny[3] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[4], cj->progeny[3], -1, 0);
+        if (ci->progeny[5] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[5], cj->progeny[0], -1, 0);
+        if (ci->progeny[5] != NULL && cj->progeny[1] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[5], cj->progeny[1], -1, 0);
+        if (ci->progeny[5] != NULL && cj->progeny[2] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[5], cj->progeny[2], -1, 0);
+        if (ci->progeny[5] != NULL && cj->progeny[3] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[5], cj->progeny[3], -1, 0);
+        if (ci->progeny[6] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[6], cj->progeny[0], -1, 0);
+        if (ci->progeny[6] != NULL && cj->progeny[1] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[6], cj->progeny[1], -1, 0);
+        if (ci->progeny[6] != NULL && cj->progeny[2] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[6], cj->progeny[2], -1, 0);
+        if (ci->progeny[6] != NULL && cj->progeny[3] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[6], cj->progeny[3], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[7], cj->progeny[0], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[1] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[7], cj->progeny[1], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[2] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[7], cj->progeny[2], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[3] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[7], cj->progeny[3], -1, 0);
+        break;
 
-    /* Recurse? */
-    if (ci->split && cj->split &&
-        fmaxf(ci->h_max, cj->h_max) * kernel_gamma + ci->dx_max + cj->dx_max <
-            h / 2) {
+      case 5: /* (  1 ,  0 , -1 ) */
+        if (ci->progeny[4] != NULL && cj->progeny[1] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[4], cj->progeny[1], -1, 0);
+        if (ci->progeny[4] != NULL && cj->progeny[3] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[4], cj->progeny[3], -1, 0);
+        if (ci->progeny[6] != NULL && cj->progeny[1] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[6], cj->progeny[1], -1, 0);
+        if (ci->progeny[6] != NULL && cj->progeny[3] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[6], cj->progeny[3], -1, 0);
+        break;
 
-      /* Different types of flags. */
-      switch (sid) {
+      case 6: /* (  1 , -1 ,  1 ) */
+        if (ci->progeny[5] != NULL && cj->progeny[2] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[5], cj->progeny[2], -1, 0);
+        break;
 
-        /* Regular sub-cell interactions of a single cell. */
-        case 0: /* (  1 ,  1 ,  1 ) */
-          if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
-            DOSUB1(r, ci->progeny[7], cj->progeny[0], -1, 0);
-          break;
+      case 7: /* (  1 , -1 ,  0 ) */
+        if (ci->progeny[4] != NULL && cj->progeny[2] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[4], cj->progeny[2], -1, 0);
+        if (ci->progeny[4] != NULL && cj->progeny[3] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[4], cj->progeny[3], -1, 0);
+        if (ci->progeny[5] != NULL && cj->progeny[2] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[5], cj->progeny[2], -1, 0);
+        if (ci->progeny[5] != NULL && cj->progeny[3] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[5], cj->progeny[3], -1, 0);
+        break;
 
-        case 1: /* (  1 ,  1 ,  0 ) */
-          if (ci->progeny[6] != NULL && cj->progeny[0] != NULL)
-            DOSUB1(r, ci->progeny[6], cj->progeny[0], -1, 0);
-          if (ci->progeny[6] != NULL && cj->progeny[1] != NULL)
-            DOSUB1(r, ci->progeny[6], cj->progeny[1], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
-            DOSUB1(r, ci->progeny[7], cj->progeny[0], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[1] != NULL)
-            DOSUB1(r, ci->progeny[7], cj->progeny[1], -1, 0);
-          break;
+      case 8: /* (  1 , -1 , -1 ) */
+        if (ci->progeny[4] != NULL && cj->progeny[3] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[4], cj->progeny[3], -1, 0);
+        break;
 
-        case 2: /* (  1 ,  1 , -1 ) */
-          if (ci->progeny[6] != NULL && cj->progeny[1] != NULL)
-            DOSUB1(r, ci->progeny[6], cj->progeny[1], -1, 0);
-          break;
+      case 9: /* (  0 ,  1 ,  1 ) */
+        if (ci->progeny[3] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[3], cj->progeny[0], -1, 0);
+        if (ci->progeny[3] != NULL && cj->progeny[4] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[3], cj->progeny[4], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[7], cj->progeny[0], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[4] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[7], cj->progeny[4], -1, 0);
+        break;
 
-        case 3: /* (  1 ,  0 ,  1 ) */
-          if (ci->progeny[5] != NULL && cj->progeny[0] != NULL)
-            DOSUB1(r, ci->progeny[5], cj->progeny[0], -1, 0);
-          if (ci->progeny[5] != NULL && cj->progeny[2] != NULL)
-            DOSUB1(r, ci->progeny[5], cj->progeny[2], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
-            DOSUB1(r, ci->progeny[7], cj->progeny[0], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[2] != NULL)
-            DOSUB1(r, ci->progeny[7], cj->progeny[2], -1, 0);
-          break;
+      case 10: /* (  0 ,  1 ,  0 ) */
+        if (ci->progeny[2] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[2], cj->progeny[0], -1, 0);
+        if (ci->progeny[2] != NULL && cj->progeny[1] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[2], cj->progeny[1], -1, 0);
+        if (ci->progeny[2] != NULL && cj->progeny[4] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[2], cj->progeny[4], -1, 0);
+        if (ci->progeny[2] != NULL && cj->progeny[5] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[2], cj->progeny[5], -1, 0);
+        if (ci->progeny[3] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[3], cj->progeny[0], -1, 0);
+        if (ci->progeny[3] != NULL && cj->progeny[1] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[3], cj->progeny[1], -1, 0);
+        if (ci->progeny[3] != NULL && cj->progeny[4] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[3], cj->progeny[4], -1, 0);
+        if (ci->progeny[3] != NULL && cj->progeny[5] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[3], cj->progeny[5], -1, 0);
+        if (ci->progeny[6] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[6], cj->progeny[0], -1, 0);
+        if (ci->progeny[6] != NULL && cj->progeny[1] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[6], cj->progeny[1], -1, 0);
+        if (ci->progeny[6] != NULL && cj->progeny[4] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[6], cj->progeny[4], -1, 0);
+        if (ci->progeny[6] != NULL && cj->progeny[5] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[6], cj->progeny[5], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[7], cj->progeny[0], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[1] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[7], cj->progeny[1], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[4] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[7], cj->progeny[4], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[5] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[7], cj->progeny[5], -1, 0);
+        break;
 
-        case 4: /* (  1 ,  0 ,  0 ) */
-          if (ci->progeny[4] != NULL && cj->progeny[0] != NULL)
-            DOSUB1(r, ci->progeny[4], cj->progeny[0], -1, 0);
-          if (ci->progeny[4] != NULL && cj->progeny[1] != NULL)
-            DOSUB1(r, ci->progeny[4], cj->progeny[1], -1, 0);
-          if (ci->progeny[4] != NULL && cj->progeny[2] != NULL)
-            DOSUB1(r, ci->progeny[4], cj->progeny[2], -1, 0);
-          if (ci->progeny[4] != NULL && cj->progeny[3] != NULL)
-            DOSUB1(r, ci->progeny[4], cj->progeny[3], -1, 0);
-          if (ci->progeny[5] != NULL && cj->progeny[0] != NULL)
-            DOSUB1(r, ci->progeny[5], cj->progeny[0], -1, 0);
-          if (ci->progeny[5] != NULL && cj->progeny[1] != NULL)
-            DOSUB1(r, ci->progeny[5], cj->progeny[1], -1, 0);
-          if (ci->progeny[5] != NULL && cj->progeny[2] != NULL)
-            DOSUB1(r, ci->progeny[5], cj->progeny[2], -1, 0);
-          if (ci->progeny[5] != NULL && cj->progeny[3] != NULL)
-            DOSUB1(r, ci->progeny[5], cj->progeny[3], -1, 0);
-          if (ci->progeny[6] != NULL && cj->progeny[0] != NULL)
-            DOSUB1(r, ci->progeny[6], cj->progeny[0], -1, 0);
-          if (ci->progeny[6] != NULL && cj->progeny[1] != NULL)
-            DOSUB1(r, ci->progeny[6], cj->progeny[1], -1, 0);
-          if (ci->progeny[6] != NULL && cj->progeny[2] != NULL)
-            DOSUB1(r, ci->progeny[6], cj->progeny[2], -1, 0);
-          if (ci->progeny[6] != NULL && cj->progeny[3] != NULL)
-            DOSUB1(r, ci->progeny[6], cj->progeny[3], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
-            DOSUB1(r, ci->progeny[7], cj->progeny[0], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[1] != NULL)
-            DOSUB1(r, ci->progeny[7], cj->progeny[1], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[2] != NULL)
-            DOSUB1(r, ci->progeny[7], cj->progeny[2], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[3] != NULL)
-            DOSUB1(r, ci->progeny[7], cj->progeny[3], -1, 0);
-          break;
+      case 11: /* (  0 ,  1 , -1 ) */
+        if (ci->progeny[2] != NULL && cj->progeny[1] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[2], cj->progeny[1], -1, 0);
+        if (ci->progeny[2] != NULL && cj->progeny[5] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[2], cj->progeny[5], -1, 0);
+        if (ci->progeny[6] != NULL && cj->progeny[1] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[6], cj->progeny[1], -1, 0);
+        if (ci->progeny[6] != NULL && cj->progeny[5] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[6], cj->progeny[5], -1, 0);
+        break;
 
-        case 5: /* (  1 ,  0 , -1 ) */
-          if (ci->progeny[4] != NULL && cj->progeny[1] != NULL)
-            DOSUB1(r, ci->progeny[4], cj->progeny[1], -1, 0);
-          if (ci->progeny[4] != NULL && cj->progeny[3] != NULL)
-            DOSUB1(r, ci->progeny[4], cj->progeny[3], -1, 0);
-          if (ci->progeny[6] != NULL && cj->progeny[1] != NULL)
-            DOSUB1(r, ci->progeny[6], cj->progeny[1], -1, 0);
-          if (ci->progeny[6] != NULL && cj->progeny[3] != NULL)
-            DOSUB1(r, ci->progeny[6], cj->progeny[3], -1, 0);
-          break;
+      case 12: /* (  0 ,  0 ,  1 ) */
+        if (ci->progeny[1] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[1], cj->progeny[0], -1, 0);
+        if (ci->progeny[1] != NULL && cj->progeny[2] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[1], cj->progeny[2], -1, 0);
+        if (ci->progeny[1] != NULL && cj->progeny[4] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[1], cj->progeny[4], -1, 0);
+        if (ci->progeny[1] != NULL && cj->progeny[6] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[1], cj->progeny[6], -1, 0);
+        if (ci->progeny[3] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[3], cj->progeny[0], -1, 0);
+        if (ci->progeny[3] != NULL && cj->progeny[2] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[3], cj->progeny[2], -1, 0);
+        if (ci->progeny[3] != NULL && cj->progeny[4] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[3], cj->progeny[4], -1, 0);
+        if (ci->progeny[3] != NULL && cj->progeny[6] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[3], cj->progeny[6], -1, 0);
+        if (ci->progeny[5] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[5], cj->progeny[0], -1, 0);
+        if (ci->progeny[5] != NULL && cj->progeny[2] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[5], cj->progeny[2], -1, 0);
+        if (ci->progeny[5] != NULL && cj->progeny[4] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[5], cj->progeny[4], -1, 0);
+        if (ci->progeny[5] != NULL && cj->progeny[6] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[5], cj->progeny[6], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[7], cj->progeny[0], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[2] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[7], cj->progeny[2], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[4] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[7], cj->progeny[4], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[6] != NULL)
+          DOSUB_PAIR1(r, ci->progeny[7], cj->progeny[6], -1, 0);
+        break;
+    }
 
-        case 6: /* (  1 , -1 ,  1 ) */
-          if (ci->progeny[5] != NULL && cj->progeny[2] != NULL)
-            DOSUB1(r, ci->progeny[5], cj->progeny[2], -1, 0);
-          break;
+  }
 
-        case 7: /* (  1 , -1 ,  0 ) */
-          if (ci->progeny[4] != NULL && cj->progeny[2] != NULL)
-            DOSUB1(r, ci->progeny[4], cj->progeny[2], -1, 0);
-          if (ci->progeny[4] != NULL && cj->progeny[3] != NULL)
-            DOSUB1(r, ci->progeny[4], cj->progeny[3], -1, 0);
-          if (ci->progeny[5] != NULL && cj->progeny[2] != NULL)
-            DOSUB1(r, ci->progeny[5], cj->progeny[2], -1, 0);
-          if (ci->progeny[5] != NULL && cj->progeny[3] != NULL)
-            DOSUB1(r, ci->progeny[5], cj->progeny[3], -1, 0);
-          break;
+  /* Otherwise, compute the pair directly. */
+  else if (ci->ti_end_min <= ti_current || cj->ti_end_min <= ti_current) {
 
-        case 8: /* (  1 , -1 , -1 ) */
-          if (ci->progeny[4] != NULL && cj->progeny[3] != NULL)
-            DOSUB1(r, ci->progeny[4], cj->progeny[3], -1, 0);
-          break;
+    /* Do any of the cells need to be sorted first? */
+    if (!(ci->sorted & (1 << sid))) runner_do_sort(r, ci, (1 << sid), 1);
+    if (!(cj->sorted & (1 << sid))) runner_do_sort(r, cj, (1 << sid), 1);
 
-        case 9: /* (  0 ,  1 ,  1 ) */
-          if (ci->progeny[3] != NULL && cj->progeny[0] != NULL)
-            DOSUB1(r, ci->progeny[3], cj->progeny[0], -1, 0);
-          if (ci->progeny[3] != NULL && cj->progeny[4] != NULL)
-            DOSUB1(r, ci->progeny[3], cj->progeny[4], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
-            DOSUB1(r, ci->progeny[7], cj->progeny[0], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[4] != NULL)
-            DOSUB1(r, ci->progeny[7], cj->progeny[4], -1, 0);
-          break;
+    /* Compute the interactions. */
+    DOPAIR1(r, ci, cj);
+  }
 
-        case 10: /* (  0 ,  1 ,  0 ) */
-          if (ci->progeny[2] != NULL && cj->progeny[0] != NULL)
-            DOSUB1(r, ci->progeny[2], cj->progeny[0], -1, 0);
-          if (ci->progeny[2] != NULL && cj->progeny[1] != NULL)
-            DOSUB1(r, ci->progeny[2], cj->progeny[1], -1, 0);
-          if (ci->progeny[2] != NULL && cj->progeny[4] != NULL)
-            DOSUB1(r, ci->progeny[2], cj->progeny[4], -1, 0);
-          if (ci->progeny[2] != NULL && cj->progeny[5] != NULL)
-            DOSUB1(r, ci->progeny[2], cj->progeny[5], -1, 0);
-          if (ci->progeny[3] != NULL && cj->progeny[0] != NULL)
-            DOSUB1(r, ci->progeny[3], cj->progeny[0], -1, 0);
-          if (ci->progeny[3] != NULL && cj->progeny[1] != NULL)
-            DOSUB1(r, ci->progeny[3], cj->progeny[1], -1, 0);
-          if (ci->progeny[3] != NULL && cj->progeny[4] != NULL)
-            DOSUB1(r, ci->progeny[3], cj->progeny[4], -1, 0);
-          if (ci->progeny[3] != NULL && cj->progeny[5] != NULL)
-            DOSUB1(r, ci->progeny[3], cj->progeny[5], -1, 0);
-          if (ci->progeny[6] != NULL && cj->progeny[0] != NULL)
-            DOSUB1(r, ci->progeny[6], cj->progeny[0], -1, 0);
-          if (ci->progeny[6] != NULL && cj->progeny[1] != NULL)
-            DOSUB1(r, ci->progeny[6], cj->progeny[1], -1, 0);
-          if (ci->progeny[6] != NULL && cj->progeny[4] != NULL)
-            DOSUB1(r, ci->progeny[6], cj->progeny[4], -1, 0);
-          if (ci->progeny[6] != NULL && cj->progeny[5] != NULL)
-            DOSUB1(r, ci->progeny[6], cj->progeny[5], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
-            DOSUB1(r, ci->progeny[7], cj->progeny[0], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[1] != NULL)
-            DOSUB1(r, ci->progeny[7], cj->progeny[1], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[4] != NULL)
-            DOSUB1(r, ci->progeny[7], cj->progeny[4], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[5] != NULL)
-            DOSUB1(r, ci->progeny[7], cj->progeny[5], -1, 0);
-          break;
+  if (gettimer) TIMER_TOC(TIMER_DOSUB_PAIR);
+}
 
-        case 11: /* (  0 ,  1 , -1 ) */
-          if (ci->progeny[2] != NULL && cj->progeny[1] != NULL)
-            DOSUB1(r, ci->progeny[2], cj->progeny[1], -1, 0);
-          if (ci->progeny[2] != NULL && cj->progeny[5] != NULL)
-            DOSUB1(r, ci->progeny[2], cj->progeny[5], -1, 0);
-          if (ci->progeny[6] != NULL && cj->progeny[1] != NULL)
-            DOSUB1(r, ci->progeny[6], cj->progeny[1], -1, 0);
-          if (ci->progeny[6] != NULL && cj->progeny[5] != NULL)
-            DOSUB1(r, ci->progeny[6], cj->progeny[5], -1, 0);
-          break;
+/**
+ * @brief Compute grouped sub-cell interactions for self tasks
+ *
+ * @param r The #runner.
+ * @param ci The first #cell.
+ * @param gettimer Do we have a timer ?
+ */
+void DOSUB_SELF1(struct runner *r, struct cell *ci, int gettimer) {
 
-        case 12: /* (  0 ,  0 ,  1 ) */
-          if (ci->progeny[1] != NULL && cj->progeny[0] != NULL)
-            DOSUB1(r, ci->progeny[1], cj->progeny[0], -1, 0);
-          if (ci->progeny[1] != NULL && cj->progeny[2] != NULL)
-            DOSUB1(r, ci->progeny[1], cj->progeny[2], -1, 0);
-          if (ci->progeny[1] != NULL && cj->progeny[4] != NULL)
-            DOSUB1(r, ci->progeny[1], cj->progeny[4], -1, 0);
-          if (ci->progeny[1] != NULL && cj->progeny[6] != NULL)
-            DOSUB1(r, ci->progeny[1], cj->progeny[6], -1, 0);
-          if (ci->progeny[3] != NULL && cj->progeny[0] != NULL)
-            DOSUB1(r, ci->progeny[3], cj->progeny[0], -1, 0);
-          if (ci->progeny[3] != NULL && cj->progeny[2] != NULL)
-            DOSUB1(r, ci->progeny[3], cj->progeny[2], -1, 0);
-          if (ci->progeny[3] != NULL && cj->progeny[4] != NULL)
-            DOSUB1(r, ci->progeny[3], cj->progeny[4], -1, 0);
-          if (ci->progeny[3] != NULL && cj->progeny[6] != NULL)
-            DOSUB1(r, ci->progeny[3], cj->progeny[6], -1, 0);
-          if (ci->progeny[5] != NULL && cj->progeny[0] != NULL)
-            DOSUB1(r, ci->progeny[5], cj->progeny[0], -1, 0);
-          if (ci->progeny[5] != NULL && cj->progeny[2] != NULL)
-            DOSUB1(r, ci->progeny[5], cj->progeny[2], -1, 0);
-          if (ci->progeny[5] != NULL && cj->progeny[4] != NULL)
-            DOSUB1(r, ci->progeny[5], cj->progeny[4], -1, 0);
-          if (ci->progeny[5] != NULL && cj->progeny[6] != NULL)
-            DOSUB1(r, ci->progeny[5], cj->progeny[6], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
-            DOSUB1(r, ci->progeny[7], cj->progeny[0], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[2] != NULL)
-            DOSUB1(r, ci->progeny[7], cj->progeny[2], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[4] != NULL)
-            DOSUB1(r, ci->progeny[7], cj->progeny[4], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[6] != NULL)
-            DOSUB1(r, ci->progeny[7], cj->progeny[6], -1, 0);
-          break;
-      }
+  const int ti_current = r->e->ti_current;
 
-    }
+  TIMER_TIC
 
-    /* Otherwise, compute the pair directly. */
-    else if (ci->ti_end_min <= ti_current || cj->ti_end_min <= ti_current) {
+  /* Should we even bother? */
+  if (ci->ti_end_min > ti_current) return;
 
-      /* Do any of the cells need to be sorted first? */
-      if (!(ci->sorted & (1 << sid))) runner_do_sort(r, ci, (1 << sid), 1);
-      if (!(cj->sorted & (1 << sid))) runner_do_sort(r, cj, (1 << sid), 1);
+  /* Recurse? */
+  if (ci->split) {
 
-      /* Compute the interactions. */
-      DOPAIR1(r, ci, cj);
-    }
+    /* Loop over all progeny. */
+    for (int k = 0; k < 8; k++)
+      if (ci->progeny[k] != NULL) {
+        DOSUB_SELF1(r, ci->progeny[k], 0);
+        for (int j = k + 1; j < 8; j++)
+          if (ci->progeny[j] != NULL)
+            DOSUB_PAIR1(r, ci->progeny[k], ci->progeny[j], -1, 0);
+      }
+  }
 
-  } /* otherwise, pair interaction. */
+  /* Otherwise, compute self-interaction. */
+  else
+    DOSELF1(r, ci);
 
-  if (gettimer) TIMER_TOC(TIMER_DOSUB);
+  if (gettimer) TIMER_TOC(TIMER_DOSUB_SELF);
 }
 
-void DOSUB2(struct runner *r, struct cell *ci, struct cell *cj, int sid,
-            int gettimer) {
+/**
+ * @brief Compute grouped sub-cell interactions for pairs (symmetric case)
+ *
+ * @param r The #runner.
+ * @param ci The first #cell.
+ * @param cj The second #cell.
+ * @param sid The direction linking the cells
+ * @param gettimer Do we have a timer ?
+ *
+ * @todo Hard-code the sid on the recursive calls to avoid the
+ * redundant computations to find the sid on-the-fly.
+ */
+void DOSUB_PAIR2(struct runner *r, struct cell *ci, struct cell *cj, int sid,
+                 int gettimer) {
 
-  int j, k;
-  double shift[3];
-  float h;
   struct space *s = r->e->s;
   const int ti_current = r->e->ti_current;
 
   TIMER_TIC
 
-  /* Is this a single cell? */
-  if (cj == NULL) {
+  /* Should we even bother? */
+  if (ci->ti_end_min > ti_current && cj->ti_end_min > ti_current) return;
 
-    /* Should we even bother? */
-    if (ci->ti_end_min > ti_current) return;
+  /* Get the cell dimensions. */
+  const float h = fmin(ci->h[0], fmin(ci->h[1], ci->h[2]));
 
-    /* Recurse? */
-    if (ci->split) {
+  /* Get the type of pair if not specified explicitly. */
+  // if ( sid < 0 )
+  double shift[3];
+  sid = space_getsid(s, &ci, &cj, shift);
 
-      /* Loop over all progeny. */
-      for (k = 0; k < 8; k++)
-        if (ci->progeny[k] != NULL) {
-          DOSUB2(r, ci->progeny[k], NULL, -1, 0);
-          for (j = k + 1; j < 8; j++)
-            if (ci->progeny[j] != NULL)
-              DOSUB2(r, ci->progeny[k], ci->progeny[j], -1, 0);
-        }
+  /* Recurse? */
+  if (ci->split && cj->split &&
+      fmaxf(ci->h_max, cj->h_max) * kernel_gamma + ci->dx_max + cj->dx_max <
+          h / 2) {
 
-    }
+    /* Different types of flags. */
+    switch (sid) {
 
-    /* Otherwise, compute self-interaction. */
-    else
-      DOSELF2(r, ci);
+      /* Regular sub-cell interactions of a single cell. */
+      case 0: /* (  1 ,  1 ,  1 ) */
+        if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[7], cj->progeny[0], -1, 0);
+        break;
 
-  } /* self-interaction. */
+      case 1: /* (  1 ,  1 ,  0 ) */
+        if (ci->progeny[6] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[6], cj->progeny[0], -1, 0);
+        if (ci->progeny[6] != NULL && cj->progeny[1] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[6], cj->progeny[1], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[7], cj->progeny[0], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[1] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[7], cj->progeny[1], -1, 0);
+        break;
 
-  /* Otherwise, it's a pair interaction. */
-  else {
+      case 2: /* (  1 ,  1 , -1 ) */
+        if (ci->progeny[6] != NULL && cj->progeny[1] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[6], cj->progeny[1], -1, 0);
+        break;
 
-    /* Should we even bother? */
-    if (ci->ti_end_min > ti_current && cj->ti_end_min > ti_current) return;
+      case 3: /* (  1 ,  0 ,  1 ) */
+        if (ci->progeny[5] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[5], cj->progeny[0], -1, 0);
+        if (ci->progeny[5] != NULL && cj->progeny[2] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[5], cj->progeny[2], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[7], cj->progeny[0], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[2] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[7], cj->progeny[2], -1, 0);
+        break;
 
-    /* Get the cell dimensions. */
-    h = fmin(ci->h[0], fmin(ci->h[1], ci->h[2]));
+      case 4: /* (  1 ,  0 ,  0 ) */
+        if (ci->progeny[4] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[4], cj->progeny[0], -1, 0);
+        if (ci->progeny[4] != NULL && cj->progeny[1] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[4], cj->progeny[1], -1, 0);
+        if (ci->progeny[4] != NULL && cj->progeny[2] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[4], cj->progeny[2], -1, 0);
+        if (ci->progeny[4] != NULL && cj->progeny[3] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[4], cj->progeny[3], -1, 0);
+        if (ci->progeny[5] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[5], cj->progeny[0], -1, 0);
+        if (ci->progeny[5] != NULL && cj->progeny[1] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[5], cj->progeny[1], -1, 0);
+        if (ci->progeny[5] != NULL && cj->progeny[2] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[5], cj->progeny[2], -1, 0);
+        if (ci->progeny[5] != NULL && cj->progeny[3] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[5], cj->progeny[3], -1, 0);
+        if (ci->progeny[6] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[6], cj->progeny[0], -1, 0);
+        if (ci->progeny[6] != NULL && cj->progeny[1] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[6], cj->progeny[1], -1, 0);
+        if (ci->progeny[6] != NULL && cj->progeny[2] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[6], cj->progeny[2], -1, 0);
+        if (ci->progeny[6] != NULL && cj->progeny[3] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[6], cj->progeny[3], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[7], cj->progeny[0], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[1] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[7], cj->progeny[1], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[2] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[7], cj->progeny[2], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[3] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[7], cj->progeny[3], -1, 0);
+        break;
 
-    /* Get the type of pair if not specified explicitly. */
-    // if ( sid < 0 )
-    sid = space_getsid(s, &ci, &cj, shift);
+      case 5: /* (  1 ,  0 , -1 ) */
+        if (ci->progeny[4] != NULL && cj->progeny[1] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[4], cj->progeny[1], -1, 0);
+        if (ci->progeny[4] != NULL && cj->progeny[3] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[4], cj->progeny[3], -1, 0);
+        if (ci->progeny[6] != NULL && cj->progeny[1] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[6], cj->progeny[1], -1, 0);
+        if (ci->progeny[6] != NULL && cj->progeny[3] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[6], cj->progeny[3], -1, 0);
+        break;
 
-    /* Recurse? */
-    if (ci->split && cj->split &&
-        fmaxf(ci->h_max, cj->h_max) * kernel_gamma + ci->dx_max + cj->dx_max <
-            h / 2) {
+      case 6: /* (  1 , -1 ,  1 ) */
+        if (ci->progeny[5] != NULL && cj->progeny[2] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[5], cj->progeny[2], -1, 0);
+        break;
 
-      /* Different types of flags. */
-      switch (sid) {
+      case 7: /* (  1 , -1 ,  0 ) */
+        if (ci->progeny[4] != NULL && cj->progeny[2] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[4], cj->progeny[2], -1, 0);
+        if (ci->progeny[4] != NULL && cj->progeny[3] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[4], cj->progeny[3], -1, 0);
+        if (ci->progeny[5] != NULL && cj->progeny[2] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[5], cj->progeny[2], -1, 0);
+        if (ci->progeny[5] != NULL && cj->progeny[3] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[5], cj->progeny[3], -1, 0);
+        break;
 
-        /* Regular sub-cell interactions of a single cell. */
-        case 0: /* (  1 ,  1 ,  1 ) */
-          if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
-            DOSUB2(r, ci->progeny[7], cj->progeny[0], -1, 0);
-          break;
+      case 8: /* (  1 , -1 , -1 ) */
+        if (ci->progeny[4] != NULL && cj->progeny[3] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[4], cj->progeny[3], -1, 0);
+        break;
 
-        case 1: /* (  1 ,  1 ,  0 ) */
-          if (ci->progeny[6] != NULL && cj->progeny[0] != NULL)
-            DOSUB2(r, ci->progeny[6], cj->progeny[0], -1, 0);
-          if (ci->progeny[6] != NULL && cj->progeny[1] != NULL)
-            DOSUB2(r, ci->progeny[6], cj->progeny[1], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
-            DOSUB2(r, ci->progeny[7], cj->progeny[0], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[1] != NULL)
-            DOSUB2(r, ci->progeny[7], cj->progeny[1], -1, 0);
-          break;
+      case 9: /* (  0 ,  1 ,  1 ) */
+        if (ci->progeny[3] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[3], cj->progeny[0], -1, 0);
+        if (ci->progeny[3] != NULL && cj->progeny[4] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[3], cj->progeny[4], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[7], cj->progeny[0], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[4] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[7], cj->progeny[4], -1, 0);
+        break;
 
-        case 2: /* (  1 ,  1 , -1 ) */
-          if (ci->progeny[6] != NULL && cj->progeny[1] != NULL)
-            DOSUB2(r, ci->progeny[6], cj->progeny[1], -1, 0);
-          break;
+      case 10: /* (  0 ,  1 ,  0 ) */
+        if (ci->progeny[2] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[2], cj->progeny[0], -1, 0);
+        if (ci->progeny[2] != NULL && cj->progeny[1] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[2], cj->progeny[1], -1, 0);
+        if (ci->progeny[2] != NULL && cj->progeny[4] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[2], cj->progeny[4], -1, 0);
+        if (ci->progeny[2] != NULL && cj->progeny[5] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[2], cj->progeny[5], -1, 0);
+        if (ci->progeny[3] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[3], cj->progeny[0], -1, 0);
+        if (ci->progeny[3] != NULL && cj->progeny[1] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[3], cj->progeny[1], -1, 0);
+        if (ci->progeny[3] != NULL && cj->progeny[4] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[3], cj->progeny[4], -1, 0);
+        if (ci->progeny[3] != NULL && cj->progeny[5] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[3], cj->progeny[5], -1, 0);
+        if (ci->progeny[6] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[6], cj->progeny[0], -1, 0);
+        if (ci->progeny[6] != NULL && cj->progeny[1] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[6], cj->progeny[1], -1, 0);
+        if (ci->progeny[6] != NULL && cj->progeny[4] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[6], cj->progeny[4], -1, 0);
+        if (ci->progeny[6] != NULL && cj->progeny[5] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[6], cj->progeny[5], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[7], cj->progeny[0], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[1] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[7], cj->progeny[1], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[4] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[7], cj->progeny[4], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[5] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[7], cj->progeny[5], -1, 0);
+        break;
 
-        case 3: /* (  1 ,  0 ,  1 ) */
-          if (ci->progeny[5] != NULL && cj->progeny[0] != NULL)
-            DOSUB2(r, ci->progeny[5], cj->progeny[0], -1, 0);
-          if (ci->progeny[5] != NULL && cj->progeny[2] != NULL)
-            DOSUB2(r, ci->progeny[5], cj->progeny[2], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
-            DOSUB2(r, ci->progeny[7], cj->progeny[0], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[2] != NULL)
-            DOSUB2(r, ci->progeny[7], cj->progeny[2], -1, 0);
-          break;
+      case 11: /* (  0 ,  1 , -1 ) */
+        if (ci->progeny[2] != NULL && cj->progeny[1] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[2], cj->progeny[1], -1, 0);
+        if (ci->progeny[2] != NULL && cj->progeny[5] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[2], cj->progeny[5], -1, 0);
+        if (ci->progeny[6] != NULL && cj->progeny[1] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[6], cj->progeny[1], -1, 0);
+        if (ci->progeny[6] != NULL && cj->progeny[5] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[6], cj->progeny[5], -1, 0);
+        break;
 
-        case 4: /* (  1 ,  0 ,  0 ) */
-          if (ci->progeny[4] != NULL && cj->progeny[0] != NULL)
-            DOSUB2(r, ci->progeny[4], cj->progeny[0], -1, 0);
-          if (ci->progeny[4] != NULL && cj->progeny[1] != NULL)
-            DOSUB2(r, ci->progeny[4], cj->progeny[1], -1, 0);
-          if (ci->progeny[4] != NULL && cj->progeny[2] != NULL)
-            DOSUB2(r, ci->progeny[4], cj->progeny[2], -1, 0);
-          if (ci->progeny[4] != NULL && cj->progeny[3] != NULL)
-            DOSUB2(r, ci->progeny[4], cj->progeny[3], -1, 0);
-          if (ci->progeny[5] != NULL && cj->progeny[0] != NULL)
-            DOSUB2(r, ci->progeny[5], cj->progeny[0], -1, 0);
-          if (ci->progeny[5] != NULL && cj->progeny[1] != NULL)
-            DOSUB2(r, ci->progeny[5], cj->progeny[1], -1, 0);
-          if (ci->progeny[5] != NULL && cj->progeny[2] != NULL)
-            DOSUB2(r, ci->progeny[5], cj->progeny[2], -1, 0);
-          if (ci->progeny[5] != NULL && cj->progeny[3] != NULL)
-            DOSUB2(r, ci->progeny[5], cj->progeny[3], -1, 0);
-          if (ci->progeny[6] != NULL && cj->progeny[0] != NULL)
-            DOSUB2(r, ci->progeny[6], cj->progeny[0], -1, 0);
-          if (ci->progeny[6] != NULL && cj->progeny[1] != NULL)
-            DOSUB2(r, ci->progeny[6], cj->progeny[1], -1, 0);
-          if (ci->progeny[6] != NULL && cj->progeny[2] != NULL)
-            DOSUB2(r, ci->progeny[6], cj->progeny[2], -1, 0);
-          if (ci->progeny[6] != NULL && cj->progeny[3] != NULL)
-            DOSUB2(r, ci->progeny[6], cj->progeny[3], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
-            DOSUB2(r, ci->progeny[7], cj->progeny[0], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[1] != NULL)
-            DOSUB2(r, ci->progeny[7], cj->progeny[1], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[2] != NULL)
-            DOSUB2(r, ci->progeny[7], cj->progeny[2], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[3] != NULL)
-            DOSUB2(r, ci->progeny[7], cj->progeny[3], -1, 0);
-          break;
+      case 12: /* (  0 ,  0 ,  1 ) */
+        if (ci->progeny[1] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[1], cj->progeny[0], -1, 0);
+        if (ci->progeny[1] != NULL && cj->progeny[2] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[1], cj->progeny[2], -1, 0);
+        if (ci->progeny[1] != NULL && cj->progeny[4] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[1], cj->progeny[4], -1, 0);
+        if (ci->progeny[1] != NULL && cj->progeny[6] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[1], cj->progeny[6], -1, 0);
+        if (ci->progeny[3] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[3], cj->progeny[0], -1, 0);
+        if (ci->progeny[3] != NULL && cj->progeny[2] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[3], cj->progeny[2], -1, 0);
+        if (ci->progeny[3] != NULL && cj->progeny[4] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[3], cj->progeny[4], -1, 0);
+        if (ci->progeny[3] != NULL && cj->progeny[6] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[3], cj->progeny[6], -1, 0);
+        if (ci->progeny[5] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[5], cj->progeny[0], -1, 0);
+        if (ci->progeny[5] != NULL && cj->progeny[2] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[5], cj->progeny[2], -1, 0);
+        if (ci->progeny[5] != NULL && cj->progeny[4] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[5], cj->progeny[4], -1, 0);
+        if (ci->progeny[5] != NULL && cj->progeny[6] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[5], cj->progeny[6], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[7], cj->progeny[0], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[2] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[7], cj->progeny[2], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[4] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[7], cj->progeny[4], -1, 0);
+        if (ci->progeny[7] != NULL && cj->progeny[6] != NULL)
+          DOSUB_PAIR2(r, ci->progeny[7], cj->progeny[6], -1, 0);
+        break;
+    }
 
-        case 5: /* (  1 ,  0 , -1 ) */
-          if (ci->progeny[4] != NULL && cj->progeny[1] != NULL)
-            DOSUB2(r, ci->progeny[4], cj->progeny[1], -1, 0);
-          if (ci->progeny[4] != NULL && cj->progeny[3] != NULL)
-            DOSUB2(r, ci->progeny[4], cj->progeny[3], -1, 0);
-          if (ci->progeny[6] != NULL && cj->progeny[1] != NULL)
-            DOSUB2(r, ci->progeny[6], cj->progeny[1], -1, 0);
-          if (ci->progeny[6] != NULL && cj->progeny[3] != NULL)
-            DOSUB2(r, ci->progeny[6], cj->progeny[3], -1, 0);
-          break;
+  }
 
-        case 6: /* (  1 , -1 ,  1 ) */
-          if (ci->progeny[5] != NULL && cj->progeny[2] != NULL)
-            DOSUB2(r, ci->progeny[5], cj->progeny[2], -1, 0);
-          break;
+  /* Otherwise, compute the pair directly. */
+  else if (ci->ti_end_min <= ti_current || cj->ti_end_min <= ti_current) {
 
-        case 7: /* (  1 , -1 ,  0 ) */
-          if (ci->progeny[4] != NULL && cj->progeny[2] != NULL)
-            DOSUB2(r, ci->progeny[4], cj->progeny[2], -1, 0);
-          if (ci->progeny[4] != NULL && cj->progeny[3] != NULL)
-            DOSUB2(r, ci->progeny[4], cj->progeny[3], -1, 0);
-          if (ci->progeny[5] != NULL && cj->progeny[2] != NULL)
-            DOSUB2(r, ci->progeny[5], cj->progeny[2], -1, 0);
-          if (ci->progeny[5] != NULL && cj->progeny[3] != NULL)
-            DOSUB2(r, ci->progeny[5], cj->progeny[3], -1, 0);
-          break;
+    /* Do any of the cells need to be sorted first? */
+    if (!(ci->sorted & (1 << sid))) runner_do_sort(r, ci, (1 << sid), 1);
+    if (!(cj->sorted & (1 << sid))) runner_do_sort(r, cj, (1 << sid), 1);
 
-        case 8: /* (  1 , -1 , -1 ) */
-          if (ci->progeny[4] != NULL && cj->progeny[3] != NULL)
-            DOSUB2(r, ci->progeny[4], cj->progeny[3], -1, 0);
-          break;
+    /* Compute the interactions. */
+    DOPAIR2(r, ci, cj);
+  }
 
-        case 9: /* (  0 ,  1 ,  1 ) */
-          if (ci->progeny[3] != NULL && cj->progeny[0] != NULL)
-            DOSUB2(r, ci->progeny[3], cj->progeny[0], -1, 0);
-          if (ci->progeny[3] != NULL && cj->progeny[4] != NULL)
-            DOSUB2(r, ci->progeny[3], cj->progeny[4], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
-            DOSUB2(r, ci->progeny[7], cj->progeny[0], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[4] != NULL)
-            DOSUB2(r, ci->progeny[7], cj->progeny[4], -1, 0);
-          break;
+  if (gettimer) TIMER_TOC(TIMER_DOSUB_PAIR);
+}
 
-        case 10: /* (  0 ,  1 ,  0 ) */
-          if (ci->progeny[2] != NULL && cj->progeny[0] != NULL)
-            DOSUB2(r, ci->progeny[2], cj->progeny[0], -1, 0);
-          if (ci->progeny[2] != NULL && cj->progeny[1] != NULL)
-            DOSUB2(r, ci->progeny[2], cj->progeny[1], -1, 0);
-          if (ci->progeny[2] != NULL && cj->progeny[4] != NULL)
-            DOSUB2(r, ci->progeny[2], cj->progeny[4], -1, 0);
-          if (ci->progeny[2] != NULL && cj->progeny[5] != NULL)
-            DOSUB2(r, ci->progeny[2], cj->progeny[5], -1, 0);
-          if (ci->progeny[3] != NULL && cj->progeny[0] != NULL)
-            DOSUB2(r, ci->progeny[3], cj->progeny[0], -1, 0);
-          if (ci->progeny[3] != NULL && cj->progeny[1] != NULL)
-            DOSUB2(r, ci->progeny[3], cj->progeny[1], -1, 0);
-          if (ci->progeny[3] != NULL && cj->progeny[4] != NULL)
-            DOSUB2(r, ci->progeny[3], cj->progeny[4], -1, 0);
-          if (ci->progeny[3] != NULL && cj->progeny[5] != NULL)
-            DOSUB2(r, ci->progeny[3], cj->progeny[5], -1, 0);
-          if (ci->progeny[6] != NULL && cj->progeny[0] != NULL)
-            DOSUB2(r, ci->progeny[6], cj->progeny[0], -1, 0);
-          if (ci->progeny[6] != NULL && cj->progeny[1] != NULL)
-            DOSUB2(r, ci->progeny[6], cj->progeny[1], -1, 0);
-          if (ci->progeny[6] != NULL && cj->progeny[4] != NULL)
-            DOSUB2(r, ci->progeny[6], cj->progeny[4], -1, 0);
-          if (ci->progeny[6] != NULL && cj->progeny[5] != NULL)
-            DOSUB2(r, ci->progeny[6], cj->progeny[5], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
-            DOSUB2(r, ci->progeny[7], cj->progeny[0], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[1] != NULL)
-            DOSUB2(r, ci->progeny[7], cj->progeny[1], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[4] != NULL)
-            DOSUB2(r, ci->progeny[7], cj->progeny[4], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[5] != NULL)
-            DOSUB2(r, ci->progeny[7], cj->progeny[5], -1, 0);
-          break;
+/**
+ * @brief Compute grouped sub-cell interactions for self tasks (symmetric case)
+ *
+ * @param r The #runner.
+ * @param ci The first #cell.
+ * @param gettimer Do we have a timer ?
+ */
+void DOSUB_SELF2(struct runner *r, struct cell *ci, int gettimer) {
 
-        case 11: /* (  0 ,  1 , -1 ) */
-          if (ci->progeny[2] != NULL && cj->progeny[1] != NULL)
-            DOSUB2(r, ci->progeny[2], cj->progeny[1], -1, 0);
-          if (ci->progeny[2] != NULL && cj->progeny[5] != NULL)
-            DOSUB2(r, ci->progeny[2], cj->progeny[5], -1, 0);
-          if (ci->progeny[6] != NULL && cj->progeny[1] != NULL)
-            DOSUB2(r, ci->progeny[6], cj->progeny[1], -1, 0);
-          if (ci->progeny[6] != NULL && cj->progeny[5] != NULL)
-            DOSUB2(r, ci->progeny[6], cj->progeny[5], -1, 0);
-          break;
+  const int ti_current = r->e->ti_current;
 
-        case 12: /* (  0 ,  0 ,  1 ) */
-          if (ci->progeny[1] != NULL && cj->progeny[0] != NULL)
-            DOSUB2(r, ci->progeny[1], cj->progeny[0], -1, 0);
-          if (ci->progeny[1] != NULL && cj->progeny[2] != NULL)
-            DOSUB2(r, ci->progeny[1], cj->progeny[2], -1, 0);
-          if (ci->progeny[1] != NULL && cj->progeny[4] != NULL)
-            DOSUB2(r, ci->progeny[1], cj->progeny[4], -1, 0);
-          if (ci->progeny[1] != NULL && cj->progeny[6] != NULL)
-            DOSUB2(r, ci->progeny[1], cj->progeny[6], -1, 0);
-          if (ci->progeny[3] != NULL && cj->progeny[0] != NULL)
-            DOSUB2(r, ci->progeny[3], cj->progeny[0], -1, 0);
-          if (ci->progeny[3] != NULL && cj->progeny[2] != NULL)
-            DOSUB2(r, ci->progeny[3], cj->progeny[2], -1, 0);
-          if (ci->progeny[3] != NULL && cj->progeny[4] != NULL)
-            DOSUB2(r, ci->progeny[3], cj->progeny[4], -1, 0);
-          if (ci->progeny[3] != NULL && cj->progeny[6] != NULL)
-            DOSUB2(r, ci->progeny[3], cj->progeny[6], -1, 0);
-          if (ci->progeny[5] != NULL && cj->progeny[0] != NULL)
-            DOSUB2(r, ci->progeny[5], cj->progeny[0], -1, 0);
-          if (ci->progeny[5] != NULL && cj->progeny[2] != NULL)
-            DOSUB2(r, ci->progeny[5], cj->progeny[2], -1, 0);
-          if (ci->progeny[5] != NULL && cj->progeny[4] != NULL)
-            DOSUB2(r, ci->progeny[5], cj->progeny[4], -1, 0);
-          if (ci->progeny[5] != NULL && cj->progeny[6] != NULL)
-            DOSUB2(r, ci->progeny[5], cj->progeny[6], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
-            DOSUB2(r, ci->progeny[7], cj->progeny[0], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[2] != NULL)
-            DOSUB2(r, ci->progeny[7], cj->progeny[2], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[4] != NULL)
-            DOSUB2(r, ci->progeny[7], cj->progeny[4], -1, 0);
-          if (ci->progeny[7] != NULL && cj->progeny[6] != NULL)
-            DOSUB2(r, ci->progeny[7], cj->progeny[6], -1, 0);
-          break;
-      }
+  TIMER_TIC
 
-    }
+  /* Should we even bother? */
+  if (ci->ti_end_min > ti_current) return;
 
-    /* Otherwise, compute the pair directly. */
-    else if (ci->ti_end_min <= ti_current || cj->ti_end_min <= ti_current) {
+  /* Recurse? */
+  if (ci->split) {
 
-      /* Do any of the cells need to be sorted first? */
-      if (!(ci->sorted & (1 << sid))) runner_do_sort(r, ci, (1 << sid), 1);
-      if (!(cj->sorted & (1 << sid))) runner_do_sort(r, cj, (1 << sid), 1);
+    /* Loop over all progeny. */
+    for (int k = 0; k < 8; k++)
+      if (ci->progeny[k] != NULL) {
+        DOSUB_SELF2(r, ci->progeny[k], 0);
+        for (int j = k + 1; j < 8; j++)
+          if (ci->progeny[j] != NULL)
+            DOSUB_PAIR2(r, ci->progeny[k], ci->progeny[j], -1, 0);
+      }
 
-      /* Compute the interactions. */
-      DOPAIR2(r, ci, cj);
-    }
+  }
 
-  } /* otherwise, pair interaction. */
+  /* Otherwise, compute self-interaction. */
+  else
+    DOSELF2(r, ci);
 
-  if (gettimer) TIMER_TOC(TIMER_DOSUB);
+  if (gettimer) TIMER_TOC(TIMER_DOSUB_SELF);
 }
 
 void DOSUB_SUBSET(struct runner *r, struct cell *ci, struct part *parts,
@@ -2856,5 +2882,5 @@ void DOSUB_SUBSET(struct runner *r, struct cell *ci, struct part *parts,
 
   } /* otherwise, pair interaction. */
 
-  if (gettimer) TIMER_TOC(TIMER_DOSUB);
+  if (gettimer) TIMER_TOC(TIMER_DOSUB_PAIR);
 }
diff --git a/src/scheduler.c b/src/scheduler.c
index d58957f521f3b910e5360594aa1f0210c1dc4532..496df93adf5a656460f7b39904a3cc58ac3e5caa 100644
--- a/src/scheduler.c
+++ b/src/scheduler.c
@@ -173,7 +173,7 @@ void scheduler_splittasks(struct scheduler *s) {
         if (scheduler_dosub && ci->count < space_subsize / ci->count) {
 
           /* convert to a self-subtask. */
-          t->type = task_type_sub;
+          t->type = task_type_sub_self;
 
         }
 
@@ -235,7 +235,7 @@ void scheduler_splittasks(struct scheduler *s) {
             sid != 0 && sid != 2 && sid != 6 && sid != 8) {
 
           /* Make this task a sub task. */
-          t->type = task_type_sub;
+          t->type = task_type_sub_pair;
 
         }
 
@@ -515,132 +515,6 @@ void scheduler_splittasks(struct scheduler *s) {
 
     } /* pair interaction? */
 
-    /* Gravity interaction? */
-    else if (t->type == task_type_grav_mm) {
-
-      /* Get a handle on the cells involved. */
-      struct cell *ci = t->ci;
-      struct cell *cj = t->cj;
-
-      /* Self-interaction? */
-      if (cj == NULL) {
-
-        /* Ignore this task if the cell has no gparts. */
-        if (ci->gcount == 0) t->type = task_type_none;
-
-        /* If the cell is split, recurse. */
-        else if (ci->split) {
-
-          /* Make a single sub-task? */
-          if (scheduler_dosub && ci->gcount < space_subsize / ci->gcount) {
-
-            t->type = task_type_sub;
-            t->subtype = task_subtype_grav;
-
-          }
-
-          /* Otherwise, just split the task. */
-          else {
-
-            /* Split this task into tasks on its progeny. */
-            t->type = task_type_none;
-            for (int j = 0; j < 8; j++)
-              if (ci->progeny[j] != NULL && ci->progeny[j]->gcount > 0) {
-                if (t->type == task_type_none) {
-                  t->type = task_type_grav_mm;
-                  t->ci = ci->progeny[j];
-                  t->cj = NULL;
-                } else
-                  t = scheduler_addtask(s, task_type_grav_mm, task_subtype_none,
-                                        0, 0, ci->progeny[j], NULL, 0);
-                for (int k = j + 1; k < 8; k++)
-                  if (ci->progeny[k] != NULL && ci->progeny[k]->gcount > 0) {
-                    if (t->type == task_type_none) {
-                      t->type = task_type_grav_mm;
-                      t->ci = ci->progeny[j];
-                      t->cj = ci->progeny[k];
-                    } else
-                      t = scheduler_addtask(s, task_type_grav_mm,
-                                            task_subtype_none, 0, 0,
-                                            ci->progeny[j], ci->progeny[k], 0);
-                  }
-              }
-            redo = (t->type != task_type_none);
-          }
-
-        }
-
-        /* Otherwise, just make a pp task out of it. */
-        else
-          t->type = task_type_grav_pp;
-
-      }
-
-      /* Nope, pair. */
-      else {
-
-        /* Make a sub-task? */
-        if (scheduler_dosub && ci->gcount < space_subsize / cj->gcount) {
-
-          t->type = task_type_sub;
-          t->subtype = task_subtype_grav;
-
-        }
-
-        /* Otherwise, split the task. */
-        else {
-
-          /* Get the opening angle theta. */
-          float dx[3], theta;
-          for (int k = 0; k < 3; k++) {
-            dx[k] = fabs(ci->loc[k] - cj->loc[k]);
-            if (s->space->periodic && dx[k] > 0.5 * s->space->dim[k])
-              dx[k] = -dx[k] + s->space->dim[k];
-            if (dx[k] > 0.0f) dx[k] -= ci->h[k];
-          }
-          theta =
-              (dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]) /
-              (ci->h[0] * ci->h[0] + ci->h[1] * ci->h[1] + ci->h[2] * ci->h[2]);
-
-          /* Ignore this task if the cell has no gparts. */
-          if (ci->gcount == 0 || cj->gcount == 0) t->type = task_type_none;
-
-          /* Split the interaction? */
-          else if (theta < const_theta_max * const_theta_max) {
-
-            /* Are both ci and cj split? */
-            if (ci->split && cj->split) {
-
-              /* Split this task into tasks on its progeny. */
-              t->type = task_type_none;
-              for (int j = 0; j < 8; j++)
-                if (ci->progeny[j] != NULL && ci->progeny[j]->gcount > 0) {
-                  for (int k = 0; k < 8; k++)
-                    if (cj->progeny[k] != NULL && cj->progeny[k]->gcount > 0) {
-                      if (t->type == task_type_none) {
-                        t->type = task_type_grav_mm;
-                        t->ci = ci->progeny[j];
-                        t->cj = cj->progeny[k];
-                      } else
-                        t = scheduler_addtask(
-                            s, task_type_grav_mm, task_subtype_none, 0, 0,
-                            ci->progeny[j], cj->progeny[k], 0);
-                    }
-                }
-              redo = (t->type != task_type_none);
-
-            }
-
-            /* Otherwise, make a pp task out of it. */
-            else
-              t->type = task_type_grav_pp;
-          }
-        }
-
-      } /* gravity pair interaction? */
-
-    } /* gravity interaction? */
-
   } /* loop over all tasks. */
 }
 
@@ -899,23 +773,23 @@ void scheduler_reweight(struct scheduler *s) {
             t->weight +=
                 2 * wscale * t->ci->count * t->cj->count * sid_scale[t->flags];
           break;
-        case task_type_sub:
-          if (t->cj != NULL) {
-            if (t->ci->nodeID != nodeID || t->cj->nodeID != nodeID) {
-              if (t->flags < 0)
-                t->weight += 3 * wscale * t->ci->count * t->cj->count;
-              else
-                t->weight += 3 * wscale * t->ci->count * t->cj->count *
-                             sid_scale[t->flags];
-            } else {
-              if (t->flags < 0)
-                t->weight += 2 * wscale * t->ci->count * t->cj->count;
-              else
-                t->weight += 2 * wscale * t->ci->count * t->cj->count *
-                             sid_scale[t->flags];
-            }
-          } else
-            t->weight += 1 * wscale * t->ci->count * t->ci->count;
+        case task_type_sub_pair:
+          if (t->ci->nodeID != nodeID || t->cj->nodeID != nodeID) {
+            if (t->flags < 0)
+              t->weight += 3 * wscale * t->ci->count * t->cj->count;
+            else
+              t->weight += 3 * wscale * t->ci->count * t->cj->count *
+                           sid_scale[t->flags];
+          } else {
+            if (t->flags < 0)
+              t->weight += 2 * wscale * t->ci->count * t->cj->count;
+            else
+              t->weight += 2 * wscale * t->ci->count * t->cj->count *
+                           sid_scale[t->flags];
+          }
+          break;
+        case task_type_sub_self:
+          t->weight += 1 * wscale * t->ci->count * t->ci->count;
           break;
         case task_type_ghost:
           if (t->ci == t->ci->super) t->weight += wscale * t->ci->count;
@@ -1082,6 +956,7 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) {
        any pre-processing needed. */
     switch (t->type) {
       case task_type_self:
+      case task_type_sub_self:
       case task_type_sort:
       case task_type_ghost:
       case task_type_kick:
@@ -1090,11 +965,10 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) {
         qid = t->ci->super->owner;
         break;
       case task_type_pair:
-      case task_type_sub:
+      case task_type_sub_pair:
         qid = t->ci->super->owner;
-        if (t->cj != NULL &&
-            (qid < 0 ||
-             s->queues[qid].count > s->queues[t->cj->super->owner].count))
+        if (qid < 0 ||
+            s->queues[qid].count > s->queues[t->cj->super->owner].count)
           qid = t->cj->super->owner;
         break;
       case task_type_recv:
diff --git a/src/task.c b/src/task.c
index 1aad53f103c0f1ca600c7b093575923284bce3b5..77c5a944c8ca23e870624770c1e0cde4bf6195be 100644
--- a/src/task.c
+++ b/src/task.c
@@ -47,11 +47,11 @@
 
 /* Task type names. */
 const char *taskID_names[task_type_count] = {
-    "none",      "sort",          "self",      "pair",       "sub",
-    "init",      "ghost",         "drift",     "kick",       "kick_fixdt",
-    "send",      "recv",          "grav_pp",   "grav_mm",    "grav_up",
-    "grav_down", "grav_external", "part_sort", "gpart_sort", "split_cell",
-    "rewait"};
+    "none",       "sort",      "self",          "pair",      "sub_self",
+    "sub_pair",   "init",      "ghost",         "drift",     "kick",
+    "kick_fixdt", "send",      "recv",          "grav_pp",   "grav_mm",
+    "grav_up",    "grav_down", "grav_external", "part_sort", "gpart_sort",
+    "split_cell", "rewait"};
 
 const char *subtaskID_names[task_type_count] = {"none", "density", "force",
                                                 "grav"};
@@ -117,13 +117,14 @@ void task_unlock(struct task *t) {
   /* Act based on task type. */
   switch (t->type) {
     case task_type_self:
+    case task_type_sub_self:
     case task_type_sort:
       cell_unlocktree(t->ci);
       break;
     case task_type_pair:
-    case task_type_sub:
+    case task_type_sub_pair:
       cell_unlocktree(t->ci);
-      if (t->cj != NULL) cell_unlocktree(t->cj);
+      cell_unlocktree(t->cj);
       break;
     case task_type_grav_pp:
     case task_type_grav_mm:
@@ -144,55 +145,68 @@ void task_unlock(struct task *t) {
 
 int task_lock(struct task *t) {
 
-  int type = t->type;
+  const int type = t->type;
+  const int subtype = t->subtype;
   struct cell *ci = t->ci, *cj = t->cj;
+#ifdef WITH_MPI
+  int res = 0, err = 0;
+  MPI_Status stat;
+#endif
 
-  /* Communication task? */
-  if (type == task_type_recv || type == task_type_send) {
+  switch (type) {
 
+    /* Communication task? */
+    case task_type_recv:
+    case task_type_send:
 #ifdef WITH_MPI
-    /* Check the status of the MPI request. */
-    int res = 0, err = 0;
-    MPI_Status stat;
-    if ((err = MPI_Test(&t->req, &res, &stat)) != MPI_SUCCESS) {
-      char buff[MPI_MAX_ERROR_STRING];
-      int len;
-      MPI_Error_string(err, buff, &len);
-      error("Failed to test request on send/recv task (tag=%i, %s).", t->flags,
-            buff);
-    }
-    return res;
+      /* Check the status of the MPI request. */
+      if ((err = MPI_Test(&t->req, &res, &stat)) != MPI_SUCCESS) {
+        char buff[MPI_MAX_ERROR_STRING];
+        int len;
+        MPI_Error_string(err, buff, &len);
+        error("Failed to test request on send/recv task (tag=%i, %s).",
+              t->flags, buff);
+      }
+      return res;
 #else
-    error("SWIFT was not compiled with MPI support.");
+      error("SWIFT was not compiled with MPI support.");
 #endif
+      break;
 
-  }
+    case task_type_sort:
+      if (cell_locktree(ci) != 0) return 0;
+      break;
 
-  /* Unary lock? */
-  else if (type == task_type_self || type == task_type_sort ||
-           (type == task_type_sub && cj == NULL)) {
-    if (cell_locktree(ci) != 0) return 0;
-  }
+    case task_type_self:
+    case task_type_sub_self:
+      if (subtype == task_subtype_grav) {
+        if (cell_glocktree(ci) != 0) return 0;
+      } else {
+        if (cell_locktree(ci) != 0) return 0;
+      }
+      break;
 
-  /* Otherwise, binary lock. */
-  else if (type == task_type_pair || (type == task_type_sub && cj != NULL)) {
-    if (ci->hold || cj->hold) return 0;
-    if (cell_locktree(ci) != 0) return 0;
-    if (cell_locktree(cj) != 0) {
-      cell_unlocktree(ci);
-      return 0;
-    }
-  }
+    case task_type_pair:
+    case task_type_sub_pair:
+      if (subtype == task_subtype_grav) {
+        if (ci->ghold || cj->ghold) return 0;
+        if (cell_glocktree(ci) != 0) return 0;
+        if (cell_glocktree(cj) != 0) {
+          cell_gunlocktree(ci);
+          return 0;
+        }
+      } else {
+        if (ci->hold || cj->hold) return 0;
+        if (cell_locktree(ci) != 0) return 0;
+        if (cell_locktree(cj) != 0) {
+          cell_unlocktree(ci);
+          return 0;
+        }
+      }
+      break;
 
-  /* Gravity tasks? */
-  else if (type == task_type_grav_mm || type == task_type_grav_pp ||
-           type == task_type_grav_down) {
-    if (ci->ghold || (cj != NULL && cj->ghold)) return 0;
-    if (cell_glocktree(ci) != 0) return 0;
-    if (cj != NULL && cell_glocktree(cj) != 0) {
-      cell_gunlocktree(ci);
-      return 0;
-    }
+    default:
+      break;
   }
 
   /* If we made it this far, we've got a lock. */
@@ -270,48 +284,6 @@ void task_rmunlock_blind(struct task *ta, struct task *tb) {
   lock_unlock_blind(&ta->lock);
 }
 
-/**
- * @brief Add an unlock_task to the given task.
- *
- * @param ta The unlocking #task.
- * @param tb The #task that will be unlocked.
- */
-
-void task_addunlock(struct task *ta, struct task *tb) {
-
-  error("Use sched_addunlock instead.");
-
-  /* Add the lock atomically. */
-  ta->unlock_tasks[atomic_inc(&ta->nr_unlock_tasks)] = tb;
-
-  /* Check a posteriori if we did not overshoot. */
-  if (ta->nr_unlock_tasks > task_maxunlock)
-    error("Too many unlock_tasks in task.");
-}
-
-void task_addunlock_old(struct task *ta, struct task *tb) {
-
-  int k;
-
-  lock_lock(&ta->lock);
-
-  /* Check if ta already unlocks tb. */
-  for (k = 0; k < ta->nr_unlock_tasks; k++)
-    if (ta->unlock_tasks[k] == tb) {
-      error("Duplicate unlock.");
-      lock_unlock_blind(&ta->lock);
-      return;
-    }
-
-  if (ta->nr_unlock_tasks == task_maxunlock)
-    error("Too many unlock_tasks in task.");
-
-  ta->unlock_tasks[ta->nr_unlock_tasks] = tb;
-  ta->nr_unlock_tasks += 1;
-
-  lock_unlock_blind(&ta->lock);
-}
-
 /**
  * @brief Prints the list of tasks contained in a given mask
  *
diff --git a/src/task.h b/src/task.h
index b110c6621741c22770f7e84b1f145432bdfa34d5..51a44b3127694a60da772ca4e728073b15ac1147 100644
--- a/src/task.h
+++ b/src/task.h
@@ -37,7 +37,8 @@ enum task_types {
   task_type_sort,
   task_type_self,
   task_type_pair,
-  task_type_sub,
+  task_type_sub_self,
+  task_type_sub_pair,
   task_type_init,
   task_type_ghost,
   task_type_drift,
@@ -97,7 +98,6 @@ struct task {
 void task_rmunlock(struct task *ta, struct task *tb);
 void task_rmunlock_blind(struct task *ta, struct task *tb);
 void task_cleanunlock(struct task *t, int type);
-void task_addunlock(struct task *ta, struct task *tb);
 void task_unlock(struct task *t);
 float task_overlap(const struct task *ta, const struct task *tb);
 int task_lock(struct task *t);
diff --git a/src/timers.h b/src/timers.h
index 342f46a9703501bfd4a05d25342b4dcff4bef052..c48961b39737f23eb936d7283f76651d33892991 100644
--- a/src/timers.h
+++ b/src/timers.h
@@ -42,9 +42,12 @@ enum {
   timer_dopair_force,
   timer_dopair_grav,
   timer_dograv_external,
-  timer_dosub_density,
-  timer_dosub_force,
-  timer_dosub_grav,
+  timer_dosub_self_density,
+  timer_dosub_self_force,
+  timer_dosub_self_grav,
+  timer_dosub_pair_density,
+  timer_dosub_pair_force,
+  timer_dosub_pair_grav,
   timer_dopair_subset,
   timer_do_ghost,
   timer_dorecv_cell,