diff --git a/src/Makefile.am b/src/Makefile.am
index 480953c6aad3857d3ca8c25d61274f71cd973931..665aa4b24c94162fb8f772edd346f3c95a1d7ddb 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -44,7 +44,7 @@ include_HEADERS = space.h runner.h queue.h task.h lock.h cell.h part.h const.h \
     common_io.h single_io.h multipole.h map.h tools.h partition.h partition_fixed_costs.h \
     clocks.h parser.h physical_constants.h physical_constants_cgs.h potential.h version.h \
     hydro_properties.h riemann.h threadpool.h cooling_io.h cooling.h cooling_struct.h \
-    statistics.h memswap.h cache.h runner_doiact_vec.h profiler.h entropy_floor.h \
+    statistics.h memswap.h cache.h runner_doiact_hydro_vec.h profiler.h entropy_floor.h \
     dump.h logger.h active.h timeline.h xmf.h gravity_properties.h gravity_derivatives.h \
     gravity_softened_derivatives.h vector_power.h collectgroup.h hydro_space.h sort_part.h \
     chemistry.h chemistry_io.h chemistry_struct.h cosmology.h restart.h space_getsid.h utilities.h \
@@ -69,13 +69,18 @@ EAGLE_FEEDBACK_SOURCES += feedback/EAGLE/feedback.c
 endif
 
 # Common source files
-AM_SOURCES = space.c runner.c queue.c task.c cell.c engine.c engine_maketasks.c \
-    engine_marktasks.c engine_drift.c engine_unskip.c serial_io.c timers.c debug.c scheduler.c \
+AM_SOURCES = space.c runner_main.c runner_doiact_hydro.c runner_doiact_grav.c \
+    runner_doiact_stars.c runner_doiact_black_holes.c runner_ghost.c runner_recv.c \
+    runner_sort.c runner_drift.c runner_black_holes.c runner_time_integration.c \
+    runner_doiact_hydro_vec.c runner_others.c\
+    queue.c task.c cell.c engine.c engine_maketasks.c \
+    engine_marktasks.c engine_drift.c engine_unskip.c engine_collect_end_of_step.c \
+    engine_redistribute.c engine_fof.c serial_io.c timers.c debug.c scheduler.c \
     proxy.c parallel_io.c units.c common_io.c single_io.c multipole.c version.c map.c \
     kernel_hydro.c tools.c part.c partition.c clocks.c parser.c \
     physical_constants.c potential.c hydro_properties.c \
     threadpool.c cooling.c star_formation.c \
-    statistics.c runner_doiact_vec.c profiler.c dump.c logger.c \
+    statistics.c profiler.c dump.c logger.c \
     part_type.c xmf.c gravity_properties.c gravity.c \
     collectgroup.c hydro_space.c equation_of_state.c \
     chemistry.c cosmology.c restart.c mesh_gravity.c velociraptor_interface.c \
@@ -85,8 +90,10 @@ AM_SOURCES = space.c runner.c queue.c task.c cell.c engine.c engine_maketasks.c
 
 # Include files for distribution, not installation.
 nobase_noinst_HEADERS = align.h approx_math.h atomic.h barrier.h cycle.h error.h inline.h kernel_hydro.h kernel_gravity.h \
-		 gravity_iact.h kernel_long_gravity.h vector.h cache.h runner_doiact.h runner_doiact_vec.h runner_doiact_grav.h  \
-                 runner_doiact_nosort.h runner_doiact_stars.h runner_doiact_black_holes.h units.h intrinsics.h minmax.h \
+		 gravity_iact.h kernel_long_gravity.h vector.h cache.h \
+	         runner_doiact_nosort.h runner_doiact_hydro.h runner_doiact_stars.h runner_doiact_black_holes.h runner_doiact_grav.h \
+                 runner_doiact_functions_hydro.h runner_doiact_functions_stars.h runner_doiact_functions_black_holes.h \
+		 units.h intrinsics.h minmax.h \
                  kick.h timestep.h drift.h adiabatic_index.h io_properties.h dimension.h part_type.h periodic.h memswap.h \
                  dump.h logger.h sign.h logger_io.h timestep_limiter.h hashmap.h \
 		 gravity.h gravity_io.h gravity_cache.h \
diff --git a/src/engine.c b/src/engine.c
index 61ba7051cdee0156292289b6cfd8a504ea668747..68fa1b1d949e189a13e0f03bcc0c0379e14bc203 100644
--- a/src/engine.c
+++ b/src/engine.c
@@ -67,7 +67,6 @@
 #include "logger.h"
 #include "logger_io.h"
 #include "map.h"
-#include "memswap.h"
 #include "memuse.h"
 #include "minmax.h"
 #include "outputlist.h"
@@ -128,22 +127,6 @@ int engine_current_step;
 extern int engine_max_parts_per_ghost;
 extern int engine_max_sparts_per_ghost;
 
-/**
- * @brief Data collected from the cells at the end of a time-step
- */
-struct end_of_step_data {
-
-  size_t updated, g_updated, s_updated, b_updated;
-  size_t inhibited, g_inhibited, s_inhibited, b_inhibited;
-  integertime_t ti_hydro_end_min, ti_hydro_end_max, ti_hydro_beg_max;
-  integertime_t ti_gravity_end_min, ti_gravity_end_max, ti_gravity_beg_max;
-  integertime_t ti_stars_end_min, ti_stars_end_max, ti_stars_beg_max;
-  integertime_t ti_black_holes_end_min, ti_black_holes_end_max,
-      ti_black_holes_beg_max;
-  struct engine *e;
-  struct star_formation_history sfh;
-};
-
 /**
  * @brief Link a density/force task to a cell.
  *
@@ -175,1007 +158,6 @@ void engine_addlink(struct engine *e, struct link **l, struct task *t) {
   res->next = atomic_swap(l, res);
 }
 
-#ifdef WITH_MPI
-/**
- * Do the exchange of one type of particles with all the other nodes.
- *
- * @param label a label for the memory allocations of this particle type.
- * @param counts 2D array with the counts of particles to exchange with
- *               each other node.
- * @param parts the particle data to exchange
- * @param new_nr_parts the number of particles this node will have after all
- *                     exchanges have completed.
- * @param sizeofparts sizeof the particle struct.
- * @param alignsize the memory alignment required for this particle type.
- * @param mpi_type the MPI_Datatype for these particles.
- * @param nr_nodes the number of nodes to exchange with.
- * @param nodeID the id of this node.
- *
- * @result new particle data constructed from all the exchanges with the
- *         given alignment.
- */
-static void *engine_do_redistribute(const char *label, int *counts, char *parts,
-                                    size_t new_nr_parts, size_t sizeofparts,
-                                    size_t alignsize, MPI_Datatype mpi_type,
-                                    int nr_nodes, int nodeID) {
-
-  /* Allocate a new particle array with some extra margin */
-  char *parts_new = NULL;
-  if (swift_memalign(
-          label, (void **)&parts_new, alignsize,
-          sizeofparts * new_nr_parts * engine_redistribute_alloc_margin) != 0)
-    error("Failed to allocate new particle data.");
-
-  /* Prepare MPI requests for the asynchronous communications */
-  MPI_Request *reqs;
-  if ((reqs = (MPI_Request *)malloc(sizeof(MPI_Request) * 2 * nr_nodes)) ==
-      NULL)
-    error("Failed to allocate MPI request list.");
-
-  /* Only send and receive only "chunk" particles per request. So we need to
-   * loop as many times as necessary here. Make 2Gb/sizeofparts so we only
-   * send 2Gb packets. */
-  const int chunk = INT_MAX / sizeofparts;
-  int sent = 0;
-  int recvd = 0;
-
-  int activenodes = 1;
-  while (activenodes) {
-
-    for (int k = 0; k < 2 * nr_nodes; k++) reqs[k] = MPI_REQUEST_NULL;
-
-    /* Emit the sends and recvs for the data. */
-    size_t offset_send = sent;
-    size_t offset_recv = recvd;
-    activenodes = 0;
-
-    for (int k = 0; k < nr_nodes; k++) {
-
-      /* Indices in the count arrays of the node of interest */
-      const int ind_send = nodeID * nr_nodes + k;
-      const int ind_recv = k * nr_nodes + nodeID;
-
-      /* Are we sending any data this loop? */
-      int sending = counts[ind_send] - sent;
-      if (sending > 0) {
-        activenodes++;
-        if (sending > chunk) sending = chunk;
-
-        /* If the send and receive is local then just copy. */
-        if (k == nodeID) {
-          int receiving = counts[ind_recv] - recvd;
-          if (receiving > chunk) receiving = chunk;
-          memcpy(&parts_new[offset_recv * sizeofparts],
-                 &parts[offset_send * sizeofparts], sizeofparts * receiving);
-        } else {
-          /* Otherwise send it. */
-          int res =
-              MPI_Isend(&parts[offset_send * sizeofparts], sending, mpi_type, k,
-                        ind_send, MPI_COMM_WORLD, &reqs[2 * k + 0]);
-          if (res != MPI_SUCCESS)
-            mpi_error(res, "Failed to isend parts to node %i.", k);
-        }
-      }
-
-      /* If we're sending to this node, then move past it to next. */
-      if (counts[ind_send] > 0) offset_send += counts[ind_send];
-
-      /* Are we receiving any data from this node? Note already done if coming
-       * from this node. */
-      if (k != nodeID) {
-        int receiving = counts[ind_recv] - recvd;
-        if (receiving > 0) {
-          activenodes++;
-          if (receiving > chunk) receiving = chunk;
-          int res = MPI_Irecv(&parts_new[offset_recv * sizeofparts], receiving,
-                              mpi_type, k, ind_recv, MPI_COMM_WORLD,
-                              &reqs[2 * k + 1]);
-          if (res != MPI_SUCCESS)
-            mpi_error(res, "Failed to emit irecv of parts from node %i.", k);
-        }
-      }
-
-      /* If we're receiving from this node, then move past it to next. */
-      if (counts[ind_recv] > 0) offset_recv += counts[ind_recv];
-    }
-
-    /* Wait for all the sends and recvs to tumble in. */
-    MPI_Status stats[2 * nr_nodes];
-    int res;
-    if ((res = MPI_Waitall(2 * nr_nodes, reqs, stats)) != MPI_SUCCESS) {
-      for (int k = 0; k < 2 * nr_nodes; k++) {
-        char buff[MPI_MAX_ERROR_STRING];
-        MPI_Error_string(stats[k].MPI_ERROR, buff, &res);
-        message("request from source %i, tag %i has error '%s'.",
-                stats[k].MPI_SOURCE, stats[k].MPI_TAG, buff);
-      }
-      error("Failed during waitall for part data.");
-    }
-
-    /* Move to next chunks. */
-    sent += chunk;
-    recvd += chunk;
-  }
-
-  /* Free temps. */
-  free(reqs);
-
-  /* And return new memory. */
-  return parts_new;
-}
-#endif
-
-#ifdef WITH_MPI /* redist_mapper */
-
-/* Support for engine_redistribute threadpool dest mappers. */
-struct redist_mapper_data {
-  int *counts;
-  int *dest;
-  int nodeID;
-  int nr_nodes;
-  struct cell *cells;
-  struct space *s;
-  void *base;
-};
-
-/* Generic function for accumulating counts for TYPE parts. Note
- * we use a local counts array to avoid the atomic_add in the parts
- * loop. */
-#define ENGINE_REDISTRIBUTE_DEST_MAPPER(TYPE)                              \
-  engine_redistribute_dest_mapper_##TYPE(void *map_data, int num_elements, \
-                                         void *extra_data) {               \
-    struct TYPE *parts = (struct TYPE *)map_data;                          \
-    struct redist_mapper_data *mydata =                                    \
-        (struct redist_mapper_data *)extra_data;                           \
-    struct space *s = mydata->s;                                           \
-    int *dest =                                                            \
-        mydata->dest + (ptrdiff_t)(parts - (struct TYPE *)mydata->base);   \
-    int *lcounts = NULL;                                                   \
-    if ((lcounts = (int *)calloc(                                          \
-             sizeof(int), mydata->nr_nodes * mydata->nr_nodes)) == NULL)   \
-      error("Failed to allocate counts thread-specific buffer");           \
-    for (int k = 0; k < num_elements; k++) {                               \
-      for (int j = 0; j < 3; j++) {                                        \
-        if (parts[k].x[j] < 0.0)                                           \
-          parts[k].x[j] += s->dim[j];                                      \
-        else if (parts[k].x[j] >= s->dim[j])                               \
-          parts[k].x[j] -= s->dim[j];                                      \
-      }                                                                    \
-      const int cid = cell_getid(s->cdim, parts[k].x[0] * s->iwidth[0],    \
-                                 parts[k].x[1] * s->iwidth[1],             \
-                                 parts[k].x[2] * s->iwidth[2]);            \
-      dest[k] = s->cells_top[cid].nodeID;                                  \
-      size_t ind = mydata->nodeID * mydata->nr_nodes + dest[k];            \
-      lcounts[ind] += 1;                                                   \
-    }                                                                      \
-    for (int k = 0; k < (mydata->nr_nodes * mydata->nr_nodes); k++)        \
-      atomic_add(&mydata->counts[k], lcounts[k]);                          \
-    free(lcounts);                                                         \
-  }
-
-/**
- * @brief Accumulate the counts of particles per cell.
- * Threadpool helper for accumulating the counts of particles per cell.
- *
- * part version.
- */
-static void ENGINE_REDISTRIBUTE_DEST_MAPPER(part);
-
-/**
- * @brief Accumulate the counts of star particles per cell.
- * Threadpool helper for accumulating the counts of particles per cell.
- *
- * spart version.
- */
-static void ENGINE_REDISTRIBUTE_DEST_MAPPER(spart);
-
-/**
- * @brief Accumulate the counts of gravity particles per cell.
- * Threadpool helper for accumulating the counts of particles per cell.
- *
- * gpart version.
- */
-static void ENGINE_REDISTRIBUTE_DEST_MAPPER(gpart);
-
-/**
- * @brief Accumulate the counts of black holes particles per cell.
- * Threadpool helper for accumulating the counts of particles per cell.
- *
- * bpart version.
- */
-static void ENGINE_REDISTRIBUTE_DEST_MAPPER(bpart);
-
-#endif /* redist_mapper_data */
-
-#ifdef WITH_MPI /* savelink_mapper_data */
-
-/* Support for saving the linkage between gparts and parts/sparts. */
-struct savelink_mapper_data {
-  int nr_nodes;
-  int *counts;
-  void *parts;
-  int nodeID;
-};
-
-/**
- * @brief Save the offset of each gravity partner of a part or spart.
- *
- * The offset is from the start of the sorted particles to be sent to a node.
- * This is possible as parts without gravity partners have a positive id.
- * These offsets are used to restore the pointers on the receiving node.
- *
- * CHECKS should be eliminated as dead code when optimizing.
- */
-#define ENGINE_REDISTRIBUTE_SAVELINK_MAPPER(TYPE, CHECKS)                      \
-  engine_redistribute_savelink_mapper_##TYPE(void *map_data, int num_elements, \
-                                             void *extra_data) {               \
-    int *nodes = (int *)map_data;                                              \
-    struct savelink_mapper_data *mydata =                                      \
-        (struct savelink_mapper_data *)extra_data;                             \
-    int nodeID = mydata->nodeID;                                               \
-    int nr_nodes = mydata->nr_nodes;                                           \
-    int *counts = mydata->counts;                                              \
-    struct TYPE *parts = (struct TYPE *)mydata->parts;                         \
-                                                                               \
-    for (int j = 0; j < num_elements; j++) {                                   \
-      int node = nodes[j];                                                     \
-      int count = 0;                                                           \
-      size_t offset = 0;                                                       \
-      for (int i = 0; i < node; i++) offset += counts[nodeID * nr_nodes + i];  \
-                                                                               \
-      for (int k = 0; k < counts[nodeID * nr_nodes + node]; k++) {             \
-        if (parts[k + offset].gpart != NULL) {                                 \
-          if (CHECKS)                                                          \
-            if (parts[k + offset].gpart->id_or_neg_offset > 0)                 \
-              error("Trying to link a partnerless " #TYPE "!");                \
-          parts[k + offset].gpart->id_or_neg_offset = -count;                  \
-          count++;                                                             \
-        }                                                                      \
-      }                                                                        \
-    }                                                                          \
-  }
-
-/**
- * @brief Save position of part-gpart links.
- * Threadpool helper for accumulating the counts of particles per cell.
- */
-#ifdef SWIFT_DEBUG_CHECKS
-static void ENGINE_REDISTRIBUTE_SAVELINK_MAPPER(part, 1);
-#else
-static void ENGINE_REDISTRIBUTE_SAVELINK_MAPPER(part, 0);
-#endif
-
-/**
- * @brief Save position of spart-gpart links.
- * Threadpool helper for accumulating the counts of particles per cell.
- */
-#ifdef SWIFT_DEBUG_CHECKS
-static void ENGINE_REDISTRIBUTE_SAVELINK_MAPPER(spart, 1);
-#else
-static void ENGINE_REDISTRIBUTE_SAVELINK_MAPPER(spart, 0);
-#endif
-
-/**
- * @brief Save position of bpart-gpart links.
- * Threadpool helper for accumulating the counts of particles per cell.
- */
-#ifdef SWIFT_DEBUG_CHECKS
-static void ENGINE_REDISTRIBUTE_SAVELINK_MAPPER(bpart, 1);
-#else
-static void ENGINE_REDISTRIBUTE_SAVELINK_MAPPER(bpart, 0);
-#endif
-
-#endif /* savelink_mapper_data */
-
-#ifdef WITH_MPI /* relink_mapper_data */
-
-/* Support for relinking parts, gparts, sparts and bparts after moving between
- * nodes. */
-struct relink_mapper_data {
-  int nodeID;
-  int nr_nodes;
-  int *counts;
-  int *s_counts;
-  int *g_counts;
-  int *b_counts;
-  struct space *s;
-};
-
-/**
- * @brief Restore the part/gpart and spart/gpart links for a list of nodes.
- *
- * @param map_data address of nodes to process.
- * @param num_elements the number nodes to process.
- * @param extra_data additional data defining the context (a
- * relink_mapper_data).
- */
-static void engine_redistribute_relink_mapper(void *map_data, int num_elements,
-                                              void *extra_data) {
-
-  int *nodes = (int *)map_data;
-  struct relink_mapper_data *mydata = (struct relink_mapper_data *)extra_data;
-
-  int nodeID = mydata->nodeID;
-  int nr_nodes = mydata->nr_nodes;
-  int *counts = mydata->counts;
-  int *g_counts = mydata->g_counts;
-  int *s_counts = mydata->s_counts;
-  int *b_counts = mydata->b_counts;
-  struct space *s = mydata->s;
-
-  for (int i = 0; i < num_elements; i++) {
-
-    int node = nodes[i];
-
-    /* Get offsets to correct parts of the counts arrays for this node. */
-    size_t offset_parts = 0;
-    size_t offset_gparts = 0;
-    size_t offset_sparts = 0;
-    size_t offset_bparts = 0;
-    for (int n = 0; n < node; n++) {
-      int ind_recv = n * nr_nodes + nodeID;
-      offset_parts += counts[ind_recv];
-      offset_gparts += g_counts[ind_recv];
-      offset_sparts += s_counts[ind_recv];
-      offset_bparts += b_counts[ind_recv];
-    }
-
-    /* Number of gparts sent from this node. */
-    int ind_recv = node * nr_nodes + nodeID;
-    const size_t count_gparts = g_counts[ind_recv];
-
-    /* Loop over the gparts received from this node */
-    for (size_t k = offset_gparts; k < offset_gparts + count_gparts; k++) {
-
-      /* Does this gpart have a gas partner ? */
-      if (s->gparts[k].type == swift_type_gas) {
-
-        const ptrdiff_t partner_index =
-            offset_parts - s->gparts[k].id_or_neg_offset;
-
-        /* Re-link */
-        s->gparts[k].id_or_neg_offset = -partner_index;
-        s->parts[partner_index].gpart = &s->gparts[k];
-      }
-
-      /* Does this gpart have a star partner ? */
-      else if (s->gparts[k].type == swift_type_stars) {
-
-        const ptrdiff_t partner_index =
-            offset_sparts - s->gparts[k].id_or_neg_offset;
-
-        /* Re-link */
-        s->gparts[k].id_or_neg_offset = -partner_index;
-        s->sparts[partner_index].gpart = &s->gparts[k];
-      }
-
-      /* Does this gpart have a black hole partner ? */
-      else if (s->gparts[k].type == swift_type_black_hole) {
-
-        const ptrdiff_t partner_index =
-            offset_bparts - s->gparts[k].id_or_neg_offset;
-
-        /* Re-link */
-        s->gparts[k].id_or_neg_offset = -partner_index;
-        s->bparts[partner_index].gpart = &s->gparts[k];
-      }
-    }
-  }
-}
-
-#endif /* relink_mapper_data */
-
-/**
- * @brief Redistribute the particles amongst the nodes according
- *      to their cell's node IDs.
- *
- * The strategy here is as follows:
- * 1) Each node counts the number of particles it has to send to each other
- * node.
- * 2) The number of particles of each type is then exchanged.
- * 3) The particles to send are placed in a temporary buffer in which the
- * part-gpart links are preserved.
- * 4) Each node allocates enough space for the new particles.
- * 5) (Asynchronous) communications are issued to transfer the data.
- *
- *
- * @param e The #engine.
- */
-void engine_redistribute(struct engine *e) {
-
-#ifdef WITH_MPI
-
-  const int nr_nodes = e->nr_nodes;
-  const int nodeID = e->nodeID;
-  struct space *s = e->s;
-  struct cell *cells = s->cells_top;
-  const int nr_cells = s->nr_cells;
-  struct xpart *xparts = s->xparts;
-  struct part *parts = s->parts;
-  struct gpart *gparts = s->gparts;
-  struct spart *sparts = s->sparts;
-  struct bpart *bparts = s->bparts;
-  ticks tic = getticks();
-
-  size_t nr_parts = s->nr_parts;
-  size_t nr_gparts = s->nr_gparts;
-  size_t nr_sparts = s->nr_sparts;
-  size_t nr_bparts = s->nr_bparts;
-
-  /* Start by moving inhibited particles to the end of the arrays */
-  for (size_t k = 0; k < nr_parts; /* void */) {
-    if (parts[k].time_bin == time_bin_inhibited ||
-        parts[k].time_bin == time_bin_not_created) {
-      nr_parts -= 1;
-
-      /* Swap the particle */
-      memswap(&parts[k], &parts[nr_parts], sizeof(struct part));
-
-      /* Swap the xpart */
-      memswap(&xparts[k], &xparts[nr_parts], sizeof(struct xpart));
-
-      /* Swap the link with the gpart */
-      if (parts[k].gpart != NULL) {
-        parts[k].gpart->id_or_neg_offset = -k;
-      }
-      if (parts[nr_parts].gpart != NULL) {
-        parts[nr_parts].gpart->id_or_neg_offset = -nr_parts;
-      }
-    } else {
-      k++;
-    }
-  }
-
-  /* Now move inhibited star particles to the end of the arrays */
-  for (size_t k = 0; k < nr_sparts; /* void */) {
-    if (sparts[k].time_bin == time_bin_inhibited ||
-        sparts[k].time_bin == time_bin_not_created) {
-      nr_sparts -= 1;
-
-      /* Swap the particle */
-      memswap(&s->sparts[k], &s->sparts[nr_sparts], sizeof(struct spart));
-
-      /* Swap the link with the gpart */
-      if (s->sparts[k].gpart != NULL) {
-        s->sparts[k].gpart->id_or_neg_offset = -k;
-      }
-      if (s->sparts[nr_sparts].gpart != NULL) {
-        s->sparts[nr_sparts].gpart->id_or_neg_offset = -nr_sparts;
-      }
-    } else {
-      k++;
-    }
-  }
-
-  /* Now move inhibited black hole particles to the end of the arrays */
-  for (size_t k = 0; k < nr_bparts; /* void */) {
-    if (bparts[k].time_bin == time_bin_inhibited ||
-        bparts[k].time_bin == time_bin_not_created) {
-      nr_bparts -= 1;
-
-      /* Swap the particle */
-      memswap(&s->bparts[k], &s->bparts[nr_bparts], sizeof(struct bpart));
-
-      /* Swap the link with the gpart */
-      if (s->bparts[k].gpart != NULL) {
-        s->bparts[k].gpart->id_or_neg_offset = -k;
-      }
-      if (s->bparts[nr_bparts].gpart != NULL) {
-        s->bparts[nr_bparts].gpart->id_or_neg_offset = -nr_bparts;
-      }
-    } else {
-      k++;
-    }
-  }
-
-  /* Finally do the same with the gravity particles */
-  for (size_t k = 0; k < nr_gparts; /* void */) {
-    if (gparts[k].time_bin == time_bin_inhibited ||
-        gparts[k].time_bin == time_bin_not_created) {
-      nr_gparts -= 1;
-
-      /* Swap the particle */
-      memswap(&s->gparts[k], &s->gparts[nr_gparts], sizeof(struct gpart));
-
-      /* Swap the link with part/spart */
-      if (s->gparts[k].type == swift_type_gas) {
-        s->parts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k];
-      } else if (s->gparts[k].type == swift_type_stars) {
-        s->sparts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k];
-      } else if (s->gparts[k].type == swift_type_black_hole) {
-        s->bparts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k];
-      }
-
-      if (s->gparts[nr_gparts].type == swift_type_gas) {
-        s->parts[-s->gparts[nr_gparts].id_or_neg_offset].gpart =
-            &s->gparts[nr_gparts];
-      } else if (s->gparts[nr_gparts].type == swift_type_stars) {
-        s->sparts[-s->gparts[nr_gparts].id_or_neg_offset].gpart =
-            &s->gparts[nr_gparts];
-      } else if (s->gparts[nr_gparts].type == swift_type_black_hole) {
-        s->bparts[-s->gparts[nr_gparts].id_or_neg_offset].gpart =
-            &s->gparts[nr_gparts];
-      }
-    } else {
-      k++;
-    }
-  }
-
-  /* Now we are ready to deal with real particles and can start the exchange. */
-
-  /* Allocate temporary arrays to store the counts of particles to be sent
-   * and the destination of each particle */
-  int *counts;
-  if ((counts = (int *)calloc(sizeof(int), nr_nodes * nr_nodes)) == NULL)
-    error("Failed to allocate counts temporary buffer.");
-
-  int *dest;
-  if ((dest = (int *)swift_malloc("dest", sizeof(int) * nr_parts)) == NULL)
-    error("Failed to allocate dest temporary buffer.");
-
-  /* Simple index of node IDs, used for mappers over nodes. */
-  int *nodes = NULL;
-  if ((nodes = (int *)malloc(sizeof(int) * nr_nodes)) == NULL)
-    error("Failed to allocate nodes temporary buffer.");
-  for (int k = 0; k < nr_nodes; k++) nodes[k] = k;
-
-  /* Get destination of each particle */
-  struct redist_mapper_data redist_data;
-  redist_data.s = s;
-  redist_data.nodeID = nodeID;
-  redist_data.nr_nodes = nr_nodes;
-
-  redist_data.counts = counts;
-  redist_data.dest = dest;
-  redist_data.base = (void *)parts;
-
-  threadpool_map(&e->threadpool, engine_redistribute_dest_mapper_part, parts,
-                 nr_parts, sizeof(struct part), 0, &redist_data);
-
-  /* Sort the particles according to their cell index. */
-  if (nr_parts > 0)
-    space_parts_sort(s->parts, s->xparts, dest, &counts[nodeID * nr_nodes],
-                     nr_nodes, 0);
-
-#ifdef SWIFT_DEBUG_CHECKS
-  /* Verify that the part have been sorted correctly. */
-  for (size_t k = 0; k < nr_parts; k++) {
-    const struct part *p = &s->parts[k];
-
-    if (p->time_bin == time_bin_inhibited)
-      error("Inhibited particle found after sorting!");
-
-    if (p->time_bin == time_bin_not_created)
-      error("Inhibited particle found after sorting!");
-
-    /* New cell index */
-    const int new_cid =
-        cell_getid(s->cdim, p->x[0] * s->iwidth[0], p->x[1] * s->iwidth[1],
-                   p->x[2] * s->iwidth[2]);
-
-    /* New cell of this part */
-    const struct cell *c = &s->cells_top[new_cid];
-    const int new_node = c->nodeID;
-
-    if (dest[k] != new_node)
-      error("part's new node index not matching sorted index.");
-
-    if (p->x[0] < c->loc[0] || p->x[0] > c->loc[0] + c->width[0] ||
-        p->x[1] < c->loc[1] || p->x[1] > c->loc[1] + c->width[1] ||
-        p->x[2] < c->loc[2] || p->x[2] > c->loc[2] + c->width[2])
-      error("part not sorted into the right top-level cell!");
-  }
-#endif
-
-  /* We will need to re-link the gpart partners of parts, so save their
-   * relative positions in the sent lists. */
-  if (nr_parts > 0 && nr_gparts > 0) {
-
-    struct savelink_mapper_data savelink_data;
-    savelink_data.nr_nodes = nr_nodes;
-    savelink_data.counts = counts;
-    savelink_data.parts = (void *)parts;
-    savelink_data.nodeID = nodeID;
-    threadpool_map(&e->threadpool, engine_redistribute_savelink_mapper_part,
-                   nodes, nr_nodes, sizeof(int), 0, &savelink_data);
-  }
-  swift_free("dest", dest);
-
-  /* Get destination of each s-particle */
-  int *s_counts;
-  if ((s_counts = (int *)calloc(sizeof(int), nr_nodes * nr_nodes)) == NULL)
-    error("Failed to allocate s_counts temporary buffer.");
-
-  int *s_dest;
-  if ((s_dest = (int *)swift_malloc("s_dest", sizeof(int) * nr_sparts)) == NULL)
-    error("Failed to allocate s_dest temporary buffer.");
-
-  redist_data.counts = s_counts;
-  redist_data.dest = s_dest;
-  redist_data.base = (void *)sparts;
-
-  threadpool_map(&e->threadpool, engine_redistribute_dest_mapper_spart, sparts,
-                 nr_sparts, sizeof(struct spart), 0, &redist_data);
-
-  /* Sort the particles according to their cell index. */
-  if (nr_sparts > 0)
-    space_sparts_sort(s->sparts, s_dest, &s_counts[nodeID * nr_nodes], nr_nodes,
-                      0);
-
-#ifdef SWIFT_DEBUG_CHECKS
-  /* Verify that the spart have been sorted correctly. */
-  for (size_t k = 0; k < nr_sparts; k++) {
-    const struct spart *sp = &s->sparts[k];
-
-    if (sp->time_bin == time_bin_inhibited)
-      error("Inhibited particle found after sorting!");
-
-    if (sp->time_bin == time_bin_not_created)
-      error("Inhibited particle found after sorting!");
-
-    /* New cell index */
-    const int new_cid =
-        cell_getid(s->cdim, sp->x[0] * s->iwidth[0], sp->x[1] * s->iwidth[1],
-                   sp->x[2] * s->iwidth[2]);
-
-    /* New cell of this spart */
-    const struct cell *c = &s->cells_top[new_cid];
-    const int new_node = c->nodeID;
-
-    if (s_dest[k] != new_node)
-      error("spart's new node index not matching sorted index.");
-
-    if (sp->x[0] < c->loc[0] || sp->x[0] > c->loc[0] + c->width[0] ||
-        sp->x[1] < c->loc[1] || sp->x[1] > c->loc[1] + c->width[1] ||
-        sp->x[2] < c->loc[2] || sp->x[2] > c->loc[2] + c->width[2])
-      error("spart not sorted into the right top-level cell!");
-  }
-#endif
-
-  /* We need to re-link the gpart partners of sparts. */
-  if (nr_sparts > 0) {
-
-    struct savelink_mapper_data savelink_data;
-    savelink_data.nr_nodes = nr_nodes;
-    savelink_data.counts = s_counts;
-    savelink_data.parts = (void *)sparts;
-    savelink_data.nodeID = nodeID;
-    threadpool_map(&e->threadpool, engine_redistribute_savelink_mapper_spart,
-                   nodes, nr_nodes, sizeof(int), 0, &savelink_data);
-  }
-  swift_free("s_dest", s_dest);
-
-  /* Get destination of each b-particle */
-  int *b_counts;
-  if ((b_counts = (int *)calloc(sizeof(int), nr_nodes * nr_nodes)) == NULL)
-    error("Failed to allocate b_counts temporary buffer.");
-
-  int *b_dest;
-  if ((b_dest = (int *)swift_malloc("b_dest", sizeof(int) * nr_bparts)) == NULL)
-    error("Failed to allocate b_dest temporary buffer.");
-
-  redist_data.counts = b_counts;
-  redist_data.dest = b_dest;
-  redist_data.base = (void *)bparts;
-
-  threadpool_map(&e->threadpool, engine_redistribute_dest_mapper_bpart, bparts,
-                 nr_bparts, sizeof(struct bpart), 0, &redist_data);
-
-  /* Sort the particles according to their cell index. */
-  if (nr_bparts > 0)
-    space_bparts_sort(s->bparts, b_dest, &b_counts[nodeID * nr_nodes], nr_nodes,
-                      0);
-
-#ifdef SWIFT_DEBUG_CHECKS
-  /* Verify that the bpart have been sorted correctly. */
-  for (size_t k = 0; k < nr_bparts; k++) {
-    const struct bpart *bp = &s->bparts[k];
-
-    if (bp->time_bin == time_bin_inhibited)
-      error("Inhibited particle found after sorting!");
-
-    if (bp->time_bin == time_bin_not_created)
-      error("Inhibited particle found after sorting!");
-
-    /* New cell index */
-    const int new_cid =
-        cell_getid(s->cdim, bp->x[0] * s->iwidth[0], bp->x[1] * s->iwidth[1],
-                   bp->x[2] * s->iwidth[2]);
-
-    /* New cell of this bpart */
-    const struct cell *c = &s->cells_top[new_cid];
-    const int new_node = c->nodeID;
-
-    if (b_dest[k] != new_node)
-      error("bpart's new node index not matching sorted index.");
-
-    if (bp->x[0] < c->loc[0] || bp->x[0] > c->loc[0] + c->width[0] ||
-        bp->x[1] < c->loc[1] || bp->x[1] > c->loc[1] + c->width[1] ||
-        bp->x[2] < c->loc[2] || bp->x[2] > c->loc[2] + c->width[2])
-      error("bpart not sorted into the right top-level cell!");
-  }
-#endif
-
-  /* We need to re-link the gpart partners of bparts. */
-  if (nr_bparts > 0) {
-
-    struct savelink_mapper_data savelink_data;
-    savelink_data.nr_nodes = nr_nodes;
-    savelink_data.counts = b_counts;
-    savelink_data.parts = (void *)bparts;
-    savelink_data.nodeID = nodeID;
-    threadpool_map(&e->threadpool, engine_redistribute_savelink_mapper_bpart,
-                   nodes, nr_nodes, sizeof(int), 0, &savelink_data);
-  }
-  swift_free("b_dest", b_dest);
-
-  /* Get destination of each g-particle */
-  int *g_counts;
-  if ((g_counts = (int *)calloc(sizeof(int), nr_nodes * nr_nodes)) == NULL)
-    error("Failed to allocate g_gcount temporary buffer.");
-
-  int *g_dest;
-  if ((g_dest = (int *)swift_malloc("g_dest", sizeof(int) * nr_gparts)) == NULL)
-    error("Failed to allocate g_dest temporary buffer.");
-
-  redist_data.counts = g_counts;
-  redist_data.dest = g_dest;
-  redist_data.base = (void *)gparts;
-
-  threadpool_map(&e->threadpool, engine_redistribute_dest_mapper_gpart, gparts,
-                 nr_gparts, sizeof(struct gpart), 0, &redist_data);
-
-  /* Sort the gparticles according to their cell index. */
-  if (nr_gparts > 0)
-    space_gparts_sort(s->gparts, s->parts, s->sparts, s->bparts, g_dest,
-                      &g_counts[nodeID * nr_nodes], nr_nodes);
-
-#ifdef SWIFT_DEBUG_CHECKS
-  /* Verify that the gpart have been sorted correctly. */
-  for (size_t k = 0; k < nr_gparts; k++) {
-    const struct gpart *gp = &s->gparts[k];
-
-    if (gp->time_bin == time_bin_inhibited)
-      error("Inhibited particle found after sorting!");
-
-    if (gp->time_bin == time_bin_not_created)
-      error("Inhibited particle found after sorting!");
-
-    /* New cell index */
-    const int new_cid =
-        cell_getid(s->cdim, gp->x[0] * s->iwidth[0], gp->x[1] * s->iwidth[1],
-                   gp->x[2] * s->iwidth[2]);
-
-    /* New cell of this gpart */
-    const struct cell *c = &s->cells_top[new_cid];
-    const int new_node = c->nodeID;
-
-    if (g_dest[k] != new_node)
-      error("gpart's new node index not matching sorted index (%d != %d).",
-            g_dest[k], new_node);
-
-    if (gp->x[0] < c->loc[0] || gp->x[0] > c->loc[0] + c->width[0] ||
-        gp->x[1] < c->loc[1] || gp->x[1] > c->loc[1] + c->width[1] ||
-        gp->x[2] < c->loc[2] || gp->x[2] > c->loc[2] + c->width[2])
-      error("gpart not sorted into the right top-level cell!");
-  }
-#endif
-
-  swift_free("g_dest", g_dest);
-
-  /* Get all the counts from all the nodes. */
-  if (MPI_Allreduce(MPI_IN_PLACE, counts, nr_nodes * nr_nodes, MPI_INT, MPI_SUM,
-                    MPI_COMM_WORLD) != MPI_SUCCESS)
-    error("Failed to allreduce particle transfer counts.");
-
-  /* Get all the g_counts from all the nodes. */
-  if (MPI_Allreduce(MPI_IN_PLACE, g_counts, nr_nodes * nr_nodes, MPI_INT,
-                    MPI_SUM, MPI_COMM_WORLD) != MPI_SUCCESS)
-    error("Failed to allreduce gparticle transfer counts.");
-
-  /* Get all the s_counts from all the nodes. */
-  if (MPI_Allreduce(MPI_IN_PLACE, s_counts, nr_nodes * nr_nodes, MPI_INT,
-                    MPI_SUM, MPI_COMM_WORLD) != MPI_SUCCESS)
-    error("Failed to allreduce sparticle transfer counts.");
-
-  /* Get all the b_counts from all the nodes. */
-  if (MPI_Allreduce(MPI_IN_PLACE, b_counts, nr_nodes * nr_nodes, MPI_INT,
-                    MPI_SUM, MPI_COMM_WORLD) != MPI_SUCCESS)
-    error("Failed to allreduce bparticle transfer counts.");
-
-  /* Report how many particles will be moved. */
-  if (e->verbose) {
-    if (e->nodeID == 0) {
-      size_t total = 0, g_total = 0, s_total = 0, b_total = 0;
-      size_t unmoved = 0, g_unmoved = 0, s_unmoved = 0, b_unmoved = 0;
-      for (int p = 0, r = 0; p < nr_nodes; p++) {
-        for (int n = 0; n < nr_nodes; n++) {
-          total += counts[r];
-          g_total += g_counts[r];
-          s_total += s_counts[r];
-          b_total += b_counts[r];
-          if (p == n) {
-            unmoved += counts[r];
-            g_unmoved += g_counts[r];
-            s_unmoved += s_counts[r];
-            b_unmoved += b_counts[r];
-          }
-          r++;
-        }
-      }
-      if (total > 0)
-        message("%zu of %zu (%.2f%%) of particles moved", total - unmoved,
-                total, 100.0 * (double)(total - unmoved) / (double)total);
-      if (g_total > 0)
-        message("%zu of %zu (%.2f%%) of g-particles moved", g_total - g_unmoved,
-                g_total,
-                100.0 * (double)(g_total - g_unmoved) / (double)g_total);
-      if (s_total > 0)
-        message("%zu of %zu (%.2f%%) of s-particles moved", s_total - s_unmoved,
-                s_total,
-                100.0 * (double)(s_total - s_unmoved) / (double)s_total);
-      if (b_total > 0)
-        message("%ld of %ld (%.2f%%) of b-particles moved", b_total - b_unmoved,
-                b_total,
-                100.0 * (double)(b_total - b_unmoved) / (double)b_total);
-    }
-  }
-
-  /* Now each node knows how many parts, sparts, bparts, and gparts will be
-   * transferred to every other node. Get the new numbers of particles for this
-   * node. */
-  size_t nr_parts_new = 0, nr_gparts_new = 0, nr_sparts_new = 0,
-         nr_bparts_new = 0;
-  for (int k = 0; k < nr_nodes; k++)
-    nr_parts_new += counts[k * nr_nodes + nodeID];
-  for (int k = 0; k < nr_nodes; k++)
-    nr_gparts_new += g_counts[k * nr_nodes + nodeID];
-  for (int k = 0; k < nr_nodes; k++)
-    nr_sparts_new += s_counts[k * nr_nodes + nodeID];
-  for (int k = 0; k < nr_nodes; k++)
-    nr_bparts_new += b_counts[k * nr_nodes + nodeID];
-
-  /* Now exchange the particles, type by type to keep the memory required
-   * under control. */
-
-  /* SPH particles. */
-  void *new_parts = engine_do_redistribute(
-      "parts", counts, (char *)s->parts, nr_parts_new, sizeof(struct part),
-      part_align, part_mpi_type, nr_nodes, nodeID);
-  swift_free("parts", s->parts);
-  s->parts = (struct part *)new_parts;
-  s->nr_parts = nr_parts_new;
-  s->size_parts = engine_redistribute_alloc_margin * nr_parts_new;
-
-  /* Extra SPH particle properties. */
-  new_parts = engine_do_redistribute(
-      "xparts", counts, (char *)s->xparts, nr_parts_new, sizeof(struct xpart),
-      xpart_align, xpart_mpi_type, nr_nodes, nodeID);
-  swift_free("xparts", s->xparts);
-  s->xparts = (struct xpart *)new_parts;
-
-  /* Gravity particles. */
-  new_parts = engine_do_redistribute(
-      "gparts", g_counts, (char *)s->gparts, nr_gparts_new,
-      sizeof(struct gpart), gpart_align, gpart_mpi_type, nr_nodes, nodeID);
-  swift_free("gparts", s->gparts);
-  s->gparts = (struct gpart *)new_parts;
-  s->nr_gparts = nr_gparts_new;
-  s->size_gparts = engine_redistribute_alloc_margin * nr_gparts_new;
-
-  /* Star particles. */
-  new_parts = engine_do_redistribute(
-      "sparts", s_counts, (char *)s->sparts, nr_sparts_new,
-      sizeof(struct spart), spart_align, spart_mpi_type, nr_nodes, nodeID);
-  swift_free("sparts", s->sparts);
-  s->sparts = (struct spart *)new_parts;
-  s->nr_sparts = nr_sparts_new;
-  s->size_sparts = engine_redistribute_alloc_margin * nr_sparts_new;
-
-  /* Black holes particles. */
-  new_parts = engine_do_redistribute(
-      "bparts", b_counts, (char *)s->bparts, nr_bparts_new,
-      sizeof(struct bpart), bpart_align, bpart_mpi_type, nr_nodes, nodeID);
-  swift_free("bparts", s->bparts);
-  s->bparts = (struct bpart *)new_parts;
-  s->nr_bparts = nr_bparts_new;
-  s->size_bparts = engine_redistribute_alloc_margin * nr_bparts_new;
-
-  /* All particles have now arrived. Time for some final operations on the
-     stuff we just received */
-
-  /* Restore the part<->gpart and spart<->gpart links.
-   * Generate indices and counts for threadpool tasks. Note we process a node
-   * at a time. */
-  struct relink_mapper_data relink_data;
-  relink_data.s = s;
-  relink_data.counts = counts;
-  relink_data.g_counts = g_counts;
-  relink_data.s_counts = s_counts;
-  relink_data.b_counts = b_counts;
-  relink_data.nodeID = nodeID;
-  relink_data.nr_nodes = nr_nodes;
-
-  threadpool_map(&e->threadpool, engine_redistribute_relink_mapper, nodes,
-                 nr_nodes, sizeof(int), 1, &relink_data);
-  free(nodes);
-
-  /* Clean up the counts now we are done. */
-  free(counts);
-  free(g_counts);
-  free(s_counts);
-  free(b_counts);
-
-#ifdef SWIFT_DEBUG_CHECKS
-  /* Verify that all parts are in the right place. */
-  for (size_t k = 0; k < nr_parts_new; k++) {
-    const int cid = cell_getid(s->cdim, s->parts[k].x[0] * s->iwidth[0],
-                               s->parts[k].x[1] * s->iwidth[1],
-                               s->parts[k].x[2] * s->iwidth[2]);
-    if (cells[cid].nodeID != nodeID)
-      error("Received particle (%zu) that does not belong here (nodeID=%i).", k,
-            cells[cid].nodeID);
-  }
-  for (size_t k = 0; k < nr_gparts_new; k++) {
-    const int cid = cell_getid(s->cdim, s->gparts[k].x[0] * s->iwidth[0],
-                               s->gparts[k].x[1] * s->iwidth[1],
-                               s->gparts[k].x[2] * s->iwidth[2]);
-    if (cells[cid].nodeID != nodeID)
-      error("Received g-particle (%zu) that does not belong here (nodeID=%i).",
-            k, cells[cid].nodeID);
-  }
-  for (size_t k = 0; k < nr_sparts_new; k++) {
-    const int cid = cell_getid(s->cdim, s->sparts[k].x[0] * s->iwidth[0],
-                               s->sparts[k].x[1] * s->iwidth[1],
-                               s->sparts[k].x[2] * s->iwidth[2]);
-    if (cells[cid].nodeID != nodeID)
-      error("Received s-particle (%zu) that does not belong here (nodeID=%i).",
-            k, cells[cid].nodeID);
-  }
-  for (size_t k = 0; k < nr_bparts_new; k++) {
-    const int cid = cell_getid(s->cdim, s->bparts[k].x[0] * s->iwidth[0],
-                               s->bparts[k].x[1] * s->iwidth[1],
-                               s->bparts[k].x[2] * s->iwidth[2]);
-    if (cells[cid].nodeID != nodeID)
-      error("Received b-particle (%zu) that does not belong here (nodeID=%i).",
-            k, cells[cid].nodeID);
-  }
-
-  /* Verify that the links are correct */
-  part_verify_links(s->parts, s->gparts, s->sparts, s->bparts, nr_parts_new,
-                    nr_gparts_new, nr_sparts_new, nr_bparts_new, e->verbose);
-
-#endif
-
-  /* Be verbose about what just happened. */
-  if (e->verbose) {
-    int my_cells = 0;
-    for (int k = 0; k < nr_cells; k++)
-      if (cells[k].nodeID == nodeID) my_cells += 1;
-    message(
-        "node %i now has %zu parts, %zu sparts, %zu bparts and %zu gparts in "
-        "%i cells.",
-        nodeID, nr_parts_new, nr_sparts_new, nr_bparts_new, nr_gparts_new,
-        my_cells);
-  }
-
-  /* Flag that we do not have any extra particles any more */
-  s->nr_extra_parts = 0;
-  s->nr_extra_gparts = 0;
-  s->nr_extra_sparts = 0;
-  s->nr_extra_bparts = 0;
-
-  /* Flag that a redistribute has taken place */
-  e->step_props |= engine_step_prop_redistribute;
-
-  if (e->verbose)
-    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
-            clocks_getunit());
-#else
-  error("SWIFT was not compiled with MPI support.");
-#endif
-}
-
 /**
  * @brief Repartition the cells amongst the nodes.
  *
@@ -2687,544 +1669,6 @@ void engine_barrier(struct engine *e) {
   swift_barrier_wait(&e->run_barrier);
 }
 
-/**
- * @brief Recursive function gathering end-of-step data.
- *
- * We recurse until we encounter a timestep or time-step MPI recv task
- * as the values will have been set at that level. We then bring these
- * values upwards.
- *
- * @param c The #cell to recurse into.
- * @param e The #engine.
- */
-void engine_collect_end_of_step_recurse_hydro(struct cell *c,
-                                              const struct engine *e) {
-
-  /* Skip super-cells (Their values are already set) */
-  if (c->timestep != NULL) return;
-#ifdef WITH_MPI
-  if (cell_get_recv(c, task_subtype_tend_part) != NULL) return;
-#endif /* WITH_MPI */
-
-#ifdef SWIFT_DEBUG_CHECKS
-    /* if (!c->split) error("Reached a leaf without finding a time-step task!
-     * c->depth=%d c->maxdepth=%d c->count=%d c->node=%d", */
-    /* 		       c->depth, c->maxdepth, c->hydro.count, c->nodeID); */
-#endif
-
-  /* Counters for the different quantities. */
-  size_t updated = 0;
-  integertime_t ti_hydro_end_min = max_nr_timesteps, ti_hydro_end_max = 0,
-                ti_hydro_beg_max = 0;
-
-  /* Local Star formation history properties */
-  struct star_formation_history sfh_updated;
-
-  /* Initialize the star formation structs */
-  star_formation_logger_init(&sfh_updated);
-
-  /* Collect the values from the progeny. */
-  for (int k = 0; k < 8; k++) {
-    struct cell *cp = c->progeny[k];
-    if (cp != NULL && cp->hydro.count > 0) {
-
-      /* Recurse */
-      engine_collect_end_of_step_recurse_hydro(cp, e);
-
-      /* And update */
-      ti_hydro_end_min = min(ti_hydro_end_min, cp->hydro.ti_end_min);
-      ti_hydro_end_max = max(ti_hydro_end_max, cp->hydro.ti_end_max);
-      ti_hydro_beg_max = max(ti_hydro_beg_max, cp->hydro.ti_beg_max);
-
-      updated += cp->hydro.updated;
-
-      /* Check if the cell is inactive and in that case reorder the SFH */
-      if (!cell_is_starting_hydro(cp, e)) {
-        star_formation_logger_log_inactive_cell(&cp->stars.sfh);
-      }
-
-      /* Add the star formation history in this cell to sfh_updated */
-      star_formation_logger_add(&sfh_updated, &cp->stars.sfh);
-
-      /* Collected, so clear for next time. */
-      cp->hydro.updated = 0;
-    }
-  }
-
-  /* Store the collected values in the cell. */
-  c->hydro.ti_end_min = ti_hydro_end_min;
-  c->hydro.ti_end_max = ti_hydro_end_max;
-  c->hydro.ti_beg_max = ti_hydro_beg_max;
-  c->hydro.updated = updated;
-  // c->hydro.inhibited = inhibited;
-
-  /* Store the star formation history in the parent cell */
-  star_formation_logger_add(&c->stars.sfh, &sfh_updated);
-}
-
-/**
- * @brief Recursive function gathering end-of-step data.
- *
- * We recurse until we encounter a timestep or time-step MPI recv task
- * as the values will have been set at that level. We then bring these
- * values upwards.
- *
- * @param c The #cell to recurse into.
- * @param e The #engine.
- */
-void engine_collect_end_of_step_recurse_grav(struct cell *c,
-                                             const struct engine *e) {
-
-  /* Skip super-cells (Their values are already set) */
-  if (c->timestep != NULL) return;
-#ifdef WITH_MPI
-  if (cell_get_recv(c, task_subtype_tend_gpart) != NULL) return;
-#endif /* WITH_MPI */
-
-#ifdef SWIFT_DEBUG_CHECKS
-    //  if (!c->split) error("Reached a leaf without finding a time-step
-    //  task!");
-#endif
-
-  /* Counters for the different quantities. */
-  size_t updated = 0;
-  integertime_t ti_grav_end_min = max_nr_timesteps, ti_grav_end_max = 0,
-                ti_grav_beg_max = 0;
-
-  /* Collect the values from the progeny. */
-  for (int k = 0; k < 8; k++) {
-    struct cell *cp = c->progeny[k];
-    if (cp != NULL && cp->grav.count > 0) {
-
-      /* Recurse */
-      engine_collect_end_of_step_recurse_grav(cp, e);
-
-      /* And update */
-      ti_grav_end_min = min(ti_grav_end_min, cp->grav.ti_end_min);
-      ti_grav_end_max = max(ti_grav_end_max, cp->grav.ti_end_max);
-      ti_grav_beg_max = max(ti_grav_beg_max, cp->grav.ti_beg_max);
-
-      updated += cp->grav.updated;
-
-      /* Collected, so clear for next time. */
-      cp->grav.updated = 0;
-    }
-  }
-
-  /* Store the collected values in the cell. */
-  c->grav.ti_end_min = ti_grav_end_min;
-  c->grav.ti_end_max = ti_grav_end_max;
-  c->grav.ti_beg_max = ti_grav_beg_max;
-  c->grav.updated = updated;
-}
-
-/**
- * @brief Recursive function gathering end-of-step data.
- *
- * We recurse until we encounter a timestep or time-step MPI recv task
- * as the values will have been set at that level. We then bring these
- * values upwards.
- *
- * @param c The #cell to recurse into.
- * @param e The #engine.
- */
-void engine_collect_end_of_step_recurse_stars(struct cell *c,
-                                              const struct engine *e) {
-
-  /* Skip super-cells (Their values are already set) */
-  if (c->timestep != NULL) return;
-#ifdef WITH_MPI
-  if (cell_get_recv(c, task_subtype_tend_spart) != NULL) return;
-#endif /* WITH_MPI */
-
-#ifdef SWIFT_DEBUG_CHECKS
-    // if (!c->split) error("Reached a leaf without finding a time-step task!");
-#endif
-
-  /* Counters for the different quantities. */
-  size_t updated = 0;
-  integertime_t ti_stars_end_min = max_nr_timesteps, ti_stars_end_max = 0,
-                ti_stars_beg_max = 0;
-
-  /* Collect the values from the progeny. */
-  for (int k = 0; k < 8; k++) {
-    struct cell *cp = c->progeny[k];
-    if (cp != NULL && cp->stars.count > 0) {
-
-      /* Recurse */
-      engine_collect_end_of_step_recurse_stars(cp, e);
-
-      /* And update */
-      ti_stars_end_min = min(ti_stars_end_min, cp->stars.ti_end_min);
-      ti_stars_end_max = max(ti_stars_end_max, cp->stars.ti_end_max);
-      ti_stars_beg_max = max(ti_stars_beg_max, cp->stars.ti_beg_max);
-
-      updated += cp->stars.updated;
-
-      /* Collected, so clear for next time. */
-      cp->stars.updated = 0;
-    }
-  }
-
-  /* Store the collected values in the cell. */
-  c->stars.ti_end_min = ti_stars_end_min;
-  c->stars.ti_end_max = ti_stars_end_max;
-  c->stars.ti_beg_max = ti_stars_beg_max;
-  c->stars.updated = updated;
-}
-
-/**
- * @brief Recursive function gathering end-of-step data.
- *
- * We recurse until we encounter a timestep or time-step MPI recv task
- * as the values will have been set at that level. We then bring these
- * values upwards.
- *
- * @param c The #cell to recurse into.
- * @param e The #engine.
- */
-void engine_collect_end_of_step_recurse_black_holes(struct cell *c,
-                                                    const struct engine *e) {
-
-  /* Skip super-cells (Their values are already set) */
-  if (c->timestep != NULL) return;
-#ifdef WITH_MPI
-  if (cell_get_recv(c, task_subtype_tend_bpart) != NULL) return;
-#endif /* WITH_MPI */
-
-#ifdef SWIFT_DEBUG_CHECKS
-    // if (!c->split) error("Reached a leaf without finding a time-step task!");
-#endif
-
-  /* Counters for the different quantities. */
-  size_t updated = 0;
-  integertime_t ti_black_holes_end_min = max_nr_timesteps,
-                ti_black_holes_end_max = 0, ti_black_holes_beg_max = 0;
-
-  /* Collect the values from the progeny. */
-  for (int k = 0; k < 8; k++) {
-    struct cell *cp = c->progeny[k];
-    if (cp != NULL && cp->black_holes.count > 0) {
-
-      /* Recurse */
-      engine_collect_end_of_step_recurse_black_holes(cp, e);
-
-      /* And update */
-      ti_black_holes_end_min =
-          min(ti_black_holes_end_min, cp->black_holes.ti_end_min);
-      ti_black_holes_end_max =
-          max(ti_black_holes_end_max, cp->black_holes.ti_end_max);
-      ti_black_holes_beg_max =
-          max(ti_black_holes_beg_max, cp->black_holes.ti_beg_max);
-
-      updated += cp->black_holes.updated;
-
-      /* Collected, so clear for next time. */
-      cp->black_holes.updated = 0;
-    }
-  }
-
-  /* Store the collected values in the cell. */
-  c->black_holes.ti_end_min = ti_black_holes_end_min;
-  c->black_holes.ti_end_max = ti_black_holes_end_max;
-  c->black_holes.ti_beg_max = ti_black_holes_beg_max;
-  c->black_holes.updated = updated;
-}
-
-/**
- * @brief Mapping function to collect the data from the end of the step
- *
- * This function will call a recursive function on all the top-level cells
- * to collect the information we are after.
- *
- * @param map_data The list of cells with tasks on this node.
- * @param num_elements The number of elements in the list this thread will work
- * on.
- * @param extra_data The #engine.
- */
-void engine_collect_end_of_step_mapper(void *map_data, int num_elements,
-                                       void *extra_data) {
-
-  struct end_of_step_data *data = (struct end_of_step_data *)extra_data;
-  const struct engine *e = data->e;
-  const int with_hydro = (e->policy & engine_policy_hydro);
-  const int with_self_grav = (e->policy & engine_policy_self_gravity);
-  const int with_ext_grav = (e->policy & engine_policy_external_gravity);
-  const int with_grav = (with_self_grav || with_ext_grav);
-  const int with_stars = (e->policy & engine_policy_stars);
-  const int with_black_holes = (e->policy & engine_policy_black_holes);
-  struct space *s = e->s;
-  int *local_cells = (int *)map_data;
-  struct star_formation_history *sfh_top = &data->sfh;
-
-  /* Local collectible */
-  size_t updated = 0, g_updated = 0, s_updated = 0, b_updated = 0;
-  integertime_t ti_hydro_end_min = max_nr_timesteps, ti_hydro_end_max = 0,
-                ti_hydro_beg_max = 0;
-  integertime_t ti_gravity_end_min = max_nr_timesteps, ti_gravity_end_max = 0,
-                ti_gravity_beg_max = 0;
-  integertime_t ti_stars_end_min = max_nr_timesteps, ti_stars_end_max = 0,
-                ti_stars_beg_max = 0;
-  integertime_t ti_black_holes_end_min = max_nr_timesteps,
-                ti_black_holes_end_max = 0, ti_black_holes_beg_max = 0;
-
-  /* Local Star formation history properties */
-  struct star_formation_history sfh_updated;
-
-  /* Initialize the star formation structs for this engine to zero */
-  star_formation_logger_init(&sfh_updated);
-
-  for (int ind = 0; ind < num_elements; ind++) {
-    struct cell *c = &s->cells_top[local_cells[ind]];
-
-    if (c->hydro.count > 0 || c->grav.count > 0 || c->stars.count > 0 ||
-        c->black_holes.count > 0) {
-
-      /* Make the top-cells recurse */
-      if (with_hydro) {
-        engine_collect_end_of_step_recurse_hydro(c, e);
-      }
-      if (with_grav) {
-        engine_collect_end_of_step_recurse_grav(c, e);
-      }
-      if (with_stars) {
-        engine_collect_end_of_step_recurse_stars(c, e);
-      }
-      if (with_black_holes) {
-        engine_collect_end_of_step_recurse_black_holes(c, e);
-      }
-
-      /* And aggregate */
-      if (c->hydro.ti_end_min > e->ti_current)
-        ti_hydro_end_min = min(ti_hydro_end_min, c->hydro.ti_end_min);
-      ti_hydro_end_max = max(ti_hydro_end_max, c->hydro.ti_end_max);
-      ti_hydro_beg_max = max(ti_hydro_beg_max, c->hydro.ti_beg_max);
-
-      if (c->grav.ti_end_min > e->ti_current)
-        ti_gravity_end_min = min(ti_gravity_end_min, c->grav.ti_end_min);
-      ti_gravity_end_max = max(ti_gravity_end_max, c->grav.ti_end_max);
-      ti_gravity_beg_max = max(ti_gravity_beg_max, c->grav.ti_beg_max);
-
-      if (c->stars.ti_end_min > e->ti_current)
-        ti_stars_end_min = min(ti_stars_end_min, c->stars.ti_end_min);
-      ti_stars_end_max = max(ti_stars_end_max, c->stars.ti_end_max);
-      ti_stars_beg_max = max(ti_stars_beg_max, c->stars.ti_beg_max);
-
-      if (c->black_holes.ti_end_min > e->ti_current)
-        ti_black_holes_end_min =
-            min(ti_black_holes_end_min, c->black_holes.ti_end_min);
-      ti_black_holes_end_max =
-          max(ti_black_holes_end_max, c->black_holes.ti_end_max);
-      ti_black_holes_beg_max =
-          max(ti_black_holes_beg_max, c->black_holes.ti_beg_max);
-
-      updated += c->hydro.updated;
-      g_updated += c->grav.updated;
-      s_updated += c->stars.updated;
-      b_updated += c->black_holes.updated;
-
-      /* Check if the cell is inactive and in that case reorder the SFH */
-      if (!cell_is_starting_hydro(c, e)) {
-        star_formation_logger_log_inactive_cell(&c->stars.sfh);
-      }
-
-      /* Get the star formation history from the current cell and store it in
-       * the star formation history struct */
-      star_formation_logger_add(&sfh_updated, &c->stars.sfh);
-
-      /* Collected, so clear for next time. */
-      c->hydro.updated = 0;
-      c->grav.updated = 0;
-      c->stars.updated = 0;
-      c->black_holes.updated = 0;
-    }
-  }
-
-  /* Let's write back to the global data.
-   * We use the space lock to garanty single access*/
-  if (lock_lock(&s->lock) == 0) {
-    data->updated += updated;
-    data->g_updated += g_updated;
-    data->s_updated += s_updated;
-    data->b_updated += b_updated;
-
-    /* Add the SFH information from this engine to the global data */
-    star_formation_logger_add(sfh_top, &sfh_updated);
-
-    if (ti_hydro_end_min > e->ti_current)
-      data->ti_hydro_end_min = min(ti_hydro_end_min, data->ti_hydro_end_min);
-    data->ti_hydro_end_max = max(ti_hydro_end_max, data->ti_hydro_end_max);
-    data->ti_hydro_beg_max = max(ti_hydro_beg_max, data->ti_hydro_beg_max);
-
-    if (ti_gravity_end_min > e->ti_current)
-      data->ti_gravity_end_min =
-          min(ti_gravity_end_min, data->ti_gravity_end_min);
-    data->ti_gravity_end_max =
-        max(ti_gravity_end_max, data->ti_gravity_end_max);
-    data->ti_gravity_beg_max =
-        max(ti_gravity_beg_max, data->ti_gravity_beg_max);
-
-    if (ti_stars_end_min > e->ti_current)
-      data->ti_stars_end_min = min(ti_stars_end_min, data->ti_stars_end_min);
-    data->ti_stars_end_max = max(ti_stars_end_max, data->ti_stars_end_max);
-    data->ti_stars_beg_max = max(ti_stars_beg_max, data->ti_stars_beg_max);
-
-    if (ti_black_holes_end_min > e->ti_current)
-      data->ti_black_holes_end_min =
-          min(ti_black_holes_end_min, data->ti_black_holes_end_min);
-    data->ti_black_holes_end_max =
-        max(ti_black_holes_end_max, data->ti_black_holes_end_max);
-    data->ti_black_holes_beg_max =
-        max(ti_black_holes_beg_max, data->ti_black_holes_beg_max);
-  }
-
-  if (lock_unlock(&s->lock) != 0) error("Failed to unlock the space");
-}
-
-/**
- * @brief Collects the next time-step and rebuild flag.
- *
- * The next time-step is determined by making each super-cell recurse to
- * collect the minimal of ti_end and the number of updated particles.  When in
- * MPI mode this routines reduces these across all nodes and also collects the
- * forcerebuild flag -- this is so that we only use a single collective MPI
- * call per step for all these values.
- *
- * Note that the results are stored in e->collect_group1 struct not in the
- * engine fields, unless apply is true. These can be applied field-by-field
- * or all at once using collectgroup1_copy();
- *
- * @param e The #engine.
- * @param apply whether to apply the results to the engine or just keep in the
- *              group1 struct.
- */
-void engine_collect_end_of_step(struct engine *e, int apply) {
-
-  const ticks tic = getticks();
-  struct space *s = e->s;
-  struct end_of_step_data data;
-  data.updated = 0, data.g_updated = 0, data.s_updated = 0, data.b_updated = 0;
-  data.ti_hydro_end_min = max_nr_timesteps, data.ti_hydro_end_max = 0,
-  data.ti_hydro_beg_max = 0;
-  data.ti_gravity_end_min = max_nr_timesteps, data.ti_gravity_end_max = 0,
-  data.ti_gravity_beg_max = 0;
-  data.ti_stars_end_min = max_nr_timesteps, data.ti_stars_end_max = 0,
-  data.ti_stars_beg_max = 0;
-  data.ti_black_holes_end_min = max_nr_timesteps,
-  data.ti_black_holes_end_max = 0, data.ti_black_holes_beg_max = 0;
-  data.e = e;
-
-  /* Initialize the total SFH of the simulation to zero */
-  star_formation_logger_init(&data.sfh);
-
-  /* Collect information from the local top-level cells */
-  threadpool_map(&e->threadpool, engine_collect_end_of_step_mapper,
-                 s->local_cells_with_tasks_top, s->nr_local_cells_with_tasks,
-                 sizeof(int), 0, &data);
-
-  /* Get the number of inhibited particles from the space-wide counters
-   * since these have been updated atomically during the time-steps. */
-  data.inhibited = s->nr_inhibited_parts;
-  data.g_inhibited = s->nr_inhibited_gparts;
-  data.s_inhibited = s->nr_inhibited_sparts;
-  data.b_inhibited = s->nr_inhibited_bparts;
-
-  /* Store these in the temporary collection group. */
-  collectgroup1_init(
-      &e->collect_group1, data.updated, data.g_updated, data.s_updated,
-      data.b_updated, data.inhibited, data.g_inhibited, data.s_inhibited,
-      data.b_inhibited, data.ti_hydro_end_min, data.ti_hydro_end_max,
-      data.ti_hydro_beg_max, data.ti_gravity_end_min, data.ti_gravity_end_max,
-      data.ti_gravity_beg_max, data.ti_stars_end_min, data.ti_stars_end_max,
-      data.ti_stars_beg_max, data.ti_black_holes_end_min,
-      data.ti_black_holes_end_max, data.ti_black_holes_beg_max, e->forcerebuild,
-      e->s->tot_cells, e->sched.nr_tasks,
-      (float)e->sched.nr_tasks / (float)e->s->tot_cells, data.sfh);
-
-/* Aggregate collective data from the different nodes for this step. */
-#ifdef WITH_MPI
-  collectgroup1_reduce(&e->collect_group1);
-
-#ifdef SWIFT_DEBUG_CHECKS
-  {
-    /* Check the above using the original MPI calls. */
-    integertime_t in_i[2], out_i[2];
-    in_i[0] = 0;
-    in_i[1] = 0;
-    out_i[0] = data.ti_hydro_end_min;
-    out_i[1] = data.ti_gravity_end_min;
-    if (MPI_Allreduce(out_i, in_i, 2, MPI_LONG_LONG_INT, MPI_MIN,
-                      MPI_COMM_WORLD) != MPI_SUCCESS)
-      error("Failed to aggregate ti_end_min.");
-    if (in_i[0] != (long long)e->collect_group1.ti_hydro_end_min)
-      error("Failed to get same ti_hydro_end_min, is %lld, should be %lld",
-            in_i[0], e->collect_group1.ti_hydro_end_min);
-    if (in_i[1] != (long long)e->collect_group1.ti_gravity_end_min)
-      error("Failed to get same ti_gravity_end_min, is %lld, should be %lld",
-            in_i[1], e->collect_group1.ti_gravity_end_min);
-
-    long long in_ll[4], out_ll[4];
-    out_ll[0] = data.updated;
-    out_ll[1] = data.g_updated;
-    out_ll[2] = data.s_updated;
-    out_ll[3] = data.b_updated;
-    if (MPI_Allreduce(out_ll, in_ll, 4, MPI_LONG_LONG_INT, MPI_SUM,
-                      MPI_COMM_WORLD) != MPI_SUCCESS)
-      error("Failed to aggregate particle counts.");
-    if (in_ll[0] != (long long)e->collect_group1.updated)
-      error("Failed to get same updated, is %lld, should be %lld", in_ll[0],
-            e->collect_group1.updated);
-    if (in_ll[1] != (long long)e->collect_group1.g_updated)
-      error("Failed to get same g_updated, is %lld, should be %lld", in_ll[1],
-            e->collect_group1.g_updated);
-    if (in_ll[2] != (long long)e->collect_group1.s_updated)
-      error("Failed to get same s_updated, is %lld, should be %lld", in_ll[2],
-            e->collect_group1.s_updated);
-    if (in_ll[3] != (long long)e->collect_group1.b_updated)
-      error("Failed to get same b_updated, is %lld, should be %lld", in_ll[3],
-            e->collect_group1.b_updated);
-
-    out_ll[0] = data.inhibited;
-    out_ll[1] = data.g_inhibited;
-    out_ll[2] = data.s_inhibited;
-    out_ll[3] = data.b_inhibited;
-    if (MPI_Allreduce(out_ll, in_ll, 4, MPI_LONG_LONG_INT, MPI_SUM,
-                      MPI_COMM_WORLD) != MPI_SUCCESS)
-      error("Failed to aggregate particle counts.");
-    if (in_ll[0] != (long long)e->collect_group1.inhibited)
-      error("Failed to get same inhibited, is %lld, should be %lld", in_ll[0],
-            e->collect_group1.inhibited);
-    if (in_ll[1] != (long long)e->collect_group1.g_inhibited)
-      error("Failed to get same g_inhibited, is %lld, should be %lld", in_ll[1],
-            e->collect_group1.g_inhibited);
-    if (in_ll[2] != (long long)e->collect_group1.s_inhibited)
-      error("Failed to get same s_inhibited, is %lld, should be %lld", in_ll[2],
-            e->collect_group1.s_inhibited);
-    if (in_ll[3] != (long long)e->collect_group1.b_inhibited)
-      error("Failed to get same b_inhibited, is %lld, should be %lld", in_ll[3],
-            e->collect_group1.b_inhibited);
-
-    int buff = 0;
-    if (MPI_Allreduce(&e->forcerebuild, &buff, 1, MPI_INT, MPI_MAX,
-                      MPI_COMM_WORLD) != MPI_SUCCESS)
-      error("Failed to aggregate the rebuild flag across nodes.");
-    if (!!buff != !!e->collect_group1.forcerebuild)
-      error(
-          "Failed to get same rebuild flag from all nodes, is %d,"
-          "should be %d",
-          buff, e->collect_group1.forcerebuild);
-  }
-#endif
-#endif
-
-  /* Apply to the engine, if requested. */
-  if (apply) collectgroup1_apply(&e->collect_group1, e);
-
-  if (e->verbose)
-    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
-            clocks_getunit());
-}
-
 /**
  * @brief Print the conserved quantities statistics to a log file
  *
@@ -6423,127 +4867,3 @@ void engine_struct_restore(struct engine *e, FILE *stream) {
   e->forcerebuild = 1;
   e->forcerepart = 0;
 }
-
-/**
- * @brief Activate all the #gpart communications in preparation
- * fof a call to FOF.
- *
- * @param e The #engine to act on.
- */
-void engine_activate_gpart_comms(struct engine *e) {
-
-#ifdef WITH_MPI
-
-  const ticks tic = getticks();
-
-  struct scheduler *s = &e->sched;
-  const int nr_tasks = s->nr_tasks;
-  struct task *tasks = s->tasks;
-
-  for (int k = 0; k < nr_tasks; ++k) {
-
-    struct task *t = &tasks[k];
-
-    if ((t->type == task_type_send) && (t->subtype == task_subtype_gpart)) {
-      scheduler_activate(s, t);
-    } else if ((t->type == task_type_recv) &&
-               (t->subtype == task_subtype_gpart)) {
-      scheduler_activate(s, t);
-    } else {
-      t->skip = 1;
-    }
-  }
-
-  if (e->verbose)
-    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
-            clocks_getunit());
-
-#else
-  error("Calling an MPI function in non-MPI mode.");
-#endif
-}
-
-/**
- * @brief Activate all the FOF tasks.
- *
- * Marks all the other task types to be skipped.
- *
- * @param e The #engine to act on.
- */
-void engine_activate_fof_tasks(struct engine *e) {
-
-  const ticks tic = getticks();
-
-  struct scheduler *s = &e->sched;
-  const int nr_tasks = s->nr_tasks;
-  struct task *tasks = s->tasks;
-
-  for (int k = 0; k < nr_tasks; k++) {
-
-    struct task *t = &tasks[k];
-
-    if (t->type == task_type_fof_self || t->type == task_type_fof_pair)
-      scheduler_activate(s, t);
-    else
-      t->skip = 1;
-  }
-
-  if (e->verbose)
-    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
-            clocks_getunit());
-}
-
-/**
- * @brief Run a FOF search.
- *
- * @param e the engine
- * @param dump_results Are we writing group catalogues to output files?
- * @param seed_black_holes Are we seeding black holes?
- */
-void engine_fof(struct engine *e, const int dump_results,
-                const int seed_black_holes) {
-
-#ifdef WITH_FOF
-
-  ticks tic = getticks();
-
-  /* Compute number of DM particles */
-  const long long total_nr_baryons =
-      e->total_nr_parts + e->total_nr_sparts + e->total_nr_bparts;
-  const long long total_nr_dmparts =
-      e->total_nr_gparts - e->total_nr_DM_background_gparts - total_nr_baryons;
-
-  /* Initialise FOF parameters and allocate FOF arrays. */
-  fof_allocate(e->s, total_nr_dmparts, e->fof_properties);
-
-  /* Make FOF tasks */
-  engine_make_fof_tasks(e);
-
-  /* and activate them. */
-  engine_activate_fof_tasks(e);
-
-  /* Perform local FOF tasks. */
-  engine_launch(e);
-
-  /* Perform FOF search over foreign particles and
-   * find groups which require black hole seeding.  */
-  fof_search_tree(e->fof_properties, e->black_holes_properties,
-                  e->physical_constants, e->cosmology, e->s, dump_results,
-                  seed_black_holes);
-
-  /* Reset flag. */
-  e->run_fof = 0;
-
-  /* Flag that a FOF has taken place */
-  e->step_props |= engine_step_prop_fof;
-
-  /* ... and find the next FOF time */
-  if (seed_black_holes) engine_compute_next_fof_time(e);
-
-  if (engine_rank == 0)
-    message("Complete FOF search took: %.3f %s.",
-            clocks_from_ticks(getticks() - tic), clocks_getunit());
-#else
-  error("SWIFT was not compiled with FOF enabled!");
-#endif
-}
diff --git a/src/engine.h b/src/engine.h
index 3484336039c64baa43469b1152f1856f70ee2823..72d528969553b6e24ab939ce05acea69c7cb1b0c 100644
--- a/src/engine.h
+++ b/src/engine.h
@@ -490,6 +490,7 @@ void engine_reconstruct_multipoles(struct engine *e);
 void engine_allocate_foreign_particles(struct engine *e);
 void engine_print_stats(struct engine *e);
 void engine_check_for_dumps(struct engine *e);
+void engine_collect_end_of_step(struct engine *e, int apply);
 void engine_dump_snapshot(struct engine *e);
 void engine_init_output_lists(struct engine *e, struct swift_params *params);
 void engine_init(struct engine *e, struct space *s, struct swift_params *params,
diff --git a/src/engine_collect_end_of_step.c b/src/engine_collect_end_of_step.c
new file mode 100644
index 0000000000000000000000000000000000000000..ec02acfefdf65aca13d44a7cf90d48f31b99778f
--- /dev/null
+++ b/src/engine_collect_end_of_step.c
@@ -0,0 +1,584 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
+ *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *               2015 Peter W. Draper (p.w.draper@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+/* Config parameters. */
+#include "../config.h"
+
+/* This object's header. */
+#include "engine.h"
+
+/* Local headers. */
+#include "active.h"
+#include "timeline.h"
+
+/**
+ * @brief Data collected from the cells at the end of a time-step
+ */
+struct end_of_step_data {
+
+  size_t updated, g_updated, s_updated, b_updated;
+  size_t inhibited, g_inhibited, s_inhibited, b_inhibited;
+  integertime_t ti_hydro_end_min, ti_hydro_end_max, ti_hydro_beg_max;
+  integertime_t ti_gravity_end_min, ti_gravity_end_max, ti_gravity_beg_max;
+  integertime_t ti_stars_end_min, ti_stars_end_max, ti_stars_beg_max;
+  integertime_t ti_black_holes_end_min, ti_black_holes_end_max,
+      ti_black_holes_beg_max;
+  struct engine *e;
+  struct star_formation_history sfh;
+};
+
+/**
+ * @brief Recursive function gathering hydro end-of-step data.
+ *
+ * We recurse until we encounter a timestep or time-step MPI recv task
+ * as the values will have been set at that level. We then bring these
+ * values upwards.
+ *
+ * @param c The #cell to recurse into.
+ * @param e The #engine.
+ */
+void engine_collect_end_of_step_recurse_hydro(struct cell *c,
+                                              const struct engine *e) {
+
+  /* Skip super-cells (Their values are already set) */
+  if (c->timestep != NULL) return;
+#ifdef WITH_MPI
+  if (cell_get_recv(c, task_subtype_tend_part) != NULL) return;
+#endif /* WITH_MPI */
+
+#ifdef SWIFT_DEBUG_CHECKS
+    /* if (!c->split) error("Reached a leaf without finding a time-step task!
+     * c->depth=%d c->maxdepth=%d c->count=%d c->node=%d", */
+    /* 		       c->depth, c->maxdepth, c->hydro.count, c->nodeID); */
+#endif
+
+  /* Counters for the different quantities. */
+  size_t updated = 0;
+  integertime_t ti_hydro_end_min = max_nr_timesteps, ti_hydro_end_max = 0,
+                ti_hydro_beg_max = 0;
+
+  /* Local Star formation history properties */
+  struct star_formation_history sfh_updated;
+
+  /* Initialize the star formation structs */
+  star_formation_logger_init(&sfh_updated);
+
+  /* Collect the values from the progeny. */
+  for (int k = 0; k < 8; k++) {
+    struct cell *cp = c->progeny[k];
+    if (cp != NULL && cp->hydro.count > 0) {
+
+      /* Recurse */
+      engine_collect_end_of_step_recurse_hydro(cp, e);
+
+      /* And update */
+      ti_hydro_end_min = min(ti_hydro_end_min, cp->hydro.ti_end_min);
+      ti_hydro_end_max = max(ti_hydro_end_max, cp->hydro.ti_end_max);
+      ti_hydro_beg_max = max(ti_hydro_beg_max, cp->hydro.ti_beg_max);
+
+      updated += cp->hydro.updated;
+
+      /* Check if the cell is inactive and in that case reorder the SFH */
+      if (!cell_is_starting_hydro(cp, e)) {
+        star_formation_logger_log_inactive_cell(&cp->stars.sfh);
+      }
+
+      /* Add the star formation history in this cell to sfh_updated */
+      star_formation_logger_add(&sfh_updated, &cp->stars.sfh);
+
+      /* Collected, so clear for next time. */
+      cp->hydro.updated = 0;
+    }
+  }
+
+  /* Store the collected values in the cell. */
+  c->hydro.ti_end_min = ti_hydro_end_min;
+  c->hydro.ti_end_max = ti_hydro_end_max;
+  c->hydro.ti_beg_max = ti_hydro_beg_max;
+  c->hydro.updated = updated;
+  // c->hydro.inhibited = inhibited;
+
+  /* Store the star formation history in the parent cell */
+  star_formation_logger_add(&c->stars.sfh, &sfh_updated);
+}
+
+/**
+ * @brief Recursive function gathering gravity end-of-step data.
+ *
+ * We recurse until we encounter a timestep or time-step MPI recv task
+ * as the values will have been set at that level. We then bring these
+ * values upwards.
+ *
+ * @param c The #cell to recurse into.
+ * @param e The #engine.
+ */
+void engine_collect_end_of_step_recurse_grav(struct cell *c,
+                                             const struct engine *e) {
+
+  /* Skip super-cells (Their values are already set) */
+  if (c->timestep != NULL) return;
+#ifdef WITH_MPI
+  if (cell_get_recv(c, task_subtype_tend_gpart) != NULL) return;
+#endif /* WITH_MPI */
+
+#ifdef SWIFT_DEBUG_CHECKS
+    //  if (!c->split) error("Reached a leaf without finding a time-step
+    //  task!");
+#endif
+
+  /* Counters for the different quantities. */
+  size_t updated = 0;
+  integertime_t ti_grav_end_min = max_nr_timesteps, ti_grav_end_max = 0,
+                ti_grav_beg_max = 0;
+
+  /* Collect the values from the progeny. */
+  for (int k = 0; k < 8; k++) {
+    struct cell *cp = c->progeny[k];
+    if (cp != NULL && cp->grav.count > 0) {
+
+      /* Recurse */
+      engine_collect_end_of_step_recurse_grav(cp, e);
+
+      /* And update */
+      ti_grav_end_min = min(ti_grav_end_min, cp->grav.ti_end_min);
+      ti_grav_end_max = max(ti_grav_end_max, cp->grav.ti_end_max);
+      ti_grav_beg_max = max(ti_grav_beg_max, cp->grav.ti_beg_max);
+
+      updated += cp->grav.updated;
+
+      /* Collected, so clear for next time. */
+      cp->grav.updated = 0;
+    }
+  }
+
+  /* Store the collected values in the cell. */
+  c->grav.ti_end_min = ti_grav_end_min;
+  c->grav.ti_end_max = ti_grav_end_max;
+  c->grav.ti_beg_max = ti_grav_beg_max;
+  c->grav.updated = updated;
+}
+
+/**
+ * @brief Recursive function gathering stars end-of-step data.
+ *
+ * We recurse until we encounter a timestep or time-step MPI recv task
+ * as the values will have been set at that level. We then bring these
+ * values upwards.
+ *
+ * @param c The #cell to recurse into.
+ * @param e The #engine.
+ */
+void engine_collect_end_of_step_recurse_stars(struct cell *c,
+                                              const struct engine *e) {
+
+  /* Skip super-cells (Their values are already set) */
+  if (c->timestep != NULL) return;
+#ifdef WITH_MPI
+  if (cell_get_recv(c, task_subtype_tend_spart) != NULL) return;
+#endif /* WITH_MPI */
+
+#ifdef SWIFT_DEBUG_CHECKS
+    // if (!c->split) error("Reached a leaf without finding a time-step task!");
+#endif
+
+  /* Counters for the different quantities. */
+  size_t updated = 0;
+  integertime_t ti_stars_end_min = max_nr_timesteps, ti_stars_end_max = 0,
+                ti_stars_beg_max = 0;
+
+  /* Collect the values from the progeny. */
+  for (int k = 0; k < 8; k++) {
+    struct cell *cp = c->progeny[k];
+    if (cp != NULL && cp->stars.count > 0) {
+
+      /* Recurse */
+      engine_collect_end_of_step_recurse_stars(cp, e);
+
+      /* And update */
+      ti_stars_end_min = min(ti_stars_end_min, cp->stars.ti_end_min);
+      ti_stars_end_max = max(ti_stars_end_max, cp->stars.ti_end_max);
+      ti_stars_beg_max = max(ti_stars_beg_max, cp->stars.ti_beg_max);
+
+      updated += cp->stars.updated;
+
+      /* Collected, so clear for next time. */
+      cp->stars.updated = 0;
+    }
+  }
+
+  /* Store the collected values in the cell. */
+  c->stars.ti_end_min = ti_stars_end_min;
+  c->stars.ti_end_max = ti_stars_end_max;
+  c->stars.ti_beg_max = ti_stars_beg_max;
+  c->stars.updated = updated;
+}
+
+/**
+ * @brief Recursive function gathering black holes end-of-step data.
+ *
+ * We recurse until we encounter a timestep or time-step MPI recv task
+ * as the values will have been set at that level. We then bring these
+ * values upwards.
+ *
+ * @param c The #cell to recurse into.
+ * @param e The #engine.
+ */
+void engine_collect_end_of_step_recurse_black_holes(struct cell *c,
+                                                    const struct engine *e) {
+
+  /* Skip super-cells (Their values are already set) */
+  if (c->timestep != NULL) return;
+#ifdef WITH_MPI
+  if (cell_get_recv(c, task_subtype_tend_bpart) != NULL) return;
+#endif /* WITH_MPI */
+
+#ifdef SWIFT_DEBUG_CHECKS
+    // if (!c->split) error("Reached a leaf without finding a time-step task!");
+#endif
+
+  /* Counters for the different quantities. */
+  size_t updated = 0;
+  integertime_t ti_black_holes_end_min = max_nr_timesteps,
+                ti_black_holes_end_max = 0, ti_black_holes_beg_max = 0;
+
+  /* Collect the values from the progeny. */
+  for (int k = 0; k < 8; k++) {
+    struct cell *cp = c->progeny[k];
+    if (cp != NULL && cp->black_holes.count > 0) {
+
+      /* Recurse */
+      engine_collect_end_of_step_recurse_black_holes(cp, e);
+
+      /* And update */
+      ti_black_holes_end_min =
+          min(ti_black_holes_end_min, cp->black_holes.ti_end_min);
+      ti_black_holes_end_max =
+          max(ti_black_holes_end_max, cp->black_holes.ti_end_max);
+      ti_black_holes_beg_max =
+          max(ti_black_holes_beg_max, cp->black_holes.ti_beg_max);
+
+      updated += cp->black_holes.updated;
+
+      /* Collected, so clear for next time. */
+      cp->black_holes.updated = 0;
+    }
+  }
+
+  /* Store the collected values in the cell. */
+  c->black_holes.ti_end_min = ti_black_holes_end_min;
+  c->black_holes.ti_end_max = ti_black_holes_end_max;
+  c->black_holes.ti_beg_max = ti_black_holes_beg_max;
+  c->black_holes.updated = updated;
+}
+
+/**
+ * @brief Mapping function to collect the data from the end of the step
+ *
+ * This function will call a recursive function on all the top-level cells
+ * to collect the information we are after.
+ *
+ * @param map_data The list of cells with tasks on this node.
+ * @param num_elements The number of elements in the list this thread will work
+ * on.
+ * @param extra_data The #end_of_step_data accumulator (holds the #engine).
+ */
+void engine_collect_end_of_step_mapper(void *map_data, int num_elements,
+                                       void *extra_data) {
+
+  struct end_of_step_data *data = (struct end_of_step_data *)extra_data;
+  const struct engine *e = data->e;
+  const int with_hydro = (e->policy & engine_policy_hydro);
+  const int with_self_grav = (e->policy & engine_policy_self_gravity);
+  const int with_ext_grav = (e->policy & engine_policy_external_gravity);
+  const int with_grav = (with_self_grav || with_ext_grav);
+  const int with_stars = (e->policy & engine_policy_stars);
+  const int with_black_holes = (e->policy & engine_policy_black_holes);
+  struct space *s = e->s;
+  int *local_cells = (int *)map_data;
+  struct star_formation_history *sfh_top = &data->sfh;
+
+  /* Local collectible */
+  size_t updated = 0, g_updated = 0, s_updated = 0, b_updated = 0;
+  integertime_t ti_hydro_end_min = max_nr_timesteps, ti_hydro_end_max = 0,
+                ti_hydro_beg_max = 0;
+  integertime_t ti_gravity_end_min = max_nr_timesteps, ti_gravity_end_max = 0,
+                ti_gravity_beg_max = 0;
+  integertime_t ti_stars_end_min = max_nr_timesteps, ti_stars_end_max = 0,
+                ti_stars_beg_max = 0;
+  integertime_t ti_black_holes_end_min = max_nr_timesteps,
+                ti_black_holes_end_max = 0, ti_black_holes_beg_max = 0;
+
+  /* Local Star formation history properties */
+  struct star_formation_history sfh_updated;
+
+  /* Initialize the star formation structs for this engine to zero */
+  star_formation_logger_init(&sfh_updated);
+
+  for (int ind = 0; ind < num_elements; ind++) {
+    struct cell *c = &s->cells_top[local_cells[ind]];
+
+    if (c->hydro.count > 0 || c->grav.count > 0 || c->stars.count > 0 ||
+        c->black_holes.count > 0) {
+
+      /* Make the top-cells recurse */
+      if (with_hydro) {
+        engine_collect_end_of_step_recurse_hydro(c, e);
+      }
+      if (with_grav) {
+        engine_collect_end_of_step_recurse_grav(c, e);
+      }
+      if (with_stars) {
+        engine_collect_end_of_step_recurse_stars(c, e);
+      }
+      if (with_black_holes) {
+        engine_collect_end_of_step_recurse_black_holes(c, e);
+      }
+
+      /* And aggregate */
+      if (c->hydro.ti_end_min > e->ti_current)
+        ti_hydro_end_min = min(ti_hydro_end_min, c->hydro.ti_end_min);
+      ti_hydro_end_max = max(ti_hydro_end_max, c->hydro.ti_end_max);
+      ti_hydro_beg_max = max(ti_hydro_beg_max, c->hydro.ti_beg_max);
+
+      if (c->grav.ti_end_min > e->ti_current)
+        ti_gravity_end_min = min(ti_gravity_end_min, c->grav.ti_end_min);
+      ti_gravity_end_max = max(ti_gravity_end_max, c->grav.ti_end_max);
+      ti_gravity_beg_max = max(ti_gravity_beg_max, c->grav.ti_beg_max);
+
+      if (c->stars.ti_end_min > e->ti_current)
+        ti_stars_end_min = min(ti_stars_end_min, c->stars.ti_end_min);
+      ti_stars_end_max = max(ti_stars_end_max, c->stars.ti_end_max);
+      ti_stars_beg_max = max(ti_stars_beg_max, c->stars.ti_beg_max);
+
+      if (c->black_holes.ti_end_min > e->ti_current)
+        ti_black_holes_end_min =
+            min(ti_black_holes_end_min, c->black_holes.ti_end_min);
+      ti_black_holes_end_max =
+          max(ti_black_holes_end_max, c->black_holes.ti_end_max);
+      ti_black_holes_beg_max =
+          max(ti_black_holes_beg_max, c->black_holes.ti_beg_max);
+
+      updated += c->hydro.updated;
+      g_updated += c->grav.updated;
+      s_updated += c->stars.updated;
+      b_updated += c->black_holes.updated;
+
+      /* Check if the cell is inactive and in that case reorder the SFH */
+      if (!cell_is_starting_hydro(c, e)) {
+        star_formation_logger_log_inactive_cell(&c->stars.sfh);
+      }
+
+      /* Get the star formation history from the current cell and store it in
+       * the star formation history struct */
+      star_formation_logger_add(&sfh_updated, &c->stars.sfh);
+
+      /* Collected, so clear for next time. */
+      c->hydro.updated = 0;
+      c->grav.updated = 0;
+      c->stars.updated = 0;
+      c->black_holes.updated = 0;
+    }
+  }
+
+  /* Let's write back to the global data.
+   * We use the space lock to guarantee single access. */
+  if (lock_lock(&s->lock) == 0) {
+    data->updated += updated;
+    data->g_updated += g_updated;
+    data->s_updated += s_updated;
+    data->b_updated += b_updated;
+
+    /* Add the SFH information from this engine to the global data */
+    star_formation_logger_add(sfh_top, &sfh_updated);
+
+    if (ti_hydro_end_min > e->ti_current)
+      data->ti_hydro_end_min = min(ti_hydro_end_min, data->ti_hydro_end_min);
+    data->ti_hydro_end_max = max(ti_hydro_end_max, data->ti_hydro_end_max);
+    data->ti_hydro_beg_max = max(ti_hydro_beg_max, data->ti_hydro_beg_max);
+
+    if (ti_gravity_end_min > e->ti_current)
+      data->ti_gravity_end_min =
+          min(ti_gravity_end_min, data->ti_gravity_end_min);
+    data->ti_gravity_end_max =
+        max(ti_gravity_end_max, data->ti_gravity_end_max);
+    data->ti_gravity_beg_max =
+        max(ti_gravity_beg_max, data->ti_gravity_beg_max);
+
+    if (ti_stars_end_min > e->ti_current)
+      data->ti_stars_end_min = min(ti_stars_end_min, data->ti_stars_end_min);
+    data->ti_stars_end_max = max(ti_stars_end_max, data->ti_stars_end_max);
+    data->ti_stars_beg_max = max(ti_stars_beg_max, data->ti_stars_beg_max);
+
+    if (ti_black_holes_end_min > e->ti_current)
+      data->ti_black_holes_end_min =
+          min(ti_black_holes_end_min, data->ti_black_holes_end_min);
+    data->ti_black_holes_end_max =
+        max(ti_black_holes_end_max, data->ti_black_holes_end_max);
+    data->ti_black_holes_beg_max =
+        max(ti_black_holes_beg_max, data->ti_black_holes_beg_max);
+  }
+
+  if (lock_unlock(&s->lock) != 0) error("Failed to unlock the space");
+}
+
+/**
+ * @brief Collects the next time-step and rebuild flag.
+ *
+ * The next time-step is determined by making each super-cell recurse to
+ * collect the minimum of ti_end and the number of updated particles.  When in
+ * MPI mode this routine reduces these across all nodes and also collects the
+ * forcerebuild flag -- this is so that we only use a single collective MPI
+ * call per step for all these values.
+ *
+ * Note that the results are stored in the e->collect_group1 struct, not in
+ * the engine fields, unless apply is true. These can be applied field-by-field
+ * or all at once using collectgroup1_apply().
+ *
+ * @param e The #engine.
+ * @param apply whether to apply the results to the engine or just keep in the
+ *              group1 struct.
+ */
+void engine_collect_end_of_step(struct engine *e, int apply) {
+
+  const ticks tic = getticks();
+  struct space *s = e->s;
+  struct end_of_step_data data;
+  data.updated = 0, data.g_updated = 0, data.s_updated = 0, data.b_updated = 0;
+  data.ti_hydro_end_min = max_nr_timesteps, data.ti_hydro_end_max = 0,
+  data.ti_hydro_beg_max = 0;
+  data.ti_gravity_end_min = max_nr_timesteps, data.ti_gravity_end_max = 0,
+  data.ti_gravity_beg_max = 0;
+  data.ti_stars_end_min = max_nr_timesteps, data.ti_stars_end_max = 0,
+  data.ti_stars_beg_max = 0;
+  data.ti_black_holes_end_min = max_nr_timesteps,
+  data.ti_black_holes_end_max = 0, data.ti_black_holes_beg_max = 0;
+  data.e = e;
+
+  /* Initialize the total SFH of the simulation to zero */
+  star_formation_logger_init(&data.sfh);
+
+  /* Collect information from the local top-level cells */
+  threadpool_map(&e->threadpool, engine_collect_end_of_step_mapper,
+                 s->local_cells_with_tasks_top, s->nr_local_cells_with_tasks,
+                 sizeof(int), 0, &data);
+
+  /* Get the number of inhibited particles from the space-wide counters
+   * since these have been updated atomically during the time-steps. */
+  data.inhibited = s->nr_inhibited_parts;
+  data.g_inhibited = s->nr_inhibited_gparts;
+  data.s_inhibited = s->nr_inhibited_sparts;
+  data.b_inhibited = s->nr_inhibited_bparts;
+
+  /* Store these in the temporary collection group. */
+  collectgroup1_init(
+      &e->collect_group1, data.updated, data.g_updated, data.s_updated,
+      data.b_updated, data.inhibited, data.g_inhibited, data.s_inhibited,
+      data.b_inhibited, data.ti_hydro_end_min, data.ti_hydro_end_max,
+      data.ti_hydro_beg_max, data.ti_gravity_end_min, data.ti_gravity_end_max,
+      data.ti_gravity_beg_max, data.ti_stars_end_min, data.ti_stars_end_max,
+      data.ti_stars_beg_max, data.ti_black_holes_end_min,
+      data.ti_black_holes_end_max, data.ti_black_holes_beg_max, e->forcerebuild,
+      e->s->tot_cells, e->sched.nr_tasks,
+      (float)e->sched.nr_tasks / (float)e->s->tot_cells, data.sfh);
+
+/* Aggregate collective data from the different nodes for this step. */
+#ifdef WITH_MPI
+  collectgroup1_reduce(&e->collect_group1);
+
+#ifdef SWIFT_DEBUG_CHECKS
+  {
+    /* Check the above using the original MPI calls. */
+    integertime_t in_i[2], out_i[2];
+    in_i[0] = 0;
+    in_i[1] = 0;
+    out_i[0] = data.ti_hydro_end_min;
+    out_i[1] = data.ti_gravity_end_min;
+    if (MPI_Allreduce(out_i, in_i, 2, MPI_LONG_LONG_INT, MPI_MIN,
+                      MPI_COMM_WORLD) != MPI_SUCCESS)
+      error("Failed to aggregate ti_end_min.");
+    if (in_i[0] != (long long)e->collect_group1.ti_hydro_end_min)
+      error("Failed to get same ti_hydro_end_min, is %lld, should be %lld",
+            in_i[0], e->collect_group1.ti_hydro_end_min);
+    if (in_i[1] != (long long)e->collect_group1.ti_gravity_end_min)
+      error("Failed to get same ti_gravity_end_min, is %lld, should be %lld",
+            in_i[1], e->collect_group1.ti_gravity_end_min);
+
+    long long in_ll[4], out_ll[4];
+    out_ll[0] = data.updated;
+    out_ll[1] = data.g_updated;
+    out_ll[2] = data.s_updated;
+    out_ll[3] = data.b_updated;
+    if (MPI_Allreduce(out_ll, in_ll, 4, MPI_LONG_LONG_INT, MPI_SUM,
+                      MPI_COMM_WORLD) != MPI_SUCCESS)
+      error("Failed to aggregate particle counts.");
+    if (in_ll[0] != (long long)e->collect_group1.updated)
+      error("Failed to get same updated, is %lld, should be %lld", in_ll[0],
+            e->collect_group1.updated);
+    if (in_ll[1] != (long long)e->collect_group1.g_updated)
+      error("Failed to get same g_updated, is %lld, should be %lld", in_ll[1],
+            e->collect_group1.g_updated);
+    if (in_ll[2] != (long long)e->collect_group1.s_updated)
+      error("Failed to get same s_updated, is %lld, should be %lld", in_ll[2],
+            e->collect_group1.s_updated);
+    if (in_ll[3] != (long long)e->collect_group1.b_updated)
+      error("Failed to get same b_updated, is %lld, should be %lld", in_ll[3],
+            e->collect_group1.b_updated);
+
+    out_ll[0] = data.inhibited;
+    out_ll[1] = data.g_inhibited;
+    out_ll[2] = data.s_inhibited;
+    out_ll[3] = data.b_inhibited;
+    if (MPI_Allreduce(out_ll, in_ll, 4, MPI_LONG_LONG_INT, MPI_SUM,
+                      MPI_COMM_WORLD) != MPI_SUCCESS)
+      error("Failed to aggregate particle counts.");
+    if (in_ll[0] != (long long)e->collect_group1.inhibited)
+      error("Failed to get same inhibited, is %lld, should be %lld", in_ll[0],
+            e->collect_group1.inhibited);
+    if (in_ll[1] != (long long)e->collect_group1.g_inhibited)
+      error("Failed to get same g_inhibited, is %lld, should be %lld", in_ll[1],
+            e->collect_group1.g_inhibited);
+    if (in_ll[2] != (long long)e->collect_group1.s_inhibited)
+      error("Failed to get same s_inhibited, is %lld, should be %lld", in_ll[2],
+            e->collect_group1.s_inhibited);
+    if (in_ll[3] != (long long)e->collect_group1.b_inhibited)
+      error("Failed to get same b_inhibited, is %lld, should be %lld", in_ll[3],
+            e->collect_group1.b_inhibited);
+
+    int buff = 0;
+    if (MPI_Allreduce(&e->forcerebuild, &buff, 1, MPI_INT, MPI_MAX,
+                      MPI_COMM_WORLD) != MPI_SUCCESS)
+      error("Failed to aggregate the rebuild flag across nodes.");
+    if (!!buff != !!e->collect_group1.forcerebuild)
+      error(
+          "Failed to get same rebuild flag from all nodes, is %d,"
+          "should be %d",
+          buff, e->collect_group1.forcerebuild);
+  }
+#endif
+#endif
+
+  /* Apply to the engine, if requested. */
+  if (apply) collectgroup1_apply(&e->collect_group1, e);
+
+  if (e->verbose)
+    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
+            clocks_getunit());
+}
diff --git a/src/engine_fof.c b/src/engine_fof.c
new file mode 100644
index 0000000000000000000000000000000000000000..f1bb5b452104642f68b4a9987a1ab8d8e3b0162b
--- /dev/null
+++ b/src/engine_fof.c
@@ -0,0 +1,150 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
+ *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *               2015 Peter W. Draper (p.w.draper@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+/* Config parameters. */
+#include "../config.h"
+
+/* This object's header. */
+#include "engine.h"
+
+/**
+ * @brief Activate all the #gpart communications in preparation
+ * for a call to FOF.
+ *
+ * @param e The #engine to act on.
+ */
+void engine_activate_gpart_comms(struct engine *e) {
+
+#ifdef WITH_MPI
+
+  const ticks tic = getticks();
+
+  struct scheduler *s = &e->sched;
+  const int nr_tasks = s->nr_tasks;
+  struct task *tasks = s->tasks;
+
+  for (int k = 0; k < nr_tasks; ++k) {
+
+    struct task *t = &tasks[k];
+
+    if ((t->type == task_type_send) && (t->subtype == task_subtype_gpart)) {
+      scheduler_activate(s, t);
+    } else if ((t->type == task_type_recv) &&
+               (t->subtype == task_subtype_gpart)) {
+      scheduler_activate(s, t);
+    } else {
+      t->skip = 1;
+    }
+  }
+
+  if (e->verbose)
+    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
+            clocks_getunit());
+
+#else
+  error("Calling an MPI function in non-MPI mode.");
+#endif
+}
+
+/**
+ * @brief Activate all the FOF tasks.
+ *
+ * Marks all the other task types to be skipped.
+ *
+ * @param e The #engine to act on.
+ */
+void engine_activate_fof_tasks(struct engine *e) {
+
+  const ticks tic = getticks();
+
+  struct scheduler *s = &e->sched;
+  const int nr_tasks = s->nr_tasks;
+  struct task *tasks = s->tasks;
+
+  for (int k = 0; k < nr_tasks; k++) {
+
+    struct task *t = &tasks[k];
+
+    if (t->type == task_type_fof_self || t->type == task_type_fof_pair)
+      scheduler_activate(s, t);
+    else
+      t->skip = 1;
+  }
+
+  if (e->verbose)
+    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
+            clocks_getunit());
+}
+
+/**
+ * @brief Run a FOF search.
+ *
+ * @param e the engine
+ * @param dump_results Are we writing group catalogues to output files?
+ * @param seed_black_holes Are we seeding black holes?
+ */
+void engine_fof(struct engine *e, const int dump_results,
+                const int seed_black_holes) {
+
+#ifdef WITH_FOF
+
+  ticks tic = getticks();
+
+  /* Compute number of DM particles */
+  const long long total_nr_baryons =
+      e->total_nr_parts + e->total_nr_sparts + e->total_nr_bparts;
+  const long long total_nr_dmparts =
+      e->total_nr_gparts - e->total_nr_DM_background_gparts - total_nr_baryons;
+
+  /* Initialise FOF parameters and allocate FOF arrays. */
+  fof_allocate(e->s, total_nr_dmparts, e->fof_properties);
+
+  /* Make FOF tasks */
+  engine_make_fof_tasks(e);
+
+  /* and activate them. */
+  engine_activate_fof_tasks(e);
+
+  /* Perform local FOF tasks. */
+  engine_launch(e);
+
+  /* Perform FOF search over foreign particles and
+   * find groups which require black hole seeding.  */
+  fof_search_tree(e->fof_properties, e->black_holes_properties,
+                  e->physical_constants, e->cosmology, e->s, dump_results,
+                  seed_black_holes);
+
+  /* Reset flag. */
+  e->run_fof = 0;
+
+  /* Flag that a FOF has taken place */
+  e->step_props |= engine_step_prop_fof;
+
+  /* ... and find the next FOF time */
+  if (seed_black_holes) engine_compute_next_fof_time(e);
+
+  if (engine_rank == 0)
+    message("Complete FOF search took: %.3f %s.",
+            clocks_from_ticks(getticks() - tic), clocks_getunit());
+#else
+  error("SWIFT was not compiled with FOF enabled!");
+#endif
+}
diff --git a/src/engine_redistribute.c b/src/engine_redistribute.c
new file mode 100644
index 0000000000000000000000000000000000000000..3132ad2665c67cd244ae1ec9ece75726788c1506
--- /dev/null
+++ b/src/engine_redistribute.c
@@ -0,0 +1,1031 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
+ *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *               2015 Peter W. Draper (p.w.draper@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+/* Config parameters. */
+#include "../config.h"
+
+/* This object's header. */
+#include "engine.h"
+
+/* Local headers. */
+#include "memswap.h"
+
+#ifdef WITH_MPI
+
+/**
+ * @brief Exchange one type of particles with all the other nodes.
+ *
+ * @param label a label for the memory allocations of this particle type.
+ * @param counts nr_nodes x nr_nodes array of particle counts to exchange;
+ *               entry [i * nr_nodes + j] is what node i sends to node j.
+ * @param parts the particle data to exchange, laid out by destination node.
+ * @param new_nr_parts the number of particles this node will have after all
+ *                     exchanges have completed.
+ * @param sizeofparts sizeof the particle struct.
+ * @param alignsize the memory alignment required for this particle type.
+ * @param mpi_type the MPI_Datatype for these particles.
+ * @param nr_nodes the number of nodes to exchange with.
+ * @param nodeID the id of this node.
+ *
+ * @return new particle data constructed from all the exchanges with the
+ *         given alignment.
+ */
+static void *engine_do_redistribute(const char *label, int *counts, char *parts,
+                                    size_t new_nr_parts, size_t sizeofparts,
+                                    size_t alignsize, MPI_Datatype mpi_type,
+                                    int nr_nodes, int nodeID) {
+
+  /* Allocate a new particle array with some extra margin */
+  char *parts_new = NULL;
+  if (swift_memalign(
+          label, (void **)&parts_new, alignsize,
+          sizeofparts * new_nr_parts * engine_redistribute_alloc_margin) != 0)
+    error("Failed to allocate new particle data.");
+
+  /* MPI requests: slot 2k is the send to node k, slot 2k+1 the receive. */
+  MPI_Request *reqs;
+  if ((reqs = (MPI_Request *)malloc(sizeof(MPI_Request) * 2 * nr_nodes)) ==
+      NULL)
+    error("Failed to allocate MPI request list.");
+
+  /* Send and receive at most "chunk" particles per request, looping as many
+   * times as necessary. chunk is INT_MAX/sizeofparts so that a single
+   * message stays below 2GB. */
+  const int chunk = INT_MAX / sizeofparts;
+  int sent = 0;
+  int recvd = 0;
+
+  int activenodes = 1;
+  while (activenodes) {
+
+    for (int k = 0; k < 2 * nr_nodes; k++) reqs[k] = MPI_REQUEST_NULL;
+
+    /* Emit the sends and recvs, starting past the chunks already handled. */
+    size_t offset_send = sent;
+    size_t offset_recv = recvd;
+    activenodes = 0;
+
+    for (int k = 0; k < nr_nodes; k++) {
+
+      /* Indices in the count arrays of the node of interest */
+      const int ind_send = nodeID * nr_nodes + k;
+      const int ind_recv = k * nr_nodes + nodeID;
+
+      /* Are we sending any data this loop? */
+      int sending = counts[ind_send] - sent;
+      if (sending > 0) {
+        activenodes++;
+        if (sending > chunk) sending = chunk;
+
+        /* If the send and receive is local then just copy. */
+        if (k == nodeID) {
+          int receiving = counts[ind_recv] - recvd;
+          if (receiving > chunk) receiving = chunk;
+          memcpy(&parts_new[offset_recv * sizeofparts],
+                 &parts[offset_send * sizeofparts], sizeofparts * receiving);
+        } else {
+          /* Otherwise send it, using the counts index as the message tag. */
+          int res =
+              MPI_Isend(&parts[offset_send * sizeofparts], sending, mpi_type, k,
+                        ind_send, MPI_COMM_WORLD, &reqs[2 * k + 0]);
+          if (res != MPI_SUCCESS)
+            mpi_error(res, "Failed to isend parts to node %i.", k);
+        }
+      }
+
+      /* If we're sending to this node, then move past it to next. */
+      if (counts[ind_send] > 0) offset_send += counts[ind_send];
+
+      /* Are we receiving any data from this node? Note already done if coming
+       * from this node. */
+      if (k != nodeID) {
+        int receiving = counts[ind_recv] - recvd;
+        if (receiving > 0) {
+          activenodes++;
+          if (receiving > chunk) receiving = chunk;
+          int res = MPI_Irecv(&parts_new[offset_recv * sizeofparts], receiving,
+                              mpi_type, k, ind_recv, MPI_COMM_WORLD,
+                              &reqs[2 * k + 1]);
+          if (res != MPI_SUCCESS)
+            mpi_error(res, "Failed to emit irecv of parts from node %i.", k);
+        }
+      }
+
+      /* If we're receiving from this node, then move past it to next. */
+      if (counts[ind_recv] > 0) offset_recv += counts[ind_recv];
+    }
+
+    /* Wait for all the sends and recvs to tumble in. */
+    MPI_Status stats[2 * nr_nodes];
+    int res;
+    if ((res = MPI_Waitall(2 * nr_nodes, reqs, stats)) != MPI_SUCCESS) {
+      for (int k = 0; k < 2 * nr_nodes; k++) {
+        char buff[MPI_MAX_ERROR_STRING];
+        MPI_Error_string(stats[k].MPI_ERROR, buff, &res);
+        message("request from source %i, tag %i has error '%s'.",
+                stats[k].MPI_SOURCE, stats[k].MPI_TAG, buff);
+      }
+      error("Failed during waitall for part data.");
+    }
+
+    /* Move to next chunks. */
+    sent += chunk;
+    recvd += chunk;
+  }
+
+  /* Free the request list. */
+  free(reqs);
+
+  /* And return the newly allocated array. */
+  return parts_new;
+}
+#endif
+
+#ifdef WITH_MPI /* redist_mapper */
+
+/* Support for engine_redistribute threadpool dest mappers. */
+struct redist_mapper_data {
+  int *counts;        /* Shared nr_nodes^2 counts, updated atomically. */
+  int *dest;          /* Output: destination node of each particle. */
+  int nodeID;         /* Row of counts that the mappers fill in. */
+  int nr_nodes;       /* Row length of the counts matrix. */
+  struct cell *cells; /* NOTE(review): not read by the dest mappers. */
+  struct space *s;    /* Provides dim, cdim, iwidth and cells_top. */
+  void *base;         /* First particle of the mapped array; indexes dest. */
+};
+
+/* Generic mapper computing the destination node of each TYPE particle and
+ * counting particles per destination. A thread-local per-node tally is
+ * merged into row nodeID of the shared counts matrix once per call. */
+#define ENGINE_REDISTRIBUTE_DEST_MAPPER(TYPE)                              \
+  engine_redistribute_dest_mapper_##TYPE(void *map_data, int num_elements, \
+                                         void *extra_data) {               \
+    struct TYPE *parts = (struct TYPE *)map_data;                          \
+    struct redist_mapper_data *mydata =                                    \
+        (struct redist_mapper_data *)extra_data;                           \
+    struct space *s = mydata->s;                                           \
+    int *dest =                                                            \
+        mydata->dest + (ptrdiff_t)(parts - (struct TYPE *)mydata->base);   \
+    const size_t nr_nodes = (size_t)mydata->nr_nodes;                      \
+    int *row = mydata->counts + (size_t)mydata->nodeID * nr_nodes;         \
+    int *lcounts = (int *)calloc(nr_nodes, sizeof(int));                   \
+    if (lcounts == NULL)                                                   \
+      error("Failed to allocate counts thread-specific buffer");           \
+    for (int k = 0; k < num_elements; k++) {                               \
+      for (int j = 0; j < 3; j++) {                                        \
+        if (parts[k].x[j] < 0.0)                                           \
+          parts[k].x[j] += s->dim[j];                                      \
+        else if (parts[k].x[j] >= s->dim[j])                               \
+          parts[k].x[j] -= s->dim[j];                                      \
+      }                                                                    \
+      const int cid = cell_getid(s->cdim, parts[k].x[0] * s->iwidth[0],    \
+                                 parts[k].x[1] * s->iwidth[1],             \
+                                 parts[k].x[2] * s->iwidth[2]);            \
+      dest[k] = s->cells_top[cid].nodeID;                                  \
+      lcounts[dest[k]] += 1;                                               \
+    }                                                                      \
+    for (size_t n = 0; n < nr_nodes; n++)                                  \
+      if (lcounts[n] != 0) atomic_add(&row[n], lcounts[n]);                \
+    free(lcounts);                                                         \
+  }
+
+/**
+ * @brief Compute the destination node of each part and count them per node.
+ * Threadpool mapper; positions are first wrapped once by the space dimensions.
+ *
+ * part version.
+ */
+static void ENGINE_REDISTRIBUTE_DEST_MAPPER(part);
+
+/**
+ * @brief Compute the destination node of each spart and count them per node.
+ * Threadpool mapper; positions are first wrapped once by the space dimensions.
+ *
+ * spart version.
+ */
+static void ENGINE_REDISTRIBUTE_DEST_MAPPER(spart);
+
+/**
+ * @brief Compute the destination node of each gpart and count them per node.
+ * Threadpool mapper; positions are first wrapped once by the space dimensions.
+ *
+ * gpart version.
+ */
+static void ENGINE_REDISTRIBUTE_DEST_MAPPER(gpart);
+
+/**
+ * @brief Compute the destination node of each bpart and count them per node.
+ * Threadpool mapper; positions are first wrapped once by the space dimensions.
+ *
+ * bpart version.
+ */
+static void ENGINE_REDISTRIBUTE_DEST_MAPPER(bpart);
+
+#endif /* redist_mapper_data */
+
+#ifdef WITH_MPI /* savelink_mapper_data */
+
+/* Support for saving the linkage between gparts and parts/sparts/bparts. */
+struct savelink_mapper_data {
+  int nr_nodes; /* Row length of the counts matrix. */
+  int *counts;  /* Counts matrix; row nodeID holds what this node sends. */
+  void *parts;  /* Particle array, cast to struct TYPE * by the mapper. */
+  int nodeID;   /* Row of counts read by the mapper. */
+};
+
+/**
+ * @brief Save the offset of each gravity partner of a part, spart or bpart.
+ *
+ * The offset is from the start of the sorted particles to be sent to a node.
+ * This is possible as parts without gravity partners have a positive id.
+ * These offsets are used to restore the pointers on the receiving node.
+ * Only particles with a gpart are counted; the offset is stored negated.
+ * CHECKS should be eliminated as dead code when optimizing.
+ */
+#define ENGINE_REDISTRIBUTE_SAVELINK_MAPPER(TYPE, CHECKS)                      \
+  engine_redistribute_savelink_mapper_##TYPE(void *map_data, int num_elements, \
+                                             void *extra_data) {               \
+    int *nodes = (int *)map_data;                                              \
+    struct savelink_mapper_data *mydata =                                      \
+        (struct savelink_mapper_data *)extra_data;                             \
+    int nodeID = mydata->nodeID;                                               \
+    int nr_nodes = mydata->nr_nodes;                                           \
+    int *counts = mydata->counts;                                              \
+    struct TYPE *parts = (struct TYPE *)mydata->parts;                         \
+                                                                               \
+    for (int j = 0; j < num_elements; j++) {                                   \
+      int node = nodes[j];                                                     \
+      int count = 0;                                                           \
+      size_t offset = 0;                                                       \
+      for (int i = 0; i < node; i++) offset += counts[nodeID * nr_nodes + i];  \
+                                                                               \
+      for (int k = 0; k < counts[nodeID * nr_nodes + node]; k++) {             \
+        if (parts[k + offset].gpart != NULL) {                                 \
+          if (CHECKS)                                                          \
+            if (parts[k + offset].gpart->id_or_neg_offset > 0)                 \
+              error("Trying to link a partnerless " #TYPE "!");                \
+          parts[k + offset].gpart->id_or_neg_offset = -count;                  \
+          count++;                                                             \
+        }                                                                      \
+      }                                                                        \
+    }                                                                          \
+  }
+
+/**
+ * @brief Save position of part-gpart links.
+ * Threadpool mapper over destination nodes; checks enabled in debug builds.
+ */
+#ifdef SWIFT_DEBUG_CHECKS
+static void ENGINE_REDISTRIBUTE_SAVELINK_MAPPER(part, 1);
+#else
+static void ENGINE_REDISTRIBUTE_SAVELINK_MAPPER(part, 0);
+#endif
+
+/**
+ * @brief Save position of spart-gpart links.
+ * Threadpool mapper over destination nodes; checks enabled in debug builds.
+ */
+#ifdef SWIFT_DEBUG_CHECKS
+static void ENGINE_REDISTRIBUTE_SAVELINK_MAPPER(spart, 1);
+#else
+static void ENGINE_REDISTRIBUTE_SAVELINK_MAPPER(spart, 0);
+#endif
+
+/**
+ * @brief Save position of bpart-gpart links.
+ * Threadpool mapper over destination nodes; checks enabled in debug builds.
+ */
+#ifdef SWIFT_DEBUG_CHECKS
+static void ENGINE_REDISTRIBUTE_SAVELINK_MAPPER(bpart, 1);
+#else
+static void ENGINE_REDISTRIBUTE_SAVELINK_MAPPER(bpart, 0);
+#endif
+
+#endif /* savelink_mapper_data */
+
+#ifdef WITH_MPI /* relink_mapper_data */
+
+/* Support for relinking parts, gparts, sparts and bparts after moving between
+ * nodes. */
+struct relink_mapper_data {
+  int nodeID;      /* This node: the column of the counts that is read. */
+  int nr_nodes;    /* Row length of the counts matrices. */
+  int *counts;     /* part counts, indexed [sender * nr_nodes + receiver]. */
+  int *s_counts;   /* spart counts, same layout. */
+  int *g_counts;   /* gpart counts, same layout. */
+  int *b_counts;   /* bpart counts, same layout. */
+  struct space *s; /* Space whose particle arrays are relinked. */
+};
+
+/**
+ * @brief Restore the part, spart and bpart links to gparts for a node list.
+ *
+ * @param map_data address of nodes to process.
+ * @param num_elements the number of nodes to process.
+ * @param extra_data additional data defining the context (a
+ * relink_mapper_data).
+ */
+static void engine_redistribute_relink_mapper(void *map_data, int num_elements,
+                                              void *extra_data) {
+
+  int *nodes = (int *)map_data;
+  struct relink_mapper_data *mydata = (struct relink_mapper_data *)extra_data;
+
+  int nodeID = mydata->nodeID;
+  int nr_nodes = mydata->nr_nodes;
+  int *counts = mydata->counts;
+  int *g_counts = mydata->g_counts;
+  int *s_counts = mydata->s_counts;
+  int *b_counts = mydata->b_counts;
+  struct space *s = mydata->s;
+
+  for (int i = 0; i < num_elements; i++) {
+
+    int node = nodes[i];
+
+    /* Sum what lower-numbered nodes sent us to find where this node starts. */
+    size_t offset_parts = 0;
+    size_t offset_gparts = 0;
+    size_t offset_sparts = 0;
+    size_t offset_bparts = 0;
+    for (int n = 0; n < node; n++) {
+      int ind_recv = n * nr_nodes + nodeID;
+      offset_parts += counts[ind_recv];
+      offset_gparts += g_counts[ind_recv];
+      offset_sparts += s_counts[ind_recv];
+      offset_bparts += b_counts[ind_recv];
+    }
+
+    /* Number of gparts sent from this node. */
+    int ind_recv = node * nr_nodes + nodeID;
+    const size_t count_gparts = g_counts[ind_recv];
+
+    /* Loop over the gparts received from this node */
+    for (size_t k = offset_gparts; k < offset_gparts + count_gparts; k++) {
+
+      /* Does this gpart have a gas partner ? */
+      if (s->gparts[k].type == swift_type_gas) {
+
+        const ptrdiff_t partner_index =
+            offset_parts - s->gparts[k].id_or_neg_offset;
+
+        /* Re-link in both directions */
+        s->gparts[k].id_or_neg_offset = -partner_index;
+        s->parts[partner_index].gpart = &s->gparts[k];
+      }
+
+      /* Does this gpart have a star partner ? */
+      else if (s->gparts[k].type == swift_type_stars) {
+
+        const ptrdiff_t partner_index =
+            offset_sparts - s->gparts[k].id_or_neg_offset;
+
+        /* Re-link in both directions */
+        s->gparts[k].id_or_neg_offset = -partner_index;
+        s->sparts[partner_index].gpart = &s->gparts[k];
+      }
+
+      /* Does this gpart have a black hole partner ? */
+      else if (s->gparts[k].type == swift_type_black_hole) {
+
+        const ptrdiff_t partner_index =
+            offset_bparts - s->gparts[k].id_or_neg_offset;
+
+        /* Re-link in both directions */
+        s->gparts[k].id_or_neg_offset = -partner_index;
+        s->bparts[partner_index].gpart = &s->gparts[k];
+      }
+    }
+  }
+}
+
+#endif /* relink_mapper_data */
+
+/**
+ * @brief Redistribute the particles amongst the nodes according
+ *      to their cell's node IDs.
+ *
+ * The strategy here is as follows:
+ * 1) Each node counts the number of particles it has to send to each other
+ * node.
+ * 2) The number of particles of each type is then exchanged.
+ * 3) The particles to send are placed in a temporary buffer in which the
+ * part-gpart links are preserved.
+ * 4) Each node allocates enough space for the new particles.
+ * 5) (Asynchronous) communications are issued to transfer the data.
+ *
+ *
+ * @param e The #engine.
+ */
+void engine_redistribute(struct engine *e) {
+
+#ifdef WITH_MPI
+
+  const int nr_nodes = e->nr_nodes;
+  const int nodeID = e->nodeID;
+  struct space *s = e->s;
+  struct cell *cells = s->cells_top;
+  const int nr_cells = s->nr_cells;
+  struct xpart *xparts = s->xparts;
+  struct part *parts = s->parts;
+  struct gpart *gparts = s->gparts;
+  struct spart *sparts = s->sparts;
+  struct bpart *bparts = s->bparts;
+  ticks tic = getticks();
+
+  size_t nr_parts = s->nr_parts;
+  size_t nr_gparts = s->nr_gparts;
+  size_t nr_sparts = s->nr_sparts;
+  size_t nr_bparts = s->nr_bparts;
+
+  /* Start by moving inhibited particles to the end of the arrays */
+  for (size_t k = 0; k < nr_parts; /* void */) {
+    if (parts[k].time_bin == time_bin_inhibited ||
+        parts[k].time_bin == time_bin_not_created) {
+      nr_parts -= 1;
+
+      /* Swap the particle */
+      memswap(&parts[k], &parts[nr_parts], sizeof(struct part));
+
+      /* Swap the xpart */
+      memswap(&xparts[k], &xparts[nr_parts], sizeof(struct xpart));
+
+      /* Swap the link with the gpart */
+      if (parts[k].gpart != NULL) {
+        parts[k].gpart->id_or_neg_offset = -k;
+      }
+      if (parts[nr_parts].gpart != NULL) {
+        parts[nr_parts].gpart->id_or_neg_offset = -nr_parts;
+      }
+    } else {
+      k++;
+    }
+  }
+
+  /* Now move inhibited star particles to the end of the arrays */
+  for (size_t k = 0; k < nr_sparts; /* void */) {
+    if (sparts[k].time_bin == time_bin_inhibited ||
+        sparts[k].time_bin == time_bin_not_created) {
+      nr_sparts -= 1;
+
+      /* Swap the particle */
+      memswap(&s->sparts[k], &s->sparts[nr_sparts], sizeof(struct spart));
+
+      /* Swap the link with the gpart */
+      if (s->sparts[k].gpart != NULL) {
+        s->sparts[k].gpart->id_or_neg_offset = -k;
+      }
+      if (s->sparts[nr_sparts].gpart != NULL) {
+        s->sparts[nr_sparts].gpart->id_or_neg_offset = -nr_sparts;
+      }
+    } else {
+      k++;
+    }
+  }
+
+  /* Now move inhibited black hole particles to the end of the arrays */
+  for (size_t k = 0; k < nr_bparts; /* void */) {
+    if (bparts[k].time_bin == time_bin_inhibited ||
+        bparts[k].time_bin == time_bin_not_created) {
+      nr_bparts -= 1;
+
+      /* Swap the particle */
+      memswap(&s->bparts[k], &s->bparts[nr_bparts], sizeof(struct bpart));
+
+      /* Swap the link with the gpart */
+      if (s->bparts[k].gpart != NULL) {
+        s->bparts[k].gpart->id_or_neg_offset = -k;
+      }
+      if (s->bparts[nr_bparts].gpart != NULL) {
+        s->bparts[nr_bparts].gpart->id_or_neg_offset = -nr_bparts;
+      }
+    } else {
+      k++;
+    }
+  }
+
+  /* Finally do the same with the gravity particles */
+  for (size_t k = 0; k < nr_gparts; /* void */) {
+    if (gparts[k].time_bin == time_bin_inhibited ||
+        gparts[k].time_bin == time_bin_not_created) {
+      nr_gparts -= 1;
+
+      /* Swap the particle */
+      memswap(&s->gparts[k], &s->gparts[nr_gparts], sizeof(struct gpart));
+
+      /* Swap the link with part/spart */
+      if (s->gparts[k].type == swift_type_gas) {
+        s->parts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k];
+      } else if (s->gparts[k].type == swift_type_stars) {
+        s->sparts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k];
+      } else if (s->gparts[k].type == swift_type_black_hole) {
+        s->bparts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k];
+      }
+
+      if (s->gparts[nr_gparts].type == swift_type_gas) {
+        s->parts[-s->gparts[nr_gparts].id_or_neg_offset].gpart =
+            &s->gparts[nr_gparts];
+      } else if (s->gparts[nr_gparts].type == swift_type_stars) {
+        s->sparts[-s->gparts[nr_gparts].id_or_neg_offset].gpart =
+            &s->gparts[nr_gparts];
+      } else if (s->gparts[nr_gparts].type == swift_type_black_hole) {
+        s->bparts[-s->gparts[nr_gparts].id_or_neg_offset].gpart =
+            &s->gparts[nr_gparts];
+      }
+    } else {
+      k++;
+    }
+  }
+
+  /* Now we are ready to deal with real particles and can start the exchange. */
+
+  /* Allocate temporary arrays to store the counts of particles to be sent
+   * and the destination of each particle */
+  int *counts;
+  if ((counts = (int *)calloc(sizeof(int), nr_nodes * nr_nodes)) == NULL)
+    error("Failed to allocate counts temporary buffer.");
+
+  int *dest;
+  if ((dest = (int *)swift_malloc("dest", sizeof(int) * nr_parts)) == NULL)
+    error("Failed to allocate dest temporary buffer.");
+
+  /* Simple index of node IDs, used for mappers over nodes. */
+  int *nodes = NULL;
+  if ((nodes = (int *)malloc(sizeof(int) * nr_nodes)) == NULL)
+    error("Failed to allocate nodes temporary buffer.");
+  for (int k = 0; k < nr_nodes; k++) nodes[k] = k;
+
+  /* Get destination of each particle */
+  struct redist_mapper_data redist_data;
+  redist_data.s = s;
+  redist_data.nodeID = nodeID;
+  redist_data.nr_nodes = nr_nodes;
+
+  redist_data.counts = counts;
+  redist_data.dest = dest;
+  redist_data.base = (void *)parts;
+
+  threadpool_map(&e->threadpool, engine_redistribute_dest_mapper_part, parts,
+                 nr_parts, sizeof(struct part), 0, &redist_data);
+
+  /* Sort the particles according to their cell index. */
+  if (nr_parts > 0)
+    space_parts_sort(s->parts, s->xparts, dest, &counts[nodeID * nr_nodes],
+                     nr_nodes, 0);
+
+#ifdef SWIFT_DEBUG_CHECKS
+  /* Verify that the part have been sorted correctly. */
+  for (size_t k = 0; k < nr_parts; k++) {
+    const struct part *p = &s->parts[k];
+
+    if (p->time_bin == time_bin_inhibited)
+      error("Inhibited particle found after sorting!");
+
+    if (p->time_bin == time_bin_not_created)
+      error("Inhibited particle found after sorting!");
+
+    /* New cell index */
+    const int new_cid =
+        cell_getid(s->cdim, p->x[0] * s->iwidth[0], p->x[1] * s->iwidth[1],
+                   p->x[2] * s->iwidth[2]);
+
+    /* New cell of this part */
+    const struct cell *c = &s->cells_top[new_cid];
+    const int new_node = c->nodeID;
+
+    if (dest[k] != new_node)
+      error("part's new node index not matching sorted index.");
+
+    if (p->x[0] < c->loc[0] || p->x[0] > c->loc[0] + c->width[0] ||
+        p->x[1] < c->loc[1] || p->x[1] > c->loc[1] + c->width[1] ||
+        p->x[2] < c->loc[2] || p->x[2] > c->loc[2] + c->width[2])
+      error("part not sorted into the right top-level cell!");
+  }
+#endif
+
+  /* We will need to re-link the gpart partners of parts, so save their
+   * relative positions in the sent lists. */
+  if (nr_parts > 0 && nr_gparts > 0) {
+
+    struct savelink_mapper_data savelink_data;
+    savelink_data.nr_nodes = nr_nodes;
+    savelink_data.counts = counts;
+    savelink_data.parts = (void *)parts;
+    savelink_data.nodeID = nodeID;
+    threadpool_map(&e->threadpool, engine_redistribute_savelink_mapper_part,
+                   nodes, nr_nodes, sizeof(int), 0, &savelink_data);
+  }
+  swift_free("dest", dest);
+
+  /* Get destination of each s-particle */
+  int *s_counts;
+  if ((s_counts = (int *)calloc(sizeof(int), nr_nodes * nr_nodes)) == NULL)
+    error("Failed to allocate s_counts temporary buffer.");
+
+  int *s_dest;
+  if ((s_dest = (int *)swift_malloc("s_dest", sizeof(int) * nr_sparts)) == NULL)
+    error("Failed to allocate s_dest temporary buffer.");
+
+  redist_data.counts = s_counts;
+  redist_data.dest = s_dest;
+  redist_data.base = (void *)sparts;
+
+  threadpool_map(&e->threadpool, engine_redistribute_dest_mapper_spart, sparts,
+                 nr_sparts, sizeof(struct spart), 0, &redist_data);
+
+  /* Sort the particles according to their cell index. */
+  if (nr_sparts > 0)
+    space_sparts_sort(s->sparts, s_dest, &s_counts[nodeID * nr_nodes], nr_nodes,
+                      0);
+
+#ifdef SWIFT_DEBUG_CHECKS
+  /* Verify that the spart have been sorted correctly. */
+  for (size_t k = 0; k < nr_sparts; k++) {
+    const struct spart *sp = &s->sparts[k];
+
+    if (sp->time_bin == time_bin_inhibited)
+      error("Inhibited particle found after sorting!");
+
+    if (sp->time_bin == time_bin_not_created)
+      error("Inhibited particle found after sorting!");
+
+    /* New cell index */
+    const int new_cid =
+        cell_getid(s->cdim, sp->x[0] * s->iwidth[0], sp->x[1] * s->iwidth[1],
+                   sp->x[2] * s->iwidth[2]);
+
+    /* New cell of this spart */
+    const struct cell *c = &s->cells_top[new_cid];
+    const int new_node = c->nodeID;
+
+    if (s_dest[k] != new_node)
+      error("spart's new node index not matching sorted index.");
+
+    if (sp->x[0] < c->loc[0] || sp->x[0] > c->loc[0] + c->width[0] ||
+        sp->x[1] < c->loc[1] || sp->x[1] > c->loc[1] + c->width[1] ||
+        sp->x[2] < c->loc[2] || sp->x[2] > c->loc[2] + c->width[2])
+      error("spart not sorted into the right top-level cell!");
+  }
+#endif
+
+  /* We need to re-link the gpart partners of sparts. */
+  if (nr_sparts > 0) {
+
+    struct savelink_mapper_data savelink_data;
+    savelink_data.nr_nodes = nr_nodes;
+    savelink_data.counts = s_counts;
+    savelink_data.parts = (void *)sparts;
+    savelink_data.nodeID = nodeID;
+    threadpool_map(&e->threadpool, engine_redistribute_savelink_mapper_spart,
+                   nodes, nr_nodes, sizeof(int), 0, &savelink_data);
+  }
+  swift_free("s_dest", s_dest);
+
+  /* Get destination of each b-particle */
+  int *b_counts;
+  if ((b_counts = (int *)calloc(sizeof(int), nr_nodes * nr_nodes)) == NULL)
+    error("Failed to allocate b_counts temporary buffer.");
+
+  int *b_dest;
+  if ((b_dest = (int *)swift_malloc("b_dest", sizeof(int) * nr_bparts)) == NULL)
+    error("Failed to allocate b_dest temporary buffer.");
+
+  redist_data.counts = b_counts;
+  redist_data.dest = b_dest;
+  redist_data.base = (void *)bparts;
+
+  threadpool_map(&e->threadpool, engine_redistribute_dest_mapper_bpart, bparts,
+                 nr_bparts, sizeof(struct bpart), 0, &redist_data);
+
+  /* Sort the particles according to their cell index. */
+  if (nr_bparts > 0)
+    space_bparts_sort(s->bparts, b_dest, &b_counts[nodeID * nr_nodes], nr_nodes,
+                      0);
+
+#ifdef SWIFT_DEBUG_CHECKS
+  /* Verify that the bpart have been sorted correctly. */
+  for (size_t k = 0; k < nr_bparts; k++) {
+    const struct bpart *bp = &s->bparts[k];
+
+    if (bp->time_bin == time_bin_inhibited)
+      error("Inhibited particle found after sorting!");
+
+    if (bp->time_bin == time_bin_not_created)
+      error("Inhibited particle found after sorting!");
+
+    /* New cell index */
+    const int new_cid =
+        cell_getid(s->cdim, bp->x[0] * s->iwidth[0], bp->x[1] * s->iwidth[1],
+                   bp->x[2] * s->iwidth[2]);
+
+    /* New cell of this bpart */
+    const struct cell *c = &s->cells_top[new_cid];
+    const int new_node = c->nodeID;
+
+    if (b_dest[k] != new_node)
+      error("bpart's new node index not matching sorted index.");
+
+    if (bp->x[0] < c->loc[0] || bp->x[0] > c->loc[0] + c->width[0] ||
+        bp->x[1] < c->loc[1] || bp->x[1] > c->loc[1] + c->width[1] ||
+        bp->x[2] < c->loc[2] || bp->x[2] > c->loc[2] + c->width[2])
+      error("bpart not sorted into the right top-level cell!");
+  }
+#endif
+
+  /* We need to re-link the gpart partners of bparts. */
+  if (nr_bparts > 0) {
+
+    struct savelink_mapper_data savelink_data;
+    savelink_data.nr_nodes = nr_nodes;
+    savelink_data.counts = b_counts;
+    savelink_data.parts = (void *)bparts;
+    savelink_data.nodeID = nodeID;
+    threadpool_map(&e->threadpool, engine_redistribute_savelink_mapper_bpart,
+                   nodes, nr_nodes, sizeof(int), 0, &savelink_data);
+  }
+  swift_free("b_dest", b_dest);
+
+  /* Get destination of each g-particle */
+  int *g_counts;
+  if ((g_counts = (int *)calloc(sizeof(int), nr_nodes * nr_nodes)) == NULL)
+    error("Failed to allocate g_gcount temporary buffer.");
+
+  int *g_dest;
+  if ((g_dest = (int *)swift_malloc("g_dest", sizeof(int) * nr_gparts)) == NULL)
+    error("Failed to allocate g_dest temporary buffer.");
+
+  redist_data.counts = g_counts;
+  redist_data.dest = g_dest;
+  redist_data.base = (void *)gparts;
+
+  threadpool_map(&e->threadpool, engine_redistribute_dest_mapper_gpart, gparts,
+                 nr_gparts, sizeof(struct gpart), 0, &redist_data);
+
+  /* Sort the gparticles according to their cell index. */
+  if (nr_gparts > 0)
+    space_gparts_sort(s->gparts, s->parts, s->sparts, s->bparts, g_dest,
+                      &g_counts[nodeID * nr_nodes], nr_nodes);
+
+#ifdef SWIFT_DEBUG_CHECKS
+  /* Verify that the gpart have been sorted correctly. */
+  for (size_t k = 0; k < nr_gparts; k++) {
+    const struct gpart *gp = &s->gparts[k];
+
+    if (gp->time_bin == time_bin_inhibited)
+      error("Inhibited particle found after sorting!");
+
+    if (gp->time_bin == time_bin_not_created)
+      error("Inhibited particle found after sorting!");
+
+    /* New cell index */
+    const int new_cid =
+        cell_getid(s->cdim, gp->x[0] * s->iwidth[0], gp->x[1] * s->iwidth[1],
+                   gp->x[2] * s->iwidth[2]);
+
+    /* New cell of this gpart */
+    const struct cell *c = &s->cells_top[new_cid];
+    const int new_node = c->nodeID;
+
+    if (g_dest[k] != new_node)
+      error("gpart's new node index not matching sorted index (%d != %d).",
+            g_dest[k], new_node);
+
+    if (gp->x[0] < c->loc[0] || gp->x[0] > c->loc[0] + c->width[0] ||
+        gp->x[1] < c->loc[1] || gp->x[1] > c->loc[1] + c->width[1] ||
+        gp->x[2] < c->loc[2] || gp->x[2] > c->loc[2] + c->width[2])
+      error("gpart not sorted into the right top-level cell!");
+  }
+#endif
+
+  swift_free("g_dest", g_dest);
+
+  /* Get all the counts from all the nodes. */
+  if (MPI_Allreduce(MPI_IN_PLACE, counts, nr_nodes * nr_nodes, MPI_INT, MPI_SUM,
+                    MPI_COMM_WORLD) != MPI_SUCCESS)
+    error("Failed to allreduce particle transfer counts.");
+
+  /* Get all the g_counts from all the nodes. */
+  if (MPI_Allreduce(MPI_IN_PLACE, g_counts, nr_nodes * nr_nodes, MPI_INT,
+                    MPI_SUM, MPI_COMM_WORLD) != MPI_SUCCESS)
+    error("Failed to allreduce gparticle transfer counts.");
+
+  /* Get all the s_counts from all the nodes. */
+  if (MPI_Allreduce(MPI_IN_PLACE, s_counts, nr_nodes * nr_nodes, MPI_INT,
+                    MPI_SUM, MPI_COMM_WORLD) != MPI_SUCCESS)
+    error("Failed to allreduce sparticle transfer counts.");
+
+  /* Get all the b_counts from all the nodes. */
+  if (MPI_Allreduce(MPI_IN_PLACE, b_counts, nr_nodes * nr_nodes, MPI_INT,
+                    MPI_SUM, MPI_COMM_WORLD) != MPI_SUCCESS)
+    error("Failed to allreduce bparticle transfer counts.");
+
+  /* Report how many particles will be moved. */
+  if (e->verbose) {
+    if (e->nodeID == 0) {
+      size_t total = 0, g_total = 0, s_total = 0, b_total = 0;
+      size_t unmoved = 0, g_unmoved = 0, s_unmoved = 0, b_unmoved = 0;
+      for (int p = 0, r = 0; p < nr_nodes; p++) {
+        for (int n = 0; n < nr_nodes; n++) {
+          total += counts[r];
+          g_total += g_counts[r];
+          s_total += s_counts[r];
+          b_total += b_counts[r];
+          if (p == n) {
+            unmoved += counts[r];
+            g_unmoved += g_counts[r];
+            s_unmoved += s_counts[r];
+            b_unmoved += b_counts[r];
+          }
+          r++;
+        }
+      }
+      if (total > 0)
+        message("%zu of %zu (%.2f%%) of particles moved", total - unmoved,
+                total, 100.0 * (double)(total - unmoved) / (double)total);
+      if (g_total > 0)
+        message("%zu of %zu (%.2f%%) of g-particles moved", g_total - g_unmoved,
+                g_total,
+                100.0 * (double)(g_total - g_unmoved) / (double)g_total);
+      if (s_total > 0)
+        message("%zu of %zu (%.2f%%) of s-particles moved", s_total - s_unmoved,
+                s_total,
+                100.0 * (double)(s_total - s_unmoved) / (double)s_total);
+      if (b_total > 0)
+        message("%ld of %ld (%.2f%%) of b-particles moved", b_total - b_unmoved,
+                b_total,
+                100.0 * (double)(b_total - b_unmoved) / (double)b_total);
+    }
+  }
+
+  /* Now each node knows how many parts, sparts, bparts, and gparts will be
+   * transferred to every other node. Get the new numbers of particles for this
+   * node. */
+  size_t nr_parts_new = 0, nr_gparts_new = 0, nr_sparts_new = 0,
+         nr_bparts_new = 0;
+  for (int k = 0; k < nr_nodes; k++)
+    nr_parts_new += counts[k * nr_nodes + nodeID];
+  for (int k = 0; k < nr_nodes; k++)
+    nr_gparts_new += g_counts[k * nr_nodes + nodeID];
+  for (int k = 0; k < nr_nodes; k++)
+    nr_sparts_new += s_counts[k * nr_nodes + nodeID];
+  for (int k = 0; k < nr_nodes; k++)
+    nr_bparts_new += b_counts[k * nr_nodes + nodeID];
+
+  /* Now exchange the particles, type by type to keep the memory required
+   * under control. */
+
+  /* SPH particles. */
+  void *new_parts = engine_do_redistribute(
+      "parts", counts, (char *)s->parts, nr_parts_new, sizeof(struct part),
+      part_align, part_mpi_type, nr_nodes, nodeID);
+  swift_free("parts", s->parts);
+  s->parts = (struct part *)new_parts;
+  s->nr_parts = nr_parts_new;
+  s->size_parts = engine_redistribute_alloc_margin * nr_parts_new;
+
+  /* Extra SPH particle properties. */
+  new_parts = engine_do_redistribute(
+      "xparts", counts, (char *)s->xparts, nr_parts_new, sizeof(struct xpart),
+      xpart_align, xpart_mpi_type, nr_nodes, nodeID);
+  swift_free("xparts", s->xparts);
+  s->xparts = (struct xpart *)new_parts;
+
+  /* Gravity particles. */
+  new_parts = engine_do_redistribute(
+      "gparts", g_counts, (char *)s->gparts, nr_gparts_new,
+      sizeof(struct gpart), gpart_align, gpart_mpi_type, nr_nodes, nodeID);
+  swift_free("gparts", s->gparts);
+  s->gparts = (struct gpart *)new_parts;
+  s->nr_gparts = nr_gparts_new;
+  s->size_gparts = engine_redistribute_alloc_margin * nr_gparts_new;
+
+  /* Star particles. */
+  new_parts = engine_do_redistribute(
+      "sparts", s_counts, (char *)s->sparts, nr_sparts_new,
+      sizeof(struct spart), spart_align, spart_mpi_type, nr_nodes, nodeID);
+  swift_free("sparts", s->sparts);
+  s->sparts = (struct spart *)new_parts;
+  s->nr_sparts = nr_sparts_new;
+  s->size_sparts = engine_redistribute_alloc_margin * nr_sparts_new;
+
+  /* Black holes particles. */
+  new_parts = engine_do_redistribute(
+      "bparts", b_counts, (char *)s->bparts, nr_bparts_new,
+      sizeof(struct bpart), bpart_align, bpart_mpi_type, nr_nodes, nodeID);
+  swift_free("bparts", s->bparts);
+  s->bparts = (struct bpart *)new_parts;
+  s->nr_bparts = nr_bparts_new;
+  s->size_bparts = engine_redistribute_alloc_margin * nr_bparts_new;
+
+  /* All particles have now arrived. Time for some final operations on the
+     stuff we just received */
+
+  /* Restore the part<->gpart and spart<->gpart links.
+   * Generate indices and counts for threadpool tasks. Note we process a node
+   * at a time. */
+  struct relink_mapper_data relink_data;
+  relink_data.s = s;
+  relink_data.counts = counts;
+  relink_data.g_counts = g_counts;
+  relink_data.s_counts = s_counts;
+  relink_data.b_counts = b_counts;
+  relink_data.nodeID = nodeID;
+  relink_data.nr_nodes = nr_nodes;
+
+  threadpool_map(&e->threadpool, engine_redistribute_relink_mapper, nodes,
+                 nr_nodes, sizeof(int), 1, &relink_data);
+  free(nodes);
+
+  /* Clean up the counts now we are done. */
+  free(counts);
+  free(g_counts);
+  free(s_counts);
+  free(b_counts);
+
+#ifdef SWIFT_DEBUG_CHECKS
+  /* Verify that all parts are in the right place. */
+  for (size_t k = 0; k < nr_parts_new; k++) {
+    const int cid = cell_getid(s->cdim, s->parts[k].x[0] * s->iwidth[0],
+                               s->parts[k].x[1] * s->iwidth[1],
+                               s->parts[k].x[2] * s->iwidth[2]);
+    if (cells[cid].nodeID != nodeID)
+      error("Received particle (%zu) that does not belong here (nodeID=%i).", k,
+            cells[cid].nodeID);
+  }
+  for (size_t k = 0; k < nr_gparts_new; k++) {
+    const int cid = cell_getid(s->cdim, s->gparts[k].x[0] * s->iwidth[0],
+                               s->gparts[k].x[1] * s->iwidth[1],
+                               s->gparts[k].x[2] * s->iwidth[2]);
+    if (cells[cid].nodeID != nodeID)
+      error("Received g-particle (%zu) that does not belong here (nodeID=%i).",
+            k, cells[cid].nodeID);
+  }
+  for (size_t k = 0; k < nr_sparts_new; k++) {
+    const int cid = cell_getid(s->cdim, s->sparts[k].x[0] * s->iwidth[0],
+                               s->sparts[k].x[1] * s->iwidth[1],
+                               s->sparts[k].x[2] * s->iwidth[2]);
+    if (cells[cid].nodeID != nodeID)
+      error("Received s-particle (%zu) that does not belong here (nodeID=%i).",
+            k, cells[cid].nodeID);
+  }
+  for (size_t k = 0; k < nr_bparts_new; k++) {
+    const int cid = cell_getid(s->cdim, s->bparts[k].x[0] * s->iwidth[0],
+                               s->bparts[k].x[1] * s->iwidth[1],
+                               s->bparts[k].x[2] * s->iwidth[2]);
+    if (cells[cid].nodeID != nodeID)
+      error("Received b-particle (%zu) that does not belong here (nodeID=%i).",
+            k, cells[cid].nodeID);
+  }
+
+  /* Verify that the links are correct */
+  part_verify_links(s->parts, s->gparts, s->sparts, s->bparts, nr_parts_new,
+                    nr_gparts_new, nr_sparts_new, nr_bparts_new, e->verbose);
+
+#endif
+
+  /* Be verbose about what just happened. */
+  if (e->verbose) {
+    int my_cells = 0;
+    for (int k = 0; k < nr_cells; k++)
+      if (cells[k].nodeID == nodeID) my_cells += 1;
+    message(
+        "node %i now has %zu parts, %zu sparts, %zu bparts and %zu gparts in "
+        "%i cells.",
+        nodeID, nr_parts_new, nr_sparts_new, nr_bparts_new, nr_gparts_new,
+        my_cells);
+  }
+
+  /* Flag that we do not have any extra particles any more */
+  s->nr_extra_parts = 0;
+  s->nr_extra_gparts = 0;
+  s->nr_extra_sparts = 0;
+  s->nr_extra_bparts = 0;
+
+  /* Flag that a redistribute has taken place */
+  e->step_props |= engine_step_prop_redistribute;
+
+  if (e->verbose)
+    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
+            clocks_getunit());
+#else
+  error("SWIFT was not compiled with MPI support.");
+#endif
+}
diff --git a/src/runner.c b/src/runner.c
deleted file mode 100644
index 38c31971555a16e01f6b5f3d056a018ee2c299a2..0000000000000000000000000000000000000000
--- a/src/runner.c
+++ /dev/null
@@ -1,4885 +0,0 @@
-/*******************************************************************************
- * This file is part of SWIFT.
- * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
- *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
- *               2015 Peter W. Draper (p.w.draper@durham.ac.uk)
- *               2016 John A. Regan (john.a.regan@durham.ac.uk)
- *                    Tom Theuns (tom.theuns@durham.ac.uk)
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published
- * by the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- ******************************************************************************/
-
-/* Config parameters. */
-#include "../config.h"
-
-/* Some standard headers. */
-#include <float.h>
-#include <limits.h>
-#include <stdlib.h>
-
-/* MPI headers. */
-#ifdef WITH_MPI
-#include <mpi.h>
-#endif
-
-/* This object's header. */
-#include "runner.h"
-
-/* Local headers. */
-#include "active.h"
-#include "approx_math.h"
-#include "atomic.h"
-#include "black_holes.h"
-#include "black_holes_properties.h"
-#include "cell.h"
-#include "chemistry.h"
-#include "const.h"
-#include "cooling.h"
-#include "debug.h"
-#include "drift.h"
-#include "engine.h"
-#include "entropy_floor.h"
-#include "error.h"
-#include "feedback.h"
-#include "gravity.h"
-#include "hydro.h"
-#include "hydro_properties.h"
-#include "kick.h"
-#include "logger.h"
-#include "memuse.h"
-#include "minmax.h"
-#include "pressure_floor.h"
-#include "pressure_floor_iact.h"
-#include "runner_doiact_vec.h"
-#include "scheduler.h"
-#include "sort_part.h"
-#include "space.h"
-#include "space_getsid.h"
-#include "star_formation.h"
-#include "star_formation_logger.h"
-#include "stars.h"
-#include "task.h"
-#include "timers.h"
-#include "timestep.h"
-#include "timestep_limiter.h"
-#include "tracers.h"
-
-/* Unique identifier of loop types */
-#define TASK_LOOP_DENSITY 0
-#define TASK_LOOP_GRADIENT 1
-#define TASK_LOOP_FORCE 2
-#define TASK_LOOP_LIMITER 3
-#define TASK_LOOP_FEEDBACK 4
-#define TASK_LOOP_SWALLOW 5
-
-/* Import the density loop functions. */
-#define FUNCTION density
-#define FUNCTION_TASK_LOOP TASK_LOOP_DENSITY
-#include "runner_doiact.h"
-#undef FUNCTION
-#undef FUNCTION_TASK_LOOP
-
-/* Import the gradient loop functions (if required). */
-#ifdef EXTRA_HYDRO_LOOP
-#define FUNCTION gradient
-#define FUNCTION_TASK_LOOP TASK_LOOP_GRADIENT
-#include "runner_doiact.h"
-#undef FUNCTION
-#undef FUNCTION_TASK_LOOP
-#endif
-
-/* Import the force loop functions. */
-#define FUNCTION force
-#define FUNCTION_TASK_LOOP TASK_LOOP_FORCE
-#include "runner_doiact.h"
-#undef FUNCTION
-#undef FUNCTION_TASK_LOOP
-
-/* Import the limiter loop functions. */
-#define FUNCTION limiter
-#define FUNCTION_TASK_LOOP TASK_LOOP_LIMITER
-#include "runner_doiact.h"
-#undef FUNCTION
-#undef FUNCTION_TASK_LOOP
-
-/* Import the gravity loop functions. */
-#include "runner_doiact_grav.h"
-
-/* Import the stars density loop functions. */
-#define FUNCTION density
-#define FUNCTION_TASK_LOOP TASK_LOOP_DENSITY
-#include "runner_doiact_stars.h"
-#undef FUNCTION_TASK_LOOP
-#undef FUNCTION
-
-/* Import the stars feedback loop functions. */
-#define FUNCTION feedback
-#define FUNCTION_TASK_LOOP TASK_LOOP_FEEDBACK
-#include "runner_doiact_stars.h"
-#undef FUNCTION_TASK_LOOP
-#undef FUNCTION
-
-/* Import the black hole density loop functions. */
-#define FUNCTION density
-#define FUNCTION_TASK_LOOP TASK_LOOP_DENSITY
-#include "runner_doiact_black_holes.h"
-#undef FUNCTION_TASK_LOOP
-#undef FUNCTION
-
-/* Import the black hole swallow loop functions. */
-#define FUNCTION swallow
-#define FUNCTION_TASK_LOOP TASK_LOOP_SWALLOW
-#include "runner_doiact_black_holes.h"
-#undef FUNCTION_TASK_LOOP
-#undef FUNCTION
-
-/* Import the black hole feedback loop functions. */
-#define FUNCTION feedback
-#define FUNCTION_TASK_LOOP TASK_LOOP_FEEDBACK
-#include "runner_doiact_black_holes.h"
-#undef FUNCTION_TASK_LOOP
-#undef FUNCTION
-
-/**
- * @brief Intermediate task after the stars density loop that checks, and if
- * needed corrects, the smoothing lengths of the active star particles.
- *
- * Split cells recurse into their progeny. In a leaf cell, every star that is
- * both active and feedback-active gets its smoothing length h iterated with a
- * Newton-Raphson step that is limited to [h_old/2, 2 h_old] and to the
- * bisection bracket [left, right], until the relative change drops below the
- * tolerance or h reaches the h_min / h_max limits. Stars that are not yet
- * converged are re-initialised and pushed again through the density
- * interactions linked to this cell and to its parents. Stars that are done
- * have their feedback fields reset and, depending on feedback_do_feedback()
- * and on their age, are evolved with feedback_evolve_spart().
- *
- * The h_min / h_max limits are read from e->hydro_properties, whereas the
- * tolerance, eta_neighbours and the iteration cap come from
- * e->stars_properties.
- *
- * Aborts through error() if some stars are still unconverged after
- * max_smoothing_iterations passes. On exit c->stars.h_max holds the running
- * maximum, which is also pushed to all parents when c->stars.ghost is set.
- *
- * @param r The runner thread.
- * @param c The cell.
- * @param timer Are we timing this ? If non-zero, TIMER_TOC is called at the
- * end of the function (the early returns below skip it).
- */
-void runner_do_stars_ghost(struct runner *r, struct cell *c, int timer) {
-
-  struct spart *restrict sparts = c->stars.parts;
-  const struct engine *e = r->e;
-  const struct unit_system *us = e->internal_units;
-  const int with_cosmology = (e->policy & engine_policy_cosmology);
-  const struct cosmology *cosmo = e->cosmology;
-  const struct feedback_props *feedback_props = e->feedback_props;
-  const float stars_h_max = e->hydro_properties->h_max;
-  const float stars_h_min = e->hydro_properties->h_min;
-  const float eps = e->stars_properties->h_tolerance;
-  const float stars_eta_dim =
-      pow_dimension(e->stars_properties->eta_neighbours);
-  const int max_smoothing_iter = e->stars_properties->max_smoothing_iterations;
-  int redo = 0, scount = 0;
-
-  /* Running value of the maximal smoothing length, seeded with the value
-   * currently stored in the cell */
-  double h_max = c->stars.h_max;
-
-  TIMER_TIC;
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (c->nodeID != e->nodeID)
-    error("Running the star ghost on a foreign node!");
-#endif
-
-  /* Anything to do here? (these returns happen after TIMER_TIC and never
-   * reach the TIMER_TOC at the bottom of the function) */
-  if (c->stars.count == 0) return;
-  if (!cell_is_active_stars(c, e)) return;
-
-  /* Recurse? The progeny are always called with timer = 0. */
-  if (c->split) {
-    for (int k = 0; k < 8; k++) {
-      if (c->progeny[k] != NULL) {
-        runner_do_stars_ghost(r, c->progeny[k], 0);
-
-        /* Update h_max with the value currently stored in the progeny */
-        h_max = max(h_max, c->progeny[k]->stars.h_max);
-      }
-    }
-  } else {
-
-    /* Init the list of active particles that have to be updated.
-     * sid[] holds indices into sparts, h_0[] the smoothing length on entry,
-     * and left[] / right[] the bisection bracket of each listed particle
-     * (initially [0, stars_h_max]). */
-    int *sid = NULL;
-    float *h_0 = NULL;
-    float *left = NULL;
-    float *right = NULL;
-    if ((sid = (int *)malloc(sizeof(int) * c->stars.count)) == NULL)
-      error("Can't allocate memory for sid.");
-    if ((h_0 = (float *)malloc(sizeof(float) * c->stars.count)) == NULL)
-      error("Can't allocate memory for h_0.");
-    if ((left = (float *)malloc(sizeof(float) * c->stars.count)) == NULL)
-      error("Can't allocate memory for left.");
-    if ((right = (float *)malloc(sizeof(float) * c->stars.count)) == NULL)
-      error("Can't allocate memory for right.");
-    for (int k = 0; k < c->stars.count; k++)
-      if (spart_is_active(&sparts[k], e) &&
-          feedback_is_active(&sparts[k], e->time, cosmo, with_cosmology)) {
-        sid[scount] = k;
-        h_0[scount] = sparts[k].h;
-        left[scount] = 0.f;
-        right[scount] = stars_h_max;
-        ++scount;
-      }
-
-    /* While there are particles that need to be updated (and the cap of
-     * max_smoothing_iter passes has not been reached)... */
-    for (int num_reruns = 0; scount > 0 && num_reruns < max_smoothing_iter;
-         num_reruns++) {
-
-      /* Reset the redo-count, i.e. the number of particles queued for
-       * another pass. */
-      redo = 0;
-
-      /* Loop over the remaining active parts in this cell. */
-      for (int i = 0; i < scount; i++) {
-
-        /* Get a direct pointer on the part. */
-        struct spart *sp = &sparts[sid[i]];
-
-#ifdef SWIFT_DEBUG_CHECKS
-        /* Is this part within the timestep? */
-        if (!spart_is_active(sp, e))
-          error("Ghost applied to inactive particle");
-#endif
-
-        /* Get some useful values */
-        const float h_init = h_0[i];
-        const float h_old = sp->h;
-        const float h_old_dim = pow_dimension(h_old);
-        const float h_old_dim_minus_one = pow_dimension_minus_one(h_old);
-
-        float h_new;
-        int has_no_neighbours = 0;
-
-        if (sp->density.wcount == 0.f) { /* No neighbours case */
-
-          /* Flag that there were no neighbours */
-          has_no_neighbours = 1;
-
-          /* Double h and try again. Note that, unlike the Newton-Raphson
-           * branch below, this value is not clamped to [left, right]. */
-          h_new = 2.f * h_old;
-
-        } else {
-
-          /* Finish the density calculation */
-          stars_end_density(sp, cosmo);
-
-          /* Compute one step of the Newton-Raphson scheme: f is the
-           * difference between the current and the target neighbour number,
-           * f_prime is what the update below uses as its derivative in h. */
-          const float n_sum = sp->density.wcount * h_old_dim;
-          const float n_target = stars_eta_dim;
-          const float f = n_sum - n_target;
-          const float f_prime =
-              sp->density.wcount_dh * h_old_dim +
-              hydro_dimension * sp->density.wcount * h_old_dim_minus_one;
-
-          /* Improve the bisection bounds: n_sum below the target raises the
-           * lower bound to h_old, n_sum above it lowers the upper bound. */
-          if (n_sum < n_target)
-            left[i] = max(left[i], h_old);
-          else if (n_sum > n_target)
-            right[i] = min(right[i], h_old);
-
-#ifdef SWIFT_DEBUG_CHECKS
-          /* Check the validity of the left and right bounds */
-          if (left[i] > right[i])
-            error("Invalid left (%e) and right (%e)", left[i], right[i]);
-#endif
-
-          /* Skip if h is already h_max and we don't have enough neighbours */
-          /* Same if we are below h_min.
-           * Such a particle is finished here: it gets the same feedback
-           * preparation as a converged particle and the loop moves on.
-           * Note that the running h_max is not updated on this path. */
-          if (((sp->h >= stars_h_max) && (f < 0.f)) ||
-              ((sp->h <= stars_h_min) && (f > 0.f))) {
-
-            stars_reset_feedback(sp);
-
-            /* Only do feedback if stars have a reasonable birth time */
-            if (feedback_do_feedback(sp)) {
-
-              const integertime_t ti_step = get_integer_timestep(sp->time_bin);
-              const integertime_t ti_begin =
-                  get_integer_time_begin(e->ti_current - 1, sp->time_bin);
-
-              /* Get particle time-step */
-              double dt;
-              if (with_cosmology) {
-                dt = cosmology_get_delta_time(e->cosmology, ti_begin,
-                                              ti_begin + ti_step);
-              } else {
-                dt = get_timestep(sp->time_bin, e->time_base);
-              }
-
-              /* Calculate age of the star at current time.
-               * NOTE(review): the second copy of this computation, further
-               * down, casts cosmo->a to float instead of casting the birth
-               * scale factor to double; confirm which one is intended. */
-              double star_age_end_of_step;
-              if (with_cosmology) {
-                star_age_end_of_step =
-                    cosmology_get_delta_time_from_scale_factors(
-                        cosmo, (double)sp->birth_scale_factor, cosmo->a);
-              } else {
-                star_age_end_of_step = (float)e->time - sp->birth_time;
-              }
-
-              /* Has this star been around for a while ? */
-              if (star_age_end_of_step > 0.) {
-
-                /* Age of the star at the start of the step */
-                const double star_age_beg_of_step =
-                    max(star_age_end_of_step - dt, 0.);
-
-                /* Compute the stellar evolution  */
-                feedback_evolve_spart(sp, feedback_props, cosmo, us,
-                                      star_age_beg_of_step, dt);
-              } else {
-
-                /* Reset the feedback fields of the star particle */
-                feedback_reset_feedback(sp, feedback_props);
-              }
-            } else {
-
-              feedback_reset_feedback(sp, feedback_props);
-            }
-
-            /* Ok, we are done with this particle */
-            continue;
-          }
-
-          /* Normal case: Use Newton-Raphson to get a better value of h */
-
-          /* Avoid floating point exception from f_prime = 0 */
-          h_new = h_old - f / (f_prime + FLT_MIN);
-
-          /* Be verbose about the particles that struggle to converge, i.e.
-           * once num_reruns exceeds max_smoothing_iter - 10 */
-          if (num_reruns > max_smoothing_iter - 10) {
-
-            message(
-                "Smoothing length convergence problem: iter=%d p->id=%lld "
-                "h_init=%12.8e h_old=%12.8e h_new=%12.8e f=%f f_prime=%f "
-                "n_sum=%12.8e n_target=%12.8e left=%12.8e right=%12.8e",
-                num_reruns, sp->id, h_init, h_old, h_new, f, f_prime, n_sum,
-                n_target, left[i], right[i]);
-          }
-
-          /* Safety check: truncate to the range [ h_old/2 , 2h_old ]. */
-          h_new = min(h_new, 2.f * h_old);
-          h_new = max(h_new, 0.5f * h_old);
-
-          /* Verify that we are actually progressing towards the answer by
-           * keeping h_new inside the bisection bracket */
-          h_new = max(h_new, left[i]);
-          h_new = min(h_new, right[i]);
-        }
-
-        /* Check whether the particle has an inappropriate smoothing length,
-         * i.e. whether the relative change in h exceeds the tolerance */
-        if (fabsf(h_new - h_old) > eps * h_old) {
-
-          /* Ok, correct then */
-
-          /* Case where we have been oscillating around the solution */
-          if ((h_new == left[i] && h_old == right[i]) ||
-              (h_old == left[i] && h_new == right[i])) {
-
-            /* Bisect the remaining interval, using the mean of
-             * pow_dimension(left) and pow_dimension(right) */
-            sp->h = pow_inv_dimension(
-                0.5f * (pow_dimension(left[i]) + pow_dimension(right[i])));
-
-          } else {
-
-            /* Normal case */
-            sp->h = h_new;
-          }
-
-          /* If strictly between the absolute minimum and maximum, try again */
-          if (sp->h < stars_h_max && sp->h > stars_h_min) {
-
-            /* Flag for another round of fun. The work arrays are compacted
-             * in place; redo never exceeds i, so no entry that is still to
-             * be visited in this pass gets overwritten. */
-            sid[redo] = sid[i];
-            h_0[redo] = h_0[i];
-            left[redo] = left[i];
-            right[redo] = right[i];
-            redo += 1;
-
-            /* Re-initialise everything */
-            stars_init_spart(sp);
-            feedback_init_spart(sp);
-
-            /* Off we go ! */
-            continue;
-
-          } else if (sp->h <= stars_h_min) {
-
-            /* Ok, this particle is a lost cause... */
-            sp->h = stars_h_min;
-
-          } else if (sp->h >= stars_h_max) {
-
-            /* Ok, this particle is a lost cause... */
-            sp->h = stars_h_max;
-
-            /* Do some damage control if no neighbours at all were found */
-            if (has_no_neighbours) {
-              stars_spart_has_no_neighbours(sp, cosmo);
-            }
-
-          } else {
-            error(
-                "Fundamental problem with the smoothing length iteration "
-                "logic.");
-          }
-        }
-
-        /* We now have a particle whose smoothing length has converged */
-
-        /* Check if h_max has increased */
-        h_max = max(h_max, sp->h);
-
-        stars_reset_feedback(sp);
-
-        /* Only do feedback if stars have a reasonable birth time */
-        if (feedback_do_feedback(sp)) {
-
-          const integertime_t ti_step = get_integer_timestep(sp->time_bin);
-          const integertime_t ti_begin =
-              get_integer_time_begin(e->ti_current - 1, sp->time_bin);
-
-          /* Get particle time-step */
-          double dt;
-          if (with_cosmology) {
-            dt = cosmology_get_delta_time(e->cosmology, ti_begin,
-                                          ti_begin + ti_step);
-          } else {
-            dt = get_timestep(sp->time_bin, e->time_base);
-          }
-
-          /* Calculate age of the star at current time.
-           * NOTE(review): here cosmo->a is cast to float, whereas the copy
-           * of this computation in the h-limit branch above passes cosmo->a
-           * unchanged and casts the birth scale factor to double; confirm
-           * which one is intended. */
-          double star_age_end_of_step;
-          if (with_cosmology) {
-            star_age_end_of_step = cosmology_get_delta_time_from_scale_factors(
-                cosmo, sp->birth_scale_factor, (float)cosmo->a);
-          } else {
-            star_age_end_of_step = (float)e->time - sp->birth_time;
-          }
-
-          /* Has this star been around for a while ? */
-          if (star_age_end_of_step > 0.) {
-
-            /* Age of the star at the start of the step */
-            const double star_age_beg_of_step =
-                max(star_age_end_of_step - dt, 0.);
-
-            /* Compute the stellar evolution  */
-            feedback_evolve_spart(sp, feedback_props, cosmo, us,
-                                  star_age_beg_of_step, dt);
-          } else {
-
-            /* Reset the feedback fields of the star particle */
-            feedback_reset_feedback(sp, feedback_props);
-          }
-        } else {
-
-          /* Reset the feedback fields of the star particle */
-          feedback_reset_feedback(sp, feedback_props);
-        }
-      }
-
-      /* We now need to treat the particles whose smoothing length had not
-       * converged again */
-
-      /* Re-set the counter for the next loop (potentially). */
-      scount = redo;
-      if (scount > 0) {
-
-        /* Climb up the cell hierarchy, starting from this cell. */
-        for (struct cell *finger = c; finger != NULL; finger = finger->parent) {
-
-          /* Run through this cell's density interactions. */
-          for (struct link *l = finger->stars.density; l != NULL; l = l->next) {
-
-#ifdef SWIFT_DEBUG_CHECKS
-            if (l->t->ti_run < r->e->ti_current)
-              error("Density task should have been run.");
-#endif
-
-            /* Self-interaction? */
-            if (l->t->type == task_type_self)
-              runner_doself_subset_branch_stars_density(r, finger, sparts, sid,
-                                                        scount);
-
-            /* Otherwise, pair interaction? */
-            else if (l->t->type == task_type_pair) {
-
-              /* Left or right? (pass whichever cell of the pair is not
-               * finger as the partner) */
-              if (l->t->ci == finger)
-                runner_dopair_subset_branch_stars_density(
-                    r, finger, sparts, sid, scount, l->t->cj);
-              else
-                runner_dopair_subset_branch_stars_density(
-                    r, finger, sparts, sid, scount, l->t->ci);
-            }
-
-            /* Otherwise, sub-self interaction? */
-            else if (l->t->type == task_type_sub_self)
-              runner_dosub_subset_stars_density(r, finger, sparts, sid, scount,
-                                                NULL, 1);
-
-            /* Otherwise, sub-pair interaction? */
-            else if (l->t->type == task_type_sub_pair) {
-
-              /* Left or right? (pass whichever cell of the pair is not
-               * finger as the partner) */
-              if (l->t->ci == finger)
-                runner_dosub_subset_stars_density(r, finger, sparts, sid,
-                                                  scount, l->t->cj, 1);
-              else
-                runner_dosub_subset_stars_density(r, finger, sparts, sid,
-                                                  scount, l->t->ci, 1);
-            }
-          }
-        }
-      }
-    }
-
-    if (scount) {
-      error("Smoothing length failed to converge on %i particles.", scount);
-    }
-
-    /* Be clean */
-    free(left);
-    free(right);
-    free(sid);
-    free(h_0);
-  }
-
-  /* Update h_max */
-  c->stars.h_max = h_max;
-
-  /* The ghost may not always be at the top level.
-   * Therefore we need to update h_max between the super- and top-levels */
-  if (c->stars.ghost) {
-    for (struct cell *tmp = c->parent; tmp != NULL; tmp = tmp->parent) {
-      atomic_max_d(&tmp->stars.h_max, h_max);
-    }
-  }
-
-  if (timer) TIMER_TOC(timer_do_stars_ghost);
-}
-
-/**
- * @brief Intermediate task after the density to check that the smoothing
- * lengths are correct.
- *
- * @param r The runner thread.
- * @param c The cell.
- * @param timer Are we timing this ?
- */
-void runner_do_black_holes_density_ghost(struct runner *r, struct cell *c,
-                                         int timer) {
-
-  struct bpart *restrict bparts = c->black_holes.parts;
-  const struct engine *e = r->e;
-  const struct cosmology *cosmo = e->cosmology;
-  const float black_holes_h_max = e->hydro_properties->h_max;
-  const float black_holes_h_min = e->hydro_properties->h_min;
-  const float eps = e->black_holes_properties->h_tolerance;
-  const float black_holes_eta_dim =
-      pow_dimension(e->black_holes_properties->eta_neighbours);
-  const int max_smoothing_iter = e->hydro_properties->max_smoothing_iterations;
-  int redo = 0, bcount = 0;
-
-  /* Running value of the maximal smoothing length */
-  double h_max = c->black_holes.h_max;
-
-  TIMER_TIC;
-
-  /* Anything to do here? */
-  if (c->black_holes.count == 0) return;
-  if (!cell_is_active_black_holes(c, e)) return;
-
-  /* Recurse? */
-  if (c->split) {
-    for (int k = 0; k < 8; k++) {
-      if (c->progeny[k] != NULL) {
-        runner_do_black_holes_density_ghost(r, c->progeny[k], 0);
-
-        /* Update h_max */
-        h_max = max(h_max, c->progeny[k]->black_holes.h_max);
-      }
-    }
-  } else {
-
-    /* Init the list of active particles that have to be updated. */
-    int *sid = NULL;
-    float *h_0 = NULL;
-    float *left = NULL;
-    float *right = NULL;
-    if ((sid = (int *)malloc(sizeof(int) * c->black_holes.count)) == NULL)
-      error("Can't allocate memory for sid.");
-    if ((h_0 = (float *)malloc(sizeof(float) * c->black_holes.count)) == NULL)
-      error("Can't allocate memory for h_0.");
-    if ((left = (float *)malloc(sizeof(float) * c->black_holes.count)) == NULL)
-      error("Can't allocate memory for left.");
-    if ((right = (float *)malloc(sizeof(float) * c->black_holes.count)) == NULL)
-      error("Can't allocate memory for right.");
-    for (int k = 0; k < c->black_holes.count; k++)
-      if (bpart_is_active(&bparts[k], e)) {
-        sid[bcount] = k;
-        h_0[bcount] = bparts[k].h;
-        left[bcount] = 0.f;
-        right[bcount] = black_holes_h_max;
-        ++bcount;
-      }
-
-    /* While there are particles that need to be updated... */
-    for (int num_reruns = 0; bcount > 0 && num_reruns < max_smoothing_iter;
-         num_reruns++) {
-
-      /* Reset the redo-count. */
-      redo = 0;
-
-      /* Loop over the remaining active parts in this cell. */
-      for (int i = 0; i < bcount; i++) {
-
-        /* Get a direct pointer on the part. */
-        struct bpart *bp = &bparts[sid[i]];
-
-#ifdef SWIFT_DEBUG_CHECKS
-        /* Is this part within the timestep? */
-        if (!bpart_is_active(bp, e))
-          error("Ghost applied to inactive particle");
-#endif
-
-        /* Get some useful values */
-        const float h_init = h_0[i];
-        const float h_old = bp->h;
-        const float h_old_dim = pow_dimension(h_old);
-        const float h_old_dim_minus_one = pow_dimension_minus_one(h_old);
-
-        float h_new;
-        int has_no_neighbours = 0;
-
-        if (bp->density.wcount == 0.f) { /* No neighbours case */
-
-          /* Flag that there were no neighbours */
-          has_no_neighbours = 1;
-
-          /* Double h and try again */
-          h_new = 2.f * h_old;
-
-        } else {
-
-          /* Finish the density calculation */
-          black_holes_end_density(bp, cosmo);
-
-          /* Compute one step of the Newton-Raphson scheme */
-          const float n_sum = bp->density.wcount * h_old_dim;
-          const float n_target = black_holes_eta_dim;
-          const float f = n_sum - n_target;
-          const float f_prime =
-              bp->density.wcount_dh * h_old_dim +
-              hydro_dimension * bp->density.wcount * h_old_dim_minus_one;
-
-          /* Improve the bisection bounds */
-          if (n_sum < n_target)
-            left[i] = max(left[i], h_old);
-          else if (n_sum > n_target)
-            right[i] = min(right[i], h_old);
-
-#ifdef SWIFT_DEBUG_CHECKS
-          /* Check the validity of the left and right bounds */
-          if (left[i] > right[i])
-            error("Invalid left (%e) and right (%e)", left[i], right[i]);
-#endif
-
-          /* Skip if h is already h_max and we don't have enough neighbours */
-          /* Same if we are below h_min */
-          if (((bp->h >= black_holes_h_max) && (f < 0.f)) ||
-              ((bp->h <= black_holes_h_min) && (f > 0.f))) {
-
-            black_holes_reset_feedback(bp);
-
-            /* Ok, we are done with this particle */
-            continue;
-          }
-
-          /* Normal case: Use Newton-Raphson to get a better value of h */
-
-          /* Avoid floating point exception from f_prime = 0 */
-          h_new = h_old - f / (f_prime + FLT_MIN);
-
-          /* Be verbose about the particles that struggle to converge */
-          if (num_reruns > max_smoothing_iter - 10) {
-
-            message(
-                "Smoothing length convergence problem: iter=%d p->id=%lld "
-                "h_init=%12.8e h_old=%12.8e h_new=%12.8e f=%f f_prime=%f "
-                "n_sum=%12.8e n_target=%12.8e left=%12.8e right=%12.8e",
-                num_reruns, bp->id, h_init, h_old, h_new, f, f_prime, n_sum,
-                n_target, left[i], right[i]);
-          }
-
-          /* Safety check: truncate to the range [ h_old/2 , 2h_old ]. */
-          h_new = min(h_new, 2.f * h_old);
-          h_new = max(h_new, 0.5f * h_old);
-
-          /* Verify that we are actually progrssing towards the answer */
-          h_new = max(h_new, left[i]);
-          h_new = min(h_new, right[i]);
-        }
-
-        /* Check whether the particle has an inappropriate smoothing length */
-        if (fabsf(h_new - h_old) > eps * h_old) {
-
-          /* Ok, correct then */
-
-          /* Case where we have been oscillating around the solution */
-          if ((h_new == left[i] && h_old == right[i]) ||
-              (h_old == left[i] && h_new == right[i])) {
-
-            /* Bissect the remaining interval */
-            bp->h = pow_inv_dimension(
-                0.5f * (pow_dimension(left[i]) + pow_dimension(right[i])));
-
-          } else {
-
-            /* Normal case */
-            bp->h = h_new;
-          }
-
-          /* If below the absolute maximum, try again */
-          if (bp->h < black_holes_h_max && bp->h > black_holes_h_min) {
-
-            /* Flag for another round of fun */
-            sid[redo] = sid[i];
-            h_0[redo] = h_0[i];
-            left[redo] = left[i];
-            right[redo] = right[i];
-            redo += 1;
-
-            /* Re-initialise everything */
-            black_holes_init_bpart(bp);
-
-            /* Off we go ! */
-            continue;
-
-          } else if (bp->h <= black_holes_h_min) {
-
-            /* Ok, this particle is a lost cause... */
-            bp->h = black_holes_h_min;
-
-          } else if (bp->h >= black_holes_h_max) {
-
-            /* Ok, this particle is a lost cause... */
-            bp->h = black_holes_h_max;
-
-            /* Do some damage control if no neighbours at all were found */
-            if (has_no_neighbours) {
-              black_holes_bpart_has_no_neighbours(bp, cosmo);
-            }
-
-          } else {
-            error(
-                "Fundamental problem with the smoothing length iteration "
-                "logic.");
-          }
-        }
-
-        /* We now have a particle whose smoothing length has converged */
-
-        black_holes_reset_feedback(bp);
-
-        /* Check if h_max has increased */
-        h_max = max(h_max, bp->h);
-      }
-
-      /* We now need to treat the particles whose smoothing length had not
-       * converged again */
-
-      /* Re-set the counter for the next loop (potentially). */
-      bcount = redo;
-      if (bcount > 0) {
-
-        /* Climb up the cell hierarchy. */
-        for (struct cell *finger = c; finger != NULL; finger = finger->parent) {
-
-          /* Run through this cell's density interactions. */
-          for (struct link *l = finger->black_holes.density; l != NULL;
-               l = l->next) {
-
-#ifdef SWIFT_DEBUG_CHECKS
-            if (l->t->ti_run < r->e->ti_current)
-              error("Density task should have been run.");
-#endif
-
-            /* Self-interaction? */
-            if (l->t->type == task_type_self)
-              runner_doself_subset_branch_bh_density(r, finger, bparts, sid,
-                                                     bcount);
-
-            /* Otherwise, pair interaction? */
-            else if (l->t->type == task_type_pair) {
-
-              /* Left or right? */
-              if (l->t->ci == finger)
-                runner_dopair_subset_branch_bh_density(r, finger, bparts, sid,
-                                                       bcount, l->t->cj);
-              else
-                runner_dopair_subset_branch_bh_density(r, finger, bparts, sid,
-                                                       bcount, l->t->ci);
-            }
-
-            /* Otherwise, sub-self interaction? */
-            else if (l->t->type == task_type_sub_self)
-              runner_dosub_subset_bh_density(r, finger, bparts, sid, bcount,
-                                             NULL, 1);
-
-            /* Otherwise, sub-pair interaction? */
-            else if (l->t->type == task_type_sub_pair) {
-
-              /* Left or right? */
-              if (l->t->ci == finger)
-                runner_dosub_subset_bh_density(r, finger, bparts, sid, bcount,
-                                               l->t->cj, 1);
-              else
-                runner_dosub_subset_bh_density(r, finger, bparts, sid, bcount,
-                                               l->t->ci, 1);
-            }
-          }
-        }
-      }
-    }
-
-    if (bcount) {
-      error("Smoothing length failed to converge on %i particles.", bcount);
-    }
-
-    /* Be clean */
-    free(left);
-    free(right);
-    free(sid);
-    free(h_0);
-  }
-
-  /* Update h_max */
-  c->black_holes.h_max = h_max;
-
-  /* The ghost may not always be at the top level.
-   * Therefore we need to update h_max between the super- and top-levels */
-  if (c->black_holes.density_ghost) {
-    for (struct cell *tmp = c->parent; tmp != NULL; tmp = tmp->parent) {
-      atomic_max_d(&tmp->black_holes.h_max, h_max);
-    }
-  }
-
-  if (timer) TIMER_TOC(timer_do_black_holes_ghost);
-}
-
-/**
- * @brief Intermediate task after the BHs have done their swallowing step.
- * This is used to update the BH quantities if necessary.
- *
- * @param r The runner thread.
- * @param c The cell.
- * @param timer Are we timing this ?
- */
-void runner_do_black_holes_swallow_ghost(struct runner *r, struct cell *c,
-                                         int timer) {
-
-  struct bpart *restrict bparts = c->black_holes.parts;
-  const int count = c->black_holes.count;
-  const struct engine *e = r->e;
-  const int with_cosmology = e->policy & engine_policy_cosmology;
-
-  TIMER_TIC;
-
-  /* Anything to do here? */
-  if (!cell_is_active_hydro(c, e)) return;
-
-  /* Recurse? */
-  if (c->split) {
-    for (int k = 0; k < 8; k++)
-      if (c->progeny[k] != NULL)
-        runner_do_black_holes_swallow_ghost(r, c->progeny[k], 0);
-  } else {
-
-    /* Loop over the parts in this cell. */
-    for (int i = 0; i < count; i++) {
-
-      /* Get a direct pointer on the part. */
-      struct bpart *bp = &bparts[i];
-
-      if (bpart_is_active(bp, e)) {
-
-        /* Compute the final operations for repositioning of this BH */
-        black_holes_end_reposition(bp, e->black_holes_properties,
-                                   e->physical_constants, e->cosmology);
-
-        /* Get particle time-step */
-        double dt;
-        if (with_cosmology) {
-          const integertime_t ti_step = get_integer_timestep(bp->time_bin);
-          const integertime_t ti_begin =
-              get_integer_time_begin(e->ti_current - 1, bp->time_bin);
-
-          dt = cosmology_get_delta_time(e->cosmology, ti_begin,
-                                        ti_begin + ti_step);
-        } else {
-          dt = get_timestep(bp->time_bin, e->time_base);
-        }
-
-        /* Compute variables required for the feedback loop */
-        black_holes_prepare_feedback(bp, e->black_holes_properties,
-                                     e->physical_constants, e->cosmology, dt);
-      }
-    }
-  }
-
-  if (timer) TIMER_TOC(timer_do_black_holes_ghost);
-}
-
-/**
- * @brief Calculate gravity acceleration from external potential
- *
- * @param r runner task
- * @param c cell
- * @param timer 1 if the time is to be recorded.
- */
-void runner_do_grav_external(struct runner *r, struct cell *c, int timer) {
-
-  struct gpart *restrict gparts = c->grav.parts;
-  const int gcount = c->grav.count;
-  const struct engine *e = r->e;
-  const struct external_potential *potential = e->external_potential;
-  const struct phys_const *constants = e->physical_constants;
-  const double time = r->e->time;
-
-  TIMER_TIC;
-
-  /* Anything to do here? */
-  if (!cell_is_active_gravity(c, e)) return;
-
-  /* Recurse? */
-  if (c->split) {
-    for (int k = 0; k < 8; k++)
-      if (c->progeny[k] != NULL) runner_do_grav_external(r, c->progeny[k], 0);
-  } else {
-
-    /* Loop over the gparts in this cell. */
-    for (int i = 0; i < gcount; i++) {
-
-      /* Get a direct pointer on the part. */
-      struct gpart *restrict gp = &gparts[i];
-
-      /* Is this part within the time step? */
-      if (gpart_is_active(gp, e)) {
-        external_gravity_acceleration(time, potential, constants, gp);
-      }
-    }
-  }
-
-  if (timer) TIMER_TOC(timer_dograv_external);
-}
-
-/**
- * @brief Calculate gravity accelerations from the periodic mesh
- *
- * @param r runner task
- * @param c cell
- * @param timer 1 if the time is to be recorded.
- */
-void runner_do_grav_mesh(struct runner *r, struct cell *c, int timer) {
-
-  struct gpart *restrict gparts = c->grav.parts;
-  const int gcount = c->grav.count;
-  const struct engine *e = r->e;
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (!e->s->periodic) error("Calling mesh forces in non-periodic mode.");
-#endif
-
-  TIMER_TIC;
-
-  /* Anything to do here? */
-  if (!cell_is_active_gravity(c, e)) return;
-
-  /* Recurse? */
-  if (c->split) {
-    for (int k = 0; k < 8; k++)
-      if (c->progeny[k] != NULL) runner_do_grav_mesh(r, c->progeny[k], 0);
-  } else {
-
-    /* Get the forces from the gravity mesh */
-    pm_mesh_interpolate_forces(e->mesh, e, gparts, gcount);
-  }
-
-  if (timer) TIMER_TOC(timer_dograv_mesh);
-}
-
-/**
- * @brief Calculate change in thermal state of particles induced
- * by radiative cooling and heating.
- *
- * @param r runner task
- * @param c cell
- * @param timer 1 if the time is to be recorded.
- */
-void runner_do_cooling(struct runner *r, struct cell *c, int timer) {
-
-  const struct engine *e = r->e;
-  const struct cosmology *cosmo = e->cosmology;
-  const int with_cosmology = (e->policy & engine_policy_cosmology);
-  const struct cooling_function_data *cooling_func = e->cooling_func;
-  const struct phys_const *constants = e->physical_constants;
-  const struct unit_system *us = e->internal_units;
-  const struct hydro_props *hydro_props = e->hydro_properties;
-  const struct entropy_floor_properties *entropy_floor_props = e->entropy_floor;
-  const double time_base = e->time_base;
-  const integertime_t ti_current = e->ti_current;
-  struct part *restrict parts = c->hydro.parts;
-  struct xpart *restrict xparts = c->hydro.xparts;
-  const int count = c->hydro.count;
-
-  TIMER_TIC;
-
-  /* Anything to do here? */
-  if (!cell_is_active_hydro(c, e)) return;
-
-  /* Recurse? */
-  if (c->split) {
-    for (int k = 0; k < 8; k++)
-      if (c->progeny[k] != NULL) runner_do_cooling(r, c->progeny[k], 0);
-  } else {
-
-    /* Loop over the parts in this cell. */
-    for (int i = 0; i < count; i++) {
-
-      /* Get a direct pointer on the part. */
-      struct part *restrict p = &parts[i];
-      struct xpart *restrict xp = &xparts[i];
-
-      if (part_is_active(p, e)) {
-
-        double dt_cool, dt_therm;
-        if (with_cosmology) {
-          const integertime_t ti_step = get_integer_timestep(p->time_bin);
-          const integertime_t ti_begin =
-              get_integer_time_begin(ti_current - 1, p->time_bin);
-
-          dt_cool =
-              cosmology_get_delta_time(cosmo, ti_begin, ti_begin + ti_step);
-          dt_therm = cosmology_get_therm_kick_factor(e->cosmology, ti_begin,
-                                                     ti_begin + ti_step);
-
-        } else {
-          dt_cool = get_timestep(p->time_bin, time_base);
-          dt_therm = get_timestep(p->time_bin, time_base);
-        }
-
-        /* Let's cool ! */
-        cooling_cool_part(constants, us, cosmo, hydro_props,
-                          entropy_floor_props, cooling_func, p, xp, dt_cool,
-                          dt_therm);
-      }
-    }
-  }
-
-  if (timer) TIMER_TOC(timer_do_cooling);
-}
-
-/**
- *
- */
-void runner_do_star_formation(struct runner *r, struct cell *c, int timer) {
-
-  struct engine *e = r->e;
-  const struct cosmology *cosmo = e->cosmology;
-  const struct star_formation *sf_props = e->star_formation;
-  const struct phys_const *phys_const = e->physical_constants;
-  const int count = c->hydro.count;
-  struct part *restrict parts = c->hydro.parts;
-  struct xpart *restrict xparts = c->hydro.xparts;
-  const int with_cosmology = (e->policy & engine_policy_cosmology);
-  const int with_feedback = (e->policy & engine_policy_feedback);
-  const struct hydro_props *restrict hydro_props = e->hydro_properties;
-  const struct unit_system *restrict us = e->internal_units;
-  struct cooling_function_data *restrict cooling = e->cooling_func;
-  const struct entropy_floor_properties *entropy_floor = e->entropy_floor;
-  const double time_base = e->time_base;
-  const integertime_t ti_current = e->ti_current;
-  const int current_stars_count = c->stars.count;
-
-  TIMER_TIC;
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (c->nodeID != e->nodeID)
-    error("Running star formation task on a foreign node!");
-#endif
-
-  /* Anything to do here? */
-  if (c->hydro.count == 0 || !cell_is_active_hydro(c, e)) {
-    star_formation_logger_log_inactive_cell(&c->stars.sfh);
-    return;
-  }
-
-  /* Reset the SFR */
-  star_formation_logger_init(&c->stars.sfh);
-
-  /* Recurse? */
-  if (c->split) {
-    for (int k = 0; k < 8; k++)
-      if (c->progeny[k] != NULL) {
-        /* Load the child cell */
-        struct cell *restrict cp = c->progeny[k];
-
-        /* Do the recursion */
-        runner_do_star_formation(r, cp, 0);
-
-        /* Update current cell using child cells */
-        star_formation_logger_add(&c->stars.sfh, &cp->stars.sfh);
-      }
-  } else {
-
-    /* Loop over the gas particles in this cell. */
-    for (int k = 0; k < count; k++) {
-
-      /* Get a handle on the part. */
-      struct part *restrict p = &parts[k];
-      struct xpart *restrict xp = &xparts[k];
-
-      /* Only work on active particles */
-      if (part_is_active(p, e)) {
-
-        /* Is this particle star forming? */
-        if (star_formation_is_star_forming(p, xp, sf_props, phys_const, cosmo,
-                                           hydro_props, us, cooling,
-                                           entropy_floor)) {
-
-          /* Time-step size for this particle */
-          double dt_star;
-          if (with_cosmology) {
-            const integertime_t ti_step = get_integer_timestep(p->time_bin);
-            const integertime_t ti_begin =
-                get_integer_time_begin(ti_current - 1, p->time_bin);
-
-            dt_star =
-                cosmology_get_delta_time(cosmo, ti_begin, ti_begin + ti_step);
-
-          } else {
-            dt_star = get_timestep(p->time_bin, time_base);
-          }
-
-          /* Compute the SF rate of the particle */
-          star_formation_compute_SFR(p, xp, sf_props, phys_const, cosmo,
-                                     dt_star);
-
-          /* Add the SFR and SFR*dt to the SFH struct of this cell */
-          star_formation_logger_log_active_part(p, xp, &c->stars.sfh, dt_star);
-
-          /* Are we forming a star particle from this SF rate? */
-          if (star_formation_should_convert_to_star(p, xp, sf_props, e,
-                                                    dt_star)) {
-
-            /* Convert the gas particle to a star particle */
-            struct spart *sp = cell_convert_part_to_spart(e, c, p, xp);
-
-            /* Did we get a star? (Or did we run out of spare ones?) */
-            if (sp != NULL) {
-
-              /* message("We formed a star id=%lld cellID=%d", sp->id,
-               * c->cellID); */
-
-              /* Copy the properties of the gas particle to the star particle */
-              star_formation_copy_properties(p, xp, sp, e, sf_props, cosmo,
-                                             with_cosmology, phys_const,
-                                             hydro_props, us, cooling);
-
-              /* Update the Star formation history */
-              star_formation_logger_log_new_spart(sp, &c->stars.sfh);
-            }
-          }
-
-        } else { /* Are we not star-forming? */
-
-          /* Update the particle to flag it as not star-forming */
-          star_formation_update_part_not_SFR(p, xp, e, sf_props,
-                                             with_cosmology);
-
-        } /* Not Star-forming? */
-
-      } else { /* is active? */
-
-        /* Check if the particle is not inhibited */
-        if (!part_is_inhibited(p, e)) {
-          star_formation_logger_log_inactive_part(p, xp, &c->stars.sfh);
-        }
-      }
-    } /* Loop over particles */
-  }
-
-  /* If we formed any stars, the star sorts are now invalid. We need to
-   * re-compute them. */
-  if (with_feedback && (c == c->top) &&
-      (current_stars_count != c->stars.count)) {
-    cell_set_star_resort_flag(c);
-  }
-
-  if (timer) TIMER_TOC(timer_do_star_formation);
-}
-
-/**
- * @brief Sorts again all the stars in a given cell hierarchy.
- *
- * This is intended to be used after the star formation task has been run
- * to get the cells back into a state where self/pair star tasks can be run.
- *
- * @param r The thread #runner.
- * @param c The top-level cell to run on.
- * @param timer Are we timing this?
- */
-void runner_do_stars_resort(struct runner *r, struct cell *c, const int timer) {
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (c->nodeID != r->e->nodeID) error("Task must be run locally!");
-#endif
-
-  TIMER_TIC;
-
-  /* Did we demand a recalculation of the stars'sorts? */
-  if (cell_get_flag(c, cell_flag_do_stars_resort)) {
-    runner_do_all_stars_sort(r, c);
-    cell_clear_flag(c, cell_flag_do_stars_resort);
-  }
-
-  if (timer) TIMER_TOC(timer_do_stars_resort);
-}
-
-/**
- * @brief Sort the entries in ascending order using QuickSort.
- *
- * @param sort The entries
- * @param N The number of entries.
- */
-void runner_do_sort_ascending(struct sort_entry *sort, int N) {
-
-  struct {
-    short int lo, hi;
-  } qstack[10];
-  int qpos, i, j, lo, hi, imin;
-  struct sort_entry temp;
-  float pivot;
-
-  /* Sort parts in cell_i in decreasing order with quicksort */
-  qstack[0].lo = 0;
-  qstack[0].hi = N - 1;
-  qpos = 0;
-  while (qpos >= 0) {
-    lo = qstack[qpos].lo;
-    hi = qstack[qpos].hi;
-    qpos -= 1;
-    if (hi - lo < 15) {
-      for (i = lo; i < hi; i++) {
-        imin = i;
-        for (j = i + 1; j <= hi; j++)
-          if (sort[j].d < sort[imin].d) imin = j;
-        if (imin != i) {
-          temp = sort[imin];
-          sort[imin] = sort[i];
-          sort[i] = temp;
-        }
-      }
-    } else {
-      pivot = sort[(lo + hi) / 2].d;
-      i = lo;
-      j = hi;
-      while (i <= j) {
-        while (sort[i].d < pivot) i++;
-        while (sort[j].d > pivot) j--;
-        if (i <= j) {
-          if (i < j) {
-            temp = sort[i];
-            sort[i] = sort[j];
-            sort[j] = temp;
-          }
-          i += 1;
-          j -= 1;
-        }
-      }
-      if (j > (lo + hi) / 2) {
-        if (lo < j) {
-          qpos += 1;
-          qstack[qpos].lo = lo;
-          qstack[qpos].hi = j;
-        }
-        if (i < hi) {
-          qpos += 1;
-          qstack[qpos].lo = i;
-          qstack[qpos].hi = hi;
-        }
-      } else {
-        if (i < hi) {
-          qpos += 1;
-          qstack[qpos].lo = i;
-          qstack[qpos].hi = hi;
-        }
-        if (lo < j) {
-          qpos += 1;
-          qstack[qpos].lo = lo;
-          qstack[qpos].hi = j;
-        }
-      }
-    }
-  }
-}
-
-#ifdef SWIFT_DEBUG_CHECKS
-/**
- * @brief Recursively checks that the flags are consistent in a cell hierarchy.
- *
- * Debugging function. Exists in two flavours: hydro & stars.
- */
-#define RUNNER_CHECK_SORTS(TYPE)                                               \
-  void runner_check_sorts_##TYPE(struct cell *c, int flags) {                  \
-                                                                               \
-    if (flags & ~c->TYPE.sorted) error("Inconsistent sort flags (downward)!"); \
-    if (c->split)                                                              \
-      for (int k = 0; k < 8; k++)                                              \
-        if (c->progeny[k] != NULL && c->progeny[k]->TYPE.count > 0)            \
-          runner_check_sorts_##TYPE(c->progeny[k], c->TYPE.sorted);            \
-  }
-#else
-#define RUNNER_CHECK_SORTS(TYPE)                                       \
-  void runner_check_sorts_##TYPE(struct cell *c, int flags) {          \
-    error("Calling debugging code without debugging flag activated."); \
-  }
-#endif
-
-RUNNER_CHECK_SORTS(hydro)
-RUNNER_CHECK_SORTS(stars)
-
-/**
- * @brief Sort the particles in the given cell along all cardinal directions.
- *
- * @param r The #runner.
- * @param c The #cell.
- * @param flags Cell flag.
- * @param cleanup If true, re-build the sorts for the selected flags instead
- *        of just adding them.
- * @param clock Flag indicating whether to record the timing or not, needed
- *      for recursive calls.
- */
-void runner_do_hydro_sort(struct runner *r, struct cell *c, int flags,
-                          int cleanup, int clock) {
-
-  struct sort_entry *fingers[8];
-  const int count = c->hydro.count;
-  const struct part *parts = c->hydro.parts;
-  struct xpart *xparts = c->hydro.xparts;
-  float buff[8];
-
-  TIMER_TIC;
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (c->hydro.super == NULL) error("Task called above the super level!!!");
-#endif
-
-  /* We need to do the local sorts plus whatever was requested further up. */
-  flags |= c->hydro.do_sort;
-  if (cleanup) {
-    c->hydro.sorted = 0;
-  } else {
-    flags &= ~c->hydro.sorted;
-  }
-  if (flags == 0 && !cell_get_flag(c, cell_flag_do_hydro_sub_sort)) return;
-
-  /* Check that the particles have been moved to the current time */
-  if (flags && !cell_are_part_drifted(c, r->e))
-    error("Sorting un-drifted cell c->nodeID=%d", c->nodeID);
-
-#ifdef SWIFT_DEBUG_CHECKS
-  /* Make sure the sort flags are consistent (downward). */
-  runner_check_sorts_hydro(c, c->hydro.sorted);
-
-  /* Make sure the sort flags are consistent (upard). */
-  for (struct cell *finger = c->parent; finger != NULL;
-       finger = finger->parent) {
-    if (finger->hydro.sorted & ~c->hydro.sorted)
-      error("Inconsistent sort flags (upward).");
-  }
-
-  /* Update the sort timer which represents the last time the sorts
-     were re-set. */
-  if (c->hydro.sorted == 0) c->hydro.ti_sort = r->e->ti_current;
-#endif
-
-  /* Allocate memory for sorting. */
-  cell_malloc_hydro_sorts(c, flags);
-
-  /* Does this cell have any progeny? */
-  if (c->split) {
-
-    /* Fill in the gaps within the progeny. */
-    float dx_max_sort = 0.0f;
-    float dx_max_sort_old = 0.0f;
-    for (int k = 0; k < 8; k++) {
-      if (c->progeny[k] != NULL) {
-
-        if (c->progeny[k]->hydro.count > 0) {
-
-          /* Only propagate cleanup if the progeny is stale. */
-          runner_do_hydro_sort(
-              r, c->progeny[k], flags,
-              cleanup && (c->progeny[k]->hydro.dx_max_sort_old >
-                          space_maxreldx * c->progeny[k]->dmin),
-              0);
-          dx_max_sort = max(dx_max_sort, c->progeny[k]->hydro.dx_max_sort);
-          dx_max_sort_old =
-              max(dx_max_sort_old, c->progeny[k]->hydro.dx_max_sort_old);
-        } else {
-
-          /* We need to clean up the unused flags that were in case the
-             number of particles in the cell would change */
-          cell_clear_hydro_sort_flags(c->progeny[k], /*clear_unused_flags=*/1);
-        }
-      }
-    }
-    c->hydro.dx_max_sort = dx_max_sort;
-    c->hydro.dx_max_sort_old = dx_max_sort_old;
-
-    /* Loop over the 13 different sort arrays. */
-    for (int j = 0; j < 13; j++) {
-
-      /* Has this sort array been flagged? */
-      if (!(flags & (1 << j))) continue;
-
-      /* Init the particle index offsets. */
-      int off[8];
-      off[0] = 0;
-      for (int k = 1; k < 8; k++)
-        if (c->progeny[k - 1] != NULL)
-          off[k] = off[k - 1] + c->progeny[k - 1]->hydro.count;
-        else
-          off[k] = off[k - 1];
-
-      /* Init the entries and indices. */
-      int inds[8];
-      for (int k = 0; k < 8; k++) {
-        inds[k] = k;
-        if (c->progeny[k] != NULL && c->progeny[k]->hydro.count > 0) {
-          fingers[k] = c->progeny[k]->hydro.sort[j];
-          buff[k] = fingers[k]->d;
-          off[k] = off[k];
-        } else
-          buff[k] = FLT_MAX;
-      }
-
-      /* Sort the buffer. */
-      for (int i = 0; i < 7; i++)
-        for (int k = i + 1; k < 8; k++)
-          if (buff[inds[k]] < buff[inds[i]]) {
-            int temp_i = inds[i];
-            inds[i] = inds[k];
-            inds[k] = temp_i;
-          }
-
-      /* For each entry in the new sort list. */
-      struct sort_entry *finger = c->hydro.sort[j];
-      for (int ind = 0; ind < count; ind++) {
-
-        /* Copy the minimum into the new sort array. */
-        finger[ind].d = buff[inds[0]];
-        finger[ind].i = fingers[inds[0]]->i + off[inds[0]];
-
-        /* Update the buffer. */
-        fingers[inds[0]] += 1;
-        buff[inds[0]] = fingers[inds[0]]->d;
-
-        /* Find the smallest entry. */
-        for (int k = 1; k < 8 && buff[inds[k]] < buff[inds[k - 1]]; k++) {
-          int temp_i = inds[k - 1];
-          inds[k - 1] = inds[k];
-          inds[k] = temp_i;
-        }
-
-      } /* Merge. */
-
-      /* Add a sentinel. */
-      c->hydro.sort[j][count].d = FLT_MAX;
-      c->hydro.sort[j][count].i = 0;
-
-      /* Mark as sorted. */
-      atomic_or(&c->hydro.sorted, 1 << j);
-
-    } /* loop over sort arrays. */
-
-  } /* progeny? */
-
-  /* Otherwise, just sort. */
-  else {
-
-    /* Reset the sort distance */
-    if (c->hydro.sorted == 0) {
-#ifdef SWIFT_DEBUG_CHECKS
-      if (xparts != NULL && c->nodeID != engine_rank)
-        error("Have non-NULL xparts in foreign cell");
-#endif
-
-      /* And the individual sort distances if we are a local cell */
-      if (xparts != NULL) {
-        for (int k = 0; k < count; k++) {
-          xparts[k].x_diff_sort[0] = 0.0f;
-          xparts[k].x_diff_sort[1] = 0.0f;
-          xparts[k].x_diff_sort[2] = 0.0f;
-        }
-      }
-      c->hydro.dx_max_sort_old = 0.f;
-      c->hydro.dx_max_sort = 0.f;
-    }
-
-    /* Fill the sort array. */
-    for (int k = 0; k < count; k++) {
-      const double px[3] = {parts[k].x[0], parts[k].x[1], parts[k].x[2]};
-      for (int j = 0; j < 13; j++)
-        if (flags & (1 << j)) {
-          c->hydro.sort[j][k].i = k;
-          c->hydro.sort[j][k].d = px[0] * runner_shift[j][0] +
-                                  px[1] * runner_shift[j][1] +
-                                  px[2] * runner_shift[j][2];
-        }
-    }
-
-    /* Add the sentinel and sort. */
-    for (int j = 0; j < 13; j++)
-      if (flags & (1 << j)) {
-        c->hydro.sort[j][count].d = FLT_MAX;
-        c->hydro.sort[j][count].i = 0;
-        runner_do_sort_ascending(c->hydro.sort[j], count);
-        atomic_or(&c->hydro.sorted, 1 << j);
-      }
-  }
-
-#ifdef SWIFT_DEBUG_CHECKS
-  /* Verify the sorting. */
-  for (int j = 0; j < 13; j++) {
-    if (!(flags & (1 << j))) continue;
-    struct sort_entry *finger = c->hydro.sort[j];
-    for (int k = 1; k < count; k++) {
-      if (finger[k].d < finger[k - 1].d)
-        error("Sorting failed, ascending array.");
-      if (finger[k].i >= count) error("Sorting failed, indices borked.");
-    }
-  }
-
-  /* Make sure the sort flags are consistent (downward). */
-  runner_check_sorts_hydro(c, flags);
-
-  /* Make sure the sort flags are consistent (upward). */
-  for (struct cell *finger = c->parent; finger != NULL;
-       finger = finger->parent) {
-    if (finger->hydro.sorted & ~c->hydro.sorted)
-      error("Inconsistent sort flags.");
-  }
-#endif
-
-  /* Clear the cell's sort flags. */
-  c->hydro.do_sort = 0;
-  cell_clear_flag(c, cell_flag_do_hydro_sub_sort);
-  c->hydro.requires_sorts = 0;
-
-  if (clock) TIMER_TOC(timer_dosort);
-}
-
-/**
- * @brief Sort the stars particles in the given cell along all cardinal
- * directions.
- *
- * @param r The #runner.
- * @param c The #cell.
- * @param flags Cell flag.
- * @param cleanup If true, re-build the sorts for the selected flags instead
- *        of just adding them.
- * @param clock Flag indicating whether to record the timing or not, needed
- *      for recursive calls.
- */
-void runner_do_stars_sort(struct runner *r, struct cell *c, int flags,
-                          int cleanup, int clock) {
-
-  struct sort_entry *fingers[8];
-  const int count = c->stars.count;
-  struct spart *sparts = c->stars.parts;
-  float buff[8];
-
-  TIMER_TIC;
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (c->hydro.super == NULL) error("Task called above the super level!!!");
-#endif
-
-  /* We need to do the local sorts plus whatever was requested further up. */
-  flags |= c->stars.do_sort;
-  if (cleanup) {
-    c->stars.sorted = 0;
-  } else {
-    flags &= ~c->stars.sorted;
-  }
-  if (flags == 0 && !cell_get_flag(c, cell_flag_do_stars_sub_sort)) return;
-
-  /* Check that the particles have been moved to the current time */
-  if (flags && !cell_are_spart_drifted(c, r->e)) {
-    error("Sorting un-drifted cell c->nodeID=%d", c->nodeID);
-  }
-
-#ifdef SWIFT_DEBUG_CHECKS
-  /* Make sure the sort flags are consistent (downward). */
-  runner_check_sorts_stars(c, c->stars.sorted);
-
-  /* Make sure the sort flags are consistent (upward). */
-  for (struct cell *finger = c->parent; finger != NULL;
-       finger = finger->parent) {
-    if (finger->stars.sorted & ~c->stars.sorted)
-      error("Inconsistent sort flags (upward).");
-  }
-
-  /* Update the sort timer which represents the last time the sorts
-     were re-set. */
-  if (c->stars.sorted == 0) c->stars.ti_sort = r->e->ti_current;
-#endif
-
-  /* start by allocating the entry arrays in the requested dimensions. */
-  cell_malloc_stars_sorts(c, flags);
-
-  /* Does this cell have any progeny? */
-  if (c->split) {
-
-    /* Fill in the gaps within the progeny. */
-    float dx_max_sort = 0.0f;
-    float dx_max_sort_old = 0.0f;
-    for (int k = 0; k < 8; k++) {
-      if (c->progeny[k] != NULL) {
-
-        if (c->progeny[k]->stars.count > 0) {
-
-          /* Only propagate cleanup if the progeny is stale. */
-          const int cleanup_prog =
-              cleanup && (c->progeny[k]->stars.dx_max_sort_old >
-                          space_maxreldx * c->progeny[k]->dmin);
-          runner_do_stars_sort(r, c->progeny[k], flags, cleanup_prog, 0);
-          dx_max_sort = max(dx_max_sort, c->progeny[k]->stars.dx_max_sort);
-          dx_max_sort_old =
-              max(dx_max_sort_old, c->progeny[k]->stars.dx_max_sort_old);
-        } else {
-
-          /* We need to clean up the unused flags that were in case the
-             number of particles in the cell would change */
-          cell_clear_stars_sort_flags(c->progeny[k], /*clear_unused_flags=*/1);
-        }
-      }
-    }
-    c->stars.dx_max_sort = dx_max_sort;
-    c->stars.dx_max_sort_old = dx_max_sort_old;
-
-    /* Loop over the 13 different sort arrays. */
-    for (int j = 0; j < 13; j++) {
-
-      /* Has this sort array been flagged? */
-      if (!(flags & (1 << j))) continue;
-
-      /* Init the particle index offsets. */
-      int off[8];
-      off[0] = 0;
-      for (int k = 1; k < 8; k++)
-        if (c->progeny[k - 1] != NULL)
-          off[k] = off[k - 1] + c->progeny[k - 1]->stars.count;
-        else
-          off[k] = off[k - 1];
-
-      /* Init the entries and indices. */
-      int inds[8];
-      for (int k = 0; k < 8; k++) {
-        inds[k] = k;
-        if (c->progeny[k] != NULL && c->progeny[k]->stars.count > 0) {
-          fingers[k] = c->progeny[k]->stars.sort[j];
-          buff[k] = fingers[k]->d;
-          off[k] = off[k];
-        } else
-          buff[k] = FLT_MAX;
-      }
-
-      /* Sort the buffer. */
-      for (int i = 0; i < 7; i++)
-        for (int k = i + 1; k < 8; k++)
-          if (buff[inds[k]] < buff[inds[i]]) {
-            int temp_i = inds[i];
-            inds[i] = inds[k];
-            inds[k] = temp_i;
-          }
-
-      /* For each entry in the new sort list. */
-      struct sort_entry *finger = c->stars.sort[j];
-      for (int ind = 0; ind < count; ind++) {
-
-        /* Copy the minimum into the new sort array. */
-        finger[ind].d = buff[inds[0]];
-        finger[ind].i = fingers[inds[0]]->i + off[inds[0]];
-
-        /* Update the buffer. */
-        fingers[inds[0]] += 1;
-        buff[inds[0]] = fingers[inds[0]]->d;
-
-        /* Find the smallest entry. */
-        for (int k = 1; k < 8 && buff[inds[k]] < buff[inds[k - 1]]; k++) {
-          int temp_i = inds[k - 1];
-          inds[k - 1] = inds[k];
-          inds[k] = temp_i;
-        }
-
-      } /* Merge. */
-
-      /* Add a sentinel. */
-      c->stars.sort[j][count].d = FLT_MAX;
-      c->stars.sort[j][count].i = 0;
-
-      /* Mark as sorted. */
-      atomic_or(&c->stars.sorted, 1 << j);
-
-    } /* loop over sort arrays. */
-
-  } /* progeny? */
-
-  /* Otherwise, just sort. */
-  else {
-
-    /* Reset the sort distance */
-    if (c->stars.sorted == 0) {
-
-      /* And the individual sort distances if we are a local cell */
-      for (int k = 0; k < count; k++) {
-        sparts[k].x_diff_sort[0] = 0.0f;
-        sparts[k].x_diff_sort[1] = 0.0f;
-        sparts[k].x_diff_sort[2] = 0.0f;
-      }
-      c->stars.dx_max_sort_old = 0.f;
-      c->stars.dx_max_sort = 0.f;
-    }
-
-    /* Fill the sort array. */
-    for (int k = 0; k < count; k++) {
-      const double px[3] = {sparts[k].x[0], sparts[k].x[1], sparts[k].x[2]};
-      for (int j = 0; j < 13; j++)
-        if (flags & (1 << j)) {
-          c->stars.sort[j][k].i = k;
-          c->stars.sort[j][k].d = px[0] * runner_shift[j][0] +
-                                  px[1] * runner_shift[j][1] +
-                                  px[2] * runner_shift[j][2];
-        }
-    }
-
-    /* Add the sentinel and sort. */
-    for (int j = 0; j < 13; j++)
-      if (flags & (1 << j)) {
-        c->stars.sort[j][count].d = FLT_MAX;
-        c->stars.sort[j][count].i = 0;
-        runner_do_sort_ascending(c->stars.sort[j], count);
-        atomic_or(&c->stars.sorted, 1 << j);
-      }
-  }
-
-#ifdef SWIFT_DEBUG_CHECKS
-  /* Verify the sorting. */
-  for (int j = 0; j < 13; j++) {
-    if (!(flags & (1 << j))) continue;
-    struct sort_entry *finger = c->stars.sort[j];
-    for (int k = 1; k < count; k++) {
-      if (finger[k].d < finger[k - 1].d)
-        error("Sorting failed, ascending array.");
-      if (finger[k].i >= count) error("Sorting failed, indices borked.");
-    }
-  }
-
-  /* Make sure the sort flags are consistent (downward). */
-  runner_check_sorts_stars(c, flags);
-
-  /* Make sure the sort flags are consistent (upward). */
-  for (struct cell *finger = c->parent; finger != NULL;
-       finger = finger->parent) {
-    if (finger->stars.sorted & ~c->stars.sorted)
-      error("Inconsistent sort flags.");
-  }
-#endif
-
-  /* Clear the cell's sort flags. */
-  c->stars.do_sort = 0;
-  cell_clear_flag(c, cell_flag_do_stars_sub_sort);
-  c->stars.requires_sorts = 0;
-
-  if (clock) TIMER_TOC(timer_do_stars_sort);
-}
-
-/**
- * @brief Recurse into a cell until reaching the super level and call
- * the hydro sorting function there.
- *
- * This function must be called at or above the super level!
- *
- * This function will sort the particles in all 13 directions.
- *
- * @param r the #runner.
- * @param c the #cell.
- */
-void runner_do_all_hydro_sort(struct runner *r, struct cell *c) {
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (c->nodeID != engine_rank) error("Function called on a foreign cell!");
-#endif
-
-  if (!cell_is_active_hydro(c, r->e)) return;
-
-  /* Shall we sort at this level? */
-  if (c->hydro.super == c) {
-
-    /* Sort everything */
-    runner_do_hydro_sort(r, c, 0x1FFF, /*cleanup=*/0, /*timer=*/0);
-
-  } else {
-
-#ifdef SWIFT_DEBUG_CHECKS
-    if (c->hydro.super != NULL) error("Function called below the super level!");
-#endif
-
-    /* Ok, then, let's try lower */
-    if (c->split) {
-      for (int k = 0; k < 8; ++k) {
-        if (c->progeny[k] != NULL) runner_do_all_hydro_sort(r, c->progeny[k]);
-      }
-    } else {
-#ifdef SWIFT_DEBUG_CHECKS
-      error("Reached a leaf without encountering a hydro super cell!");
-#endif
-    }
-  }
-}
-
-/**
- * @brief Recurse into a cell until reaching the super level and call
- * the star sorting function there.
- *
- * This function must be called at or above the super level!
- *
- * This function will sort the particles in all 13 directions.
- *
- * @param r the #runner.
- * @param c the #cell.
- */
-void runner_do_all_stars_sort(struct runner *r, struct cell *c) {
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (c->nodeID != engine_rank) error("Function called on a foreign cell!");
-#endif
-
-  if (!cell_is_active_stars(c, r->e) && !cell_is_active_hydro(c, r->e)) return;
-
-  /* Shall we sort at this level? */
-  if (c->hydro.super == c) {
-
-    /* Sort everything */
-    runner_do_stars_sort(r, c, 0x1FFF, /*cleanup=*/0, /*timer=*/0);
-
-  } else {
-
-#ifdef SWIFT_DEBUG_CHECKS
-    if (c->hydro.super != NULL) error("Function called below the super level!");
-#endif
-
-    /* Ok, then, let's try lower */
-    if (c->split) {
-      for (int k = 0; k < 8; ++k) {
-        if (c->progeny[k] != NULL) runner_do_all_stars_sort(r, c->progeny[k]);
-      }
-    } else {
-#ifdef SWIFT_DEBUG_CHECKS
-      error("Reached a leaf without encountering a hydro super cell!");
-#endif
-    }
-  }
-}
-
-/**
- * @brief Initialize the multipoles before the gravity calculation.
- *
- * @param r The runner thread.
- * @param c The cell.
- * @param timer 1 if the time is to be recorded.
- */
-void runner_do_init_grav(struct runner *r, struct cell *c, int timer) {
-
-  const struct engine *e = r->e;
-
-  TIMER_TIC;
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (!(e->policy & engine_policy_self_gravity))
-    error("Grav-init task called outside of self-gravity calculation");
-#endif
-
-  /* Anything to do here? */
-  if (!cell_is_active_gravity(c, e)) return;
-
-  /* Reset the gravity acceleration tensors */
-  gravity_field_tensors_init(&c->grav.multipole->pot, e->ti_current);
-
-  /* Recurse? */
-  if (c->split) {
-    for (int k = 0; k < 8; k++) {
-      if (c->progeny[k] != NULL) runner_do_init_grav(r, c->progeny[k], 0);
-    }
-  }
-
-  if (timer) TIMER_TOC(timer_init_grav);
-}
-
-/**
- * @brief Intermediate task after the gradient loop that does final operations
- * on the gradient quantities and optionally slope limits the gradients
- *
- * @param r The runner thread.
- * @param c The cell.
- * @param timer Are we timing this ?
- */
-void runner_do_extra_ghost(struct runner *r, struct cell *c, int timer) {
-
-#ifdef EXTRA_HYDRO_LOOP
-
-  struct part *restrict parts = c->hydro.parts;
-  struct xpart *restrict xparts = c->hydro.xparts;
-  const int count = c->hydro.count;
-  const struct engine *e = r->e;
-  const integertime_t ti_current = e->ti_current;
-  const int with_cosmology = (e->policy & engine_policy_cosmology);
-  const double time_base = e->time_base;
-  const struct cosmology *cosmo = e->cosmology;
-  const struct hydro_props *hydro_props = e->hydro_properties;
-
-  TIMER_TIC;
-
-  /* Anything to do here? */
-  if (!cell_is_active_hydro(c, e)) return;
-
-  /* Recurse? */
-  if (c->split) {
-    for (int k = 0; k < 8; k++)
-      if (c->progeny[k] != NULL) runner_do_extra_ghost(r, c->progeny[k], 0);
-  } else {
-
-    /* Loop over the parts in this cell. */
-    for (int i = 0; i < count; i++) {
-
-      /* Get a direct pointer on the part. */
-      struct part *restrict p = &parts[i];
-      struct xpart *restrict xp = &xparts[i];
-
-      if (part_is_active(p, e)) {
-
-        /* Finish the gradient calculation */
-        hydro_end_gradient(p);
-
-        /* As of here, particle force variables will be set. */
-
-        /* Calculate the time-step for passing to hydro_prepare_force.
-         * This is the physical time between the start and end of the time-step
-         * without any scale-factor powers. */
-        double dt_alpha;
-
-        if (with_cosmology) {
-          const integertime_t ti_step = get_integer_timestep(p->time_bin);
-          const integertime_t ti_begin =
-              get_integer_time_begin(ti_current - 1, p->time_bin);
-
-          dt_alpha =
-              cosmology_get_delta_time(cosmo, ti_begin, ti_begin + ti_step);
-        } else {
-          dt_alpha = get_timestep(p->time_bin, time_base);
-        }
-
-        /* Compute variables required for the force loop */
-        hydro_prepare_force(p, xp, cosmo, hydro_props, dt_alpha);
-
-        /* The particle force values are now set.  Do _NOT_
-           try to read any particle density variables! */
-
-        /* Prepare the particle for the force loop over neighbours */
-        hydro_reset_acceleration(p);
-      }
-    }
-  }
-
-  if (timer) TIMER_TOC(timer_do_extra_ghost);
-
-#else
-  error("SWIFT was not compiled with the extra hydro loop activated.");
-#endif
-}
-
-/**
- * @brief Intermediate task after the density to check that the smoothing
- * lengths are correct.
- *
- * @param r The runner thread.
- * @param c The cell.
- * @param timer Are we timing this ?
- */
-void runner_do_ghost(struct runner *r, struct cell *c, int timer) {
-
-  struct part *restrict parts = c->hydro.parts;
-  struct xpart *restrict xparts = c->hydro.xparts;
-  const struct engine *e = r->e;
-  const struct space *s = e->s;
-  const struct hydro_space *hs = &s->hs;
-  const struct cosmology *cosmo = e->cosmology;
-  const struct chemistry_global_data *chemistry = e->chemistry;
-
-  const int with_cosmology = (e->policy & engine_policy_cosmology);
-
-  const float hydro_h_max = e->hydro_properties->h_max;
-  const float hydro_h_min = e->hydro_properties->h_min;
-  const float eps = e->hydro_properties->h_tolerance;
-  const float hydro_eta_dim =
-      pow_dimension(e->hydro_properties->eta_neighbours);
-  const int max_smoothing_iter = e->hydro_properties->max_smoothing_iterations;
-  int redo = 0, count = 0;
-
-  /* Running value of the maximal smoothing length */
-  double h_max = c->hydro.h_max;
-
-  TIMER_TIC;
-
-  /* Anything to do here? */
-  if (c->hydro.count == 0) return;
-  if (!cell_is_active_hydro(c, e)) return;
-
-  /* Recurse? */
-  if (c->split) {
-    for (int k = 0; k < 8; k++) {
-      if (c->progeny[k] != NULL) {
-        runner_do_ghost(r, c->progeny[k], 0);
-
-        /* Update h_max */
-        h_max = max(h_max, c->progeny[k]->hydro.h_max);
-      }
-    }
-  } else {
-
-    /* Init the list of active particles that have to be updated and their
-     * current smoothing lengths. */
-    int *pid = NULL;
-    float *h_0 = NULL;
-    float *left = NULL;
-    float *right = NULL;
-    if ((pid = (int *)malloc(sizeof(int) * c->hydro.count)) == NULL)
-      error("Can't allocate memory for pid.");
-    if ((h_0 = (float *)malloc(sizeof(float) * c->hydro.count)) == NULL)
-      error("Can't allocate memory for h_0.");
-    if ((left = (float *)malloc(sizeof(float) * c->hydro.count)) == NULL)
-      error("Can't allocate memory for left.");
-    if ((right = (float *)malloc(sizeof(float) * c->hydro.count)) == NULL)
-      error("Can't allocate memory for right.");
-    for (int k = 0; k < c->hydro.count; k++)
-      if (part_is_active(&parts[k], e)) {
-        pid[count] = k;
-        h_0[count] = parts[k].h;
-        left[count] = 0.f;
-        right[count] = hydro_h_max;
-        ++count;
-      }
-
-    /* While there are particles that need to be updated... */
-    for (int num_reruns = 0; count > 0 && num_reruns < max_smoothing_iter;
-         num_reruns++) {
-
-      /* Reset the redo-count. */
-      redo = 0;
-
-      /* Loop over the remaining active parts in this cell. */
-      for (int i = 0; i < count; i++) {
-
-        /* Get a direct pointer on the part. */
-        struct part *p = &parts[pid[i]];
-        struct xpart *xp = &xparts[pid[i]];
-
-#ifdef SWIFT_DEBUG_CHECKS
-        /* Is this part within the timestep? */
-        if (!part_is_active(p, e)) error("Ghost applied to inactive particle");
-#endif
-
-        /* Get some useful values */
-        const float h_init = h_0[i];
-        const float h_old = p->h;
-        const float h_old_dim = pow_dimension(h_old);
-        const float h_old_dim_minus_one = pow_dimension_minus_one(h_old);
-
-        float h_new;
-        int has_no_neighbours = 0;
-
-        if (p->density.wcount == 0.f) { /* No neighbours case */
-
-          /* Flag that there were no neighbours */
-          has_no_neighbours = 1;
-
-          /* Double h and try again */
-          h_new = 2.f * h_old;
-
-        } else {
-
-          /* Finish the density calculation */
-          hydro_end_density(p, cosmo);
-          chemistry_end_density(p, chemistry, cosmo);
-          pressure_floor_end_density(p, cosmo);
-
-          /* Compute one step of the Newton-Raphson scheme */
-          const float n_sum = p->density.wcount * h_old_dim;
-          const float n_target = hydro_eta_dim;
-          const float f = n_sum - n_target;
-          const float f_prime =
-              p->density.wcount_dh * h_old_dim +
-              hydro_dimension * p->density.wcount * h_old_dim_minus_one;
-
-          /* Improve the bisection bounds */
-          if (n_sum < n_target)
-            left[i] = max(left[i], h_old);
-          else if (n_sum > n_target)
-            right[i] = min(right[i], h_old);
-
-#ifdef SWIFT_DEBUG_CHECKS
-          /* Check the validity of the left and right bounds */
-          if (left[i] > right[i])
-            error("Invalid left (%e) and right (%e)", left[i], right[i]);
-#endif
-
-          /* Skip if h is already h_max and we don't have enough neighbours */
-          /* Same if we are below h_min */
-          if (((p->h >= hydro_h_max) && (f < 0.f)) ||
-              ((p->h <= hydro_h_min) && (f > 0.f))) {
-
-          /* We have a particle whose smoothing length is already set (wants
-           * to be larger but has already hit the maximum OR wants to be
-           * smaller but has already reached the minimum). So, just tidy up as
-           * if the smoothing length had converged correctly  */
-
-#ifdef EXTRA_HYDRO_LOOP
-
-            /* As of here, particle gradient variables will be set. */
-            /* The force variables are set in the extra ghost. */
-
-            /* Compute variables required for the gradient loop */
-            hydro_prepare_gradient(p, xp, cosmo);
-
-            /* The particle gradient values are now set.  Do _NOT_
-               try to read any particle density variables! */
-
-            /* Prepare the particle for the gradient loop over neighbours */
-            hydro_reset_gradient(p);
-
-#else
-            const struct hydro_props *hydro_props = e->hydro_properties;
-
-            /* Calculate the time-step for passing to hydro_prepare_force, used
-             * for the evolution of alpha factors (i.e. those involved in the
-             * artificial viscosity and thermal conduction terms) */
-            const double time_base = e->time_base;
-            const integertime_t ti_current = e->ti_current;
-            double dt_alpha;
-
-            if (with_cosmology) {
-              const integertime_t ti_step = get_integer_timestep(p->time_bin);
-              const integertime_t ti_begin =
-                  get_integer_time_begin(ti_current - 1, p->time_bin);
-
-              dt_alpha =
-                  cosmology_get_delta_time(cosmo, ti_begin, ti_begin + ti_step);
-            } else {
-              dt_alpha = get_timestep(p->time_bin, time_base);
-            }
-
-            /* As of here, particle force variables will be set. */
-
-            /* Compute variables required for the force loop */
-            hydro_prepare_force(p, xp, cosmo, hydro_props, dt_alpha);
-
-            /* The particle force values are now set.  Do _NOT_
-               try to read any particle density variables! */
-
-            /* Prepare the particle for the force loop over neighbours */
-            hydro_reset_acceleration(p);
-
-#endif /* EXTRA_HYDRO_LOOP */
-
-            /* Ok, we are done with this particle */
-            continue;
-          }
-
-          /* Normal case: Use Newton-Raphson to get a better value of h */
-
-          /* Avoid floating point exception from f_prime = 0 */
-          h_new = h_old - f / (f_prime + FLT_MIN);
-
-          /* Be verbose about the particles that struggle to converge */
-          if (num_reruns > max_smoothing_iter - 10) {
-
-            message(
-                "Smoothing length convergence problem: iter=%d p->id=%lld "
-                "h_init=%12.8e h_old=%12.8e h_new=%12.8e f=%f f_prime=%f "
-                "n_sum=%12.8e n_target=%12.8e left=%12.8e right=%12.8e",
-                num_reruns, p->id, h_init, h_old, h_new, f, f_prime, n_sum,
-                n_target, left[i], right[i]);
-          }
-
-#ifdef SWIFT_DEBUG_CHECKS
-          if ((f > 0.f && h_new > h_old) || (f < 0.f && h_new < h_old))
-            error(
-                "Smoothing length correction not going in the right direction");
-#endif
-
-          /* Safety check: truncate to the range [ h_old/2 , 2h_old ]. */
-          h_new = min(h_new, 2.f * h_old);
-          h_new = max(h_new, 0.5f * h_old);
-
-          /* Verify that we are actually progrssing towards the answer */
-          h_new = max(h_new, left[i]);
-          h_new = min(h_new, right[i]);
-        }
-
-        /* Check whether the particle has an inappropriate smoothing length */
-        if (fabsf(h_new - h_old) > eps * h_old) {
-
-          /* Ok, correct then */
-
-          /* Case where we have been oscillating around the solution */
-          if ((h_new == left[i] && h_old == right[i]) ||
-              (h_old == left[i] && h_new == right[i])) {
-
-            /* Bissect the remaining interval */
-            p->h = pow_inv_dimension(
-                0.5f * (pow_dimension(left[i]) + pow_dimension(right[i])));
-
-          } else {
-
-            /* Normal case */
-            p->h = h_new;
-          }
-
-          /* If within the allowed range, try again */
-          if (p->h < hydro_h_max && p->h > hydro_h_min) {
-
-            /* Flag for another round of fun */
-            pid[redo] = pid[i];
-            h_0[redo] = h_0[i];
-            left[redo] = left[i];
-            right[redo] = right[i];
-            redo += 1;
-
-            /* Re-initialise everything */
-            hydro_init_part(p, hs);
-            chemistry_init_part(p, chemistry);
-            pressure_floor_init_part(p, xp);
-            tracers_after_init(p, xp, e->internal_units, e->physical_constants,
-                               with_cosmology, e->cosmology,
-                               e->hydro_properties, e->cooling_func, e->time);
-
-            /* Off we go ! */
-            continue;
-
-          } else if (p->h <= hydro_h_min) {
-
-            /* Ok, this particle is a lost cause... */
-            p->h = hydro_h_min;
-
-          } else if (p->h >= hydro_h_max) {
-
-            /* Ok, this particle is a lost cause... */
-            p->h = hydro_h_max;
-
-            /* Do some damage control if no neighbours at all were found */
-            if (has_no_neighbours) {
-              hydro_part_has_no_neighbours(p, xp, cosmo);
-              chemistry_part_has_no_neighbours(p, xp, chemistry, cosmo);
-              pressure_floor_part_has_no_neighbours(p, xp, cosmo);
-            }
-
-          } else {
-            error(
-                "Fundamental problem with the smoothing length iteration "
-                "logic.");
-          }
-        }
-
-        /* We now have a particle whose smoothing length has converged */
-
-        /* Check if h_max is increased */
-        h_max = max(h_max, p->h);
-
-#ifdef EXTRA_HYDRO_LOOP
-
-        /* As of here, particle gradient variables will be set. */
-        /* The force variables are set in the extra ghost. */
-
-        /* Compute variables required for the gradient loop */
-        hydro_prepare_gradient(p, xp, cosmo);
-
-        /* The particle gradient values are now set.  Do _NOT_
-           try to read any particle density variables! */
-
-        /* Prepare the particle for the gradient loop over neighbours */
-        hydro_reset_gradient(p);
-
-#else
-        const struct hydro_props *hydro_props = e->hydro_properties;
-
-        /* Calculate the time-step for passing to hydro_prepare_force, used for
-         * the evolution of alpha factors (i.e. those involved in the artificial
-         * viscosity and thermal conduction terms) */
-        const double time_base = e->time_base;
-        const integertime_t ti_current = e->ti_current;
-        double dt_alpha;
-
-        if (with_cosmology) {
-          const integertime_t ti_step = get_integer_timestep(p->time_bin);
-          const integertime_t ti_begin =
-              get_integer_time_begin(ti_current - 1, p->time_bin);
-
-          dt_alpha =
-              cosmology_get_delta_time(cosmo, ti_begin, ti_begin + ti_step);
-        } else {
-          dt_alpha = get_timestep(p->time_bin, time_base);
-        }
-
-        /* As of here, particle force variables will be set. */
-
-        /* Compute variables required for the force loop */
-        hydro_prepare_force(p, xp, cosmo, hydro_props, dt_alpha);
-
-        /* The particle force values are now set.  Do _NOT_
-           try to read any particle density variables! */
-
-        /* Prepare the particle for the force loop over neighbours */
-        hydro_reset_acceleration(p);
-
-#endif /* EXTRA_HYDRO_LOOP */
-      }
-
-      /* We now need to treat the particles whose smoothing length had not
-       * converged again */
-
-      /* Re-set the counter for the next loop (potentially). */
-      count = redo;
-      if (count > 0) {
-
-        /* Climb up the cell hierarchy. */
-        for (struct cell *finger = c; finger != NULL; finger = finger->parent) {
-
-          /* Run through this cell's density interactions. */
-          for (struct link *l = finger->hydro.density; l != NULL; l = l->next) {
-
-#ifdef SWIFT_DEBUG_CHECKS
-            if (l->t->ti_run < r->e->ti_current)
-              error("Density task should have been run.");
-#endif
-
-            /* Self-interaction? */
-            if (l->t->type == task_type_self)
-              runner_doself_subset_branch_density(r, finger, parts, pid, count);
-
-            /* Otherwise, pair interaction? */
-            else if (l->t->type == task_type_pair) {
-
-              /* Left or right? */
-              if (l->t->ci == finger)
-                runner_dopair_subset_branch_density(r, finger, parts, pid,
-                                                    count, l->t->cj);
-              else
-                runner_dopair_subset_branch_density(r, finger, parts, pid,
-                                                    count, l->t->ci);
-            }
-
-            /* Otherwise, sub-self interaction? */
-            else if (l->t->type == task_type_sub_self)
-              runner_dosub_subset_density(r, finger, parts, pid, count, NULL,
-                                          1);
-
-            /* Otherwise, sub-pair interaction? */
-            else if (l->t->type == task_type_sub_pair) {
-
-              /* Left or right? */
-              if (l->t->ci == finger)
-                runner_dosub_subset_density(r, finger, parts, pid, count,
-                                            l->t->cj, 1);
-              else
-                runner_dosub_subset_density(r, finger, parts, pid, count,
-                                            l->t->ci, 1);
-            }
-          }
-        }
-      }
-    }
-
-    if (count) {
-      error("Smoothing length failed to converge on %i particles.", count);
-    }
-
-    /* Be clean */
-    free(left);
-    free(right);
-    free(pid);
-    free(h_0);
-  }
-
-  /* Update h_max */
-  c->hydro.h_max = h_max;
-
-  /* The ghost may not always be at the top level.
-   * Therefore we need to update h_max between the super- and top-levels */
-  if (c->hydro.ghost) {
-    for (struct cell *tmp = c->parent; tmp != NULL; tmp = tmp->parent) {
-      atomic_max_d(&tmp->hydro.h_max, h_max);
-    }
-  }
-
-  if (timer) TIMER_TOC(timer_do_ghost);
-}
-
-/**
- * @brief Drift all part in a cell.
- *
- * @param r The runner thread.
- * @param c The cell.
- * @param timer Are we timing this ?
- */
-void runner_do_drift_part(struct runner *r, struct cell *c, int timer) {
-
-  TIMER_TIC;
-
-  cell_drift_part(c, r->e, 0);
-
-  if (timer) TIMER_TOC(timer_drift_part);
-}
-
-/**
- * @brief Drift all gpart in a cell.
- *
- * @param r The runner thread.
- * @param c The cell.
- * @param timer Are we timing this ?
- */
-void runner_do_drift_gpart(struct runner *r, struct cell *c, int timer) {
-
-  TIMER_TIC;
-
-  cell_drift_gpart(c, r->e, 0);
-
-  if (timer) TIMER_TOC(timer_drift_gpart);
-}
-
-/**
- * @brief Drift all spart in a cell.
- *
- * @param r The runner thread.
- * @param c The cell.
- * @param timer Are we timing this ?
- */
-void runner_do_drift_spart(struct runner *r, struct cell *c, int timer) {
-
-  TIMER_TIC;
-
-  cell_drift_spart(c, r->e, 0);
-
-  if (timer) TIMER_TOC(timer_drift_spart);
-}
-
-/**
- * @brief Drift all bpart in a cell.
- *
- * @param r The runner thread.
- * @param c The cell.
- * @param timer Are we timing this ?
- */
-void runner_do_drift_bpart(struct runner *r, struct cell *c, int timer) {
-
-  TIMER_TIC;
-
-  cell_drift_bpart(c, r->e, 0);
-
-  if (timer) TIMER_TOC(timer_drift_bpart);
-}
-
-/**
- * @brief Perform the first half-kick on all the active particles in a cell.
- *
- * @param r The runner thread.
- * @param c The cell.
- * @param timer Are we timing this ?
- */
-void runner_do_kick1(struct runner *r, struct cell *c, int timer) {
-
-  const struct engine *e = r->e;
-  const struct cosmology *cosmo = e->cosmology;
-  const struct hydro_props *hydro_props = e->hydro_properties;
-  const struct entropy_floor_properties *entropy_floor = e->entropy_floor;
-  const int with_cosmology = (e->policy & engine_policy_cosmology);
-  struct part *restrict parts = c->hydro.parts;
-  struct xpart *restrict xparts = c->hydro.xparts;
-  struct gpart *restrict gparts = c->grav.parts;
-  struct spart *restrict sparts = c->stars.parts;
-  const int count = c->hydro.count;
-  const int gcount = c->grav.count;
-  const int scount = c->stars.count;
-  const integertime_t ti_current = e->ti_current;
-  const double time_base = e->time_base;
-
-  TIMER_TIC;
-
-  /* Anything to do here? */
-  if (!cell_is_starting_hydro(c, e) && !cell_is_starting_gravity(c, e) &&
-      !cell_is_starting_stars(c, e) && !cell_is_starting_black_holes(c, e))
-    return;
-
-  /* Recurse? */
-  if (c->split) {
-    for (int k = 0; k < 8; k++)
-      if (c->progeny[k] != NULL) runner_do_kick1(r, c->progeny[k], 0);
-  } else {
-
-    /* Loop over the parts in this cell. */
-    for (int k = 0; k < count; k++) {
-
-      /* Get a handle on the part. */
-      struct part *restrict p = &parts[k];
-      struct xpart *restrict xp = &xparts[k];
-
-      /* If particle needs to be kicked */
-      if (part_is_starting(p, e)) {
-
-#ifdef SWIFT_DEBUG_CHECKS
-        if (p->wakeup == time_bin_awake)
-          error("Woken-up particle that has not been processed in kick1");
-#endif
-
-        /* Skip particles that have been woken up and treated by the limiter. */
-        if (p->wakeup != time_bin_not_awake) continue;
-
-        const integertime_t ti_step = get_integer_timestep(p->time_bin);
-        const integertime_t ti_begin =
-            get_integer_time_begin(ti_current + 1, p->time_bin);
-
-#ifdef SWIFT_DEBUG_CHECKS
-        const integertime_t ti_end = ti_begin + ti_step;
-
-        if (ti_begin != ti_current)
-          error(
-              "Particle in wrong time-bin, ti_end=%lld, ti_begin=%lld, "
-              "ti_step=%lld time_bin=%d wakeup=%d ti_current=%lld",
-              ti_end, ti_begin, ti_step, p->time_bin, p->wakeup, ti_current);
-#endif
-
-        /* Time interval for this half-kick */
-        double dt_kick_grav, dt_kick_hydro, dt_kick_therm, dt_kick_corr;
-        if (with_cosmology) {
-          dt_kick_hydro = cosmology_get_hydro_kick_factor(
-              cosmo, ti_begin, ti_begin + ti_step / 2);
-          dt_kick_grav = cosmology_get_grav_kick_factor(cosmo, ti_begin,
-                                                        ti_begin + ti_step / 2);
-          dt_kick_therm = cosmology_get_therm_kick_factor(
-              cosmo, ti_begin, ti_begin + ti_step / 2);
-          dt_kick_corr = cosmology_get_corr_kick_factor(cosmo, ti_begin,
-                                                        ti_begin + ti_step / 2);
-        } else {
-          dt_kick_hydro = (ti_step / 2) * time_base;
-          dt_kick_grav = (ti_step / 2) * time_base;
-          dt_kick_therm = (ti_step / 2) * time_base;
-          dt_kick_corr = (ti_step / 2) * time_base;
-        }
-
-        /* do the kick */
-        kick_part(p, xp, dt_kick_hydro, dt_kick_grav, dt_kick_therm,
-                  dt_kick_corr, cosmo, hydro_props, entropy_floor, ti_begin,
-                  ti_begin + ti_step / 2);
-
-        /* Update the accelerations to be used in the drift for hydro */
-        if (p->gpart != NULL) {
-
-          xp->a_grav[0] = p->gpart->a_grav[0];
-          xp->a_grav[1] = p->gpart->a_grav[1];
-          xp->a_grav[2] = p->gpart->a_grav[2];
-        }
-      }
-    }
-
-    /* Loop over the gparts in this cell. */
-    for (int k = 0; k < gcount; k++) {
-
-      /* Get a handle on the part. */
-      struct gpart *restrict gp = &gparts[k];
-
-      /* If the g-particle has no counterpart and needs to be kicked */
-      if ((gp->type == swift_type_dark_matter ||
-           gp->type == swift_type_dark_matter_background) &&
-          gpart_is_starting(gp, e)) {
-
-        const integertime_t ti_step = get_integer_timestep(gp->time_bin);
-        const integertime_t ti_begin =
-            get_integer_time_begin(ti_current + 1, gp->time_bin);
-
-#ifdef SWIFT_DEBUG_CHECKS
-        const integertime_t ti_end =
-            get_integer_time_end(ti_current + 1, gp->time_bin);
-
-        if (ti_begin != ti_current)
-          error(
-              "Particle in wrong time-bin, ti_end=%lld, ti_begin=%lld, "
-              "ti_step=%lld time_bin=%d ti_current=%lld",
-              ti_end, ti_begin, ti_step, gp->time_bin, ti_current);
-#endif
-
-        /* Time interval for this half-kick */
-        double dt_kick_grav;
-        if (with_cosmology) {
-          dt_kick_grav = cosmology_get_grav_kick_factor(cosmo, ti_begin,
-                                                        ti_begin + ti_step / 2);
-        } else {
-          dt_kick_grav = (ti_step / 2) * time_base;
-        }
-
-        /* do the kick */
-        kick_gpart(gp, dt_kick_grav, ti_begin, ti_begin + ti_step / 2);
-      }
-    }
-
-    /* Loop over the stars particles in this cell. */
-    for (int k = 0; k < scount; k++) {
-
-      /* Get a handle on the s-part. */
-      struct spart *restrict sp = &sparts[k];
-
-      /* If particle needs to be kicked */
-      if (spart_is_starting(sp, e)) {
-
-        const integertime_t ti_step = get_integer_timestep(sp->time_bin);
-        const integertime_t ti_begin =
-            get_integer_time_begin(ti_current + 1, sp->time_bin);
-
-#ifdef SWIFT_DEBUG_CHECKS
-        const integertime_t ti_end =
-            get_integer_time_end(ti_current + 1, sp->time_bin);
-
-        if (ti_begin != ti_current)
-          error(
-              "Particle in wrong time-bin, ti_end=%lld, ti_begin=%lld, "
-              "ti_step=%lld time_bin=%d ti_current=%lld",
-              ti_end, ti_begin, ti_step, sp->time_bin, ti_current);
-#endif
-
-        /* Time interval for this half-kick */
-        double dt_kick_grav;
-        if (with_cosmology) {
-          dt_kick_grav = cosmology_get_grav_kick_factor(cosmo, ti_begin,
-                                                        ti_begin + ti_step / 2);
-        } else {
-          dt_kick_grav = (ti_step / 2) * time_base;
-        }
-
-        /* do the kick */
-        kick_spart(sp, dt_kick_grav, ti_begin, ti_begin + ti_step / 2);
-      }
-    }
-  }
-
-  if (timer) TIMER_TOC(timer_kick1);
-}
-
-/**
- * @brief Perform the second half-kick on all the active particles in a cell.
- *
- * Also prepares particles to be drifted.
- *
- * @param r The runner thread.
- * @param c The cell.
- * @param timer Are we timing this ?
- */
-void runner_do_kick2(struct runner *r, struct cell *c, int timer) {
-
-  const struct engine *e = r->e;
-  const struct cosmology *cosmo = e->cosmology;
-  const struct hydro_props *hydro_props = e->hydro_properties;
-  const struct entropy_floor_properties *entropy_floor = e->entropy_floor;
-  const int with_cosmology = (e->policy & engine_policy_cosmology);
-  const int count = c->hydro.count;
-  const int gcount = c->grav.count;
-  const int scount = c->stars.count;
-  struct part *restrict parts = c->hydro.parts;
-  struct xpart *restrict xparts = c->hydro.xparts;
-  struct gpart *restrict gparts = c->grav.parts;
-  struct spart *restrict sparts = c->stars.parts;
-  const integertime_t ti_current = e->ti_current;
-  const double time_base = e->time_base;
-
-  TIMER_TIC;
-
-  /* Anything to do here? */
-  if (!cell_is_active_hydro(c, e) && !cell_is_active_gravity(c, e) &&
-      !cell_is_active_stars(c, e) && !cell_is_active_black_holes(c, e))
-    return;
-
-  /* Recurse? */
-  if (c->split) {
-    for (int k = 0; k < 8; k++)
-      if (c->progeny[k] != NULL) runner_do_kick2(r, c->progeny[k], 0);
-  } else {
-
-    /* Loop over the particles in this cell. */
-    for (int k = 0; k < count; k++) {
-
-      /* Get a handle on the part. */
-      struct part *restrict p = &parts[k];
-      struct xpart *restrict xp = &xparts[k];
-
-      /* If particle needs to be kicked */
-      if (part_is_active(p, e)) {
-
-        integertime_t ti_begin, ti_end, ti_step;
-
-#ifdef SWIFT_DEBUG_CHECKS
-        if (p->wakeup == time_bin_awake)
-          error("Woken-up particle that has not been processed in kick1");
-#endif
-
-        if (p->wakeup == time_bin_not_awake) {
-
-          /* Time-step from a regular kick */
-          ti_step = get_integer_timestep(p->time_bin);
-          ti_begin = get_integer_time_begin(ti_current, p->time_bin);
-          ti_end = ti_begin + ti_step;
-
-        } else {
-
-          /* Time-step that follows a wake-up call */
-          ti_begin = get_integer_time_begin(ti_current, p->wakeup);
-          ti_end = get_integer_time_end(ti_current, p->time_bin);
-          ti_step = ti_end - ti_begin;
-
-          /* Reset the flag. Everything is back to normal from now on. */
-          p->wakeup = time_bin_awake;
-        }
-
-#ifdef SWIFT_DEBUG_CHECKS
-        if (ti_begin + ti_step != ti_current)
-          error(
-              "Particle in wrong time-bin, ti_begin=%lld, ti_step=%lld "
-              "time_bin=%d wakeup=%d ti_current=%lld",
-              ti_begin, ti_step, p->time_bin, p->wakeup, ti_current);
-#endif
-        /* Time interval for this half-kick */
-        double dt_kick_grav, dt_kick_hydro, dt_kick_therm, dt_kick_corr;
-        if (with_cosmology) {
-          dt_kick_hydro = cosmology_get_hydro_kick_factor(
-              cosmo, ti_begin + ti_step / 2, ti_end);
-          dt_kick_grav = cosmology_get_grav_kick_factor(
-              cosmo, ti_begin + ti_step / 2, ti_end);
-          dt_kick_therm = cosmology_get_therm_kick_factor(
-              cosmo, ti_begin + ti_step / 2, ti_end);
-          dt_kick_corr = cosmology_get_corr_kick_factor(
-              cosmo, ti_begin + ti_step / 2, ti_end);
-        } else {
-          dt_kick_hydro = (ti_end - (ti_begin + ti_step / 2)) * time_base;
-          dt_kick_grav = (ti_end - (ti_begin + ti_step / 2)) * time_base;
-          dt_kick_therm = (ti_end - (ti_begin + ti_step / 2)) * time_base;
-          dt_kick_corr = (ti_end - (ti_begin + ti_step / 2)) * time_base;
-        }
-
-        /* Finish the time-step with a second half-kick */
-        kick_part(p, xp, dt_kick_hydro, dt_kick_grav, dt_kick_therm,
-                  dt_kick_corr, cosmo, hydro_props, entropy_floor,
-                  ti_begin + ti_step / 2, ti_end);
-
-#ifdef SWIFT_DEBUG_CHECKS
-        /* Check that kick and the drift are synchronized */
-        if (p->ti_drift != p->ti_kick) error("Error integrating part in time.");
-#endif
-
-        /* Prepare the values to be drifted */
-        hydro_reset_predicted_values(p, xp, cosmo);
-      }
-    }
-
-    /* Loop over the g-particles in this cell. */
-    for (int k = 0; k < gcount; k++) {
-
-      /* Get a handle on the part. */
-      struct gpart *restrict gp = &gparts[k];
-
-      /* If the g-particle has no counterpart and needs to be kicked */
-      if ((gp->type == swift_type_dark_matter ||
-           gp->type == swift_type_dark_matter_background) &&
-          gpart_is_active(gp, e)) {
-
-        const integertime_t ti_step = get_integer_timestep(gp->time_bin);
-        const integertime_t ti_begin =
-            get_integer_time_begin(ti_current, gp->time_bin);
-
-#ifdef SWIFT_DEBUG_CHECKS
-        if (ti_begin + ti_step != ti_current)
-          error("Particle in wrong time-bin");
-#endif
-
-        /* Time interval for this half-kick */
-        double dt_kick_grav;
-        if (with_cosmology) {
-          dt_kick_grav = cosmology_get_grav_kick_factor(
-              cosmo, ti_begin + ti_step / 2, ti_begin + ti_step);
-        } else {
-          dt_kick_grav = (ti_step / 2) * time_base;
-        }
-
-        /* Finish the time-step with a second half-kick */
-        kick_gpart(gp, dt_kick_grav, ti_begin + ti_step / 2,
-                   ti_begin + ti_step);
-
-#ifdef SWIFT_DEBUG_CHECKS
-        /* Check that kick and the drift are synchronized */
-        if (gp->ti_drift != gp->ti_kick)
-          error("Error integrating g-part in time.");
-#endif
-
-        /* Prepare the values to be drifted */
-        gravity_reset_predicted_values(gp);
-      }
-    }
-
-    /* Loop over the particles in this cell. */
-    for (int k = 0; k < scount; k++) {
-
-      /* Get a handle on the part. */
-      struct spart *restrict sp = &sparts[k];
-
-      /* If particle needs to be kicked */
-      if (spart_is_active(sp, e)) {
-
-        const integertime_t ti_step = get_integer_timestep(sp->time_bin);
-        const integertime_t ti_begin =
-            get_integer_time_begin(ti_current, sp->time_bin);
-
-#ifdef SWIFT_DEBUG_CHECKS
-        if (ti_begin + ti_step != ti_current)
-          error("Particle in wrong time-bin");
-#endif
-
-        /* Time interval for this half-kick */
-        double dt_kick_grav;
-        if (with_cosmology) {
-          dt_kick_grav = cosmology_get_grav_kick_factor(
-              cosmo, ti_begin + ti_step / 2, ti_begin + ti_step);
-        } else {
-          dt_kick_grav = (ti_step / 2) * time_base;
-        }
-
-        /* Finish the time-step with a second half-kick */
-        kick_spart(sp, dt_kick_grav, ti_begin + ti_step / 2,
-                   ti_begin + ti_step);
-
-#ifdef SWIFT_DEBUG_CHECKS
-        /* Check that kick and the drift are synchronized */
-        if (sp->ti_drift != sp->ti_kick)
-          error("Error integrating s-part in time.");
-#endif
-
-        /* Prepare the values to be drifted */
-        stars_reset_predicted_values(sp);
-      }
-    }
-  }
-  if (timer) TIMER_TOC(timer_kick2);
-}
-
-/**
- * @brief Computes the next time-step of all active particles in this cell
- * and update the cell's statistics.
- *
- * @param r The runner thread.
- * @param c The cell.
- * @param timer Are we timing this ?
- */
-void runner_do_timestep(struct runner *r, struct cell *c, int timer) {
-
-  const struct engine *e = r->e;
-  const integertime_t ti_current = e->ti_current;
-  const int with_cosmology = (e->policy & engine_policy_cosmology);
-  const int count = c->hydro.count;
-  const int gcount = c->grav.count;
-  const int scount = c->stars.count;
-  const int bcount = c->black_holes.count;
-  struct part *restrict parts = c->hydro.parts;
-  struct xpart *restrict xparts = c->hydro.xparts;
-  struct gpart *restrict gparts = c->grav.parts;
-  struct spart *restrict sparts = c->stars.parts;
-  struct bpart *restrict bparts = c->black_holes.parts;
-
-  TIMER_TIC;
-
-  /* Anything to do here? */
-  if (!cell_is_active_hydro(c, e) && !cell_is_active_gravity(c, e) &&
-      !cell_is_active_stars(c, e) && !cell_is_active_black_holes(c, e)) {
-    c->hydro.updated = 0;
-    c->grav.updated = 0;
-    c->stars.updated = 0;
-    c->black_holes.updated = 0;
-    return;
-  }
-
-  int updated = 0, g_updated = 0, s_updated = 0, b_updated = 0;
-  integertime_t ti_hydro_end_min = max_nr_timesteps, ti_hydro_end_max = 0,
-                ti_hydro_beg_max = 0;
-  integertime_t ti_gravity_end_min = max_nr_timesteps, ti_gravity_end_max = 0,
-                ti_gravity_beg_max = 0;
-  integertime_t ti_stars_end_min = max_nr_timesteps, ti_stars_end_max = 0,
-                ti_stars_beg_max = 0;
-  integertime_t ti_black_holes_end_min = max_nr_timesteps,
-                ti_black_holes_end_max = 0, ti_black_holes_beg_max = 0;
-
-  /* No children? */
-  if (!c->split) {
-
-    /* Loop over the particles in this cell. */
-    for (int k = 0; k < count; k++) {
-
-      /* Get a handle on the part. */
-      struct part *restrict p = &parts[k];
-      struct xpart *restrict xp = &xparts[k];
-
-      /* If particle needs updating */
-      if (part_is_active(p, e)) {
-
-#ifdef SWIFT_DEBUG_CHECKS
-        /* Current end of time-step */
-        const integertime_t ti_end =
-            get_integer_time_end(ti_current, p->time_bin);
-
-        if (ti_end != ti_current)
-          error("Computing time-step of rogue particle.");
-#endif
-
-        /* Get new time-step */
-        const integertime_t ti_new_step = get_part_timestep(p, xp, e);
-
-        /* Update particle */
-        p->time_bin = get_time_bin(ti_new_step);
-        if (p->gpart != NULL) p->gpart->time_bin = p->time_bin;
-
-        /* Update the tracers properties */
-        tracers_after_timestep(p, xp, e->internal_units, e->physical_constants,
-                               with_cosmology, e->cosmology,
-                               e->hydro_properties, e->cooling_func, e->time);
-
-        /* Number of updated particles */
-        updated++;
-        if (p->gpart != NULL) g_updated++;
-
-        /* What is the next sync-point ? */
-        ti_hydro_end_min = min(ti_current + ti_new_step, ti_hydro_end_min);
-        ti_hydro_end_max = max(ti_current + ti_new_step, ti_hydro_end_max);
-
-        /* What is the next starting point for this cell ? */
-        ti_hydro_beg_max = max(ti_current, ti_hydro_beg_max);
-
-        if (p->gpart != NULL) {
-
-          /* What is the next sync-point ? */
-          ti_gravity_end_min =
-              min(ti_current + ti_new_step, ti_gravity_end_min);
-          ti_gravity_end_max =
-              max(ti_current + ti_new_step, ti_gravity_end_max);
-
-          /* What is the next starting point for this cell ? */
-          ti_gravity_beg_max = max(ti_current, ti_gravity_beg_max);
-        }
-      }
-
-      else { /* part is inactive */
-
-        if (!part_is_inhibited(p, e)) {
-
-          const integertime_t ti_end =
-              get_integer_time_end(ti_current, p->time_bin);
-
-          const integertime_t ti_beg =
-              get_integer_time_begin(ti_current + 1, p->time_bin);
-
-          /* What is the next sync-point ? */
-          ti_hydro_end_min = min(ti_end, ti_hydro_end_min);
-          ti_hydro_end_max = max(ti_end, ti_hydro_end_max);
-
-          /* What is the next starting point for this cell ? */
-          ti_hydro_beg_max = max(ti_beg, ti_hydro_beg_max);
-
-          if (p->gpart != NULL) {
-
-            /* What is the next sync-point ? */
-            ti_gravity_end_min = min(ti_end, ti_gravity_end_min);
-            ti_gravity_end_max = max(ti_end, ti_gravity_end_max);
-
-            /* What is the next starting point for this cell ? */
-            ti_gravity_beg_max = max(ti_beg, ti_gravity_beg_max);
-          }
-        }
-      }
-    }
-
-    /* Loop over the g-particles in this cell. */
-    for (int k = 0; k < gcount; k++) {
-
-      /* Get a handle on the part. */
-      struct gpart *restrict gp = &gparts[k];
-
-      /* If the g-particle has no counterpart */
-      if (gp->type == swift_type_dark_matter ||
-          gp->type == swift_type_dark_matter_background) {
-
-        /* need to be updated ? */
-        if (gpart_is_active(gp, e)) {
-
-#ifdef SWIFT_DEBUG_CHECKS
-          /* Current end of time-step */
-          const integertime_t ti_end =
-              get_integer_time_end(ti_current, gp->time_bin);
-
-          if (ti_end != ti_current)
-            error("Computing time-step of rogue particle.");
-#endif
-
-          /* Get new time-step */
-          const integertime_t ti_new_step = get_gpart_timestep(gp, e);
-
-          /* Update particle */
-          gp->time_bin = get_time_bin(ti_new_step);
-
-          /* Number of updated g-particles */
-          g_updated++;
-
-          /* What is the next sync-point ? */
-          ti_gravity_end_min =
-              min(ti_current + ti_new_step, ti_gravity_end_min);
-          ti_gravity_end_max =
-              max(ti_current + ti_new_step, ti_gravity_end_max);
-
-          /* What is the next starting point for this cell ? */
-          ti_gravity_beg_max = max(ti_current, ti_gravity_beg_max);
-
-        } else { /* gpart is inactive */
-
-          if (!gpart_is_inhibited(gp, e)) {
-
-            const integertime_t ti_end =
-                get_integer_time_end(ti_current, gp->time_bin);
-
-            /* What is the next sync-point ? */
-            ti_gravity_end_min = min(ti_end, ti_gravity_end_min);
-            ti_gravity_end_max = max(ti_end, ti_gravity_end_max);
-
-            const integertime_t ti_beg =
-                get_integer_time_begin(ti_current + 1, gp->time_bin);
-
-            /* What is the next starting point for this cell ? */
-            ti_gravity_beg_max = max(ti_beg, ti_gravity_beg_max);
-          }
-        }
-      }
-    }
-
-    /* Loop over the star particles in this cell. */
-    for (int k = 0; k < scount; k++) {
-
-      /* Get a handle on the part. */
-      struct spart *restrict sp = &sparts[k];
-
-      /* need to be updated ? */
-      if (spart_is_active(sp, e)) {
-
-#ifdef SWIFT_DEBUG_CHECKS
-        /* Current end of time-step */
-        const integertime_t ti_end =
-            get_integer_time_end(ti_current, sp->time_bin);
-
-        if (ti_end != ti_current)
-          error("Computing time-step of rogue particle.");
-#endif
-        /* Get new time-step */
-        const integertime_t ti_new_step = get_spart_timestep(sp, e);
-
-        /* Update particle */
-        sp->time_bin = get_time_bin(ti_new_step);
-        sp->gpart->time_bin = get_time_bin(ti_new_step);
-
-        /* Number of updated s-particles */
-        s_updated++;
-        g_updated++;
-
-        ti_stars_end_min = min(ti_current + ti_new_step, ti_stars_end_min);
-        ti_stars_end_max = max(ti_current + ti_new_step, ti_stars_end_max);
-        ti_gravity_end_min = min(ti_current + ti_new_step, ti_gravity_end_min);
-        ti_gravity_end_max = max(ti_current + ti_new_step, ti_gravity_end_max);
-
-        /* What is the next starting point for this cell ? */
-        ti_stars_beg_max = max(ti_current, ti_stars_beg_max);
-        ti_gravity_beg_max = max(ti_current, ti_gravity_beg_max);
-
-        /* star particle is inactive but not inhibited */
-      } else {
-
-        if (!spart_is_inhibited(sp, e)) {
-
-          const integertime_t ti_end =
-              get_integer_time_end(ti_current, sp->time_bin);
-
-          const integertime_t ti_beg =
-              get_integer_time_begin(ti_current + 1, sp->time_bin);
-
-          ti_stars_end_min = min(ti_end, ti_stars_end_min);
-          ti_stars_end_max = max(ti_end, ti_stars_end_max);
-          ti_gravity_end_min = min(ti_end, ti_gravity_end_min);
-          ti_gravity_end_max = max(ti_end, ti_gravity_end_max);
-
-          /* What is the next starting point for this cell ? */
-          ti_stars_beg_max = max(ti_beg, ti_stars_beg_max);
-          ti_gravity_beg_max = max(ti_beg, ti_gravity_beg_max);
-        }
-      }
-    }
-
-    /* Loop over the star particles in this cell. */
-    for (int k = 0; k < bcount; k++) {
-
-      /* Get a handle on the part. */
-      struct bpart *restrict bp = &bparts[k];
-
-      /* need to be updated ? */
-      if (bpart_is_active(bp, e)) {
-
-#ifdef SWIFT_DEBUG_CHECKS
-        /* Current end of time-step */
-        const integertime_t ti_end =
-            get_integer_time_end(ti_current, bp->time_bin);
-
-        if (ti_end != ti_current)
-          error("Computing time-step of rogue particle.");
-#endif
-        /* Get new time-step */
-        const integertime_t ti_new_step = get_bpart_timestep(bp, e);
-
-        /* Update particle */
-        bp->time_bin = get_time_bin(ti_new_step);
-        bp->gpart->time_bin = get_time_bin(ti_new_step);
-
-        /* Number of updated s-particles */
-        b_updated++;
-        g_updated++;
-
-        ti_black_holes_end_min =
-            min(ti_current + ti_new_step, ti_black_holes_end_min);
-        ti_black_holes_end_max =
-            max(ti_current + ti_new_step, ti_black_holes_end_max);
-        ti_gravity_end_min = min(ti_current + ti_new_step, ti_gravity_end_min);
-        ti_gravity_end_max = max(ti_current + ti_new_step, ti_gravity_end_max);
-
-        /* What is the next starting point for this cell ? */
-        ti_black_holes_beg_max = max(ti_current, ti_black_holes_beg_max);
-        ti_gravity_beg_max = max(ti_current, ti_gravity_beg_max);
-
-        /* star particle is inactive but not inhibited */
-      } else {
-
-        if (!bpart_is_inhibited(bp, e)) {
-
-          const integertime_t ti_end =
-              get_integer_time_end(ti_current, bp->time_bin);
-
-          const integertime_t ti_beg =
-              get_integer_time_begin(ti_current + 1, bp->time_bin);
-
-          ti_black_holes_end_min = min(ti_end, ti_black_holes_end_min);
-          ti_black_holes_end_max = max(ti_end, ti_black_holes_end_max);
-          ti_gravity_end_min = min(ti_end, ti_gravity_end_min);
-          ti_gravity_end_max = max(ti_end, ti_gravity_end_max);
-
-          /* What is the next starting point for this cell ? */
-          ti_black_holes_beg_max = max(ti_beg, ti_black_holes_beg_max);
-          ti_gravity_beg_max = max(ti_beg, ti_gravity_beg_max);
-        }
-      }
-    }
-
-  } else {
-
-    /* Loop over the progeny. */
-    for (int k = 0; k < 8; k++) {
-      if (c->progeny[k] != NULL) {
-        struct cell *restrict cp = c->progeny[k];
-
-        /* Recurse */
-        runner_do_timestep(r, cp, 0);
-
-        /* And aggregate */
-        updated += cp->hydro.updated;
-        g_updated += cp->grav.updated;
-        s_updated += cp->stars.updated;
-        b_updated += cp->black_holes.updated;
-
-        ti_hydro_end_min = min(cp->hydro.ti_end_min, ti_hydro_end_min);
-        ti_hydro_end_max = max(cp->hydro.ti_end_max, ti_hydro_end_max);
-        ti_hydro_beg_max = max(cp->hydro.ti_beg_max, ti_hydro_beg_max);
-
-        ti_gravity_end_min = min(cp->grav.ti_end_min, ti_gravity_end_min);
-        ti_gravity_end_max = max(cp->grav.ti_end_max, ti_gravity_end_max);
-        ti_gravity_beg_max = max(cp->grav.ti_beg_max, ti_gravity_beg_max);
-
-        ti_stars_end_min = min(cp->stars.ti_end_min, ti_stars_end_min);
-        ti_stars_end_max = max(cp->grav.ti_end_max, ti_stars_end_max);
-        ti_stars_beg_max = max(cp->grav.ti_beg_max, ti_stars_beg_max);
-
-        ti_black_holes_end_min =
-            min(cp->black_holes.ti_end_min, ti_black_holes_end_min);
-        ti_black_holes_end_max =
-            max(cp->grav.ti_end_max, ti_black_holes_end_max);
-        ti_black_holes_beg_max =
-            max(cp->grav.ti_beg_max, ti_black_holes_beg_max);
-      }
-    }
-  }
-
-  /* Store the values. */
-  c->hydro.updated = updated;
-  c->grav.updated = g_updated;
-  c->stars.updated = s_updated;
-  c->black_holes.updated = b_updated;
-
-  c->hydro.ti_end_min = ti_hydro_end_min;
-  c->hydro.ti_end_max = ti_hydro_end_max;
-  c->hydro.ti_beg_max = ti_hydro_beg_max;
-  c->grav.ti_end_min = ti_gravity_end_min;
-  c->grav.ti_end_max = ti_gravity_end_max;
-  c->grav.ti_beg_max = ti_gravity_beg_max;
-  c->stars.ti_end_min = ti_stars_end_min;
-  c->stars.ti_end_max = ti_stars_end_max;
-  c->stars.ti_beg_max = ti_stars_beg_max;
-  c->black_holes.ti_end_min = ti_black_holes_end_min;
-  c->black_holes.ti_end_max = ti_black_holes_end_max;
-  c->black_holes.ti_beg_max = ti_black_holes_beg_max;
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (c->hydro.ti_end_min == e->ti_current &&
-      c->hydro.ti_end_min < max_nr_timesteps)
-    error("End of next hydro step is current time!");
-  if (c->grav.ti_end_min == e->ti_current &&
-      c->grav.ti_end_min < max_nr_timesteps)
-    error("End of next gravity step is current time!");
-  if (c->stars.ti_end_min == e->ti_current &&
-      c->stars.ti_end_min < max_nr_timesteps)
-    error("End of next stars step is current time!");
-  if (c->black_holes.ti_end_min == e->ti_current &&
-      c->black_holes.ti_end_min < max_nr_timesteps)
-    error("End of next black holes step is current time!");
-#endif
-
-  if (timer) TIMER_TOC(timer_timestep);
-}
-
-/**
- * @brief Apply the time-step limiter to all awaken particles in a cell
- * hierarchy.
- *
- * @param r The task #runner.
- * @param c The #cell.
- * @param force Limit the particles irrespective of the #cell flags.
- * @param timer Are we timing this ?
- */
-void runner_do_limiter(struct runner *r, struct cell *c, int force, int timer) {
-
-  const struct engine *e = r->e;
-  const integertime_t ti_current = e->ti_current;
-  const int count = c->hydro.count;
-  struct part *restrict parts = c->hydro.parts;
-  struct xpart *restrict xparts = c->hydro.xparts;
-
-  TIMER_TIC;
-
-#ifdef SWIFT_DEBUG_CHECKS
-  /* Check that we only limit local cells. */
-  if (c->nodeID != engine_rank) error("Limiting dt of a foreign cell is nope.");
-#endif
-
-  integertime_t ti_hydro_end_min = max_nr_timesteps, ti_hydro_end_max = 0,
-                ti_hydro_beg_max = 0;
-  integertime_t ti_gravity_end_min = max_nr_timesteps, ti_gravity_end_max = 0,
-                ti_gravity_beg_max = 0;
-
-  /* Limit irrespective of cell flags? */
-  force = (force || cell_get_flag(c, cell_flag_do_hydro_limiter));
-
-  /* Early abort? */
-  if (c->hydro.count == 0) {
-
-    /* Clear the limiter flags. */
-    cell_clear_flag(
-        c, cell_flag_do_hydro_limiter | cell_flag_do_hydro_sub_limiter);
-    return;
-  }
-
-  /* Loop over the progeny ? */
-  if (c->split && (force || cell_get_flag(c, cell_flag_do_hydro_sub_limiter))) {
-    for (int k = 0; k < 8; k++) {
-      if (c->progeny[k] != NULL) {
-        struct cell *restrict cp = c->progeny[k];
-
-        /* Recurse */
-        runner_do_limiter(r, cp, force, 0);
-
-        /* And aggregate */
-        ti_hydro_end_min = min(cp->hydro.ti_end_min, ti_hydro_end_min);
-        ti_hydro_end_max = max(cp->hydro.ti_end_max, ti_hydro_end_max);
-        ti_hydro_beg_max = max(cp->hydro.ti_beg_max, ti_hydro_beg_max);
-        ti_gravity_end_min = min(cp->grav.ti_end_min, ti_gravity_end_min);
-        ti_gravity_end_max = max(cp->grav.ti_end_max, ti_gravity_end_max);
-        ti_gravity_beg_max = max(cp->grav.ti_beg_max, ti_gravity_beg_max);
-      }
-    }
-
-    /* Store the updated values */
-    c->hydro.ti_end_min = min(c->hydro.ti_end_min, ti_hydro_end_min);
-    c->hydro.ti_end_max = max(c->hydro.ti_end_max, ti_hydro_end_max);
-    c->hydro.ti_beg_max = max(c->hydro.ti_beg_max, ti_hydro_beg_max);
-    c->grav.ti_end_min = min(c->grav.ti_end_min, ti_gravity_end_min);
-    c->grav.ti_end_max = max(c->grav.ti_end_max, ti_gravity_end_max);
-    c->grav.ti_beg_max = max(c->grav.ti_beg_max, ti_gravity_beg_max);
-
-  } else if (!c->split && force) {
-
-    ti_hydro_end_min = c->hydro.ti_end_min;
-    ti_hydro_end_max = c->hydro.ti_end_max;
-    ti_hydro_beg_max = c->hydro.ti_beg_max;
-    ti_gravity_end_min = c->grav.ti_end_min;
-    ti_gravity_end_max = c->grav.ti_end_max;
-    ti_gravity_beg_max = c->grav.ti_beg_max;
-
-    /* Loop over the gas particles in this cell. */
-    for (int k = 0; k < count; k++) {
-
-      /* Get a handle on the part. */
-      struct part *restrict p = &parts[k];
-      struct xpart *restrict xp = &xparts[k];
-
-      /* Avoid inhibited particles */
-      if (part_is_inhibited(p, e)) continue;
-
-      /* If the particle will be active no need to wake it up */
-      if (part_is_active(p, e) && p->wakeup != time_bin_not_awake)
-        p->wakeup = time_bin_not_awake;
-
-      /* Bip, bip, bip... wake-up time */
-      if (p->wakeup <= time_bin_awake) {
-
-        /* Apply the limiter and get the new time-step size */
-        const integertime_t ti_new_step = timestep_limit_part(p, xp, e);
-
-        /* What is the next sync-point ? */
-        ti_hydro_end_min = min(ti_current + ti_new_step, ti_hydro_end_min);
-        ti_hydro_end_max = max(ti_current + ti_new_step, ti_hydro_end_max);
-
-        /* What is the next starting point for this cell ? */
-        ti_hydro_beg_max = max(ti_current, ti_hydro_beg_max);
-
-        /* Also limit the gpart counter-part */
-        if (p->gpart != NULL) {
-
-          /* Register the time-bin */
-          p->gpart->time_bin = p->time_bin;
-
-          /* What is the next sync-point ? */
-          ti_gravity_end_min =
-              min(ti_current + ti_new_step, ti_gravity_end_min);
-          ti_gravity_end_max =
-              max(ti_current + ti_new_step, ti_gravity_end_max);
-
-          /* What is the next starting point for this cell ? */
-          ti_gravity_beg_max = max(ti_current, ti_gravity_beg_max);
-        }
-      }
-    }
-
-    /* Store the updated values */
-    c->hydro.ti_end_min = min(c->hydro.ti_end_min, ti_hydro_end_min);
-    c->hydro.ti_end_max = max(c->hydro.ti_end_max, ti_hydro_end_max);
-    c->hydro.ti_beg_max = max(c->hydro.ti_beg_max, ti_hydro_beg_max);
-    c->grav.ti_end_min = min(c->grav.ti_end_min, ti_gravity_end_min);
-    c->grav.ti_end_max = max(c->grav.ti_end_max, ti_gravity_end_max);
-    c->grav.ti_beg_max = max(c->grav.ti_beg_max, ti_gravity_beg_max);
-  }
-
-  /* Clear the limiter flags. */
-  cell_clear_flag(c,
-                  cell_flag_do_hydro_limiter | cell_flag_do_hydro_sub_limiter);
-
-  if (timer) TIMER_TOC(timer_do_limiter);
-}
-
-/**
- * @brief End the hydro force calculation of all active particles in a cell
- * by multiplying the acccelerations by the relevant constants
- *
- * @param r The #runner thread.
- * @param c The #cell.
- * @param timer Are we timing this ?
- */
-void runner_do_end_hydro_force(struct runner *r, struct cell *c, int timer) {
-
-  const struct engine *e = r->e;
-
-  TIMER_TIC;
-
-  /* Anything to do here? */
-  if (!cell_is_active_hydro(c, e)) return;
-
-  /* Recurse? */
-  if (c->split) {
-    for (int k = 0; k < 8; k++)
-      if (c->progeny[k] != NULL) runner_do_end_hydro_force(r, c->progeny[k], 0);
-  } else {
-
-    const struct cosmology *cosmo = e->cosmology;
-    const int count = c->hydro.count;
-    struct part *restrict parts = c->hydro.parts;
-
-    /* Loop over the gas particles in this cell. */
-    for (int k = 0; k < count; k++) {
-
-      /* Get a handle on the part. */
-      struct part *restrict p = &parts[k];
-
-      if (part_is_active(p, e)) {
-
-        /* Finish the force loop */
-        hydro_end_force(p, cosmo);
-        chemistry_end_force(p, cosmo);
-
-#ifdef SWIFT_BOUNDARY_PARTICLES
-
-        /* Get the ID of the part */
-        const long long id = p->id;
-
-        /* Cancel hdyro forces of these particles */
-        if (id < SWIFT_BOUNDARY_PARTICLES) {
-
-          /* Don't move ! */
-          hydro_reset_acceleration(p);
-
-#if defined(GIZMO_MFV_SPH) || defined(GIZMO_MFM_SPH)
-
-          /* Some values need to be reset in the Gizmo case. */
-          hydro_prepare_force(p, &c->hydro.xparts[k], cosmo,
-                              e->hydro_properties, 0);
-#endif
-        }
-#endif
-      }
-    }
-  }
-
-  if (timer) TIMER_TOC(timer_end_hydro_force);
-}
-
-/**
- * @brief End the gravity force calculation of all active particles in a cell
- * by multiplying the acccelerations by the relevant constants
- *
- * @param r The #runner thread.
- * @param c The #cell.
- * @param timer Are we timing this ?
- */
-void runner_do_end_grav_force(struct runner *r, struct cell *c, int timer) {
-
-  const struct engine *e = r->e;
-
-  TIMER_TIC;
-
-  /* Anything to do here? */
-  if (!cell_is_active_gravity(c, e)) return;
-
-  /* Recurse? */
-  if (c->split) {
-    for (int k = 0; k < 8; k++)
-      if (c->progeny[k] != NULL) runner_do_end_grav_force(r, c->progeny[k], 0);
-  } else {
-
-    const struct space *s = e->s;
-    const int periodic = s->periodic;
-    const float G_newton = e->physical_constants->const_newton_G;
-
-    /* Potential normalisation in the case of periodic gravity */
-    float potential_normalisation = 0.;
-    if (periodic && (e->policy & engine_policy_self_gravity)) {
-      const double volume = s->dim[0] * s->dim[1] * s->dim[2];
-      const double r_s = e->mesh->r_s;
-      potential_normalisation = 4. * M_PI * e->total_mass * r_s * r_s / volume;
-    }
-
-    const int gcount = c->grav.count;
-    struct gpart *restrict gparts = c->grav.parts;
-
-    /* Loop over the g-particles in this cell. */
-    for (int k = 0; k < gcount; k++) {
-
-      /* Get a handle on the gpart. */
-      struct gpart *restrict gp = &gparts[k];
-
-      if (gpart_is_active(gp, e)) {
-
-        /* Finish the force calculation */
-        gravity_end_force(gp, G_newton, potential_normalisation, periodic);
-
-#ifdef SWIFT_MAKE_GRAVITY_GLASS
-
-        /* Negate the gravity forces */
-        gp->a_grav[0] *= -1.f;
-        gp->a_grav[1] *= -1.f;
-        gp->a_grav[2] *= -1.f;
-#endif
-
-#ifdef SWIFT_NO_GRAVITY_BELOW_ID
-
-        /* Get the ID of the gpart */
-        long long id = 0;
-        if (gp->type == swift_type_gas)
-          id = e->s->parts[-gp->id_or_neg_offset].id;
-        else if (gp->type == swift_type_stars)
-          id = e->s->sparts[-gp->id_or_neg_offset].id;
-        else if (gp->type == swift_type_black_hole)
-          error("Unexisting type");
-        else
-          id = gp->id_or_neg_offset;
-
-        /* Cancel gravity forces of these particles */
-        if (id < SWIFT_NO_GRAVITY_BELOW_ID) {
-
-          /* Don't move ! */
-          gp->a_grav[0] = 0.f;
-          gp->a_grav[1] = 0.f;
-          gp->a_grav[2] = 0.f;
-        }
-#endif
-
-#ifdef SWIFT_DEBUG_CHECKS
-        if ((e->policy & engine_policy_self_gravity) &&
-            !(e->policy & engine_policy_black_holes)) {
-
-          /* Let's add a self interaction to simplify the count */
-          gp->num_interacted++;
-
-          /* Check that this gpart has interacted with all the other
-           * particles (via direct or multipoles) in the box */
-          if (gp->num_interacted !=
-              e->total_nr_gparts - e->count_inhibited_gparts) {
-
-            /* Get the ID of the gpart */
-            long long my_id = 0;
-            if (gp->type == swift_type_gas)
-              my_id = e->s->parts[-gp->id_or_neg_offset].id;
-            else if (gp->type == swift_type_stars)
-              my_id = e->s->sparts[-gp->id_or_neg_offset].id;
-            else if (gp->type == swift_type_black_hole)
-              error("Unexisting type");
-            else
-              my_id = gp->id_or_neg_offset;
-
-            error(
-                "g-particle (id=%lld, type=%s) did not interact "
-                "gravitationally with all other gparts "
-                "gp->num_interacted=%lld, total_gparts=%lld (local "
-                "num_gparts=%zd inhibited_gparts=%lld)",
-                my_id, part_type_names[gp->type], gp->num_interacted,
-                e->total_nr_gparts, e->s->nr_gparts, e->count_inhibited_gparts);
-          }
-        }
-#endif
-      }
-    }
-  }
-  if (timer) TIMER_TOC(timer_end_grav_force);
-}
-
-/**
- * @brief Process all the gas particles in a cell that have been flagged for
- * swallowing by a black hole.
- *
- * This is done by recursing down to the leaf-level and skipping the sub-cells
- * that have not been drifted as they would not have any particles with
- * swallowing flag. We then loop over the particles with a flag and look into
- * the space-wide list of black holes for the particle with the corresponding
- * ID. If found, the BH swallows the gas particle and the gas particle is
- * removed. If the cell is local, we may be looking for a foreign BH, in which
- * case, we do not update the BH (that will be done on its node) but just remove
- * the gas particle.
- *
- * @param r The thread #runner.
- * @param c The #cell.
- * @param timer Are we timing this?
- */
-void runner_do_gas_swallow(struct runner *r, struct cell *c, int timer) {
-
-  struct engine *e = r->e;
-  struct space *s = e->s;
-  struct bpart *bparts = s->bparts;
-  const size_t nr_bpart = s->nr_bparts;
-#ifdef WITH_MPI
-  struct bpart *bparts_foreign = s->bparts_foreign;
-  const size_t nr_bparts_foreign = s->nr_bparts_foreign;
-#endif
-
-  struct part *parts = c->hydro.parts;
-  struct xpart *xparts = c->hydro.xparts;
-
-  /* Early abort?
-   * (We only want cells for which we drifted the gas as these are
-   * the only ones that could have gas particles that have been flagged
-   * for swallowing) */
-  if (c->hydro.count == 0 || c->hydro.ti_old_part != e->ti_current) {
-    return;
-  }
-
-  /* Loop over the progeny ? */
-  if (c->split) {
-    for (int k = 0; k < 8; k++) {
-      if (c->progeny[k] != NULL) {
-        struct cell *restrict cp = c->progeny[k];
-
-        runner_do_gas_swallow(r, cp, 0);
-      }
-    }
-  } else {
-
-    /* Loop over all the gas particles in the cell
-     * Note that the cell (and hence the parts) may be local or foreign. */
-    const size_t nr_parts = c->hydro.count;
-    for (size_t k = 0; k < nr_parts; k++) {
-
-      /* Get a handle on the part. */
-      struct part *const p = &parts[k];
-      struct xpart *const xp = &xparts[k];
-
-      /* Ignore inhibited particles (they have already been removed!) */
-      if (part_is_inhibited(p, e)) continue;
-
-      /* Get the ID of the black holes that will swallow this part */
-      const long long swallow_id =
-          black_holes_get_part_swallow_id(&p->black_holes_data);
-
-      /* Has this particle been flagged for swallowing? */
-      if (swallow_id >= 0) {
-
-#ifdef SWIFT_DEBUG_CHECKS
-        if (p->ti_drift != e->ti_current)
-          error("Trying to swallow an un-drifted particle.");
-#endif
-
-        /* ID of the BH swallowing this particle */
-        const long long BH_id = swallow_id;
-
-        /* Have we found this particle's BH already? */
-        int found = 0;
-
-        /* Let's look for the hungry black hole in the local list */
-        for (size_t i = 0; i < nr_bpart; ++i) {
-
-          /* Get a handle on the bpart. */
-          struct bpart *bp = &bparts[i];
-
-          if (bp->id == BH_id) {
-
-            /* Lock the space as we are going to work directly on the bpart list
-             */
-            lock_lock(&s->lock);
-
-            /* Swallow the gas particle (i.e. update the BH properties) */
-            black_holes_swallow_part(bp, p, xp, e->cosmology);
-
-            /* Release the space as we are done updating the bpart */
-            if (lock_unlock(&s->lock) != 0)
-              error("Failed to unlock the space.");
-
-            message("BH %lld swallowing gas particle %lld", bp->id, p->id);
-
-            /* If the gas particle is local, remove it */
-            if (c->nodeID == e->nodeID) {
-
-              message("BH %lld removing gas particle %lld", bp->id, p->id);
-
-              lock_lock(&e->s->lock);
-
-              /* Re-check that the particle has not been removed
-               * by another thread before we do the deed. */
-              if (!part_is_inhibited(p, e)) {
-
-                /* Finally, remove the gas particle from the system
-                 * Recall that the gpart associated with it is also removed
-                 * at the same time. */
-                cell_remove_part(e, c, p, xp);
-              }
-
-              if (lock_unlock(&e->s->lock) != 0)
-                error("Failed to unlock the space!");
-            }
-
-            /* In any case, prevent the particle from being re-swallowed */
-            black_holes_mark_part_as_swallowed(&p->black_holes_data);
-
-            found = 1;
-            break;
-          }
-
-        } /* Loop over local BHs */
-
-#ifdef WITH_MPI
-
-        /* We could also be in the case of a local gas particle being
-         * swallowed by a foreign BH. In this case, we won't update the
-         * BH but just remove the particle from the local list. */
-        if (c->nodeID == e->nodeID && !found) {
-
-          /* Let's look for the foreign hungry black hole */
-          for (size_t i = 0; i < nr_bparts_foreign; ++i) {
-
-            /* Get a handle on the bpart. */
-            struct bpart *bp = &bparts_foreign[i];
-
-            if (bp->id == BH_id) {
-
-              message("BH %lld removing gas particle %lld (foreign BH case)",
-                      bp->id, p->id);
-
-              lock_lock(&e->s->lock);
-
-              /* Re-check that the particle has not been removed
-               * by another thread before we do the deed. */
-              if (!part_is_inhibited(p, e)) {
-
-                /* Finally, remove the gas particle from the system */
-                cell_remove_part(e, c, p, xp);
-              }
-
-              if (lock_unlock(&e->s->lock) != 0)
-                error("Failed to unlock the space!");
-
-              found = 1;
-              break;
-            }
-          } /* Loop over foreign BHs */
-        }   /* Is the cell local? */
-#endif
-
-        /* If we have a local particle, we must have found the BH in one
-         * of our list of black holes. */
-        if (c->nodeID == e->nodeID && !found) {
-          error("Gas particle %lld could not find BH %lld to be swallowed",
-                p->id, swallow_id);
-        }
-      } /* Part was flagged for swallowing */
-    }   /* Loop over the parts */
-  }     /* Cell is not split */
-}
-
-/**
- * @brief Processing of gas particles to swallow - self task case.
- *
- * @param r The thread #runner.
- * @param c The #cell.
- * @param timer Are we timing this?
- */
-void runner_do_gas_swallow_self(struct runner *r, struct cell *c, int timer) {
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (c->nodeID != r->e->nodeID) error("Running self task on foreign node");
-  if (!cell_is_active_black_holes(c, r->e))
-    error("Running self task on inactive cell");
-#endif
-
-  runner_do_gas_swallow(r, c, timer);
-}
-
-/**
- * @brief Processing of gas particles to swallow - pair task case.
- *
- * @param r The thread #runner.
- * @param ci First #cell.
- * @param cj Second #cell.
- * @param timer Are we timing this?
- */
-void runner_do_gas_swallow_pair(struct runner *r, struct cell *ci,
-                                struct cell *cj, int timer) {
-
-  const struct engine *e = r->e;
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (ci->nodeID != e->nodeID && cj->nodeID != e->nodeID)
-    error("Running pair task on foreign node");
-#endif
-
-  /* Run the swallowing loop only in the cell that is the neighbour of the
-   * active BH */
-  if (cell_is_active_black_holes(cj, e)) runner_do_gas_swallow(r, ci, timer);
-  if (cell_is_active_black_holes(ci, e)) runner_do_gas_swallow(r, cj, timer);
-}
-
-/**
- * @brief Process all the BH particles in a cell that have been flagged for
- * swallowing by a black hole.
- *
- * This is done by recursing down to the leaf-level and skipping the sub-cells
- * that have not been drifted as they would not have any particles with
- * swallowing flag. We then loop over the particles with a flag and look into
- * the space-wide list of black holes for the particle with the corresponding
- * ID. If found, the BH swallows the BH particle and the BH particle is
- * removed. If the cell is local, we may be looking for a foreign BH, in which
- * case, we do not update the BH (that will be done on its node) but just remove
- * the BH particle.
- *
- * @param r The thread #runner.
- * @param c The #cell.
- * @param timer Are we timing this?
- */
-void runner_do_bh_swallow(struct runner *r, struct cell *c, int timer) {
-
-  struct engine *e = r->e;
-  struct space *s = e->s;
-  struct bpart *bparts = s->bparts;
-  const size_t nr_bpart = s->nr_bparts;
-#ifdef WITH_MPI
-  struct bpart *bparts_foreign = s->bparts_foreign;
-  const size_t nr_bparts_foreign = s->nr_bparts_foreign;
-#endif
-
-  struct bpart *cell_bparts = c->black_holes.parts;
-
-  /* Early abort?
-   * (We only want cells for which we drifted the BH as these are
-   * the only ones that could have BH particles that have been flagged
-   * for swallowing) */
-  if (c->black_holes.count == 0 ||
-      c->black_holes.ti_old_part != e->ti_current) {
-    return;
-  }
-
-  /* Loop over the progeny ? */
-  if (c->split) {
-    for (int k = 0; k < 8; k++) {
-      if (c->progeny[k] != NULL) {
-        struct cell *restrict cp = c->progeny[k];
-
-        runner_do_bh_swallow(r, cp, 0);
-      }
-    }
-  } else {
-
-    /* Loop over all the gas particles in the cell
-     * Note that the cell (and hence the bparts) may be local or foreign. */
-    const size_t nr_cell_bparts = c->black_holes.count;
-    for (size_t k = 0; k < nr_cell_bparts; k++) {
-
-      /* Get a handle on the part. */
-      struct bpart *const cell_bp = &cell_bparts[k];
-
-      /* Ignore inhibited particles (they have already been removed!) */
-      if (bpart_is_inhibited(cell_bp, e)) continue;
-
-      /* Get the ID of the black holes that will swallow this part */
-      const long long swallow_id =
-          black_holes_get_bpart_swallow_id(&cell_bp->merger_data);
-
-      /* message("OO id=%lld swallow_id = %lld", cell_bp->id, */
-      /* 	      swallow_id); */
-
-      /* Has this particle been flagged for swallowing? */
-      if (swallow_id >= 0) {
-
-#ifdef SWIFT_DEBUG_CHECKS
-        if (cell_bp->ti_drift != e->ti_current)
-          error("Trying to swallow an un-drifted particle.");
-#endif
-
-        /* ID of the BH swallowing this particle */
-        const long long BH_id = swallow_id;
-
-        /* Have we found this particle's BH already? */
-        int found = 0;
-
-        /* Let's look for the hungry black hole in the local list */
-        for (size_t i = 0; i < nr_bpart; ++i) {
-
-          /* Get a handle on the bpart. */
-          struct bpart *bp = &bparts[i];
-
-          if (bp->id == BH_id) {
-
-            /* Lock the space as we are going to work directly on the bpart list
-             */
-            lock_lock(&s->lock);
-
-            /* Swallow the gas particle (i.e. update the BH properties) */
-            black_holes_swallow_bpart(bp, cell_bp, e->cosmology);
-
-            /* Release the space as we are done updating the bpart */
-            if (lock_unlock(&s->lock) != 0)
-              error("Failed to unlock the space.");
-
-            message("BH %lld swallowing BH particle %lld", bp->id, cell_bp->id);
-
-            /* If the gas particle is local, remove it */
-            if (c->nodeID == e->nodeID) {
-
-              message("BH %lld removing BH particle %lld", bp->id, cell_bp->id);
-
-              /* Finally, remove the gas particle from the system
-               * Recall that the gpart associated with it is also removed
-               * at the same time. */
-              cell_remove_bpart(e, c, cell_bp);
-            }
-
-            /* In any case, prevent the particle from being re-swallowed */
-            black_holes_mark_bpart_as_merged(&cell_bp->merger_data);
-
-            found = 1;
-            break;
-          }
-
-        } /* Loop over local BHs */
-
-#ifdef WITH_MPI
-
-        /* We could also be in the case of a local BH particle being
-         * swallowed by a foreign BH. In this case, we won't update the
-         * foreign BH but just remove the particle from the local list. */
-        if (c->nodeID == e->nodeID && !found) {
-
-          /* Let's look for the foreign hungry black hole */
-          for (size_t i = 0; i < nr_bparts_foreign; ++i) {
-
-            /* Get a handle on the bpart. */
-            struct bpart *bp = &bparts_foreign[i];
-
-            if (bp->id == BH_id) {
-
-              message("BH %lld removing BH particle %lld (foreign BH case)",
-                      bp->id, cell_bp->id);
-
-              /* Finally, remove the gas particle from the system */
-              cell_remove_bpart(e, c, cell_bp);
-
-              found = 1;
-              break;
-            }
-          } /* Loop over foreign BHs */
-        }   /* Is the cell local? */
-#endif
-
-        /* If we have a local particle, we must have found the BH in one
-         * of our list of black holes. */
-        if (c->nodeID == e->nodeID && !found) {
-          error("BH particle %lld could not find BH %lld to be swallowed",
-                cell_bp->id, swallow_id);
-        }
-      } /* Part was flagged for swallowing */
-    }   /* Loop over the parts */
-  }     /* Cell is not split */
-}
-
-/**
- * @brief Processing of bh particles to swallow - self task case.
- *
- * @param r The thread #runner.
- * @param c The #cell.
- * @param timer Are we timing this?
- */
-void runner_do_bh_swallow_self(struct runner *r, struct cell *c, int timer) {
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (c->nodeID != r->e->nodeID) error("Running self task on foreign node");
-  if (!cell_is_active_black_holes(c, r->e))
-    error("Running self task on inactive cell");
-#endif
-
-  runner_do_bh_swallow(r, c, timer);
-}
-
-/**
- * @brief Processing of bh particles to swallow - pair task case.
- *
- * @param r The thread #runner.
- * @param ci First #cell.
- * @param cj Second #cell.
- * @param timer Are we timing this?
- */
-void runner_do_bh_swallow_pair(struct runner *r, struct cell *ci,
-                               struct cell *cj, int timer) {
-
-  const struct engine *e = r->e;
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (ci->nodeID != e->nodeID && cj->nodeID != e->nodeID)
-    error("Running pair task on foreign node");
-#endif
-
-  /* Run the swallowing loop only in the cell that is the neighbour of the
-   * active BH */
-  if (cell_is_active_black_holes(cj, e)) runner_do_bh_swallow(r, ci, timer);
-  if (cell_is_active_black_holes(ci, e)) runner_do_bh_swallow(r, cj, timer);
-}
-
-/**
- * @brief Construct the cell properties from the received #part.
- *
- * @param r The runner thread.
- * @param c The cell.
- * @param clear_sorts Should we clear the sort flag and hence trigger a sort ?
- * @param timer Are we timing this ?
- */
-void runner_do_recv_part(struct runner *r, struct cell *c, int clear_sorts,
-                         int timer) {
-#ifdef WITH_MPI
-
-  const struct part *restrict parts = c->hydro.parts;
-  const size_t nr_parts = c->hydro.count;
-  const integertime_t ti_current = r->e->ti_current;
-
-  TIMER_TIC;
-
-  integertime_t ti_hydro_end_min = max_nr_timesteps;
-  integertime_t ti_hydro_end_max = 0;
-  timebin_t time_bin_min = num_time_bins;
-  timebin_t time_bin_max = 0;
-  float h_max = 0.f;
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (c->nodeID == engine_rank) error("Updating a local cell!");
-#endif
-
-  /* Clear this cell's sorted mask. */
-  if (clear_sorts) c->hydro.sorted = 0;
-
-  /* If this cell is a leaf, collect the particle data. */
-  if (!c->split) {
-
-    /* Collect everything... */
-    for (size_t k = 0; k < nr_parts; k++) {
-      if (parts[k].time_bin == time_bin_inhibited) continue;
-      time_bin_min = min(time_bin_min, parts[k].time_bin);
-      time_bin_max = max(time_bin_max, parts[k].time_bin);
-      h_max = max(h_max, parts[k].h);
-    }
-
-    /* Convert into a time */
-    ti_hydro_end_min = get_integer_time_end(ti_current, time_bin_min);
-    ti_hydro_end_max = get_integer_time_end(ti_current, time_bin_max);
-  }
-
-  /* Otherwise, recurse and collect. */
-  else {
-    for (int k = 0; k < 8; k++) {
-      if (c->progeny[k] != NULL && c->progeny[k]->hydro.count > 0) {
-        runner_do_recv_part(r, c->progeny[k], clear_sorts, 0);
-        ti_hydro_end_min =
-            min(ti_hydro_end_min, c->progeny[k]->hydro.ti_end_min);
-        ti_hydro_end_max =
-            max(ti_hydro_end_max, c->progeny[k]->hydro.ti_end_max);
-        h_max = max(h_max, c->progeny[k]->hydro.h_max);
-      }
-    }
-  }
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (ti_hydro_end_min < ti_current)
-    error(
-        "Received a cell at an incorrect time c->ti_end_min=%lld, "
-        "e->ti_current=%lld.",
-        ti_hydro_end_min, ti_current);
-#endif
-
-  /* ... and store. */
-  // c->hydro.ti_end_min = ti_hydro_end_min;
-  // c->hydro.ti_end_max = ti_hydro_end_max;
-  c->hydro.ti_old_part = ti_current;
-  c->hydro.h_max = h_max;
-
-  if (timer) TIMER_TOC(timer_dorecv_part);
-
-#else
-  error("SWIFT was not compiled with MPI support.");
-#endif
-}
-
-/**
- * @brief Construct the cell properties from the received #gpart.
- *
- * @param r The runner thread.
- * @param c The cell.
- * @param timer Are we timing this ?
- */
-void runner_do_recv_gpart(struct runner *r, struct cell *c, int timer) {
-
-#ifdef WITH_MPI
-
-  const struct gpart *restrict gparts = c->grav.parts;
-  const size_t nr_gparts = c->grav.count;
-  const integertime_t ti_current = r->e->ti_current;
-
-  TIMER_TIC;
-
-  integertime_t ti_gravity_end_min = max_nr_timesteps;
-  integertime_t ti_gravity_end_max = 0;
-  timebin_t time_bin_min = num_time_bins;
-  timebin_t time_bin_max = 0;
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (c->nodeID == engine_rank) error("Updating a local cell!");
-#endif
-
-  /* If this cell is a leaf, collect the particle data. */
-  if (!c->split) {
-
-    /* Collect everything... */
-    for (size_t k = 0; k < nr_gparts; k++) {
-      if (gparts[k].time_bin == time_bin_inhibited) continue;
-      time_bin_min = min(time_bin_min, gparts[k].time_bin);
-      time_bin_max = max(time_bin_max, gparts[k].time_bin);
-    }
-
-    /* Convert into a time */
-    ti_gravity_end_min = get_integer_time_end(ti_current, time_bin_min);
-    ti_gravity_end_max = get_integer_time_end(ti_current, time_bin_max);
-  }
-
-  /* Otherwise, recurse and collect. */
-  else {
-    for (int k = 0; k < 8; k++) {
-      if (c->progeny[k] != NULL && c->progeny[k]->grav.count > 0) {
-        runner_do_recv_gpart(r, c->progeny[k], 0);
-        ti_gravity_end_min =
-            min(ti_gravity_end_min, c->progeny[k]->grav.ti_end_min);
-        ti_gravity_end_max =
-            max(ti_gravity_end_max, c->progeny[k]->grav.ti_end_max);
-      }
-    }
-  }
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (ti_gravity_end_min < ti_current)
-    error(
-        "Received a cell at an incorrect time c->ti_end_min=%lld, "
-        "e->ti_current=%lld.",
-        ti_gravity_end_min, ti_current);
-#endif
-
-  /* ... and store. */
-  // c->grav.ti_end_min = ti_gravity_end_min;
-  // c->grav.ti_end_max = ti_gravity_end_max;
-  c->grav.ti_old_part = ti_current;
-
-  if (timer) TIMER_TOC(timer_dorecv_gpart);
-
-#else
-  error("SWIFT was not compiled with MPI support.");
-#endif
-}
-
-/**
- * @brief Construct the cell properties from the received #spart.
- *
- * @param r The runner thread.
- * @param c The cell.
- * @param clear_sorts Should we clear the sort flag and hence trigger a sort ?
- * @param timer Are we timing this ?
- */
-void runner_do_recv_spart(struct runner *r, struct cell *c, int clear_sorts,
-                          int timer) {
-
-#ifdef WITH_MPI
-
-  struct spart *restrict sparts = c->stars.parts;
-  const size_t nr_sparts = c->stars.count;
-  const integertime_t ti_current = r->e->ti_current;
-
-  TIMER_TIC;
-
-  integertime_t ti_stars_end_min = max_nr_timesteps;
-  integertime_t ti_stars_end_max = 0;
-  timebin_t time_bin_min = num_time_bins;
-  timebin_t time_bin_max = 0;
-  float h_max = 0.f;
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (c->nodeID == engine_rank) error("Updating a local cell!");
-#endif
-
-  /* Clear this cell's sorted mask. */
-  if (clear_sorts) c->stars.sorted = 0;
-
-  /* If this cell is a leaf, collect the particle data. */
-  if (!c->split) {
-
-    /* Collect everything... */
-    for (size_t k = 0; k < nr_sparts; k++) {
-#ifdef DEBUG_INTERACTIONS_STARS
-      sparts[k].num_ngb_force = 0;
-#endif
-      if (sparts[k].time_bin == time_bin_inhibited) continue;
-      time_bin_min = min(time_bin_min, sparts[k].time_bin);
-      time_bin_max = max(time_bin_max, sparts[k].time_bin);
-      h_max = max(h_max, sparts[k].h);
-    }
-
-    /* Convert into a time */
-    ti_stars_end_min = get_integer_time_end(ti_current, time_bin_min);
-    ti_stars_end_max = get_integer_time_end(ti_current, time_bin_max);
-  }
-
-  /* Otherwise, recurse and collect. */
-  else {
-    for (int k = 0; k < 8; k++) {
-      if (c->progeny[k] != NULL && c->progeny[k]->stars.count > 0) {
-        runner_do_recv_spart(r, c->progeny[k], clear_sorts, 0);
-        ti_stars_end_min =
-            min(ti_stars_end_min, c->progeny[k]->stars.ti_end_min);
-        ti_stars_end_max =
-            max(ti_stars_end_max, c->progeny[k]->stars.ti_end_max);
-        h_max = max(h_max, c->progeny[k]->stars.h_max);
-      }
-    }
-  }
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (ti_stars_end_min < ti_current &&
-      !(r->e->policy & engine_policy_star_formation))
-    error(
-        "Received a cell at an incorrect time c->ti_end_min=%lld, "
-        "e->ti_current=%lld.",
-        ti_stars_end_min, ti_current);
-#endif
-
-  /* ... and store. */
-  // c->grav.ti_end_min = ti_gravity_end_min;
-  // c->grav.ti_end_max = ti_gravity_end_max;
-  c->stars.ti_old_part = ti_current;
-  c->stars.h_max = h_max;
-
-  if (timer) TIMER_TOC(timer_dorecv_spart);
-
-#else
-  error("SWIFT was not compiled with MPI support.");
-#endif
-}
-
-/**
- * @brief Construct the cell properties from the received #bpart.
- *
- * Note that we do not need to clear the sorts since we do not sort
- * the black holes.
- *
- * @param r The runner thread.
- * @param c The cell.
- * @param clear_sorts Should we clear the sort flag and hence trigger a sort ?
- * @param timer Are we timing this ?
- */
-void runner_do_recv_bpart(struct runner *r, struct cell *c, int clear_sorts,
-                          int timer) {
-
-#ifdef WITH_MPI
-
-  struct bpart *restrict bparts = c->black_holes.parts;
-  const size_t nr_bparts = c->black_holes.count;
-  const integertime_t ti_current = r->e->ti_current;
-
-  TIMER_TIC;
-
-  integertime_t ti_black_holes_end_min = max_nr_timesteps;
-  integertime_t ti_black_holes_end_max = 0;
-  timebin_t time_bin_min = num_time_bins;
-  timebin_t time_bin_max = 0;
-  float h_max = 0.f;
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (c->nodeID == engine_rank) error("Updating a local cell!");
-#endif
-
-  /* If this cell is a leaf, collect the particle data. */
-  if (!c->split) {
-
-    /* Collect everything... */
-    for (size_t k = 0; k < nr_bparts; k++) {
-#ifdef DEBUG_INTERACTIONS_BLACK_HOLES
-      bparts[k].num_ngb_force = 0;
-#endif
-
-      /* message("Receiving bparts id=%lld time_bin=%d", */
-      /* 	      bparts[k].id, bparts[k].time_bin); */
-
-      if (bparts[k].time_bin == time_bin_inhibited) continue;
-      time_bin_min = min(time_bin_min, bparts[k].time_bin);
-      time_bin_max = max(time_bin_max, bparts[k].time_bin);
-      h_max = max(h_max, bparts[k].h);
-    }
-
-    /* Convert into a time */
-    ti_black_holes_end_min = get_integer_time_end(ti_current, time_bin_min);
-    ti_black_holes_end_max = get_integer_time_end(ti_current, time_bin_max);
-  }
-
-  /* Otherwise, recurse and collect. */
-  else {
-    for (int k = 0; k < 8; k++) {
-      if (c->progeny[k] != NULL && c->progeny[k]->black_holes.count > 0) {
-        runner_do_recv_bpart(r, c->progeny[k], clear_sorts, 0);
-        ti_black_holes_end_min =
-            min(ti_black_holes_end_min, c->progeny[k]->black_holes.ti_end_min);
-        ti_black_holes_end_max =
-            max(ti_black_holes_end_max, c->progeny[k]->black_holes.ti_end_max);
-        h_max = max(h_max, c->progeny[k]->black_holes.h_max);
-      }
-    }
-  }
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (ti_black_holes_end_min < ti_current)
-    error(
-        "Received a cell at an incorrect time c->ti_end_min=%lld, "
-        "e->ti_current=%lld.",
-        ti_black_holes_end_min, ti_current);
-#endif
-
-  /* ... and store. */
-  // c->grav.ti_end_min = ti_gravity_end_min;
-  // c->grav.ti_end_max = ti_gravity_end_max;
-  c->black_holes.ti_old_part = ti_current;
-  c->black_holes.h_max = h_max;
-
-  if (timer) TIMER_TOC(timer_dorecv_bpart);
-
-#else
-  error("SWIFT was not compiled with MPI support.");
-#endif
-}
-
-/**
- * @brief The #runner main thread routine.
- *
- * @param data A pointer to this thread's data.
- */
-void *runner_main(void *data) {
-
-  struct runner *r = (struct runner *)data;
-  struct engine *e = r->e;
-  struct scheduler *sched = &e->sched;
-  unsigned int seed = r->id;
-  pthread_setspecific(sched->local_seed_pointer, &seed);
-  /* Main loop. */
-  while (1) {
-
-    /* Wait at the barrier. */
-    engine_barrier(e);
-
-    /* Can we go home yet? */
-    if (e->step_props & engine_step_prop_done) break;
-
-    /* Re-set the pointer to the previous task, as there is none. */
-    struct task *t = NULL;
-    struct task *prev = NULL;
-
-    /* Loop while there are tasks... */
-    while (1) {
-
-      /* If there's no old task, try to get a new one. */
-      if (t == NULL) {
-
-        /* Get the task. */
-        TIMER_TIC
-        t = scheduler_gettask(sched, r->qid, prev);
-        TIMER_TOC(timer_gettask);
-
-        /* Did I get anything? */
-        if (t == NULL) break;
-      }
-
-      /* Get the cells. */
-      struct cell *ci = t->ci;
-      struct cell *cj = t->cj;
-
-#ifdef SWIFT_DEBUG_TASKS
-      /* Mark the thread we run on */
-      t->rid = r->cpuid;
-
-      /* And recover the pair direction */
-      if (t->type == task_type_pair || t->type == task_type_sub_pair) {
-        struct cell *ci_temp = ci;
-        struct cell *cj_temp = cj;
-        double shift[3];
-        t->sid = space_getsid(e->s, &ci_temp, &cj_temp, shift);
-      } else {
-        t->sid = -1;
-      }
-#endif
-
-#ifdef SWIFT_DEBUG_CHECKS
-      /* Check that we haven't scheduled an inactive task */
-      t->ti_run = e->ti_current;
-      /* Store the task that will be running (for debugging only) */
-      r->t = t;
-#endif
-
-      /* Different types of tasks... */
-      switch (t->type) {
-        case task_type_self:
-          if (t->subtype == task_subtype_density)
-            runner_doself1_branch_density(r, ci);
-#ifdef EXTRA_HYDRO_LOOP
-          else if (t->subtype == task_subtype_gradient)
-            runner_doself1_branch_gradient(r, ci);
-#endif
-          else if (t->subtype == task_subtype_force)
-            runner_doself2_branch_force(r, ci);
-          else if (t->subtype == task_subtype_limiter)
-            runner_doself2_branch_limiter(r, ci);
-          else if (t->subtype == task_subtype_grav)
-            runner_doself_recursive_grav(r, ci, 1);
-          else if (t->subtype == task_subtype_external_grav)
-            runner_do_grav_external(r, ci, 1);
-          else if (t->subtype == task_subtype_stars_density)
-            runner_doself_branch_stars_density(r, ci);
-          else if (t->subtype == task_subtype_stars_feedback)
-            runner_doself_branch_stars_feedback(r, ci);
-          else if (t->subtype == task_subtype_bh_density)
-            runner_doself_branch_bh_density(r, ci);
-          else if (t->subtype == task_subtype_bh_swallow)
-            runner_doself_branch_bh_swallow(r, ci);
-          else if (t->subtype == task_subtype_do_gas_swallow)
-            runner_do_gas_swallow_self(r, ci, 1);
-          else if (t->subtype == task_subtype_do_bh_swallow)
-            runner_do_bh_swallow_self(r, ci, 1);
-          else if (t->subtype == task_subtype_bh_feedback)
-            runner_doself_branch_bh_feedback(r, ci);
-          else
-            error("Unknown/invalid task subtype (%s).",
-                  subtaskID_names[t->subtype]);
-          break;
-
-        case task_type_pair:
-          if (t->subtype == task_subtype_density)
-            runner_dopair1_branch_density(r, ci, cj);
-#ifdef EXTRA_HYDRO_LOOP
-          else if (t->subtype == task_subtype_gradient)
-            runner_dopair1_branch_gradient(r, ci, cj);
-#endif
-          else if (t->subtype == task_subtype_force)
-            runner_dopair2_branch_force(r, ci, cj);
-          else if (t->subtype == task_subtype_limiter)
-            runner_dopair2_branch_limiter(r, ci, cj);
-          else if (t->subtype == task_subtype_grav)
-            runner_dopair_recursive_grav(r, ci, cj, 1);
-          else if (t->subtype == task_subtype_stars_density)
-            runner_dopair_branch_stars_density(r, ci, cj);
-          else if (t->subtype == task_subtype_stars_feedback)
-            runner_dopair_branch_stars_feedback(r, ci, cj);
-          else if (t->subtype == task_subtype_bh_density)
-            runner_dopair_branch_bh_density(r, ci, cj);
-          else if (t->subtype == task_subtype_bh_swallow)
-            runner_dopair_branch_bh_swallow(r, ci, cj);
-          else if (t->subtype == task_subtype_do_gas_swallow)
-            runner_do_gas_swallow_pair(r, ci, cj, 1);
-          else if (t->subtype == task_subtype_do_bh_swallow)
-            runner_do_bh_swallow_pair(r, ci, cj, 1);
-          else if (t->subtype == task_subtype_bh_feedback)
-            runner_dopair_branch_bh_feedback(r, ci, cj);
-          else
-            error("Unknown/invalid task subtype (%s/%s).",
-                  taskID_names[t->type], subtaskID_names[t->subtype]);
-          break;
-
-        case task_type_sub_self:
-          if (t->subtype == task_subtype_density)
-            runner_dosub_self1_density(r, ci, 1);
-#ifdef EXTRA_HYDRO_LOOP
-          else if (t->subtype == task_subtype_gradient)
-            runner_dosub_self1_gradient(r, ci, 1);
-#endif
-          else if (t->subtype == task_subtype_force)
-            runner_dosub_self2_force(r, ci, 1);
-          else if (t->subtype == task_subtype_limiter)
-            runner_dosub_self2_limiter(r, ci, 1);
-          else if (t->subtype == task_subtype_stars_density)
-            runner_dosub_self_stars_density(r, ci, 1);
-          else if (t->subtype == task_subtype_stars_feedback)
-            runner_dosub_self_stars_feedback(r, ci, 1);
-          else if (t->subtype == task_subtype_bh_density)
-            runner_dosub_self_bh_density(r, ci, 1);
-          else if (t->subtype == task_subtype_bh_swallow)
-            runner_dosub_self_bh_swallow(r, ci, 1);
-          else if (t->subtype == task_subtype_do_gas_swallow)
-            runner_do_gas_swallow_self(r, ci, 1);
-          else if (t->subtype == task_subtype_do_bh_swallow)
-            runner_do_bh_swallow_self(r, ci, 1);
-          else if (t->subtype == task_subtype_bh_feedback)
-            runner_dosub_self_bh_feedback(r, ci, 1);
-          else
-            error("Unknown/invalid task subtype (%s/%s).",
-                  taskID_names[t->type], subtaskID_names[t->subtype]);
-          break;
-
-        case task_type_sub_pair:
-          if (t->subtype == task_subtype_density)
-            runner_dosub_pair1_density(r, ci, cj, 1);
-#ifdef EXTRA_HYDRO_LOOP
-          else if (t->subtype == task_subtype_gradient)
-            runner_dosub_pair1_gradient(r, ci, cj, 1);
-#endif
-          else if (t->subtype == task_subtype_force)
-            runner_dosub_pair2_force(r, ci, cj, 1);
-          else if (t->subtype == task_subtype_limiter)
-            runner_dosub_pair2_limiter(r, ci, cj, 1);
-          else if (t->subtype == task_subtype_stars_density)
-            runner_dosub_pair_stars_density(r, ci, cj, 1);
-          else if (t->subtype == task_subtype_stars_feedback)
-            runner_dosub_pair_stars_feedback(r, ci, cj, 1);
-          else if (t->subtype == task_subtype_bh_density)
-            runner_dosub_pair_bh_density(r, ci, cj, 1);
-          else if (t->subtype == task_subtype_bh_swallow)
-            runner_dosub_pair_bh_swallow(r, ci, cj, 1);
-          else if (t->subtype == task_subtype_do_gas_swallow)
-            runner_do_gas_swallow_pair(r, ci, cj, 1);
-          else if (t->subtype == task_subtype_do_bh_swallow)
-            runner_do_bh_swallow_pair(r, ci, cj, 1);
-          else if (t->subtype == task_subtype_bh_feedback)
-            runner_dosub_pair_bh_feedback(r, ci, cj, 1);
-          else
-            error("Unknown/invalid task subtype (%s/%s).",
-                  taskID_names[t->type], subtaskID_names[t->subtype]);
-          break;
-
-        case task_type_sort:
-          /* Cleanup only if any of the indices went stale. */
-          runner_do_hydro_sort(
-              r, ci, t->flags,
-              ci->hydro.dx_max_sort_old > space_maxreldx * ci->dmin, 1);
-          /* Reset the sort flags as our work here is done. */
-          t->flags = 0;
-          break;
-        case task_type_stars_sort:
-          /* Cleanup only if any of the indices went stale. */
-          runner_do_stars_sort(
-              r, ci, t->flags,
-              ci->stars.dx_max_sort_old > space_maxreldx * ci->dmin, 1);
-          /* Reset the sort flags as our work here is done. */
-          t->flags = 0;
-          break;
-        case task_type_init_grav:
-          runner_do_init_grav(r, ci, 1);
-          break;
-        case task_type_ghost:
-          runner_do_ghost(r, ci, 1);
-          break;
-#ifdef EXTRA_HYDRO_LOOP
-        case task_type_extra_ghost:
-          runner_do_extra_ghost(r, ci, 1);
-          break;
-#endif
-        case task_type_stars_ghost:
-          runner_do_stars_ghost(r, ci, 1);
-          break;
-        case task_type_bh_density_ghost:
-          runner_do_black_holes_density_ghost(r, ci, 1);
-          break;
-        case task_type_bh_swallow_ghost3:
-          runner_do_black_holes_swallow_ghost(r, ci, 1);
-          break;
-        case task_type_drift_part:
-          runner_do_drift_part(r, ci, 1);
-          break;
-        case task_type_drift_spart:
-          runner_do_drift_spart(r, ci, 1);
-          break;
-        case task_type_drift_bpart:
-          runner_do_drift_bpart(r, ci, 1);
-          break;
-        case task_type_drift_gpart:
-          runner_do_drift_gpart(r, ci, 1);
-          break;
-        case task_type_kick1:
-          runner_do_kick1(r, ci, 1);
-          break;
-        case task_type_kick2:
-          runner_do_kick2(r, ci, 1);
-          break;
-        case task_type_end_hydro_force:
-          runner_do_end_hydro_force(r, ci, 1);
-          break;
-        case task_type_end_grav_force:
-          runner_do_end_grav_force(r, ci, 1);
-          break;
-        case task_type_logger:
-          runner_do_logger(r, ci, 1);
-          break;
-        case task_type_timestep:
-          runner_do_timestep(r, ci, 1);
-          break;
-        case task_type_timestep_limiter:
-          runner_do_limiter(r, ci, 0, 1);
-          break;
-#ifdef WITH_MPI
-        case task_type_send:
-          if (t->subtype == task_subtype_tend_part) {
-            free(t->buff);
-          } else if (t->subtype == task_subtype_tend_gpart) {
-            free(t->buff);
-          } else if (t->subtype == task_subtype_tend_spart) {
-            free(t->buff);
-          } else if (t->subtype == task_subtype_tend_bpart) {
-            free(t->buff);
-          } else if (t->subtype == task_subtype_sf_counts) {
-            free(t->buff);
-          } else if (t->subtype == task_subtype_part_swallow) {
-            free(t->buff);
-          } else if (t->subtype == task_subtype_bpart_merger) {
-            free(t->buff);
-          }
-          break;
-        case task_type_recv:
-          if (t->subtype == task_subtype_tend_part) {
-            cell_unpack_end_step_hydro(ci, (struct pcell_step_hydro *)t->buff);
-            free(t->buff);
-          } else if (t->subtype == task_subtype_tend_gpart) {
-            cell_unpack_end_step_grav(ci, (struct pcell_step_grav *)t->buff);
-            free(t->buff);
-          } else if (t->subtype == task_subtype_tend_spart) {
-            cell_unpack_end_step_stars(ci, (struct pcell_step_stars *)t->buff);
-            free(t->buff);
-          } else if (t->subtype == task_subtype_tend_bpart) {
-            cell_unpack_end_step_black_holes(
-                ci, (struct pcell_step_black_holes *)t->buff);
-            free(t->buff);
-          } else if (t->subtype == task_subtype_sf_counts) {
-            cell_unpack_sf_counts(ci, (struct pcell_sf *)t->buff);
-            cell_clear_stars_sort_flags(ci, /*clear_unused_flags=*/0);
-            free(t->buff);
-          } else if (t->subtype == task_subtype_xv) {
-            runner_do_recv_part(r, ci, 1, 1);
-          } else if (t->subtype == task_subtype_rho) {
-            runner_do_recv_part(r, ci, 0, 1);
-          } else if (t->subtype == task_subtype_gradient) {
-            runner_do_recv_part(r, ci, 0, 1);
-          } else if (t->subtype == task_subtype_part_swallow) {
-            cell_unpack_part_swallow(ci,
-                                     (struct black_holes_part_data *)t->buff);
-            free(t->buff);
-          } else if (t->subtype == task_subtype_bpart_merger) {
-            cell_unpack_bpart_swallow(ci,
-                                      (struct black_holes_bpart_data *)t->buff);
-            free(t->buff);
-          } else if (t->subtype == task_subtype_limiter) {
-            runner_do_recv_part(r, ci, 0, 1);
-          } else if (t->subtype == task_subtype_gpart) {
-            runner_do_recv_gpart(r, ci, 1);
-          } else if (t->subtype == task_subtype_spart) {
-            runner_do_recv_spart(r, ci, 1, 1);
-          } else if (t->subtype == task_subtype_bpart_rho) {
-            runner_do_recv_bpart(r, ci, 1, 1);
-          } else if (t->subtype == task_subtype_bpart_swallow) {
-            runner_do_recv_bpart(r, ci, 0, 1);
-          } else if (t->subtype == task_subtype_bpart_feedback) {
-            runner_do_recv_bpart(r, ci, 0, 1);
-          } else if (t->subtype == task_subtype_multipole) {
-            cell_unpack_multipoles(ci, (struct gravity_tensors *)t->buff);
-            free(t->buff);
-          } else {
-            error("Unknown/invalid task subtype (%d).", t->subtype);
-          }
-          break;
-#endif
-        case task_type_grav_down:
-          runner_do_grav_down(r, t->ci, 1);
-          break;
-        case task_type_grav_mesh:
-          runner_do_grav_mesh(r, t->ci, 1);
-          break;
-        case task_type_grav_long_range:
-          runner_do_grav_long_range(r, t->ci, 1);
-          break;
-        case task_type_grav_mm:
-          runner_dopair_grav_mm_progenies(r, t->flags, t->ci, t->cj);
-          break;
-        case task_type_cooling:
-          runner_do_cooling(r, t->ci, 1);
-          break;
-        case task_type_star_formation:
-          runner_do_star_formation(r, t->ci, 1);
-          break;
-        case task_type_stars_resort:
-          runner_do_stars_resort(r, t->ci, 1);
-          break;
-        case task_type_fof_self:
-          runner_do_fof_self(r, t->ci, 1);
-          break;
-        case task_type_fof_pair:
-          runner_do_fof_pair(r, t->ci, t->cj, 1);
-          break;
-        default:
-          error("Unknown/invalid task type (%d).", t->type);
-      }
-
-/* Mark that we have run this task on these cells */
-#ifdef SWIFT_DEBUG_CHECKS
-      if (ci != NULL) {
-        ci->tasks_executed[t->type]++;
-        ci->subtasks_executed[t->subtype]++;
-      }
-      if (cj != NULL) {
-        cj->tasks_executed[t->type]++;
-        cj->subtasks_executed[t->subtype]++;
-      }
-
-      /* This runner is not doing a task anymore */
-      r->t = NULL;
-#endif
-
-      /* We're done with this task, see if we get a next one. */
-      prev = t;
-      t = scheduler_done(sched, t);
-
-    } /* main loop. */
-  }
-
-  /* Be kind, rewind. */
-  return NULL;
-}
-
-/**
- * @brief Write the required particles through the logger.
- *
- * @param r The runner thread.
- * @param c The cell.
- * @param timer Are we timing this ?
- */
-void runner_do_logger(struct runner *r, struct cell *c, int timer) {
-
-#ifdef WITH_LOGGER
-  TIMER_TIC;
-
-  const struct engine *e = r->e;
-  struct part *restrict parts = c->hydro.parts;
-  struct xpart *restrict xparts = c->hydro.xparts;
-  const int count = c->hydro.count;
-
-  /* Anything to do here? */
-  if (!cell_is_active_hydro(c, e) && !cell_is_active_gravity(c, e)) return;
-
-  /* Recurse? Avoid spending too much time in useless cells. */
-  if (c->split) {
-    for (int k = 0; k < 8; k++)
-      if (c->progeny[k] != NULL) runner_do_logger(r, c->progeny[k], 0);
-  } else {
-
-    /* Loop over the parts in this cell. */
-    for (int k = 0; k < count; k++) {
-
-      /* Get a handle on the part. */
-      struct part *restrict p = &parts[k];
-      struct xpart *restrict xp = &xparts[k];
-
-      /* If particle needs to be log */
-      /* This is the same function than part_is_active, except for
-       * debugging checks */
-      if (part_is_active(p, e)) {
-
-        if (logger_should_write(&xp->logger_data, e->logger)) {
-          /* Write particle */
-          /* Currently writing everything, should adapt it through time */
-          logger_log_part(e->logger, p,
-                          logger_mask_data[logger_x].mask |
-                              logger_mask_data[logger_v].mask |
-                              logger_mask_data[logger_a].mask |
-                              logger_mask_data[logger_u].mask |
-                              logger_mask_data[logger_h].mask |
-                              logger_mask_data[logger_rho].mask |
-                              logger_mask_data[logger_consts].mask,
-                          &xp->logger_data.last_offset);
-
-          /* Set counter back to zero */
-          xp->logger_data.steps_since_last_output = 0;
-        } else
-          /* Update counter */
-          xp->logger_data.steps_since_last_output += 1;
-      }
-    }
-  }
-
-  if (c->grav.count > 0) error("gparts not implemented");
-
-  if (c->stars.count > 0) error("sparts not implemented");
-
-  if (timer) TIMER_TOC(timer_logger);
-
-#else
-  error("Logger disabled, please enable it during configuration");
-#endif
-}
-
-/**
- * @brief Recursively search for FOF groups in a single cell.
- *
- * @param r runner task
- * @param c cell
- * @param timer 1 if the time is to be recorded.
- */
-void runner_do_fof_self(struct runner *r, struct cell *c, int timer) {
-
-#ifdef WITH_FOF
-
-  TIMER_TIC;
-
-  const struct engine *e = r->e;
-  struct space *s = e->s;
-  const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]};
-  const int periodic = s->periodic;
-  const struct gpart *const gparts = s->gparts;
-  const double search_r2 = e->fof_properties->l_x2;
-
-  rec_fof_search_self(e->fof_properties, dim, search_r2, periodic, gparts, c);
-
-  if (timer) TIMER_TOC(timer_fof_self);
-
-#else
-  error("SWIFT was not compiled with FOF enabled!");
-#endif
-}
-
-/**
- * @brief Recursively search for FOF groups between a pair of cells.
- *
- * @param r runner task
- * @param ci cell i
- * @param cj cell j
- * @param timer 1 if the time is to be recorded.
- */
-void runner_do_fof_pair(struct runner *r, struct cell *ci, struct cell *cj,
-                        int timer) {
-
-#ifdef WITH_FOF
-
-  TIMER_TIC;
-
-  const struct engine *e = r->e;
-  struct space *s = e->s;
-  const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]};
-  const int periodic = s->periodic;
-  const struct gpart *const gparts = s->gparts;
-  const double search_r2 = e->fof_properties->l_x2;
-
-  rec_fof_search_pair(e->fof_properties, dim, search_r2, periodic, gparts, ci,
-                      cj);
-
-  if (timer) TIMER_TOC(timer_fof_pair);
-#else
-  error("SWIFT was not compiled with FOF enabled!");
-#endif
-}
diff --git a/src/runner.h b/src/runner.h
index be175eef423faee23ef97ba86a7faf2f43e8ef5d..7e8d0459efb5485ea1301c923e8c7a3396b6fc7e 100644
--- a/src/runner.h
+++ b/src/runner.h
@@ -26,13 +26,21 @@
 /* Config parameters. */
 #include "../config.h"
 
-/* Includes. */
+/* Local headers. */
 #include "cache.h"
 #include "gravity_cache.h"
-#include "task.h"
 
 struct cell;
 struct engine;
+struct task;
+
+/* Unique identifier of loop types */
+#define TASK_LOOP_DENSITY 0
+#define TASK_LOOP_GRADIENT 1
+#define TASK_LOOP_FORCE 2
+#define TASK_LOOP_LIMITER 3
+#define TASK_LOOP_FEEDBACK 4
+#define TASK_LOOP_SWALLOW 5
 
 /**
  * @brief A struct representing a runner's thread and its data.
@@ -75,6 +83,12 @@ struct runner {
 /* Function prototypes. */
 void runner_do_ghost(struct runner *r, struct cell *c, int timer);
 void runner_do_extra_ghost(struct runner *r, struct cell *c, int timer);
+void runner_do_stars_ghost(struct runner *r, struct cell *c, int timer);
+void runner_do_black_holes_density_ghost(struct runner *r, struct cell *c,
+                                         int timer);
+void runner_do_black_holes_swallow_ghost(struct runner *r, struct cell *c,
+                                         int timer);
+void runner_do_init_grav(struct runner *r, struct cell *c, int timer);
 void runner_do_hydro_sort(struct runner *r, struct cell *c, int flag,
                           int cleanup, int clock);
 void runner_do_stars_sort(struct runner *r, struct cell *c, int flag,
@@ -84,19 +98,38 @@ void runner_do_all_stars_sort(struct runner *r, struct cell *c);
 void runner_do_drift_part(struct runner *r, struct cell *c, int timer);
 void runner_do_drift_gpart(struct runner *r, struct cell *c, int timer);
 void runner_do_drift_spart(struct runner *r, struct cell *c, int timer);
+void runner_do_drift_bpart(struct runner *r, struct cell *c, int timer);
 void runner_do_kick1(struct runner *r, struct cell *c, int timer);
 void runner_do_kick2(struct runner *r, struct cell *c, int timer);
+void runner_do_timestep(struct runner *r, struct cell *c, int timer);
 void runner_do_end_hydro_force(struct runner *r, struct cell *c, int timer);
+void runner_do_end_grav_force(struct runner *r, struct cell *c, int timer);
 void runner_do_init(struct runner *r, struct cell *c, int timer);
 void runner_do_cooling(struct runner *r, struct cell *c, int timer);
+void runner_do_limiter(struct runner *r, struct cell *c, int force, int timer);
+void runner_do_grav_mesh(struct runner *r, struct cell *c, int timer);
 void runner_do_grav_external(struct runner *r, struct cell *c, int timer);
 void runner_do_grav_fft(struct runner *r, int timer);
 void runner_do_logger(struct runner *r, struct cell *c, int timer);
 void runner_do_fof_self(struct runner *r, struct cell *c, int timer);
 void runner_do_fof_pair(struct runner *r, struct cell *ci, struct cell *cj,
                         int timer);
+void runner_do_gas_swallow_self(struct runner *r, struct cell *c, int timer);
+void runner_do_bh_swallow_self(struct runner *r, struct cell *c, int timer);
+void runner_do_gas_swallow_pair(struct runner *r, struct cell *ci,
+                                struct cell *cj, int timer);
+void runner_do_bh_swallow_pair(struct runner *r, struct cell *ci,
+                               struct cell *cj, int timer);
+void runner_do_star_formation(struct runner *r, struct cell *c, int timer);
+void runner_do_stars_resort(struct runner *r, struct cell *c, const int timer);
+
+void runner_do_recv_gpart(struct runner *r, struct cell *c, int timer);
+void runner_do_recv_part(struct runner *r, struct cell *c, int clear_sorts,
+                         int timer);
+void runner_do_recv_spart(struct runner *r, struct cell *c, int clear_sorts,
+                          int timer);
+void runner_do_recv_bpart(struct runner *r, struct cell *c, int clear_sorts,
+                          int timer);
 void *runner_main(void *data);
-void runner_do_drift_all_mapper(void *map_data, int num_elements,
-                                void *extra_data);
 
 #endif /* SWIFT_RUNNER_H */
diff --git a/src/runner_black_holes.c b/src/runner_black_holes.c
new file mode 100644
index 0000000000000000000000000000000000000000..d9bb62201d7b087670aef0ce2346a51bf61a3868
--- /dev/null
+++ b/src/runner_black_holes.c
@@ -0,0 +1,459 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
+ *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *               2015 Peter W. Draper (p.w.draper@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+/* Config parameters. */
+#include "../config.h"
+
+/* This object's header. */
+#include "runner.h"
+
+/* Local headers. */
+#include "active.h"
+#include "black_holes.h"
+#include "cell.h"
+#include "engine.h"
+#include "timers.h"
+
+/**
+ * @brief Process all the gas particles in a cell that have been flagged for
+ * swallowing by a black hole.
+ *
+ * This is done by recursing down to the leaf-level and skipping the sub-cells
+ * that have not been drifted as they would not have any particles with
+ * swallowing flag. We then loop over the particles with a flag and look into
+ * the space-wide list of black holes for the particle with the corresponding
+ * ID. If found, the BH swallows the gas particle and the gas particle is
+ * removed. If the cell is local, we may be looking for a foreign BH, in which
+ * case, we do not update the BH (that will be done on its node) but just remove
+ * the gas particle.
+ *
+ * @param r The thread #runner.
+ * @param c The #cell.
+ * @param timer Are we timing this? (Currently unused by this function.)
+ */
+void runner_do_gas_swallow(struct runner *r, struct cell *c, int timer) {
+
+  struct engine *e = r->e;
+  struct space *s = e->s;
+  struct bpart *bparts = s->bparts;
+  const size_t nr_bpart = s->nr_bparts;
+#ifdef WITH_MPI
+  struct bpart *bparts_foreign = s->bparts_foreign;
+  const size_t nr_bparts_foreign = s->nr_bparts_foreign;
+#endif
+
+  struct part *parts = c->hydro.parts;
+  struct xpart *xparts = c->hydro.xparts;
+
+  /* Early abort?
+   * (We only want cells for which we drifted the gas as these are
+   * the only ones that could have gas particles that have been flagged
+   * for swallowing) */
+  if (c->hydro.count == 0 || c->hydro.ti_old_part != e->ti_current) {
+    return;
+  }
+
+  /* Loop over the progeny ? */
+  if (c->split) {
+    for (int k = 0; k < 8; k++) {
+      if (c->progeny[k] != NULL) {
+        struct cell *restrict cp = c->progeny[k];
+
+        runner_do_gas_swallow(r, cp, 0);
+      }
+    }
+  } else {
+
+    /* Loop over all the gas particles in the cell
+     * Note that the cell (and hence the parts) may be local or foreign. */
+    const size_t nr_parts = c->hydro.count;
+    for (size_t k = 0; k < nr_parts; k++) {
+
+      /* Get a handle on the part. */
+      struct part *const p = &parts[k];
+      struct xpart *const xp = &xparts[k];
+
+      /* Ignore inhibited particles (they have already been removed!) */
+      if (part_is_inhibited(p, e)) continue;
+
+      /* Get the ID of the black hole that will swallow this part */
+      const long long swallow_id =
+          black_holes_get_part_swallow_id(&p->black_holes_data);
+
+      /* Has this particle been flagged for swallowing? */
+      if (swallow_id >= 0) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+        if (p->ti_drift != e->ti_current)
+          error("Trying to swallow an un-drifted particle.");
+#endif
+
+        /* ID of the BH swallowing this particle */
+        const long long BH_id = swallow_id;
+
+        /* Have we found this particle's BH already? */
+        int found = 0;
+
+        /* Let's look for the hungry black hole in the local list */
+        for (size_t i = 0; i < nr_bpart; ++i) {
+
+          /* Get a handle on the bpart. */
+          struct bpart *bp = &bparts[i];
+
+          if (bp->id == BH_id) {
+
+            /* Lock the space as we are going to work directly on the bpart list
+             */
+            lock_lock(&s->lock);
+
+            /* Swallow the gas particle (i.e. update the BH properties) */
+            black_holes_swallow_part(bp, p, xp, e->cosmology);
+
+            /* Release the space as we are done updating the bpart */
+            if (lock_unlock(&s->lock) != 0)
+              error("Failed to unlock the space.");
+
+            message("BH %lld swallowing gas particle %lld", bp->id, p->id);
+
+            /* If the gas particle is local, remove it */
+            if (c->nodeID == e->nodeID) {
+
+              message("BH %lld removing gas particle %lld", bp->id, p->id);
+
+              lock_lock(&e->s->lock);
+
+              /* Re-check that the particle has not been removed
+               * by another thread before we do the deed. */
+              if (!part_is_inhibited(p, e)) {
+
+                /* Finally, remove the gas particle from the system
+                 * Recall that the gpart associated with it is also removed
+                 * at the same time. */
+                cell_remove_part(e, c, p, xp);
+              }
+
+              if (lock_unlock(&e->s->lock) != 0)
+                error("Failed to unlock the space!");
+            }
+
+            /* In any case, prevent the particle from being re-swallowed */
+            black_holes_mark_part_as_swallowed(&p->black_holes_data);
+
+            found = 1;
+            break;
+          }
+
+        } /* Loop over local BHs */
+
+#ifdef WITH_MPI
+
+        /* We could also be in the case of a local gas particle being
+         * swallowed by a foreign BH. In this case, we won't update the
+         * BH but just remove the particle from the local list. */
+        if (c->nodeID == e->nodeID && !found) {
+
+          /* Let's look for the foreign hungry black hole */
+          for (size_t i = 0; i < nr_bparts_foreign; ++i) {
+
+            /* Get a handle on the bpart. */
+            struct bpart *bp = &bparts_foreign[i];
+
+            if (bp->id == BH_id) {
+
+              message("BH %lld removing gas particle %lld (foreign BH case)",
+                      bp->id, p->id);
+
+              lock_lock(&e->s->lock);
+
+              /* Re-check that the particle has not been removed
+               * by another thread before we do the deed. */
+              if (!part_is_inhibited(p, e)) {
+
+                /* Finally, remove the gas particle from the system */
+                cell_remove_part(e, c, p, xp);
+              }
+
+              if (lock_unlock(&e->s->lock) != 0)
+                error("Failed to unlock the space!");
+
+              found = 1;
+              break;
+            }
+          } /* Loop over foreign BHs */
+        }   /* Is the cell local? */
+#endif
+
+        /* If we have a local particle, we must have found the BH in one
+         * of our list of black holes. */
+        if (c->nodeID == e->nodeID && !found) {
+          error("Gas particle %lld could not find BH %lld to be swallowed",
+                p->id, swallow_id);
+        }
+      } /* Part was flagged for swallowing */
+    }   /* Loop over the parts */
+  }     /* Cell is not split */
+}
+
+/**
+ * @brief Processing of gas particles to swallow - self task case.
+ *
+ * @param r The thread #runner.
+ * @param c The #cell.
+ * @param timer Are we timing this?
+ */
+void runner_do_gas_swallow_self(struct runner *r, struct cell *c, int timer) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (c->nodeID != r->e->nodeID) error("Running self task on foreign node");
+  if (!cell_is_active_black_holes(c, r->e))
+    error("Running self task on inactive cell");
+#endif
+
+  runner_do_gas_swallow(r, c, timer);
+}
+
+/**
+ * @brief Processing of gas particles to swallow - pair task case.
+ *
+ * @param r The thread #runner.
+ * @param ci First #cell.
+ * @param cj Second #cell.
+ * @param timer Are we timing this?
+ */
+void runner_do_gas_swallow_pair(struct runner *r, struct cell *ci,
+                                struct cell *cj, int timer) {
+
+  const struct engine *e = r->e;
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (ci->nodeID != e->nodeID && cj->nodeID != e->nodeID)
+    error("Running pair task on foreign node");
+#endif
+
+  /* Run the swallowing loop only in the cell that is the neighbour of the
+   * active BH */
+  if (cell_is_active_black_holes(cj, e)) runner_do_gas_swallow(r, ci, timer);
+  if (cell_is_active_black_holes(ci, e)) runner_do_gas_swallow(r, cj, timer);
+}
+
+/**
+ * @brief Process all the BH particles in a cell that have been flagged for
+ * swallowing by a black hole.
+ *
+ * This is done by recursing down to the leaf-level and skipping the sub-cells
+ * that have not been drifted as they would not have any particles with
+ * swallowing flag. We then loop over the particles with a flag and look into
+ * the space-wide list of black holes for the particle with the corresponding
+ * ID. If found, the BH swallows the BH particle and the BH particle is
+ * removed. If the cell is local, we may be looking for a foreign BH, in which
+ * case, we do not update the BH (that will be done on its node) but just remove
+ * the BH particle.
+ *
+ * @param r The thread #runner.
+ * @param c The #cell.
+ * @param timer Are we timing this? (Currently unused by this function.)
+ */
+void runner_do_bh_swallow(struct runner *r, struct cell *c, int timer) {
+
+  struct engine *e = r->e;
+  struct space *s = e->s;
+  struct bpart *bparts = s->bparts;
+  const size_t nr_bpart = s->nr_bparts;
+#ifdef WITH_MPI
+  struct bpart *bparts_foreign = s->bparts_foreign;
+  const size_t nr_bparts_foreign = s->nr_bparts_foreign;
+#endif
+
+  struct bpart *cell_bparts = c->black_holes.parts;
+
+  /* Early abort?
+   * (We only want cells for which we drifted the BH as these are
+   * the only ones that could have BH particles that have been flagged
+   * for swallowing) */
+  if (c->black_holes.count == 0 ||
+      c->black_holes.ti_old_part != e->ti_current) {
+    return;
+  }
+
+  /* Loop over the progeny ? */
+  if (c->split) {
+    for (int k = 0; k < 8; k++) {
+      if (c->progeny[k] != NULL) {
+        struct cell *restrict cp = c->progeny[k];
+
+        runner_do_bh_swallow(r, cp, 0);
+      }
+    }
+  } else {
+
+    /* Loop over all the BH particles in the cell
+     * Note that the cell (and hence the bparts) may be local or foreign. */
+    const size_t nr_cell_bparts = c->black_holes.count;
+    for (size_t k = 0; k < nr_cell_bparts; k++) {
+
+      /* Get a handle on the bpart. */
+      struct bpart *const cell_bp = &cell_bparts[k];
+
+      /* Ignore inhibited particles (they have already been removed!) */
+      if (bpart_is_inhibited(cell_bp, e)) continue;
+
+      /* Get the ID of the black hole that will swallow this bpart */
+      const long long swallow_id =
+          black_holes_get_bpart_swallow_id(&cell_bp->merger_data);
+
+      /* message("OO id=%lld swallow_id = %lld", cell_bp->id, */
+      /* 	      swallow_id); */
+
+      /* Has this particle been flagged for swallowing? */
+      if (swallow_id >= 0) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+        if (cell_bp->ti_drift != e->ti_current)
+          error("Trying to swallow an un-drifted particle.");
+#endif
+
+        /* ID of the BH swallowing this particle */
+        const long long BH_id = swallow_id;
+
+        /* Have we found this particle's BH already? */
+        int found = 0;
+
+        /* Let's look for the hungry black hole in the local list */
+        for (size_t i = 0; i < nr_bpart; ++i) {
+
+          /* Get a handle on the bpart. */
+          struct bpart *bp = &bparts[i];
+
+          if (bp->id == BH_id) {
+
+            /* Lock the space as we are going to work directly on the bpart list
+             */
+            lock_lock(&s->lock);
+
+            /* Swallow the BH particle (i.e. update the BH properties) */
+            black_holes_swallow_bpart(bp, cell_bp, e->cosmology);
+
+            /* Release the space as we are done updating the bpart */
+            if (lock_unlock(&s->lock) != 0)
+              error("Failed to unlock the space.");
+
+            message("BH %lld swallowing BH particle %lld", bp->id, cell_bp->id);
+
+            /* If the BH particle is local, remove it */
+            if (c->nodeID == e->nodeID) {
+
+              message("BH %lld removing BH particle %lld", bp->id, cell_bp->id);
+
+              /* Finally, remove the BH particle from the system
+               * Recall that the gpart associated with it is also removed
+               * at the same time. */
+              cell_remove_bpart(e, c, cell_bp);
+            }
+
+            /* In any case, prevent the particle from being re-swallowed */
+            black_holes_mark_bpart_as_merged(&cell_bp->merger_data);
+
+            found = 1;
+            break;
+          }
+
+        } /* Loop over local BHs */
+
+#ifdef WITH_MPI
+
+        /* We could also be in the case of a local BH particle being
+         * swallowed by a foreign BH. In this case, we won't update the
+         * foreign BH but just remove the particle from the local list. */
+        if (c->nodeID == e->nodeID && !found) {
+
+          /* Let's look for the foreign hungry black hole */
+          for (size_t i = 0; i < nr_bparts_foreign; ++i) {
+
+            /* Get a handle on the bpart. */
+            struct bpart *bp = &bparts_foreign[i];
+
+            if (bp->id == BH_id) {
+
+              message("BH %lld removing BH particle %lld (foreign BH case)",
+                      bp->id, cell_bp->id);
+
+              /* Finally, remove the BH particle from the system */
+              cell_remove_bpart(e, c, cell_bp);
+
+              found = 1;
+              break;
+            }
+          } /* Loop over foreign BHs */
+        }   /* Is the cell local? */
+#endif
+
+        /* If we have a local particle, we must have found the BH in one
+         * of our list of black holes. */
+        if (c->nodeID == e->nodeID && !found) {
+          error("BH particle %lld could not find BH %lld to be swallowed",
+                cell_bp->id, swallow_id);
+        }
+      } /* Part was flagged for swallowing */
+    }   /* Loop over the parts */
+  }     /* Cell is not split */
+}
+
+/**
+ * @brief Processing of bh particles to swallow - self task case.
+ *
+ * @param r The thread #runner.
+ * @param c The #cell.
+ * @param timer Are we timing this?
+ */
+void runner_do_bh_swallow_self(struct runner *r, struct cell *c, int timer) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (c->nodeID != r->e->nodeID) error("Running self task on foreign node");
+  if (!cell_is_active_black_holes(c, r->e))
+    error("Running self task on inactive cell");
+#endif
+
+  runner_do_bh_swallow(r, c, timer);
+}
+
+/**
+ * @brief Processing of bh particles to swallow - pair task case.
+ *
+ * @param r The thread #runner.
+ * @param ci First #cell.
+ * @param cj Second #cell.
+ * @param timer Are we timing this?
+ */
+void runner_do_bh_swallow_pair(struct runner *r, struct cell *ci,
+                               struct cell *cj, int timer) {
+
+  const struct engine *e = r->e;
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (ci->nodeID != e->nodeID && cj->nodeID != e->nodeID)
+    error("Running pair task on foreign node");
+#endif
+
+  /* Run the swallowing loop only in the cell that is the neighbour of the
+   * active BH */
+  if (cell_is_active_black_holes(cj, e)) runner_do_bh_swallow(r, ci, timer);
+  if (cell_is_active_black_holes(ci, e)) runner_do_bh_swallow(r, cj, timer);
+}
diff --git a/src/runner_doiact_black_holes.c b/src/runner_doiact_black_holes.c
new file mode 100644
index 0000000000000000000000000000000000000000..5c139eada6cf7403076194c42261948db5e0f7f4
--- /dev/null
+++ b/src/runner_doiact_black_holes.c
@@ -0,0 +1,53 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
+ *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *               2015 Peter W. Draper (p.w.draper@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+/* Config parameters. */
+#include "../config.h"
+
+/* Local headers. */
+#include "active.h"
+#include "black_holes.h"
+#include "cell.h"
+#include "engine.h"
+#include "runner.h"
+#include "space_getsid.h"
+#include "timers.h"
+
+/* Import the black hole density loop functions. */
+#define FUNCTION density
+#define FUNCTION_TASK_LOOP TASK_LOOP_DENSITY
+#include "runner_doiact_functions_black_holes.h"
+#undef FUNCTION_TASK_LOOP
+#undef FUNCTION
+
+/* Import the black hole swallow loop functions. */
+#define FUNCTION swallow
+#define FUNCTION_TASK_LOOP TASK_LOOP_SWALLOW
+#include "runner_doiact_functions_black_holes.h"
+#undef FUNCTION_TASK_LOOP
+#undef FUNCTION
+
+/* Import the black hole feedback loop functions. */
+#define FUNCTION feedback
+#define FUNCTION_TASK_LOOP TASK_LOOP_FEEDBACK
+#include "runner_doiact_functions_black_holes.h"
+#undef FUNCTION_TASK_LOOP
+#undef FUNCTION
diff --git a/src/runner_doiact_black_holes.h b/src/runner_doiact_black_holes.h
index ce159c7ac24a508bc625070ed50b3aad7dd9fa8d..763e557babb9ca94a05a28d1ea5ed0f1141684ff 100644
--- a/src/runner_doiact_black_holes.h
+++ b/src/runner_doiact_black_holes.h
@@ -85,852 +85,20 @@
 #define _IACT_BH_BH(f) PASTE(runner_iact_nonsym_bh_bh, f)
 #define IACT_BH_BH _IACT_BH_BH(FUNCTION)
 
-/**
- * @brief Calculate the number density of #part around the #bpart
- *
- * @param r runner task
- * @param c cell
- * @param timer 1 if the time is to be recorded.
- */
-void DOSELF1_BH(struct runner *r, struct cell *c, int timer) {
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (c->nodeID != engine_rank) error("Should be run on a different node");
-#endif
-
-  TIMER_TIC;
-
-  const struct engine *e = r->e;
-  const integertime_t ti_current = e->ti_current;
-  const struct cosmology *cosmo = e->cosmology;
-
-  /* Anything to do here? */
-  if (c->black_holes.count == 0) return;
-  if (!cell_is_active_black_holes(c, e)) return;
-
-  const int bcount = c->black_holes.count;
-  const int count = c->hydro.count;
-  struct bpart *restrict bparts = c->black_holes.parts;
-  struct part *restrict parts = c->hydro.parts;
-  struct xpart *restrict xparts = c->hydro.xparts;
-
-  /* Do we actually have any gas neighbours? */
-  if (c->hydro.count != 0) {
-
-    /* Loop over the bparts in ci. */
-    for (int bid = 0; bid < bcount; bid++) {
-
-      /* Get a hold of the ith bpart in ci. */
-      struct bpart *restrict bi = &bparts[bid];
-
-      /* Skip inactive particles */
-      if (!bpart_is_active(bi, e)) continue;
-
-      const float hi = bi->h;
-      const float hig2 = hi * hi * kernel_gamma2;
-      const float bix[3] = {(float)(bi->x[0] - c->loc[0]),
-                            (float)(bi->x[1] - c->loc[1]),
-                            (float)(bi->x[2] - c->loc[2])};
-
-      /* Loop over the parts in cj. */
-      for (int pjd = 0; pjd < count; pjd++) {
-
-        /* Get a pointer to the jth particle. */
-        struct part *restrict pj = &parts[pjd];
-        struct xpart *restrict xpj = &xparts[pjd];
-        const float hj = pj->h;
-
-        /* Early abort? */
-        if (part_is_inhibited(pj, e)) continue;
-
-        /* Compute the pairwise distance. */
-        const float pjx[3] = {(float)(pj->x[0] - c->loc[0]),
-                              (float)(pj->x[1] - c->loc[1]),
-                              (float)(pj->x[2] - c->loc[2])};
-        float dx[3] = {bix[0] - pjx[0], bix[1] - pjx[1], bix[2] - pjx[2]};
-        const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
-
-#ifdef SWIFT_DEBUG_CHECKS
-        /* Check that particles have been drifted to the current time */
-        if (bi->ti_drift != e->ti_current)
-          error("Particle bi not drifted to current time");
-        if (pj->ti_drift != e->ti_current)
-          error("Particle pj not drifted to current time");
-#endif
-
-        if (r2 < hig2) {
-          IACT_BH_GAS(r2, dx, hi, hj, bi, pj, xpj, cosmo, e->gravity_properties,
-                      ti_current);
-        }
-      } /* loop over the parts in ci. */
-    }   /* loop over the bparts in ci. */
-  }     /* Do we have gas particles in the cell? */
-
-    /* When doing BH swallowing, we need a quick loop also over the BH
-     * neighbours */
-#if (FUNCTION_TASK_LOOP == TASK_LOOP_SWALLOW)
-
-  /* Loop over the bparts in ci. */
-  for (int bid = 0; bid < bcount; bid++) {
-
-    /* Get a hold of the ith bpart in ci. */
-    struct bpart *restrict bi = &bparts[bid];
-
-    /* Skip inactive particles */
-    if (!bpart_is_active(bi, e)) continue;
-
-    const float hi = bi->h;
-    const float hig2 = hi * hi * kernel_gamma2;
-    const float bix[3] = {(float)(bi->x[0] - c->loc[0]),
-                          (float)(bi->x[1] - c->loc[1]),
-                          (float)(bi->x[2] - c->loc[2])};
-
-    /* Loop over the parts in cj. */
-    for (int bjd = 0; bjd < bcount; bjd++) {
-
-      /* Skip self interaction */
-      if (bid == bjd) continue;
-
-      /* Get a pointer to the jth particle. */
-      struct bpart *restrict bj = &bparts[bjd];
-      const float hj = bj->h;
-
-      /* Early abort? */
-      if (bpart_is_inhibited(bj, e)) continue;
-
-      /* Compute the pairwise distance. */
-      const float bjx[3] = {(float)(bj->x[0] - c->loc[0]),
-                            (float)(bj->x[1] - c->loc[1]),
-                            (float)(bj->x[2] - c->loc[2])};
-      float dx[3] = {bix[0] - bjx[0], bix[1] - bjx[1], bix[2] - bjx[2]};
-      const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
-
-#ifdef SWIFT_DEBUG_CHECKS
-      /* Check that particles have been drifted to the current time */
-      if (bi->ti_drift != e->ti_current)
-        error("Particle bi not drifted to current time");
-      if (bj->ti_drift != e->ti_current)
-        error("Particle bj not drifted to current time");
-#endif
-
-      if (r2 < hig2) {
-        IACT_BH_BH(r2, dx, hi, hj, bi, bj, cosmo, e->gravity_properties,
-                   ti_current);
-      }
-    } /* loop over the bparts in ci. */
-  }   /* loop over the bparts in ci. */
-
-#endif /* (FUNCTION_TASK_LOOP == TASK_LOOP_SWALLOW) */
-
-  TIMER_TOC(TIMER_DOSELF_BH);
-}
-
-/**
- * @brief Calculate the number density of cj #part around the ci #bpart
- *
- * @param r runner task
- * @param ci The first #cell
- * @param cj The second #cell
- */
-void DO_NONSYM_PAIR1_BH_NAIVE(struct runner *r, struct cell *restrict ci,
-                              struct cell *restrict cj) {
-
-#ifdef SWIFT_DEBUG_CHECKS
-#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
-  if (ci->nodeID != engine_rank) error("Should be run on a different node");
-#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK)
-  if (cj->nodeID != engine_rank) error("Should be run on a different node");
-#endif
-#endif
-
-  const struct engine *e = r->e;
-  const integertime_t ti_current = e->ti_current;
-  const struct cosmology *cosmo = e->cosmology;
-
-  /* Anything to do here? */
-  if (ci->black_holes.count == 0) return;
-  if (!cell_is_active_black_holes(ci, e)) return;
-
-  const int bcount_i = ci->black_holes.count;
-  const int count_j = cj->hydro.count;
-  struct bpart *restrict bparts_i = ci->black_holes.parts;
-  struct part *restrict parts_j = cj->hydro.parts;
-  struct xpart *restrict xparts_j = cj->hydro.xparts;
-
-  /* Get the relative distance between the pairs, wrapping. */
-  double shift[3] = {0.0, 0.0, 0.0};
-  for (int k = 0; k < 3; k++) {
-    if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2)
-      shift[k] = e->s->dim[k];
-    else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2)
-      shift[k] = -e->s->dim[k];
-  }
-
-  /* Do we actually have any gas neighbours? */
-  if (cj->hydro.count != 0) {
-
-    /* Loop over the bparts in ci. */
-    for (int bid = 0; bid < bcount_i; bid++) {
-
-      /* Get a hold of the ith bpart in ci. */
-      struct bpart *restrict bi = &bparts_i[bid];
-
-      /* Skip inactive particles */
-      if (!bpart_is_active(bi, e)) continue;
-
-      const float hi = bi->h;
-      const float hig2 = hi * hi * kernel_gamma2;
-      const float bix[3] = {(float)(bi->x[0] - (cj->loc[0] + shift[0])),
-                            (float)(bi->x[1] - (cj->loc[1] + shift[1])),
-                            (float)(bi->x[2] - (cj->loc[2] + shift[2]))};
-
-      /* Loop over the parts in cj. */
-      for (int pjd = 0; pjd < count_j; pjd++) {
-
-        /* Get a pointer to the jth particle. */
-        struct part *restrict pj = &parts_j[pjd];
-        struct xpart *restrict xpj = &xparts_j[pjd];
-        const float hj = pj->h;
-
-        /* Skip inhibited particles. */
-        if (part_is_inhibited(pj, e)) continue;
-
-        /* Compute the pairwise distance. */
-        const float pjx[3] = {(float)(pj->x[0] - cj->loc[0]),
-                              (float)(pj->x[1] - cj->loc[1]),
-                              (float)(pj->x[2] - cj->loc[2])};
-        float dx[3] = {bix[0] - pjx[0], bix[1] - pjx[1], bix[2] - pjx[2]};
-        const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
-
-#ifdef SWIFT_DEBUG_CHECKS
-        /* Check that particles have been drifted to the current time */
-        if (bi->ti_drift != e->ti_current)
-          error("Particle bi not drifted to current time");
-        if (pj->ti_drift != e->ti_current)
-          error("Particle pj not drifted to current time");
-#endif
-
-        if (r2 < hig2) {
-          IACT_BH_GAS(r2, dx, hi, hj, bi, pj, xpj, cosmo, e->gravity_properties,
-                      ti_current);
-        }
-      } /* loop over the parts in cj. */
-    }   /* loop over the bparts in ci. */
-  }     /* Do we have gas particles in the cell? */
-
-    /* When doing BH swallowing, we need a quick loop also over the BH
-     * neighbours */
-#if (FUNCTION_TASK_LOOP == TASK_LOOP_SWALLOW)
-
-  const int bcount_j = cj->black_holes.count;
-  struct bpart *restrict bparts_j = cj->black_holes.parts;
-
-  /* Loop over the bparts in ci. */
-  for (int bid = 0; bid < bcount_i; bid++) {
-
-    /* Get a hold of the ith bpart in ci. */
-    struct bpart *restrict bi = &bparts_i[bid];
-
-    /* Skip inactive particles */
-    if (!bpart_is_active(bi, e)) continue;
-
-    const float hi = bi->h;
-    const float hig2 = hi * hi * kernel_gamma2;
-    const float bix[3] = {(float)(bi->x[0] - (cj->loc[0] + shift[0])),
-                          (float)(bi->x[1] - (cj->loc[1] + shift[1])),
-                          (float)(bi->x[2] - (cj->loc[2] + shift[2]))};
-
-    /* Loop over the bparts in cj. */
-    for (int bjd = 0; bjd < bcount_j; bjd++) {
-
-      /* Get a pointer to the jth particle. */
-      struct bpart *restrict bj = &bparts_j[bjd];
-      const float hj = bj->h;
-
-      /* Skip inhibited particles. */
-      if (bpart_is_inhibited(bj, e)) continue;
-
-      /* Compute the pairwise distance. */
-      const float bjx[3] = {(float)(bj->x[0] - cj->loc[0]),
-                            (float)(bj->x[1] - cj->loc[1]),
-                            (float)(bj->x[2] - cj->loc[2])};
-      float dx[3] = {bix[0] - bjx[0], bix[1] - bjx[1], bix[2] - bjx[2]};
-      const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
-
-#ifdef SWIFT_DEBUG_CHECKS
-      /* Check that particles have been drifted to the current time */
-      if (bi->ti_drift != e->ti_current)
-        error("Particle bi not drifted to current time");
-      if (bj->ti_drift != e->ti_current)
-        error("Particle bj not drifted to current time");
-#endif
-
-      if (r2 < hig2) {
-        IACT_BH_BH(r2, dx, hi, hj, bi, bj, cosmo, e->gravity_properties,
-                   ti_current);
-      }
-    } /* loop over the bparts in cj. */
-  }   /* loop over the bparts in ci. */
-
-#endif /* (FUNCTION_TASK_LOOP == TASK_LOOP_SWALLOW) */
-}
-
-void DOPAIR1_BH_NAIVE(struct runner *r, struct cell *restrict ci,
-                      struct cell *restrict cj, int timer) {
-
-  TIMER_TIC;
-
-#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
-  const int do_ci_bh = ci->nodeID == r->e->nodeID;
-  const int do_cj_bh = cj->nodeID == r->e->nodeID;
-#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK)
-  /* here we are updating the hydro -> switch ci, cj */
-  const int do_ci_bh = cj->nodeID == r->e->nodeID;
-  const int do_cj_bh = ci->nodeID == r->e->nodeID;
-#else
-  /* The swallow task is executed on both sides */
-  const int do_ci_bh = 1;
-  const int do_cj_bh = 1;
-#endif
-
-  if (do_ci_bh) DO_NONSYM_PAIR1_BH_NAIVE(r, ci, cj);
-  if (do_cj_bh) DO_NONSYM_PAIR1_BH_NAIVE(r, cj, ci);
-
-  TIMER_TOC(TIMER_DOPAIR_BH);
-}
-
-/**
- * @brief Compute the interactions between a cell pair, but only for the
- *      given indices in ci.
- *
- * Version using a brute-force algorithm.
- *
- * @param r The #runner.
- * @param ci The first #cell.
- * @param bparts_i The #bpart to interact with @c cj.
- * @param ind The list of indices of particles in @c ci to interact with.
- * @param bcount The number of particles in @c ind.
- * @param cj The second #cell.
- * @param shift The shift vector to apply to the particles in ci.
- */
-void DOPAIR1_SUBSET_BH_NAIVE(struct runner *r, struct cell *restrict ci,
-                             struct bpart *restrict bparts_i, int *restrict ind,
-                             const int bcount, struct cell *restrict cj,
-                             const double *shift) {
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (ci->nodeID != engine_rank) error("Should be run on a different node");
-#endif
-
-  const struct engine *e = r->e;
-  const integertime_t ti_current = e->ti_current;
-  const struct cosmology *cosmo = e->cosmology;
-
-  const int count_j = cj->hydro.count;
-  struct part *restrict parts_j = cj->hydro.parts;
-  struct xpart *restrict xparts_j = cj->hydro.xparts;
-
-  /* Early abort? */
-  if (count_j == 0) return;
-
-  /* Loop over the parts_i. */
-  for (int bid = 0; bid < bcount; bid++) {
-
-    /* Get a hold of the ith part in ci. */
-    struct bpart *restrict bi = &bparts_i[ind[bid]];
-
-    const double bix = bi->x[0] - (shift[0]);
-    const double biy = bi->x[1] - (shift[1]);
-    const double biz = bi->x[2] - (shift[2]);
-    const float hi = bi->h;
-    const float hig2 = hi * hi * kernel_gamma2;
-
-#ifdef SWIFT_DEBUG_CHECKS
-    if (!bpart_is_active(bi, e))
-      error("Trying to correct smoothing length of inactive particle !");
-#endif
-
-    /* Loop over the parts in cj. */
-    for (int pjd = 0; pjd < count_j; pjd++) {
-
-      /* Get a pointer to the jth particle. */
-      struct part *restrict pj = &parts_j[pjd];
-      struct xpart *restrict xpj = &xparts_j[pjd];
-
-      /* Skip inhibited particles */
-      if (part_is_inhibited(pj, e)) continue;
-
-      const double pjx = pj->x[0];
-      const double pjy = pj->x[1];
-      const double pjz = pj->x[2];
-      const float hj = pj->h;
-
-      /* Compute the pairwise distance. */
-      float dx[3] = {(float)(bix - pjx), (float)(biy - pjy),
-                     (float)(biz - pjz)};
-      const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
-
-#ifdef SWIFT_DEBUG_CHECKS
-      /* Check that particles have been drifted to the current time */
-      if (pj->ti_drift != e->ti_current)
-        error("Particle pj not drifted to current time");
-#endif
-      /* Hit or miss? */
-      if (r2 < hig2) {
-        IACT_BH_GAS(r2, dx, hi, hj, bi, pj, xpj, cosmo, e->gravity_properties,
-                    ti_current);
-      }
-    } /* loop over the parts in cj. */
-  }   /* loop over the parts in ci. */
-}
-
-/**
- * @brief Compute the interactions between a cell pair, but only for the
- *      given indices in ci.
- *
- * @param r The #runner.
- * @param ci The first #cell.
- * @param bparts The #bpart to interact.
- * @param ind The list of indices of particles in @c ci to interact with.
- * @param bcount The number of particles in @c ind.
- */
-void DOSELF1_SUBSET_BH(struct runner *r, struct cell *restrict ci,
-                       struct bpart *restrict bparts, int *restrict ind,
-                       const int bcount) {
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (ci->nodeID != engine_rank) error("Should be run on a different node");
-#endif
-
-  const struct engine *e = r->e;
-  const integertime_t ti_current = e->ti_current;
-  const struct cosmology *cosmo = e->cosmology;
-
-  const int count_i = ci->hydro.count;
-  struct part *restrict parts_j = ci->hydro.parts;
-  struct xpart *restrict xparts_j = ci->hydro.xparts;
+void DOSELF1_BRANCH_BH(struct runner *r, struct cell *c);
+void DOPAIR1_BRANCH_BH(struct runner *r, struct cell *ci, struct cell *cj);
 
-  /* Early abort? */
-  if (count_i == 0) return;
-
-  /* Loop over the parts in ci. */
-  for (int bid = 0; bid < bcount; bid++) {
-
-    /* Get a hold of the ith part in ci. */
-    struct bpart *bi = &bparts[ind[bid]];
-    const float bix[3] = {(float)(bi->x[0] - ci->loc[0]),
-                          (float)(bi->x[1] - ci->loc[1]),
-                          (float)(bi->x[2] - ci->loc[2])};
-    const float hi = bi->h;
-    const float hig2 = hi * hi * kernel_gamma2;
-
-#ifdef SWIFT_DEBUG_CHECKS
-    if (!bpart_is_active(bi, e)) error("Inactive particle in subset function!");
-#endif
-
-    /* Loop over the parts in cj. */
-    for (int pjd = 0; pjd < count_i; pjd++) {
-
-      /* Get a pointer to the jth particle. */
-      struct part *restrict pj = &parts_j[pjd];
-      struct xpart *restrict xpj = &xparts_j[pjd];
-
-      /* Early abort? */
-      if (part_is_inhibited(pj, e)) continue;
-
-      /* Compute the pairwise distance. */
-      const float pjx[3] = {(float)(pj->x[0] - ci->loc[0]),
-                            (float)(pj->x[1] - ci->loc[1]),
-                            (float)(pj->x[2] - ci->loc[2])};
-      float dx[3] = {bix[0] - pjx[0], bix[1] - pjx[1], bix[2] - pjx[2]};
-      const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
-
-#ifdef SWIFT_DEBUG_CHECKS
-      /* Check that particles have been drifted to the current time */
-      if (pj->ti_drift != e->ti_current)
-        error("Particle pj not drifted to current time");
-#endif
-
-      /* Hit or miss? */
-      if (r2 < hig2) {
-        IACT_BH_GAS(r2, dx, hi, pj->h, bi, pj, xpj, cosmo,
-                    e->gravity_properties, ti_current);
-      }
-    } /* loop over the parts in cj. */
-  }   /* loop over the parts in ci. */
-}
+void DOSUB_SELF1_BH(struct runner *r, struct cell *ci, int gettimer);
+void DOSUB_PAIR1_BH(struct runner *r, struct cell *ci, struct cell *cj,
+                    int gettimer);
 
-/**
- * @brief Determine which version of DOSELF1_SUBSET_BH needs to be called
- * depending on the optimisation level.
- *
- * @param r The #runner.
- * @param ci The first #cell.
- * @param bparts The #bpart to interact.
- * @param ind The list of indices of particles in @c ci to interact with.
- * @param bcount The number of particles in @c ind.
- */
 void DOSELF1_SUBSET_BRANCH_BH(struct runner *r, struct cell *restrict ci,
                               struct bpart *restrict bparts, int *restrict ind,
-                              const int bcount) {
-
-  DOSELF1_SUBSET_BH(r, ci, bparts, ind, bcount);
-}
-
-/**
- * @brief Determine which version of DOPAIR1_SUBSET_BH needs to be called
- * depending on the orientation of the cells or whether DOPAIR1_SUBSET_BH
- * needs to be called at all.
- *
- * @param r The #runner.
- * @param ci The first #cell.
- * @param bparts_i The #bpart to interact with @c cj.
- * @param ind The list of indices of particles in @c ci to interact with.
- * @param bcount The number of particles in @c ind.
- * @param cj The second #cell.
- */
+                              const int bcount);
 void DOPAIR1_SUBSET_BRANCH_BH(struct runner *r, struct cell *restrict ci,
                               struct bpart *restrict bparts_i,
                               int *restrict ind, int const bcount,
-                              struct cell *restrict cj) {
-
-  const struct engine *e = r->e;
-
-  /* Anything to do here? */
-  if (cj->hydro.count == 0) return;
-
-  /* Get the relative distance between the pairs, wrapping. */
-  double shift[3] = {0.0, 0.0, 0.0};
-  for (int k = 0; k < 3; k++) {
-    if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2)
-      shift[k] = e->s->dim[k];
-    else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2)
-      shift[k] = -e->s->dim[k];
-  }
-
-  DOPAIR1_SUBSET_BH_NAIVE(r, ci, bparts_i, ind, bcount, cj, shift);
-}
+                              struct cell *restrict cj);
 
 void DOSUB_SUBSET_BH(struct runner *r, struct cell *ci, struct bpart *bparts,
-                     int *ind, const int bcount, struct cell *cj,
-                     int gettimer) {
-
-  const struct engine *e = r->e;
-  struct space *s = e->s;
-
-  /* Should we even bother? */
-  if (!cell_is_active_black_holes(ci, e) &&
-      (cj == NULL || !cell_is_active_black_holes(cj, e)))
-    return;
-
-  /* Find out in which sub-cell of ci the parts are. */
-  struct cell *sub = NULL;
-  if (ci->split) {
-    for (int k = 0; k < 8; k++) {
-      if (ci->progeny[k] != NULL) {
-        if (&bparts[ind[0]] >= &ci->progeny[k]->black_holes.parts[0] &&
-            &bparts[ind[0]] <
-                &ci->progeny[k]
-                     ->black_holes.parts[ci->progeny[k]->black_holes.count]) {
-          sub = ci->progeny[k];
-          break;
-        }
-      }
-    }
-  }
-
-  /* Is this a single cell? */
-  if (cj == NULL) {
-
-    /* Recurse? */
-    if (cell_can_recurse_in_self_black_holes_task(ci)) {
-
-      /* Loop over all progeny. */
-      DOSUB_SUBSET_BH(r, sub, bparts, ind, bcount, NULL, 0);
-      for (int j = 0; j < 8; j++)
-        if (ci->progeny[j] != sub && ci->progeny[j] != NULL)
-          DOSUB_SUBSET_BH(r, sub, bparts, ind, bcount, ci->progeny[j], 0);
-
-    }
-
-    /* Otherwise, compute self-interaction. */
-    else
-      DOSELF1_SUBSET_BRANCH_BH(r, ci, bparts, ind, bcount);
-  } /* self-interaction. */
-
-  /* Otherwise, it's a pair interaction. */
-  else {
-
-    /* Recurse? */
-    if (cell_can_recurse_in_pair_black_holes_task(ci, cj) &&
-        cell_can_recurse_in_pair_black_holes_task(cj, ci)) {
-
-      /* Get the type of pair and flip ci/cj if needed. */
-      double shift[3] = {0.0, 0.0, 0.0};
-      const int sid = space_getsid(s, &ci, &cj, shift);
-
-      struct cell_split_pair *csp = &cell_split_pairs[sid];
-      for (int k = 0; k < csp->count; k++) {
-        const int pid = csp->pairs[k].pid;
-        const int pjd = csp->pairs[k].pjd;
-        if (ci->progeny[pid] == sub && cj->progeny[pjd] != NULL)
-          DOSUB_SUBSET_BH(r, ci->progeny[pid], bparts, ind, bcount,
-                          cj->progeny[pjd], 0);
-        if (ci->progeny[pid] != NULL && cj->progeny[pjd] == sub)
-          DOSUB_SUBSET_BH(r, cj->progeny[pjd], bparts, ind, bcount,
-                          ci->progeny[pid], 0);
-      }
-    }
-
-    /* Otherwise, compute the pair directly. */
-    else if (cell_is_active_black_holes(ci, e) && cj->hydro.count > 0) {
-
-      /* Do any of the cells need to be drifted first? */
-      if (cell_is_active_black_holes(ci, e)) {
-        if (!cell_are_bpart_drifted(ci, e)) error("Cell should be drifted!");
-        if (!cell_are_part_drifted(cj, e)) error("Cell should be drifted!");
-      }
-
-      DOPAIR1_SUBSET_BRANCH_BH(r, ci, bparts, ind, bcount, cj);
-    }
-
-  } /* otherwise, pair interaction. */
-}
-
-/**
- * @brief Determine which version of DOSELF1_BH needs to be called depending
- * on the optimisation level.
- *
- * @param r #runner
- * @param c #cell c
- *
- */
-void DOSELF1_BRANCH_BH(struct runner *r, struct cell *c) {
-
-  const struct engine *restrict e = r->e;
-
-  /* Anything to do here? */
-  if (c->black_holes.count == 0) return;
-
-  /* Anything to do here? */
-  if (!cell_is_active_black_holes(c, e)) return;
-
-  /* Did we mess up the recursion? */
-  if (c->black_holes.h_max_old * kernel_gamma > c->dmin)
-    error("Cell smaller than smoothing length");
-
-  DOSELF1_BH(r, c, 1);
-}
-
-/**
- * @brief Determine which version of DOPAIR1_BH needs to be called depending
- * on the orientation of the cells or whether DOPAIR1_BH needs to be called
- * at all.
- *
- * @param r #runner
- * @param ci #cell ci
- * @param cj #cell cj
- *
- */
-void DOPAIR1_BRANCH_BH(struct runner *r, struct cell *ci, struct cell *cj) {
-
-  const struct engine *restrict e = r->e;
-
-  const int ci_active = cell_is_active_black_holes(ci, e);
-  const int cj_active = cell_is_active_black_holes(cj, e);
-#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
-  const int do_ci_bh = ci->nodeID == e->nodeID;
-  const int do_cj_bh = cj->nodeID == e->nodeID;
-#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK)
-  /* here we are updating the hydro -> switch ci, cj */
-  const int do_ci_bh = cj->nodeID == e->nodeID;
-  const int do_cj_bh = ci->nodeID == e->nodeID;
-#else
-  /* The swallow task is executed on both sides */
-  const int do_ci_bh = 1;
-  const int do_cj_bh = 1;
-#endif
-
-  const int do_ci = (ci->black_holes.count != 0 && cj->hydro.count != 0 &&
-                     ci_active && do_ci_bh);
-  const int do_cj = (cj->black_holes.count != 0 && ci->hydro.count != 0 &&
-                     cj_active && do_cj_bh);
-
-  /* Anything to do here? */
-  if (!do_ci && !do_cj) return;
-
-  /* Check that cells are drifted. */
-  if (do_ci &&
-      (!cell_are_bpart_drifted(ci, e) || !cell_are_part_drifted(cj, e)))
-    error("Interacting undrifted cells.");
-
-  if (do_cj &&
-      (!cell_are_part_drifted(ci, e) || !cell_are_bpart_drifted(cj, e)))
-    error("Interacting undrifted cells.");
-
-  /* No sorted intreactions here -> use the naive ones */
-  DOPAIR1_BH_NAIVE(r, ci, cj, 1);
-}
-
-/**
- * @brief Compute grouped sub-cell interactions for pairs
- *
- * @param r The #runner.
- * @param ci The first #cell.
- * @param cj The second #cell.
- * @param gettimer Do we have a timer ?
- *
- * @todo Hard-code the sid on the recursive calls to avoid the
- * redundant computations to find the sid on-the-fly.
- */
-void DOSUB_PAIR1_BH(struct runner *r, struct cell *ci, struct cell *cj,
-                    int gettimer) {
-
-  TIMER_TIC;
-
-  struct space *s = r->e->s;
-  const struct engine *e = r->e;
-
-  /* Should we even bother?
-   * In the swallow case we care about BH-BH and BH-gas
-   * interactions.
-   * In all other cases only BH-gas so we can abort if there is
-   * is no gas in the cell */
-#if (FUNCTION_TASK_LOOP == TASK_LOOP_SWALLOW)
-  const int should_do_ci =
-      ci->black_holes.count != 0 && cell_is_active_black_holes(ci, e);
-  const int should_do_cj =
-      cj->black_holes.count != 0 && cell_is_active_black_holes(cj, e);
-#else
-  const int should_do_ci = ci->black_holes.count != 0 && cj->hydro.count != 0 &&
-                           cell_is_active_black_holes(ci, e);
-  const int should_do_cj = cj->black_holes.count != 0 && ci->hydro.count != 0 &&
-                           cell_is_active_black_holes(cj, e);
-
-#endif
-
-  if (!should_do_ci && !should_do_cj) return;
-
-  /* Get the type of pair and flip ci/cj if needed. */
-  double shift[3];
-  const int sid = space_getsid(s, &ci, &cj, shift);
-
-  /* Recurse? */
-  if (cell_can_recurse_in_pair_black_holes_task(ci, cj) &&
-      cell_can_recurse_in_pair_black_holes_task(cj, ci)) {
-    struct cell_split_pair *csp = &cell_split_pairs[sid];
-    for (int k = 0; k < csp->count; k++) {
-      const int pid = csp->pairs[k].pid;
-      const int pjd = csp->pairs[k].pjd;
-      if (ci->progeny[pid] != NULL && cj->progeny[pjd] != NULL)
-        DOSUB_PAIR1_BH(r, ci->progeny[pid], cj->progeny[pjd], 0);
-    }
-  }
-
-  /* Otherwise, compute the pair directly. */
-  else {
-
-#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
-    const int do_ci_bh = ci->nodeID == e->nodeID;
-    const int do_cj_bh = cj->nodeID == e->nodeID;
-#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK)
-    /* Here we are updating the hydro -> switch ci, cj */
-    const int do_ci_bh = cj->nodeID == e->nodeID;
-    const int do_cj_bh = ci->nodeID == e->nodeID;
-#else
-    /* Here we perform the task on both sides */
-    const int do_ci_bh = 1;
-    const int do_cj_bh = 1;
-#endif
-
-    const int do_ci = ci->black_holes.count != 0 &&
-                      cell_is_active_black_holes(ci, e) && do_ci_bh;
-    const int do_cj = cj->black_holes.count != 0 &&
-                      cell_is_active_black_holes(cj, e) && do_cj_bh;
-
-    if (do_ci) {
-
-      /* Make sure both cells are drifted to the current timestep. */
-      if (!cell_are_bpart_drifted(ci, e))
-        error("Interacting undrifted cells (bparts).");
-
-      if (cj->hydro.count != 0 && !cell_are_part_drifted(cj, e))
-        error("Interacting undrifted cells (parts).");
-    }
-
-    if (do_cj) {
-
-      /* Make sure both cells are drifted to the current timestep. */
-      if (ci->hydro.count != 0 && !cell_are_part_drifted(ci, e))
-        error("Interacting undrifted cells (parts).");
-
-      if (!cell_are_bpart_drifted(cj, e))
-        error("Interacting undrifted cells (bparts).");
-    }
-
-    if (do_ci || do_cj) DOPAIR1_BRANCH_BH(r, ci, cj);
-  }
-
-  TIMER_TOC(TIMER_DOSUB_PAIR_BH);
-}
-
-/**
- * @brief Compute grouped sub-cell interactions for self tasks
- *
- * @param r The #runner.
- * @param ci The first #cell.
- * @param gettimer Do we have a timer ?
- */
-void DOSUB_SELF1_BH(struct runner *r, struct cell *ci, int gettimer) {
-
-  TIMER_TIC;
-
-  const struct engine *e = r->e;
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (ci->nodeID != engine_rank)
-    error("This function should not be called on foreign cells");
-#endif
-
-    /* Should we even bother?
-     * In the swallow case we care about BH-BH and BH-gas
-     * interactions.
-     * In all other cases only BH-gas so we can abort if there is
-     * is no gas in the cell */
-#if (FUNCTION_TASK_LOOP == TASK_LOOP_SWALLOW)
-  const int should_do_ci =
-      ci->black_holes.count != 0 && cell_is_active_black_holes(ci, e);
-#else
-  const int should_do_ci = ci->black_holes.count != 0 && ci->hydro.count != 0 &&
-                           cell_is_active_black_holes(ci, e);
-#endif
-
-  if (!should_do_ci) return;
-
-  /* Recurse? */
-  if (cell_can_recurse_in_self_black_holes_task(ci)) {
-
-    /* Loop over all progeny. */
-    for (int k = 0; k < 8; k++)
-      if (ci->progeny[k] != NULL) {
-        DOSUB_SELF1_BH(r, ci->progeny[k], 0);
-        for (int j = k + 1; j < 8; j++)
-          if (ci->progeny[j] != NULL)
-            DOSUB_PAIR1_BH(r, ci->progeny[k], ci->progeny[j], 0);
-      }
-  }
-
-  /* Otherwise, compute self-interaction. */
-  else {
-
-    /* Check we did drift to the current time */
-    if (!cell_are_bpart_drifted(ci, e)) error("Interacting undrifted cell.");
-
-    if (ci->hydro.count != 0 && !cell_are_part_drifted(ci, e))
-      error("Interacting undrifted cells (bparts).");
-
-    DOSELF1_BRANCH_BH(r, ci);
-  }
-
-  TIMER_TOC(TIMER_DOSUB_SELF_BH);
-}
+                     int *ind, const int bcount, struct cell *cj, int gettimer);
diff --git a/src/runner_doiact_functions_black_holes.h b/src/runner_doiact_functions_black_holes.h
new file mode 100644
index 0000000000000000000000000000000000000000..f8af37c751a9f7a89455ae5c9a7ef72ec55a1c64
--- /dev/null
+++ b/src/runner_doiact_functions_black_holes.h
@@ -0,0 +1,877 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
+ *               2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+/* Before including this file, define FUNCTION, which is the
+   name of the interaction function. This creates the interaction functions
+   runner_dopair_FUNCTION, runner_dopair_FUNCTION_naive, runner_doself_FUNCTION,
+   and runner_dosub_FUNCTION calling the pairwise interaction function
+   runner_iact_FUNCTION. */
+
+#include "runner_doiact_black_holes.h"
+
+/**
+ * @brief Brute-force self-interaction of the active #bpart of a cell with the
+ * #part of the same cell and, in the swallow loop, with its other #bpart.
+ * @param r The #runner; supplies the #engine used below.
+ * @param c The #cell to process.
+ * @param timer Not referenced explicitly in the body of this function.
+ */
+void DOSELF1_BH(struct runner *r, struct cell *c, int timer) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (c->nodeID != engine_rank) error("Should be run on a different node");
+#endif
+
+  TIMER_TIC;
+
+  const struct engine *e = r->e;
+  const integertime_t ti_current = e->ti_current;
+  const struct cosmology *cosmo = e->cosmology;
+
+  /* Anything to do here? */
+  if (c->black_holes.count == 0) return;
+  if (!cell_is_active_black_holes(c, e)) return;
+
+  const int bcount = c->black_holes.count;
+  const int count = c->hydro.count;
+  struct bpart *restrict bparts = c->black_holes.parts;
+  struct part *restrict parts = c->hydro.parts;
+  struct xpart *restrict xparts = c->hydro.xparts;
+
+  /* Do we actually have any gas neighbours? */
+  if (c->hydro.count != 0) {
+
+    /* Loop over the bparts in c. */
+    for (int bid = 0; bid < bcount; bid++) {
+
+      /* Get a hold of the ith bpart in c. */
+      struct bpart *restrict bi = &bparts[bid];
+
+      /* Skip inactive particles */
+      if (!bpart_is_active(bi, e)) continue;
+
+      const float hi = bi->h;
+      const float hig2 = hi * hi * kernel_gamma2;
+      const float bix[3] = {(float)(bi->x[0] - c->loc[0]),
+                            (float)(bi->x[1] - c->loc[1]),
+                            (float)(bi->x[2] - c->loc[2])};
+
+      /* Loop over the gas parts in c. */
+      for (int pjd = 0; pjd < count; pjd++) {
+
+        /* Get a pointer to the jth particle. */
+        struct part *restrict pj = &parts[pjd];
+        struct xpart *restrict xpj = &xparts[pjd];
+        const float hj = pj->h;
+
+        /* Skip inhibited particles. */
+        if (part_is_inhibited(pj, e)) continue;
+
+        /* Compute the pairwise distance (positions relative to c->loc). */
+        const float pjx[3] = {(float)(pj->x[0] - c->loc[0]),
+                              (float)(pj->x[1] - c->loc[1]),
+                              (float)(pj->x[2] - c->loc[2])};
+        float dx[3] = {bix[0] - pjx[0], bix[1] - pjx[1], bix[2] - pjx[2]};
+        const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
+
+#ifdef SWIFT_DEBUG_CHECKS
+        /* Check that particles have been drifted to the current time */
+        if (bi->ti_drift != e->ti_current)
+          error("Particle bi not drifted to current time");
+        if (pj->ti_drift != e->ti_current)
+          error("Particle pj not drifted to current time");
+#endif
+
+        if (r2 < hig2) {
+          IACT_BH_GAS(r2, dx, hi, hj, bi, pj, xpj, cosmo, e->gravity_properties,
+                      ti_current);
+        }
+      } /* loop over the gas parts in c. */
+    }   /* loop over the bparts in c. */
+  }     /* Do we have gas particles in the cell? */
+
+    /* When doing BH swallowing, we need a quick loop also over the BH
+     * neighbours */
+#if (FUNCTION_TASK_LOOP == TASK_LOOP_SWALLOW)
+
+  /* Loop over the bparts in c. */
+  for (int bid = 0; bid < bcount; bid++) {
+
+    /* Get a hold of the ith bpart in c. */
+    struct bpart *restrict bi = &bparts[bid];
+
+    /* Skip inactive particles */
+    if (!bpart_is_active(bi, e)) continue;
+
+    const float hi = bi->h;
+    const float hig2 = hi * hi * kernel_gamma2;
+    const float bix[3] = {(float)(bi->x[0] - c->loc[0]),
+                          (float)(bi->x[1] - c->loc[1]),
+                          (float)(bi->x[2] - c->loc[2])};
+
+    /* Loop over the candidate neighbour bparts, also taken from c. */
+    for (int bjd = 0; bjd < bcount; bjd++) {
+
+      /* Skip self interaction */
+      if (bid == bjd) continue;
+
+      /* Get a pointer to the jth particle. */
+      struct bpart *restrict bj = &bparts[bjd];
+      const float hj = bj->h;
+
+      /* Skip inhibited particles. */
+      if (bpart_is_inhibited(bj, e)) continue;
+
+      /* Compute the pairwise distance (positions relative to c->loc). */
+      const float bjx[3] = {(float)(bj->x[0] - c->loc[0]),
+                            (float)(bj->x[1] - c->loc[1]),
+                            (float)(bj->x[2] - c->loc[2])};
+      float dx[3] = {bix[0] - bjx[0], bix[1] - bjx[1], bix[2] - bjx[2]};
+      const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
+
+#ifdef SWIFT_DEBUG_CHECKS
+      /* Check that particles have been drifted to the current time */
+      if (bi->ti_drift != e->ti_current)
+        error("Particle bi not drifted to current time");
+      if (bj->ti_drift != e->ti_current)
+        error("Particle bj not drifted to current time");
+#endif
+
+      if (r2 < hig2) {
+        IACT_BH_BH(r2, dx, hi, hj, bi, bj, cosmo, e->gravity_properties,
+                   ti_current);
+      }
+    } /* loop over the neighbour bparts in c. */
+  }   /* loop over the bparts in c. */
+
+#endif /* (FUNCTION_TASK_LOOP == TASK_LOOP_SWALLOW) */
+
+  TIMER_TOC(TIMER_DOSELF_BH);
+}
+
+/**
+ * @brief Brute-force, one-directional pair interaction: the active #bpart of
+ * ci with the #part of cj and, in the swallow loop, with the #bpart of cj.
+ * @param r The #runner; supplies the #engine used below.
+ * @param ci The #cell whose #bpart are looped over as bi.
+ * @param cj The #cell whose particles are looped over as neighbours.
+ */
+void DO_NONSYM_PAIR1_BH_NAIVE(struct runner *r, struct cell *restrict ci,
+                              struct cell *restrict cj) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
+  if (ci->nodeID != engine_rank) error("Should be run on a different node");
+#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK)
+  if (cj->nodeID != engine_rank) error("Should be run on a different node");
+#endif
+#endif
+
+  const struct engine *e = r->e;
+  const integertime_t ti_current = e->ti_current;
+  const struct cosmology *cosmo = e->cosmology;
+
+  /* Anything to do here? */
+  if (ci->black_holes.count == 0) return;
+  if (!cell_is_active_black_holes(ci, e)) return;
+
+  const int bcount_i = ci->black_holes.count;
+  const int count_j = cj->hydro.count;
+  struct bpart *restrict bparts_i = ci->black_holes.parts;
+  struct part *restrict parts_j = cj->hydro.parts;
+  struct xpart *restrict xparts_j = cj->hydro.xparts;
+
+  /* Get the wrapping shift between the cells: 0 or +/- e->s->dim[k]. */
+  double shift[3] = {0.0, 0.0, 0.0};
+  for (int k = 0; k < 3; k++) {
+    if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2)
+      shift[k] = e->s->dim[k];
+    else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2)
+      shift[k] = -e->s->dim[k];
+  }
+
+  /* Do we actually have any gas neighbours? */
+  if (cj->hydro.count != 0) {
+
+    /* Loop over the bparts in ci. */
+    for (int bid = 0; bid < bcount_i; bid++) {
+
+      /* Get a hold of the ith bpart in ci. */
+      struct bpart *restrict bi = &bparts_i[bid];
+
+      /* Skip inactive particles */
+      if (!bpart_is_active(bi, e)) continue;
+
+      const float hi = bi->h;
+      const float hig2 = hi * hi * kernel_gamma2;
+      const float bix[3] = {(float)(bi->x[0] - (cj->loc[0] + shift[0])),
+                            (float)(bi->x[1] - (cj->loc[1] + shift[1])),
+                            (float)(bi->x[2] - (cj->loc[2] + shift[2]))};
+
+      /* Loop over the gas parts in cj. */
+      for (int pjd = 0; pjd < count_j; pjd++) {
+
+        /* Get a pointer to the jth particle. */
+        struct part *restrict pj = &parts_j[pjd];
+        struct xpart *restrict xpj = &xparts_j[pjd];
+        const float hj = pj->h;
+
+        /* Skip inhibited particles. */
+        if (part_is_inhibited(pj, e)) continue;
+
+        /* Compute the pairwise distance (positions relative to cj->loc). */
+        const float pjx[3] = {(float)(pj->x[0] - cj->loc[0]),
+                              (float)(pj->x[1] - cj->loc[1]),
+                              (float)(pj->x[2] - cj->loc[2])};
+        float dx[3] = {bix[0] - pjx[0], bix[1] - pjx[1], bix[2] - pjx[2]};
+        const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
+
+#ifdef SWIFT_DEBUG_CHECKS
+        /* Check that particles have been drifted to the current time */
+        if (bi->ti_drift != e->ti_current)
+          error("Particle bi not drifted to current time");
+        if (pj->ti_drift != e->ti_current)
+          error("Particle pj not drifted to current time");
+#endif
+
+        if (r2 < hig2) {
+          IACT_BH_GAS(r2, dx, hi, hj, bi, pj, xpj, cosmo, e->gravity_properties,
+                      ti_current);
+        }
+      } /* loop over the gas parts in cj. */
+    }   /* loop over the bparts in ci. */
+  }     /* Do we have gas particles in cj? */
+
+    /* When doing BH swallowing, we need a quick loop also over the BH
+     * neighbours */
+#if (FUNCTION_TASK_LOOP == TASK_LOOP_SWALLOW)
+
+  const int bcount_j = cj->black_holes.count;
+  struct bpart *restrict bparts_j = cj->black_holes.parts;
+
+  /* Loop over the bparts in ci. */
+  for (int bid = 0; bid < bcount_i; bid++) {
+
+    /* Get a hold of the ith bpart in ci. */
+    struct bpart *restrict bi = &bparts_i[bid];
+
+    /* Skip inactive particles */
+    if (!bpart_is_active(bi, e)) continue;
+
+    const float hi = bi->h;
+    const float hig2 = hi * hi * kernel_gamma2;
+    const float bix[3] = {(float)(bi->x[0] - (cj->loc[0] + shift[0])),
+                          (float)(bi->x[1] - (cj->loc[1] + shift[1])),
+                          (float)(bi->x[2] - (cj->loc[2] + shift[2]))};
+
+    /* Loop over the bparts in cj. */
+    for (int bjd = 0; bjd < bcount_j; bjd++) {
+
+      /* Get a pointer to the jth particle. */
+      struct bpart *restrict bj = &bparts_j[bjd];
+      const float hj = bj->h;
+
+      /* Skip inhibited particles. */
+      if (bpart_is_inhibited(bj, e)) continue;
+
+      /* Compute the pairwise distance (positions relative to cj->loc). */
+      const float bjx[3] = {(float)(bj->x[0] - cj->loc[0]),
+                            (float)(bj->x[1] - cj->loc[1]),
+                            (float)(bj->x[2] - cj->loc[2])};
+      float dx[3] = {bix[0] - bjx[0], bix[1] - bjx[1], bix[2] - bjx[2]};
+      const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
+
+#ifdef SWIFT_DEBUG_CHECKS
+      /* Check that particles have been drifted to the current time */
+      if (bi->ti_drift != e->ti_current)
+        error("Particle bi not drifted to current time");
+      if (bj->ti_drift != e->ti_current)
+        error("Particle bj not drifted to current time");
+#endif
+
+      if (r2 < hig2) {
+        IACT_BH_BH(r2, dx, hi, hj, bi, bj, cosmo, e->gravity_properties,
+                   ti_current);
+      }
+    } /* loop over the bparts in cj. */
+  }   /* loop over the bparts in ci. */
+
+#endif /* (FUNCTION_TASK_LOOP == TASK_LOOP_SWALLOW) */
+}
+
+void DOPAIR1_BH_NAIVE(struct runner *r, struct cell *restrict ci,
+                      struct cell *restrict cj, int timer) {
+  /* Runs the one-directional naive loop for (ci, cj) and/or (cj, ci). */
+  TIMER_TIC;
+
+#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
+  const int do_ci_bh = ci->nodeID == r->e->nodeID;
+  const int do_cj_bh = cj->nodeID == r->e->nodeID;
+#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK)
+  /* Here we are updating the hydro -> switch ci, cj */
+  const int do_ci_bh = cj->nodeID == r->e->nodeID;
+  const int do_cj_bh = ci->nodeID == r->e->nodeID;
+#else
+  /* The swallow task is executed on both sides */
+  const int do_ci_bh = 1;
+  const int do_cj_bh = 1;
+#endif
+
+  if (do_ci_bh) DO_NONSYM_PAIR1_BH_NAIVE(r, ci, cj);
+  if (do_cj_bh) DO_NONSYM_PAIR1_BH_NAIVE(r, cj, ci);
+
+  TIMER_TOC(TIMER_DOPAIR_BH);
+}
+
+/**
+ * @brief Compute the interactions between a cell pair, but only for the
+ *      given indices in ci.
+ *
+ * Version using a brute-force algorithm.
+ *
+ * @param r The #runner.
+ * @param ci The first #cell (only read by the debug check in this function).
+ * @param bparts_i The #bpart to interact with @c cj.
+ * @param ind The list of indices into @c bparts_i of the particles to use.
+ * @param bcount The number of particles in @c ind.
+ * @param cj The second #cell.
+ * @param shift The shift vector to apply to the particles in ci.
+ */
+void DOPAIR1_SUBSET_BH_NAIVE(struct runner *r, struct cell *restrict ci,
+                             struct bpart *restrict bparts_i, int *restrict ind,
+                             const int bcount, struct cell *restrict cj,
+                             const double *shift) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (ci->nodeID != engine_rank) error("Should be run on a different node");
+#endif
+
+  const struct engine *e = r->e;
+  const integertime_t ti_current = e->ti_current;
+  const struct cosmology *cosmo = e->cosmology;
+
+  const int count_j = cj->hydro.count;
+  struct part *restrict parts_j = cj->hydro.parts;
+  struct xpart *restrict xparts_j = cj->hydro.xparts;
+
+  /* Early abort? (no gas in cj) */
+  if (count_j == 0) return;
+
+  /* Loop over the selected bparts. */
+  for (int bid = 0; bid < bcount; bid++) {
+
+    /* Get a hold of the selected bpart. */
+    struct bpart *restrict bi = &bparts_i[ind[bid]];
+
+    const double bix = bi->x[0] - (shift[0]);
+    const double biy = bi->x[1] - (shift[1]);
+    const double biz = bi->x[2] - (shift[2]);
+    const float hi = bi->h;
+    const float hig2 = hi * hi * kernel_gamma2;
+
+#ifdef SWIFT_DEBUG_CHECKS
+    if (!bpart_is_active(bi, e))
+      error("Trying to correct smoothing length of inactive particle !");
+#endif
+
+    /* Loop over the gas parts in cj. */
+    for (int pjd = 0; pjd < count_j; pjd++) {
+
+      /* Get a pointer to the jth particle. */
+      struct part *restrict pj = &parts_j[pjd];
+      struct xpart *restrict xpj = &xparts_j[pjd];
+
+      /* Skip inhibited particles */
+      if (part_is_inhibited(pj, e)) continue;
+
+      const double pjx = pj->x[0];
+      const double pjy = pj->x[1];
+      const double pjz = pj->x[2];
+      const float hj = pj->h;
+
+      /* Compute the pairwise distance (in double, then cast to float). */
+      float dx[3] = {(float)(bix - pjx), (float)(biy - pjy),
+                     (float)(biz - pjz)};
+      const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
+
+#ifdef SWIFT_DEBUG_CHECKS
+      /* Check that particles have been drifted to the current time */
+      if (pj->ti_drift != e->ti_current)
+        error("Particle pj not drifted to current time");
+#endif
+      /* Hit or miss? */
+      if (r2 < hig2) {
+        IACT_BH_GAS(r2, dx, hi, hj, bi, pj, xpj, cosmo, e->gravity_properties,
+                    ti_current);
+      }
+    } /* loop over the gas parts in cj. */
+  }   /* loop over the selected bparts. */
+}
+
+/**
+ * @brief Compute the interactions within a single cell, but only for the
+ *      given indices in ci.
+ *
+ * @param r The #runner.
+ * @param ci The #cell providing the gas neighbours.
+ * @param bparts The #bpart to interact.
+ * @param ind The list of indices into @c bparts of the particles to use.
+ * @param bcount The number of particles in @c ind.
+ */
+void DOSELF1_SUBSET_BH(struct runner *r, struct cell *restrict ci,
+                       struct bpart *restrict bparts, int *restrict ind,
+                       const int bcount) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (ci->nodeID != engine_rank) error("Should be run on a different node");
+#endif
+
+  const struct engine *e = r->e;
+  const integertime_t ti_current = e->ti_current;
+  const struct cosmology *cosmo = e->cosmology;
+
+  const int count_i = ci->hydro.count;
+  struct part *restrict parts_j = ci->hydro.parts;
+  struct xpart *restrict xparts_j = ci->hydro.xparts;
+
+  /* Early abort? (no gas in ci) */
+  if (count_i == 0) return;
+
+  /* Loop over the selected bparts. */
+  for (int bid = 0; bid < bcount; bid++) {
+
+    /* Get a hold of the selected bpart. */
+    struct bpart *bi = &bparts[ind[bid]];
+    const float bix[3] = {(float)(bi->x[0] - ci->loc[0]),
+                          (float)(bi->x[1] - ci->loc[1]),
+                          (float)(bi->x[2] - ci->loc[2])};
+    const float hi = bi->h;
+    const float hig2 = hi * hi * kernel_gamma2;
+
+#ifdef SWIFT_DEBUG_CHECKS
+    if (!bpart_is_active(bi, e)) error("Inactive particle in subset function!");
+#endif
+
+    /* Loop over the gas parts in ci. */
+    for (int pjd = 0; pjd < count_i; pjd++) {
+
+      /* Get a pointer to the jth particle. */
+      struct part *restrict pj = &parts_j[pjd];
+      struct xpart *restrict xpj = &xparts_j[pjd];
+
+      /* Skip inhibited particles. */
+      if (part_is_inhibited(pj, e)) continue;
+
+      /* Compute the pairwise distance (positions relative to ci->loc). */
+      const float pjx[3] = {(float)(pj->x[0] - ci->loc[0]),
+                            (float)(pj->x[1] - ci->loc[1]),
+                            (float)(pj->x[2] - ci->loc[2])};
+      float dx[3] = {bix[0] - pjx[0], bix[1] - pjx[1], bix[2] - pjx[2]};
+      const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
+
+#ifdef SWIFT_DEBUG_CHECKS
+      /* Check that particles have been drifted to the current time */
+      if (pj->ti_drift != e->ti_current)
+        error("Particle pj not drifted to current time");
+#endif
+
+      /* Hit or miss? */
+      if (r2 < hig2) {
+        IACT_BH_GAS(r2, dx, hi, pj->h, bi, pj, xpj, cosmo,
+                    e->gravity_properties, ti_current);
+      }
+    } /* loop over the gas parts in ci. */
+  }   /* loop over the selected bparts. */
+}
+
+/**
+ * @brief Branch function for DOSELF1_SUBSET_BH. There is a single version,
+ * so the arguments are forwarded unchanged.
+ *
+ * @param r The #runner.
+ * @param ci The first #cell.
+ * @param bparts The #bpart to interact.
+ * @param ind The list of indices of particles in @c ci to interact with.
+ * @param bcount The number of particles in @c ind.
+ */
+void DOSELF1_SUBSET_BRANCH_BH(struct runner *r, struct cell *restrict ci,
+                              struct bpart *restrict bparts, int *restrict ind,
+                              const int bcount) {
+
+  DOSELF1_SUBSET_BH(r, ci, bparts, ind, bcount);
+}
+
+/**
+ * @brief Branch function for the pair subset interactions: returns early if
+ * cj holds no gas, otherwise computes the wrapping shift between the cells
+ * and calls DOPAIR1_SUBSET_BH_NAIVE.
+ *
+ * @param r The #runner.
+ * @param ci The first #cell.
+ * @param bparts_i The #bpart to interact with @c cj.
+ * @param ind The list of indices of particles in @c ci to interact with.
+ * @param bcount The number of particles in @c ind.
+ * @param cj The second #cell.
+ */
+void DOPAIR1_SUBSET_BRANCH_BH(struct runner *r, struct cell *restrict ci,
+                              struct bpart *restrict bparts_i,
+                              int *restrict ind, int const bcount,
+                              struct cell *restrict cj) {
+
+  const struct engine *e = r->e;
+
+  /* Anything to do here? */
+  if (cj->hydro.count == 0) return;
+
+  /* Get the wrapping shift between the cells: 0 or +/- e->s->dim[k]. */
+  double shift[3] = {0.0, 0.0, 0.0};
+  for (int k = 0; k < 3; k++) {
+    if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2)
+      shift[k] = e->s->dim[k];
+    else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2)
+      shift[k] = -e->s->dim[k];
+  }
+
+  DOPAIR1_SUBSET_BH_NAIVE(r, ci, bparts_i, ind, bcount, cj, shift);
+}
+
+void DOSUB_SUBSET_BH(struct runner *r, struct cell *ci, struct bpart *bparts,
+                     int *ind, const int bcount, struct cell *cj,
+                     int gettimer) {
+  /* Subset recursion: cj == NULL is the self case, otherwise a pair. */
+  const struct engine *e = r->e;
+  struct space *s = e->s;
+
+  /* Should we even bother? */
+  if (!cell_is_active_black_holes(ci, e) &&
+      (cj == NULL || !cell_is_active_black_holes(cj, e)))
+    return;
+
+  /* Find the progeny of ci holding bparts[ind[0]] (sub stays NULL if none). */
+  struct cell *sub = NULL;
+  if (ci->split) {
+    for (int k = 0; k < 8; k++) {
+      if (ci->progeny[k] != NULL) {
+        if (&bparts[ind[0]] >= &ci->progeny[k]->black_holes.parts[0] &&
+            &bparts[ind[0]] <
+                &ci->progeny[k]
+                     ->black_holes.parts[ci->progeny[k]->black_holes.count]) {
+          sub = ci->progeny[k];
+          break;
+        }
+      }
+    }
+  }
+
+  /* Is this a single cell? */
+  if (cj == NULL) {
+
+    /* Recurse? */
+    if (cell_can_recurse_in_self_black_holes_task(ci)) {
+
+      /* Recurse on sub itself, then pair sub with every other progeny. */
+      DOSUB_SUBSET_BH(r, sub, bparts, ind, bcount, NULL, 0);
+      for (int j = 0; j < 8; j++)
+        if (ci->progeny[j] != sub && ci->progeny[j] != NULL)
+          DOSUB_SUBSET_BH(r, sub, bparts, ind, bcount, ci->progeny[j], 0);
+
+    }
+
+    /* Otherwise, compute self-interaction. */
+    else
+      DOSELF1_SUBSET_BRANCH_BH(r, ci, bparts, ind, bcount);
+  } /* self-interaction. */
+
+  /* Otherwise, it's a pair interaction. */
+  else {
+
+    /* Recurse? */
+    if (cell_can_recurse_in_pair_black_holes_task(ci, cj) &&
+        cell_can_recurse_in_pair_black_holes_task(cj, ci)) {
+
+      /* Get the type of pair and flip ci/cj if needed. */
+      double shift[3] = {0.0, 0.0, 0.0};
+      const int sid = space_getsid(s, &ci, &cj, shift);
+
+      /* Only follow the progeny pairs in which one side is sub. */
+      struct cell_split_pair *csp = &cell_split_pairs[sid];
+      for (int k = 0; k < csp->count; k++) {
+        const int pid = csp->pairs[k].pid;
+        const int pjd = csp->pairs[k].pjd;
+        if (ci->progeny[pid] == sub && cj->progeny[pjd] != NULL)
+          DOSUB_SUBSET_BH(r, ci->progeny[pid], bparts, ind, bcount,
+                          cj->progeny[pjd], 0);
+        if (ci->progeny[pid] != NULL && cj->progeny[pjd] == sub)
+          DOSUB_SUBSET_BH(r, cj->progeny[pjd], bparts, ind, bcount,
+                          ci->progeny[pid], 0);
+      }
+    }
+
+    /* Otherwise, compute the pair directly. */
+    else if (cell_is_active_black_holes(ci, e) && cj->hydro.count > 0) {
+
+      /* Check the drifts (the activity test repeats the branch condition). */
+      if (cell_is_active_black_holes(ci, e)) {
+        if (!cell_are_bpart_drifted(ci, e)) error("Cell should be drifted!");
+        if (!cell_are_part_drifted(cj, e)) error("Cell should be drifted!");
+      }
+
+      DOPAIR1_SUBSET_BRANCH_BH(r, ci, bparts, ind, bcount, cj);
+    }
+
+  } /* otherwise, pair interaction. */
+}
+
+/**
+ * @brief Branch function for DOSELF1_BH: skips empty or inactive cells,
+ * checks the cell size against h_max_old, then calls DOSELF1_BH.
+ *
+ * @param r #runner
+ * @param c #cell c
+ *
+ */
+void DOSELF1_BRANCH_BH(struct runner *r, struct cell *c) {
+
+  const struct engine *restrict e = r->e;
+
+  /* Anything to do here? */
+  if (c->black_holes.count == 0) return;
+
+  /* Anything to do here? */
+  if (!cell_is_active_black_holes(c, e)) return;
+
+  /* Did we mess up the recursion? */
+  if (c->black_holes.h_max_old * kernel_gamma > c->dmin)
+    error("Cell smaller than smoothing length");
+
+  DOSELF1_BH(r, c, 1);
+}
+
+/**
+ * @brief Branch function for DOPAIR1_BH: decides whether either direction of
+ * the pair has work to do, checks the drifts and calls the naive pair
+ * interaction.
+ *
+ * @param r #runner
+ * @param ci #cell ci
+ * @param cj #cell cj
+ *
+ */
+void DOPAIR1_BRANCH_BH(struct runner *r, struct cell *ci, struct cell *cj) {
+
+  const struct engine *restrict e = r->e;
+
+  const int ci_active = cell_is_active_black_holes(ci, e);
+  const int cj_active = cell_is_active_black_holes(cj, e);
+#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
+  const int do_ci_bh = ci->nodeID == e->nodeID;
+  const int do_cj_bh = cj->nodeID == e->nodeID;
+#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK)
+  /* Here we are updating the hydro -> switch ci, cj */
+  const int do_ci_bh = cj->nodeID == e->nodeID;
+  const int do_cj_bh = ci->nodeID == e->nodeID;
+#else
+  /* The swallow task is executed on both sides */
+  const int do_ci_bh = 1;
+  const int do_cj_bh = 1;
+#endif
+  /* NOTE(review): these gas tests also gate the swallow loop - confirm. */
+  const int do_ci = (ci->black_holes.count != 0 && cj->hydro.count != 0 &&
+                     ci_active && do_ci_bh);
+  const int do_cj = (cj->black_holes.count != 0 && ci->hydro.count != 0 &&
+                     cj_active && do_cj_bh);
+
+  /* Anything to do here? */
+  if (!do_ci && !do_cj) return;
+
+  /* Check that cells are drifted. */
+  if (do_ci &&
+      (!cell_are_bpart_drifted(ci, e) || !cell_are_part_drifted(cj, e)))
+    error("Interacting undrifted cells.");
+
+  if (do_cj &&
+      (!cell_are_part_drifted(ci, e) || !cell_are_bpart_drifted(cj, e)))
+    error("Interacting undrifted cells.");
+
+  /* No sorted interactions here -> use the naive ones */
+  DOPAIR1_BH_NAIVE(r, ci, cj, 1);
+}
+
+/**
+ * @brief Compute grouped sub-cell interactions for pairs
+ *
+ * @param r The #runner.
+ * @param ci The first #cell.
+ * @param cj The second #cell.
+ * @param gettimer Not referenced explicitly in the body of this function.
+ *
+ * @todo Hard-code the sid on the recursive calls to avoid the
+ * redundant computations to find the sid on-the-fly.
+ */
+void DOSUB_PAIR1_BH(struct runner *r, struct cell *ci, struct cell *cj,
+                    int gettimer) {
+
+  TIMER_TIC;
+
+  struct space *s = r->e->s;
+  const struct engine *e = r->e;
+
+  /* Should we even bother?
+   * In the swallow case we care about BH-BH and BH-gas
+   * interactions.
+   * In all other cases only BH-gas so we can abort if there
+   * is no gas in the cell */
+#if (FUNCTION_TASK_LOOP == TASK_LOOP_SWALLOW)
+  const int should_do_ci =
+      ci->black_holes.count != 0 && cell_is_active_black_holes(ci, e);
+  const int should_do_cj =
+      cj->black_holes.count != 0 && cell_is_active_black_holes(cj, e);
+#else
+  const int should_do_ci = ci->black_holes.count != 0 && cj->hydro.count != 0 &&
+                           cell_is_active_black_holes(ci, e);
+  const int should_do_cj = cj->black_holes.count != 0 && ci->hydro.count != 0 &&
+                           cell_is_active_black_holes(cj, e);
+
+#endif
+
+  if (!should_do_ci && !should_do_cj) return;
+
+  /* Get the type of pair and flip ci/cj if needed. */
+  double shift[3];
+  const int sid = space_getsid(s, &ci, &cj, shift);
+
+  /* Recurse? */
+  if (cell_can_recurse_in_pair_black_holes_task(ci, cj) &&
+      cell_can_recurse_in_pair_black_holes_task(cj, ci)) {
+    struct cell_split_pair *csp = &cell_split_pairs[sid];
+    for (int k = 0; k < csp->count; k++) {
+      const int pid = csp->pairs[k].pid;
+      const int pjd = csp->pairs[k].pjd;
+      if (ci->progeny[pid] != NULL && cj->progeny[pjd] != NULL)
+        DOSUB_PAIR1_BH(r, ci->progeny[pid], cj->progeny[pjd], 0);
+    }
+  }
+
+  /* Otherwise, compute the pair directly. */
+  else {
+
+#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
+    const int do_ci_bh = ci->nodeID == e->nodeID;
+    const int do_cj_bh = cj->nodeID == e->nodeID;
+#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK)
+    /* Here we are updating the hydro -> switch ci, cj */
+    const int do_ci_bh = cj->nodeID == e->nodeID;
+    const int do_cj_bh = ci->nodeID == e->nodeID;
+#else
+    /* Here we perform the task on both sides */
+    const int do_ci_bh = 1;
+    const int do_cj_bh = 1;
+#endif
+
+    const int do_ci = ci->black_holes.count != 0 &&
+                      cell_is_active_black_holes(ci, e) && do_ci_bh;
+    const int do_cj = cj->black_holes.count != 0 &&
+                      cell_is_active_black_holes(cj, e) && do_cj_bh;
+
+    if (do_ci) {
+
+      /* Make sure the bparts of ci and the gas of cj (if any) are drifted. */
+      if (!cell_are_bpart_drifted(ci, e))
+        error("Interacting undrifted cells (bparts).");
+
+      if (cj->hydro.count != 0 && !cell_are_part_drifted(cj, e))
+        error("Interacting undrifted cells (parts).");
+    }
+
+    if (do_cj) {
+
+      /* Make sure the gas of ci (if any) and the bparts of cj are drifted. */
+      if (ci->hydro.count != 0 && !cell_are_part_drifted(ci, e))
+        error("Interacting undrifted cells (parts).");
+
+      if (!cell_are_bpart_drifted(cj, e))
+        error("Interacting undrifted cells (bparts).");
+    }
+
+    if (do_ci || do_cj) DOPAIR1_BRANCH_BH(r, ci, cj);
+  }
+
+  TIMER_TOC(TIMER_DOSUB_PAIR_BH);
+}
+
+/**
+ * @brief Compute grouped sub-cell interactions for self tasks
+ *
+ * @param r The #runner.
+ * @param ci The first #cell.
+ * @param gettimer Not referenced explicitly in the body of this function.
+ */
+void DOSUB_SELF1_BH(struct runner *r, struct cell *ci, int gettimer) {
+
+  TIMER_TIC;
+
+  const struct engine *e = r->e;
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (ci->nodeID != engine_rank)
+    error("This function should not be called on foreign cells");
+#endif
+
+    /* Should we even bother?
+     * In the swallow case we care about BH-BH and BH-gas
+     * interactions.
+     * In all other cases only BH-gas so we can abort if there
+     * is no gas in the cell */
+#if (FUNCTION_TASK_LOOP == TASK_LOOP_SWALLOW)
+  const int should_do_ci =
+      ci->black_holes.count != 0 && cell_is_active_black_holes(ci, e);
+#else
+  const int should_do_ci = ci->black_holes.count != 0 && ci->hydro.count != 0 &&
+                           cell_is_active_black_holes(ci, e);
+#endif
+
+  if (!should_do_ci) return;
+
+  /* Recurse? */
+  if (cell_can_recurse_in_self_black_holes_task(ci)) {
+
+    /* Loop over all progeny: self on each, pair with each later sibling. */
+    for (int k = 0; k < 8; k++)
+      if (ci->progeny[k] != NULL) {
+        DOSUB_SELF1_BH(r, ci->progeny[k], 0);
+        for (int j = k + 1; j < 8; j++)
+          if (ci->progeny[j] != NULL)
+            DOSUB_PAIR1_BH(r, ci->progeny[k], ci->progeny[j], 0);
+      }
+  }
+
+  /* Otherwise, compute self-interaction. */
+  else {
+
+    /* Check we did drift the bparts to the current time */
+    if (!cell_are_bpart_drifted(ci, e)) error("Interacting undrifted cell.");
+    /* Same for the gas, when there is any (hence the "parts" message) */
+    if (ci->hydro.count != 0 && !cell_are_part_drifted(ci, e))
+      error("Interacting undrifted cells (parts).");
+
+    DOSELF1_BRANCH_BH(r, ci);
+  }
+
+  TIMER_TOC(TIMER_DOSUB_SELF_BH);
+}
diff --git a/src/runner_doiact.h b/src/runner_doiact_functions_hydro.h
similarity index 96%
rename from src/runner_doiact.h
rename to src/runner_doiact_functions_hydro.h
index 8aabb05d177385c6bbee1a91eb2ea231ccbca3e4..c324c759b5acc9db75cf0849d0e417b2141978f4 100644
--- a/src/runner_doiact.h
+++ b/src/runner_doiact_functions_hydro.h
@@ -24,106 +24,7 @@
    and runner_dosub_FUNCTION calling the pairwise interaction function
    runner_iact_FUNCTION. */
 
-#define PASTE(x, y) x##_##y
-
-#define _DOPAIR1_BRANCH(f) PASTE(runner_dopair1_branch, f)
-#define DOPAIR1_BRANCH _DOPAIR1_BRANCH(FUNCTION)
-
-#define _DOPAIR1(f) PASTE(runner_dopair1, f)
-#define DOPAIR1 _DOPAIR1(FUNCTION)
-
-#define _DOPAIR2_BRANCH(f) PASTE(runner_dopair2_branch, f)
-#define DOPAIR2_BRANCH _DOPAIR2_BRANCH(FUNCTION)
-
-#define _DOPAIR2(f) PASTE(runner_dopair2, f)
-#define DOPAIR2 _DOPAIR2(FUNCTION)
-
-#define _DOPAIR_SUBSET(f) PASTE(runner_dopair_subset, f)
-#define DOPAIR_SUBSET _DOPAIR_SUBSET(FUNCTION)
-
-#define _DOPAIR_SUBSET_BRANCH(f) PASTE(runner_dopair_subset_branch, f)
-#define DOPAIR_SUBSET_BRANCH _DOPAIR_SUBSET_BRANCH(FUNCTION)
-
-#define _DOPAIR_SUBSET_NOSORT(f) PASTE(runner_dopair_subset_nosort, f)
-#define DOPAIR_SUBSET_NOSORT _DOPAIR_SUBSET_NOSORT(FUNCTION)
-
-#define _DOPAIR_SUBSET_NAIVE(f) PASTE(runner_dopair_subset_naive, f)
-#define DOPAIR_SUBSET_NAIVE _DOPAIR_SUBSET_NAIVE(FUNCTION)
-
-#define _DOPAIR1_NAIVE(f) PASTE(runner_dopair1_naive, f)
-#define DOPAIR1_NAIVE _DOPAIR1_NAIVE(FUNCTION)
-
-#define _DOPAIR2_NAIVE(f) PASTE(runner_dopair2_naive, f)
-#define DOPAIR2_NAIVE _DOPAIR2_NAIVE(FUNCTION)
-
-#define _DOSELF1_NAIVE(f) PASTE(runner_doself1_naive, f)
-#define DOSELF1_NAIVE _DOSELF1_NAIVE(FUNCTION)
-
-#define _DOSELF2_NAIVE(f) PASTE(runner_doself2_naive, f)
-#define DOSELF2_NAIVE _DOSELF2_NAIVE(FUNCTION)
-
-#define _DOSELF1_BRANCH(f) PASTE(runner_doself1_branch, f)
-#define DOSELF1_BRANCH _DOSELF1_BRANCH(FUNCTION)
-
-#define _DOSELF1(f) PASTE(runner_doself1, f)
-#define DOSELF1 _DOSELF1(FUNCTION)
-
-#define _DOSELF2_BRANCH(f) PASTE(runner_doself2_branch, f)
-#define DOSELF2_BRANCH _DOSELF2_BRANCH(FUNCTION)
-
-#define _DOSELF2(f) PASTE(runner_doself2, f)
-#define DOSELF2 _DOSELF2(FUNCTION)
-
-#define _DOSELF_SUBSET(f) PASTE(runner_doself_subset, f)
-#define DOSELF_SUBSET _DOSELF_SUBSET(FUNCTION)
-
-#define _DOSELF_SUBSET_BRANCH(f) PASTE(runner_doself_subset_branch, f)
-#define DOSELF_SUBSET_BRANCH _DOSELF_SUBSET_BRANCH(FUNCTION)
-
-#define _DOSUB_SELF1(f) PASTE(runner_dosub_self1, f)
-#define DOSUB_SELF1 _DOSUB_SELF1(FUNCTION)
-
-#define _DOSUB_PAIR1(f) PASTE(runner_dosub_pair1, f)
-#define DOSUB_PAIR1 _DOSUB_PAIR1(FUNCTION)
-
-#define _DOSUB_SELF2(f) PASTE(runner_dosub_self2, f)
-#define DOSUB_SELF2 _DOSUB_SELF2(FUNCTION)
-
-#define _DOSUB_PAIR2(f) PASTE(runner_dosub_pair2, f)
-#define DOSUB_PAIR2 _DOSUB_PAIR2(FUNCTION)
-
-#define _DOSUB_SUBSET(f) PASTE(runner_dosub_subset, f)
-#define DOSUB_SUBSET _DOSUB_SUBSET(FUNCTION)
-
-#define _IACT_NONSYM(f) PASTE(runner_iact_nonsym, f)
-#define IACT_NONSYM _IACT_NONSYM(FUNCTION)
-
-#define _IACT(f) PASTE(runner_iact, f)
-#define IACT _IACT(FUNCTION)
-
-#define _IACT_NONSYM_VEC(f) PASTE(runner_iact_nonsym_vec, f)
-#define IACT_NONSYM_VEC _IACT_NONSYM_VEC(FUNCTION)
-
-#define _IACT_VEC(f) PASTE(runner_iact_vec, f)
-#define IACT_VEC _IACT_VEC(FUNCTION)
-
-#define _TIMER_DOSELF(f) PASTE(timer_doself, f)
-#define TIMER_DOSELF _TIMER_DOSELF(FUNCTION)
-
-#define _TIMER_DOPAIR(f) PASTE(timer_dopair, f)
-#define TIMER_DOPAIR _TIMER_DOPAIR(FUNCTION)
-
-#define _TIMER_DOSUB_SELF(f) PASTE(timer_dosub_self, f)
-#define TIMER_DOSUB_SELF _TIMER_DOSUB_SELF(FUNCTION)
-
-#define _TIMER_DOSUB_PAIR(f) PASTE(timer_dosub_pair, f)
-#define TIMER_DOSUB_PAIR _TIMER_DOSUB_PAIR(FUNCTION)
-
-#define _TIMER_DOSELF_SUBSET(f) PASTE(timer_doself_subset, f)
-#define TIMER_DOSELF_SUBSET _TIMER_DOSELF_SUBSET(FUNCTION)
-
-#define _TIMER_DOPAIR_SUBSET(f) PASTE(timer_dopair_subset, f)
-#define TIMER_DOPAIR_SUBSET _TIMER_DOPAIR_SUBSET(FUNCTION)
+#include "runner_doiact_hydro.h"
 
 /**
  * @brief Compute the interactions between a cell pair (non-symmetric case).
diff --git a/src/runner_doiact_functions_stars.h b/src/runner_doiact_functions_stars.h
new file mode 100644
index 0000000000000000000000000000000000000000..b0d731857e9b4b0474e47c3ac3fca540eecb1cbb
--- /dev/null
+++ b/src/runner_doiact_functions_stars.h
@@ -0,0 +1,1332 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
+ *               2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+/* Before including this file, define FUNCTION, which is the
+   name of the interaction function. This creates the interaction functions
+   runner_dopair_FUNCTION, runner_dopair_FUNCTION_naive, runner_doself_FUNCTION,
+   and runner_dosub_FUNCTION calling the pairwise interaction function
+   runner_iact_FUNCTION. */
+
+#include "runner_doiact_stars.h"
+
+/**
+ * @brief Interact the active #spart of a cell with the #part of the same cell.
+ *
+ * @param r The #runner.
+ * @param c The #cell holding both the #spart and the #part.
+ * @param timer Timing flag; it is not read explicitly in this body.
+ */
+void DOSELF1_STARS(struct runner *r, struct cell *c, int timer) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (c->nodeID != engine_rank) error("Should be run on a different node");
+#endif
+
+  TIMER_TIC;
+
+  const struct engine *e = r->e;
+  const int with_cosmology = e->policy & engine_policy_cosmology;
+  const integertime_t ti_current = e->ti_current;
+  const struct cosmology *cosmo = e->cosmology;
+
+  /* Nothing to do without both gas and stars, or if the stars are inactive */
+  if (c->hydro.count == 0 || c->stars.count == 0) return;
+  if (!cell_is_active_stars(c, e)) return;
+
+  /* Cosmological terms */
+  const float a = cosmo->a;
+  const float H = cosmo->H;
+
+  const int scount = c->stars.count;
+  const int count = c->hydro.count;
+  struct spart *restrict sparts = c->stars.parts;
+  struct part *restrict parts = c->hydro.parts;
+  struct xpart *restrict xparts = c->hydro.xparts;
+
+  /* Loop over the sparts in c. */
+  for (int sid = 0; sid < scount; sid++) {
+
+    /* Get a hold of the ith spart in c. */
+    struct spart *restrict si = &sparts[sid];
+
+    /* Skip inactive particles */
+    if (!spart_is_active(si, e)) continue;
+
+    /* Skip sparts for which feedback is not active */
+    if (!feedback_is_active(si, e->time, cosmo, with_cosmology)) continue;
+
+    const float hi = si->h;
+    const float hig2 = hi * hi * kernel_gamma2;
+    const float six[3] = {(float)(si->x[0] - c->loc[0]),
+                          (float)(si->x[1] - c->loc[1]),
+                          (float)(si->x[2] - c->loc[2])};
+
+    /* Loop over the parts in c. */
+    for (int pjd = 0; pjd < count; pjd++) {
+
+      /* Get a pointer to the jth particle. */
+      struct part *restrict pj = &parts[pjd];
+      struct xpart *restrict xpj = &xparts[pjd];
+      const float hj = pj->h;
+
+      /* Skip inhibited particles. */
+      if (part_is_inhibited(pj, e)) continue;
+
+      /* Compute the pairwise distance (positions relative to c->loc). */
+      const float pjx[3] = {(float)(pj->x[0] - c->loc[0]),
+                            (float)(pj->x[1] - c->loc[1]),
+                            (float)(pj->x[2] - c->loc[2])};
+      float dx[3] = {six[0] - pjx[0], six[1] - pjx[1], six[2] - pjx[2]};
+      const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
+
+#ifdef SWIFT_DEBUG_CHECKS
+      /* Check that particles have been drifted to the current time */
+      if (pj->ti_drift != e->ti_current)
+        error("Particle pj not drifted to current time");
+#endif
+
+      if (r2 < hig2) {
+        IACT_STARS(r2, dx, hi, hj, si, pj, a, H);
+#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
+        runner_iact_nonsym_feedback_density(r2, dx, hi, hj, si, pj, xpj, cosmo,
+                                            ti_current);
+#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK)
+        runner_iact_nonsym_feedback_apply(r2, dx, hi, hj, si, pj, xpj, cosmo,
+                                          ti_current);
+#endif
+      }
+    } /* loop over the parts in c. */
+  }   /* loop over the sparts in c. */
+
+  TIMER_TOC(TIMER_DOSELF_STARS);
+}
+
+/**
+ * @brief Interact the active #spart of ci with every #part of cj (all pairs).
+ *
+ * @param r The #runner.
+ * @param ci The #cell providing the #spart.
+ * @param cj The #cell providing the #part.
+ */
+void DO_NONSYM_PAIR1_STARS_NAIVE(struct runner *r, struct cell *restrict ci,
+                                 struct cell *restrict cj) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
+  if (ci->nodeID != engine_rank) error("Should be run on a different node");
+#else
+  if (cj->nodeID != engine_rank) error("Should be run on a different node");
+#endif
+#endif
+
+  const struct engine *e = r->e;
+  const int with_cosmology = e->policy & engine_policy_cosmology;
+  const integertime_t ti_current = e->ti_current;
+  const struct cosmology *cosmo = e->cosmology;
+
+  /* Nothing to do without gas in cj and active stars in ci */
+  if (cj->hydro.count == 0 || ci->stars.count == 0) return;
+  if (!cell_is_active_stars(ci, e)) return;
+
+  /* Cosmological terms */
+  const float a = cosmo->a;
+  const float H = cosmo->H;
+
+  const int scount_i = ci->stars.count;
+  const int count_j = cj->hydro.count;
+  struct spart *restrict sparts_i = ci->stars.parts;
+  struct part *restrict parts_j = cj->hydro.parts;
+  struct xpart *restrict xparts_j = cj->hydro.xparts;
+
+  /* Get the relative shift between the pair, wrapping by e->s->dim. */
+  double shift[3] = {0.0, 0.0, 0.0};
+  for (int k = 0; k < 3; k++) {
+    if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2)
+      shift[k] = e->s->dim[k];
+    else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2)
+      shift[k] = -e->s->dim[k];
+  }
+
+  /* Loop over the sparts in ci. */
+  for (int sid = 0; sid < scount_i; sid++) {
+
+    /* Get a hold of the ith spart in ci. */
+    struct spart *restrict si = &sparts_i[sid];
+
+    /* Skip inactive particles */
+    if (!spart_is_active(si, e)) continue;
+
+    /* Skip sparts for which feedback is not active */
+    if (!feedback_is_active(si, e->time, cosmo, with_cosmology)) continue;
+
+    const float hi = si->h;
+    const float hig2 = hi * hi * kernel_gamma2;
+    const float six[3] = {(float)(si->x[0] - (cj->loc[0] + shift[0])),
+                          (float)(si->x[1] - (cj->loc[1] + shift[1])),
+                          (float)(si->x[2] - (cj->loc[2] + shift[2]))};
+
+    /* Loop over the parts in cj. */
+    for (int pjd = 0; pjd < count_j; pjd++) {
+
+      /* Get a pointer to the jth particle. */
+      struct part *restrict pj = &parts_j[pjd];
+      struct xpart *restrict xpj = &xparts_j[pjd];
+      const float hj = pj->h;
+
+      /* Skip inhibited particles. */
+      if (part_is_inhibited(pj, e)) continue;
+
+      /* Compute the pairwise distance (positions relative to cj->loc). */
+      const float pjx[3] = {(float)(pj->x[0] - cj->loc[0]),
+                            (float)(pj->x[1] - cj->loc[1]),
+                            (float)(pj->x[2] - cj->loc[2])};
+      float dx[3] = {six[0] - pjx[0], six[1] - pjx[1], six[2] - pjx[2]};
+      const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
+
+#ifdef SWIFT_DEBUG_CHECKS
+      /* Check that particles have been drifted to the current time */
+      if (pj->ti_drift != e->ti_current)
+        error("Particle pj not drifted to current time");
+#endif
+
+      if (r2 < hig2) {
+        IACT_STARS(r2, dx, hi, hj, si, pj, a, H);
+
+#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
+        runner_iact_nonsym_feedback_density(r2, dx, hi, hj, si, pj, xpj, cosmo,
+                                            ti_current);
+#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK)
+        runner_iact_nonsym_feedback_apply(r2, dx, hi, hj, si, pj, xpj, cosmo,
+                                          ti_current);
+#endif
+      }
+    } /* loop over the parts in cj. */
+  }   /* loop over the sparts in ci. */
+}
+
+/**
+ * @brief Compute the #spart-#part interactions between a cell pair.
+ *
+ * @param r The #runner.
+ * @param ci The first #cell.
+ * @param cj The second #cell.
+ * @param sid The direction of the pair (selects the sort lists to use).
+ * @param shift The shift vector to apply to the particles in ci.
+ */
+void DO_SYM_PAIR1_STARS(struct runner *r, struct cell *ci, struct cell *cj,
+                        const int sid, const double *shift) {
+
+  TIMER_TIC;
+
+  const struct engine *e = r->e;
+  const int with_cosmology = e->policy & engine_policy_cosmology;
+  const integertime_t ti_current = e->ti_current;
+  const struct cosmology *cosmo = e->cosmology;
+
+  /* Cosmological terms */
+  const float a = cosmo->a;
+  const float H = cosmo->H;
+
+  /* Get the cutoff shift. */
+  double rshift = 0.0;
+  for (int k = 0; k < 3; k++) rshift += shift[k] * runner_shift[sid][k];
+
+#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
+  const int do_ci_stars = (ci->nodeID == e->nodeID) && (ci->stars.count != 0) &&
+                          (cj->hydro.count != 0) && cell_is_active_stars(ci, e);
+  const int do_cj_stars = (cj->nodeID == e->nodeID) && (cj->stars.count != 0) &&
+                          (ci->hydro.count != 0) && cell_is_active_stars(cj, e);
+#else
+  /* Here the gas is updated, so the cell holding the gas must be local. */
+  const int do_ci_stars = (cj->nodeID == e->nodeID) && (ci->stars.count != 0) &&
+                          (cj->hydro.count != 0) && cell_is_active_stars(ci, e);
+  const int do_cj_stars = (ci->nodeID == e->nodeID) && (cj->stars.count != 0) &&
+                          (ci->hydro.count != 0) && cell_is_active_stars(cj, e);
+#endif
+
+  if (do_ci_stars) {
+
+    /* Pick-out the sorted lists. */
+    const struct sort_entry *restrict sort_j = cj->hydro.sort[sid];
+    const struct sort_entry *restrict sort_i = ci->stars.sort[sid];
+
+#ifdef SWIFT_DEBUG_CHECKS
+    /* Some constants used to checks that the parts are in the right frame */
+    const float shift_threshold_x =
+        2. * ci->width[0] +
+        2. * max(ci->stars.dx_max_part, cj->hydro.dx_max_part);
+    const float shift_threshold_y =
+        2. * ci->width[1] +
+        2. * max(ci->stars.dx_max_part, cj->hydro.dx_max_part);
+    const float shift_threshold_z =
+        2. * ci->width[2] +
+        2. * max(ci->stars.dx_max_part, cj->hydro.dx_max_part);
+#endif /* SWIFT_DEBUG_CHECKS */
+
+    /* Get some other useful values. */
+    const double hi_max = ci->stars.h_max * kernel_gamma - rshift;
+    const int count_i = ci->stars.count;
+    const int count_j = cj->hydro.count;
+    struct spart *restrict sparts_i = ci->stars.parts;
+    struct part *restrict parts_j = cj->hydro.parts;
+    struct xpart *restrict xparts_j = cj->hydro.xparts;
+    const double dj_min = sort_j[0].d;
+    const float dx_max_rshift =
+        (ci->stars.dx_max_sort + cj->hydro.dx_max_sort) - rshift;
+    const float dx_max = (ci->stars.dx_max_sort + cj->hydro.dx_max_sort);
+
+    /* Loop over the sparts in ci. */
+    for (int pid = count_i - 1;
+         pid >= 0 && sort_i[pid].d + hi_max + dx_max > dj_min; pid--) {
+
+      /* Get a hold of the ith spart in ci. */
+      struct spart *restrict spi = &sparts_i[sort_i[pid].i];
+      const float hi = spi->h;
+
+      /* Skip inactive particles */
+      if (!spart_is_active(spi, e)) continue;
+
+      /* Skip sparts for which feedback is not active */
+      if (!feedback_is_active(spi, e->time, cosmo, with_cosmology)) continue;
+
+      /* Compute distance from the other cell. */
+      const double px[3] = {spi->x[0], spi->x[1], spi->x[2]};
+      float dist = px[0] * runner_shift[sid][0] + px[1] * runner_shift[sid][1] +
+                   px[2] * runner_shift[sid][2];
+
+      /* Is there anything we need to interact with ? */
+      const double di = dist + hi * kernel_gamma + dx_max_rshift;
+      if (di < dj_min) continue;
+
+      /* Get some additional information about pi */
+      const float hig2 = hi * hi * kernel_gamma2;
+      const float pix = spi->x[0] - (cj->loc[0] + shift[0]);
+      const float piy = spi->x[1] - (cj->loc[1] + shift[1]);
+      const float piz = spi->x[2] - (cj->loc[2] + shift[2]);
+
+      /* Loop over the parts in cj. */
+      for (int pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) {
+
+        /* Recover pj */
+        struct part *pj = &parts_j[sort_j[pjd].i];
+        struct xpart *xpj = &xparts_j[sort_j[pjd].i];
+
+        /* Skip inhibited particles. */
+        if (part_is_inhibited(pj, e)) continue;
+
+        const float hj = pj->h;
+        const float pjx = pj->x[0] - cj->loc[0];
+        const float pjy = pj->x[1] - cj->loc[1];
+        const float pjz = pj->x[2] - cj->loc[2];
+
+        /* Compute the pairwise distance. */
+        float dx[3] = {pix - pjx, piy - pjy, piz - pjz};
+        const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
+
+#ifdef SWIFT_DEBUG_CHECKS
+        /* Check that particles are in the correct frame after the shifts */
+        if (pix > shift_threshold_x || pix < -shift_threshold_x)
+          error(
+              "Invalid particle position in X for pi (pix=%e ci->width[0]=%e)",
+              pix, ci->width[0]);
+        if (piy > shift_threshold_y || piy < -shift_threshold_y)
+          error(
+              "Invalid particle position in Y for pi (piy=%e ci->width[1]=%e)",
+              piy, ci->width[1]);
+        if (piz > shift_threshold_z || piz < -shift_threshold_z)
+          error(
+              "Invalid particle position in Z for pi (piz=%e ci->width[2]=%e)",
+              piz, ci->width[2]);
+        if (pjx > shift_threshold_x || pjx < -shift_threshold_x)
+          error(
+              "Invalid particle position in X for pj (pjx=%e ci->width[0]=%e)",
+              pjx, ci->width[0]);
+        if (pjy > shift_threshold_y || pjy < -shift_threshold_y)
+          error(
+              "Invalid particle position in Y for pj (pjy=%e ci->width[1]=%e)",
+              pjy, ci->width[1]);
+        if (pjz > shift_threshold_z || pjz < -shift_threshold_z)
+          error(
+              "Invalid particle position in Z for pj (pjz=%e ci->width[2]=%e)",
+              pjz, ci->width[2]);
+
+        /* Check that particles have been drifted to the current time */
+        if (spi->ti_drift != e->ti_current)
+          error("Particle spi not drifted to current time");
+        if (pj->ti_drift != e->ti_current)
+          error("Particle pj not drifted to current time");
+#endif
+
+        /* Hit or miss? */
+        if (r2 < hig2) {
+          IACT_STARS(r2, dx, hi, hj, spi, pj, a, H);
+
+#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
+          runner_iact_nonsym_feedback_density(r2, dx, hi, hj, spi, pj, xpj,
+                                              cosmo, ti_current);
+#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK)
+          runner_iact_nonsym_feedback_apply(r2, dx, hi, hj, spi, pj, xpj, cosmo,
+                                            ti_current);
+#endif
+        }
+      } /* loop over the parts in cj. */
+    }   /* loop over the sparts in ci. */
+  }     /* do_ci_stars */
+
+  if (do_cj_stars) {
+    /* Pick-out the sorted lists. */
+    const struct sort_entry *restrict sort_i = ci->hydro.sort[sid];
+    const struct sort_entry *restrict sort_j = cj->stars.sort[sid];
+
+#ifdef SWIFT_DEBUG_CHECKS
+    /* Some constants used to checks that the parts are in the right frame */
+    const float shift_threshold_x =
+        2. * ci->width[0] +
+        2. * max(ci->hydro.dx_max_part, cj->stars.dx_max_part);
+    const float shift_threshold_y =
+        2. * ci->width[1] +
+        2. * max(ci->hydro.dx_max_part, cj->stars.dx_max_part);
+    const float shift_threshold_z =
+        2. * ci->width[2] +
+        2. * max(ci->hydro.dx_max_part, cj->stars.dx_max_part);
+#endif /* SWIFT_DEBUG_CHECKS */
+
+    /* Get some other useful values (hj_max bounds the reach of cj's sparts). */
+    const double hj_max = cj->stars.h_max * kernel_gamma;
+    const int count_i = ci->hydro.count;
+    const int count_j = cj->stars.count;
+    struct part *restrict parts_i = ci->hydro.parts;
+    struct xpart *restrict xparts_i = ci->hydro.xparts;
+    struct spart *restrict sparts_j = cj->stars.parts;
+    const double di_max = sort_i[count_i - 1].d - rshift;
+    const float dx_max_rshift =
+        (ci->hydro.dx_max_sort + cj->stars.dx_max_sort) + rshift;
+    const float dx_max = (ci->hydro.dx_max_sort + cj->stars.dx_max_sort);
+
+    /* Loop over the sparts in cj. */
+    for (int pjd = 0; pjd < count_j && sort_j[pjd].d - hj_max - dx_max < di_max;
+         pjd++) {
+
+      /* Get a hold of the jth spart in cj. */
+      struct spart *spj = &sparts_j[sort_j[pjd].i];
+      const float hj = spj->h;
+
+      /* Skip inactive particles */
+      if (!spart_is_active(spj, e)) continue;
+
+      /* Skip sparts for which feedback is not active */
+      if (!feedback_is_active(spj, e->time, cosmo, with_cosmology)) continue;
+
+      /* Compute distance from the other cell. */
+      const double px[3] = {spj->x[0], spj->x[1], spj->x[2]};
+      float dist = px[0] * runner_shift[sid][0] + px[1] * runner_shift[sid][1] +
+                   px[2] * runner_shift[sid][2];
+
+      /* Is there anything we need to interact with ? */
+      const double dj = dist - hj * kernel_gamma - dx_max_rshift;
+      if (dj - rshift > di_max) continue;
+
+      /* Get some additional information about pj */
+      const float hjg2 = hj * hj * kernel_gamma2;
+      const float pjx = spj->x[0] - cj->loc[0];
+      const float pjy = spj->x[1] - cj->loc[1];
+      const float pjz = spj->x[2] - cj->loc[2];
+
+      /* Loop over the parts in ci. */
+      for (int pid = count_i - 1; pid >= 0 && sort_i[pid].d > dj; pid--) {
+
+        /* Recover pi */
+        struct part *pi = &parts_i[sort_i[pid].i];
+        struct xpart *xpi = &xparts_i[sort_i[pid].i];
+
+        /* Skip inhibited particles. */
+        if (part_is_inhibited(pi, e)) continue;
+
+        const float hi = pi->h;
+        const float pix = pi->x[0] - (cj->loc[0] + shift[0]);
+        const float piy = pi->x[1] - (cj->loc[1] + shift[1]);
+        const float piz = pi->x[2] - (cj->loc[2] + shift[2]);
+
+        /* Compute the pairwise distance. */
+        float dx[3] = {pjx - pix, pjy - piy, pjz - piz};
+        const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
+
+#ifdef SWIFT_DEBUG_CHECKS
+        /* Check that particles are in the correct frame after the shifts */
+        if (pix > shift_threshold_x || pix < -shift_threshold_x)
+          error(
+              "Invalid particle position in X for pi (pix=%e ci->width[0]=%e)",
+              pix, ci->width[0]);
+        if (piy > shift_threshold_y || piy < -shift_threshold_y)
+          error(
+              "Invalid particle position in Y for pi (piy=%e ci->width[1]=%e)",
+              piy, ci->width[1]);
+        if (piz > shift_threshold_z || piz < -shift_threshold_z)
+          error(
+              "Invalid particle position in Z for pi (piz=%e ci->width[2]=%e)",
+              piz, ci->width[2]);
+        if (pjx > shift_threshold_x || pjx < -shift_threshold_x)
+          error(
+              "Invalid particle position in X for pj (pjx=%e ci->width[0]=%e)",
+              pjx, ci->width[0]);
+        if (pjy > shift_threshold_y || pjy < -shift_threshold_y)
+          error(
+              "Invalid particle position in Y for pj (pjy=%e ci->width[1]=%e)",
+              pjy, ci->width[1]);
+        if (pjz > shift_threshold_z || pjz < -shift_threshold_z)
+          error(
+              "Invalid particle position in Z for pj (pjz=%e ci->width[2]=%e)",
+              pjz, ci->width[2]);
+
+        /* Check that particles have been drifted to the current time */
+        if (pi->ti_drift != e->ti_current)
+          error("Particle pi not drifted to current time");
+        if (spj->ti_drift != e->ti_current)
+          error("Particle spj not drifted to current time");
+#endif
+
+        /* Hit or miss? */
+        if (r2 < hjg2) {
+
+          IACT_STARS(r2, dx, hj, hi, spj, pi, a, H);
+
+#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
+          runner_iact_nonsym_feedback_density(r2, dx, hj, hi, spj, pi, xpi,
+                                              cosmo, ti_current);
+#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK)
+          runner_iact_nonsym_feedback_apply(r2, dx, hj, hi, spj, pi, xpi, cosmo,
+                                            ti_current);
+#endif
+        }
+      } /* loop over the parts in ci. */
+    }   /* loop over the sparts in cj. */
+  }     /* do_cj_stars */
+
+  TIMER_TOC(TIMER_DOPAIR_STARS);
+}
+
+void DOPAIR1_STARS_NAIVE(struct runner *r, struct cell *restrict ci,
+                         struct cell *restrict cj, int timer) {
+  /* All-pairs version: run the non-symmetric loop once per direction. */
+  TIMER_TIC;
+
+#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
+  const int do_ci_stars = ci->nodeID == r->e->nodeID;
+  const int do_cj_stars = cj->nodeID == r->e->nodeID;
+#else
+  /* Here the gas is updated, so the cell holding the gas must be local. */
+  const int do_ci_stars = cj->nodeID == r->e->nodeID;
+  const int do_cj_stars = ci->nodeID == r->e->nodeID;
+#endif
+  if (do_ci_stars && ci->stars.count != 0 && cj->hydro.count != 0)
+    DO_NONSYM_PAIR1_STARS_NAIVE(r, ci, cj);
+  if (do_cj_stars && cj->stars.count != 0 && ci->hydro.count != 0)
+    DO_NONSYM_PAIR1_STARS_NAIVE(r, cj, ci);
+
+  TIMER_TOC(TIMER_DOPAIR_STARS);
+}
+
+/**
+ * @brief Compute the interactions between a cell pair, but only for the
+ *      given indices in ci.
+ *
+ * Version walking the sorted #part list of @c cj to cut the search short.
+ *
+ * @param r The #runner.
+ * @param ci The first #cell.
+ * @param sparts_i The #spart array that @c ind indexes into.
+ * @param ind The list of indices of particles in @c ci to interact with.
+ * @param scount The number of particles in @c ind.
+ * @param cj The second #cell.
+ * @param sid The direction of the pair.
+ * @param flipped If zero, walk cj's sort list from its start; else from its end.
+ * @param shift The shift vector to apply to the particles in ci.
+ */
+void DOPAIR1_SUBSET_STARS(struct runner *r, struct cell *restrict ci,
+                          struct spart *restrict sparts_i, int *restrict ind,
+                          int scount, struct cell *restrict cj, const int sid,
+                          const int flipped, const double *shift) {
+
+  const struct engine *e = r->e;
+  const integertime_t ti_current = e->ti_current;
+  const struct cosmology *cosmo = e->cosmology;
+
+  /* Cosmological terms */
+  const float a = cosmo->a;
+  const float H = cosmo->H;
+
+  const int count_j = cj->hydro.count;
+  struct part *restrict parts_j = cj->hydro.parts;
+  struct xpart *restrict xparts_j = cj->hydro.xparts;
+
+  /* Early abort? */
+  if (count_j == 0) return;
+
+  /* Pick-out the sorted lists. */
+  const struct sort_entry *restrict sort_j = cj->hydro.sort[sid];
+  const float dxj = cj->hydro.dx_max_sort;
+
+  /* Sparts are on the left? */
+  if (!flipped) {
+
+    /* Loop over the sparts_i. */
+    for (int pid = 0; pid < scount; pid++) {
+
+      /* Get a hold of the ith spart in ci. */
+      struct spart *restrict spi = &sparts_i[ind[pid]];
+      const double pix = spi->x[0] - (shift[0]);
+      const double piy = spi->x[1] - (shift[1]);
+      const double piz = spi->x[2] - (shift[2]);
+      const float hi = spi->h;
+      const float hig2 = hi * hi * kernel_gamma2;
+      const double di = hi * kernel_gamma + dxj + pix * runner_shift[sid][0] +
+                        piy * runner_shift[sid][1] + piz * runner_shift[sid][2];
+
+      /* Loop over the parts in cj while their sort distance is below di. */
+      for (int pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) {
+
+        /* Get a pointer to the jth particle. */
+        struct part *restrict pj = &parts_j[sort_j[pjd].i];
+        struct xpart *restrict xpj = &xparts_j[sort_j[pjd].i];
+
+        /* Skip inhibited particles. */
+        if (part_is_inhibited(pj, e)) continue;
+
+        const double pjx = pj->x[0];
+        const double pjy = pj->x[1];
+        const double pjz = pj->x[2];
+        const float hj = pj->h;
+
+        /* Compute the pairwise distance. */
+        float dx[3] = {(float)(pix - pjx), (float)(piy - pjy),
+                       (float)(piz - pjz)};
+        const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
+
+#ifdef SWIFT_DEBUG_CHECKS
+        /* Check that particles have been drifted to the current time */
+        if (spi->ti_drift != e->ti_current)
+          error("Particle pi not drifted to current time");
+        if (pj->ti_drift != e->ti_current)
+          error("Particle pj not drifted to current time");
+#endif
+
+        /* Hit or miss? */
+        if (r2 < hig2) {
+          IACT_STARS(r2, dx, hi, hj, spi, pj, a, H);
+
+#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
+          runner_iact_nonsym_feedback_density(r2, dx, hi, hj, spi, pj, xpj,
+                                              cosmo, ti_current);
+#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK)
+          runner_iact_nonsym_feedback_apply(r2, dx, hi, hj, spi, pj, xpj, cosmo,
+                                            ti_current);
+#endif
+        }
+      } /* loop over the parts in cj. */
+    }   /* loop over the sparts in ci. */
+  }
+
+  /* Sparts are on the right. */
+  else {
+
+    /* Loop over the sparts_i. */
+    for (int pid = 0; pid < scount; pid++) {
+
+      /* Get a hold of the ith spart in ci. */
+      struct spart *restrict spi = &sparts_i[ind[pid]];
+      const double pix = spi->x[0] - (shift[0]);
+      const double piy = spi->x[1] - (shift[1]);
+      const double piz = spi->x[2] - (shift[2]);
+      const float hi = spi->h;
+      const float hig2 = hi * hi * kernel_gamma2;
+      const double di = -hi * kernel_gamma - dxj + pix * runner_shift[sid][0] +
+                        piy * runner_shift[sid][1] + piz * runner_shift[sid][2];
+
+      /* Loop over the parts in cj while their sort distance is above di. */
+      for (int pjd = count_j - 1; pjd >= 0 && di < sort_j[pjd].d; pjd--) {
+
+        /* Get a pointer to the jth particle. */
+        struct part *restrict pj = &parts_j[sort_j[pjd].i];
+        struct xpart *restrict xpj = &xparts_j[sort_j[pjd].i];
+
+        /* Skip inhibited particles. */
+        if (part_is_inhibited(pj, e)) continue;
+
+        const double pjx = pj->x[0];
+        const double pjy = pj->x[1];
+        const double pjz = pj->x[2];
+        const float hj = pj->h;
+
+        /* Compute the pairwise distance. */
+        float dx[3] = {(float)(pix - pjx), (float)(piy - pjy),
+                       (float)(piz - pjz)};
+        const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
+
+#ifdef SWIFT_DEBUG_CHECKS
+        /* Check that particles have been drifted to the current time */
+        if (spi->ti_drift != e->ti_current)
+          error("Particle pi not drifted to current time");
+        if (pj->ti_drift != e->ti_current)
+          error("Particle pj not drifted to current time");
+#endif
+
+        /* Hit or miss? */
+        if (r2 < hig2) {
+          IACT_STARS(r2, dx, hi, hj, spi, pj, a, H);
+
+#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
+          runner_iact_nonsym_feedback_density(r2, dx, hi, hj, spi, pj, xpj,
+                                              cosmo, ti_current);
+#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK)
+          runner_iact_nonsym_feedback_apply(r2, dx, hi, hj, spi, pj, xpj, cosmo,
+                                            ti_current);
+#endif
+        }
+      } /* loop over the parts in cj. */
+    }   /* loop over the sparts in ci. */
+  }
+}
+
+/**
+ * @brief Interact a subset of the star particles of @c ci with the hydro
+ *      particles of @c cj.
+ *
+ * Brute-force version: each listed #spart is tested against every #part.
+ *
+ * @param r The #runner.
+ * @param ci The first #cell.
+ * @param sparts_i The #spart array that @c ind indexes into.
+ * @param ind The list of indices of particles in @c ci to interact with.
+ * @param scount The number of particles in @c ind.
+ * @param cj The second #cell, whose hydro particles are the neighbours.
+ * @param shift The shift vector subtracted from the spart positions.
+ */
+void DOPAIR1_SUBSET_STARS_NAIVE(struct runner *r, struct cell *restrict ci,
+                                struct spart *restrict sparts_i,
+                                int *restrict ind, int scount,
+                                struct cell *restrict cj, const double *shift) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (ci->nodeID != engine_rank) error("Should be run on a different node");
+#endif
+
+  const struct engine *e = r->e;
+  const integertime_t ti_current = e->ti_current;
+  const struct cosmology *cosmo = e->cosmology;
+
+  /* Cosmological terms */
+  const float a = cosmo->a;
+  const float H = cosmo->H;
+
+  const int count_j = cj->hydro.count;
+  struct part *restrict parts_j = cj->hydro.parts;
+  struct xpart *restrict xparts_j = cj->hydro.xparts;
+
+  /* Early abort if cj holds no hydro particles. */
+  if (count_j == 0) return;
+
+  /* Loop over the selected sparts of ci. */
+  for (int pid = 0; pid < scount; pid++) {
+
+    /* Get a hold of the ith spart in ci and shift its position. */
+    struct spart *restrict spi = &sparts_i[ind[pid]];
+
+    const double pix = spi->x[0] - (shift[0]);
+    const double piy = spi->x[1] - (shift[1]);
+    const double piz = spi->x[2] - (shift[2]);
+    const float hi = spi->h;
+    const float hig2 = hi * hi * kernel_gamma2;
+
+#ifdef SWIFT_DEBUG_CHECKS
+    if (!spart_is_active(spi, e))
+      error("Trying to correct smoothing length of inactive particle !");
+#endif
+
+    /* Loop over all the hydro particles in cj. */
+    for (int pjd = 0; pjd < count_j; pjd++) {
+
+      /* Get a pointer to the jth particle. */
+      struct part *restrict pj = &parts_j[pjd];
+      struct xpart *restrict xpj = &xparts_j[pjd];
+
+      /* Skip inhibited particles */
+      if (part_is_inhibited(pj, e)) continue;
+
+      const double pjx = pj->x[0];
+      const double pjy = pj->x[1];
+      const double pjz = pj->x[2];
+      const float hj = pj->h;
+
+      /* Compute the pairwise distance (difference in double, stored as float). */
+      float dx[3] = {(float)(pix - pjx), (float)(piy - pjy),
+                     (float)(piz - pjz)};
+      const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
+
+#ifdef SWIFT_DEBUG_CHECKS
+      /* Check that particles have been drifted to the current time */
+      if (pj->ti_drift != e->ti_current)
+        error("Particle pj not drifted to current time");
+#endif
+      /* Hit or miss? Only the spart's own search radius is tested. */
+      if (r2 < hig2) {
+        IACT_STARS(r2, dx, hi, hj, spi, pj, a, H);
+
+#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
+        runner_iact_nonsym_feedback_density(r2, dx, hi, hj, spi, pj, xpj, cosmo,
+                                            ti_current);
+#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK)
+        runner_iact_nonsym_feedback_apply(r2, dx, hi, hj, spi, pj, xpj, cosmo,
+                                          ti_current);
+#endif
+      }
+    } /* loop over the parts in cj. */
+  }   /* loop over the sparts in ci. */
+}
+
+/**
+ * @brief Interact a subset of the star particles of @c ci with the hydro
+ *      particles of the same cell.
+ *
+ * @param r The #runner.
+ * @param ci The #cell holding both the sparts and the hydro particles.
+ * @param sparts The #spart array that @c ind indexes into.
+ * @param ind The list of indices of particles in @c ci to interact with.
+ * @param scount The number of particles in @c ind.
+ */
+void DOSELF1_SUBSET_STARS(struct runner *r, struct cell *restrict ci,
+                          struct spart *restrict sparts, int *restrict ind,
+                          int scount) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (ci->nodeID != engine_rank) error("Should be run on a different node");
+#endif
+
+  const struct engine *e = r->e;
+  const integertime_t ti_current = e->ti_current;
+  const struct cosmology *cosmo = e->cosmology;
+
+  /* Cosmological terms */
+  const float a = cosmo->a;
+  const float H = cosmo->H;
+
+  const int count_i = ci->hydro.count;
+  struct part *restrict parts_j = ci->hydro.parts;
+  struct xpart *restrict xparts_j = ci->hydro.xparts;
+
+  /* Early abort if the cell holds no hydro particles. */
+  if (count_i == 0) return;
+
+  /* Loop over the selected sparts of ci. */
+  for (int spid = 0; spid < scount; spid++) {
+
+    /* Get a hold of the ith spart and its position relative to the cell. */
+    struct spart *spi = &sparts[ind[spid]];
+    const float spix[3] = {(float)(spi->x[0] - ci->loc[0]),
+                           (float)(spi->x[1] - ci->loc[1]),
+                           (float)(spi->x[2] - ci->loc[2])};
+    const float hi = spi->h;
+    const float hig2 = hi * hi * kernel_gamma2;
+
+#ifdef SWIFT_DEBUG_CHECKS
+    if (!spart_is_active(spi, e))
+      error("Inactive particle in subset function!");
+#endif
+
+    /* Loop over the hydro particles of ci. */
+    for (int pjd = 0; pjd < count_i; pjd++) {
+
+      /* Get a pointer to the jth particle. */
+      struct part *restrict pj = &parts_j[pjd];
+      struct xpart *restrict xpj = &xparts_j[pjd];
+
+      /* Skip inhibited particles. */
+      if (part_is_inhibited(pj, e)) continue;
+
+      /* Compute the pairwise distance from the cell-relative positions. */
+      const float pjx[3] = {(float)(pj->x[0] - ci->loc[0]),
+                            (float)(pj->x[1] - ci->loc[1]),
+                            (float)(pj->x[2] - ci->loc[2])};
+      float dx[3] = {spix[0] - pjx[0], spix[1] - pjx[1], spix[2] - pjx[2]};
+      const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
+
+#ifdef SWIFT_DEBUG_CHECKS
+      /* Check that particles have been drifted to the current time */
+      if (pj->ti_drift != e->ti_current)
+        error("Particle pj not drifted to current time");
+#endif
+
+      /* Hit or miss? Only the spart's own search radius is tested. */
+      if (r2 < hig2) {
+        IACT_STARS(r2, dx, hi, pj->h, spi, pj, a, H);
+#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
+        runner_iact_nonsym_feedback_density(r2, dx, hi, pj->h, spi, pj, xpj,
+                                            cosmo, ti_current);
+#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK)
+        runner_iact_nonsym_feedback_apply(r2, dx, hi, pj->h, spi, pj, xpj,
+                                          cosmo, ti_current);
+#endif
+      }
+    } /* loop over the hydro parts in ci. */
+  }   /* loop over the sparts in ci. */
+}
+
+/**
+ * @brief Entry point for the subset self-interaction of stars. Currently
+ * forwards all arguments unchanged to DOSELF1_SUBSET_STARS.
+ *
+ * @param r The #runner.
+ * @param ci The first #cell.
+ * @param sparts The #spart to interact.
+ * @param ind The list of indices of particles in @c ci to interact with.
+ * @param scount The number of particles in @c ind.
+ */
+void DOSELF1_SUBSET_BRANCH_STARS(struct runner *r, struct cell *restrict ci,
+                                 struct spart *restrict sparts,
+                                 int *restrict ind, int scount) {
+
+  DOSELF1_SUBSET_STARS(r, ci, sparts, ind, scount);
+}
+
+/**
+ * @brief Pick the naive or the sorted version of DOPAIR1_SUBSET_STARS for a
+ * pair of cells, after working out the wrapping shift and the sort direction.
+ * Nothing is done if @c cj holds no hydro particles.
+ *
+ * @param r The #runner.
+ * @param ci The first #cell.
+ * @param sparts_i The #spart to interact with @c cj.
+ * @param ind The list of indices of particles in @c ci to interact with.
+ * @param scount The number of particles in @c ind.
+ * @param cj The second #cell.
+ */
+void DOPAIR1_SUBSET_BRANCH_STARS(struct runner *r, struct cell *restrict ci,
+                                 struct spart *restrict sparts_i,
+                                 int *restrict ind, int scount,
+                                 struct cell *restrict cj) {
+
+  const struct engine *e = r->e;
+
+  /* Anything to do here? */
+  if (cj->hydro.count == 0) return;
+
+  /* Get the relative distance between the pairs, wrapping. */
+  double shift[3] = {0.0, 0.0, 0.0};
+  for (int k = 0; k < 3; k++) {
+    if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2)
+      shift[k] = e->s->dim[k];
+    else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2)
+      shift[k] = -e->s->dim[k];
+  }
+
+#ifdef SWIFT_USE_NAIVE_INTERACTIONS_STARS
+  DOPAIR1_SUBSET_STARS_NAIVE(r, ci, sparts_i, ind, scount, cj, shift);
+#else
+  /* Get the sorting index: one base-3 digit (0, 1 or 2) per axis. */
+  int sid = 0;
+  for (int k = 0; k < 3; k++)
+    sid = 3 * sid + ((cj->loc[k] - ci->loc[k] + shift[k] < 0)
+                         ? 0
+                         : (cj->loc[k] - ci->loc[k] + shift[k] > 0) ? 2 : 1);
+
+  /* Switch the cells around? Read the flip flag before sid is remapped. */
+  const int flipped = runner_flip[sid];
+  sid = sortlistID[sid];
+
+  /* Has the cell cj been sorted along this direction, and recently enough? */
+  if (!(cj->hydro.sorted & (1 << sid)) ||
+      cj->hydro.dx_max_sort_old > space_maxreldx * cj->dmin)
+    error("Interacting unsorted cells.");
+
+  DOPAIR1_SUBSET_STARS(r, ci, sparts_i, ind, scount, cj, sid, flipped, shift);
+#endif
+}
+
+void DOSUB_SUBSET_STARS(struct runner *r, struct cell *ci, struct spart *sparts,
+                        int *ind, int scount, struct cell *cj, int gettimer) {
+  /* Interact the sparts ind[0..scount) with ci itself (cj NULL) or with cj. */
+  const struct engine *e = r->e;
+  struct space *s = e->s;
+
+  /* Should we even bother? Return if neither cell is active for stars. */
+  if (!cell_is_active_stars(ci, e) &&
+      (cj == NULL || !cell_is_active_stars(cj, e)))
+    return;
+
+  /* Find the progeny of ci whose spart array contains the first spart. */
+  struct cell *sub = NULL;
+  if (ci->split) {
+    for (int k = 0; k < 8; k++) {
+      if (ci->progeny[k] != NULL) {
+        if (&sparts[ind[0]] >= &ci->progeny[k]->stars.parts[0] &&
+            &sparts[ind[0]] <
+                &ci->progeny[k]->stars.parts[ci->progeny[k]->stars.count]) {
+          sub = ci->progeny[k];
+          break;
+        }
+      }
+    }
+  }
+
+  /* Is this a single cell? */
+  if (cj == NULL) {
+
+    /* Recurse? */
+    if (cell_can_recurse_in_self_stars_task(ci)) {
+      /* Do the sub-cell holding the sparts, then pair it with its siblings. */
+      if (sub == NULL) error("No progeny of ci contains the sparts.");
+      DOSUB_SUBSET_STARS(r, sub, sparts, ind, scount, NULL, 0);
+      for (int j = 0; j < 8; j++)
+        if (ci->progeny[j] != sub && ci->progeny[j] != NULL)
+          DOSUB_SUBSET_STARS(r, sub, sparts, ind, scount, ci->progeny[j], 0);
+
+    }
+
+    /* Otherwise, compute self-interaction. */
+    else
+      DOSELF1_SUBSET_BRANCH_STARS(r, ci, sparts, ind, scount);
+  } /* self-interaction. */
+
+  /* Otherwise, it's a pair interaction. */
+  else {
+
+    /* Recurse? */
+    if (cell_can_recurse_in_pair_stars_task(ci, cj) &&
+        cell_can_recurse_in_pair_stars_task(cj, ci)) {
+
+      /* Get the type of pair and flip ci/cj if needed. */
+      double shift[3] = {0.0, 0.0, 0.0};
+      const int sid = space_getsid(s, &ci, &cj, shift);
+
+      /* After a flip sub may be a progeny of cj, hence the two tests below. */
+      struct cell_split_pair *csp = &cell_split_pairs[sid];
+      for (int k = 0; k < csp->count; k++) {
+        const int pid = csp->pairs[k].pid;
+        const int pjd = csp->pairs[k].pjd;
+        if (ci->progeny[pid] == sub && cj->progeny[pjd] != NULL)
+          DOSUB_SUBSET_STARS(r, ci->progeny[pid], sparts, ind, scount,
+                             cj->progeny[pjd], 0);
+        if (ci->progeny[pid] != NULL && cj->progeny[pjd] == sub)
+          DOSUB_SUBSET_STARS(r, cj->progeny[pjd], sparts, ind, scount,
+                             ci->progeny[pid], 0);
+      }
+    }
+
+    /* Otherwise, compute the pair directly. */
+    else if (cell_is_active_stars(ci, e) && cj->hydro.count > 0) {
+
+      /* Both cells must already be drifted; this only checks, never drifts. */
+      if (cell_is_active_stars(ci, e)) {
+        if (!cell_are_spart_drifted(ci, e)) error("Cell should be drifted!");
+        if (!cell_are_part_drifted(cj, e)) error("Cell should be drifted!");
+      }
+
+      DOPAIR1_SUBSET_BRANCH_STARS(r, ci, sparts, ind, scount, cj);
+    }
+
+  } /* otherwise, pair interaction. */
+}
+
+/**
+ * @brief Run DOSELF1_STARS on a cell that holds sparts and is active for
+ * stars; errors out if h_max_old * kernel_gamma exceeds the cell's dmin.
+ *
+ * @param r #runner
+ * @param c #cell c
+ *
+ */
+void DOSELF1_BRANCH_STARS(struct runner *r, struct cell *c) {
+
+  const struct engine *restrict e = r->e;
+
+  /* Nothing to do without star particles. */
+  if (c->stars.count == 0) return;
+
+  /* Nothing to do if the cell is not active for stars. */
+  if (!cell_is_active_stars(c, e)) return;
+
+  /* Did we mess up the recursion? */
+  if (c->stars.h_max_old * kernel_gamma > c->dmin)
+    error("Cell smaller than smoothing length");
+
+  DOSELF1_STARS(r, c, 1);
+}
+
+#define RUNNER_CHECK_SORT(TYPE, PART, cj, ci, sid)                          \
+  ({                                                                        \
+    const struct sort_entry *restrict sort_j = cj->TYPE.sort[sid];          \
+    /* Compare stored sort distances with freshly projected positions. */   \
+    for (int pjd = 0; pjd < cj->TYPE.count; pjd++) {                        \
+      const struct PART *p = &cj->TYPE.parts[sort_j[pjd].i];                \
+      if (PART##_is_inhibited(p, e)) continue;                              \
+                                                                            \
+      const float d = p->x[0] * runner_shift[sid][0] +                      \
+                      p->x[1] * runner_shift[sid][1] +                      \
+                      p->x[2] * runner_shift[sid][2];                       \
+      if ((fabsf(d - sort_j[pjd].d) - cj->TYPE.dx_max_sort) >               \
+              1.0e-4 * max(fabsf(d), cj->TYPE.dx_max_sort_old) &&           \
+          (fabsf(d - sort_j[pjd].d) - cj->TYPE.dx_max_sort) >               \
+              cj->width[0] * 1.0e-10)                                       \
+        error(                                                              \
+            "particle shift diff exceeds dx_max_sort in cell cj. "          \
+            "cj->nodeID=%d "                                                \
+            "ci->nodeID=%d d=%e sort_j[pjd].d=%e cj->" #TYPE                \
+            ".dx_max_sort=%e "                                              \
+            "cj->" #TYPE                                                    \
+            ".dx_max_sort_old=%e, cellID=%i super->cellID=%i "              \
+            "cj->depth=%d cj->maxdepth=%d",                                 \
+            cj->nodeID, ci->nodeID, d, sort_j[pjd].d, cj->TYPE.dx_max_sort, \
+            cj->TYPE.dx_max_sort_old, cj->cellID, cj->hydro.super->cellID,  \
+            cj->depth, cj->maxdepth);                                       \
+    }                                                                       \
+  })
+
+/**
+ * @brief Check activity, drift and sort state of a cell pair and, if at least
+ * one direction has work to do, run the star-gas pair interaction (naive or
+ * sorted, depending on SWIFT_USE_NAIVE_INTERACTIONS_STARS).
+ *
+ * @param r #runner
+ * @param ci #cell ci (passed by address to space_getsid() together with cj)
+ * @param cj #cell cj
+ *
+ */
+void DOPAIR1_BRANCH_STARS(struct runner *r, struct cell *ci, struct cell *cj) {
+
+  const struct engine *restrict e = r->e;
+
+  /* Get the sort ID. */
+  double shift[3] = {0.0, 0.0, 0.0};
+  const int sid = space_getsid(e->s, &ci, &cj, shift);
+
+  const int ci_active = cell_is_active_stars(ci, e);
+  const int cj_active = cell_is_active_stars(cj, e);
+#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
+  const int do_ci_stars = ci->nodeID == e->nodeID;
+  const int do_cj_stars = cj->nodeID == e->nodeID;
+#else
+  /* here we are updating the hydro -> switch ci, cj */
+  const int do_ci_stars = cj->nodeID == e->nodeID;
+  const int do_cj_stars = ci->nodeID == e->nodeID;
+#endif
+  const int do_ci = (ci->stars.count != 0 && cj->hydro.count != 0 &&
+                     ci_active && do_ci_stars);
+  const int do_cj = (cj->stars.count != 0 && ci->hydro.count != 0 &&
+                     cj_active && do_cj_stars);
+
+  /* Anything to do here? */
+  if (!do_ci && !do_cj) return;
+
+  /* Stars of ci on gas of cj: check that both sides are drifted. */
+  if (do_ci &&
+      (!cell_are_spart_drifted(ci, e) || !cell_are_part_drifted(cj, e)))
+    error("Interacting undrifted cells.");
+
+  /* Have the sparts of ci and the parts of cj been sorted along sid? */
+  if (do_ci && (!(ci->stars.sorted & (1 << sid)) ||
+                ci->stars.dx_max_sort_old > space_maxreldx * ci->dmin))
+    error("Interacting unsorted cells.");
+
+  if (do_ci && (!(cj->hydro.sorted & (1 << sid)) ||
+                cj->hydro.dx_max_sort_old > space_maxreldx * cj->dmin))
+    error("Interacting unsorted cells.");
+
+  /* Stars of cj on gas of ci: same drift check, roles swapped. */
+  if (do_cj &&
+      (!cell_are_part_drifted(ci, e) || !cell_are_spart_drifted(cj, e)))
+    error("Interacting undrifted cells.");
+
+  /* Have the parts of ci and the sparts of cj been sorted along sid? */
+  if (do_cj && (!(ci->hydro.sorted & (1 << sid)) ||
+                ci->hydro.dx_max_sort_old > space_maxreldx * ci->dmin))
+    error("Interacting unsorted cells.");
+
+  if (do_cj && (!(cj->stars.sorted & (1 << sid)) ||
+                cj->stars.dx_max_sort_old > space_maxreldx * cj->dmin))
+    error("Interacting unsorted cells.");
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (do_ci) {
+    // MATTHIEU: This test is faulty. To be fixed...
+    // RUNNER_CHECK_SORT(hydro, part, cj, ci, sid);
+    RUNNER_CHECK_SORT(stars, spart, ci, cj, sid);
+  }
+
+  if (do_cj) {
+    // MATTHIEU: This test is faulty. To be fixed...
+    // RUNNER_CHECK_SORT(hydro, part, ci, cj, sid);
+    RUNNER_CHECK_SORT(stars, spart, cj, ci, sid);
+  }
+#endif /* SWIFT_DEBUG_CHECKS */
+
+#ifdef SWIFT_USE_NAIVE_INTERACTIONS_STARS
+  DOPAIR1_STARS_NAIVE(r, ci, cj, 1);
+#else
+  DO_SYM_PAIR1_STARS(r, ci, cj, sid, shift);
+#endif
+}
+
+/**
+ * @brief Compute grouped sub-cell interactions for pairs
+ *
+ * @param r The #runner.
+ * @param ci The first #cell.
+ * @param cj The second #cell.
+ * @param gettimer Should the time spent here be added to the timer ?
+ *
+ * @todo Hard-code the sid on the recursive calls to avoid the
+ * redundant computations to find the sid on-the-fly.
+ */
+void DOSUB_PAIR1_STARS(struct runner *r, struct cell *ci, struct cell *cj,
+                       int gettimer) {
+
+  TIMER_TIC;
+
+  struct space *s = r->e->s;
+  const struct engine *e = r->e;
+
+  /* Should we even bother? Either direction needs active stars and gas. */
+  const int should_do_ci = ci->stars.count != 0 && cj->hydro.count != 0 &&
+                           cell_is_active_stars(ci, e);
+  const int should_do_cj = cj->stars.count != 0 && ci->hydro.count != 0 &&
+                           cell_is_active_stars(cj, e);
+  if (!should_do_ci && !should_do_cj) return;
+
+  /* Get the type of pair and flip ci/cj if needed. */
+  double shift[3] = {0.0, 0.0, 0.0};
+  const int sid = space_getsid(s, &ci, &cj, shift);
+
+  /* Recurse? */
+  if (cell_can_recurse_in_pair_stars_task(ci, cj) &&
+      cell_can_recurse_in_pair_stars_task(cj, ci)) {
+    struct cell_split_pair *csp = &cell_split_pairs[sid];
+    for (int k = 0; k < csp->count; k++) {
+      const int pid = csp->pairs[k].pid;
+      const int pjd = csp->pairs[k].pjd;
+      if (ci->progeny[pid] != NULL && cj->progeny[pjd] != NULL)
+        DOSUB_PAIR1_STARS(r, ci->progeny[pid], cj->progeny[pjd], 0);
+    }
+  }
+
+  /* Otherwise, compute the pair directly. */
+  else {
+
+#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
+    const int do_ci_stars = ci->nodeID == e->nodeID;
+    const int do_cj_stars = cj->nodeID == e->nodeID;
+#else
+    /* here we are updating the hydro -> switch ci, cj */
+    const int do_ci_stars = cj->nodeID == e->nodeID;
+    const int do_cj_stars = ci->nodeID == e->nodeID;
+#endif
+    const int do_ci = ci->stars.count != 0 && cj->hydro.count != 0 &&
+                      cell_is_active_stars(ci, e) && do_ci_stars;
+    const int do_cj = cj->stars.count != 0 && ci->hydro.count != 0 &&
+                      cell_is_active_stars(cj, e) && do_cj_stars;
+
+    if (do_ci) {
+
+      /* Make sure both cells are drifted to the current timestep. */
+      if (!cell_are_spart_drifted(ci, e))
+        error("Interacting undrifted cells (sparts).");
+
+      if (!cell_are_part_drifted(cj, e))
+        error("Interacting undrifted cells (parts).");
+
+      /* Both cells must already be sorted along sid; nothing is sorted here. */
+      if (!(ci->stars.sorted & (1 << sid)) ||
+          ci->stars.dx_max_sort_old > ci->dmin * space_maxreldx) {
+        error("Interacting unsorted cell (sparts).");
+      }
+
+      if (!(cj->hydro.sorted & (1 << sid)) ||
+          cj->hydro.dx_max_sort_old > cj->dmin * space_maxreldx)
+        error("Interacting unsorted cell (parts). %i", cj->nodeID);
+    }
+
+    if (do_cj) {
+
+      /* Make sure both cells are drifted to the current timestep. */
+      if (!cell_are_part_drifted(ci, e))
+        error("Interacting undrifted cells (parts).");
+
+      if (!cell_are_spart_drifted(cj, e))
+        error("Interacting undrifted cells (sparts).");
+
+      /* Both cells must already be sorted along sid; nothing is sorted here. */
+      if (!(ci->hydro.sorted & (1 << sid)) ||
+          ci->hydro.dx_max_sort_old > ci->dmin * space_maxreldx) {
+        error("Interacting unsorted cell (parts).");
+      }
+
+      if (!(cj->stars.sorted & (1 << sid)) ||
+          cj->stars.dx_max_sort_old > cj->dmin * space_maxreldx) {
+        error("Interacting unsorted cell (sparts).");
+      }
+    }
+
+    if (do_ci || do_cj) DOPAIR1_BRANCH_STARS(r, ci, cj);
+  }
+
+  /* Recursive calls pass gettimer == 0 so nested time is not counted twice. */
+  if (gettimer) TIMER_TOC(TIMER_DOSUB_PAIR_STARS);
+}
+
+/**
+ * @brief Compute grouped sub-cell interactions for self tasks
+ *
+ * @param r The #runner.
+ * @param ci The first #cell.
+ * @param gettimer Should the time spent here be added to the timer ?
+ */
+void DOSUB_SELF1_STARS(struct runner *r, struct cell *ci, int gettimer) {
+
+  TIMER_TIC;
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (ci->nodeID != engine_rank)
+    error("This function should not be called on foreign cells");
+#endif
+
+  /* Should we even bother? The cell needs gas, stars and active stars. */
+  if (ci->hydro.count == 0 || ci->stars.count == 0 ||
+      !cell_is_active_stars(ci, r->e))
+    return;
+
+  /* Recurse? */
+  if (cell_can_recurse_in_self_stars_task(ci)) {
+
+    /* Loop over all progeny: self on each, then pair with later siblings. */
+    for (int k = 0; k < 8; k++)
+      if (ci->progeny[k] != NULL) {
+        DOSUB_SELF1_STARS(r, ci->progeny[k], 0);
+        for (int j = k + 1; j < 8; j++)
+          if (ci->progeny[j] != NULL)
+            DOSUB_PAIR1_STARS(r, ci->progeny[k], ci->progeny[j], 0);
+      }
+  }
+
+  /* Otherwise, compute self-interaction. */
+  else {
+
+    /* The sparts must already be drifted; this only checks, never drifts. */
+    if (!cell_are_spart_drifted(ci, r->e)) error("Interacting undrifted cell.");
+
+    DOSELF1_BRANCH_STARS(r, ci);
+  }
+
+  /* Recursive calls pass gettimer == 0 so nested time is not counted twice. */
+  if (gettimer) TIMER_TOC(TIMER_DOSUB_SELF_STARS);
+}
diff --git a/src/runner_doiact_grav.c b/src/runner_doiact_grav.c
new file mode 100644
index 0000000000000000000000000000000000000000..372b7524ecc743735b82c146984a1e2f14203c4d
--- /dev/null
+++ b/src/runner_doiact_grav.c
@@ -0,0 +1,1825 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2013 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
+ *               2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+#include "../config.h"
+
+/* This object's header. */
+#include "runner_doiact_grav.h"
+
+/* Local includes. */
+#include "active.h"
+#include "cell.h"
+#include "gravity.h"
+#include "gravity_cache.h"
+#include "gravity_iact.h"
+#include "inline.h"
+#include "part.h"
+#include "space_getsid.h"
+#include "timers.h"
+
+/**
+ * @brief Recursively propagate the multipoles down the tree by applying the
+ * L2L and L2P kernels.
+ *
+ * @param r The #runner.
+ * @param c The #cell we are working on.
+ * @param timer Are we timing this ? (recursive calls pass 0)
+ */
+void runner_do_grav_down(struct runner *r, struct cell *c, int timer) {
+
+  /* Some constants */
+  const struct engine *e = r->e;
+
+  TIMER_TIC;
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (c->grav.ti_old_multipole != e->ti_current)
+    error("c->multipole not drifted.");
+  if (c->grav.multipole->pot.ti_init != e->ti_current)
+    error("c->field tensor not initialised");
+#endif
+
+  if (c->split) {
+
+    /* Node case */
+
+    /* Add the field-tensor to all the 8 progenitors */
+    for (int k = 0; k < 8; ++k) {
+      struct cell *cp = c->progeny[k];
+
+      /* Do we have a progenitor with any active g-particles ? */
+      if (cp != NULL && cell_is_active_gravity(cp, e)) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+        if (cp->grav.ti_old_multipole != e->ti_current)
+          error("cp->multipole not drifted.");
+        if (cp->grav.multipole->pot.ti_init != e->ti_current)
+          error("cp->field tensor not initialised");
+#endif
+        /* If the tensor received any contribution, push it down */
+        if (c->grav.multipole->pot.interacted) {
+
+          struct grav_tensor shifted_tensor;
+
+          /* Shift the field tensor from the CoM of c to the CoM of cp */
+          gravity_L2L(&shifted_tensor, &c->grav.multipole->pot,
+                      cp->grav.multipole->CoM, c->grav.multipole->CoM);
+
+          /* Add it to the progeny's own tensor */
+          gravity_field_tensors_add(&cp->grav.multipole->pot, &shifted_tensor);
+        }
+
+        /* Recurse (done even if nothing was pushed down from this level) */
+        runner_do_grav_down(r, cp, 0);
+      }
+    }
+
+  } else {
+
+    /* Leaf case */
+
+    /* Nothing to apply if no multipole interaction happened (timer skipped) */
+    if (!c->grav.multipole->pot.interacted) return;
+
+    if (!cell_are_gpart_drifted(c, e)) error("Un-drifted gparts");
+
+    /* Cell properties */
+    struct gpart *gparts = c->grav.parts;
+    const int gcount = c->grav.count;
+    const struct grav_tensor *pot = &c->grav.multipole->pot;
+    const double CoM[3] = {c->grav.multipole->CoM[0], c->grav.multipole->CoM[1],
+                           c->grav.multipole->CoM[2]};
+
+    /* Apply accelerations to the particles */
+    for (int i = 0; i < gcount; ++i) {
+
+      /* Get a handle on the gpart */
+      struct gpart *gp = &gparts[i];
+
+      /* Update if active */
+      if (gpart_is_active(gp, e)) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+        /* Check that particles have been drifted to the current time */
+        if (gp->ti_drift != e->ti_current)
+          error("gpart not drifted to current time");
+        if (c->grav.multipole->pot.ti_init != e->ti_current)
+          error("c->field tensor not initialised");
+
+        /* Check that we are not updating an inhibited particle */
+        if (gpart_is_inhibited(gp, e)) error("Updating an inhibited particle!");
+
+        /* Check that the particle was initialised */
+        if (gp->initialised == 0)
+          error("Adding forces to an un-initialised gpart.");
+#endif
+        /* Apply the kernel */
+        gravity_L2P(pot, CoM, gp);
+      }
+    }
+  }
+
+  if (timer) TIMER_TOC(timer_dograv_down);
+}
+
+/**
+ * @brief Compute the non-truncated gravity interactions between all particles
+ * of a cell and the particles of the other cell.
+ *
+ * The calculation is performed non-symmetrically using the pre-filled
+ * #gravity_cache structures. The loop over the j cache should auto-vectorize.
+ *
+ * @param ci_cache #gravity_cache containing the particles to be updated.
+ * @param cj_cache #gravity_cache containing the source particles.
+ * @param gcount_i The number of particles in the cell i.
+ * @param gcount_padded_j The number of particles in the cell j padded to the
+ * vector length.
+ * @param periodic Is the calculation using periodic BCs ?
+ * @param dim The size of the simulation volume.
+ *
+ * @param e The #engine (for debugging checks only).
+ * @param gparts_i The #gpart in cell i (for debugging checks only).
+ * @param gparts_j The #gpart in cell j (for debugging checks only).
+ * @param gcount_j The number of particles in the cell j (for debugging checks
+ * only).
+ */
+static INLINE void runner_dopair_grav_pp_full(
+    struct gravity_cache *restrict ci_cache,
+    struct gravity_cache *restrict cj_cache, const int gcount_i,
+    const int gcount_j, const int gcount_padded_j, const int periodic,
+    const float dim[3], const struct engine *restrict e,
+    struct gpart *restrict gparts_i, const struct gpart *restrict gparts_j) {
+
+  /* Loop over all particles in ci... */
+  for (int pid = 0; pid < gcount_i; pid++) {
+
+    /* Skip inactive particles */
+    if (!ci_cache->active[pid]) continue;
+
+    /* Skip particles that can use the multipole */
+    if (ci_cache->use_mpole[pid]) continue;
+
+#ifdef SWIFT_DEBUG_CHECKS
+    if (!gpart_is_active(&gparts_i[pid], e))
+      error("Inactive particle went through the cache");
+#endif
+
+    const float x_i = ci_cache->x[pid];
+    const float y_i = ci_cache->y[pid];
+    const float z_i = ci_cache->z[pid];
+    const float h_i = ci_cache->epsilon[pid];
+
+    /* Local accumulators for the acceleration and potential */
+    float a_x = 0.f, a_y = 0.f, a_z = 0.f, pot = 0.f;
+
+    /* Make the compiler understand we are in happy vectorization land */
+    swift_align_information(float, cj_cache->x, SWIFT_CACHE_ALIGNMENT);
+    swift_align_information(float, cj_cache->y, SWIFT_CACHE_ALIGNMENT);
+    swift_align_information(float, cj_cache->z, SWIFT_CACHE_ALIGNMENT);
+    swift_align_information(float, cj_cache->m, SWIFT_CACHE_ALIGNMENT);
+    swift_align_information(float, cj_cache->epsilon, SWIFT_CACHE_ALIGNMENT);
+    swift_assume_size(gcount_padded_j, VEC_SIZE);
+
+    /* Loop over every entry of the other cell's cache, padding included. */
+    for (int pjd = 0; pjd < gcount_padded_j; pjd++) {
+
+      /* Get info about j */
+      const float x_j = cj_cache->x[pjd];
+      const float y_j = cj_cache->y[pjd];
+      const float z_j = cj_cache->z[pjd];
+      const float mass_j = cj_cache->m[pjd];
+      const float h_j = cj_cache->epsilon[pjd];
+
+      /* Compute the pairwise distance. */
+      float dx = x_j - x_i;
+      float dy = y_j - y_i;
+      float dz = z_j - z_i;
+
+      /* Correct for periodic BCs */
+      if (periodic) {
+        dx = nearestf(dx, dim[0]);
+        dy = nearestf(dy, dim[1]);
+        dz = nearestf(dz, dim[2]);
+      }
+
+      const float r2 = dx * dx + dy * dy + dz * dz;
+
+      /* Pick the maximal softening length of i and j */
+      const float h = max(h_i, h_j);
+      const float h2 = h * h;
+      const float h_inv = 1.f / h;
+      const float h_inv_3 = h_inv * h_inv * h_inv;
+
+#ifdef SWIFT_DEBUG_CHECKS
+      if (r2 == 0.f && h2 == 0.)
+        error("Interacting particles with 0 distance and 0 softening.");
+
+      /* Check that particles have been drifted to the current time */
+      if (gparts_i[pid].ti_drift != e->ti_current)
+        error("gpi not drifted to current time");
+      if (pjd < gcount_j && gparts_j[pjd].ti_drift != e->ti_current &&
+          !gpart_is_inhibited(&gparts_j[pjd], e))
+        error("gpj not drifted to current time");
+
+      /* Check that we are not updating an inhibited particle */
+      if (gpart_is_inhibited(&gparts_i[pid], e))
+        error("Updating an inhibited particle!");
+
+      /* Check that the particle we interact with was not inhibited */
+      if (pjd < gcount_j && gpart_is_inhibited(&gparts_j[pjd], e) &&
+          mass_j != 0.f)
+        error("Inhibited particle used as gravity source.");
+
+      /* Check that the particle was initialised */
+      if (gparts_i[pid].initialised == 0)
+        error("Adding forces to an un-initialised gpart.");
+#endif
+
+      /* Interact! */
+      float f_ij, pot_ij;
+      runner_iact_grav_pp_full(r2, h2, h_inv, h_inv_3, mass_j, &f_ij, &pot_ij);
+
+      /* Accumulate the contribution of j in the local sums */
+      a_x += f_ij * dx;
+      a_y += f_ij * dy;
+      a_z += f_ij * dz;
+      pot += pot_ij;
+
+#ifdef SWIFT_DEBUG_CHECKS
+      /* Update the interaction counter if it's not a padded gpart */
+      if (pjd < gcount_j && !gpart_is_inhibited(&gparts_j[pjd], e))
+        gparts_i[pid].num_interacted++;
+#endif
+    }
+
+    /* Store everything back in cache */
+    ci_cache->a_x[pid] += a_x;
+    ci_cache->a_y[pid] += a_y;
+    ci_cache->a_z[pid] += a_z;
+    ci_cache->pot[pid] += pot;
+  }
+}
+
+/**
+ * @brief Truncated particle-particle gravity of one cell acting on another.
+ *
+ * Each active particle of the i cache that is not flagged to use the
+ * multipole accumulates the truncated pair interaction with every entry of
+ * the (padded) j cache. Only the i cache is updated; the j cache is purely a
+ * source. The inner loop over the j cache should auto-vectorize.
+ *
+ * This function only makes sense in periodic BCs.
+ *
+ * @param ci_cache #gravity_cache containing the particles to be updated.
+ * @param cj_cache #gravity_cache containing the source particles.
+ * @param gcount_i The number of particles in the cell i.
+ * @param gcount_j The number of particles in the cell j (for debugging checks
+ * only).
+ * @param gcount_padded_j The number of particles in the cell j padded to the
+ * vector length.
+ * @param dim The size of the simulation volume.
+ * @param r_s_inv The inverse of the gravity-mesh smoothing-scale.
+ * @param e The #engine (for debugging checks only).
+ * @param gparts_i The #gpart in cell i (for debugging checks only).
+ * @param gparts_j The #gpart in cell j (for debugging checks only).
+ */
+static INLINE void runner_dopair_grav_pp_truncated(
+    struct gravity_cache *restrict ci_cache,
+    struct gravity_cache *restrict cj_cache, const int gcount_i,
+    const int gcount_j, const int gcount_padded_j, const float dim[3],
+    const float r_s_inv, const struct engine *restrict e,
+    struct gpart *restrict gparts_i, const struct gpart *restrict gparts_j) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (!e->s->periodic)
+    error("Calling truncated PP function in non-periodic setup.");
+#endif
+
+  /* Outer loop: the sinks, i.e. the particles of ci */
+  for (int i = 0; i < gcount_i; i++) {
+
+    /* Only active particles that do not use the multipole are handled here */
+    if (!ci_cache->active[i] || ci_cache->use_mpole[i]) continue;
+
+#ifdef SWIFT_DEBUG_CHECKS
+    if (!gpart_is_active(&gparts_i[i], e))
+      error("Inactive particle went through the cache");
+#endif
+
+    /* Position and softening of the sink */
+    const float xi = ci_cache->x[i];
+    const float yi = ci_cache->y[i];
+    const float zi = ci_cache->z[i];
+    const float eps_i = ci_cache->epsilon[i];
+
+    /* Running sums of the acceleration and potential received by the sink */
+    float acc_x = 0.f, acc_y = 0.f, acc_z = 0.f, phi = 0.f;
+
+    /* Alignment and trip-count hints for the vectorizer */
+    swift_align_information(float, cj_cache->x, SWIFT_CACHE_ALIGNMENT);
+    swift_align_information(float, cj_cache->y, SWIFT_CACHE_ALIGNMENT);
+    swift_align_information(float, cj_cache->z, SWIFT_CACHE_ALIGNMENT);
+    swift_align_information(float, cj_cache->m, SWIFT_CACHE_ALIGNMENT);
+    swift_align_information(float, cj_cache->epsilon, SWIFT_CACHE_ALIGNMENT);
+    swift_assume_size(gcount_padded_j, VEC_SIZE);
+
+    /* Inner loop: the sources, i.e. all (padded) entries of cj */
+    for (int j = 0; j < gcount_padded_j; j++) {
+
+      /* Mass and softening of the source */
+      const float m_j = cj_cache->m[j];
+      const float eps_j = cj_cache->epsilon[j];
+
+      /* Separation (source minus sink), corrected for periodic BCs */
+      const float dx = nearestf(cj_cache->x[j] - xi, dim[0]);
+      const float dy = nearestf(cj_cache->y[j] - yi, dim[1]);
+      const float dz = nearestf(cj_cache->z[j] - zi, dim[2]);
+      const float r2 = dx * dx + dy * dy + dz * dz;
+
+      /* The pair is softened with the larger of the two softening lengths */
+      const float eps = max(eps_i, eps_j);
+      const float eps2 = eps * eps;
+      const float eps_inv = 1.f / eps;
+      const float eps_inv3 = eps_inv * eps_inv * eps_inv;
+
+#ifdef SWIFT_DEBUG_CHECKS
+      if (r2 == 0.f && eps2 == 0.)
+        error("Interacting particles with 0 distance and 0 softening.");
+
+      /* Both particles must have been drifted to the current time */
+      if (gparts_i[i].ti_drift != e->ti_current)
+        error("gpi not drifted to current time");
+      if (j < gcount_j && gparts_j[j].ti_drift != e->ti_current &&
+          !gpart_is_inhibited(&gparts_j[j], e))
+        error("gpj not drifted to current time");
+
+      /* The sink must not be an inhibited particle */
+      if (gpart_is_inhibited(&gparts_i[i], e))
+        error("Updating an inhibited particle!");
+
+      /* An inhibited source is only tolerated if it carries no mass */
+      if (j < gcount_j && gpart_is_inhibited(&gparts_j[j], e) && m_j != 0.f)
+        error("Inhibited particle used as gravity source.");
+
+      /* The sink must have been initialised */
+      if (gparts_i[i].initialised == 0)
+        error("Adding forces to an un-initialised gpart.");
+#endif
+
+      /* Evaluate the truncated pair kernel */
+      float f_ij, pot_ij;
+      runner_iact_grav_pp_truncated(r2, eps2, eps_inv, eps_inv3, m_j, r_s_inv,
+                                    &f_ij, &pot_ij);
+
+      /* Accumulate locally */
+      acc_x += f_ij * dx;
+      acc_y += f_ij * dy;
+      acc_z += f_ij * dz;
+      phi += pot_ij;
+
+#ifdef SWIFT_DEBUG_CHECKS
+      /* Count the interaction unless the source is padding or inhibited */
+      if (j < gcount_j && !gpart_is_inhibited(&gparts_j[j], e))
+        gparts_i[i].num_interacted++;
+#endif
+    }
+
+    /* Add the accumulated contribution to the sink's cache entry */
+    ci_cache->a_x[i] += acc_x;
+    ci_cache->a_y[i] += acc_y;
+    ci_cache->a_z[i] += acc_z;
+    ci_cache->pot[i] += phi;
+  }
+}
+
+/**
+ * @brief Compute the gravity interactions between all particles
+ * of a cell and the multipole of the other cell.
+ *
+ * The calculation is performed using the pre-filled
+ * #gravity_cache structure. The loop over the i cache should auto-vectorize.
+ *
+ * Only cache entries flagged as both active and use_mpole are updated; every
+ * other entry is skipped.
+ *
+ * @param ci_cache #gravity_cache containing the particles to be updated.
+ * @param gcount_padded_i The number of particles in the cell i padded to the
+ * vector length.
+ * @param CoM_j Position of the #multipole in #cell j.
+ * @param multi_j The #multipole in #cell j.
+ * @param periodic Is the calculation using periodic BCs ?
+ * @param dim The size of the simulation volume.
+ *
+ * @param e The #engine (for debugging checks only).
+ * @param gparts_i The #gpart in cell i (for debugging checks only).
+ * @param gcount_i The number of particles in the cell i (for debugging checks
+ * only).
+ * @param cj The #cell j (for debugging checks only).
+ */
+static INLINE void runner_dopair_grav_pm_full(
+    struct gravity_cache *ci_cache, const int gcount_padded_i,
+    const float CoM_j[3], const struct multipole *restrict multi_j,
+    const int periodic, const float dim[3], const struct engine *restrict e,
+    struct gpart *restrict gparts_i, const int gcount_i,
+    const struct cell *restrict cj) {
+
+  /* Make the compiler understand we are in happy vectorization land */
+  swift_declare_aligned_ptr(float, x, ci_cache->x, SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(float, y, ci_cache->y, SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(float, z, ci_cache->z, SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(float, epsilon, ci_cache->epsilon,
+                            SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(float, a_x, ci_cache->a_x, SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(float, a_y, ci_cache->a_y, SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(float, a_z, ci_cache->a_z, SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(float, pot, ci_cache->pot, SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(int, active, ci_cache->active,
+                            SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(int, use_mpole, ci_cache->use_mpole,
+                            SWIFT_CACHE_ALIGNMENT);
+  swift_assume_size(gcount_padded_i, VEC_SIZE);
+
+  /* Loop over all particles in ci... */
+  for (int pid = 0; pid < gcount_padded_i; pid++) {
+
+    /* Skip inactive particles */
+    if (!active[pid]) continue;
+
+    /* Skip particle that cannot use the multipole */
+    if (!use_mpole[pid]) continue;
+
+#ifdef SWIFT_DEBUG_CHECKS
+    /* A padded entry has no matching gpart: report it before any of the
+     * checks below read gparts_i[pid] */
+    if (pid >= gcount_i) error("Adding forces to padded particle");
+
+    /* The cache flagged this entry as active: the gpart must agree */
+    if (pid < gcount_i && !gpart_is_active(&gparts_i[pid], e))
+      error("Inactive particle went through the cache");
+
+    /* Check that particles have been drifted to the current time */
+    if (gparts_i[pid].ti_drift != e->ti_current)
+      error("gpi not drifted to current time");
+
+    /* Check that we are not updating an inhibited particle */
+    if (gpart_is_inhibited(&gparts_i[pid], e))
+      error("Updating an inhibited particle!");
+
+    /* Check that the particle was initialised */
+    if (gparts_i[pid].initialised == 0)
+      error("Adding forces to an un-initialised gpart.");
+#endif
+
+    const float x_i = x[pid];
+    const float y_i = y[pid];
+    const float z_i = z[pid];
+
+    /* Some powers of the softening length */
+    const float h_i = epsilon[pid];
+    const float h_inv_i = 1.f / h_i;
+
+    /* Distance to the Multipole */
+    float dx = CoM_j[0] - x_i;
+    float dy = CoM_j[1] - y_i;
+    float dz = CoM_j[2] - z_i;
+
+    /* Apply periodic BCs? */
+    if (periodic) {
+      dx = nearestf(dx, dim[0]);
+      dy = nearestf(dy, dim[1]);
+      dz = nearestf(dz, dim[2]);
+    }
+
+    const float r2 = dx * dx + dy * dy + dz * dz;
+
+#ifdef SWIFT_DEBUG_CHECKS
+    const float r_max_j = cj->grav.multipole->r_max;
+    const float r_max2 = r_max_j * r_max_j;
+    const float theta_crit2 = e->gravity_properties->theta_crit2;
+
+    /* Note: 0.99 and 1.1 to avoid FP rounding false-positives */
+    if (!gravity_M2P_accept(r_max2, theta_crit2 * 1.1, r2, 0.99 * h_i))
+      error(
+          "use_mpole[i] set when M2P accept fails CoM=[%e %e %e] pos=[%e %e "
+          "%e], rmax=%e r=%e epsilon=%e",
+          CoM_j[0], CoM_j[1], CoM_j[2], x_i, y_i, z_i, r_max_j, sqrtf(r2), h_i);
+#endif
+
+    /* Interact! */
+    float f_x, f_y, f_z, pot_ij;
+    runner_iact_grav_pm_full(dx, dy, dz, r2, h_i, h_inv_i, multi_j, &f_x, &f_y,
+                             &f_z, &pot_ij);
+
+    /* Store it back */
+    a_x[pid] += f_x;
+    a_y[pid] += f_y;
+    a_z[pid] += f_z;
+    pot[pid] += pot_ij;
+
+#ifdef SWIFT_DEBUG_CHECKS
+    /* Update the interaction counter */
+    if (pid < gcount_i)
+      gparts_i[pid].num_interacted += cj->grav.multipole->m_pole.num_gpart;
+#endif
+  }
+}
+
+/**
+ * @brief Compute the truncated gravity interactions between all particles
+ * of a cell and the multipole of the other cell.
+ *
+ * The calculation is performed using the pre-filled
+ * #gravity_cache structure. The loop over the i cache should auto-vectorize.
+ *
+ * Only cache entries flagged as both active and use_mpole are updated; every
+ * other entry is skipped.
+ *
+ * This function only makes sense in periodic BCs.
+ *
+ * @param ci_cache #gravity_cache containing the particles to be updated.
+ * @param gcount_padded_i The number of particles in the cell i padded to the
+ * vector length.
+ * @param CoM_j Position of the #multipole in #cell j.
+ * @param multi_j The #multipole in #cell j.
+ * @param dim The size of the simulation volume.
+ * @param r_s_inv The inverse of the gravity-mesh smoothing-scale.
+ *
+ * @param e The #engine (for debugging checks only).
+ * @param gparts_i The #gpart in cell i (for debugging checks only).
+ * @param gcount_i The number of particles in the cell i (for debugging checks
+ * only).
+ * @param cj The #cell j (for debugging checks only).
+ */
+static INLINE void runner_dopair_grav_pm_truncated(
+    struct gravity_cache *ci_cache, const int gcount_padded_i,
+    const float CoM_j[3], const struct multipole *restrict multi_j,
+    const float dim[3], const float r_s_inv, const struct engine *restrict e,
+    struct gpart *restrict gparts_i, const int gcount_i,
+    const struct cell *restrict cj) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (!e->s->periodic)
+    error("Calling truncated PM function in non-periodic setup.");
+#endif
+
+  /* Make the compiler understand we are in happy vectorization land */
+  swift_declare_aligned_ptr(float, x, ci_cache->x, SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(float, y, ci_cache->y, SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(float, z, ci_cache->z, SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(float, epsilon, ci_cache->epsilon,
+                            SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(float, a_x, ci_cache->a_x, SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(float, a_y, ci_cache->a_y, SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(float, a_z, ci_cache->a_z, SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(float, pot, ci_cache->pot, SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(int, active, ci_cache->active,
+                            SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(int, use_mpole, ci_cache->use_mpole,
+                            SWIFT_CACHE_ALIGNMENT);
+  swift_assume_size(gcount_padded_i, VEC_SIZE);
+
+  /* Loop over all particles in ci... */
+  for (int pid = 0; pid < gcount_padded_i; pid++) {
+
+    /* Skip inactive particles */
+    if (!active[pid]) continue;
+
+    /* Skip particle that cannot use the multipole */
+    if (!use_mpole[pid]) continue;
+
+#ifdef SWIFT_DEBUG_CHECKS
+    /* A padded entry has no matching gpart: report it before any of the
+     * checks below read gparts_i[pid] */
+    if (pid >= gcount_i) error("Adding forces to padded particle");
+
+    /* The cache flagged this entry as active: the gpart must agree */
+    if (pid < gcount_i && !gpart_is_active(&gparts_i[pid], e))
+      error("Inactive particle went through the cache");
+
+    /* Check that particles have been drifted to the current time */
+    if (gparts_i[pid].ti_drift != e->ti_current)
+      error("gpi not drifted to current time");
+
+    /* Check that we are not updating an inhibited particle */
+    if (gpart_is_inhibited(&gparts_i[pid], e))
+      error("Updating an inhibited particle!");
+
+    /* Check that the particle was initialised */
+    if (gparts_i[pid].initialised == 0)
+      error("Adding forces to an un-initialised gpart.");
+#endif
+
+    const float x_i = x[pid];
+    const float y_i = y[pid];
+    const float z_i = z[pid];
+
+    /* Some powers of the softening length */
+    const float h_i = epsilon[pid];
+    const float h_inv_i = 1.f / h_i;
+
+    /* Distance to the Multipole */
+    float dx = CoM_j[0] - x_i;
+    float dy = CoM_j[1] - y_i;
+    float dz = CoM_j[2] - z_i;
+
+    /* Apply periodic BCs */
+    dx = nearestf(dx, dim[0]);
+    dy = nearestf(dy, dim[1]);
+    dz = nearestf(dz, dim[2]);
+
+    const float r2 = dx * dx + dy * dy + dz * dz;
+
+#ifdef SWIFT_DEBUG_CHECKS
+    const float r_max_j = cj->grav.multipole->r_max;
+    const float r_max2 = r_max_j * r_max_j;
+    const float theta_crit2 = e->gravity_properties->theta_crit2;
+
+    /* Note: 0.99 and 1.1 to avoid FP rounding false-positives */
+    if (!gravity_M2P_accept(r_max2, theta_crit2 * 1.1, r2, 0.99 * h_i))
+      error(
+          "use_mpole[i] set when M2P accept fails CoM=[%e %e %e] pos=[%e %e "
+          "%e], rmax=%e r=%e epsilon=%e",
+          CoM_j[0], CoM_j[1], CoM_j[2], x_i, y_i, z_i, r_max_j, sqrtf(r2), h_i);
+#endif
+
+    /* Interact! */
+    float f_x, f_y, f_z, pot_ij;
+    runner_iact_grav_pm_truncated(dx, dy, dz, r2, h_i, h_inv_i, r_s_inv,
+                                  multi_j, &f_x, &f_y, &f_z, &pot_ij);
+
+    /* Store it back */
+    a_x[pid] += f_x;
+    a_y[pid] += f_y;
+    a_z[pid] += f_z;
+    pot[pid] += pot_ij;
+
+#ifdef SWIFT_DEBUG_CHECKS
+    /* Update the interaction counter */
+    if (pid < gcount_i)
+      gparts_i[pid].num_interacted += cj->grav.multipole->m_pole.num_gpart;
+#endif
+  }
+}
+
+/**
+ * @brief Direct (particle-level) gravity interaction between two cells.
+ *
+ * Both cells are first copied into the runner's two #gravity_cache buffers.
+ * The function then picks either the full Newtonian kernels or the truncated
+ * ones and runs, for each cell that has to be updated, the particle-particle
+ * kernel followed (if allowed) by the multipole-particle (M2P) kernel for the
+ * particles flagged to use the other cell's multipole. Finally the results
+ * are written from the caches back to the particles.
+ *
+ * The truncated kernels are used only in periodic mode and only when the
+ * distance between the two centres of mass plus both multipole radii exceeds
+ * the mesh's minimal truncation distance.
+ *
+ * @param r The #runner.
+ * @param ci The first #cell.
+ * @param cj The other #cell.
+ * @param symmetric Are we updating both cells (1) or just ci (0) ?
+ * @param allow_mpole Are we allowing the use of M2P interactions ?
+ */
+INLINE void runner_dopair_grav_pp(struct runner *r, struct cell *ci,
+                                  struct cell *cj, const int symmetric,
+                                  const int allow_mpole) {
+
+  const struct engine *e = r->e;
+
+  /* Properties of the gravity mesh */
+  const int periodic = e->mesh->periodic;
+  const float dim[3] = {(float)e->mesh->dim[0], (float)e->mesh->dim[1],
+                        (float)e->mesh->dim[2]};
+  const float r_s_inv = e->mesh->r_s_inv;
+  const double min_trunc = e->mesh->r_cut_min;
+
+  TIMER_TIC;
+
+  /* A cell is only updated if it is active and local to this node */
+  const int ci_active =
+      cell_is_active_gravity(ci, e) && (ci->nodeID == e->nodeID);
+  const int cj_active =
+      cell_is_active_gravity(cj, e) && (cj->nodeID == e->nodeID);
+
+  /* Nothing to do if ci is idle and cj is either idle or not to be updated */
+  if (!ci_active && (!cj_active || !symmetric)) return;
+
+  /* Sanity checks: leaf cells only, everything drifted */
+  if (ci->split || cj->split) error("Running P-P on splitable cells");
+  if (!cell_are_gpart_drifted(ci, e)) error("Un-drifted gparts");
+  if (!cell_are_gpart_drifted(cj, e)) error("Un-drifted gparts");
+  if (cj_active && ci->grav.ti_old_multipole != e->ti_current)
+    error("Un-drifted multipole");
+  if (ci_active && cj->grav.ti_old_multipole != e->ti_current)
+    error("Un-drifted multipole");
+
+  /* The runner owns one cache per side */
+  struct gravity_cache *const ci_cache = &r->ci_gravity_cache;
+  struct gravity_cache *const cj_cache = &r->cj_gravity_cache;
+
+  /* No shift is applied to either cell */
+  const double shift_i[3] = {0., 0., 0.};
+  const double shift_j[3] = {0., 0., 0.};
+
+  /* Multipole of each cell: radius, moments and (shifted) centre of mass */
+  const float rmax_i = ci->grav.multipole->r_max;
+  const float rmax_j = cj->grav.multipole->r_max;
+  const float rmax2_i = rmax_i * rmax_i;
+  const float rmax2_j = rmax_j * rmax_j;
+  const struct multipole *multi_i = &ci->grav.multipole->m_pole;
+  const struct multipole *multi_j = &cj->grav.multipole->m_pole;
+  const float CoM_i[3] = {(float)(ci->grav.multipole->CoM[0] - shift_i[0]),
+                          (float)(ci->grav.multipole->CoM[1] - shift_i[1]),
+                          (float)(ci->grav.multipole->CoM[2] - shift_i[2])};
+  const float CoM_j[3] = {(float)(cj->grav.multipole->CoM[0] - shift_j[0]),
+                          (float)(cj->grav.multipole->CoM[1] - shift_j[1]),
+                          (float)(cj->grav.multipole->CoM[2] - shift_j[2])};
+
+  /* Particle counts, raw and rounded up to the next multiple of VEC_SIZE
+   * (an exact multiple still receives one extra vector of padding) */
+  const int gcount_i = ci->grav.count;
+  const int gcount_j = cj->grav.count;
+  const int gcount_padded_i = gcount_i - (gcount_i % VEC_SIZE) + VEC_SIZE;
+  const int gcount_padded_j = gcount_j - (gcount_j % VEC_SIZE) + VEC_SIZE;
+
+#ifdef SWIFT_DEBUG_CHECKS
+  /* Check that we fit in cache */
+  if (gcount_i > ci_cache->count || gcount_j > cj_cache->count)
+    error("Not enough space in the caches! gcount_i=%d gcount_j=%d", gcount_i,
+          gcount_j);
+#endif
+
+  /* Load each cell into its cache, testing against the other's multipole */
+  gravity_cache_populate(e->max_active_bin, allow_mpole, periodic, dim,
+                         ci_cache, ci->grav.parts, gcount_i, gcount_padded_i,
+                         shift_i, CoM_j, rmax2_j, ci, e->gravity_properties);
+  gravity_cache_populate(e->max_active_bin, allow_mpole, periodic, dim,
+                         cj_cache, cj->grav.parts, gcount_j, gcount_padded_j,
+                         shift_j, CoM_i, rmax2_i, cj, e->gravity_properties);
+
+  /* Newtonian or truncated kernels? Without periodicity the Newtonian
+   * potential can always be used. With periodicity it can still be used as
+   * long as no two particles can be further apart than min_trunc. */
+  int use_truncated = 0;
+  if (periodic) {
+
+    /* Get the relative distance between the CoMs */
+    const double dx[3] = {CoM_j[0] - CoM_i[0], CoM_j[1] - CoM_i[1],
+                          CoM_j[2] - CoM_i[2]};
+    const double r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
+
+    /* Get the maximal distance between any two particles */
+    const double max_r = sqrt(r2) + rmax_i + rmax_j;
+
+    use_truncated = (max_r > min_trunc);
+  }
+
+  /* Update ci from cj: P2P first, then M2P */
+  if (ci_active) {
+    if (use_truncated) {
+      runner_dopair_grav_pp_truncated(ci_cache, cj_cache, gcount_i, gcount_j,
+                                      gcount_padded_j, dim, r_s_inv, e,
+                                      ci->grav.parts, cj->grav.parts);
+      if (allow_mpole)
+        runner_dopair_grav_pm_truncated(ci_cache, gcount_padded_i, CoM_j,
+                                        multi_j, dim, r_s_inv, e,
+                                        ci->grav.parts, gcount_i, cj);
+    } else {
+      runner_dopair_grav_pp_full(ci_cache, cj_cache, gcount_i, gcount_j,
+                                 gcount_padded_j, periodic, dim, e,
+                                 ci->grav.parts, cj->grav.parts);
+      if (allow_mpole)
+        runner_dopair_grav_pm_full(ci_cache, gcount_padded_i, CoM_j, multi_j,
+                                   periodic, dim, e, ci->grav.parts, gcount_i,
+                                   cj);
+    }
+  }
+
+  /* Update cj from ci (symmetric case only): P2P first, then M2P */
+  if (cj_active && symmetric) {
+    if (use_truncated) {
+      runner_dopair_grav_pp_truncated(cj_cache, ci_cache, gcount_j, gcount_i,
+                                      gcount_padded_i, dim, r_s_inv, e,
+                                      cj->grav.parts, ci->grav.parts);
+      if (allow_mpole)
+        runner_dopair_grav_pm_truncated(cj_cache, gcount_padded_j, CoM_i,
+                                        multi_i, dim, r_s_inv, e,
+                                        cj->grav.parts, gcount_j, ci);
+    } else {
+      runner_dopair_grav_pp_full(cj_cache, ci_cache, gcount_j, gcount_i,
+                                 gcount_padded_i, periodic, dim, e,
+                                 cj->grav.parts, ci->grav.parts);
+      if (allow_mpole)
+        runner_dopair_grav_pm_full(cj_cache, gcount_padded_j, CoM_i, multi_i,
+                                   periodic, dim, e, cj->grav.parts, gcount_j,
+                                   ci);
+    }
+  }
+
+  /* Copy the results from the caches to the particles */
+  if (ci_active) gravity_cache_write_back(ci_cache, ci->grav.parts, gcount_i);
+  if (cj_active && symmetric)
+    gravity_cache_write_back(cj_cache, cj->grav.parts, gcount_j);
+
+  TIMER_TOC(timer_dopair_grav_pp);
+}
+
+/**
+ * @brief Non-truncated particle-particle gravity among the particles of a
+ * single cell.
+ *
+ * Each active particle of the cache accumulates the full pair interaction
+ * with every other entry of the same (padded) cache; an entry never
+ * interacts with itself. The inner loop over the cache should auto-vectorize.
+ *
+ * @param ci_cache #gravity_cache containing the particles to be updated.
+ * @param gcount The number of particles in the cell.
+ * @param gcount_padded The number of particles in the cell padded to the
+ * vector length.
+ *
+ * @param e The #engine (for debugging checks only).
+ * @param gparts The #gpart in the cell (for debugging checks only).
+ */
+static INLINE void runner_doself_grav_pp_full(
+    struct gravity_cache *restrict ci_cache, const int gcount,
+    const int gcount_padded, const struct engine *e, struct gpart *gparts) {
+
+  /* Outer loop: the sinks */
+  for (int i = 0; i < gcount; i++) {
+
+    /* Only active particles receive forces */
+    if (!ci_cache->active[i]) continue;
+
+    /* Position and softening of the sink */
+    const float xi = ci_cache->x[i];
+    const float yi = ci_cache->y[i];
+    const float zi = ci_cache->z[i];
+    const float eps_i = ci_cache->epsilon[i];
+
+    /* Running sums of the acceleration and potential received by the sink */
+    float acc_x = 0.f, acc_y = 0.f, acc_z = 0.f, phi = 0.f;
+
+    /* Alignment and trip-count hints for the vectorizer */
+    swift_align_information(float, ci_cache->x, SWIFT_CACHE_ALIGNMENT);
+    swift_align_information(float, ci_cache->y, SWIFT_CACHE_ALIGNMENT);
+    swift_align_information(float, ci_cache->z, SWIFT_CACHE_ALIGNMENT);
+    swift_align_information(float, ci_cache->m, SWIFT_CACHE_ALIGNMENT);
+    swift_align_information(float, ci_cache->epsilon, SWIFT_CACHE_ALIGNMENT);
+    swift_assume_size(gcount_padded, VEC_SIZE);
+
+    /* Inner loop: the sources, i.e. all (padded) entries of the same cache */
+    for (int j = 0; j < gcount_padded; j++) {
+
+      /* A particle does not act on itself */
+      if (j == i) continue;
+
+      /* Mass and softening of the source */
+      const float m_j = ci_cache->m[j];
+      const float eps_j = ci_cache->epsilon[j];
+
+      /* Separation (source minus sink); no periodic wrapping is applied
+       * inside a cell */
+      const float dx = ci_cache->x[j] - xi;
+      const float dy = ci_cache->y[j] - yi;
+      const float dz = ci_cache->z[j] - zi;
+      const float r2 = dx * dx + dy * dy + dz * dz;
+
+      /* The pair is softened with the larger of the two softening lengths */
+      const float eps = max(eps_i, eps_j);
+      const float eps2 = eps * eps;
+      const float eps_inv = 1.f / eps;
+      const float eps_inv3 = eps_inv * eps_inv * eps_inv;
+
+#ifdef SWIFT_DEBUG_CHECKS
+      if (r2 == 0.f && eps2 == 0.)
+        error("Interacting particles with 0 distance and 0 softening.");
+
+      /* Both particles must have been drifted to the current time */
+      if (gparts[i].ti_drift != e->ti_current)
+        error("gpi not drifted to current time");
+      if (j < gcount && gparts[j].ti_drift != e->ti_current &&
+          !gpart_is_inhibited(&gparts[j], e))
+        error("gpj not drifted to current time");
+
+      /* The sink must not be an inhibited particle */
+      if (gpart_is_inhibited(&gparts[i], e))
+        error("Updating an inhibited particle!");
+
+      /* An inhibited source is only tolerated if it carries no mass */
+      if (j < gcount && gpart_is_inhibited(&gparts[j], e) && m_j != 0.f)
+        error("Inhibited particle used as gravity source.");
+
+      /* The sink must have been initialised */
+      if (gparts[i].initialised == 0)
+        error("Adding forces to an un-initialised gpart.");
+#endif
+
+      /* Evaluate the full pair kernel */
+      float f_ij, pot_ij;
+      runner_iact_grav_pp_full(r2, eps2, eps_inv, eps_inv3, m_j, &f_ij,
+                               &pot_ij);
+
+      /* Accumulate locally */
+      acc_x += f_ij * dx;
+      acc_y += f_ij * dy;
+      acc_z += f_ij * dz;
+      phi += pot_ij;
+
+#ifdef SWIFT_DEBUG_CHECKS
+      /* Count the interaction unless the source is padding or inhibited */
+      if (j < gcount && !gpart_is_inhibited(&gparts[j], e))
+        gparts[i].num_interacted++;
+#endif
+    }
+
+    /* Add the accumulated contribution to the sink's cache entry */
+    ci_cache->a_x[i] += acc_x;
+    ci_cache->a_y[i] += acc_y;
+    ci_cache->a_z[i] += acc_z;
+    ci_cache->pot[i] += phi;
+  }
+}
+
+/**
+ * @brief Truncated particle-particle gravity among the particles of a single
+ * cell.
+ *
+ * Each active particle of the cache accumulates the truncated pair
+ * interaction with every other entry of the same (padded) cache; an entry
+ * never interacts with itself. The inner loop over the cache should
+ * auto-vectorize.
+ *
+ * This function only makes sense in periodic BCs.
+ *
+ * @param ci_cache #gravity_cache containing the particles to be updated.
+ * @param gcount The number of particles in the cell.
+ * @param gcount_padded The number of particles in the cell padded to the
+ * vector length.
+ * @param r_s_inv The inverse of the gravity-mesh smoothing-scale.
+ *
+ * @param e The #engine (for debugging checks only).
+ * @param gparts The #gpart in the cell (for debugging checks only).
+ */
+static INLINE void runner_doself_grav_pp_truncated(
+    struct gravity_cache *restrict ci_cache, const int gcount,
+    const int gcount_padded, const float r_s_inv, const struct engine *e,
+    struct gpart *gparts) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (!e->s->periodic)
+    error("Calling truncated PP function in non-periodic setup.");
+#endif
+
+  /* Outer loop: the sinks */
+  for (int i = 0; i < gcount; i++) {
+
+    /* Only active particles receive forces */
+    if (!ci_cache->active[i]) continue;
+
+    /* Position and softening of the sink */
+    const float xi = ci_cache->x[i];
+    const float yi = ci_cache->y[i];
+    const float zi = ci_cache->z[i];
+    const float eps_i = ci_cache->epsilon[i];
+
+    /* Running sums of the acceleration and potential received by the sink */
+    float acc_x = 0.f, acc_y = 0.f, acc_z = 0.f, phi = 0.f;
+
+    /* Alignment and trip-count hints for the vectorizer */
+    swift_align_information(float, ci_cache->x, SWIFT_CACHE_ALIGNMENT);
+    swift_align_information(float, ci_cache->y, SWIFT_CACHE_ALIGNMENT);
+    swift_align_information(float, ci_cache->z, SWIFT_CACHE_ALIGNMENT);
+    swift_align_information(float, ci_cache->m, SWIFT_CACHE_ALIGNMENT);
+    swift_align_information(float, ci_cache->epsilon, SWIFT_CACHE_ALIGNMENT);
+    swift_assume_size(gcount_padded, VEC_SIZE);
+
+    /* Inner loop: the sources, i.e. all (padded) entries of the same cache */
+    for (int j = 0; j < gcount_padded; j++) {
+
+      /* A particle does not act on itself */
+      if (j == i) continue;
+
+      /* Mass and softening of the source */
+      const float m_j = ci_cache->m[j];
+      const float eps_j = ci_cache->epsilon[j];
+
+      /* Separation (source minus sink); no periodic wrapping is applied
+       * inside a cell */
+      const float dx = ci_cache->x[j] - xi;
+      const float dy = ci_cache->y[j] - yi;
+      const float dz = ci_cache->z[j] - zi;
+      const float r2 = dx * dx + dy * dy + dz * dz;
+
+      /* The pair is softened with the larger of the two softening lengths */
+      const float eps = max(eps_i, eps_j);
+      const float eps2 = eps * eps;
+      const float eps_inv = 1.f / eps;
+      const float eps_inv3 = eps_inv * eps_inv * eps_inv;
+
+#ifdef SWIFT_DEBUG_CHECKS
+      if (r2 == 0.f && eps2 == 0.)
+        error("Interacting particles with 0 distance and 0 softening.");
+
+      /* Both particles must have been drifted to the current time */
+      if (gparts[i].ti_drift != e->ti_current)
+        error("gpi not drifted to current time");
+      if (j < gcount && gparts[j].ti_drift != e->ti_current &&
+          !gpart_is_inhibited(&gparts[j], e))
+        error("gpj not drifted to current time");
+
+      /* The sink must not be an inhibited particle */
+      if (gpart_is_inhibited(&gparts[i], e))
+        error("Updating an inhibited particle!");
+
+      /* An inhibited source is only tolerated if it carries no mass */
+      if (j < gcount && gpart_is_inhibited(&gparts[j], e) && m_j != 0.f)
+        error("Inhibited particle used as gravity source.");
+
+      /* The sink must have been initialised */
+      if (gparts[i].initialised == 0)
+        error("Adding forces to an un-initialised gpart.");
+#endif
+
+      /* Evaluate the truncated pair kernel */
+      float f_ij, pot_ij;
+      runner_iact_grav_pp_truncated(r2, eps2, eps_inv, eps_inv3, m_j, r_s_inv,
+                                    &f_ij, &pot_ij);
+
+      /* Accumulate locally */
+      acc_x += f_ij * dx;
+      acc_y += f_ij * dy;
+      acc_z += f_ij * dz;
+      phi += pot_ij;
+
+#ifdef SWIFT_DEBUG_CHECKS
+      /* Count the interaction unless the source is padding or inhibited */
+      if (j < gcount && !gpart_is_inhibited(&gparts[j], e))
+        gparts[i].num_interacted++;
+#endif
+    }
+
+    /* Add the accumulated contribution to the sink's cache entry */
+    ci_cache->a_x[i] += acc_x;
+    ci_cache->a_y[i] += acc_y;
+    ci_cache->a_z[i] += acc_z;
+    ci_cache->pot[i] += phi;
+  }
+}
+
+/**
+ * @brief Computes the interaction of all the particles in a cell with all the
+ * other ones.
+ *
+ * This function switches between the full potential and the truncated one
+ * depending on needs.
+ *
+ * This function starts by constructing the required #gravity_cache for the
+ * cell and then calls the specialised functions doing the actual work on
+ * the cache. It then writes the data back to the particles.
+ *
+ * @param r The #runner.
+ * @param c The #cell.
+ */
+INLINE void runner_doself_grav_pp(struct runner *r, struct cell *c) {
+
+  /* Recover some useful constants */
+  const struct engine *e = r->e;
+  const int periodic = e->mesh->periodic;
+  const float r_s_inv = e->mesh->r_s_inv;
+  const double min_trunc = e->mesh->r_cut_min;
+
+  TIMER_TIC;
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (c->grav.count == 0) error("Doing self gravity on an empty cell !");
+#endif
+
+  /* Anything to do here? */
+  if (!cell_is_active_gravity(c, e)) return;
+
+  /* Direct summation is only meant for leaf (un-split) cells */
+  if (c->split) error("Running P-P on a splitable cell");
+
+  /* The g-particles must already have been drifted */
+  if (!cell_are_gpart_drifted(c, e)) error("Un-drifted gparts");
+
+  /* Use the runner's own cache to hold the particle data */
+  struct gravity_cache *const ci_cache = &r->ci_gravity_cache;
+
+  /* Shift to apply to the particles in the cell (the cell's centre) */
+  const double loc[3] = {c->loc[0] + 0.5 * c->width[0],
+                         c->loc[1] + 0.5 * c->width[1],
+                         c->loc[2] + 0.5 * c->width[2]};
+
+  /* Padded count: the smallest multiple of VEC_SIZE strictly above gcount */
+  const int gcount = c->grav.count;
+  const int gcount_padded = gcount - (gcount % VEC_SIZE) + VEC_SIZE;
+
+#ifdef SWIFT_DEBUG_CHECKS
+  /* Check that we fit in cache */
+  if (gcount > ci_cache->count)
+    error("Not enough space in the cache! gcount=%d", gcount);
+#endif
+
+  /* Fill the cache */
+  gravity_cache_populate_no_mpole(e->max_active_bin, ci_cache, c->grav.parts,
+                                  gcount, gcount_padded, loc, c,
+                                  e->gravity_properties);
+
+  /* Can we use the Newtonian version or do we need the truncated one ? */
+  if (!periodic) {
+
+    /* Not periodic -> Can always use Newtonian potential */
+    runner_doself_grav_pp_full(ci_cache, gcount, gcount_padded, e,
+                               c->grav.parts);
+
+  } else {
+
+    /* Get the maximal distance between any two particles */
+    const double max_r = 2. * c->grav.multipole->r_max;
+
+    /* Do we need to use the truncated interactions ? */
+    if (max_r > min_trunc) {
+
+      /* Pairs may be further apart than r_cut_min -> truncated potential */
+      runner_doself_grav_pp_truncated(ci_cache, gcount, gcount_padded, r_s_inv,
+                                      e, c->grav.parts);
+
+    } else {
+
+      /* All pairs are within r_cut_min -> full Newtonian potential */
+      runner_doself_grav_pp_full(ci_cache, gcount, gcount_padded, e,
+                                 c->grav.parts);
+    }
+  }
+
+  /* Write back to the particles */
+  gravity_cache_write_back(ci_cache, c->grav.parts, gcount);
+
+  TIMER_TOC(timer_doself_grav_pp);
+}
+
+/**
+ * @brief Computes the interaction of the field tensors and multipoles of two
+ * cells symmetrically. Both cells must be active and local to this rank.
+ *
+ * @param r The #runner.
+ * @param ci The first #cell.
+ * @param cj The second #cell.
+ */
+static INLINE void runner_dopair_grav_mm_symmetric(struct runner *r,
+                                                   struct cell *restrict ci,
+                                                   struct cell *restrict cj) {
+
+  /* Some constants */
+  const struct engine *e = r->e;
+  const struct gravity_props *props = e->gravity_properties;
+  const int periodic = e->mesh->periodic;
+  const double dim[3] = {e->mesh->dim[0], e->mesh->dim[1], e->mesh->dim[2]};
+  const float r_s_inv = e->mesh->r_s_inv;
+
+  TIMER_TIC;
+
+  /* The symmetric kernel updates both cells: each must be active and local */
+  if ((!cell_is_active_gravity_mm(ci, e) || ci->nodeID != engine_rank) ||
+      (!cell_is_active_gravity_mm(cj, e) || cj->nodeID != engine_rank))
+    error("Invalid state in symmetric M-M calculation!");
+
+  /* Short-cut to the multipole */
+  const struct multipole *multi_i = &ci->grav.multipole->m_pole;
+  const struct multipole *multi_j = &cj->grav.multipole->m_pole;
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (ci == cj) error("Interacting a cell with itself using M2L");
+
+  if (multi_i->num_gpart == 0)
+    error("Multipole i does not seem to have been set.");
+
+  if (multi_j->num_gpart == 0)
+    error("Multipole j does not seem to have been set.");
+
+  if (ci->grav.multipole->pot.ti_init != e->ti_current)
+    error("ci->grav tensor not initialised.");
+
+  if (cj->grav.multipole->pot.ti_init != e->ti_current)
+    error("cj->grav tensor not initialised.");
+
+  if (ci->grav.ti_old_multipole != e->ti_current)
+    error(
+        "Undrifted multipole ci->grav.ti_old_multipole=%lld ci->nodeID=%d "
+        "cj->nodeID=%d e->ti_current=%lld",
+        ci->grav.ti_old_multipole, ci->nodeID, cj->nodeID, e->ti_current);
+
+  if (cj->grav.ti_old_multipole != e->ti_current)
+    error(
+        "Undrifted multipole cj->grav.ti_old_multipole=%lld cj->nodeID=%d "
+        "ci->nodeID=%d e->ti_current=%lld",
+        cj->grav.ti_old_multipole, cj->nodeID, ci->nodeID, e->ti_current);
+#endif
+
+  /* Let's interact at this level */
+  gravity_M2L_symmetric(&ci->grav.multipole->pot, &cj->grav.multipole->pot,
+                        multi_i, multi_j, ci->grav.multipole->CoM,
+                        cj->grav.multipole->CoM, props, periodic, dim, r_s_inv);
+
+  TIMER_TOC(timer_dopair_grav_mm);
+}
+
+/**
+ * @brief Interacts the field tensor of one cell with the multipole of another
+ * cell (one-way M2L). Nothing is done if ci is inactive or not local.
+ *
+ * @param r The #runner.
+ * @param ci The #cell whose field tensor takes part in the interaction.
+ * @param cj The #cell providing the multipole.
+ */
+static INLINE void runner_dopair_grav_mm_nonsym(
+    struct runner *r, struct cell *restrict ci,
+    const struct cell *restrict cj) {
+
+  /* Engine, mesh and gravity scheme parameters */
+  const struct engine *e = r->e;
+  const float r_s_inv = e->mesh->r_s_inv;
+  const int periodic = e->mesh->periodic;
+  const double dim[3] = {e->mesh->dim[0], e->mesh->dim[1], e->mesh->dim[2]};
+  const struct gravity_props *props = e->gravity_properties;
+
+  TIMER_TIC;
+
+  /* Only an active cell living on this rank gets updated */
+  if (!(cell_is_active_gravity_mm(ci, e) && ci->nodeID == engine_rank)) return;
+
+  /* The multipole acting as the source of the interaction */
+  const struct multipole *mpole_j = &cj->grav.multipole->m_pole;
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (ci == cj) error("Interacting a cell with itself using M2L");
+
+  if (mpole_j->num_gpart == 0)
+    error("Multipole does not seem to have been set.");
+
+  if (ci->grav.multipole->pot.ti_init != e->ti_current)
+    error("ci->grav tensor not initialised.");
+
+  if (cj->grav.ti_old_multipole != e->ti_current)
+    error(
+        "Undrifted multipole cj->grav.ti_old_multipole=%lld cj->nodeID=%d "
+        "ci->nodeID=%d e->ti_current=%lld",
+        cj->grav.ti_old_multipole, cj->nodeID, ci->nodeID, e->ti_current);
+#endif
+
+  /* Apply the M2L kernel between the two cells */
+  gravity_M2L_nonsym(&ci->grav.multipole->pot, mpole_j, ci->grav.multipole->CoM,
+                     cj->grav.multipole->CoM, props, periodic, dim, r_s_inv);
+
+  TIMER_TOC(timer_dopair_grav_mm);
+}
+
+/**
+ * @brief Drifts the multipoles of two cells if needed and runs the M-M
+ * kernel updating whichever of the two cells are active and local.
+ * @param r The #runner object.
+ * @param ci The first #cell.
+ * @param cj The second #cell.
+ */
+static INLINE void runner_dopair_grav_mm(struct runner *r,
+                                         struct cell *restrict ci,
+                                         struct cell *restrict cj) {
+
+  const struct engine *e = r->e;
+
+  /* Which of the two cells are ours to update? */
+  const int update_i =
+      cell_is_active_gravity_mm(ci, e) && (ci->nodeID == e->nodeID);
+  const int update_j =
+      cell_is_active_gravity_mm(cj, e) && (cj->nodeID == e->nodeID);
+
+  /* Bring both multipoles to the current time if they lag behind */
+  if (ci->grav.ti_old_multipole < e->ti_current) cell_drift_multipole(ci, e);
+  if (cj->grav.ti_old_multipole < e->ti_current) cell_drift_multipole(cj, e);
+
+  /* Both sides updated -> symmetric kernel, else the one-way kernel */
+  if (update_i && update_j) {
+    runner_dopair_grav_mm_symmetric(r, ci, cj);
+    return;
+  }
+  if (update_i) runner_dopair_grav_mm_nonsym(r, ci, cj);
+  if (update_j) runner_dopair_grav_mm_nonsym(r, cj, ci);
+}
+
+/**
+ * @brief Runs the M-M interaction on every pair of progenies of the two cells
+ * that was flagged as well-separated at the last rebuild.
+ *
+ * @param r The #runner thread.
+ * @param flags The task flag: a bit-field where bit (8 * i + j) stands for
+ * the pair made of progeny i of ci and progeny j of cj.
+ * @param ci The first #cell.
+ * @param cj The second #cell.
+ */
+void runner_dopair_grav_mm_progenies(struct runner *r, const long long flags,
+                                     struct cell *restrict ci,
+                                     struct cell *restrict cj) {
+
+  /* Visit the 8 x 8 possible pairs of progenies */
+  for (int i = 0; i < 8; i++) {
+    /* Nothing to pair up if this progeny of ci does not exist */
+    struct cell *cpi = ci->progeny[i];
+    if (cpi == NULL) continue;
+
+    for (int j = 0; j < 8; j++) {
+      /* Same for the progeny of cj */
+      struct cell *cpj = cj->progeny[j];
+      if (cpj == NULL) continue;
+
+      /* Position of this pair in the bit-field */
+      const int bit = 8 * i + j;
+      /* Only the pairs agreed upon at the last rebuild go M-M */
+      if (flags & (1ULL << bit)) runner_dopair_grav_mm(r, cpi, cpj);
+    }
+  }
+}
+
+static INLINE void runner_dopair_recursive_grav_pm(struct runner *r,
+                                                   struct cell *ci,
+                                                   const struct cell *cj) {
+  /* Recurse down ci and interact its leaves' gparts with cj's multipole */
+  const struct engine *e = r->e;
+  const int periodic = e->mesh->periodic;
+  const float dim[3] = {(float)e->mesh->dim[0], (float)e->mesh->dim[1],
+                        (float)e->mesh->dim[2]};
+  const float r_s_inv = e->mesh->r_s_inv;
+
+  /* Anything to do here? (ci must be active and local) */
+  if (!(cell_is_active_gravity(ci, e) && ci->nodeID == e->nodeID)) return;
+
+#ifdef SWIFT_DEBUG_CHECKS
+  /* Early abort? */
+  if (ci->grav.count == 0 || cj->grav.count == 0)
+    error("Doing pair gravity on an empty cell !");
+
+  /* Sanity check */
+  if (ci == cj) error("Pair interaction between a cell and itself.");
+
+  if (cj->grav.ti_old_multipole != e->ti_current)
+    error("cj->grav.multipole not drifted.");
+#endif
+
+  /* Can we recurse further? */
+  if (ci->split) {
+
+    /* Loop over ci's children */
+    for (int k = 0; k < 8; k++) {
+      if (ci->progeny[k] != NULL)
+        runner_dopair_recursive_grav_pm(r, ci->progeny[k], cj);
+    }
+
+    /* Ok, let's do the interaction here */
+  } else {
+
+    /* ci is a leaf: gather its particles in a cache */
+
+    /* Cache to play with (owned by the runner) */
+    struct gravity_cache *const ci_cache = &r->ci_gravity_cache;
+
+    /* Padded count: smallest multiple of VEC_SIZE strictly above gcount_i */
+    const int gcount_i = ci->grav.count;
+    const int gcount_padded_i = gcount_i - (gcount_i % VEC_SIZE) + VEC_SIZE;
+
+#ifdef SWIFT_DEBUG_CHECKS
+    /* Check that we fit in cache */
+    if (gcount_i > ci_cache->count)
+      error("Not enough space in the cache! gcount_i=%d", gcount_i);
+#endif
+
+    /* Recover the multipole info and the CoM locations */
+    const struct multipole *multi_j = &cj->grav.multipole->m_pole;
+    const float r_max = cj->grav.multipole->r_max;
+    const float CoM_j[3] = {(float)(cj->grav.multipole->CoM[0]),
+                            (float)(cj->grav.multipole->CoM[1]),
+                            (float)(cj->grav.multipole->CoM[2])};
+
+    /* Fill the cache */
+    gravity_cache_populate_all_mpole(
+        e->max_active_bin, periodic, dim, ci_cache, ci->grav.parts, gcount_i,
+        gcount_padded_i, ci, CoM_j, r_max * r_max, e->gravity_properties);
+
+    /* Can we use the Newtonian version or do we need the truncated one ? */
+    if (!periodic) {
+
+      runner_dopair_grav_pm_full(ci_cache, gcount_padded_i, CoM_j, multi_j,
+                                 periodic, dim, e, ci->grav.parts, gcount_i,
+                                 cj);
+
+    } else {
+
+      runner_dopair_grav_pm_truncated(ci_cache, gcount_padded_i, CoM_j, multi_j,
+                                      dim, r_s_inv, e, ci->grav.parts, gcount_i,
+                                      cj);
+    }
+
+    /* Write back to the particles */
+    gravity_cache_write_back(ci_cache, ci->grav.parts, gcount_i);
+  }
+}
+
+/**
+ * @brief Computes the interaction of all the particles in a cell with all the
+ * particles of another cell.
+ *
+ * This function will try to recurse as far down the tree as possible and only
+ * default to direct summation if there is no better option.
+ *
+ * If using periodic BCs, we will abort the recursion if the distance between
+ * the cells is larger than the set threshold (the mesh's r_cut_max).
+ *
+ * @param r The #runner.
+ * @param ci The first #cell.
+ * @param cj The other #cell.
+ * @param gettimer Are we timing this ?
+ */
+void runner_dopair_recursive_grav(struct runner *r, struct cell *ci,
+                                  struct cell *cj, int gettimer) {
+
+  /* Some constants */
+  const struct engine *e = r->e;
+  const int nodeID = e->nodeID;
+  const int periodic = e->mesh->periodic;
+  const double dim[3] = {e->mesh->dim[0], e->mesh->dim[1], e->mesh->dim[2]};
+  const double theta_crit2 = e->gravity_properties->theta_crit2;
+  const double max_distance = e->mesh->r_cut_max;
+
+  /* Nothing to do unless at least one cell is both active and local */
+  if (!((cell_is_active_gravity(ci, e) && ci->nodeID == nodeID) ||
+        (cell_is_active_gravity(cj, e) && cj->nodeID == nodeID)))
+    return;
+
+#ifdef SWIFT_DEBUG_CHECKS
+
+  const int gcount_i = ci->grav.count;
+  const int gcount_j = cj->grav.count;
+
+  /* Early abort? */
+  if (gcount_i == 0 || gcount_j == 0)
+    error("Doing pair gravity on an empty cell !");
+
+  /* Sanity check */
+  if (ci == cj) error("Pair interaction between a cell and itself.");
+
+  if (cell_is_active_gravity(ci, e) &&
+      ci->grav.ti_old_multipole != e->ti_current)
+    error("ci->grav.multipole not drifted.");
+  if (cell_is_active_gravity(cj, e) &&
+      cj->grav.ti_old_multipole != e->ti_current)
+    error("cj->grav.multipole not drifted.");
+#endif
+
+  TIMER_TIC;
+
+  /* Recover the multipole information */
+  struct gravity_tensors *const multi_i = ci->grav.multipole;
+  struct gravity_tensors *const multi_j = cj->grav.multipole;
+
+  /* Get the distance between the CoMs */
+  double dx = multi_i->CoM[0] - multi_j->CoM[0];
+  double dy = multi_i->CoM[1] - multi_j->CoM[1];
+  double dz = multi_i->CoM[2] - multi_j->CoM[2];
+
+  /* Apply BC */
+  if (periodic) {
+    dx = nearest(dx, dim[0]);
+    dy = nearest(dy, dim[1]);
+    dz = nearest(dz, dim[2]);
+  }
+  const double r2 = dx * dx + dy * dy + dz * dz;
+
+  /* Minimal distance between any 2 particles in the two cells */
+  const double r_lr_check = sqrt(r2) - (multi_i->r_max + multi_j->r_max);
+
+  /* Are we beyond the distance where the truncated forces are 0? */
+  if (periodic && r_lr_check > max_distance) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+    /* Need to account for the interactions we missed */
+    if (cell_is_active_gravity(ci, e))
+      multi_i->pot.num_interacted += multi_j->m_pole.num_gpart;
+    if (cell_is_active_gravity(cj, e))
+      multi_j->pot.num_interacted += multi_i->m_pole.num_gpart;
+#endif
+    return;
+  }
+
+  /* OK, we actually need to compute this pair. Let's find the cheapest
+   * option... */
+
+  /* Can we use M-M interactions ? */
+  if (gravity_M2L_accept(multi_i->r_max, multi_j->r_max, theta_crit2, r2,
+                         multi_i->m_pole.max_softening,
+                         multi_j->m_pole.max_softening)) {
+
+    /* Go M-M */
+    runner_dopair_grav_mm(r, ci, cj);
+
+  } else if (!ci->split && !cj->split) {
+
+    /* We have two leaves. Go P-P. */
+    runner_dopair_grav_pp(r, ci, cj, /*symmetric*/ 1, /*allow_mpoles*/ 1);
+
+  } else {
+
+    /* Alright, we'll have to split and recurse. */
+    /* We know at least one of ci and cj is splittable */
+
+    const double ri_max = multi_i->r_max;
+    const double rj_max = multi_j->r_max;
+
+    /* Split the larger of the two cells and start over again */
+    if (ri_max > rj_max) {
+
+      /* Can we actually split that interaction ? */
+      if (ci->split) {
+
+        /* Loop over ci's children */
+        for (int k = 0; k < 8; k++) {
+          if (ci->progeny[k] != NULL)
+            runner_dopair_recursive_grav(r, ci->progeny[k], cj, 0);
+        }
+
+      } else {
+        /* ci is the larger cell but a leaf, so cj must be the split one */
+
+        /* MATTHIEU: This could maybe be replaced by P-M interactions ?  */
+
+        /* Loop over cj's children */
+        for (int k = 0; k < 8; k++) {
+          if (cj->progeny[k] != NULL)
+            runner_dopair_recursive_grav(r, ci, cj->progeny[k], 0);
+        }
+      }
+    } else {
+
+      /* Can we actually split that interaction ? */
+      if (cj->split) {
+
+        /* Loop over cj's children */
+        for (int k = 0; k < 8; k++) {
+          if (cj->progeny[k] != NULL)
+            runner_dopair_recursive_grav(r, ci, cj->progeny[k], 0);
+        }
+
+      } else {
+        /* cj is a leaf, so ci must be the split one */
+
+        /* MATTHIEU: This could maybe be replaced by P-M interactions ?  */
+
+        /* Loop over ci's children */
+        for (int k = 0; k < 8; k++) {
+          if (ci->progeny[k] != NULL)
+            runner_dopair_recursive_grav(r, ci->progeny[k], cj, 0);
+        }
+      }
+    }
+  }
+
+  if (gettimer) TIMER_TOC(timer_dosub_pair_grav);
+}
+
+/**
+ * @brief Computes the self-gravity of a cell by walking down its tree.
+ *
+ * Split cells are handled by recursing into their progenies (each with
+ * itself and with its siblings); only un-split cells go to the P-P routine.
+ *
+ * @param r The #runner.
+ * @param c The #cell.
+ * @param gettimer Are we timing this ?
+ */
+void runner_doself_recursive_grav(struct runner *r, struct cell *c,
+                                  int gettimer) {
+
+  /* Handle on the engine */
+  const struct engine *e = r->e;
+
+#ifdef SWIFT_DEBUG_CHECKS
+  /* Empty cells should never reach this point */
+  if (c->grav.count == 0) error("Doing self gravity on an empty cell !");
+#endif
+
+  TIMER_TIC;
+
+  /* Inactive cells are left alone */
+  if (!cell_is_active_gravity(c, e)) return;
+
+  if (!c->split) {
+
+    /* Un-split cell: interact its particles with each other directly */
+    runner_doself_grav_pp(r, c);
+
+  } else {
+
+    /* Split cell: each progeny interacts with itself and then with every
+     * progeny of higher index, so that each sibling pair is visited once. */
+    for (int j = 0; j < 8; j++) {
+
+      struct cell *cpj = c->progeny[j];
+      if (cpj == NULL) continue;
+
+      runner_doself_recursive_grav(r, cpj, 0);
+
+      for (int k = j + 1; k < 8; k++) {
+        struct cell *cpk = c->progeny[k];
+        if (cpk == NULL) continue;
+
+        runner_dopair_recursive_grav(r, cpj, cpk, 0);
+      }
+    }
+  }
+
+  if (gettimer) TIMER_TOC(timer_dosub_self_grav);
+}
+
+/**
+ * @brief Performs the M-M interactions between a given cell and all the
+ * top-level cells that are far enough from that cell's top-level parent.
+ *
+ * @param r The thread #runner.
+ * @param ci The #cell of interest.
+ * @param timer Are we timing this ?
+ */
+void runner_do_grav_long_range(struct runner *r, struct cell *ci, int timer) {
+
+  /* Some constants */
+  const struct engine *e = r->e;
+  const int periodic = e->mesh->periodic;
+  const double dim[3] = {e->mesh->dim[0], e->mesh->dim[1], e->mesh->dim[2]};
+  const double theta_crit2 = e->gravity_properties->theta_crit2;
+  const double max_distance2 = e->mesh->r_cut_max * e->mesh->r_cut_max;
+
+  TIMER_TIC;
+
+  /* Recover the list of top-level cells */
+  struct cell *cells = e->s->cells_top;
+  int *cells_with_particles = e->s->cells_with_particles_top;
+  const int nr_cells_with_particles = e->s->nr_cells_with_particles;
+
+  /* Anything to do here? */
+  if (!cell_is_active_gravity(ci, e)) return;
+
+  if (ci->nodeID != engine_rank)
+    error("Non-local cell in long-range gravity task!");
+
+  /* Drift the multipole of ci if it lags behind the current time */
+  if (ci->grav.ti_old_multipole < e->ti_current) cell_drift_multipole(ci, e);
+
+  /* Get this cell's multipole information */
+  struct gravity_tensors *const multi_i = ci->grav.multipole;
+
+  /* Find this cell's top-level (great-)parent */
+  struct cell *top = ci;
+  while (top->parent != NULL) top = top->parent;
+
+  /* Recover the top-level multipole (for distance checks) */
+  struct gravity_tensors *const multi_top = top->grav.multipole;
+  const double CoM_rebuild_top[3] = {multi_top->CoM_rebuild[0],
+                                     multi_top->CoM_rebuild[1],
+                                     multi_top->CoM_rebuild[2]};
+
+  /* Loop over all the top-level cells and go for a M-M interaction if
+   * well-separated */
+  for (int n = 0; n < nr_cells_with_particles; ++n) {
+
+    /* Handle on the top-level cell and its gravity business */
+    const struct cell *cj = &cells[cells_with_particles[n]];
+    const struct gravity_tensors *const multi_j = cj->grav.multipole;
+
+    /* Avoid self contributions */
+    if (top == cj) continue;
+
+    /* Skip empty cells */
+    if (multi_j->m_pole.M_000 == 0.f) continue;
+
+    /* Can we escape early in the periodic BC case? */
+    if (periodic) {
+
+      /* Minimal distance between any pair of particles */
+      const double min_radius2 =
+          cell_min_dist2_same_size(top, cj, periodic, dim);
+
+      /* Are we beyond the distance where the truncated forces are 0 ? */
+      if (min_radius2 > max_distance2) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+        /* Need to account for the interactions we missed */
+        multi_i->pot.num_interacted += multi_j->m_pole.num_gpart;
+#endif
+
+        /* Record that this multipole received a contribution */
+        multi_i->pot.interacted = 1;
+
+        /* We are done here. */
+        continue;
+      }
+    }
+
+    /* Get the distance between the CoMs at the last rebuild */
+    double dx_r = CoM_rebuild_top[0] - multi_j->CoM_rebuild[0];
+    double dy_r = CoM_rebuild_top[1] - multi_j->CoM_rebuild[1];
+    double dz_r = CoM_rebuild_top[2] - multi_j->CoM_rebuild[2];
+
+    /* Apply BC */
+    if (periodic) {
+      dx_r = nearest(dx_r, dim[0]);
+      dy_r = nearest(dy_r, dim[1]);
+      dz_r = nearest(dz_r, dim[2]);
+    }
+    const double r2_rebuild = dx_r * dx_r + dy_r * dy_r + dz_r * dz_r;
+
+    /* Are we in charge of this cell pair? */
+    if (gravity_M2L_accept(multi_top->r_max_rebuild, multi_j->r_max_rebuild,
+                           theta_crit2, r2_rebuild,
+                           multi_top->m_pole.max_softening,
+                           multi_j->m_pole.max_softening)) {
+
+      /* Interact the field tensor of ci with the multipole of cj (M-M) */
+      runner_dopair_grav_mm_nonsym(r, ci, cj);
+      /* Disabled alternative: runner_dopair_recursive_grav_pm(r, ci, cj) */
+
+      /* Record that this multipole received a contribution */
+      multi_i->pot.interacted = 1;
+
+    } /* We are in charge of this pair */
+  }   /* Loop over top-level cells */
+
+  if (timer) TIMER_TOC(timer_dograv_long_range);
+}
diff --git a/src/runner_doiact_grav.h b/src/runner_doiact_grav.h
index b4ee8225a7aada8cf595ae7bca251d61b5226f64..34f3e9ec147574357620cc8f485889b87880f06e 100644
--- a/src/runner_doiact_grav.h
+++ b/src/runner_doiact_grav.h
@@ -20,1810 +20,30 @@
 #ifndef SWIFT_RUNNER_DOIACT_GRAV_H
 #define SWIFT_RUNNER_DOIACT_GRAV_H
 
-/* Includes. */
-#include "active.h"
-#include "cell.h"
-#include "gravity.h"
-#include "gravity_cache.h"
-#include "gravity_iact.h"
-#include "inline.h"
-#include "part.h"
-#include "space_getsid.h"
-#include "timers.h"
+#include "../config.h"
 
-/**
- * @brief Recursively propagate the multipoles down the tree by applying the
- * L2L and L2P kernels.
- *
- * @param r The #runner.
- * @param c The #cell we are working on.
- * @param timer Are we timing this ?
- */
-static INLINE void runner_do_grav_down(struct runner *r, struct cell *c,
-                                       int timer) {
-
-  /* Some constants */
-  const struct engine *e = r->e;
-
-  TIMER_TIC;
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (c->grav.ti_old_multipole != e->ti_current)
-    error("c->multipole not drifted.");
-  if (c->grav.multipole->pot.ti_init != e->ti_current)
-    error("c->field tensor not initialised");
-#endif
-
-  if (c->split) {
-
-    /* Node case */
-
-    /* Add the field-tensor to all the 8 progenitors */
-    for (int k = 0; k < 8; ++k) {
-      struct cell *cp = c->progeny[k];
-
-      /* Do we have a progenitor with any active g-particles ? */
-      if (cp != NULL && cell_is_active_gravity(cp, e)) {
-
-#ifdef SWIFT_DEBUG_CHECKS
-        if (cp->grav.ti_old_multipole != e->ti_current)
-          error("cp->multipole not drifted.");
-        if (cp->grav.multipole->pot.ti_init != e->ti_current)
-          error("cp->field tensor not initialised");
-#endif
-        /* If the tensor received any contribution, push it down */
-        if (c->grav.multipole->pot.interacted) {
-
-          struct grav_tensor shifted_tensor;
-
-          /* Shift the field tensor */
-          gravity_L2L(&shifted_tensor, &c->grav.multipole->pot,
-                      cp->grav.multipole->CoM, c->grav.multipole->CoM);
-
-          /* Add it to this level's tensor */
-          gravity_field_tensors_add(&cp->grav.multipole->pot, &shifted_tensor);
-        }
-
-        /* Recurse */
-        runner_do_grav_down(r, cp, 0);
-      }
-    }
-
-  } else {
-
-    /* Leaf case */
-
-    /* We can abort early if no interactions via multipole happened */
-    if (!c->grav.multipole->pot.interacted) return;
-
-    if (!cell_are_gpart_drifted(c, e)) error("Un-drifted gparts");
-
-    /* Cell properties */
-    struct gpart *gparts = c->grav.parts;
-    const int gcount = c->grav.count;
-    const struct grav_tensor *pot = &c->grav.multipole->pot;
-    const double CoM[3] = {c->grav.multipole->CoM[0], c->grav.multipole->CoM[1],
-                           c->grav.multipole->CoM[2]};
-
-    /* Apply accelerations to the particles */
-    for (int i = 0; i < gcount; ++i) {
-
-      /* Get a handle on the gpart */
-      struct gpart *gp = &gparts[i];
-
-      /* Update if active */
-      if (gpart_is_active(gp, e)) {
-
-#ifdef SWIFT_DEBUG_CHECKS
-        /* Check that particles have been drifted to the current time */
-        if (gp->ti_drift != e->ti_current)
-          error("gpart not drifted to current time");
-        if (c->grav.multipole->pot.ti_init != e->ti_current)
-          error("c->field tensor not initialised");
-
-        /* Check that we are not updated an inhibited particle */
-        if (gpart_is_inhibited(gp, e)) error("Updating an inhibited particle!");
-
-        /* Check that the particle was initialised */
-        if (gp->initialised == 0)
-          error("Adding forces to an un-initialised gpart.");
-#endif
-        /* Apply the kernel */
-        gravity_L2P(pot, CoM, gp);
-      }
-    }
-  }
-
-  if (timer) TIMER_TOC(timer_dograv_down);
-}
-
-/**
- * @brief Compute the non-truncated gravity interactions between all particles
- * of a cell and the particles of the other cell.
- *
- * The calculation is performed non-symmetrically using the pre-filled
- * #gravity_cache structures. The loop over the j cache should auto-vectorize.
- *
- * @param ci_cache #gravity_cache contaning the particles to be updated.
- * @param cj_cache #gravity_cache contaning the source particles.
- * @param gcount_i The number of particles in the cell i.
- * @param gcount_padded_j The number of particles in the cell j padded to the
- * vector length.
- * @param periodic Is the calculation using periodic BCs ?
- * @param dim The size of the simulation volume.
- *
- * @param e The #engine (for debugging checks only).
- * @param gparts_i The #gpart in cell i (for debugging checks only).
- * @param gparts_j The #gpart in cell j (for debugging checks only).
- * @param gcount_j The number of particles in the cell j (for debugging checks
- * only).
- */
-static INLINE void runner_dopair_grav_pp_full(
-    struct gravity_cache *restrict ci_cache,
-    struct gravity_cache *restrict cj_cache, const int gcount_i,
-    const int gcount_j, const int gcount_padded_j, const int periodic,
-    const float dim[3], const struct engine *restrict e,
-    struct gpart *restrict gparts_i, const struct gpart *restrict gparts_j) {
-
-  /* Loop over all particles in ci... */
-  for (int pid = 0; pid < gcount_i; pid++) {
-
-    /* Skip inactive particles */
-    if (!ci_cache->active[pid]) continue;
-
-    /* Skip particle that can use the multipole */
-    if (ci_cache->use_mpole[pid]) continue;
-
-#ifdef SWIFT_DEBUG_CHECKS
-    if (!gpart_is_active(&gparts_i[pid], e))
-      error("Inactive particle went through the cache");
-#endif
-
-    const float x_i = ci_cache->x[pid];
-    const float y_i = ci_cache->y[pid];
-    const float z_i = ci_cache->z[pid];
-    const float h_i = ci_cache->epsilon[pid];
-
-    /* Local accumulators for the acceleration and potential */
-    float a_x = 0.f, a_y = 0.f, a_z = 0.f, pot = 0.f;
-
-    /* Make the compiler understand we are in happy vectorization land */
-    swift_align_information(float, cj_cache->x, SWIFT_CACHE_ALIGNMENT);
-    swift_align_information(float, cj_cache->y, SWIFT_CACHE_ALIGNMENT);
-    swift_align_information(float, cj_cache->z, SWIFT_CACHE_ALIGNMENT);
-    swift_align_information(float, cj_cache->m, SWIFT_CACHE_ALIGNMENT);
-    swift_align_information(float, cj_cache->epsilon, SWIFT_CACHE_ALIGNMENT);
-    swift_assume_size(gcount_padded_j, VEC_SIZE);
-
-    /* Loop over every particle in the other cell. */
-    for (int pjd = 0; pjd < gcount_padded_j; pjd++) {
-
-      /* Get info about j */
-      const float x_j = cj_cache->x[pjd];
-      const float y_j = cj_cache->y[pjd];
-      const float z_j = cj_cache->z[pjd];
-      const float mass_j = cj_cache->m[pjd];
-      const float h_j = cj_cache->epsilon[pjd];
-
-      /* Compute the pairwise distance. */
-      float dx = x_j - x_i;
-      float dy = y_j - y_i;
-      float dz = z_j - z_i;
-
-      /* Correct for periodic BCs */
-      if (periodic) {
-        dx = nearestf(dx, dim[0]);
-        dy = nearestf(dy, dim[1]);
-        dz = nearestf(dz, dim[2]);
-      }
-
-      const float r2 = dx * dx + dy * dy + dz * dz;
-
-      /* Pick the maximal softening length of i and j */
-      const float h = max(h_i, h_j);
-      const float h2 = h * h;
-      const float h_inv = 1.f / h;
-      const float h_inv_3 = h_inv * h_inv * h_inv;
-
-#ifdef SWIFT_DEBUG_CHECKS
-      if (r2 == 0.f && h2 == 0.)
-        error("Interacting particles with 0 distance and 0 softening.");
-
-      /* Check that particles have been drifted to the current time */
-      if (gparts_i[pid].ti_drift != e->ti_current)
-        error("gpi not drifted to current time");
-      if (pjd < gcount_j && gparts_j[pjd].ti_drift != e->ti_current &&
-          !gpart_is_inhibited(&gparts_j[pjd], e))
-        error("gpj not drifted to current time");
-
-      /* Check that we are not updated an inhibited particle */
-      if (gpart_is_inhibited(&gparts_i[pid], e))
-        error("Updating an inhibited particle!");
-
-      /* Check that the particle we interact with was not inhibited */
-      if (pjd < gcount_j && gpart_is_inhibited(&gparts_j[pjd], e) &&
-          mass_j != 0.f)
-        error("Inhibited particle used as gravity source.");
-
-      /* Check that the particle was initialised */
-      if (gparts_i[pid].initialised == 0)
-        error("Adding forces to an un-initialised gpart.");
-#endif
-
-      /* Interact! */
-      float f_ij, pot_ij;
-      runner_iact_grav_pp_full(r2, h2, h_inv, h_inv_3, mass_j, &f_ij, &pot_ij);
-
-      /* Store it back */
-      a_x += f_ij * dx;
-      a_y += f_ij * dy;
-      a_z += f_ij * dz;
-      pot += pot_ij;
-
-#ifdef SWIFT_DEBUG_CHECKS
-      /* Update the interaction counter if it's not a padded gpart */
-      if (pjd < gcount_j && !gpart_is_inhibited(&gparts_j[pjd], e))
-        gparts_i[pid].num_interacted++;
-#endif
-    }
-
-    /* Store everything back in cache */
-    ci_cache->a_x[pid] += a_x;
-    ci_cache->a_y[pid] += a_y;
-    ci_cache->a_z[pid] += a_z;
-    ci_cache->pot[pid] += pot;
-  }
-}
-
-/**
- * @brief Compute the truncated gravity interactions between all particles
- * of a cell and the particles of the other cell.
- *
- * The calculation is performed non-symmetrically using the pre-filled
- * #gravity_cache structures. The loop over the j cache should auto-vectorize.
- *
- * This function only makes sense in periodic BCs.
- *
- * @param ci_cache #gravity_cache contaning the particles to be updated.
- * @param cj_cache #gravity_cache contaning the source particles.
- * @param gcount_i The number of particles in the cell i.
- * @param gcount_padded_j The number of particles in the cell j padded to the
- * vector length.
- * @param dim The size of the simulation volume.
- * @param r_s_inv The inverse of the gravity-mesh smoothing-scale.
- *
- * @param e The #engine (for debugging checks only).
- * @param gparts_i The #gpart in cell i (for debugging checks only).
- * @param gparts_j The #gpart in cell j (for debugging checks only).
- * @param gcount_j The number of particles in the cell j (for debugging checks
- * only).
- */
-static INLINE void runner_dopair_grav_pp_truncated(
-    struct gravity_cache *restrict ci_cache,
-    struct gravity_cache *restrict cj_cache, const int gcount_i,
-    const int gcount_j, const int gcount_padded_j, const float dim[3],
-    const float r_s_inv, const struct engine *restrict e,
-    struct gpart *restrict gparts_i, const struct gpart *restrict gparts_j) {
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (!e->s->periodic)
-    error("Calling truncated PP function in non-periodic setup.");
-#endif
-
-  /* Loop over all particles in ci... */
-  for (int pid = 0; pid < gcount_i; pid++) {
-
-    /* Skip inactive particles */
-    if (!ci_cache->active[pid]) continue;
-
-    /* Skip particle that can use the multipole */
-    if (ci_cache->use_mpole[pid]) continue;
-
-#ifdef SWIFT_DEBUG_CHECKS
-    if (!gpart_is_active(&gparts_i[pid], e))
-      error("Inactive particle went through the cache");
-#endif
-
-    const float x_i = ci_cache->x[pid];
-    const float y_i = ci_cache->y[pid];
-    const float z_i = ci_cache->z[pid];
-    const float h_i = ci_cache->epsilon[pid];
-
-    /* Local accumulators for the acceleration and potential */
-    float a_x = 0.f, a_y = 0.f, a_z = 0.f, pot = 0.f;
-
-    /* Make the compiler understand we are in happy vectorization land */
-    swift_align_information(float, cj_cache->x, SWIFT_CACHE_ALIGNMENT);
-    swift_align_information(float, cj_cache->y, SWIFT_CACHE_ALIGNMENT);
-    swift_align_information(float, cj_cache->z, SWIFT_CACHE_ALIGNMENT);
-    swift_align_information(float, cj_cache->m, SWIFT_CACHE_ALIGNMENT);
-    swift_align_information(float, cj_cache->epsilon, SWIFT_CACHE_ALIGNMENT);
-    swift_assume_size(gcount_padded_j, VEC_SIZE);
-
-    /* Loop over every particle in the other cell. */
-    for (int pjd = 0; pjd < gcount_padded_j; pjd++) {
-
-      /* Get info about j */
-      const float x_j = cj_cache->x[pjd];
-      const float y_j = cj_cache->y[pjd];
-      const float z_j = cj_cache->z[pjd];
-      const float mass_j = cj_cache->m[pjd];
-      const float h_j = cj_cache->epsilon[pjd];
-
-      /* Compute the pairwise distance. */
-      float dx = x_j - x_i;
-      float dy = y_j - y_i;
-      float dz = z_j - z_i;
-
-      /* Correct for periodic BCs */
-      dx = nearestf(dx, dim[0]);
-      dy = nearestf(dy, dim[1]);
-      dz = nearestf(dz, dim[2]);
-
-      const float r2 = dx * dx + dy * dy + dz * dz;
-
-      /* Pick the maximal softening length of i and j */
-      const float h = max(h_i, h_j);
-      const float h2 = h * h;
-      const float h_inv = 1.f / h;
-      const float h_inv_3 = h_inv * h_inv * h_inv;
-
-#ifdef SWIFT_DEBUG_CHECKS
-      if (r2 == 0.f && h2 == 0.)
-        error("Interacting particles with 0 distance and 0 softening.");
-
-      /* Check that particles have been drifted to the current time */
-      if (gparts_i[pid].ti_drift != e->ti_current)
-        error("gpi not drifted to current time");
-      if (pjd < gcount_j && gparts_j[pjd].ti_drift != e->ti_current &&
-          !gpart_is_inhibited(&gparts_j[pjd], e))
-        error("gpj not drifted to current time");
-
-      /* Check that we are not updated an inhibited particle */
-      if (gpart_is_inhibited(&gparts_i[pid], e))
-        error("Updating an inhibited particle!");
-
-      /* Check that the particle we interact with was not inhibited */
-      if (pjd < gcount_j && gpart_is_inhibited(&gparts_j[pjd], e) &&
-          mass_j != 0.f)
-        error("Inhibited particle used as gravity source.");
-
-      /* Check that the particle was initialised */
-      if (gparts_i[pid].initialised == 0)
-        error("Adding forces to an un-initialised gpart.");
-#endif
-
-      /* Interact! */
-      float f_ij, pot_ij;
-      runner_iact_grav_pp_truncated(r2, h2, h_inv, h_inv_3, mass_j, r_s_inv,
-                                    &f_ij, &pot_ij);
-
-      /* Store it back */
-      a_x += f_ij * dx;
-      a_y += f_ij * dy;
-      a_z += f_ij * dz;
-      pot += pot_ij;
-
-#ifdef SWIFT_DEBUG_CHECKS
-      /* Update the interaction counter if it's not a padded gpart */
-      if (pjd < gcount_j && !gpart_is_inhibited(&gparts_j[pjd], e))
-        gparts_i[pid].num_interacted++;
-#endif
-    }
-
-    /* Store everything back in cache */
-    ci_cache->a_x[pid] += a_x;
-    ci_cache->a_y[pid] += a_y;
-    ci_cache->a_z[pid] += a_z;
-    ci_cache->pot[pid] += pot;
-  }
-}
-
-/**
- * @brief Compute the gravity interactions between all particles
- * of a cell and the multipole of the other cell.
- *
- * The calculation is performedusing the pre-filled
- * #gravity_cache structure. The loop over the i cache should auto-vectorize.
- *
- * @param ci_cache #gravity_cache contaning the particles to be updated.
- * @param gcount_padded_i The number of particles in the cell i padded to the
- * vector length.
- * @param CoM_j Position of the #multipole in #cell j.
- * @param multi_j The #multipole in #cell j.
- * @param periodic Is the calculation using periodic BCs ?
- * @param dim The size of the simulation volume.
- *
- * @param e The #engine (for debugging checks only).
- * @param gparts_i The #gpart in cell i (for debugging checks only).
- * @param gcount_i The number of particles in the cell i (for debugging checks
- * only).
- * @param cj The #cell j (for debugging checks only).
- */
-static INLINE void runner_dopair_grav_pm_full(
-    struct gravity_cache *ci_cache, const int gcount_padded_i,
-    const float CoM_j[3], const struct multipole *restrict multi_j,
-    const int periodic, const float dim[3], const struct engine *restrict e,
-    struct gpart *restrict gparts_i, const int gcount_i,
-    const struct cell *restrict cj) {
-
-  /* Make the compiler understand we are in happy vectorization land */
-  swift_declare_aligned_ptr(float, x, ci_cache->x, SWIFT_CACHE_ALIGNMENT);
-  swift_declare_aligned_ptr(float, y, ci_cache->y, SWIFT_CACHE_ALIGNMENT);
-  swift_declare_aligned_ptr(float, z, ci_cache->z, SWIFT_CACHE_ALIGNMENT);
-  swift_declare_aligned_ptr(float, epsilon, ci_cache->epsilon,
-                            SWIFT_CACHE_ALIGNMENT);
-  swift_declare_aligned_ptr(float, a_x, ci_cache->a_x, SWIFT_CACHE_ALIGNMENT);
-  swift_declare_aligned_ptr(float, a_y, ci_cache->a_y, SWIFT_CACHE_ALIGNMENT);
-  swift_declare_aligned_ptr(float, a_z, ci_cache->a_z, SWIFT_CACHE_ALIGNMENT);
-  swift_declare_aligned_ptr(float, pot, ci_cache->pot, SWIFT_CACHE_ALIGNMENT);
-  swift_declare_aligned_ptr(int, active, ci_cache->active,
-                            SWIFT_CACHE_ALIGNMENT);
-  swift_declare_aligned_ptr(int, use_mpole, ci_cache->use_mpole,
-                            SWIFT_CACHE_ALIGNMENT);
-  swift_assume_size(gcount_padded_i, VEC_SIZE);
-
-  /* Loop over all particles in ci... */
-  for (int pid = 0; pid < gcount_padded_i; pid++) {
-
-    /* Skip inactive particles */
-    if (!active[pid]) continue;
-
-    /* Skip particle that cannot use the multipole */
-    if (!use_mpole[pid]) continue;
-
-#ifdef SWIFT_DEBUG_CHECKS
-    if (pid < gcount_i && !gpart_is_active(&gparts_i[pid], e))
-      error("Active particle went through the cache");
-
-    /* Check that particles have been drifted to the current time */
-    if (gparts_i[pid].ti_drift != e->ti_current)
-      error("gpi not drifted to current time");
-
-    /* Check that we are not updated an inhibited particle */
-    if (gpart_is_inhibited(&gparts_i[pid], e))
-      error("Updating an inhibited particle!");
-
-    /* Check that the particle was initialised */
-    if (gparts_i[pid].initialised == 0)
-      error("Adding forces to an un-initialised gpart.");
-
-    if (pid >= gcount_i) error("Adding forces to padded particle");
-#endif
-
-    const float x_i = x[pid];
-    const float y_i = y[pid];
-    const float z_i = z[pid];
-
-    /* Some powers of the softening length */
-    const float h_i = epsilon[pid];
-    const float h_inv_i = 1.f / h_i;
-
-    /* Distance to the Multipole */
-    float dx = CoM_j[0] - x_i;
-    float dy = CoM_j[1] - y_i;
-    float dz = CoM_j[2] - z_i;
-
-    /* Apply periodic BCs? */
-    if (periodic) {
-      dx = nearestf(dx, dim[0]);
-      dy = nearestf(dy, dim[1]);
-      dz = nearestf(dz, dim[2]);
-    }
-
-    const float r2 = dx * dx + dy * dy + dz * dz;
-
-#ifdef SWIFT_DEBUG_CHECKS
-    const float r_max_j = cj->grav.multipole->r_max;
-    const float r_max2 = r_max_j * r_max_j;
-    const float theta_crit2 = e->gravity_properties->theta_crit2;
-
-    /* Note: 0.99 and 1.1 to avoid FP rounding false-positives */
-    if (!gravity_M2P_accept(r_max2, theta_crit2 * 1.1, r2, 0.99 * h_i))
-      error(
-          "use_mpole[i] set when M2P accept fails CoM=[%e %e %e] pos=[%e %e "
-          "%e], rmax=%e r=%e epsilon=%e",
-          CoM_j[0], CoM_j[1], CoM_j[2], x_i, y_i, z_i, r_max_j, sqrtf(r2), h_i);
-#endif
-
-    /* Interact! */
-    float f_x, f_y, f_z, pot_ij;
-    runner_iact_grav_pm_full(dx, dy, dz, r2, h_i, h_inv_i, multi_j, &f_x, &f_y,
-                             &f_z, &pot_ij);
-
-    /* Store it back */
-    a_x[pid] += f_x;
-    a_y[pid] += f_y;
-    a_z[pid] += f_z;
-    pot[pid] += pot_ij;
-
-#ifdef SWIFT_DEBUG_CHECKS
-    /* Update the interaction counter */
-    if (pid < gcount_i)
-      gparts_i[pid].num_interacted += cj->grav.multipole->m_pole.num_gpart;
-#endif
-  }
-}
-
-/**
- * @brief Compute the gravity interactions between all particles
- * of a cell and the multipole of the other cell.
- *
- * The calculation is performedusing the pre-filled
- * #gravity_cache structure. The loop over the i cache should auto-vectorize.
- *
- * This function only makes sense in periodic BCs.
- *
- * @param ci_cache #gravity_cache contaning the particles to be updated.
- * @param gcount_padded_i The number of particles in the cell i padded to the
- * vector length.
- * @param CoM_j Position of the #multipole in #cell j.
- * @param multi_j The #multipole in #cell j.
- * @param dim The size of the simulation volume.
- * @param r_s_inv The inverse of the gravity-mesh smoothing-scale.
- *
- * @param e The #engine (for debugging checks only).
- * @param gparts_i The #gpart in cell i (for debugging checks only).
- * @param gcount_i The number of particles in the cell i (for debugging checks
- * only).
- * @param cj The #cell j (for debugging checks only).
- */
-static INLINE void runner_dopair_grav_pm_truncated(
-    struct gravity_cache *ci_cache, const int gcount_padded_i,
-    const float CoM_j[3], const struct multipole *restrict multi_j,
-    const float dim[3], const float r_s_inv, const struct engine *restrict e,
-    struct gpart *restrict gparts_i, const int gcount_i,
-    const struct cell *restrict cj) {
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (!e->s->periodic)
-    error("Calling truncated PP function in non-periodic setup.");
-#endif
-
-  /* Make the compiler understand we are in happy vectorization land */
-  swift_declare_aligned_ptr(float, x, ci_cache->x, SWIFT_CACHE_ALIGNMENT);
-  swift_declare_aligned_ptr(float, y, ci_cache->y, SWIFT_CACHE_ALIGNMENT);
-  swift_declare_aligned_ptr(float, z, ci_cache->z, SWIFT_CACHE_ALIGNMENT);
-  swift_declare_aligned_ptr(float, epsilon, ci_cache->epsilon,
-                            SWIFT_CACHE_ALIGNMENT);
-  swift_declare_aligned_ptr(float, a_x, ci_cache->a_x, SWIFT_CACHE_ALIGNMENT);
-  swift_declare_aligned_ptr(float, a_y, ci_cache->a_y, SWIFT_CACHE_ALIGNMENT);
-  swift_declare_aligned_ptr(float, a_z, ci_cache->a_z, SWIFT_CACHE_ALIGNMENT);
-  swift_declare_aligned_ptr(float, pot, ci_cache->pot, SWIFT_CACHE_ALIGNMENT);
-  swift_declare_aligned_ptr(int, active, ci_cache->active,
-                            SWIFT_CACHE_ALIGNMENT);
-  swift_declare_aligned_ptr(int, use_mpole, ci_cache->use_mpole,
-                            SWIFT_CACHE_ALIGNMENT);
-  swift_assume_size(gcount_padded_i, VEC_SIZE);
-
-  /* Loop over all particles in ci... */
-  for (int pid = 0; pid < gcount_padded_i; pid++) {
-
-    /* Skip inactive particles */
-    if (!active[pid]) continue;
-
-    /* Skip particle that cannot use the multipole */
-    if (!use_mpole[pid]) continue;
-
-#ifdef SWIFT_DEBUG_CHECKS
-    if (pid < gcount_i && !gpart_is_active(&gparts_i[pid], e))
-      error("Active particle went through the cache");
-
-    /* Check that particles have been drifted to the current time */
-    if (gparts_i[pid].ti_drift != e->ti_current)
-      error("gpi not drifted to current time");
-
-    /* Check that we are not updated an inhibited particle */
-    if (gpart_is_inhibited(&gparts_i[pid], e))
-      error("Updating an inhibited particle!");
-
-    /* Check that the particle was initialised */
-    if (gparts_i[pid].initialised == 0)
-      error("Adding forces to an un-initialised gpart.");
-
-    if (pid >= gcount_i) error("Adding forces to padded particle");
-#endif
-
-    const float x_i = x[pid];
-    const float y_i = y[pid];
-    const float z_i = z[pid];
-
-    /* Some powers of the softening length */
-    const float h_i = epsilon[pid];
-    const float h_inv_i = 1.f / h_i;
-
-    /* Distance to the Multipole */
-    float dx = CoM_j[0] - x_i;
-    float dy = CoM_j[1] - y_i;
-    float dz = CoM_j[2] - z_i;
-
-    /* Apply periodic BCs */
-    dx = nearestf(dx, dim[0]);
-    dy = nearestf(dy, dim[1]);
-    dz = nearestf(dz, dim[2]);
-
-    const float r2 = dx * dx + dy * dy + dz * dz;
-
-#ifdef SWIFT_DEBUG_CHECKS
-    const float r_max_j = cj->grav.multipole->r_max;
-    const float r_max2 = r_max_j * r_max_j;
-    const float theta_crit2 = e->gravity_properties->theta_crit2;
-
-    /* 0.99 and 1.1 to avoid FP rounding false-positives */
-    if (!gravity_M2P_accept(r_max2, theta_crit2 * 1.1, r2, 0.99 * h_i))
-      error(
-          "use_mpole[i] set when M2P accept fails CoM=[%e %e %e] pos=[%e %e "
-          "%e], rmax=%e",
-          CoM_j[0], CoM_j[1], CoM_j[2], x_i, y_i, z_i, r_max_j);
-#endif
-
-    /* Interact! */
-    float f_x, f_y, f_z, pot_ij;
-    runner_iact_grav_pm_truncated(dx, dy, dz, r2, h_i, h_inv_i, r_s_inv,
-                                  multi_j, &f_x, &f_y, &f_z, &pot_ij);
-
-    /* Store it back */
-    a_x[pid] += f_x;
-    a_y[pid] += f_y;
-    a_z[pid] += f_z;
-    pot[pid] += pot_ij;
-
-#ifdef SWIFT_DEBUG_CHECKS
-    /* Update the interaction counter */
-    if (pid < gcount_i)
-      gparts_i[pid].num_interacted += cj->grav.multipole->m_pole.num_gpart;
-#endif
-  }
-}
-
-/**
- * @brief Computes the interaction of all the particles in a cell with all the
- * particles of another cell.
- *
- * This function switches between the full potential and the truncated one
- * depending on needs. It will also use the M2P (multipole) interaction
- * for the subset of particles in either cell for which the distance criterion
- * is valid.
- *
- * This function starts by constructing the require #gravity_cache for both
- * cells and then call the specialised functions doing the actual work on
- * the caches. It then write the data back to the particles.
- *
- * @param r The #runner.
- * @param ci The first #cell.
- * @param cj The other #cell.
- * @param symmetric Are we updating both cells (1) or just ci (0) ?
- * @param allow_mpole Are we allowing the use of P2M interactions ?
- */
-static INLINE void runner_dopair_grav_pp(struct runner *r, struct cell *ci,
-                                         struct cell *cj, const int symmetric,
-                                         const int allow_mpole) {
-
-  /* Recover some useful constants */
-  const struct engine *e = r->e;
-  const int periodic = e->mesh->periodic;
-  const float dim[3] = {(float)e->mesh->dim[0], (float)e->mesh->dim[1],
-                        (float)e->mesh->dim[2]};
-  const float r_s_inv = e->mesh->r_s_inv;
-  const double min_trunc = e->mesh->r_cut_min;
-
-  TIMER_TIC;
-
-  /* Record activity status */
-  const int ci_active =
-      cell_is_active_gravity(ci, e) && (ci->nodeID == e->nodeID);
-  const int cj_active =
-      cell_is_active_gravity(cj, e) && (cj->nodeID == e->nodeID);
-
-  /* Anything to do here? */
-  if (!ci_active && !cj_active) return;
-  if (!ci_active && !symmetric) return;
-
-  /* Check that we are not doing something stupid */
-  if (ci->split || cj->split) error("Running P-P on splitable cells");
-
-  /* Let's start by checking things are drifted */
-  if (!cell_are_gpart_drifted(ci, e)) error("Un-drifted gparts");
-  if (!cell_are_gpart_drifted(cj, e)) error("Un-drifted gparts");
-  if (cj_active && ci->grav.ti_old_multipole != e->ti_current)
-    error("Un-drifted multipole");
-  if (ci_active && cj->grav.ti_old_multipole != e->ti_current)
-    error("Un-drifted multipole");
-
-  /* Caches to play with */
-  struct gravity_cache *const ci_cache = &r->ci_gravity_cache;
-  struct gravity_cache *const cj_cache = &r->cj_gravity_cache;
-
-  /* Shift to apply to the particles in each cell */
-  const double shift_i[3] = {0., 0., 0.};
-  const double shift_j[3] = {0., 0., 0.};
-
-  /* Recover the multipole info and shift the CoM locations */
-  const float rmax_i = ci->grav.multipole->r_max;
-  const float rmax_j = cj->grav.multipole->r_max;
-  const float rmax2_i = rmax_i * rmax_i;
-  const float rmax2_j = rmax_j * rmax_j;
-  const struct multipole *multi_i = &ci->grav.multipole->m_pole;
-  const struct multipole *multi_j = &cj->grav.multipole->m_pole;
-  const float CoM_i[3] = {(float)(ci->grav.multipole->CoM[0] - shift_i[0]),
-                          (float)(ci->grav.multipole->CoM[1] - shift_i[1]),
-                          (float)(ci->grav.multipole->CoM[2] - shift_i[2])};
-  const float CoM_j[3] = {(float)(cj->grav.multipole->CoM[0] - shift_j[0]),
-                          (float)(cj->grav.multipole->CoM[1] - shift_j[1]),
-                          (float)(cj->grav.multipole->CoM[2] - shift_j[2])};
-
-  /* Start by constructing particle caches */
-
-  /* Computed the padded counts */
-  const int gcount_i = ci->grav.count;
-  const int gcount_j = cj->grav.count;
-  const int gcount_padded_i = gcount_i - (gcount_i % VEC_SIZE) + VEC_SIZE;
-  const int gcount_padded_j = gcount_j - (gcount_j % VEC_SIZE) + VEC_SIZE;
-
-#ifdef SWIFT_DEBUG_CHECKS
-  /* Check that we fit in cache */
-  if (gcount_i > ci_cache->count || gcount_j > cj_cache->count)
-    error("Not enough space in the caches! gcount_i=%d gcount_j=%d", gcount_i,
-          gcount_j);
-#endif
-
-  /* Fill the caches */
-  gravity_cache_populate(e->max_active_bin, allow_mpole, periodic, dim,
-                         ci_cache, ci->grav.parts, gcount_i, gcount_padded_i,
-                         shift_i, CoM_j, rmax2_j, ci, e->gravity_properties);
-  gravity_cache_populate(e->max_active_bin, allow_mpole, periodic, dim,
-                         cj_cache, cj->grav.parts, gcount_j, gcount_padded_j,
-                         shift_j, CoM_i, rmax2_i, cj, e->gravity_properties);
-
-  /* Can we use the Newtonian version or do we need the truncated one ? */
-  if (!periodic) {
-
-    /* Not periodic -> Can always use Newtonian potential */
-
-    /* Let's updated the active cell(s) only */
-    if (ci_active) {
-
-      /* First the P2P */
-      runner_dopair_grav_pp_full(ci_cache, cj_cache, gcount_i, gcount_j,
-                                 gcount_padded_j, periodic, dim, e,
-                                 ci->grav.parts, cj->grav.parts);
-
-      /* Then the M2P */
-      if (allow_mpole)
-        runner_dopair_grav_pm_full(ci_cache, gcount_padded_i, CoM_j, multi_j,
-                                   periodic, dim, e, ci->grav.parts, gcount_i,
-                                   cj);
-    }
-    if (cj_active && symmetric) {
-
-      /* First the P2P */
-      runner_dopair_grav_pp_full(cj_cache, ci_cache, gcount_j, gcount_i,
-                                 gcount_padded_i, periodic, dim, e,
-                                 cj->grav.parts, ci->grav.parts);
-
-      /* Then the M2P */
-      if (allow_mpole)
-        runner_dopair_grav_pm_full(cj_cache, gcount_padded_j, CoM_i, multi_i,
-                                   periodic, dim, e, cj->grav.parts, gcount_j,
-                                   ci);
-    }
-
-  } else { /* Periodic BC */
-
-    /* Get the relative distance between the CoMs */
-    const double dx[3] = {CoM_j[0] - CoM_i[0], CoM_j[1] - CoM_i[1],
-                          CoM_j[2] - CoM_i[2]};
-    const double r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
-
-    /* Get the maximal distance between any two particles */
-    const double max_r = sqrt(r2) + rmax_i + rmax_j;
-
-    /* Do we need to use the truncated interactions ? */
-    if (max_r > min_trunc) {
-
-      /* Periodic but far-away cells must use the truncated potential */
-
-      /* Let's updated the active cell(s) only */
-      if (ci_active) {
-
-        /* First the (truncated) P2P */
-        runner_dopair_grav_pp_truncated(ci_cache, cj_cache, gcount_i, gcount_j,
-                                        gcount_padded_j, dim, r_s_inv, e,
-                                        ci->grav.parts, cj->grav.parts);
-
-        /* Then the M2P */
-        if (allow_mpole)
-          runner_dopair_grav_pm_truncated(ci_cache, gcount_padded_i, CoM_j,
-                                          multi_j, dim, r_s_inv, e,
-                                          ci->grav.parts, gcount_i, cj);
-      }
-      if (cj_active && symmetric) {
-
-        /* First the (truncated) P2P */
-        runner_dopair_grav_pp_truncated(cj_cache, ci_cache, gcount_j, gcount_i,
-                                        gcount_padded_i, dim, r_s_inv, e,
-                                        cj->grav.parts, ci->grav.parts);
-
-        /* Then the M2P */
-        if (allow_mpole)
-          runner_dopair_grav_pm_truncated(cj_cache, gcount_padded_j, CoM_i,
-                                          multi_i, dim, r_s_inv, e,
-                                          cj->grav.parts, gcount_j, ci);
-      }
-
-    } else {
-
-      /* Periodic but close-by cells can use the full Newtonian potential */
-
-      /* Let's updated the active cell(s) only */
-      if (ci_active) {
-
-        /* First the (Newtonian) P2P */
-        runner_dopair_grav_pp_full(ci_cache, cj_cache, gcount_i, gcount_j,
-                                   gcount_padded_j, periodic, dim, e,
-                                   ci->grav.parts, cj->grav.parts);
-
-        /* Then the M2P */
-        if (allow_mpole)
-          runner_dopair_grav_pm_full(ci_cache, gcount_padded_i, CoM_j, multi_j,
-                                     periodic, dim, e, ci->grav.parts, gcount_i,
-                                     cj);
-      }
-      if (cj_active && symmetric) {
-
-        /* First the (Newtonian) P2P */
-        runner_dopair_grav_pp_full(cj_cache, ci_cache, gcount_j, gcount_i,
-                                   gcount_padded_i, periodic, dim, e,
-                                   cj->grav.parts, ci->grav.parts);
-
-        /* Then the M2P */
-        if (allow_mpole)
-          runner_dopair_grav_pm_full(cj_cache, gcount_padded_j, CoM_i, multi_i,
-                                     periodic, dim, e, cj->grav.parts, gcount_j,
-                                     ci);
-      }
-    }
-  }
-
-  /* Write back to the particles */
-  if (ci_active) gravity_cache_write_back(ci_cache, ci->grav.parts, gcount_i);
-  if (cj_active && symmetric)
-    gravity_cache_write_back(cj_cache, cj->grav.parts, gcount_j);
-
-  TIMER_TOC(timer_dopair_grav_pp);
-}
-
-/**
- * @brief Compute the non-truncated gravity interactions between all particles
- * of a cell and the particles of the other cell.
- *
- * The calculation is performed non-symmetrically using the pre-filled
- * #gravity_cache structures. The loop over the j cache should auto-vectorize.
- *
- * @param ci_cache #gravity_cache contaning the particles to be updated.
- * @param gcount The number of particles in the cell.
- * @param gcount_padded The number of particles in the cell padded to the
- * vector length.
- *
- * @param e The #engine (for debugging checks only).
- * @param gparts The #gpart in the cell (for debugging checks only).
- */
-static INLINE void runner_doself_grav_pp_full(
-    struct gravity_cache *restrict ci_cache, const int gcount,
-    const int gcount_padded, const struct engine *e, struct gpart *gparts) {
-
-  /* Loop over all particles in ci... */
-  for (int pid = 0; pid < gcount; pid++) {
-
-    /* Skip inactive particles */
-    if (!ci_cache->active[pid]) continue;
-
-    const float x_i = ci_cache->x[pid];
-    const float y_i = ci_cache->y[pid];
-    const float z_i = ci_cache->z[pid];
-    const float h_i = ci_cache->epsilon[pid];
-
-    /* Local accumulators for the acceleration */
-    float a_x = 0.f, a_y = 0.f, a_z = 0.f, pot = 0.f;
-
-    /* Make the compiler understand we are in happy vectorization land */
-    swift_align_information(float, ci_cache->x, SWIFT_CACHE_ALIGNMENT);
-    swift_align_information(float, ci_cache->y, SWIFT_CACHE_ALIGNMENT);
-    swift_align_information(float, ci_cache->z, SWIFT_CACHE_ALIGNMENT);
-    swift_align_information(float, ci_cache->m, SWIFT_CACHE_ALIGNMENT);
-    swift_align_information(float, ci_cache->epsilon, SWIFT_CACHE_ALIGNMENT);
-    swift_assume_size(gcount_padded, VEC_SIZE);
-
-    /* Loop over every other particle in the cell. */
-    for (int pjd = 0; pjd < gcount_padded; pjd++) {
-
-      /* No self interaction */
-      if (pid == pjd) continue;
-
-      /* Get info about j */
-      const float x_j = ci_cache->x[pjd];
-      const float y_j = ci_cache->y[pjd];
-      const float z_j = ci_cache->z[pjd];
-      const float mass_j = ci_cache->m[pjd];
-      const float h_j = ci_cache->epsilon[pjd];
-
-      /* Compute the pairwise (square) distance. */
-      /* Note: no need for periodic wrapping inside a cell */
-      const float dx = x_j - x_i;
-      const float dy = y_j - y_i;
-      const float dz = z_j - z_i;
-      const float r2 = dx * dx + dy * dy + dz * dz;
-
-      /* Pick the maximal softening length of i and j */
-      const float h = max(h_i, h_j);
-      const float h2 = h * h;
-      const float h_inv = 1.f / h;
-      const float h_inv_3 = h_inv * h_inv * h_inv;
-
-#ifdef SWIFT_DEBUG_CHECKS
-      if (r2 == 0.f && h2 == 0.)
-        error("Interacting particles with 0 distance and 0 softening.");
-
-      /* Check that particles have been drifted to the current time */
-      if (gparts[pid].ti_drift != e->ti_current)
-        error("gpi not drifted to current time");
-      if (pjd < gcount && gparts[pjd].ti_drift != e->ti_current &&
-          !gpart_is_inhibited(&gparts[pjd], e))
-        error("gpj not drifted to current time");
-
-      /* Check that we are not updated an inhibited particle */
-      if (gpart_is_inhibited(&gparts[pid], e))
-        error("Updating an inhibited particle!");
-
-      /* Check that the particle we interact with was not inhibited */
-      if (pjd < gcount && gpart_is_inhibited(&gparts[pjd], e) && mass_j != 0.f)
-        error("Inhibited particle used as gravity source.");
-
-      /* Check that the particle was initialised */
-      if (gparts[pid].initialised == 0)
-        error("Adding forces to an un-initialised gpart.");
-#endif
-
-      /* Interact! */
-      float f_ij, pot_ij;
-      runner_iact_grav_pp_full(r2, h2, h_inv, h_inv_3, mass_j, &f_ij, &pot_ij);
-
-      /* Store it back */
-      a_x += f_ij * dx;
-      a_y += f_ij * dy;
-      a_z += f_ij * dz;
-      pot += pot_ij;
-
-#ifdef SWIFT_DEBUG_CHECKS
-      /* Update the interaction counter if it's not a padded gpart */
-      if (pjd < gcount && !gpart_is_inhibited(&gparts[pjd], e))
-        gparts[pid].num_interacted++;
-#endif
-    }
-
-    /* Store everything back in cache */
-    ci_cache->a_x[pid] += a_x;
-    ci_cache->a_y[pid] += a_y;
-    ci_cache->a_z[pid] += a_z;
-    ci_cache->pot[pid] += pot;
-  }
-}
-
-/**
- * @brief Compute the truncated gravity interactions between all particles
- * of a cell and the particles of the other cell.
- *
- * The calculation is performed non-symmetrically using the pre-filled
- * #gravity_cache structures. The loop over the j cache should auto-vectorize.
- *
- * This function only makes sense in periodic BCs.
- *
- * @param ci_cache #gravity_cache contaning the particles to be updated.
- * @param gcount The number of particles in the cell.
- * @param gcount_padded The number of particles in the cell padded to the
- * vector length.
- * @param r_s_inv The inverse of the gravity-mesh smoothing-scale.
- *
- * @param e The #engine (for debugging checks only).
- * @param gparts The #gpart in the cell (for debugging checks only).
- */
-static INLINE void runner_doself_grav_pp_truncated(
-    struct gravity_cache *restrict ci_cache, const int gcount,
-    const int gcount_padded, const float r_s_inv, const struct engine *e,
-    struct gpart *gparts) {
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (!e->s->periodic)
-    error("Calling truncated PP function in non-periodic setup.");
-#endif
-
-  /* Loop over all particles in ci... */
-  for (int pid = 0; pid < gcount; pid++) {
-
-    /* Skip inactive particles */
-    if (!ci_cache->active[pid]) continue;
-
-    const float x_i = ci_cache->x[pid];
-    const float y_i = ci_cache->y[pid];
-    const float z_i = ci_cache->z[pid];
-    const float h_i = ci_cache->epsilon[pid];
-
-    /* Local accumulators for the acceleration and potential */
-    float a_x = 0.f, a_y = 0.f, a_z = 0.f, pot = 0.f;
-
-    /* Make the compiler understand we are in happy vectorization land */
-    swift_align_information(float, ci_cache->x, SWIFT_CACHE_ALIGNMENT);
-    swift_align_information(float, ci_cache->y, SWIFT_CACHE_ALIGNMENT);
-    swift_align_information(float, ci_cache->z, SWIFT_CACHE_ALIGNMENT);
-    swift_align_information(float, ci_cache->m, SWIFT_CACHE_ALIGNMENT);
-    swift_align_information(float, ci_cache->epsilon, SWIFT_CACHE_ALIGNMENT);
-    swift_assume_size(gcount_padded, VEC_SIZE);
-
-    /* Loop over every other particle in the cell. */
-    for (int pjd = 0; pjd < gcount_padded; pjd++) {
-
-      /* No self interaction */
-      if (pid == pjd) continue;
-
-      /* Get info about j */
-      const float x_j = ci_cache->x[pjd];
-      const float y_j = ci_cache->y[pjd];
-      const float z_j = ci_cache->z[pjd];
-      const float mass_j = ci_cache->m[pjd];
-      const float h_j = ci_cache->epsilon[pjd];
-
-      /* Compute the pairwise (square) distance. */
-      /* Note: no need for periodic wrapping inside a cell */
-      const float dx = x_j - x_i;
-      const float dy = y_j - y_i;
-      const float dz = z_j - z_i;
-
-      const float r2 = dx * dx + dy * dy + dz * dz;
-
-      /* Pick the maximal softening length of i and j */
-      const float h = max(h_i, h_j);
-      const float h2 = h * h;
-      const float h_inv = 1.f / h;
-      const float h_inv_3 = h_inv * h_inv * h_inv;
-
-#ifdef SWIFT_DEBUG_CHECKS
-      if (r2 == 0.f && h2 == 0.)
-        error("Interacting particles with 0 distance and 0 softening.");
-
-      /* Check that particles have been drifted to the current time */
-      if (gparts[pid].ti_drift != e->ti_current)
-        error("gpi not drifted to current time");
-      if (pjd < gcount && gparts[pjd].ti_drift != e->ti_current &&
-          !gpart_is_inhibited(&gparts[pjd], e))
-        error("gpj not drifted to current time");
-
-      /* Check that we are not updated an inhibited particle */
-      if (gpart_is_inhibited(&gparts[pid], e))
-        error("Updating an inhibited particle!");
-
-      /* Check that the particle we interact with was not inhibited */
-      if (pjd < gcount && gpart_is_inhibited(&gparts[pjd], e) && mass_j != 0.f)
-        error("Inhibited particle used as gravity source.");
-
-      /* Check that the particle was initialised */
-      if (gparts[pid].initialised == 0)
-        error("Adding forces to an un-initialised gpart.");
-#endif
-
-      /* Interact! */
-      float f_ij, pot_ij;
-      runner_iact_grav_pp_truncated(r2, h2, h_inv, h_inv_3, mass_j, r_s_inv,
-                                    &f_ij, &pot_ij);
-
-      /* Store it back */
-      a_x += f_ij * dx;
-      a_y += f_ij * dy;
-      a_z += f_ij * dz;
-      pot += pot_ij;
-
-#ifdef SWIFT_DEBUG_CHECKS
-      /* Update the interaction counter if it's not a padded gpart */
-      if (pjd < gcount && !gpart_is_inhibited(&gparts[pjd], e))
-        gparts[pid].num_interacted++;
-#endif
-    }
-
-    /* Store everything back in cache */
-    ci_cache->a_x[pid] += a_x;
-    ci_cache->a_y[pid] += a_y;
-    ci_cache->a_z[pid] += a_z;
-    ci_cache->pot[pid] += pot;
-  }
-}
-
-/**
- * @brief Computes the interaction of all the particles in a cell with all the
- * other ones.
- *
- * This function switches between the full potential and the truncated one
- * depending on needs.
- *
- * This function starts by constructing the require #gravity_cache for the
- * cell and then call the specialised functions doing the actual work on
- * the cache. It then write the data back to the particles.
- *
- * @param r The #runner.
- * @param c The #cell.
- */
-static INLINE void runner_doself_grav_pp(struct runner *r, struct cell *c) {
-
-  /* Recover some useful constants */
-  const struct engine *e = r->e;
-  const int periodic = e->mesh->periodic;
-  const float r_s_inv = e->mesh->r_s_inv;
-  const double min_trunc = e->mesh->r_cut_min;
-
-  TIMER_TIC;
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (c->grav.count == 0) error("Doing self gravity on an empty cell !");
-#endif
-
-  /* Anything to do here? */
-  if (!cell_is_active_gravity(c, e)) return;
-
-  /* Check that we are not doing something stupid */
-  if (c->split) error("Running P-P on a splitable cell");
-
-  /* Do we need to start by drifting things ? */
-  if (!cell_are_gpart_drifted(c, e)) error("Un-drifted gparts");
-
-  /* Start by constructing a cache for the particles */
-  struct gravity_cache *const ci_cache = &r->ci_gravity_cache;
-
-  /* Shift to apply to the particles in the cell */
-  const double loc[3] = {c->loc[0] + 0.5 * c->width[0],
-                         c->loc[1] + 0.5 * c->width[1],
-                         c->loc[2] + 0.5 * c->width[2]};
-
-  /* Computed the padded counts */
-  const int gcount = c->grav.count;
-  const int gcount_padded = gcount - (gcount % VEC_SIZE) + VEC_SIZE;
-
-#ifdef SWIFT_DEBUG_CHECKS
-  /* Check that we fit in cache */
-  if (gcount > ci_cache->count)
-    error("Not enough space in the cache! gcount=%d", gcount);
-#endif
-
-  /* Fill the cache */
-  gravity_cache_populate_no_mpole(e->max_active_bin, ci_cache, c->grav.parts,
-                                  gcount, gcount_padded, loc, c,
-                                  e->gravity_properties);
-
-  /* Can we use the Newtonian version or do we need the truncated one ? */
-  if (!periodic) {
-
-    /* Not periodic -> Can always use Newtonian potential */
-    runner_doself_grav_pp_full(ci_cache, gcount, gcount_padded, e,
-                               c->grav.parts);
-
-  } else {
-
-    /* Get the maximal distance between any two particles */
-    const double max_r = 2. * c->grav.multipole->r_max;
-
-    /* Do we need to use the truncated interactions ? */
-    if (max_r > min_trunc) {
-
-      /* Periodic but far-away cells must use the truncated potential */
-      runner_doself_grav_pp_truncated(ci_cache, gcount, gcount_padded, r_s_inv,
-                                      e, c->grav.parts);
-
-    } else {
-
-      /* Periodic but close-by cells can use the full Newtonian potential */
-      runner_doself_grav_pp_full(ci_cache, gcount, gcount_padded, e,
-                                 c->grav.parts);
-    }
-  }
-
-  /* Write back to the particles */
-  gravity_cache_write_back(ci_cache, c->grav.parts, gcount);
-
-  TIMER_TOC(timer_doself_grav_pp);
-}
-
-/**
- * @brief Computes the interaction of the field tensor and multipole
- * of two cells symmetrically.
- *
- * @param r The #runner.
- * @param ci The first #cell.
- * @param cj The second #cell.
- */
-static INLINE void runner_dopair_grav_mm_symmetric(struct runner *r,
-                                                   struct cell *restrict ci,
-                                                   struct cell *restrict cj) {
-
-  /* Some constants */
-  const struct engine *e = r->e;
-  const struct gravity_props *props = e->gravity_properties;
-  const int periodic = e->mesh->periodic;
-  const double dim[3] = {e->mesh->dim[0], e->mesh->dim[1], e->mesh->dim[2]};
-  const float r_s_inv = e->mesh->r_s_inv;
-
-  TIMER_TIC;
-
-  /* Anything to do here? */
-  if ((!cell_is_active_gravity_mm(ci, e) || ci->nodeID != engine_rank) ||
-      (!cell_is_active_gravity_mm(cj, e) || cj->nodeID != engine_rank))
-    error("Invalid state in symmetric M-M calculation!");
-
-  /* Short-cut to the multipole */
-  const struct multipole *multi_i = &ci->grav.multipole->m_pole;
-  const struct multipole *multi_j = &cj->grav.multipole->m_pole;
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (ci == cj) error("Interacting a cell with itself using M2L");
-
-  if (multi_i->num_gpart == 0)
-    error("Multipole i does not seem to have been set.");
-
-  if (multi_j->num_gpart == 0)
-    error("Multipole j does not seem to have been set.");
-
-  if (ci->grav.multipole->pot.ti_init != e->ti_current)
-    error("ci->grav tensor not initialised.");
-
-  if (ci->grav.multipole->pot.ti_init != e->ti_current)
-    error("cj->grav tensor not initialised.");
-
-  if (ci->grav.ti_old_multipole != e->ti_current)
-    error(
-        "Undrifted multipole ci->grav.ti_old_multipole=%lld ci->nodeID=%d "
-        "cj->nodeID=%d e->ti_current=%lld",
-        ci->grav.ti_old_multipole, ci->nodeID, cj->nodeID, e->ti_current);
-
-  if (cj->grav.ti_old_multipole != e->ti_current)
-    error(
-        "Undrifted multipole cj->grav.ti_old_multipole=%lld cj->nodeID=%d "
-        "ci->nodeID=%d e->ti_current=%lld",
-        cj->grav.ti_old_multipole, cj->nodeID, ci->nodeID, e->ti_current);
-#endif
-
-  /* Let's interact at this level */
-  gravity_M2L_symmetric(&ci->grav.multipole->pot, &cj->grav.multipole->pot,
-                        multi_i, multi_j, ci->grav.multipole->CoM,
-                        cj->grav.multipole->CoM, props, periodic, dim, r_s_inv);
-
-  TIMER_TOC(timer_dopair_grav_mm);
-}
-
-/**
- * @brief Computes the interaction of the field tensor in a cell with the
- * multipole of another cell.
- *
- * @param r The #runner.
- * @param ci The #cell with field tensor to interact.
- * @param cj The #cell with the multipole.
- */
-static INLINE void runner_dopair_grav_mm_nonsym(
-    struct runner *r, struct cell *restrict ci,
-    const struct cell *restrict cj) {
-
-  /* Some constants */
-  const struct engine *e = r->e;
-  const struct gravity_props *props = e->gravity_properties;
-  const int periodic = e->mesh->periodic;
-  const double dim[3] = {e->mesh->dim[0], e->mesh->dim[1], e->mesh->dim[2]};
-  const float r_s_inv = e->mesh->r_s_inv;
-
-  TIMER_TIC;
-
-  /* Anything to do here? */
-  if (!cell_is_active_gravity_mm(ci, e) || ci->nodeID != engine_rank) return;
-
-  /* Short-cut to the multipole */
-  const struct multipole *multi_j = &cj->grav.multipole->m_pole;
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (ci == cj) error("Interacting a cell with itself using M2L");
-
-  if (multi_j->num_gpart == 0)
-    error("Multipole does not seem to have been set.");
-
-  if (ci->grav.multipole->pot.ti_init != e->ti_current)
-    error("ci->grav tensor not initialised.");
-
-  if (cj->grav.ti_old_multipole != e->ti_current)
-    error(
-        "Undrifted multipole cj->grav.ti_old_multipole=%lld cj->nodeID=%d "
-        "ci->nodeID=%d e->ti_current=%lld",
-        cj->grav.ti_old_multipole, cj->nodeID, ci->nodeID, e->ti_current);
-#endif
-
-  /* Let's interact at this level */
-  gravity_M2L_nonsym(&ci->grav.multipole->pot, multi_j, ci->grav.multipole->CoM,
-                     cj->grav.multipole->CoM, props, periodic, dim, r_s_inv);
-
-  TIMER_TOC(timer_dopair_grav_mm);
-}
-
-/**
- * @brief Call the M-M calculation on two cells if active.
- *
- * @param r The #runner object.
- * @param ci The first #cell.
- * @param cj The second #cell.
- */
-static INLINE void runner_dopair_grav_mm(struct runner *r,
-                                         struct cell *restrict ci,
-                                         struct cell *restrict cj) {
-
-  const struct engine *e = r->e;
-
-  /* What do we need to do? */
-  const int do_i =
-      cell_is_active_gravity_mm(ci, e) && (ci->nodeID == e->nodeID);
-  const int do_j =
-      cell_is_active_gravity_mm(cj, e) && (cj->nodeID == e->nodeID);
-
-  /* Do we need drifting first? */
-  if (ci->grav.ti_old_multipole < e->ti_current) cell_drift_multipole(ci, e);
-  if (cj->grav.ti_old_multipole < e->ti_current) cell_drift_multipole(cj, e);
-
-  /* Interact! */
-  if (do_i && do_j)
-    runner_dopair_grav_mm_symmetric(r, ci, cj);
-  else if (do_i)
-    runner_dopair_grav_mm_nonsym(r, ci, cj);
-  else if (do_j)
-    runner_dopair_grav_mm_nonsym(r, cj, ci);
-}
-
-/**
- * @brief Computes all the M-M interactions between all the well-separated (at
- * rebuild) pairs of progenies of the two cells.
- *
- * @param r The #runner thread.
- * @param flags The task flag containing the list of well-separated pairs as a
- * bit-field.
- * @param ci The first #cell.
- * @param cj The second #cell.
- */
-static INLINE void runner_dopair_grav_mm_progenies(struct runner *r,
-                                                   const long long flags,
-                                                   struct cell *restrict ci,
-                                                   struct cell *restrict cj) {
-
-  /* Loop over all pairs of progenies */
-  for (int i = 0; i < 8; i++) {
-    if (ci->progeny[i] != NULL) {
-      for (int j = 0; j < 8; j++) {
-        if (cj->progeny[j] != NULL) {
-
-          struct cell *cpi = ci->progeny[i];
-          struct cell *cpj = cj->progeny[j];
-
-          const int flag = i * 8 + j;
-
-          /* Did we agree to use an M-M interaction here at the last rebuild? */
-          if (flags & (1ULL << flag)) runner_dopair_grav_mm(r, cpi, cpj);
-        }
-      }
-    }
-  }
-}
-
-static INLINE void runner_dopair_recursive_grav_pm(struct runner *r,
-                                                   struct cell *ci,
-                                                   const struct cell *cj) {
-  /* Some constants */
-  const struct engine *e = r->e;
-  const int periodic = e->mesh->periodic;
-  const float dim[3] = {(float)e->mesh->dim[0], (float)e->mesh->dim[1],
-                        (float)e->mesh->dim[2]};
-  const float r_s_inv = e->mesh->r_s_inv;
-
-  /* Anything to do here? */
-  if (!(cell_is_active_gravity(ci, e) && ci->nodeID == e->nodeID)) return;
-
-#ifdef SWIFT_DEBUG_CHECKS
-  /* Early abort? */
-  if (ci->grav.count == 0 || cj->grav.count == 0)
-    error("Doing pair gravity on an empty cell !");
-
-  /* Sanity check */
-  if (ci == cj) error("Pair interaction between a cell and itself.");
-
-  if (cj->grav.ti_old_multipole != e->ti_current)
-    error("cj->grav.multipole not drifted.");
-#endif
-
-  /* Can we recurse further? */
-  if (ci->split) {
-
-    /* Loop over ci's children */
-    for (int k = 0; k < 8; k++) {
-      if (ci->progeny[k] != NULL)
-        runner_dopair_recursive_grav_pm(r, ci->progeny[k], cj);
-    }
-
-    /* Ok, let's do the interaction here */
-  } else {
-
-    /* Start by constructing particle caches */
-
-    /* Cache to play with */
-    struct gravity_cache *const ci_cache = &r->ci_gravity_cache;
-
-    /* Computed the padded counts */
-    const int gcount_i = ci->grav.count;
-    const int gcount_padded_i = gcount_i - (gcount_i % VEC_SIZE) + VEC_SIZE;
-
-#ifdef SWIFT_DEBUG_CHECKS
-    /* Check that we fit in cache */
-    if (gcount_i > ci_cache->count)
-      error("Not enough space in the cache! gcount_i=%d", gcount_i);
-#endif
-
-    /* Recover the multipole info and the CoM locations */
-    const struct multipole *multi_j = &cj->grav.multipole->m_pole;
-    const float r_max = cj->grav.multipole->r_max;
-    const float CoM_j[3] = {(float)(cj->grav.multipole->CoM[0]),
-                            (float)(cj->grav.multipole->CoM[1]),
-                            (float)(cj->grav.multipole->CoM[2])};
-
-    /* Fill the cache */
-    gravity_cache_populate_all_mpole(
-        e->max_active_bin, periodic, dim, ci_cache, ci->grav.parts, gcount_i,
-        gcount_padded_i, ci, CoM_j, r_max * r_max, e->gravity_properties);
-
-    /* Can we use the Newtonian version or do we need the truncated one ? */
-    if (!periodic) {
-
-      runner_dopair_grav_pm_full(ci_cache, gcount_padded_i, CoM_j, multi_j,
-                                 periodic, dim, e, ci->grav.parts, gcount_i,
-                                 cj);
-
-    } else {
-
-      runner_dopair_grav_pm_truncated(ci_cache, gcount_padded_i, CoM_j, multi_j,
-                                      dim, r_s_inv, e, ci->grav.parts, gcount_i,
-                                      cj);
-    }
-
-    /* Write back to the particles */
-    gravity_cache_write_back(ci_cache, ci->grav.parts, gcount_i);
-  }
-}
-
-/**
- * @brief Computes the interaction of all the particles in a cell with all the
- * particles of another cell.
- *
- * This function will try to recurse as far down the tree as possible and only
- * default to direct summation if there is no better option.
- *
- * If using periodic BCs, we will abort the recursion if th distance between the
- * cells is larger than the set threshold.
- *
- * @param r The #runner.
- * @param ci The first #cell.
- * @param cj The other #cell.
- * @param gettimer Are we timing this ?
- */
-static INLINE void runner_dopair_recursive_grav(struct runner *r,
-                                                struct cell *ci,
-                                                struct cell *cj, int gettimer) {
-
-  /* Some constants */
-  const struct engine *e = r->e;
-  const int nodeID = e->nodeID;
-  const int periodic = e->mesh->periodic;
-  const double dim[3] = {e->mesh->dim[0], e->mesh->dim[1], e->mesh->dim[2]};
-  const double theta_crit2 = e->gravity_properties->theta_crit2;
-  const double max_distance = e->mesh->r_cut_max;
-
-  /* Anything to do here? */
-  if (!((cell_is_active_gravity(ci, e) && ci->nodeID == nodeID) ||
-        (cell_is_active_gravity(cj, e) && cj->nodeID == nodeID)))
-    return;
-
-#ifdef SWIFT_DEBUG_CHECKS
-
-  const int gcount_i = ci->grav.count;
-  const int gcount_j = cj->grav.count;
-
-  /* Early abort? */
-  if (gcount_i == 0 || gcount_j == 0)
-    error("Doing pair gravity on an empty cell !");
-
-  /* Sanity check */
-  if (ci == cj) error("Pair interaction between a cell and itself.");
-
-  if (cell_is_active_gravity(ci, e) &&
-      ci->grav.ti_old_multipole != e->ti_current)
-    error("ci->grav.multipole not drifted.");
-  if (cell_is_active_gravity(cj, e) &&
-      cj->grav.ti_old_multipole != e->ti_current)
-    error("cj->grav.multipole not drifted.");
-#endif
-
-  TIMER_TIC;
-
-  /* Recover the multipole information */
-  struct gravity_tensors *const multi_i = ci->grav.multipole;
-  struct gravity_tensors *const multi_j = cj->grav.multipole;
-
-  /* Get the distance between the CoMs */
-  double dx = multi_i->CoM[0] - multi_j->CoM[0];
-  double dy = multi_i->CoM[1] - multi_j->CoM[1];
-  double dz = multi_i->CoM[2] - multi_j->CoM[2];
-
-  /* Apply BC */
-  if (periodic) {
-    dx = nearest(dx, dim[0]);
-    dy = nearest(dy, dim[1]);
-    dz = nearest(dz, dim[2]);
-  }
-  const double r2 = dx * dx + dy * dy + dz * dz;
-
-  /* Minimal distance between any 2 particles in the two cells */
-  const double r_lr_check = sqrt(r2) - (multi_i->r_max + multi_j->r_max);
-
-  /* Are we beyond the distance where the truncated forces are 0? */
-  if (periodic && r_lr_check > max_distance) {
-
-#ifdef SWIFT_DEBUG_CHECKS
-    /* Need to account for the interactions we missed */
-    if (cell_is_active_gravity(ci, e))
-      multi_i->pot.num_interacted += multi_j->m_pole.num_gpart;
-    if (cell_is_active_gravity(cj, e))
-      multi_j->pot.num_interacted += multi_i->m_pole.num_gpart;
-#endif
-    return;
-  }
-
-  /* OK, we actually need to compute this pair. Let's find the cheapest
-   * option... */
-
-  /* Can we use M-M interactions ? */
-  if (gravity_M2L_accept(multi_i->r_max, multi_j->r_max, theta_crit2, r2,
-                         multi_i->m_pole.max_softening,
-                         multi_j->m_pole.max_softening)) {
-
-    /* Go M-M */
-    runner_dopair_grav_mm(r, ci, cj);
-
-  } else if (!ci->split && !cj->split) {
-
-    /* We have two leaves. Go P-P. */
-    runner_dopair_grav_pp(r, ci, cj, /*symmetric*/ 1, /*allow_mpoles*/ 1);
-
-  } else {
-
-    /* Alright, we'll have to split and recurse. */
-    /* We know at least one of ci and cj is splittable */
-
-    const double ri_max = multi_i->r_max;
-    const double rj_max = multi_j->r_max;
-
-    /* Split the larger of the two cells and start over again */
-    if (ri_max > rj_max) {
-
-      /* Can we actually split that interaction ? */
-      if (ci->split) {
-
-        /* Loop over ci's children */
-        for (int k = 0; k < 8; k++) {
-          if (ci->progeny[k] != NULL)
-            runner_dopair_recursive_grav(r, ci->progeny[k], cj, 0);
-        }
-
-      } else {
-        /* cj is split */
-
-        /* MATTHIEU: This could maybe be replaced by P-M interactions ?  */
-
-        /* Loop over cj's children */
-        for (int k = 0; k < 8; k++) {
-          if (cj->progeny[k] != NULL)
-            runner_dopair_recursive_grav(r, ci, cj->progeny[k], 0);
-        }
-      }
-    } else {
-
-      /* Can we actually split that interaction ? */
-      if (cj->split) {
-
-        /* Loop over cj's children */
-        for (int k = 0; k < 8; k++) {
-          if (cj->progeny[k] != NULL)
-            runner_dopair_recursive_grav(r, ci, cj->progeny[k], 0);
-        }
-
-      } else {
-        /* ci is split */
-
-        /* MATTHIEU: This could maybe be replaced by P-M interactions ?  */
-
-        /* Loop over ci's children */
-        for (int k = 0; k < 8; k++) {
-          if (ci->progeny[k] != NULL)
-            runner_dopair_recursive_grav(r, ci->progeny[k], cj, 0);
-        }
-      }
-    }
-  }
-
-  if (gettimer) TIMER_TOC(timer_dosub_pair_grav);
-}
-
-/**
- * @brief Computes the interaction of all the particles in a cell.
- *
- * This function will try to recurse as far down the tree as possible and only
- * default to direct summation if there is no better option.
- *
- * @param r The #runner.
- * @param c The first #cell.
- * @param gettimer Are we timing this ?
- */
-static INLINE void runner_doself_recursive_grav(struct runner *r,
-                                                struct cell *c, int gettimer) {
-
-  /* Some constants */
-  const struct engine *e = r->e;
-
-#ifdef SWIFT_DEBUG_CHECKS
-  /* Early abort? */
-  if (c->grav.count == 0) error("Doing self gravity on an empty cell !");
-#endif
-
-  TIMER_TIC;
-
-  /* Anything to do here? */
-  if (!cell_is_active_gravity(c, e)) return;
-
-  /* If the cell is split, interact each progeny with itself, and with
-     each of its siblings. */
-  if (c->split) {
-
-    for (int j = 0; j < 8; j++) {
-      if (c->progeny[j] != NULL) {
-
-        runner_doself_recursive_grav(r, c->progeny[j], 0);
-
-        for (int k = j + 1; k < 8; k++) {
-          if (c->progeny[k] != NULL) {
-
-            runner_dopair_recursive_grav(r, c->progeny[j], c->progeny[k], 0);
-          }
-        }
-      }
-    }
-  }
-
-  /* If the cell is not split, then just go for it... */
-  else {
-
-    runner_doself_grav_pp(r, c);
-  }
-
-  if (gettimer) TIMER_TOC(timer_dosub_self_grav);
-}
-
-/**
- * @brief Performs all M-M interactions between a given top-level cell and all
- * the other top-levels that are far enough.
- *
- * @param r The thread #runner.
- * @param ci The #cell of interest.
- * @param timer Are we timing this ?
- */
-static INLINE void runner_do_grav_long_range(struct runner *r, struct cell *ci,
-                                             int timer) {
-
-  /* Some constants */
-  const struct engine *e = r->e;
-  const int periodic = e->mesh->periodic;
-  const double dim[3] = {e->mesh->dim[0], e->mesh->dim[1], e->mesh->dim[2]};
-  const double theta_crit2 = e->gravity_properties->theta_crit2;
-  const double max_distance2 = e->mesh->r_cut_max * e->mesh->r_cut_max;
-
-  TIMER_TIC;
-
-  /* Recover the list of top-level cells */
-  struct cell *cells = e->s->cells_top;
-  int *cells_with_particles = e->s->cells_with_particles_top;
-  const int nr_cells_with_particles = e->s->nr_cells_with_particles;
-
-  /* Anything to do here? */
-  if (!cell_is_active_gravity(ci, e)) return;
-
-  if (ci->nodeID != engine_rank)
-    error("Non-local cell in long-range gravity task!");
-
-  /* Check multipole has been drifted */
-  if (ci->grav.ti_old_multipole < e->ti_current) cell_drift_multipole(ci, e);
-
-  /* Get this cell's multipole information */
-  struct gravity_tensors *const multi_i = ci->grav.multipole;
-
-  /* Find this cell's top-level (great-)parent */
-  struct cell *top = ci;
-  while (top->parent != NULL) top = top->parent;
-
-  /* Recover the top-level multipole (for distance checks) */
-  struct gravity_tensors *const multi_top = top->grav.multipole;
-  const double CoM_rebuild_top[3] = {multi_top->CoM_rebuild[0],
-                                     multi_top->CoM_rebuild[1],
-                                     multi_top->CoM_rebuild[2]};
-
-  /* Loop over all the top-level cells and go for a M-M interaction if
-   * well-separated */
-  for (int n = 0; n < nr_cells_with_particles; ++n) {
-
-    /* Handle on the top-level cell and it's gravity business*/
-    const struct cell *cj = &cells[cells_with_particles[n]];
-    const struct gravity_tensors *const multi_j = cj->grav.multipole;
-
-    /* Avoid self contributions */
-    if (top == cj) continue;
-
-    /* Skip empty cells */
-    if (multi_j->m_pole.M_000 == 0.f) continue;
-
-    /* Can we escape early in the periodic BC case? */
-    if (periodic) {
-
-      /* Minimal distance between any pair of particles */
-      const double min_radius2 =
-          cell_min_dist2_same_size(top, cj, periodic, dim);
-
-      /* Are we beyond the distance where the truncated forces are 0 ?*/
-      if (min_radius2 > max_distance2) {
-
-#ifdef SWIFT_DEBUG_CHECKS
-        /* Need to account for the interactions we missed */
-        multi_i->pot.num_interacted += multi_j->m_pole.num_gpart;
-#endif
-
-        /* Record that this multipole received a contribution */
-        multi_i->pot.interacted = 1;
+struct runner;
+struct cell;
 
-        /* We are done here. */
-        continue;
-      }
-    }
+void runner_do_grav_down(struct runner *r, struct cell *c, int timer);
 
-    /* Get the distance between the CoMs at the last rebuild*/
-    double dx_r = CoM_rebuild_top[0] - multi_j->CoM_rebuild[0];
-    double dy_r = CoM_rebuild_top[1] - multi_j->CoM_rebuild[1];
-    double dz_r = CoM_rebuild_top[2] - multi_j->CoM_rebuild[2];
+void runner_doself_recursive_grav(struct runner *r, struct cell *c,
+                                  int gettimer);
 
-    /* Apply BC */
-    if (periodic) {
-      dx_r = nearest(dx_r, dim[0]);
-      dy_r = nearest(dy_r, dim[1]);
-      dz_r = nearest(dz_r, dim[2]);
-    }
-    const double r2_rebuild = dx_r * dx_r + dy_r * dy_r + dz_r * dz_r;
+void runner_dopair_recursive_grav(struct runner *r, struct cell *ci,
+                                  struct cell *cj, int gettimer);
 
-    /* Are we in charge of this cell pair? */
-    if (gravity_M2L_accept(multi_top->r_max_rebuild, multi_j->r_max_rebuild,
-                           theta_crit2, r2_rebuild,
-                           multi_top->m_pole.max_softening,
-                           multi_j->m_pole.max_softening)) {
+void runner_dopair_grav_mm_progenies(struct runner *r, const long long flags,
+                                     struct cell *restrict ci,
+                                     struct cell *restrict cj);
 
-      /* Call the PM interaction fucntion on the active sub-cells of ci */
-      runner_dopair_grav_mm_nonsym(r, ci, cj);
-      // runner_dopair_recursive_grav_pm(r, ci, cj);
+void runner_do_grav_long_range(struct runner *r, struct cell *ci, int timer);
 
-      /* Record that this multipole received a contribution */
-      multi_i->pot.interacted = 1;
+/* Internal functions (for unit tests and debugging) */
 
-    } /* We are in charge of this pair */
-  }   /* Loop over top-level cells */
+void runner_doself_grav_pp(struct runner *r, struct cell *c);
 
-  if (timer) TIMER_TOC(timer_dograv_long_range);
-}
+void runner_dopair_grav_pp(struct runner *r, struct cell *ci, struct cell *cj,
+                           const int symmetric, const int allow_mpole);
 
 #endif /* SWIFT_RUNNER_DOIACT_GRAV_H */
diff --git a/src/runner_doiact_hydro.c b/src/runner_doiact_hydro.c
new file mode 100644
index 0000000000000000000000000000000000000000..480ea59f0a536aa340b7e4d8f838bef3a0cca072
--- /dev/null
+++ b/src/runner_doiact_hydro.c
@@ -0,0 +1,63 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
+ *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *               2015 Peter W. Draper (p.w.draper@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+/* Config parameters. */
+#include "../config.h"
+
+/* Local headers. */
+#include "active.h"
+#include "cell.h"
+#include "engine.h"
+#include "pressure_floor_iact.h"
+#include "runner.h"
+#include "runner_doiact_hydro_vec.h"
+#include "space_getsid.h"
+#include "timers.h"
+
+/* Import the density loop functions. */
+#define FUNCTION density
+#define FUNCTION_TASK_LOOP TASK_LOOP_DENSITY
+#include "runner_doiact_functions_hydro.h"
+#undef FUNCTION
+#undef FUNCTION_TASK_LOOP
+
+/* Import the gradient loop functions (if required). */
+#ifdef EXTRA_HYDRO_LOOP
+#define FUNCTION gradient
+#define FUNCTION_TASK_LOOP TASK_LOOP_GRADIENT
+#include "runner_doiact_functions_hydro.h"
+#undef FUNCTION
+#undef FUNCTION_TASK_LOOP
+#endif
+
+/* Import the force loop functions. */
+#define FUNCTION force
+#define FUNCTION_TASK_LOOP TASK_LOOP_FORCE
+#include "runner_doiact_functions_hydro.h"
+#undef FUNCTION
+#undef FUNCTION_TASK_LOOP
+
+/* Import the limiter loop functions. */
+#define FUNCTION limiter
+#define FUNCTION_TASK_LOOP TASK_LOOP_LIMITER
+#include "runner_doiact_functions_hydro.h"
+#undef FUNCTION
+#undef FUNCTION_TASK_LOOP
diff --git a/src/runner_doiact_hydro.h b/src/runner_doiact_hydro.h
new file mode 100644
index 0000000000000000000000000000000000000000..1fd54c1037e2d0b9c7a671311cfee4720ebe8d84
--- /dev/null
+++ b/src/runner_doiact_hydro.h
@@ -0,0 +1,151 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
+ *               2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+/* Before including this file, define FUNCTION, which is the name of the
+   interaction function. This header only defines the name-pasting macros
+   (e.g. DOPAIR1 -> runner_dopair1_FUNCTION, IACT -> runner_iact_FUNCTION)
+   and declares the branch, dosub and subset entry points built from
+   them (e.g. runner_dosub_pair1_FUNCTION); it does not define them. */
+
+#define PASTE(x, y) x##_##y
+
+#define _DOPAIR1_BRANCH(f) PASTE(runner_dopair1_branch, f)
+#define DOPAIR1_BRANCH _DOPAIR1_BRANCH(FUNCTION)
+
+#define _DOPAIR1(f) PASTE(runner_dopair1, f)
+#define DOPAIR1 _DOPAIR1(FUNCTION)
+
+#define _DOPAIR2_BRANCH(f) PASTE(runner_dopair2_branch, f)
+#define DOPAIR2_BRANCH _DOPAIR2_BRANCH(FUNCTION)
+
+#define _DOPAIR2(f) PASTE(runner_dopair2, f)
+#define DOPAIR2 _DOPAIR2(FUNCTION)
+
+#define _DOPAIR_SUBSET(f) PASTE(runner_dopair_subset, f)
+#define DOPAIR_SUBSET _DOPAIR_SUBSET(FUNCTION)
+
+#define _DOPAIR_SUBSET_BRANCH(f) PASTE(runner_dopair_subset_branch, f)
+#define DOPAIR_SUBSET_BRANCH _DOPAIR_SUBSET_BRANCH(FUNCTION)
+
+#define _DOPAIR_SUBSET_NOSORT(f) PASTE(runner_dopair_subset_nosort, f)
+#define DOPAIR_SUBSET_NOSORT _DOPAIR_SUBSET_NOSORT(FUNCTION)
+
+#define _DOPAIR_SUBSET_NAIVE(f) PASTE(runner_dopair_subset_naive, f)
+#define DOPAIR_SUBSET_NAIVE _DOPAIR_SUBSET_NAIVE(FUNCTION)
+
+#define _DOPAIR1_NAIVE(f) PASTE(runner_dopair1_naive, f)
+#define DOPAIR1_NAIVE _DOPAIR1_NAIVE(FUNCTION)
+
+#define _DOPAIR2_NAIVE(f) PASTE(runner_dopair2_naive, f)
+#define DOPAIR2_NAIVE _DOPAIR2_NAIVE(FUNCTION)
+
+#define _DOSELF1_NAIVE(f) PASTE(runner_doself1_naive, f)
+#define DOSELF1_NAIVE _DOSELF1_NAIVE(FUNCTION)
+
+#define _DOSELF2_NAIVE(f) PASTE(runner_doself2_naive, f)
+#define DOSELF2_NAIVE _DOSELF2_NAIVE(FUNCTION)
+
+#define _DOSELF1_BRANCH(f) PASTE(runner_doself1_branch, f)
+#define DOSELF1_BRANCH _DOSELF1_BRANCH(FUNCTION)
+
+#define _DOSELF1(f) PASTE(runner_doself1, f)
+#define DOSELF1 _DOSELF1(FUNCTION)
+
+#define _DOSELF2_BRANCH(f) PASTE(runner_doself2_branch, f)
+#define DOSELF2_BRANCH _DOSELF2_BRANCH(FUNCTION)
+
+#define _DOSELF2(f) PASTE(runner_doself2, f)
+#define DOSELF2 _DOSELF2(FUNCTION)
+
+#define _DOSELF_SUBSET(f) PASTE(runner_doself_subset, f)
+#define DOSELF_SUBSET _DOSELF_SUBSET(FUNCTION)
+
+#define _DOSELF_SUBSET_BRANCH(f) PASTE(runner_doself_subset_branch, f)
+#define DOSELF_SUBSET_BRANCH _DOSELF_SUBSET_BRANCH(FUNCTION)
+
+#define _DOSUB_SELF1(f) PASTE(runner_dosub_self1, f)
+#define DOSUB_SELF1 _DOSUB_SELF1(FUNCTION)
+
+#define _DOSUB_PAIR1(f) PASTE(runner_dosub_pair1, f)
+#define DOSUB_PAIR1 _DOSUB_PAIR1(FUNCTION)
+
+#define _DOSUB_SELF2(f) PASTE(runner_dosub_self2, f)
+#define DOSUB_SELF2 _DOSUB_SELF2(FUNCTION)
+
+#define _DOSUB_PAIR2(f) PASTE(runner_dosub_pair2, f)
+#define DOSUB_PAIR2 _DOSUB_PAIR2(FUNCTION)
+
+#define _DOSUB_SUBSET(f) PASTE(runner_dosub_subset, f)
+#define DOSUB_SUBSET _DOSUB_SUBSET(FUNCTION)
+
+#define _IACT_NONSYM(f) PASTE(runner_iact_nonsym, f)
+#define IACT_NONSYM _IACT_NONSYM(FUNCTION)
+
+#define _IACT(f) PASTE(runner_iact, f)
+#define IACT _IACT(FUNCTION)
+
+#define _IACT_NONSYM_VEC(f) PASTE(runner_iact_nonsym_vec, f)
+#define IACT_NONSYM_VEC _IACT_NONSYM_VEC(FUNCTION)
+
+#define _IACT_VEC(f) PASTE(runner_iact_vec, f)
+#define IACT_VEC _IACT_VEC(FUNCTION)
+
+#define _TIMER_DOSELF(f) PASTE(timer_doself, f)
+#define TIMER_DOSELF _TIMER_DOSELF(FUNCTION)
+
+#define _TIMER_DOPAIR(f) PASTE(timer_dopair, f)
+#define TIMER_DOPAIR _TIMER_DOPAIR(FUNCTION)
+
+#define _TIMER_DOSUB_SELF(f) PASTE(timer_dosub_self, f)
+#define TIMER_DOSUB_SELF _TIMER_DOSUB_SELF(FUNCTION)
+
+#define _TIMER_DOSUB_PAIR(f) PASTE(timer_dosub_pair, f)
+#define TIMER_DOSUB_PAIR _TIMER_DOSUB_PAIR(FUNCTION)
+
+#define _TIMER_DOSELF_SUBSET(f) PASTE(timer_doself_subset, f)
+#define TIMER_DOSELF_SUBSET _TIMER_DOSELF_SUBSET(FUNCTION)
+
+#define _TIMER_DOPAIR_SUBSET(f) PASTE(timer_dopair_subset, f)
+#define TIMER_DOPAIR_SUBSET _TIMER_DOPAIR_SUBSET(FUNCTION)
+
+void DOSELF1_BRANCH(struct runner *r, struct cell *c);
+void DOSELF2_BRANCH(struct runner *r, struct cell *c);
+
+void DOPAIR1_BRANCH(struct runner *r, struct cell *ci, struct cell *cj);
+void DOPAIR2_BRANCH(struct runner *r, struct cell *ci, struct cell *cj);
+
+void DOSUB_SELF1(struct runner *r, struct cell *ci, int gettimer);
+void DOSUB_SELF2(struct runner *r, struct cell *ci, int gettimer);
+
+void DOSUB_PAIR1(struct runner *r, struct cell *ci, struct cell *cj,
+                 int gettimer);
+void DOSUB_PAIR2(struct runner *r, struct cell *ci, struct cell *cj,
+                 int gettimer);
+
+void DOSELF_SUBSET_BRANCH(struct runner *r, struct cell *restrict ci,
+                          struct part *restrict parts, int *restrict ind,
+                          int count);
+
+void DOPAIR_SUBSET_BRANCH(struct runner *r, struct cell *restrict ci,
+                          struct part *restrict parts_i, int *restrict ind,
+                          int count, struct cell *restrict cj);
+
+void DOSUB_SUBSET(struct runner *r, struct cell *ci, struct part *parts,
+                  int *ind, int count, struct cell *cj, int gettimer);
diff --git a/src/runner_doiact_vec.c b/src/runner_doiact_hydro_vec.c
similarity index 99%
rename from src/runner_doiact_vec.c
rename to src/runner_doiact_hydro_vec.c
index 68f34b0d3b8fc9c79097522f8a1618f86957612e..59401e4050dcb4481d1c56aa8857106558a06880 100644
--- a/src/runner_doiact_vec.c
+++ b/src/runner_doiact_hydro_vec.c
@@ -21,7 +21,7 @@
 #include "../config.h"
 
 /* This object's header. */
-#include "runner_doiact_vec.h"
+#include "runner_doiact_hydro_vec.h"
 
 #if defined(WITH_VECTORIZATION) && defined(GADGET2_SPH)
 
diff --git a/src/runner_doiact_vec.h b/src/runner_doiact_hydro_vec.h
similarity index 100%
rename from src/runner_doiact_vec.h
rename to src/runner_doiact_hydro_vec.h
diff --git a/src/runner_doiact_stars.c b/src/runner_doiact_stars.c
new file mode 100644
index 0000000000000000000000000000000000000000..1e1267df5195f727a19252b6ee654629e23149b6
--- /dev/null
+++ b/src/runner_doiact_stars.c
@@ -0,0 +1,47 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
+ *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *               2015 Peter W. Draper (p.w.draper@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+/* Config parameters. */
+#include "../config.h"
+
+/* Local headers. */
+#include "active.h"
+#include "cell.h"
+#include "engine.h"
+#include "feedback.h"
+#include "runner.h"
+#include "space_getsid.h"
+#include "stars.h"
+#include "timers.h"
+
+/* Import the stars density loop functions. */
+#define FUNCTION density
+#define FUNCTION_TASK_LOOP TASK_LOOP_DENSITY
+#include "runner_doiact_functions_stars.h"
+#undef FUNCTION_TASK_LOOP
+#undef FUNCTION
+
+/* Import the stars feedback loop functions. */
+#define FUNCTION feedback
+#define FUNCTION_TASK_LOOP TASK_LOOP_FEEDBACK
+#include "runner_doiact_functions_stars.h"
+#undef FUNCTION_TASK_LOOP
+#undef FUNCTION
diff --git a/src/runner_doiact_stars.h b/src/runner_doiact_stars.h
index 7e9780def83bbdbab83a431a757a52f3ba51d2e4..2d41d5a0bd1b1003039e1795eec205889b46baf6 100644
--- a/src/runner_doiact_stars.h
+++ b/src/runner_doiact_stars.h
@@ -86,1307 +86,21 @@
 #define _IACT_STARS(f) PASTE(runner_iact_nonsym_stars, f)
 #define IACT_STARS _IACT_STARS(FUNCTION)
 
-/**
- * @brief Calculate the number density of #part around the #spart
- *
- * @param r runner task
- * @param c cell
- * @param timer 1 if the time is to be recorded.
- */
-void DOSELF1_STARS(struct runner *r, struct cell *c, int timer) {
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (c->nodeID != engine_rank) error("Should be run on a different node");
-#endif
-
-  TIMER_TIC;
-
-  const struct engine *e = r->e;
-  const int with_cosmology = e->policy & engine_policy_cosmology;
-  const integertime_t ti_current = e->ti_current;
-  const struct cosmology *cosmo = e->cosmology;
-
-  /* Anything to do here? */
-  if (c->hydro.count == 0 || c->stars.count == 0) return;
-  if (!cell_is_active_stars(c, e)) return;
-
-  /* Cosmological terms */
-  const float a = cosmo->a;
-  const float H = cosmo->H;
-
-  const int scount = c->stars.count;
-  const int count = c->hydro.count;
-  struct spart *restrict sparts = c->stars.parts;
-  struct part *restrict parts = c->hydro.parts;
-  struct xpart *restrict xparts = c->hydro.xparts;
-
-  /* Loop over the sparts in ci. */
-  for (int sid = 0; sid < scount; sid++) {
-
-    /* Get a hold of the ith spart in ci. */
-    struct spart *restrict si = &sparts[sid];
-
-    /* Skip inactive particles */
-    if (!spart_is_active(si, e)) continue;
-
-    /* Skip inactive particles */
-    if (!feedback_is_active(si, e->time, cosmo, with_cosmology)) continue;
-
-    const float hi = si->h;
-    const float hig2 = hi * hi * kernel_gamma2;
-    const float six[3] = {(float)(si->x[0] - c->loc[0]),
-                          (float)(si->x[1] - c->loc[1]),
-                          (float)(si->x[2] - c->loc[2])};
-
-    /* Loop over the parts in cj. */
-    for (int pjd = 0; pjd < count; pjd++) {
-
-      /* Get a pointer to the jth particle. */
-      struct part *restrict pj = &parts[pjd];
-      struct xpart *restrict xpj = &xparts[pjd];
-      const float hj = pj->h;
-
-      /* Early abort? */
-      if (part_is_inhibited(pj, e)) continue;
-
-      /* Compute the pairwise distance. */
-      const float pjx[3] = {(float)(pj->x[0] - c->loc[0]),
-                            (float)(pj->x[1] - c->loc[1]),
-                            (float)(pj->x[2] - c->loc[2])};
-      float dx[3] = {six[0] - pjx[0], six[1] - pjx[1], six[2] - pjx[2]};
-      const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
-
-#ifdef SWIFT_DEBUG_CHECKS
-      /* Check that particles have been drifted to the current time */
-      if (pj->ti_drift != e->ti_current)
-        error("Particle pj not drifted to current time");
-#endif
-
-      if (r2 < hig2) {
-        IACT_STARS(r2, dx, hi, hj, si, pj, a, H);
-#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
-        runner_iact_nonsym_feedback_density(r2, dx, hi, hj, si, pj, xpj, cosmo,
-                                            ti_current);
-#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK)
-        runner_iact_nonsym_feedback_apply(r2, dx, hi, hj, si, pj, xpj, cosmo,
-                                          ti_current);
-#endif
-      }
-    } /* loop over the parts in ci. */
-  }   /* loop over the sparts in ci. */
-
-  TIMER_TOC(TIMER_DOSELF_STARS);
-}
-
-/**
- * @brief Calculate the number density of cj #part around the ci #spart
- *
- * @param r runner task
- * @param ci The first #cell
- * @param cj The second #cell
- */
-void DO_NONSYM_PAIR1_STARS_NAIVE(struct runner *r, struct cell *restrict ci,
-                                 struct cell *restrict cj) {
-
-#ifdef SWIFT_DEBUG_CHECKS
-#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
-  if (ci->nodeID != engine_rank) error("Should be run on a different node");
-#else
-  if (cj->nodeID != engine_rank) error("Should be run on a different node");
-#endif
-#endif
-
-  const struct engine *e = r->e;
-  const int with_cosmology = e->policy & engine_policy_cosmology;
-  const integertime_t ti_current = e->ti_current;
-  const struct cosmology *cosmo = e->cosmology;
-
-  /* Anything to do here? */
-  if (cj->hydro.count == 0 || ci->stars.count == 0) return;
-  if (!cell_is_active_stars(ci, e)) return;
-
-  /* Cosmological terms */
-  const float a = cosmo->a;
-  const float H = cosmo->H;
-
-  const int scount_i = ci->stars.count;
-  const int count_j = cj->hydro.count;
-  struct spart *restrict sparts_i = ci->stars.parts;
-  struct part *restrict parts_j = cj->hydro.parts;
-  struct xpart *restrict xparts_j = cj->hydro.xparts;
-
-  /* Get the relative distance between the pairs, wrapping. */
-  double shift[3] = {0.0, 0.0, 0.0};
-  for (int k = 0; k < 3; k++) {
-    if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2)
-      shift[k] = e->s->dim[k];
-    else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2)
-      shift[k] = -e->s->dim[k];
-  }
-
-  /* Loop over the sparts in ci. */
-  for (int sid = 0; sid < scount_i; sid++) {
-
-    /* Get a hold of the ith spart in ci. */
-    struct spart *restrict si = &sparts_i[sid];
-
-    /* Skip inactive particles */
-    if (!spart_is_active(si, e)) continue;
-
-    /* Skip inactive particles */
-    if (!feedback_is_active(si, e->time, cosmo, with_cosmology)) continue;
-
-    const float hi = si->h;
-    const float hig2 = hi * hi * kernel_gamma2;
-    const float six[3] = {(float)(si->x[0] - (cj->loc[0] + shift[0])),
-                          (float)(si->x[1] - (cj->loc[1] + shift[1])),
-                          (float)(si->x[2] - (cj->loc[2] + shift[2]))};
-
-    /* Loop over the parts in cj. */
-    for (int pjd = 0; pjd < count_j; pjd++) {
-
-      /* Get a pointer to the jth particle. */
-      struct part *restrict pj = &parts_j[pjd];
-      struct xpart *restrict xpj = &xparts_j[pjd];
-      const float hj = pj->h;
-
-      /* Skip inhibited particles. */
-      if (part_is_inhibited(pj, e)) continue;
-
-      /* Compute the pairwise distance. */
-      const float pjx[3] = {(float)(pj->x[0] - cj->loc[0]),
-                            (float)(pj->x[1] - cj->loc[1]),
-                            (float)(pj->x[2] - cj->loc[2])};
-      float dx[3] = {six[0] - pjx[0], six[1] - pjx[1], six[2] - pjx[2]};
-      const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
-
-#ifdef SWIFT_DEBUG_CHECKS
-      /* Check that particles have been drifted to the current time */
-      if (pj->ti_drift != e->ti_current)
-        error("Particle pj not drifted to current time");
-#endif
-
-      if (r2 < hig2) {
-        IACT_STARS(r2, dx, hi, hj, si, pj, a, H);
-
-#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
-        runner_iact_nonsym_feedback_density(r2, dx, hi, hj, si, pj, xpj, cosmo,
-                                            ti_current);
-#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK)
-        runner_iact_nonsym_feedback_apply(r2, dx, hi, hj, si, pj, xpj, cosmo,
-                                          ti_current);
-#endif
-      }
-    } /* loop over the parts in cj. */
-  }   /* loop over the parts in ci. */
-}
-
-/**
- * @brief Compute the interactions between a cell pair.
- *
- * @param r The #runner.
- * @param ci The first #cell.
- * @param cj The second #cell.
- * @param sid The direction of the pair.
- * @param shift The shift vector to apply to the particles in ci.
- */
-void DO_SYM_PAIR1_STARS(struct runner *r, struct cell *ci, struct cell *cj,
-                        const int sid, const double *shift) {
-
-  TIMER_TIC;
-
-  const struct engine *e = r->e;
-  const int with_cosmology = e->policy & engine_policy_cosmology;
-  const integertime_t ti_current = e->ti_current;
-  const struct cosmology *cosmo = e->cosmology;
-
-  /* Cosmological terms */
-  const float a = cosmo->a;
-  const float H = cosmo->H;
-
-  /* Get the cutoff shift. */
-  double rshift = 0.0;
-  for (int k = 0; k < 3; k++) rshift += shift[k] * runner_shift[sid][k];
-
-#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
-  const int do_ci_stars = (ci->nodeID == e->nodeID) && (ci->stars.count != 0) &&
-                          (cj->hydro.count != 0) && cell_is_active_stars(ci, e);
-  const int do_cj_stars = (cj->nodeID == e->nodeID) && (cj->stars.count != 0) &&
-                          (ci->hydro.count != 0) && cell_is_active_stars(cj, e);
-#else
-  /* here we are updating the hydro -> switch ci, cj for local */
-  const int do_ci_stars = (cj->nodeID == e->nodeID) && (ci->stars.count != 0) &&
-                          (cj->hydro.count != 0) && cell_is_active_stars(ci, e);
-  const int do_cj_stars = (ci->nodeID == e->nodeID) && (cj->stars.count != 0) &&
-                          (ci->hydro.count != 0) && cell_is_active_stars(cj, e);
-#endif
-
-  if (do_ci_stars) {
-
-    /* Pick-out the sorted lists. */
-    const struct sort_entry *restrict sort_j = cj->hydro.sort[sid];
-    const struct sort_entry *restrict sort_i = ci->stars.sort[sid];
-
-#ifdef SWIFT_DEBUG_CHECKS
-    /* Some constants used to checks that the parts are in the right frame */
-    const float shift_threshold_x =
-        2. * ci->width[0] +
-        2. * max(ci->stars.dx_max_part, cj->hydro.dx_max_part);
-    const float shift_threshold_y =
-        2. * ci->width[1] +
-        2. * max(ci->stars.dx_max_part, cj->hydro.dx_max_part);
-    const float shift_threshold_z =
-        2. * ci->width[2] +
-        2. * max(ci->stars.dx_max_part, cj->hydro.dx_max_part);
-#endif /* SWIFT_DEBUG_CHECKS */
-
-    /* Get some other useful values. */
-    const double hi_max = ci->stars.h_max * kernel_gamma - rshift;
-    const int count_i = ci->stars.count;
-    const int count_j = cj->hydro.count;
-    struct spart *restrict sparts_i = ci->stars.parts;
-    struct part *restrict parts_j = cj->hydro.parts;
-    struct xpart *restrict xparts_j = cj->hydro.xparts;
-    const double dj_min = sort_j[0].d;
-    const float dx_max_rshift =
-        (ci->stars.dx_max_sort + cj->hydro.dx_max_sort) - rshift;
-    const float dx_max = (ci->stars.dx_max_sort + cj->hydro.dx_max_sort);
-
-    /* Loop over the sparts in ci. */
-    for (int pid = count_i - 1;
-         pid >= 0 && sort_i[pid].d + hi_max + dx_max > dj_min; pid--) {
-
-      /* Get a hold of the ith part in ci. */
-      struct spart *restrict spi = &sparts_i[sort_i[pid].i];
-      const float hi = spi->h;
-
-      /* Skip inactive particles */
-      if (!spart_is_active(spi, e)) continue;
-
-      /* Skip inactive particles */
-      if (!feedback_is_active(spi, e->time, cosmo, with_cosmology)) continue;
-
-      /* Compute distance from the other cell. */
-      const double px[3] = {spi->x[0], spi->x[1], spi->x[2]};
-      float dist = px[0] * runner_shift[sid][0] + px[1] * runner_shift[sid][1] +
-                   px[2] * runner_shift[sid][2];
-
-      /* Is there anything we need to interact with ? */
-      const double di = dist + hi * kernel_gamma + dx_max_rshift;
-      if (di < dj_min) continue;
-
-      /* Get some additional information about pi */
-      const float hig2 = hi * hi * kernel_gamma2;
-      const float pix = spi->x[0] - (cj->loc[0] + shift[0]);
-      const float piy = spi->x[1] - (cj->loc[1] + shift[1]);
-      const float piz = spi->x[2] - (cj->loc[2] + shift[2]);
-
-      /* Loop over the parts in cj. */
-      for (int pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) {
-
-        /* Recover pj */
-        struct part *pj = &parts_j[sort_j[pjd].i];
-        struct xpart *xpj = &xparts_j[sort_j[pjd].i];
-
-        /* Skip inhibited particles. */
-        if (part_is_inhibited(pj, e)) continue;
-
-        const float hj = pj->h;
-        const float pjx = pj->x[0] - cj->loc[0];
-        const float pjy = pj->x[1] - cj->loc[1];
-        const float pjz = pj->x[2] - cj->loc[2];
-
-        /* Compute the pairwise distance. */
-        float dx[3] = {pix - pjx, piy - pjy, piz - pjz};
-        const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
-
-#ifdef SWIFT_DEBUG_CHECKS
-        /* Check that particles are in the correct frame after the shifts */
-        if (pix > shift_threshold_x || pix < -shift_threshold_x)
-          error(
-              "Invalid particle position in X for pi (pix=%e ci->width[0]=%e)",
-              pix, ci->width[0]);
-        if (piy > shift_threshold_y || piy < -shift_threshold_y)
-          error(
-              "Invalid particle position in Y for pi (piy=%e ci->width[1]=%e)",
-              piy, ci->width[1]);
-        if (piz > shift_threshold_z || piz < -shift_threshold_z)
-          error(
-              "Invalid particle position in Z for pi (piz=%e ci->width[2]=%e)",
-              piz, ci->width[2]);
-        if (pjx > shift_threshold_x || pjx < -shift_threshold_x)
-          error(
-              "Invalid particle position in X for pj (pjx=%e ci->width[0]=%e)",
-              pjx, ci->width[0]);
-        if (pjy > shift_threshold_y || pjy < -shift_threshold_y)
-          error(
-              "Invalid particle position in Y for pj (pjy=%e ci->width[1]=%e)",
-              pjy, ci->width[1]);
-        if (pjz > shift_threshold_z || pjz < -shift_threshold_z)
-          error(
-              "Invalid particle position in Z for pj (pjz=%e ci->width[2]=%e)",
-              pjz, ci->width[2]);
-
-        /* Check that particles have been drifted to the current time */
-        if (spi->ti_drift != e->ti_current)
-          error("Particle spi not drifted to current time");
-        if (pj->ti_drift != e->ti_current)
-          error("Particle pj not drifted to current time");
-#endif
-
-        /* Hit or miss? */
-        if (r2 < hig2) {
-          IACT_STARS(r2, dx, hi, hj, spi, pj, a, H);
-
-#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
-          runner_iact_nonsym_feedback_density(r2, dx, hi, hj, spi, pj, xpj,
-                                              cosmo, ti_current);
-#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK)
-          runner_iact_nonsym_feedback_apply(r2, dx, hi, hj, spi, pj, xpj, cosmo,
-                                            ti_current);
-#endif
-        }
-      } /* loop over the parts in cj. */
-    }   /* loop over the parts in ci. */
-  }     /* do_ci_stars */
-
-  if (do_cj_stars) {
-    /* Pick-out the sorted lists. */
-    const struct sort_entry *restrict sort_i = ci->hydro.sort[sid];
-    const struct sort_entry *restrict sort_j = cj->stars.sort[sid];
-
-#ifdef SWIFT_DEBUG_CHECKS
-    /* Some constants used to checks that the parts are in the right frame */
-    const float shift_threshold_x =
-        2. * ci->width[0] +
-        2. * max(ci->hydro.dx_max_part, cj->stars.dx_max_part);
-    const float shift_threshold_y =
-        2. * ci->width[1] +
-        2. * max(ci->hydro.dx_max_part, cj->stars.dx_max_part);
-    const float shift_threshold_z =
-        2. * ci->width[2] +
-        2. * max(ci->hydro.dx_max_part, cj->stars.dx_max_part);
-#endif /* SWIFT_DEBUG_CHECKS */
-
-    /* Get some other useful values. */
-    const double hj_max = cj->hydro.h_max * kernel_gamma;
-    const int count_i = ci->hydro.count;
-    const int count_j = cj->stars.count;
-    struct part *restrict parts_i = ci->hydro.parts;
-    struct xpart *restrict xparts_i = ci->hydro.xparts;
-    struct spart *restrict sparts_j = cj->stars.parts;
-    const double di_max = sort_i[count_i - 1].d - rshift;
-    const float dx_max_rshift =
-        (ci->hydro.dx_max_sort + cj->stars.dx_max_sort) + rshift;
-    const float dx_max = (ci->hydro.dx_max_sort + cj->stars.dx_max_sort);
-
-    /* Loop over the parts in cj. */
-    for (int pjd = 0; pjd < count_j && sort_j[pjd].d - hj_max - dx_max < di_max;
-         pjd++) {
-
-      /* Get a hold of the jth part in cj. */
-      struct spart *spj = &sparts_j[sort_j[pjd].i];
-      const float hj = spj->h;
-
-      /* Skip inactive particles */
-      if (!spart_is_active(spj, e)) continue;
-
-      /* Skip inactive particles */
-      if (!feedback_is_active(spj, e->time, cosmo, with_cosmology)) continue;
-
-      /* Compute distance from the other cell. */
-      const double px[3] = {spj->x[0], spj->x[1], spj->x[2]};
-      float dist = px[0] * runner_shift[sid][0] + px[1] * runner_shift[sid][1] +
-                   px[2] * runner_shift[sid][2];
-
-      /* Is there anything we need to interact with ? */
-      const double dj = dist - hj * kernel_gamma - dx_max_rshift;
-      if (dj - rshift > di_max) continue;
-
-      /* Get some additional information about pj */
-      const float hjg2 = hj * hj * kernel_gamma2;
-      const float pjx = spj->x[0] - cj->loc[0];
-      const float pjy = spj->x[1] - cj->loc[1];
-      const float pjz = spj->x[2] - cj->loc[2];
-
-      /* Loop over the parts in ci. */
-      for (int pid = count_i - 1; pid >= 0 && sort_i[pid].d > dj; pid--) {
-
-        /* Recover pi */
-        struct part *pi = &parts_i[sort_i[pid].i];
-        struct xpart *xpi = &xparts_i[sort_i[pid].i];
-
-        /* Skip inhibited particles. */
-        if (part_is_inhibited(pi, e)) continue;
-
-        const float hi = pi->h;
-        const float pix = pi->x[0] - (cj->loc[0] + shift[0]);
-        const float piy = pi->x[1] - (cj->loc[1] + shift[1]);
-        const float piz = pi->x[2] - (cj->loc[2] + shift[2]);
-
-        /* Compute the pairwise distance. */
-        float dx[3] = {pjx - pix, pjy - piy, pjz - piz};
-        const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
-
-#ifdef SWIFT_DEBUG_CHECKS
-        /* Check that particles are in the correct frame after the shifts */
-        if (pix > shift_threshold_x || pix < -shift_threshold_x)
-          error(
-              "Invalid particle position in X for pi (pix=%e ci->width[0]=%e)",
-              pix, ci->width[0]);
-        if (piy > shift_threshold_y || piy < -shift_threshold_y)
-          error(
-              "Invalid particle position in Y for pi (piy=%e ci->width[1]=%e)",
-              piy, ci->width[1]);
-        if (piz > shift_threshold_z || piz < -shift_threshold_z)
-          error(
-              "Invalid particle position in Z for pi (piz=%e ci->width[2]=%e)",
-              piz, ci->width[2]);
-        if (pjx > shift_threshold_x || pjx < -shift_threshold_x)
-          error(
-              "Invalid particle position in X for pj (pjx=%e ci->width[0]=%e)",
-              pjx, ci->width[0]);
-        if (pjy > shift_threshold_y || pjy < -shift_threshold_y)
-          error(
-              "Invalid particle position in Y for pj (pjy=%e ci->width[1]=%e)",
-              pjy, ci->width[1]);
-        if (pjz > shift_threshold_z || pjz < -shift_threshold_z)
-          error(
-              "Invalid particle position in Z for pj (pjz=%e ci->width[2]=%e)",
-              pjz, ci->width[2]);
-
-        /* Check that particles have been drifted to the current time */
-        if (pi->ti_drift != e->ti_current)
-          error("Particle pi not drifted to current time");
-        if (spj->ti_drift != e->ti_current)
-          error("Particle spj not drifted to current time");
-#endif
-
-        /* Hit or miss? */
-        if (r2 < hjg2) {
-
-          IACT_STARS(r2, dx, hj, hi, spj, pi, a, H);
-
-#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
-          runner_iact_nonsym_feedback_density(r2, dx, hj, hi, spj, pi, xpi,
-                                              cosmo, ti_current);
-#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK)
-          runner_iact_nonsym_feedback_apply(r2, dx, hj, hi, spj, pi, xpi, cosmo,
-                                            ti_current);
-#endif
-        }
-      } /* loop over the parts in ci. */
-    }   /* loop over the parts in cj. */
-  }     /* Cell cj is active */
-
-  TIMER_TOC(TIMER_DOPAIR_STARS);
-}
-
-void DOPAIR1_STARS_NAIVE(struct runner *r, struct cell *restrict ci,
-                         struct cell *restrict cj, int timer) {
-
-  TIMER_TIC;
-
-#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
-  const int do_ci_stars = ci->nodeID == r->e->nodeID;
-  const int do_cj_stars = cj->nodeID == r->e->nodeID;
-#else
-  /* here we are updating the hydro -> switch ci, cj */
-  const int do_ci_stars = cj->nodeID == r->e->nodeID;
-  const int do_cj_stars = ci->nodeID == r->e->nodeID;
-#endif
-  if (do_ci_stars && ci->stars.count != 0 && cj->hydro.count != 0)
-    DO_NONSYM_PAIR1_STARS_NAIVE(r, ci, cj);
-  if (do_cj_stars && cj->stars.count != 0 && ci->hydro.count != 0)
-    DO_NONSYM_PAIR1_STARS_NAIVE(r, cj, ci);
-
-  TIMER_TOC(TIMER_DOPAIR_STARS);
-}
-
-/**
- * @brief Compute the interactions between a cell pair, but only for the
- *      given indices in ci.
- *
- * Version using a brute-force algorithm.
- *
- * @param r The #runner.
- * @param ci The first #cell.
- * @param sparts_i The #part to interact with @c cj.
- * @param ind The list of indices of particles in @c ci to interact with.
- * @param scount The number of particles in @c ind.
- * @param cj The second #cell.
- * @param sid The direction of the pair.
- * @param flipped Flag to check whether the cells have been flipped or not.
- * @param shift The shift vector to apply to the particles in ci.
- */
-void DOPAIR1_SUBSET_STARS(struct runner *r, struct cell *restrict ci,
-                          struct spart *restrict sparts_i, int *restrict ind,
-                          int scount, struct cell *restrict cj, const int sid,
-                          const int flipped, const double *shift) {
-
-  const struct engine *e = r->e;
-  const integertime_t ti_current = e->ti_current;
-  const struct cosmology *cosmo = e->cosmology;
-
-  /* Cosmological terms */
-  const float a = cosmo->a;
-  const float H = cosmo->H;
-
-  const int count_j = cj->hydro.count;
-  struct part *restrict parts_j = cj->hydro.parts;
-  struct xpart *restrict xparts_j = cj->hydro.xparts;
-
-  /* Early abort? */
-  if (count_j == 0) return;
-
-  /* Pick-out the sorted lists. */
-  const struct sort_entry *restrict sort_j = cj->hydro.sort[sid];
-  const float dxj = cj->hydro.dx_max_sort;
-
-  /* Sparts are on the left? */
-  if (!flipped) {
-
-    /* Loop over the sparts_i. */
-    for (int pid = 0; pid < scount; pid++) {
-
-      /* Get a hold of the ith spart in ci. */
-      struct spart *restrict spi = &sparts_i[ind[pid]];
-      const double pix = spi->x[0] - (shift[0]);
-      const double piy = spi->x[1] - (shift[1]);
-      const double piz = spi->x[2] - (shift[2]);
-      const float hi = spi->h;
-      const float hig2 = hi * hi * kernel_gamma2;
-      const double di = hi * kernel_gamma + dxj + pix * runner_shift[sid][0] +
-                        piy * runner_shift[sid][1] + piz * runner_shift[sid][2];
-
-      /* Loop over the parts in cj. */
-      for (int pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) {
-
-        /* Get a pointer to the jth particle. */
-        struct part *restrict pj = &parts_j[sort_j[pjd].i];
-        struct xpart *restrict xpj = &xparts_j[sort_j[pjd].i];
-
-        /* Skip inhibited particles. */
-        if (part_is_inhibited(pj, e)) continue;
-
-        const double pjx = pj->x[0];
-        const double pjy = pj->x[1];
-        const double pjz = pj->x[2];
-        const float hj = pj->h;
-
-        /* Compute the pairwise distance. */
-        float dx[3] = {(float)(pix - pjx), (float)(piy - pjy),
-                       (float)(piz - pjz)};
-        const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
-
-#ifdef SWIFT_DEBUG_CHECKS
-        /* Check that particles have been drifted to the current time */
-        if (spi->ti_drift != e->ti_current)
-          error("Particle pi not drifted to current time");
-        if (pj->ti_drift != e->ti_current)
-          error("Particle pj not drifted to current time");
-#endif
-
-        /* Hit or miss? */
-        if (r2 < hig2) {
-          IACT_STARS(r2, dx, hi, hj, spi, pj, a, H);
-
-#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
-          runner_iact_nonsym_feedback_density(r2, dx, hi, hj, spi, pj, xpj,
-                                              cosmo, ti_current);
-#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK)
-          runner_iact_nonsym_feedback_apply(r2, dx, hi, hj, spi, pj, xpj, cosmo,
-                                            ti_current);
-#endif
-        }
-      } /* loop over the parts in cj. */
-    }   /* loop over the sparts in ci. */
-  }
-
-  /* Sparts are on the right. */
-  else {
-
-    /* Loop over the sparts_i. */
-    for (int pid = 0; pid < scount; pid++) {
-
-      /* Get a hold of the ith spart in ci. */
-      struct spart *restrict spi = &sparts_i[ind[pid]];
-      const double pix = spi->x[0] - (shift[0]);
-      const double piy = spi->x[1] - (shift[1]);
-      const double piz = spi->x[2] - (shift[2]);
-      const float hi = spi->h;
-      const float hig2 = hi * hi * kernel_gamma2;
-      const double di = -hi * kernel_gamma - dxj + pix * runner_shift[sid][0] +
-                        piy * runner_shift[sid][1] + piz * runner_shift[sid][2];
-
-      /* Loop over the parts in cj. */
-      for (int pjd = count_j - 1; pjd >= 0 && di < sort_j[pjd].d; pjd--) {
-
-        /* Get a pointer to the jth particle. */
-        struct part *restrict pj = &parts_j[sort_j[pjd].i];
-        struct xpart *restrict xpj = &xparts_j[sort_j[pjd].i];
-
-        /* Skip inhibited particles. */
-        if (part_is_inhibited(pj, e)) continue;
-
-        const double pjx = pj->x[0];
-        const double pjy = pj->x[1];
-        const double pjz = pj->x[2];
-        const float hj = pj->h;
-
-        /* Compute the pairwise distance. */
-        float dx[3] = {(float)(pix - pjx), (float)(piy - pjy),
-                       (float)(piz - pjz)};
-        const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
-
-#ifdef SWIFT_DEBUG_CHECKS
-        /* Check that particles have been drifted to the current time */
-        if (spi->ti_drift != e->ti_current)
-          error("Particle pi not drifted to current time");
-        if (pj->ti_drift != e->ti_current)
-          error("Particle pj not drifted to current time");
-#endif
-
-        /* Hit or miss? */
-        if (r2 < hig2) {
-          IACT_STARS(r2, dx, hi, hj, spi, pj, a, H);
-
-#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
-          runner_iact_nonsym_feedback_density(r2, dx, hi, hj, spi, pj, xpj,
-                                              cosmo, ti_current);
-#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK)
-          runner_iact_nonsym_feedback_apply(r2, dx, hi, hj, spi, pj, xpj, cosmo,
-                                            ti_current);
-#endif
-        }
-      } /* loop over the parts in cj. */
-    }   /* loop over the sparts in ci. */
-  }
-}
+void DOSELF1_BRANCH_STARS(struct runner *r, struct cell *c);
+void DOPAIR1_BRANCH_STARS(struct runner *r, struct cell *ci, struct cell *cj);
 
-/**
- * @brief Compute the interactions between a cell pair, but only for the
- *      given indices in ci.
- *
- * Version using a brute-force algorithm.
- *
- * @param r The #runner.
- * @param ci The first #cell.
- * @param sparts_i The #part to interact with @c cj.
- * @param ind The list of indices of particles in @c ci to interact with.
- * @param scount The number of particles in @c ind.
- * @param cj The second #cell.
- * @param shift The shift vector to apply to the particles in ci.
- */
-void DOPAIR1_SUBSET_STARS_NAIVE(struct runner *r, struct cell *restrict ci,
-                                struct spart *restrict sparts_i,
-                                int *restrict ind, int scount,
-                                struct cell *restrict cj, const double *shift) {
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (ci->nodeID != engine_rank) error("Should be run on a different node");
-#endif
-
-  const struct engine *e = r->e;
-  const integertime_t ti_current = e->ti_current;
-  const struct cosmology *cosmo = e->cosmology;
-
-  /* Cosmological terms */
-  const float a = cosmo->a;
-  const float H = cosmo->H;
-
-  const int count_j = cj->hydro.count;
-  struct part *restrict parts_j = cj->hydro.parts;
-  struct xpart *restrict xparts_j = cj->hydro.xparts;
-
-  /* Early abort? */
-  if (count_j == 0) return;
-
-  /* Loop over the parts_i. */
-  for (int pid = 0; pid < scount; pid++) {
-
-    /* Get a hold of the ith part in ci. */
-    struct spart *restrict spi = &sparts_i[ind[pid]];
-
-    const double pix = spi->x[0] - (shift[0]);
-    const double piy = spi->x[1] - (shift[1]);
-    const double piz = spi->x[2] - (shift[2]);
-    const float hi = spi->h;
-    const float hig2 = hi * hi * kernel_gamma2;
-
-#ifdef SWIFT_DEBUG_CHECKS
-    if (!spart_is_active(spi, e))
-      error("Trying to correct smoothing length of inactive particle !");
-#endif
-
-    /* Loop over the parts in cj. */
-    for (int pjd = 0; pjd < count_j; pjd++) {
-
-      /* Get a pointer to the jth particle. */
-      struct part *restrict pj = &parts_j[pjd];
-      struct xpart *restrict xpj = &xparts_j[pjd];
-
-      /* Skip inhibited particles */
-      if (part_is_inhibited(pj, e)) continue;
-
-      const double pjx = pj->x[0];
-      const double pjy = pj->x[1];
-      const double pjz = pj->x[2];
-      const float hj = pj->h;
-
-      /* Compute the pairwise distance. */
-      float dx[3] = {(float)(pix - pjx), (float)(piy - pjy),
-                     (float)(piz - pjz)};
-      const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
-
-#ifdef SWIFT_DEBUG_CHECKS
-      /* Check that particles have been drifted to the current time */
-      if (pj->ti_drift != e->ti_current)
-        error("Particle pj not drifted to current time");
-#endif
-      /* Hit or miss? */
-      if (r2 < hig2) {
-        IACT_STARS(r2, dx, hi, hj, spi, pj, a, H);
-
-#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
-        runner_iact_nonsym_feedback_density(r2, dx, hi, hj, spi, pj, xpj, cosmo,
-                                            ti_current);
-#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK)
-        runner_iact_nonsym_feedback_apply(r2, dx, hi, hj, spi, pj, xpj, cosmo,
-                                          ti_current);
-#endif
-      }
-    } /* loop over the parts in cj. */
-  }   /* loop over the parts in ci. */
-}
-
-/**
- * @brief Compute the interactions between a cell pair, but only for the
- *      given indices in ci.
- *
- * @param r The #runner.
- * @param ci The first #cell.
- * @param sparts The #spart to interact.
- * @param ind The list of indices of particles in @c ci to interact with.
- * @param scount The number of particles in @c ind.
- */
-void DOSELF1_SUBSET_STARS(struct runner *r, struct cell *restrict ci,
-                          struct spart *restrict sparts, int *restrict ind,
-                          int scount) {
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (ci->nodeID != engine_rank) error("Should be run on a different node");
-#endif
-
-  const struct engine *e = r->e;
-  const integertime_t ti_current = e->ti_current;
-  const struct cosmology *cosmo = e->cosmology;
-
-  /* Cosmological terms */
-  const float a = cosmo->a;
-  const float H = cosmo->H;
-
-  const int count_i = ci->hydro.count;
-  struct part *restrict parts_j = ci->hydro.parts;
-  struct xpart *restrict xparts_j = ci->hydro.xparts;
-
-  /* Early abort? */
-  if (count_i == 0) return;
-
-  /* Loop over the parts in ci. */
-  for (int spid = 0; spid < scount; spid++) {
-
-    /* Get a hold of the ith part in ci. */
-    struct spart *spi = &sparts[ind[spid]];
-    const float spix[3] = {(float)(spi->x[0] - ci->loc[0]),
-                           (float)(spi->x[1] - ci->loc[1]),
-                           (float)(spi->x[2] - ci->loc[2])};
-    const float hi = spi->h;
-    const float hig2 = hi * hi * kernel_gamma2;
-
-#ifdef SWIFT_DEBUG_CHECKS
-    if (!spart_is_active(spi, e))
-      error("Inactive particle in subset function!");
-#endif
-
-    /* Loop over the parts in cj. */
-    for (int pjd = 0; pjd < count_i; pjd++) {
-
-      /* Get a pointer to the jth particle. */
-      struct part *restrict pj = &parts_j[pjd];
-      struct xpart *restrict xpj = &xparts_j[pjd];
-
-      /* Early abort? */
-      if (part_is_inhibited(pj, e)) continue;
-
-      /* Compute the pairwise distance. */
-      const float pjx[3] = {(float)(pj->x[0] - ci->loc[0]),
-                            (float)(pj->x[1] - ci->loc[1]),
-                            (float)(pj->x[2] - ci->loc[2])};
-      float dx[3] = {spix[0] - pjx[0], spix[1] - pjx[1], spix[2] - pjx[2]};
-      const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
-
-#ifdef SWIFT_DEBUG_CHECKS
-      /* Check that particles have been drifted to the current time */
-      if (pj->ti_drift != e->ti_current)
-        error("Particle pj not drifted to current time");
-#endif
-
-      /* Hit or miss? */
-      if (r2 < hig2) {
-        IACT_STARS(r2, dx, hi, pj->h, spi, pj, a, H);
-#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
-        runner_iact_nonsym_feedback_density(r2, dx, hi, pj->h, spi, pj, xpj,
-                                            cosmo, ti_current);
-#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK)
-        runner_iact_nonsym_feedback_apply(r2, dx, hi, pj->h, spi, pj, xpj,
-                                          cosmo, ti_current);
-#endif
-      }
-    } /* loop over the parts in cj. */
-  }   /* loop over the parts in ci. */
-}
+void DOSUB_SELF1_STARS(struct runner *r, struct cell *ci, int gettimer);
+void DOSUB_PAIR1_STARS(struct runner *r, struct cell *ci, struct cell *cj,
+                       int gettimer);
 
-/**
- * @brief Determine which version of DOSELF1_SUBSET_STARS needs to be called
- * depending on the optimisation level.
- *
- * @param r The #runner.
- * @param ci The first #cell.
- * @param sparts The #spart to interact.
- * @param ind The list of indices of particles in @c ci to interact with.
- * @param scount The number of particles in @c ind.
- */
 void DOSELF1_SUBSET_BRANCH_STARS(struct runner *r, struct cell *restrict ci,
                                  struct spart *restrict sparts,
-                                 int *restrict ind, int scount) {
+                                 int *restrict ind, int scount);
 
-  DOSELF1_SUBSET_STARS(r, ci, sparts, ind, scount);
-}
-
-/**
- * @brief Determine which version of DOPAIR1_SUBSET_STARS needs to be called
- * depending on the orientation of the cells or whether DOPAIR1_SUBSET_STARS
- * needs to be called at all.
- *
- * @param r The #runner.
- * @param ci The first #cell.
- * @param sparts_i The #spart to interact with @c cj.
- * @param ind The list of indices of particles in @c ci to interact with.
- * @param scount The number of particles in @c ind.
- * @param cj The second #cell.
- */
 void DOPAIR1_SUBSET_BRANCH_STARS(struct runner *r, struct cell *restrict ci,
                                  struct spart *restrict sparts_i,
                                  int *restrict ind, int scount,
-                                 struct cell *restrict cj) {
-
-  const struct engine *e = r->e;
-
-  /* Anything to do here? */
-  if (cj->hydro.count == 0) return;
-
-  /* Get the relative distance between the pairs, wrapping. */
-  double shift[3] = {0.0, 0.0, 0.0};
-  for (int k = 0; k < 3; k++) {
-    if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2)
-      shift[k] = e->s->dim[k];
-    else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2)
-      shift[k] = -e->s->dim[k];
-  }
-
-#ifdef SWIFT_USE_NAIVE_INTERACTIONS_STARS
-  DOPAIR1_SUBSET_STARS_NAIVE(r, ci, sparts_i, ind, scount, cj, shift);
-#else
-  /* Get the sorting index. */
-  int sid = 0;
-  for (int k = 0; k < 3; k++)
-    sid = 3 * sid + ((cj->loc[k] - ci->loc[k] + shift[k] < 0)
-                         ? 0
-                         : (cj->loc[k] - ci->loc[k] + shift[k] > 0) ? 2 : 1);
-
-  /* Switch the cells around? */
-  const int flipped = runner_flip[sid];
-  sid = sortlistID[sid];
-
-  /* Has the cell cj been sorted? */
-  if (!(cj->hydro.sorted & (1 << sid)) ||
-      cj->hydro.dx_max_sort_old > space_maxreldx * cj->dmin)
-    error("Interacting unsorted cells.");
-
-  DOPAIR1_SUBSET_STARS(r, ci, sparts_i, ind, scount, cj, sid, flipped, shift);
-#endif
-}
+                                 struct cell *restrict cj);
 
 void DOSUB_SUBSET_STARS(struct runner *r, struct cell *ci, struct spart *sparts,
-                        int *ind, int scount, struct cell *cj, int gettimer) {
-
-  const struct engine *e = r->e;
-  struct space *s = e->s;
-
-  /* Should we even bother? */
-  if (!cell_is_active_stars(ci, e) &&
-      (cj == NULL || !cell_is_active_stars(cj, e)))
-    return;
-
-  /* Find out in which sub-cell of ci the parts are. */
-  struct cell *sub = NULL;
-  if (ci->split) {
-    for (int k = 0; k < 8; k++) {
-      if (ci->progeny[k] != NULL) {
-        if (&sparts[ind[0]] >= &ci->progeny[k]->stars.parts[0] &&
-            &sparts[ind[0]] <
-                &ci->progeny[k]->stars.parts[ci->progeny[k]->stars.count]) {
-          sub = ci->progeny[k];
-          break;
-        }
-      }
-    }
-  }
-
-  /* Is this a single cell? */
-  if (cj == NULL) {
-
-    /* Recurse? */
-    if (cell_can_recurse_in_self_stars_task(ci)) {
-
-      /* Loop over all progeny. */
-      DOSUB_SUBSET_STARS(r, sub, sparts, ind, scount, NULL, 0);
-      for (int j = 0; j < 8; j++)
-        if (ci->progeny[j] != sub && ci->progeny[j] != NULL)
-          DOSUB_SUBSET_STARS(r, sub, sparts, ind, scount, ci->progeny[j], 0);
-
-    }
-
-    /* Otherwise, compute self-interaction. */
-    else
-      DOSELF1_SUBSET_BRANCH_STARS(r, ci, sparts, ind, scount);
-  } /* self-interaction. */
-
-  /* Otherwise, it's a pair interaction. */
-  else {
-
-    /* Recurse? */
-    if (cell_can_recurse_in_pair_stars_task(ci, cj) &&
-        cell_can_recurse_in_pair_stars_task(cj, ci)) {
-
-      /* Get the type of pair and flip ci/cj if needed. */
-      double shift[3] = {0.0, 0.0, 0.0};
-      const int sid = space_getsid(s, &ci, &cj, shift);
-
-      struct cell_split_pair *csp = &cell_split_pairs[sid];
-      for (int k = 0; k < csp->count; k++) {
-        const int pid = csp->pairs[k].pid;
-        const int pjd = csp->pairs[k].pjd;
-        if (ci->progeny[pid] == sub && cj->progeny[pjd] != NULL)
-          DOSUB_SUBSET_STARS(r, ci->progeny[pid], sparts, ind, scount,
-                             cj->progeny[pjd], 0);
-        if (ci->progeny[pid] != NULL && cj->progeny[pjd] == sub)
-          DOSUB_SUBSET_STARS(r, cj->progeny[pjd], sparts, ind, scount,
-                             ci->progeny[pid], 0);
-      }
-    }
-
-    /* Otherwise, compute the pair directly. */
-    else if (cell_is_active_stars(ci, e) && cj->hydro.count > 0) {
-
-      /* Do any of the cells need to be drifted first? */
-      if (cell_is_active_stars(ci, e)) {
-        if (!cell_are_spart_drifted(ci, e)) error("Cell should be drifted!");
-        if (!cell_are_part_drifted(cj, e)) error("Cell should be drifted!");
-      }
-
-      DOPAIR1_SUBSET_BRANCH_STARS(r, ci, sparts, ind, scount, cj);
-    }
-
-  } /* otherwise, pair interaction. */
-}
-
-/**
- * @brief Determine which version of DOSELF1_STARS needs to be called depending
- * on the optimisation level.
- *
- * @param r #runner
- * @param c #cell c
- *
- */
-void DOSELF1_BRANCH_STARS(struct runner *r, struct cell *c) {
-
-  const struct engine *restrict e = r->e;
-
-  /* Anything to do here? */
-  if (c->stars.count == 0) return;
-
-  /* Anything to do here? */
-  if (!cell_is_active_stars(c, e)) return;
-
-  /* Did we mess up the recursion? */
-  if (c->stars.h_max_old * kernel_gamma > c->dmin)
-    error("Cell smaller than smoothing length");
-
-  DOSELF1_STARS(r, c, 1);
-}
-
-#define RUNNER_CHECK_SORT(TYPE, PART, cj, ci, sid)                          \
-  ({                                                                        \
-    const struct sort_entry *restrict sort_j = cj->TYPE.sort[sid];          \
-                                                                            \
-    for (int pjd = 0; pjd < cj->TYPE.count; pjd++) {                        \
-      const struct PART *p = &cj->TYPE.parts[sort_j[pjd].i];                \
-      if (PART##_is_inhibited(p, e)) continue;                              \
-                                                                            \
-      const float d = p->x[0] * runner_shift[sid][0] +                      \
-                      p->x[1] * runner_shift[sid][1] +                      \
-                      p->x[2] * runner_shift[sid][2];                       \
-      if ((fabsf(d - sort_j[pjd].d) - cj->TYPE.dx_max_sort) >               \
-              1.0e-4 * max(fabsf(d), cj->TYPE.dx_max_sort_old) &&           \
-          (fabsf(d - sort_j[pjd].d) - cj->TYPE.dx_max_sort) >               \
-              cj->width[0] * 1.0e-10)                                       \
-        error(                                                              \
-            "particle shift diff exceeds dx_max_sort in cell cj. "          \
-            "cj->nodeID=%d "                                                \
-            "ci->nodeID=%d d=%e sort_j[pjd].d=%e cj->" #TYPE                \
-            ".dx_max_sort=%e "                                              \
-            "cj->" #TYPE                                                    \
-            ".dx_max_sort_old=%e, cellID=%i super->cellID=%i"               \
-            "cj->depth=%d cj->maxdepth=%d",                                 \
-            cj->nodeID, ci->nodeID, d, sort_j[pjd].d, cj->TYPE.dx_max_sort, \
-            cj->TYPE.dx_max_sort_old, cj->cellID, cj->hydro.super->cellID,  \
-            cj->depth, cj->maxdepth);                                       \
-    }                                                                       \
-  })
-
-/**
- * @brief Determine which version of DOPAIR1_STARS needs to be called depending
- * on the orientation of the cells or whether DOPAIR1_STARS needs to be called
- * at all.
- *
- * @param r #runner
- * @param ci #cell ci
- * @param cj #cell cj
- *
- */
-void DOPAIR1_BRANCH_STARS(struct runner *r, struct cell *ci, struct cell *cj) {
-
-  const struct engine *restrict e = r->e;
-
-  /* Get the sort ID. */
-  double shift[3] = {0.0, 0.0, 0.0};
-  const int sid = space_getsid(e->s, &ci, &cj, shift);
-
-  const int ci_active = cell_is_active_stars(ci, e);
-  const int cj_active = cell_is_active_stars(cj, e);
-#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
-  const int do_ci_stars = ci->nodeID == e->nodeID;
-  const int do_cj_stars = cj->nodeID == e->nodeID;
-#else
-  /* here we are updating the hydro -> switch ci, cj */
-  const int do_ci_stars = cj->nodeID == e->nodeID;
-  const int do_cj_stars = ci->nodeID == e->nodeID;
-#endif
-  const int do_ci = (ci->stars.count != 0 && cj->hydro.count != 0 &&
-                     ci_active && do_ci_stars);
-  const int do_cj = (cj->stars.count != 0 && ci->hydro.count != 0 &&
-                     cj_active && do_cj_stars);
-
-  /* Anything to do here? */
-  if (!do_ci && !do_cj) return;
-
-  /* Check that cells are drifted. */
-  if (do_ci &&
-      (!cell_are_spart_drifted(ci, e) || !cell_are_part_drifted(cj, e)))
-    error("Interacting undrifted cells.");
-
-  /* Have the cells been sorted? */
-  if (do_ci && (!(ci->stars.sorted & (1 << sid)) ||
-                ci->stars.dx_max_sort_old > space_maxreldx * ci->dmin))
-    error("Interacting unsorted cells.");
-
-  if (do_ci && (!(cj->hydro.sorted & (1 << sid)) ||
-                cj->hydro.dx_max_sort_old > space_maxreldx * cj->dmin))
-    error("Interacting unsorted cells.");
-
-  if (do_cj &&
-      (!cell_are_part_drifted(ci, e) || !cell_are_spart_drifted(cj, e)))
-    error("Interacting undrifted cells.");
-
-  /* Have the cells been sorted? */
-  if (do_cj && (!(ci->hydro.sorted & (1 << sid)) ||
-                ci->hydro.dx_max_sort_old > space_maxreldx * ci->dmin))
-    error("Interacting unsorted cells.");
-
-  if (do_cj && (!(cj->stars.sorted & (1 << sid)) ||
-                cj->stars.dx_max_sort_old > space_maxreldx * cj->dmin))
-    error("Interacting unsorted cells.");
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (do_ci) {
-    // MATTHIEU: This test is faulty. To be fixed...
-    // RUNNER_CHECK_SORT(hydro, part, cj, ci, sid);
-    RUNNER_CHECK_SORT(stars, spart, ci, cj, sid);
-  }
-
-  if (do_cj) {
-    // MATTHIEU: This test is faulty. To be fixed...
-    // RUNNER_CHECK_SORT(hydro, part, ci, cj, sid);
-    RUNNER_CHECK_SORT(stars, spart, cj, ci, sid);
-  }
-#endif /* SWIFT_DEBUG_CHECKS */
-
-#ifdef SWIFT_USE_NAIVE_INTERACTIONS_STARS
-  DOPAIR1_STARS_NAIVE(r, ci, cj, 1);
-#else
-  DO_SYM_PAIR1_STARS(r, ci, cj, sid, shift);
-#endif
-}
-
-/**
- * @brief Compute grouped sub-cell interactions for pairs
- *
- * @param r The #runner.
- * @param ci The first #cell.
- * @param cj The second #cell.
- * @param gettimer Do we have a timer ?
- *
- * @todo Hard-code the sid on the recursive calls to avoid the
- * redundant computations to find the sid on-the-fly.
- */
-void DOSUB_PAIR1_STARS(struct runner *r, struct cell *ci, struct cell *cj,
-                       int gettimer) {
-
-  TIMER_TIC;
-
-  struct space *s = r->e->s;
-  const struct engine *e = r->e;
-
-  /* Should we even bother? */
-  const int should_do_ci = ci->stars.count != 0 && cj->hydro.count != 0 &&
-                           cell_is_active_stars(ci, e);
-  const int should_do_cj = cj->stars.count != 0 && ci->hydro.count != 0 &&
-                           cell_is_active_stars(cj, e);
-  if (!should_do_ci && !should_do_cj) return;
-
-  /* Get the type of pair and flip ci/cj if needed. */
-  double shift[3];
-  const int sid = space_getsid(s, &ci, &cj, shift);
-
-  /* Recurse? */
-  if (cell_can_recurse_in_pair_stars_task(ci, cj) &&
-      cell_can_recurse_in_pair_stars_task(cj, ci)) {
-    struct cell_split_pair *csp = &cell_split_pairs[sid];
-    for (int k = 0; k < csp->count; k++) {
-      const int pid = csp->pairs[k].pid;
-      const int pjd = csp->pairs[k].pjd;
-      if (ci->progeny[pid] != NULL && cj->progeny[pjd] != NULL)
-        DOSUB_PAIR1_STARS(r, ci->progeny[pid], cj->progeny[pjd], 0);
-    }
-  }
-
-  /* Otherwise, compute the pair directly. */
-  else {
-
-#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
-    const int do_ci_stars = ci->nodeID == e->nodeID;
-    const int do_cj_stars = cj->nodeID == e->nodeID;
-#else
-    /* here we are updating the hydro -> switch ci, cj */
-    const int do_ci_stars = cj->nodeID == e->nodeID;
-    const int do_cj_stars = ci->nodeID == e->nodeID;
-#endif
-    const int do_ci = ci->stars.count != 0 && cj->hydro.count != 0 &&
-                      cell_is_active_stars(ci, e) && do_ci_stars;
-    const int do_cj = cj->stars.count != 0 && ci->hydro.count != 0 &&
-                      cell_is_active_stars(cj, e) && do_cj_stars;
-
-    if (do_ci) {
-
-      /* Make sure both cells are drifted to the current timestep. */
-      if (!cell_are_spart_drifted(ci, e))
-        error("Interacting undrifted cells (sparts).");
-
-      if (!cell_are_part_drifted(cj, e))
-        error("Interacting undrifted cells (parts).");
-
-      /* Do any of the cells need to be sorted first? */
-      if (!(ci->stars.sorted & (1 << sid)) ||
-          ci->stars.dx_max_sort_old > ci->dmin * space_maxreldx) {
-        error("Interacting unsorted cell (sparts).");
-      }
-
-      if (!(cj->hydro.sorted & (1 << sid)) ||
-          cj->hydro.dx_max_sort_old > cj->dmin * space_maxreldx)
-        error("Interacting unsorted cell (parts). %i", cj->nodeID);
-    }
-
-    if (do_cj) {
-
-      /* Make sure both cells are drifted to the current timestep. */
-      if (!cell_are_part_drifted(ci, e))
-        error("Interacting undrifted cells (parts).");
-
-      if (!cell_are_spart_drifted(cj, e))
-        error("Interacting undrifted cells (sparts).");
-
-      /* Do any of the cells need to be sorted first? */
-      if (!(ci->hydro.sorted & (1 << sid)) ||
-          ci->hydro.dx_max_sort_old > ci->dmin * space_maxreldx) {
-        error("Interacting unsorted cell (parts).");
-      }
-
-      if (!(cj->stars.sorted & (1 << sid)) ||
-          cj->stars.dx_max_sort_old > cj->dmin * space_maxreldx) {
-        error("Interacting unsorted cell (sparts).");
-      }
-    }
-
-    if (do_ci || do_cj) DOPAIR1_BRANCH_STARS(r, ci, cj);
-  }
-
-  TIMER_TOC(TIMER_DOSUB_PAIR_STARS);
-}
-
-/**
- * @brief Compute grouped sub-cell interactions for self tasks
- *
- * @param r The #runner.
- * @param ci The first #cell.
- * @param gettimer Do we have a timer ?
- */
-void DOSUB_SELF1_STARS(struct runner *r, struct cell *ci, int gettimer) {
-
-  TIMER_TIC;
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (ci->nodeID != engine_rank)
-    error("This function should not be called on foreign cells");
-#endif
-
-  /* Should we even bother? */
-  if (ci->hydro.count == 0 || ci->stars.count == 0 ||
-      !cell_is_active_stars(ci, r->e))
-    return;
-
-  /* Recurse? */
-  if (cell_can_recurse_in_self_stars_task(ci)) {
-
-    /* Loop over all progeny. */
-    for (int k = 0; k < 8; k++)
-      if (ci->progeny[k] != NULL) {
-        DOSUB_SELF1_STARS(r, ci->progeny[k], 0);
-        for (int j = k + 1; j < 8; j++)
-          if (ci->progeny[j] != NULL)
-            DOSUB_PAIR1_STARS(r, ci->progeny[k], ci->progeny[j], 0);
-      }
-  }
-
-  /* Otherwise, compute self-interaction. */
-  else {
-
-    /* Drift the cell to the current timestep if needed. */
-    if (!cell_are_spart_drifted(ci, r->e)) error("Interacting undrifted cell.");
-
-    DOSELF1_BRANCH_STARS(r, ci);
-  }
-
-  TIMER_TOC(TIMER_DOSUB_SELF_STARS);
-}
+                        int *ind, int scount, struct cell *cj, int gettimer);
diff --git a/src/runner_drift.c b/src/runner_drift.c
new file mode 100644
index 0000000000000000000000000000000000000000..8c4376743cd50ffea4709cb471959864cedcc4b7
--- /dev/null
+++ b/src/runner_drift.c
@@ -0,0 +1,96 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
+ *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *               2015 Peter W. Draper (p.w.draper@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+/* Config parameters. */
+#include "../config.h"
+
+/* This object's header. */
+#include "runner.h"
+
+/* Local headers. */
+#include "active.h"
+#include "cell.h"
+#include "engine.h"
+#include "timers.h"
+
+/**
+ * @brief Drift all the #part in a cell by calling cell_drift_part().
+ *
+ * @param r The runner thread; its engine (r->e) is handed to the drift.
+ * @param c The cell whose particles are drifted.
+ * @param timer If non-zero, the call is timed under timer_drift_part.
+ */
+void runner_do_drift_part(struct runner *r, struct cell *c, int timer) {
+
+  TIMER_TIC;
+  /* NOTE(review): last argument 0 presumably means "no forced drift". */
+  cell_drift_part(c, r->e, 0);
+  /* Only report the elapsed time when the caller asked for it. */
+  if (timer) TIMER_TOC(timer_drift_part);
+}
+
+/**
+ * @brief Drift all the #gpart in a cell by calling cell_drift_gpart().
+ *
+ * @param r The runner thread; its engine (r->e) is handed to the drift.
+ * @param c The cell whose particles are drifted.
+ * @param timer If non-zero, the call is timed under timer_drift_gpart.
+ */
+void runner_do_drift_gpart(struct runner *r, struct cell *c, int timer) {
+
+  TIMER_TIC;
+  /* NOTE(review): last argument 0 presumably means "no forced drift". */
+  cell_drift_gpart(c, r->e, 0);
+  /* Only report the elapsed time when the caller asked for it. */
+  if (timer) TIMER_TOC(timer_drift_gpart);
+}
+
+/**
+ * @brief Drift all the #spart in a cell by calling cell_drift_spart().
+ *
+ * @param r The runner thread; its engine (r->e) is handed to the drift.
+ * @param c The cell whose particles are drifted.
+ * @param timer If non-zero, the call is timed under timer_drift_spart.
+ */
+void runner_do_drift_spart(struct runner *r, struct cell *c, int timer) {
+
+  TIMER_TIC;
+  /* NOTE(review): last argument 0 presumably means "no forced drift". */
+  cell_drift_spart(c, r->e, 0);
+  /* Only report the elapsed time when the caller asked for it. */
+  if (timer) TIMER_TOC(timer_drift_spart);
+}
+
+/**
+ * @brief Drift all the #bpart in a cell by calling cell_drift_bpart().
+ *
+ * @param r The runner thread; its engine (r->e) is handed to the drift.
+ * @param c The cell whose particles are drifted.
+ * @param timer If non-zero, the call is timed under timer_drift_bpart.
+ */
+void runner_do_drift_bpart(struct runner *r, struct cell *c, int timer) {
+
+  TIMER_TIC;
+  /* NOTE(review): last argument 0 presumably means "no forced drift". */
+  cell_drift_bpart(c, r->e, 0);
+  /* Only report the elapsed time when the caller asked for it. */
+  if (timer) TIMER_TOC(timer_drift_bpart);
+}
diff --git a/src/runner_ghost.c b/src/runner_ghost.c
new file mode 100644
index 0000000000000000000000000000000000000000..2c1e8cd7190858014f7914e293b5ffdadbdc2707
--- /dev/null
+++ b/src/runner_ghost.c
@@ -0,0 +1,1355 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
+ *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *               2015 Peter W. Draper (p.w.draper@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+/* Config parameters. */
+#include "../config.h"
+
+/* This object's header. */
+#include "runner.h"
+
+/* Local headers. */
+#include "active.h"
+#include "black_holes.h"
+#include "cell.h"
+#include "engine.h"
+#include "feedback.h"
+#include "pressure_floor.h"
+#include "pressure_floor_iact.h"
+#include "space_getsid.h"
+#include "stars.h"
+#include "timers.h"
+#include "tracers.h"
+
+/* Import the hydro density loop functions; macros are undefined afterwards. */
+#define FUNCTION density
+#define FUNCTION_TASK_LOOP TASK_LOOP_DENSITY
+#include "runner_doiact_hydro.h"
+#undef FUNCTION
+#undef FUNCTION_TASK_LOOP
+
+/* Import the stars density loop functions; same define/include/undef steps. */
+#define FUNCTION density
+#define FUNCTION_TASK_LOOP TASK_LOOP_DENSITY
+#include "runner_doiact_stars.h"
+#undef FUNCTION_TASK_LOOP
+#undef FUNCTION
+
+/* Import the black hole density loop functions; same steps once more. */
+#define FUNCTION density
+#define FUNCTION_TASK_LOOP TASK_LOOP_DENSITY
+#include "runner_doiact_black_holes.h"
+#undef FUNCTION_TASK_LOOP
+#undef FUNCTION
+
+/**
+ * @brief Stars ghost, run after the stars density loop: iterates the smoothing
+ * lengths of the active star particles and prepares their feedback.
+ *
+ * @param r The runner thread.
+ * @param c The cell.
+ * @param timer Are we timing this ?
+ */
+void runner_do_stars_ghost(struct runner *r, struct cell *c, int timer) {
+
+  struct spart *restrict sparts = c->stars.parts;
+  const struct engine *e = r->e;
+  const struct unit_system *us = e->internal_units;
+  const int with_cosmology = (e->policy & engine_policy_cosmology);
+  const struct cosmology *cosmo = e->cosmology;
+  const struct feedback_props *feedback_props = e->feedback_props;
+  const float stars_h_max = e->hydro_properties->h_max;
+  const float stars_h_min = e->hydro_properties->h_min;
+  const float eps = e->stars_properties->h_tolerance;
+  const float stars_eta_dim =
+      pow_dimension(e->stars_properties->eta_neighbours);
+  const int max_smoothing_iter = e->stars_properties->max_smoothing_iterations;
+  int redo = 0, scount = 0;
+
+  /* Running value of the maximal smoothing length */
+  double h_max = c->stars.h_max;
+
+  TIMER_TIC;
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (c->nodeID != e->nodeID)
+    error("Running the star ghost on a foreign node!");
+#endif
+
+  /* Anything to do here? */
+  if (c->stars.count == 0) return;
+  if (!cell_is_active_stars(c, e)) return;
+
+  /* Recurse? */
+  if (c->split) {
+    for (int k = 0; k < 8; k++) {
+      if (c->progeny[k] != NULL) {
+        runner_do_stars_ghost(r, c->progeny[k], 0);
+
+        /* Update h_max */
+        h_max = max(h_max, c->progeny[k]->stars.h_max);
+      }
+    }
+  } else {
+
+    /* Init the list of active particles that have to be updated. */
+    int *sid = NULL;
+    float *h_0 = NULL;
+    float *left = NULL;
+    float *right = NULL;
+    if ((sid = (int *)malloc(sizeof(int) * c->stars.count)) == NULL)
+      error("Can't allocate memory for sid.");
+    if ((h_0 = (float *)malloc(sizeof(float) * c->stars.count)) == NULL)
+      error("Can't allocate memory for h_0.");
+    if ((left = (float *)malloc(sizeof(float) * c->stars.count)) == NULL)
+      error("Can't allocate memory for left.");
+    if ((right = (float *)malloc(sizeof(float) * c->stars.count)) == NULL)
+      error("Can't allocate memory for right.");
+    for (int k = 0; k < c->stars.count; k++)
+      if (spart_is_active(&sparts[k], e) &&
+          feedback_is_active(&sparts[k], e->time, cosmo, with_cosmology)) {
+        sid[scount] = k;
+        h_0[scount] = sparts[k].h;
+        left[scount] = 0.f;
+        right[scount] = stars_h_max;
+        ++scount;
+      }
+
+    /* While there are particles that need to be updated... */
+    for (int num_reruns = 0; scount > 0 && num_reruns < max_smoothing_iter;
+         num_reruns++) {
+
+      /* Reset the redo-count. */
+      redo = 0;
+
+      /* Loop over the remaining active parts in this cell. */
+      for (int i = 0; i < scount; i++) {
+
+        /* Get a direct pointer on the part. */
+        struct spart *sp = &sparts[sid[i]];
+
+#ifdef SWIFT_DEBUG_CHECKS
+        /* Is this part within the timestep? */
+        if (!spart_is_active(sp, e))
+          error("Ghost applied to inactive particle");
+#endif
+
+        /* Get some useful values */
+        const float h_init = h_0[i];
+        const float h_old = sp->h;
+        const float h_old_dim = pow_dimension(h_old);
+        const float h_old_dim_minus_one = pow_dimension_minus_one(h_old);
+
+        float h_new;
+        int has_no_neighbours = 0;
+
+        if (sp->density.wcount == 0.f) { /* No neighbours case */
+
+          /* Flag that there were no neighbours */
+          has_no_neighbours = 1;
+
+          /* Double h and try again */
+          h_new = 2.f * h_old;
+
+        } else {
+
+          /* Finish the density calculation */
+          stars_end_density(sp, cosmo);
+
+          /* Compute one step of the Newton-Raphson scheme */
+          const float n_sum = sp->density.wcount * h_old_dim;
+          const float n_target = stars_eta_dim;
+          const float f = n_sum - n_target;
+          const float f_prime =
+              sp->density.wcount_dh * h_old_dim +
+              hydro_dimension * sp->density.wcount * h_old_dim_minus_one;
+
+          /* Improve the bisection bounds */
+          if (n_sum < n_target)
+            left[i] = max(left[i], h_old);
+          else if (n_sum > n_target)
+            right[i] = min(right[i], h_old);
+
+#ifdef SWIFT_DEBUG_CHECKS
+          /* Check the validity of the left and right bounds */
+          if (left[i] > right[i])
+            error("Invalid left (%e) and right (%e)", left[i], right[i]);
+#endif
+
+          /* Skip if h is already h_max and we don't have enough neighbours
+           */
+          /* Same if we are below h_min */
+          if (((sp->h >= stars_h_max) && (f < 0.f)) ||
+              ((sp->h <= stars_h_min) && (f > 0.f))) {
+
+            stars_reset_feedback(sp);
+
+            /* Only do feedback if stars have a reasonable birth time */
+            if (feedback_do_feedback(sp)) {
+
+              const integertime_t ti_step = get_integer_timestep(sp->time_bin);
+              const integertime_t ti_begin =
+                  get_integer_time_begin(e->ti_current - 1, sp->time_bin);
+
+              /* Get particle time-step */
+              double dt;
+              if (with_cosmology) {
+                dt = cosmology_get_delta_time(e->cosmology, ti_begin,
+                                              ti_begin + ti_step);
+              } else {
+                dt = get_timestep(sp->time_bin, e->time_base);
+              }
+
+              /* Calculate age of the star at current time */
+              double star_age_end_of_step;
+              if (with_cosmology) {
+                star_age_end_of_step =
+                    cosmology_get_delta_time_from_scale_factors(
+                        cosmo, (double)sp->birth_scale_factor, cosmo->a);
+              } else {
+                star_age_end_of_step = (float)e->time - sp->birth_time;
+              }
+
+              /* Has this star been around for a while ? */
+              if (star_age_end_of_step > 0.) {
+
+                /* Age of the star at the start of the step */
+                const double star_age_beg_of_step =
+                    max(star_age_end_of_step - dt, 0.);
+
+                /* Compute the stellar evolution  */
+                feedback_evolve_spart(sp, feedback_props, cosmo, us,
+                                      star_age_beg_of_step, dt);
+              } else {
+
+                /* Reset the feedback fields of the star particle */
+                feedback_reset_feedback(sp, feedback_props);
+              }
+            } else {
+
+              feedback_reset_feedback(sp, feedback_props);
+            }
+
+            /* Ok, we are done with this particle */
+            continue;
+          }
+
+          /* Normal case: Use Newton-Raphson to get a better value of h */
+
+          /* Avoid floating point exception from f_prime = 0 */
+          h_new = h_old - f / (f_prime + FLT_MIN);
+
+          /* Be verbose about the particles that struggle to converge */
+          if (num_reruns > max_smoothing_iter - 10) {
+
+            message(
+                "Smoothing length convergence problem: iter=%d p->id=%lld "
+                "h_init=%12.8e h_old=%12.8e h_new=%12.8e f=%f f_prime=%f "
+                "n_sum=%12.8e n_target=%12.8e left=%12.8e right=%12.8e",
+                num_reruns, sp->id, h_init, h_old, h_new, f, f_prime, n_sum,
+                n_target, left[i], right[i]);
+          }
+
+          /* Safety check: truncate to the range [ h_old/2 , 2h_old ]. */
+          h_new = min(h_new, 2.f * h_old);
+          h_new = max(h_new, 0.5f * h_old);
+
+          /* Verify that we are actually progressing towards the answer */
+          h_new = max(h_new, left[i]);
+          h_new = min(h_new, right[i]);
+        }
+
+        /* Check whether the particle has an inappropriate smoothing length
+         */
+        if (fabsf(h_new - h_old) > eps * h_old) {
+
+          /* Ok, correct then */
+
+          /* Case where we have been oscillating around the solution */
+          if ((h_new == left[i] && h_old == right[i]) ||
+              (h_old == left[i] && h_new == right[i])) {
+
+            /* Bisect the remaining interval */
+            sp->h = pow_inv_dimension(
+                0.5f * (pow_dimension(left[i]) + pow_dimension(right[i])));
+
+          } else {
+
+            /* Normal case */
+            sp->h = h_new;
+          }
+
+          /* If below the absolute maximum, try again */
+          if (sp->h < stars_h_max && sp->h > stars_h_min) {
+
+            /* Flag for another round of fun */
+            sid[redo] = sid[i];
+            h_0[redo] = h_0[i];
+            left[redo] = left[i];
+            right[redo] = right[i];
+            redo += 1;
+
+            /* Re-initialise everything */
+            stars_init_spart(sp);
+            feedback_init_spart(sp);
+
+            /* Off we go ! */
+            continue;
+
+          } else if (sp->h <= stars_h_min) {
+
+            /* Ok, this particle is a lost cause... */
+            sp->h = stars_h_min;
+
+          } else if (sp->h >= stars_h_max) {
+
+            /* Ok, this particle is a lost cause... */
+            sp->h = stars_h_max;
+
+            /* Do some damage control if no neighbours at all were found */
+            if (has_no_neighbours) {
+              stars_spart_has_no_neighbours(sp, cosmo);
+            }
+
+          } else {
+            error(
+                "Fundamental problem with the smoothing length iteration "
+                "logic.");
+          }
+        }
+
+        /* We now have a particle whose smoothing length has converged */
+
+        /* Check if h_max has increased */
+        h_max = max(h_max, sp->h);
+
+        stars_reset_feedback(sp);
+
+        /* Only do feedback if stars have a reasonable birth time */
+        if (feedback_do_feedback(sp)) {
+
+          const integertime_t ti_step = get_integer_timestep(sp->time_bin);
+          const integertime_t ti_begin =
+              get_integer_time_begin(e->ti_current - 1, sp->time_bin);
+
+          /* Get particle time-step */
+          double dt;
+          if (with_cosmology) {
+            dt = cosmology_get_delta_time(e->cosmology, ti_begin,
+                                          ti_begin + ti_step);
+          } else {
+            dt = get_timestep(sp->time_bin, e->time_base);
+          }
+
+          /* Calculate age of the star at current time (same call as above) */
+          double star_age_end_of_step;
+          if (with_cosmology) {
+            star_age_end_of_step = cosmology_get_delta_time_from_scale_factors(
+                cosmo, (double)sp->birth_scale_factor, cosmo->a);
+          } else {
+            star_age_end_of_step = (float)e->time - sp->birth_time;
+          }
+
+          /* Has this star been around for a while ? */
+          if (star_age_end_of_step > 0.) {
+
+            /* Age of the star at the start of the step */
+            const double star_age_beg_of_step =
+                max(star_age_end_of_step - dt, 0.);
+
+            /* Compute the stellar evolution  */
+            feedback_evolve_spart(sp, feedback_props, cosmo, us,
+                                  star_age_beg_of_step, dt);
+          } else {
+
+            /* Reset the feedback fields of the star particle */
+            feedback_reset_feedback(sp, feedback_props);
+          }
+        } else {
+
+          /* Reset the feedback fields of the star particle */
+          feedback_reset_feedback(sp, feedback_props);
+        }
+      }
+
+      /* We now need to treat the particles whose smoothing length had not
+       * converged again */
+
+      /* Re-set the counter for the next loop (potentially). */
+      scount = redo;
+      if (scount > 0) {
+
+        /* Climb up the cell hierarchy. */
+        for (struct cell *finger = c; finger != NULL; finger = finger->parent) {
+
+          /* Run through this cell's density interactions. */
+          for (struct link *l = finger->stars.density; l != NULL; l = l->next) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+            if (l->t->ti_run < r->e->ti_current)
+              error("Density task should have been run.");
+#endif
+
+            /* Self-interaction? */
+            if (l->t->type == task_type_self)
+              runner_doself_subset_branch_stars_density(r, finger, sparts, sid,
+                                                        scount);
+
+            /* Otherwise, pair interaction? */
+            else if (l->t->type == task_type_pair) {
+
+              /* Left or right? */
+              if (l->t->ci == finger)
+                runner_dopair_subset_branch_stars_density(
+                    r, finger, sparts, sid, scount, l->t->cj);
+              else
+                runner_dopair_subset_branch_stars_density(
+                    r, finger, sparts, sid, scount, l->t->ci);
+            }
+
+            /* Otherwise, sub-self interaction? */
+            else if (l->t->type == task_type_sub_self)
+              runner_dosub_subset_stars_density(r, finger, sparts, sid, scount,
+                                                NULL, 1);
+
+            /* Otherwise, sub-pair interaction? */
+            else if (l->t->type == task_type_sub_pair) {
+
+              /* Left or right? */
+              if (l->t->ci == finger)
+                runner_dosub_subset_stars_density(r, finger, sparts, sid,
+                                                  scount, l->t->cj, 1);
+              else
+                runner_dosub_subset_stars_density(r, finger, sparts, sid,
+                                                  scount, l->t->ci, 1);
+            }
+          }
+        }
+      }
+    }
+
+    if (scount) {
+      error("Smoothing length failed to converge on %i particles.", scount);
+    }
+
+    /* Be clean */
+    free(left);
+    free(right);
+    free(sid);
+    free(h_0);
+  }
+
+  /* Update h_max */
+  c->stars.h_max = h_max;
+
+  /* The ghost may not always be at the top level.
+   * Therefore we need to update h_max between the super- and top-levels */
+  if (c->stars.ghost) {
+    for (struct cell *tmp = c->parent; tmp != NULL; tmp = tmp->parent) {
+      atomic_max_d(&tmp->stars.h_max, h_max);
+    }
+  }
+
+  if (timer) TIMER_TOC(timer_do_stars_ghost);
+}
+
+/**
+ * @brief Black hole density ghost, run after the BH density loop: iterates
+ * the smoothing lengths of the active black holes (Newton-Raphson + bisection).
+ *
+ * @param r The runner thread.
+ * @param c The cell.
+ * @param timer Are we timing this ?
+ */
+void runner_do_black_holes_density_ghost(struct runner *r, struct cell *c,
+                                         int timer) {
+
+  struct bpart *restrict bparts = c->black_holes.parts;
+  const struct engine *e = r->e;
+  const struct cosmology *cosmo = e->cosmology;
+  const float black_holes_h_max = e->hydro_properties->h_max;
+  const float black_holes_h_min = e->hydro_properties->h_min;
+  const float eps = e->black_holes_properties->h_tolerance;
+  const float black_holes_eta_dim =
+      pow_dimension(e->black_holes_properties->eta_neighbours);
+  const int max_smoothing_iter = e->hydro_properties->max_smoothing_iterations;
+  int redo = 0, bcount = 0;
+  /* NOTE(review): h bounds and iteration cap come from hydro_properties. */
+  /* Running value of the maximal smoothing length */
+  double h_max = c->black_holes.h_max;
+
+  TIMER_TIC;
+
+  /* Anything to do here? */
+  if (c->black_holes.count == 0) return;
+  if (!cell_is_active_black_holes(c, e)) return;
+
+  /* Recurse? */
+  if (c->split) {
+    for (int k = 0; k < 8; k++) {
+      if (c->progeny[k] != NULL) {
+        runner_do_black_holes_density_ghost(r, c->progeny[k], 0);
+
+        /* Update h_max */
+        h_max = max(h_max, c->progeny[k]->black_holes.h_max);
+      }
+    }
+  } else {
+
+    /* Init the list of active particles that have to be updated. */
+    int *sid = NULL;
+    float *h_0 = NULL;
+    float *left = NULL;
+    float *right = NULL;
+    if ((sid = (int *)malloc(sizeof(int) * c->black_holes.count)) == NULL)
+      error("Can't allocate memory for sid.");
+    if ((h_0 = (float *)malloc(sizeof(float) * c->black_holes.count)) == NULL)
+      error("Can't allocate memory for h_0.");
+    if ((left = (float *)malloc(sizeof(float) * c->black_holes.count)) == NULL)
+      error("Can't allocate memory for left.");
+    if ((right = (float *)malloc(sizeof(float) * c->black_holes.count)) == NULL)
+      error("Can't allocate memory for right.");
+    for (int k = 0; k < c->black_holes.count; k++)
+      if (bpart_is_active(&bparts[k], e)) {
+        sid[bcount] = k;
+        h_0[bcount] = bparts[k].h;
+        left[bcount] = 0.f;
+        right[bcount] = black_holes_h_max;
+        ++bcount;
+      }
+
+    /* While there are particles that need to be updated... */
+    for (int num_reruns = 0; bcount > 0 && num_reruns < max_smoothing_iter;
+         num_reruns++) {
+
+      /* Reset the redo-count. */
+      redo = 0;
+
+      /* Loop over the remaining active parts in this cell. */
+      for (int i = 0; i < bcount; i++) {
+
+        /* Get a direct pointer on the part. */
+        struct bpart *bp = &bparts[sid[i]];
+
+#ifdef SWIFT_DEBUG_CHECKS
+        /* Is this part within the timestep? */
+        if (!bpart_is_active(bp, e))
+          error("Ghost applied to inactive particle");
+#endif
+
+        /* Get some useful values */
+        const float h_init = h_0[i];
+        const float h_old = bp->h;
+        const float h_old_dim = pow_dimension(h_old);
+        const float h_old_dim_minus_one = pow_dimension_minus_one(h_old);
+
+        float h_new;
+        int has_no_neighbours = 0;
+
+        if (bp->density.wcount == 0.f) { /* No neighbours case */
+
+          /* Flag that there were no neighbours */
+          has_no_neighbours = 1;
+
+          /* Double h and try again */
+          h_new = 2.f * h_old;
+
+        } else {
+
+          /* Finish the density calculation */
+          black_holes_end_density(bp, cosmo);
+
+          /* Compute one step of the Newton-Raphson scheme */
+          const float n_sum = bp->density.wcount * h_old_dim;
+          const float n_target = black_holes_eta_dim;
+          const float f = n_sum - n_target;
+          const float f_prime =
+              bp->density.wcount_dh * h_old_dim +
+              hydro_dimension * bp->density.wcount * h_old_dim_minus_one;
+
+          /* Improve the bisection bounds */
+          if (n_sum < n_target)
+            left[i] = max(left[i], h_old);
+          else if (n_sum > n_target)
+            right[i] = min(right[i], h_old);
+
+#ifdef SWIFT_DEBUG_CHECKS
+          /* Check the validity of the left and right bounds */
+          if (left[i] > right[i])
+            error("Invalid left (%e) and right (%e)", left[i], right[i]);
+#endif
+
+          /* Skip if h is already h_max and we don't have enough neighbours
+           */
+          /* Same if we are below h_min */
+          if (((bp->h >= black_holes_h_max) && (f < 0.f)) ||
+              ((bp->h <= black_holes_h_min) && (f > 0.f))) {
+
+            black_holes_reset_feedback(bp);
+
+            /* Ok, we are done with this particle */
+            continue;
+          }
+
+          /* Normal case: Use Newton-Raphson to get a better value of h */
+
+          /* Avoid floating point exception from f_prime = 0 */
+          h_new = h_old - f / (f_prime + FLT_MIN);
+
+          /* Be verbose about the particles that struggle to converge */
+          if (num_reruns > max_smoothing_iter - 10) {
+
+            message(
+                "Smoothing length convergence problem: iter=%d p->id=%lld "
+                "h_init=%12.8e h_old=%12.8e h_new=%12.8e f=%f f_prime=%f "
+                "n_sum=%12.8e n_target=%12.8e left=%12.8e right=%12.8e",
+                num_reruns, bp->id, h_init, h_old, h_new, f, f_prime, n_sum,
+                n_target, left[i], right[i]);
+          }
+
+          /* Safety check: truncate to the range [ h_old/2 , 2h_old ]. */
+          h_new = min(h_new, 2.f * h_old);
+          h_new = max(h_new, 0.5f * h_old);
+
+          /* Verify that we are actually progressing towards the answer */
+          h_new = max(h_new, left[i]);
+          h_new = min(h_new, right[i]);
+        }
+
+        /* Check whether the particle has an inappropriate smoothing length
+         */
+        if (fabsf(h_new - h_old) > eps * h_old) {
+
+          /* Ok, correct then */
+
+          /* Case where we have been oscillating around the solution */
+          if ((h_new == left[i] && h_old == right[i]) ||
+              (h_old == left[i] && h_new == right[i])) {
+
+            /* Bisect the remaining interval */
+            bp->h = pow_inv_dimension(
+                0.5f * (pow_dimension(left[i]) + pow_dimension(right[i])));
+
+          } else {
+
+            /* Normal case */
+            bp->h = h_new;
+          }
+
+          /* If below the absolute maximum, try again */
+          if (bp->h < black_holes_h_max && bp->h > black_holes_h_min) {
+
+            /* Flag for another round of fun */
+            sid[redo] = sid[i];
+            h_0[redo] = h_0[i];
+            left[redo] = left[i];
+            right[redo] = right[i];
+            redo += 1;
+
+            /* Re-initialise everything */
+            black_holes_init_bpart(bp);
+
+            /* Off we go ! */
+            continue;
+
+          } else if (bp->h <= black_holes_h_min) {
+
+            /* Ok, this particle is a lost cause... */
+            bp->h = black_holes_h_min;
+
+          } else if (bp->h >= black_holes_h_max) {
+
+            /* Ok, this particle is a lost cause... */
+            bp->h = black_holes_h_max;
+
+            /* Do some damage control if no neighbours at all were found */
+            if (has_no_neighbours) {
+              black_holes_bpart_has_no_neighbours(bp, cosmo);
+            }
+
+          } else {
+            error(
+                "Fundamental problem with the smoothing length iteration "
+                "logic.");
+          }
+        }
+
+        /* We now have a particle whose smoothing length has converged */
+
+        black_holes_reset_feedback(bp);
+
+        /* Check if h_max has increased */
+        h_max = max(h_max, bp->h);
+      }
+
+      /* We now need to treat the particles whose smoothing length had not
+       * converged again */
+
+      /* Re-set the counter for the next loop (potentially). */
+      bcount = redo;
+      if (bcount > 0) {
+
+        /* Climb up the cell hierarchy. */
+        for (struct cell *finger = c; finger != NULL; finger = finger->parent) {
+
+          /* Run through this cell's density interactions. */
+          for (struct link *l = finger->black_holes.density; l != NULL;
+               l = l->next) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+            if (l->t->ti_run < r->e->ti_current)
+              error("Density task should have been run.");
+#endif
+
+            /* Self-interaction? */
+            if (l->t->type == task_type_self)
+              runner_doself_subset_branch_bh_density(r, finger, bparts, sid,
+                                                     bcount);
+
+            /* Otherwise, pair interaction? */
+            else if (l->t->type == task_type_pair) {
+
+              /* Left or right? */
+              if (l->t->ci == finger)
+                runner_dopair_subset_branch_bh_density(r, finger, bparts, sid,
+                                                       bcount, l->t->cj);
+              else
+                runner_dopair_subset_branch_bh_density(r, finger, bparts, sid,
+                                                       bcount, l->t->ci);
+            }
+
+            /* Otherwise, sub-self interaction? */
+            else if (l->t->type == task_type_sub_self)
+              runner_dosub_subset_bh_density(r, finger, bparts, sid, bcount,
+                                             NULL, 1);
+
+            /* Otherwise, sub-pair interaction? */
+            else if (l->t->type == task_type_sub_pair) {
+
+              /* Left or right? */
+              if (l->t->ci == finger)
+                runner_dosub_subset_bh_density(r, finger, bparts, sid, bcount,
+                                               l->t->cj, 1);
+              else
+                runner_dosub_subset_bh_density(r, finger, bparts, sid, bcount,
+                                               l->t->ci, 1);
+            }
+          }
+        }
+      }
+    }
+
+    if (bcount) {
+      error("Smoothing length failed to converge on %i particles.", bcount);
+    }
+
+    /* Be clean */
+    free(left);
+    free(right);
+    free(sid);
+    free(h_0);
+  }
+
+  /* Update h_max */
+  c->black_holes.h_max = h_max;
+
+  /* The ghost may not always be at the top level.
+   * Therefore we need to update h_max between the super- and top-levels */
+  if (c->black_holes.density_ghost) {
+    for (struct cell *tmp = c->parent; tmp != NULL; tmp = tmp->parent) {
+      atomic_max_d(&tmp->black_holes.h_max, h_max);
+    }
+  }
+
+  if (timer) TIMER_TOC(timer_do_black_holes_ghost);
+}
+
+/**
+ * @brief Intermediate task after the BHs have done their swallowing step.
+ * Ends the repositioning of each active BH and prepares its feedback.
+ *
+ * @param r The runner thread.
+ * @param c The cell.
+ * @param timer Are we timing this ?
+ */
+void runner_do_black_holes_swallow_ghost(struct runner *r, struct cell *c,
+                                         int timer) {
+
+  struct bpart *restrict bparts = c->black_holes.parts;
+  const int count = c->black_holes.count;
+  const struct engine *e = r->e;
+  const int with_cosmology = e->policy & engine_policy_cosmology;
+
+  TIMER_TIC;
+
+  /* Anything to do here? Only bparts are touched, so gate on the BHs. */
+  if (count == 0 || !cell_is_active_black_holes(c, e)) return;
+
+  /* Recurse? */
+  if (c->split) {
+    for (int k = 0; k < 8; k++)
+      if (c->progeny[k] != NULL)
+        runner_do_black_holes_swallow_ghost(r, c->progeny[k], 0);
+  } else {
+
+    /* Loop over the parts in this cell. */
+    for (int i = 0; i < count; i++) {
+
+      /* Get a direct pointer on the part. */
+      struct bpart *bp = &bparts[i];
+
+      if (bpart_is_active(bp, e)) {
+
+        /* Compute the final operations for repositioning of this BH */
+        black_holes_end_reposition(bp, e->black_holes_properties,
+                                   e->physical_constants, e->cosmology);
+
+        /* Get particle time-step */
+        double dt;
+        if (with_cosmology) {
+          const integertime_t ti_step = get_integer_timestep(bp->time_bin);
+          const integertime_t ti_begin =
+              get_integer_time_begin(e->ti_current - 1, bp->time_bin);
+
+          dt = cosmology_get_delta_time(e->cosmology, ti_begin,
+                                        ti_begin + ti_step);
+        } else {
+          dt = get_timestep(bp->time_bin, e->time_base);
+        }
+
+        /* Compute variables required for the feedback loop */
+        black_holes_prepare_feedback(bp, e->black_holes_properties,
+                                     e->physical_constants, e->cosmology, dt);
+      }
+    }
+  }
+
+  if (timer) TIMER_TOC(timer_do_black_holes_ghost);
+}
+
+/**
+ * @brief Ghost run after the gradient loop: finishes the gradient quantities
+ * of every active particle and readies the particle for the force loop.
+ *
+ * @param r The runner thread.
+ * @param c The cell.
+ * @param timer Are we timing this ?
+ */
+void runner_do_extra_ghost(struct runner *r, struct cell *c, int timer) {
+
+#ifdef EXTRA_HYDRO_LOOP
+
+  const struct engine *e = r->e;
+  const struct cosmology *cosmo = e->cosmology;
+  const struct hydro_props *hydro_props = e->hydro_properties;
+  const int with_cosmology = (e->policy & engine_policy_cosmology);
+  const integertime_t ti_current = e->ti_current;
+  const double time_base = e->time_base;
+  struct part *restrict parts = c->hydro.parts;
+  struct xpart *restrict xparts = c->hydro.xparts;
+  const int count = c->hydro.count;
+
+  TIMER_TIC;
+
+  /* Bail out early when cell_is_active_hydro() reports nothing to do. */
+  if (!cell_is_active_hydro(c, e)) return;
+
+  if (c->split) {
+
+    /* Split cell: delegate to each existing progeny, without timing. */
+    for (int k = 0; k < 8; k++) {
+      struct cell *child = c->progeny[k];
+      if (child != NULL) runner_do_extra_ghost(r, child, 0);
+    }
+
+  } else {
+
+    /* Leaf cell: walk over all its particles. */
+    for (int pid = 0; pid < count; pid++) {
+
+      struct part *restrict p = &parts[pid];
+      struct xpart *restrict xp = &xparts[pid];
+
+      /* Only active particles are updated. */
+      if (!part_is_active(p, e)) continue;
+
+      /* Finish the gradient calculation */
+      hydro_end_gradient(p);
+
+      /* As of here, particle force variables will be set. */
+
+      /* Time-step passed to hydro_prepare_force: the physical time between
+       * the start and end of the step, without any scale-factor powers. */
+      double dt_alpha;
+      if (!with_cosmology) {
+        dt_alpha = get_timestep(p->time_bin, time_base);
+      } else {
+        const integertime_t ti_step = get_integer_timestep(p->time_bin);
+        const integertime_t ti_begin =
+            get_integer_time_begin(ti_current - 1, p->time_bin);
+        dt_alpha =
+            cosmology_get_delta_time(cosmo, ti_begin, ti_begin + ti_step);
+      }
+
+      /* Compute variables required for the force loop */
+      hydro_prepare_force(p, xp, cosmo, hydro_props, dt_alpha);
+
+      /* The particle force values are now set.  Do _NOT_
+         try to read any particle density variables! */
+
+      /* Prepare the particle for the force loop over neighbours */
+      hydro_reset_acceleration(p);
+    }
+  }
+
+  if (timer) TIMER_TOC(timer_do_extra_ghost);
+
+#else
+  error("SWIFT was not compiled with the extra hydro loop activated.");
+#endif
+}
+
+/**
+ * @brief Intermediate task after the density loop: finishes the density
+ * calculation and iterates the smoothing lengths of the active particles.
+ *
+ * @param r The runner thread.
+ * @param c The cell (split cells are handled by recursing into the progeny).
+ * @param timer Are we timing this ?
+ */
+void runner_do_ghost(struct runner *r, struct cell *c, int timer) {
+
+  struct part *restrict parts = c->hydro.parts;
+  struct xpart *restrict xparts = c->hydro.xparts;
+  const struct engine *e = r->e;
+  const struct space *s = e->s;
+  const struct hydro_space *hs = &s->hs;
+  const struct cosmology *cosmo = e->cosmology;
+  const struct chemistry_global_data *chemistry = e->chemistry;
+
+  const int with_cosmology = (e->policy & engine_policy_cosmology);
+
+  const float hydro_h_max = e->hydro_properties->h_max;
+  const float hydro_h_min = e->hydro_properties->h_min;
+  const float eps = e->hydro_properties->h_tolerance; /* relative, on h */
+  const float hydro_eta_dim =
+      pow_dimension(e->hydro_properties->eta_neighbours);
+  const int max_smoothing_iter = e->hydro_properties->max_smoothing_iterations;
+  int redo = 0, count = 0;
+
+  /* Running value of the maximal smoothing length */
+  double h_max = c->hydro.h_max;
+
+  TIMER_TIC;
+
+  /* Anything to do here? */
+  if (c->hydro.count == 0) return;
+  if (!cell_is_active_hydro(c, e)) return;
+
+  /* Recurse? */
+  if (c->split) {
+    for (int k = 0; k < 8; k++) {
+      if (c->progeny[k] != NULL) {
+        runner_do_ghost(r, c->progeny[k], 0);
+
+        /* Update h_max */
+        h_max = max(h_max, c->progeny[k]->hydro.h_max);
+      }
+    }
+  } else {
+
+    /* Init the list of active particles that have to be updated, their
+     * initial smoothing lengths and their bisection bounds [left, right]. */
+    int *pid = NULL;
+    float *h_0 = NULL;
+    float *left = NULL;
+    float *right = NULL;
+    if ((pid = (int *)malloc(sizeof(int) * c->hydro.count)) == NULL)
+      error("Can't allocate memory for pid.");
+    if ((h_0 = (float *)malloc(sizeof(float) * c->hydro.count)) == NULL)
+      error("Can't allocate memory for h_0.");
+    if ((left = (float *)malloc(sizeof(float) * c->hydro.count)) == NULL)
+      error("Can't allocate memory for left.");
+    if ((right = (float *)malloc(sizeof(float) * c->hydro.count)) == NULL)
+      error("Can't allocate memory for right.");
+    for (int k = 0; k < c->hydro.count; k++)
+      if (part_is_active(&parts[k], e)) {
+        pid[count] = k;
+        h_0[count] = parts[k].h;
+        left[count] = 0.f;
+        right[count] = hydro_h_max;
+        ++count;
+      }
+
+    /* While there are particles that need to be updated (bounded iterations) */
+    for (int num_reruns = 0; count > 0 && num_reruns < max_smoothing_iter;
+         num_reruns++) {
+
+      /* Reset the redo-count. */
+      redo = 0;
+
+      /* Loop over the remaining active parts in this cell. */
+      for (int i = 0; i < count; i++) {
+
+        /* Get a direct pointer on the part. */
+        struct part *p = &parts[pid[i]];
+        struct xpart *xp = &xparts[pid[i]];
+
+#ifdef SWIFT_DEBUG_CHECKS
+        /* Is this part within the timestep? */
+        if (!part_is_active(p, e)) error("Ghost applied to inactive particle");
+#endif
+
+        /* Get some useful values */
+        const float h_init = h_0[i];
+        const float h_old = p->h;
+        const float h_old_dim = pow_dimension(h_old);
+        const float h_old_dim_minus_one = pow_dimension_minus_one(h_old);
+
+        float h_new;
+        int has_no_neighbours = 0;
+
+        if (p->density.wcount == 0.f) { /* No neighbours case */
+
+          /* Flag that there were no neighbours */
+          has_no_neighbours = 1;
+
+          /* Double h and try again */
+          h_new = 2.f * h_old;
+
+        } else {
+
+          /* Finish the density calculation */
+          hydro_end_density(p, cosmo);
+          chemistry_end_density(p, chemistry, cosmo);
+          pressure_floor_end_density(p, cosmo);
+
+          /* Compute one step of the Newton-Raphson scheme */
+          const float n_sum = p->density.wcount * h_old_dim;
+          const float n_target = hydro_eta_dim;
+          const float f = n_sum - n_target;
+          const float f_prime =
+              p->density.wcount_dh * h_old_dim +
+              hydro_dimension * p->density.wcount * h_old_dim_minus_one;
+
+          /* Improve the bisection bounds */
+          if (n_sum < n_target)
+            left[i] = max(left[i], h_old);
+          else if (n_sum > n_target)
+            right[i] = min(right[i], h_old);
+
+#ifdef SWIFT_DEBUG_CHECKS
+          /* Check the validity of the left and right bounds */
+          if (left[i] > right[i])
+            error("Invalid left (%e) and right (%e)", left[i], right[i]);
+#endif
+
+          /* Skip if h is already h_max and we don't have enough neighbours */
+          /* Same if we are below h_min */
+          if (((p->h >= hydro_h_max) && (f < 0.f)) ||
+              ((p->h <= hydro_h_min) && (f > 0.f))) {
+
+          /* We have a particle whose smoothing length is already set (wants
+           * to be larger but has already hit the maximum OR wants to be
+           * smaller but has already reached the minimum). So, just tidy up
+           * as if the smoothing length had converged correctly  */
+
+#ifdef EXTRA_HYDRO_LOOP
+
+            /* As of here, particle gradient variables will be set. */
+            /* The force variables are set in the extra ghost. */
+
+            /* Compute variables required for the gradient loop */
+            hydro_prepare_gradient(p, xp, cosmo);
+
+            /* The particle gradient values are now set.  Do _NOT_
+               try to read any particle density variables! */
+
+            /* Prepare the particle for the gradient loop over neighbours
+             */
+            hydro_reset_gradient(p);
+
+#else
+            const struct hydro_props *hydro_props = e->hydro_properties;
+
+            /* Calculate the time-step for passing to hydro_prepare_force, used
+             * for the evolution of alpha factors (i.e. those involved in the
+             * artificial viscosity and thermal conduction terms) */
+            const double time_base = e->time_base;
+            const integertime_t ti_current = e->ti_current;
+            double dt_alpha;
+
+            if (with_cosmology) {
+              const integertime_t ti_step = get_integer_timestep(p->time_bin);
+              const integertime_t ti_begin =
+                  get_integer_time_begin(ti_current - 1, p->time_bin);
+
+              dt_alpha =
+                  cosmology_get_delta_time(cosmo, ti_begin, ti_begin + ti_step);
+            } else {
+              dt_alpha = get_timestep(p->time_bin, time_base);
+            }
+
+            /* As of here, particle force variables will be set. */
+
+            /* Compute variables required for the force loop */
+            hydro_prepare_force(p, xp, cosmo, hydro_props, dt_alpha);
+
+            /* The particle force values are now set.  Do _NOT_
+               try to read any particle density variables! */
+
+            /* Prepare the particle for the force loop over neighbours */
+            hydro_reset_acceleration(p);
+
+#endif /* EXTRA_HYDRO_LOOP */
+
+            /* Ok, we are done with this particle */
+            continue;
+          }
+
+          /* Normal case: Use Newton-Raphson to get a better value of h */
+
+          /* Avoid floating point exception from f_prime = 0 */
+          h_new = h_old - f / (f_prime + FLT_MIN);
+
+          /* Be verbose about the particles that struggle to converge */
+          if (num_reruns > max_smoothing_iter - 10) {
+
+            message(
+                "Smoothing length convergence problem: iter=%d p->id=%lld "
+                "h_init=%12.8e h_old=%12.8e h_new=%12.8e f=%f f_prime=%f "
+                "n_sum=%12.8e n_target=%12.8e left=%12.8e right=%12.8e",
+                num_reruns, p->id, h_init, h_old, h_new, f, f_prime, n_sum,
+                n_target, left[i], right[i]);
+          }
+
+#ifdef SWIFT_DEBUG_CHECKS
+          if ((f > 0.f && h_new > h_old) || (f < 0.f && h_new < h_old))
+            error(
+                "Smoothing length correction not going in the right direction");
+#endif
+
+          /* Safety check: truncate to the range [ h_old/2 , 2h_old ]. */
+          h_new = min(h_new, 2.f * h_old);
+          h_new = max(h_new, 0.5f * h_old);
+
+          /* Clamp to the bisection bounds so that we keep progressing */
+          h_new = max(h_new, left[i]);
+          h_new = min(h_new, right[i]);
+        }
+
+        /* Check whether the particle has an inappropriate smoothing length
+         */
+        if (fabsf(h_new - h_old) > eps * h_old) {
+
+          /* Ok, correct then */
+
+          /* Case where we have been oscillating around the solution */
+          if ((h_new == left[i] && h_old == right[i]) ||
+              (h_old == left[i] && h_new == right[i])) {
+
+            /* Bisect the remaining interval (mean of the pow_dimension values) */
+            p->h = pow_inv_dimension(
+                0.5f * (pow_dimension(left[i]) + pow_dimension(right[i])));
+
+          } else {
+
+            /* Normal case */
+            p->h = h_new;
+          }
+
+          /* If within the allowed range, try again */
+          if (p->h < hydro_h_max && p->h > hydro_h_min) {
+
+            /* Flag for another round: compact into the front of the lists */
+            pid[redo] = pid[i];
+            h_0[redo] = h_0[i];
+            left[redo] = left[i];
+            right[redo] = right[i];
+            redo += 1;
+
+            /* Re-initialise everything */
+            hydro_init_part(p, hs);
+            chemistry_init_part(p, chemistry);
+            pressure_floor_init_part(p, xp);
+            tracers_after_init(p, xp, e->internal_units, e->physical_constants,
+                               with_cosmology, e->cosmology,
+                               e->hydro_properties, e->cooling_func, e->time);
+
+            /* Off we go ! */
+            continue;
+
+          } else if (p->h <= hydro_h_min) {
+
+            /* Ok, this particle is a lost cause: clamp to the minimum */
+            p->h = hydro_h_min;
+
+          } else if (p->h >= hydro_h_max) {
+
+            /* Ok, this particle is a lost cause: clamp to the maximum */
+            p->h = hydro_h_max;
+
+            /* Do some damage control if no neighbours at all were found */
+            if (has_no_neighbours) {
+              hydro_part_has_no_neighbours(p, xp, cosmo);
+              chemistry_part_has_no_neighbours(p, xp, chemistry, cosmo);
+              pressure_floor_part_has_no_neighbours(p, xp, cosmo);
+            }
+
+          } else {
+            error(
+                "Fundamental problem with the smoothing length iteration "
+                "logic.");
+          }
+        }
+
+        /* The smoothing length has now converged (or was clamped to a limit) */
+
+        /* Check if h_max is increased */
+        h_max = max(h_max, p->h);
+
+#ifdef EXTRA_HYDRO_LOOP
+
+        /* As of here, particle gradient variables will be set. */
+        /* The force variables are set in the extra ghost. */
+
+        /* Compute variables required for the gradient loop */
+        hydro_prepare_gradient(p, xp, cosmo);
+
+        /* The particle gradient values are now set.  Do _NOT_
+           try to read any particle density variables! */
+
+        /* Prepare the particle for the gradient loop over neighbours */
+        hydro_reset_gradient(p);
+
+#else
+        const struct hydro_props *hydro_props = e->hydro_properties;
+
+        /* Calculate the time-step for passing to hydro_prepare_force, used
+         * for the evolution of alpha factors (i.e. those involved in the
+         * artificial viscosity and thermal conduction terms) */
+        const double time_base = e->time_base;
+        const integertime_t ti_current = e->ti_current;
+        double dt_alpha;
+
+        if (with_cosmology) {
+          const integertime_t ti_step = get_integer_timestep(p->time_bin);
+          const integertime_t ti_begin =
+              get_integer_time_begin(ti_current - 1, p->time_bin);
+
+          dt_alpha =
+              cosmology_get_delta_time(cosmo, ti_begin, ti_begin + ti_step);
+        } else {
+          dt_alpha = get_timestep(p->time_bin, time_base);
+        }
+
+        /* As of here, particle force variables will be set. */
+
+        /* Compute variables required for the force loop */
+        hydro_prepare_force(p, xp, cosmo, hydro_props, dt_alpha);
+
+        /* The particle force values are now set.  Do _NOT_
+           try to read any particle density variables! */
+
+        /* Prepare the particle for the force loop over neighbours */
+        hydro_reset_acceleration(p);
+
+#endif /* EXTRA_HYDRO_LOOP */
+      }
+
+      /* We now need to treat the particles whose smoothing length had not
+       * converged again */
+
+      /* Re-set the counter for the next loop (potentially). */
+      count = redo;
+      if (count > 0) {
+
+        /* Climb up the cell hierarchy (this cell, then each of its parents). */
+        for (struct cell *finger = c; finger != NULL; finger = finger->parent) {
+
+          /* Run through this cell's density interactions. */
+          for (struct link *l = finger->hydro.density; l != NULL; l = l->next) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+            if (l->t->ti_run < r->e->ti_current)
+              error("Density task should have been run.");
+#endif
+
+            /* Self-interaction? */
+            if (l->t->type == task_type_self)
+              runner_doself_subset_branch_density(r, finger, parts, pid, count);
+
+            /* Otherwise, pair interaction? */
+            else if (l->t->type == task_type_pair) {
+
+              /* Left or right? */
+              if (l->t->ci == finger)
+                runner_dopair_subset_branch_density(r, finger, parts, pid,
+                                                    count, l->t->cj);
+              else
+                runner_dopair_subset_branch_density(r, finger, parts, pid,
+                                                    count, l->t->ci);
+            }
+
+            /* Otherwise, sub-self interaction? */
+            else if (l->t->type == task_type_sub_self)
+              runner_dosub_subset_density(r, finger, parts, pid, count, NULL,
+                                          1);
+
+            /* Otherwise, sub-pair interaction? */
+            else if (l->t->type == task_type_sub_pair) {
+
+              /* Left or right? */
+              if (l->t->ci == finger)
+                runner_dosub_subset_density(r, finger, parts, pid, count,
+                                            l->t->cj, 1);
+              else
+                runner_dosub_subset_density(r, finger, parts, pid, count,
+                                            l->t->ci, 1);
+            }
+          }
+        }
+      }
+    }
+
+    if (count) {
+      error("Smoothing length failed to converge on %i particles.", count);
+    }
+
+    /* Be clean */
+    free(left);
+    free(right);
+    free(pid);
+    free(h_0);
+  }
+
+  /* Update h_max */
+  c->hydro.h_max = h_max;
+
+  /* The ghost may not always be at the top level.
+   * Therefore we need to update h_max between the super- and top-levels */
+  if (c->hydro.ghost) {
+    for (struct cell *tmp = c->parent; tmp != NULL; tmp = tmp->parent) {
+      atomic_max_d(&tmp->hydro.h_max, h_max);
+    }
+  }
+
+  if (timer) TIMER_TOC(timer_do_ghost);
+}
diff --git a/src/runner_main.c b/src/runner_main.c
new file mode 100644
index 0000000000000000000000000000000000000000..a674b64ae671bf33df0b5ba9eaa951097d738ba9
--- /dev/null
+++ b/src/runner_main.c
@@ -0,0 +1,495 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
+ *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *               2015 Peter W. Draper (p.w.draper@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+/* Config parameters. */
+#include "../config.h"
+
+/* MPI headers. */
+#ifdef WITH_MPI
+#include <mpi.h>
+#endif
+
+/* This object's header. */
+#include "runner.h"
+
+/* Local headers. */
+#include "engine.h"
+#include "scheduler.h"
+#include "space_getsid.h"
+#include "timers.h"
+
+/* Import the gravity loop functions. */
+#include "runner_doiact_grav.h"
+
+/* Import the density loop functions. */
+#define FUNCTION density
+#define FUNCTION_TASK_LOOP TASK_LOOP_DENSITY
+#include "runner_doiact_hydro.h"
+#undef FUNCTION
+#undef FUNCTION_TASK_LOOP
+
+/* Import the gradient loop functions (if required). */
+#ifdef EXTRA_HYDRO_LOOP
+#define FUNCTION gradient
+#define FUNCTION_TASK_LOOP TASK_LOOP_GRADIENT
+#include "runner_doiact_hydro.h"
+#undef FUNCTION
+#undef FUNCTION_TASK_LOOP
+#endif
+
+/* Import the force loop functions. */
+#define FUNCTION force
+#define FUNCTION_TASK_LOOP TASK_LOOP_FORCE
+#include "runner_doiact_hydro.h"
+#undef FUNCTION
+#undef FUNCTION_TASK_LOOP
+
+/* Import the limiter loop functions. */
+#define FUNCTION limiter
+#define FUNCTION_TASK_LOOP TASK_LOOP_LIMITER
+#include "runner_doiact_hydro.h"
+#undef FUNCTION
+#undef FUNCTION_TASK_LOOP
+
+/* Import the stars density loop functions. */
+#define FUNCTION density
+#define FUNCTION_TASK_LOOP TASK_LOOP_DENSITY
+#include "runner_doiact_stars.h"
+#undef FUNCTION_TASK_LOOP
+#undef FUNCTION
+
+/* Import the stars feedback loop functions. */
+#define FUNCTION feedback
+#define FUNCTION_TASK_LOOP TASK_LOOP_FEEDBACK
+#include "runner_doiact_stars.h"
+#undef FUNCTION_TASK_LOOP
+#undef FUNCTION
+
+/* Import the black hole density loop functions. */
+#define FUNCTION density
+#define FUNCTION_TASK_LOOP TASK_LOOP_DENSITY
+#include "runner_doiact_black_holes.h"
+#undef FUNCTION_TASK_LOOP
+#undef FUNCTION
+
+/* Import the black hole swallow loop functions. */
+#define FUNCTION swallow
+#define FUNCTION_TASK_LOOP TASK_LOOP_SWALLOW
+#include "runner_doiact_black_holes.h"
+#undef FUNCTION_TASK_LOOP
+#undef FUNCTION
+
+/* Import the black hole feedback loop functions. */
+#define FUNCTION feedback
+#define FUNCTION_TASK_LOOP TASK_LOOP_FEEDBACK
+#include "runner_doiact_black_holes.h"
+#undef FUNCTION_TASK_LOOP
+#undef FUNCTION
+
+/**
+ * @brief The #runner main thread routine.
+ *
+ * @param data A pointer to this thread's data (a struct #runner).
+ */
+void *runner_main(void *data) {
+
+  struct runner *r = (struct runner *)data;
+  struct engine *e = r->e;
+  struct scheduler *sched = &e->sched;
+  unsigned int seed = r->id; /* Seed published below as thread-specific data. */
+  pthread_setspecific(sched->local_seed_pointer, &seed);
+  /* Main loop. */
+  while (1) {
+
+    /* Wait at the barrier. */
+    engine_barrier(e);
+
+    /* Can we go home yet? */
+    if (e->step_props & engine_step_prop_done) break;
+
+    /* Re-set the pointer to the previous task, as there is none. */
+    struct task *t = NULL;
+    struct task *prev = NULL;
+
+    /* Loop while there are tasks... */
+    while (1) {
+
+      /* If there's no old task, try to get a new one. */
+      if (t == NULL) {
+
+        /* Get the task. */
+        TIMER_TIC
+        t = scheduler_gettask(sched, r->qid, prev);
+        TIMER_TOC(timer_gettask);
+
+        /* Did I get anything? */
+        if (t == NULL) break;
+      }
+
+      /* Get the cells. */
+      struct cell *ci = t->ci;
+      struct cell *cj = t->cj;
+
+#ifdef SWIFT_DEBUG_TASKS
+      /* Mark the thread we run on */
+      t->rid = r->cpuid;
+
+      /* And recover the pair direction */
+      if (t->type == task_type_pair || t->type == task_type_sub_pair) {
+        struct cell *ci_temp = ci;
+        struct cell *cj_temp = cj;
+        double shift[3];
+        t->sid = space_getsid(e->s, &ci_temp, &cj_temp, shift);
+      } else {
+        t->sid = -1;
+      }
+#endif
+
+#ifdef SWIFT_DEBUG_CHECKS
+      /* Record the time at which this task is run (used by debugging checks) */
+      t->ti_run = e->ti_current;
+      /* Store the task that will be running (for debugging only) */
+      r->t = t;
+#endif
+
+      /* Different types of tasks... */
+      switch (t->type) {
+        case task_type_self:
+          if (t->subtype == task_subtype_density)
+            runner_doself1_branch_density(r, ci);
+#ifdef EXTRA_HYDRO_LOOP
+          else if (t->subtype == task_subtype_gradient)
+            runner_doself1_branch_gradient(r, ci);
+#endif
+          else if (t->subtype == task_subtype_force)
+            runner_doself2_branch_force(r, ci);
+          else if (t->subtype == task_subtype_limiter)
+            runner_doself2_branch_limiter(r, ci);
+          else if (t->subtype == task_subtype_grav)
+            runner_doself_recursive_grav(r, ci, 1);
+          else if (t->subtype == task_subtype_external_grav)
+            runner_do_grav_external(r, ci, 1);
+          else if (t->subtype == task_subtype_stars_density)
+            runner_doself_branch_stars_density(r, ci);
+          else if (t->subtype == task_subtype_stars_feedback)
+            runner_doself_branch_stars_feedback(r, ci);
+          else if (t->subtype == task_subtype_bh_density)
+            runner_doself_branch_bh_density(r, ci);
+          else if (t->subtype == task_subtype_bh_swallow)
+            runner_doself_branch_bh_swallow(r, ci);
+          else if (t->subtype == task_subtype_do_gas_swallow)
+            runner_do_gas_swallow_self(r, ci, 1);
+          else if (t->subtype == task_subtype_do_bh_swallow)
+            runner_do_bh_swallow_self(r, ci, 1);
+          else if (t->subtype == task_subtype_bh_feedback)
+            runner_doself_branch_bh_feedback(r, ci);
+          else
+            error("Unknown/invalid task subtype (%s/%s).",
+                  taskID_names[t->type], subtaskID_names[t->subtype]);
+          break;
+
+        case task_type_pair:
+          if (t->subtype == task_subtype_density)
+            runner_dopair1_branch_density(r, ci, cj);
+#ifdef EXTRA_HYDRO_LOOP
+          else if (t->subtype == task_subtype_gradient)
+            runner_dopair1_branch_gradient(r, ci, cj);
+#endif
+          else if (t->subtype == task_subtype_force)
+            runner_dopair2_branch_force(r, ci, cj);
+          else if (t->subtype == task_subtype_limiter)
+            runner_dopair2_branch_limiter(r, ci, cj);
+          else if (t->subtype == task_subtype_grav)
+            runner_dopair_recursive_grav(r, ci, cj, 1);
+          else if (t->subtype == task_subtype_stars_density)
+            runner_dopair_branch_stars_density(r, ci, cj);
+          else if (t->subtype == task_subtype_stars_feedback)
+            runner_dopair_branch_stars_feedback(r, ci, cj);
+          else if (t->subtype == task_subtype_bh_density)
+            runner_dopair_branch_bh_density(r, ci, cj);
+          else if (t->subtype == task_subtype_bh_swallow)
+            runner_dopair_branch_bh_swallow(r, ci, cj);
+          else if (t->subtype == task_subtype_do_gas_swallow)
+            runner_do_gas_swallow_pair(r, ci, cj, 1);
+          else if (t->subtype == task_subtype_do_bh_swallow)
+            runner_do_bh_swallow_pair(r, ci, cj, 1);
+          else if (t->subtype == task_subtype_bh_feedback)
+            runner_dopair_branch_bh_feedback(r, ci, cj);
+          else
+            error("Unknown/invalid task subtype (%s/%s).",
+                  taskID_names[t->type], subtaskID_names[t->subtype]);
+          break;
+
+        case task_type_sub_self:
+          if (t->subtype == task_subtype_density)
+            runner_dosub_self1_density(r, ci, 1);
+#ifdef EXTRA_HYDRO_LOOP
+          else if (t->subtype == task_subtype_gradient)
+            runner_dosub_self1_gradient(r, ci, 1);
+#endif
+          else if (t->subtype == task_subtype_force)
+            runner_dosub_self2_force(r, ci, 1);
+          else if (t->subtype == task_subtype_limiter)
+            runner_dosub_self2_limiter(r, ci, 1);
+          else if (t->subtype == task_subtype_stars_density)
+            runner_dosub_self_stars_density(r, ci, 1);
+          else if (t->subtype == task_subtype_stars_feedback)
+            runner_dosub_self_stars_feedback(r, ci, 1);
+          else if (t->subtype == task_subtype_bh_density)
+            runner_dosub_self_bh_density(r, ci, 1);
+          else if (t->subtype == task_subtype_bh_swallow)
+            runner_dosub_self_bh_swallow(r, ci, 1);
+          else if (t->subtype == task_subtype_do_gas_swallow)
+            runner_do_gas_swallow_self(r, ci, 1);
+          else if (t->subtype == task_subtype_do_bh_swallow)
+            runner_do_bh_swallow_self(r, ci, 1);
+          else if (t->subtype == task_subtype_bh_feedback)
+            runner_dosub_self_bh_feedback(r, ci, 1);
+          else
+            error("Unknown/invalid task subtype (%s/%s).",
+                  taskID_names[t->type], subtaskID_names[t->subtype]);
+          break;
+
+        case task_type_sub_pair:
+          if (t->subtype == task_subtype_density)
+            runner_dosub_pair1_density(r, ci, cj, 1);
+#ifdef EXTRA_HYDRO_LOOP
+          else if (t->subtype == task_subtype_gradient)
+            runner_dosub_pair1_gradient(r, ci, cj, 1);
+#endif
+          else if (t->subtype == task_subtype_force)
+            runner_dosub_pair2_force(r, ci, cj, 1);
+          else if (t->subtype == task_subtype_limiter)
+            runner_dosub_pair2_limiter(r, ci, cj, 1);
+          else if (t->subtype == task_subtype_stars_density)
+            runner_dosub_pair_stars_density(r, ci, cj, 1);
+          else if (t->subtype == task_subtype_stars_feedback)
+            runner_dosub_pair_stars_feedback(r, ci, cj, 1);
+          else if (t->subtype == task_subtype_bh_density)
+            runner_dosub_pair_bh_density(r, ci, cj, 1);
+          else if (t->subtype == task_subtype_bh_swallow)
+            runner_dosub_pair_bh_swallow(r, ci, cj, 1);
+          else if (t->subtype == task_subtype_do_gas_swallow)
+            runner_do_gas_swallow_pair(r, ci, cj, 1);
+          else if (t->subtype == task_subtype_do_bh_swallow)
+            runner_do_bh_swallow_pair(r, ci, cj, 1);
+          else if (t->subtype == task_subtype_bh_feedback)
+            runner_dosub_pair_bh_feedback(r, ci, cj, 1);
+          else
+            error("Unknown/invalid task subtype (%s/%s).",
+                  taskID_names[t->type], subtaskID_names[t->subtype]);
+          break;
+
+        case task_type_sort:
+          /* Cleanup only if any of the indices went stale. */
+          runner_do_hydro_sort(
+              r, ci, t->flags,
+              ci->hydro.dx_max_sort_old > space_maxreldx * ci->dmin, 1);
+          /* Reset the sort flags as our work here is done. */
+          t->flags = 0;
+          break;
+        case task_type_stars_sort:
+          /* Cleanup only if any of the indices went stale. */
+          runner_do_stars_sort(
+              r, ci, t->flags,
+              ci->stars.dx_max_sort_old > space_maxreldx * ci->dmin, 1);
+          /* Reset the sort flags as our work here is done. */
+          t->flags = 0;
+          break;
+        case task_type_init_grav:
+          runner_do_init_grav(r, ci, 1);
+          break;
+        case task_type_ghost:
+          runner_do_ghost(r, ci, 1);
+          break;
+#ifdef EXTRA_HYDRO_LOOP
+        case task_type_extra_ghost:
+          runner_do_extra_ghost(r, ci, 1);
+          break;
+#endif
+        case task_type_stars_ghost:
+          runner_do_stars_ghost(r, ci, 1);
+          break;
+        case task_type_bh_density_ghost:
+          runner_do_black_holes_density_ghost(r, ci, 1);
+          break;
+        case task_type_bh_swallow_ghost3:
+          runner_do_black_holes_swallow_ghost(r, ci, 1);
+          break;
+        case task_type_drift_part:
+          runner_do_drift_part(r, ci, 1);
+          break;
+        case task_type_drift_spart:
+          runner_do_drift_spart(r, ci, 1);
+          break;
+        case task_type_drift_bpart:
+          runner_do_drift_bpart(r, ci, 1);
+          break;
+        case task_type_drift_gpart:
+          runner_do_drift_gpart(r, ci, 1);
+          break;
+        case task_type_kick1:
+          runner_do_kick1(r, ci, 1);
+          break;
+        case task_type_kick2:
+          runner_do_kick2(r, ci, 1);
+          break;
+        case task_type_end_hydro_force:
+          runner_do_end_hydro_force(r, ci, 1);
+          break;
+        case task_type_end_grav_force:
+          runner_do_end_grav_force(r, ci, 1);
+          break;
+        case task_type_logger:
+          runner_do_logger(r, ci, 1);
+          break;
+        case task_type_timestep:
+          runner_do_timestep(r, ci, 1);
+          break;
+        case task_type_timestep_limiter:
+          runner_do_limiter(r, ci, 0, 1);
+          break;
+#ifdef WITH_MPI
+        case task_type_send:
+          if (t->subtype == task_subtype_tend_part) {
+            free(t->buff);
+          } else if (t->subtype == task_subtype_tend_gpart) {
+            free(t->buff);
+          } else if (t->subtype == task_subtype_tend_spart) {
+            free(t->buff);
+          } else if (t->subtype == task_subtype_tend_bpart) {
+            free(t->buff);
+          } else if (t->subtype == task_subtype_sf_counts) {
+            free(t->buff);
+          } else if (t->subtype == task_subtype_part_swallow) {
+            free(t->buff);
+          } else if (t->subtype == task_subtype_bpart_merger) {
+            free(t->buff);
+          }
+          break;
+        case task_type_recv:
+          if (t->subtype == task_subtype_tend_part) {
+            cell_unpack_end_step_hydro(ci, (struct pcell_step_hydro *)t->buff);
+            free(t->buff);
+          } else if (t->subtype == task_subtype_tend_gpart) {
+            cell_unpack_end_step_grav(ci, (struct pcell_step_grav *)t->buff);
+            free(t->buff);
+          } else if (t->subtype == task_subtype_tend_spart) {
+            cell_unpack_end_step_stars(ci, (struct pcell_step_stars *)t->buff);
+            free(t->buff);
+          } else if (t->subtype == task_subtype_tend_bpart) {
+            cell_unpack_end_step_black_holes(
+                ci, (struct pcell_step_black_holes *)t->buff);
+            free(t->buff);
+          } else if (t->subtype == task_subtype_sf_counts) {
+            cell_unpack_sf_counts(ci, (struct pcell_sf *)t->buff);
+            cell_clear_stars_sort_flags(ci, /*clear_unused_flags=*/0);
+            free(t->buff);
+          } else if (t->subtype == task_subtype_xv) {
+            runner_do_recv_part(r, ci, 1, 1);
+          } else if (t->subtype == task_subtype_rho) {
+            runner_do_recv_part(r, ci, 0, 1);
+          } else if (t->subtype == task_subtype_gradient) {
+            runner_do_recv_part(r, ci, 0, 1);
+          } else if (t->subtype == task_subtype_part_swallow) {
+            cell_unpack_part_swallow(ci,
+                                     (struct black_holes_part_data *)t->buff);
+            free(t->buff);
+          } else if (t->subtype == task_subtype_bpart_merger) {
+            cell_unpack_bpart_swallow(ci,
+                                      (struct black_holes_bpart_data *)t->buff);
+            free(t->buff);
+          } else if (t->subtype == task_subtype_limiter) {
+            runner_do_recv_part(r, ci, 0, 1);
+          } else if (t->subtype == task_subtype_gpart) {
+            runner_do_recv_gpart(r, ci, 1);
+          } else if (t->subtype == task_subtype_spart) {
+            runner_do_recv_spart(r, ci, 1, 1);
+          } else if (t->subtype == task_subtype_bpart_rho) {
+            runner_do_recv_bpart(r, ci, 1, 1);
+          } else if (t->subtype == task_subtype_bpart_swallow) {
+            runner_do_recv_bpart(r, ci, 0, 1);
+          } else if (t->subtype == task_subtype_bpart_feedback) {
+            runner_do_recv_bpart(r, ci, 0, 1);
+          } else if (t->subtype == task_subtype_multipole) {
+            cell_unpack_multipoles(ci, (struct gravity_tensors *)t->buff);
+            free(t->buff);
+          } else {
+            error("Unknown/invalid task subtype (%d).", t->subtype);
+          }
+          break;
+#endif
+        case task_type_grav_down:
+          runner_do_grav_down(r, t->ci, 1);
+          break;
+        case task_type_grav_mesh:
+          runner_do_grav_mesh(r, t->ci, 1);
+          break;
+        case task_type_grav_long_range:
+          runner_do_grav_long_range(r, t->ci, 1);
+          break;
+        case task_type_grav_mm:
+          runner_dopair_grav_mm_progenies(r, t->flags, t->ci, t->cj);
+          break;
+        case task_type_cooling:
+          runner_do_cooling(r, t->ci, 1);
+          break;
+        case task_type_star_formation:
+          runner_do_star_formation(r, t->ci, 1);
+          break;
+        case task_type_stars_resort:
+          runner_do_stars_resort(r, t->ci, 1);
+          break;
+        case task_type_fof_self:
+          runner_do_fof_self(r, t->ci, 1);
+          break;
+        case task_type_fof_pair:
+          runner_do_fof_pair(r, t->ci, t->cj, 1);
+          break;
+        default:
+          error("Unknown/invalid task type (%d).", t->type);
+      }
+
+/* Mark that we have run this task on these cells */
+#ifdef SWIFT_DEBUG_CHECKS
+      if (ci != NULL) {
+        ci->tasks_executed[t->type]++;
+        ci->subtasks_executed[t->subtype]++;
+      }
+      if (cj != NULL) {
+        cj->tasks_executed[t->type]++;
+        cj->subtasks_executed[t->subtype]++;
+      }
+
+      /* This runner is not doing a task anymore */
+      r->t = NULL;
+#endif
+
+      /* We're done with this task, see if we get a next one. */
+      prev = t;
+      t = scheduler_done(sched, t);
+
+    } /* Loop while there are tasks. */
+  }   /* Main loop. */
+
+  /* Be kind, rewind. */
+  return NULL;
+}
diff --git a/src/runner_others.c b/src/runner_others.c
new file mode 100644
index 0000000000000000000000000000000000000000..5ffaf7aa321f658b6e0e7e10a9cb8ad2f4a5a541
--- /dev/null
+++ b/src/runner_others.c
@@ -0,0 +1,660 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
+ *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *               2015 Peter W. Draper (p.w.draper@durham.ac.uk)
+ *               2016 John A. Regan (john.a.regan@durham.ac.uk)
+ *                    Tom Theuns (tom.theuns@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+/* Config parameters. */
+#include "../config.h"
+
+/* Some standard headers. */
+#include <float.h>
+#include <limits.h>
+#include <stdlib.h>
+
+/* MPI headers. */
+#ifdef WITH_MPI
+#include <mpi.h>
+#endif
+
+/* This object's header. */
+#include "runner.h"
+
+/* Local headers. */
+#include "active.h"
+#include "cell.h"
+#include "chemistry.h"
+#include "cooling.h"
+#include "engine.h"
+#include "error.h"
+#include "gravity.h"
+#include "hydro.h"
+#include "logger.h"
+#include "pressure_floor.h"
+#include "space.h"
+#include "star_formation.h"
+#include "star_formation_logger.h"
+#include "stars.h"
+#include "timers.h"
+#include "tracers.h"
+
+/**
+ * @brief Calculate the external-potential acceleration of the active #gpart.
+ *
+ * @param r The #runner thread.
+ * @param c The #cell to process (split cells recurse into their progeny).
+ * @param timer 1 if the time is to be recorded.
+ */
+void runner_do_grav_external(struct runner *r, struct cell *c, int timer) {
+
+  struct gpart *restrict gparts = c->grav.parts;
+  const int gcount = c->grav.count;
+  const struct engine *e = r->e;
+  const struct external_potential *potential = e->external_potential;
+  const struct phys_const *constants = e->physical_constants;
+  const double time = r->e->time;
+
+  TIMER_TIC;
+
+  /* Skip gravity-inactive cells (this return bypasses the TIMER_TOC). */
+  if (!cell_is_active_gravity(c, e)) return;
+
+  /* Split cells hand the work to their existing progeny, untimed. */
+  if (c->split) {
+    for (int k = 0; k < 8; k++)
+      if (c->progeny[k] != NULL) runner_do_grav_external(r, c->progeny[k], 0);
+  } else {
+
+    /* Loop over the gparts in this cell. */
+    for (int i = 0; i < gcount; i++) {
+
+      /* Get a direct pointer to the gpart. */
+      struct gpart *restrict gp = &gparts[i];
+
+      /* Only active gparts receive the external acceleration. */
+      if (gpart_is_active(gp, e)) {
+        external_gravity_acceleration(time, potential, constants, gp);
+      }
+    }
+  }
+
+  if (timer) TIMER_TOC(timer_dograv_external);
+}
+
+/**
+ * @brief Calculate the gravity accelerations from the periodic mesh.
+ *
+ * @param r The #runner thread.
+ * @param c The #cell to process (split cells recurse into their progeny).
+ * @param timer 1 if the time is to be recorded.
+ */
+void runner_do_grav_mesh(struct runner *r, struct cell *c, int timer) {
+
+  struct gpart *restrict gparts = c->grav.parts;
+  const int gcount = c->grav.count;
+  const struct engine *e = r->e;
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (!e->s->periodic) error("Calling mesh forces in non-periodic mode.");
+#endif
+
+  TIMER_TIC;
+
+  /* Skip gravity-inactive cells (this return bypasses the TIMER_TOC). */
+  if (!cell_is_active_gravity(c, e)) return;
+
+  /* Split cells hand the work to their existing progeny, untimed. */
+  if (c->split) {
+    for (int k = 0; k < 8; k++)
+      if (c->progeny[k] != NULL) runner_do_grav_mesh(r, c->progeny[k], 0);
+  } else {
+
+    /* Get the forces from the gravity mesh for all gparts of this leaf. */
+    pm_mesh_interpolate_forces(e->mesh, e, gparts, gcount);
+  }
+
+  if (timer) TIMER_TOC(timer_dograv_mesh);
+}
+
+/**
+ * @brief Calculate the change in thermal state of the active particles
+ * of a cell induced by radiative cooling and heating.
+ *
+ * @param r The #runner thread.
+ * @param c The #cell to process (split cells recurse into their progeny).
+ * @param timer 1 if the time is to be recorded.
+ */
+void runner_do_cooling(struct runner *r, struct cell *c, int timer) {
+
+  const struct engine *e = r->e;
+  const struct cosmology *cosmo = e->cosmology;
+  const int with_cosmology = (e->policy & engine_policy_cosmology);
+  const struct cooling_function_data *cooling_func = e->cooling_func;
+  const struct phys_const *constants = e->physical_constants;
+  const struct unit_system *us = e->internal_units;
+  const struct hydro_props *hydro_props = e->hydro_properties;
+  const struct entropy_floor_properties *entropy_floor_props = e->entropy_floor;
+  const double time_base = e->time_base;
+  const integertime_t ti_current = e->ti_current;
+  struct part *restrict parts = c->hydro.parts;
+  struct xpart *restrict xparts = c->hydro.xparts;
+  const int count = c->hydro.count;
+
+  TIMER_TIC;
+
+  /* Skip hydro-inactive cells (this return bypasses the TIMER_TOC). */
+  if (!cell_is_active_hydro(c, e)) return;
+
+  /* Split cells hand the work to their existing progeny, untimed. */
+  if (c->split) {
+    for (int k = 0; k < 8; k++)
+      if (c->progeny[k] != NULL) runner_do_cooling(r, c->progeny[k], 0);
+  } else {
+
+    /* Loop over the parts in this cell. */
+    for (int i = 0; i < count; i++) {
+
+      /* Get direct pointers to the part and its matching xpart. */
+      struct part *restrict p = &parts[i];
+      struct xpart *restrict xp = &xparts[i];
+
+      if (part_is_active(p, e)) {
+
+        double dt_cool, dt_therm;
+        if (with_cosmology) {
+          const integertime_t ti_step = get_integer_timestep(p->time_bin);
+          const integertime_t ti_begin =
+              get_integer_time_begin(ti_current - 1, p->time_bin);
+
+          dt_cool =
+              cosmology_get_delta_time(cosmo, ti_begin, ti_begin + ti_step);
+          dt_therm = cosmology_get_therm_kick_factor(e->cosmology, ti_begin,
+                                                     ti_begin + ti_step);
+
+        } else {
+          dt_cool = get_timestep(p->time_bin, time_base);
+          dt_therm = get_timestep(p->time_bin, time_base);
+        }
+
+        /* Cool the particle using the two step lengths computed above. */
+        cooling_cool_part(constants, us, cosmo, hydro_props,
+                          entropy_floor_props, cooling_func, p, xp, dt_cool,
+                          dt_therm);
+      }
+    }
+  }
+
+  if (timer) TIMER_TOC(timer_do_cooling);
+}
+
+/**
+ * @brief Form stars from the active gas of a cell and update c->stars.sfh.
+ */
+void runner_do_star_formation(struct runner *r, struct cell *c, int timer) {
+
+  struct engine *e = r->e;
+  const struct cosmology *cosmo = e->cosmology;
+  const struct star_formation *sf_props = e->star_formation;
+  const struct phys_const *phys_const = e->physical_constants;
+  const int count = c->hydro.count;
+  struct part *restrict parts = c->hydro.parts;
+  struct xpart *restrict xparts = c->hydro.xparts;
+  const int with_cosmology = (e->policy & engine_policy_cosmology);
+  const int with_feedback = (e->policy & engine_policy_feedback);
+  const struct hydro_props *restrict hydro_props = e->hydro_properties;
+  const struct unit_system *restrict us = e->internal_units;
+  struct cooling_function_data *restrict cooling = e->cooling_func;
+  const struct entropy_floor_properties *entropy_floor = e->entropy_floor;
+  const double time_base = e->time_base;
+  const integertime_t ti_current = e->ti_current;
+  const int current_stars_count = c->stars.count;
+
+  TIMER_TIC;
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (c->nodeID != e->nodeID)
+    error("Running star formation task on a foreign node!");
+#endif
+
+  /* Empty or hydro-inactive cells are only logged as inactive. */
+  if (c->hydro.count == 0 || !cell_is_active_hydro(c, e)) {
+    star_formation_logger_log_inactive_cell(&c->stars.sfh);
+    return;
+  }
+
+  /* Reset the star formation log of this cell */
+  star_formation_logger_init(&c->stars.sfh);
+
+  /* Split cells recurse and accumulate the logs of their progeny */
+  if (c->split) {
+    for (int k = 0; k < 8; k++)
+      if (c->progeny[k] != NULL) {
+        /* Load the child cell */
+        struct cell *restrict cp = c->progeny[k];
+
+        /* Do the recursion (untimed) */
+        runner_do_star_formation(r, cp, 0);
+
+        /* Add the log of the child to the log of the current cell */
+        star_formation_logger_add(&c->stars.sfh, &cp->stars.sfh);
+      }
+  } else {
+
+    /* Loop over the gas particles in this cell. */
+    for (int k = 0; k < count; k++) {
+
+      /* Get a handle on the part and its matching xpart. */
+      struct part *restrict p = &parts[k];
+      struct xpart *restrict xp = &xparts[k];
+
+      /* Only work on active particles */
+      if (part_is_active(p, e)) {
+
+        /* Is this particle star forming? */
+        if (star_formation_is_star_forming(p, xp, sf_props, phys_const, cosmo,
+                                           hydro_props, us, cooling,
+                                           entropy_floor)) {
+
+          /* Time-step size for this particle */
+          double dt_star;
+          if (with_cosmology) {
+            const integertime_t ti_step = get_integer_timestep(p->time_bin);
+            const integertime_t ti_begin =
+                get_integer_time_begin(ti_current - 1, p->time_bin);
+
+            dt_star =
+                cosmology_get_delta_time(cosmo, ti_begin, ti_begin + ti_step);
+
+          } else {
+            dt_star = get_timestep(p->time_bin, time_base);
+          }
+
+          /* Compute the SF rate of the particle */
+          star_formation_compute_SFR(p, xp, sf_props, phys_const, cosmo,
+                                     dt_star);
+
+          /* Log the star-forming particle and dt_star in this cell's log */
+          star_formation_logger_log_active_part(p, xp, &c->stars.sfh, dt_star);
+
+          /* Are we forming a star particle from this SF rate? */
+          if (star_formation_should_convert_to_star(p, xp, sf_props, e,
+                                                    dt_star)) {
+
+            /* Convert the gas particle to a star particle */
+            struct spart *sp = cell_convert_part_to_spart(e, c, p, xp);
+
+            /* Did we get a star? (Or did we run out of spare ones?) */
+            if (sp != NULL) {
+
+              /* message("We formed a star id=%lld cellID=%d", sp->id,
+               * c->cellID); */
+
+              /* Copy the properties of the gas particle to the star particle */
+              star_formation_copy_properties(p, xp, sp, e, sf_props, cosmo,
+                                             with_cosmology, phys_const,
+                                             hydro_props, us, cooling);
+
+              /* Record the new star in the log of this cell */
+              star_formation_logger_log_new_spart(sp, &c->stars.sfh);
+            }
+          }
+
+        } else { /* Are we not star-forming? */
+
+          /* Update the particle to flag it as not star-forming */
+          star_formation_update_part_not_SFR(p, xp, e, sf_props,
+                                             with_cosmology);
+
+        } /* Not Star-forming? */
+
+      } else { /* is active? */
+
+        /* Inactive particles are still logged unless they are inhibited */
+        if (!part_is_inhibited(p, e)) {
+          star_formation_logger_log_inactive_part(p, xp, &c->stars.sfh);
+        }
+      }
+    } /* Loop over particles */
+  }
+
+  /* With feedback on, if the star count of this top-level cell changed,
+   * the star sorts are now invalid: flag them for re-computation. */
+  if (with_feedback && (c == c->top) &&
+      (current_stars_count != c->stars.count)) {
+    cell_set_star_resort_flag(c);
+  }
+
+  if (timer) TIMER_TOC(timer_do_star_formation);
+}
+
+/**
+ * @brief End the hydro force calculation of all active particles in a cell
+ * by multiplying the accelerations by the relevant constants
+ *
+ * @param r The #runner thread.
+ * @param c The #cell.
+ * @param timer Are we timing this ?
+ */
+void runner_do_end_hydro_force(struct runner *r, struct cell *c, int timer) {
+
+  const struct engine *e = r->e;
+
+  TIMER_TIC;
+
+  /* Skip hydro-inactive cells (this return bypasses the TIMER_TOC). */
+  if (!cell_is_active_hydro(c, e)) return;
+
+  /* Split cells hand the work to their existing progeny, untimed. */
+  if (c->split) {
+    for (int k = 0; k < 8; k++)
+      if (c->progeny[k] != NULL) runner_do_end_hydro_force(r, c->progeny[k], 0);
+  } else {
+
+    const struct cosmology *cosmo = e->cosmology;
+    const int count = c->hydro.count;
+    struct part *restrict parts = c->hydro.parts;
+
+    /* Loop over the gas particles in this cell. */
+    for (int k = 0; k < count; k++) {
+
+      /* Get a handle on the part. */
+      struct part *restrict p = &parts[k];
+
+      if (part_is_active(p, e)) {
+
+        /* Finish the force loop (hydro and chemistry) */
+        hydro_end_force(p, cosmo);
+        chemistry_end_force(p, cosmo);
+
+#ifdef SWIFT_BOUNDARY_PARTICLES
+
+        /* Get the ID of the part */
+        const long long id = p->id;
+
+        /* Cancel hydro forces of particles with IDs below the threshold */
+        if (id < SWIFT_BOUNDARY_PARTICLES) {
+
+          /* Don't move ! */
+          hydro_reset_acceleration(p);
+
+#if defined(GIZMO_MFV_SPH) || defined(GIZMO_MFM_SPH)
+
+          /* Some values need to be reset in the Gizmo case. */
+          hydro_prepare_force(p, &c->hydro.xparts[k], cosmo,
+                              e->hydro_properties, 0);
+#endif
+        }
+#endif
+      }
+    }
+  }
+
+  if (timer) TIMER_TOC(timer_end_hydro_force);
+}
+
+/**
+ * @brief End the gravity force calculation of all active particles in a cell
+ * by multiplying the accelerations by the relevant constants
+ *
+ * @param r The #runner thread.
+ * @param c The #cell.
+ * @param timer Are we timing this ?
+ */
+void runner_do_end_grav_force(struct runner *r, struct cell *c, int timer) {
+
+  const struct engine *e = r->e;
+
+  TIMER_TIC;
+
+  /* Skip gravity-inactive cells (this return bypasses the TIMER_TOC). */
+  if (!cell_is_active_gravity(c, e)) return;
+
+  /* Split cells hand the work to their existing progeny, untimed. */
+  if (c->split) {
+    for (int k = 0; k < 8; k++)
+      if (c->progeny[k] != NULL) runner_do_end_grav_force(r, c->progeny[k], 0);
+  } else {
+
+    const struct space *s = e->s;
+    const int periodic = s->periodic;
+    const float G_newton = e->physical_constants->const_newton_G;
+
+    /* Potential normalisation; non-zero only for periodic self-gravity */
+    float potential_normalisation = 0.;
+    if (periodic && (e->policy & engine_policy_self_gravity)) {
+      const double volume = s->dim[0] * s->dim[1] * s->dim[2];
+      const double r_s = e->mesh->r_s;
+      potential_normalisation = 4. * M_PI * e->total_mass * r_s * r_s / volume;
+    }
+
+    const int gcount = c->grav.count;
+    struct gpart *restrict gparts = c->grav.parts;
+
+    /* Loop over the g-particles in this cell. */
+    for (int k = 0; k < gcount; k++) {
+
+      /* Get a handle on the gpart. */
+      struct gpart *restrict gp = &gparts[k];
+
+      if (gpart_is_active(gp, e)) {
+
+        /* Finish the force calculation */
+        gravity_end_force(gp, G_newton, potential_normalisation, periodic);
+
+#ifdef SWIFT_MAKE_GRAVITY_GLASS
+
+        /* Negate the gravity forces */
+        gp->a_grav[0] *= -1.f;
+        gp->a_grav[1] *= -1.f;
+        gp->a_grav[2] *= -1.f;
+#endif
+
+#ifdef SWIFT_NO_GRAVITY_BELOW_ID
+
+        /* Get the ID: gas/stars store a negated index into parts/sparts */
+        long long id = 0;
+        if (gp->type == swift_type_gas)
+          id = e->s->parts[-gp->id_or_neg_offset].id;
+        else if (gp->type == swift_type_stars)
+          id = e->s->sparts[-gp->id_or_neg_offset].id;
+        else if (gp->type == swift_type_black_hole)
+          error("Unexisting type");
+        else
+          id = gp->id_or_neg_offset;
+
+        /* Cancel gravity forces of particles with IDs below the threshold */
+        if (id < SWIFT_NO_GRAVITY_BELOW_ID) {
+
+          /* Don't move ! */
+          gp->a_grav[0] = 0.f;
+          gp->a_grav[1] = 0.f;
+          gp->a_grav[2] = 0.f;
+        }
+#endif
+
+#ifdef SWIFT_DEBUG_CHECKS
+        if ((e->policy & engine_policy_self_gravity) &&
+            !(e->policy & engine_policy_black_holes)) {
+
+          /* Let's add a self interaction to simplify the count */
+          gp->num_interacted++;
+
+          /* Check that this gpart has interacted with all the other
+           * non-inhibited particles (via direct or multipoles) in the box */
+          if (gp->num_interacted !=
+              e->total_nr_gparts - e->count_inhibited_gparts) {
+
+            /* Get the ID: gas/stars store a negated index into parts/sparts */
+            long long my_id = 0;
+            if (gp->type == swift_type_gas)
+              my_id = e->s->parts[-gp->id_or_neg_offset].id;
+            else if (gp->type == swift_type_stars)
+              my_id = e->s->sparts[-gp->id_or_neg_offset].id;
+            else if (gp->type == swift_type_black_hole)
+              error("Unexisting type");
+            else
+              my_id = gp->id_or_neg_offset;
+
+            error(
+                "g-particle (id=%lld, type=%s) did not interact "
+                "gravitationally with all other gparts "
+                "gp->num_interacted=%lld, total_gparts=%lld (local "
+                "num_gparts=%zd inhibited_gparts=%lld)",
+                my_id, part_type_names[gp->type], gp->num_interacted,
+                e->total_nr_gparts, e->s->nr_gparts, e->count_inhibited_gparts);
+          }
+        }
+#endif
+      }
+    }
+  }
+  if (timer) TIMER_TOC(timer_end_grav_force);
+}
+
+/**
+ * @brief Write the active #part of a cell through the logger when due.
+ *
+ * @param r The #runner thread.
+ * @param c The #cell.
+ * @param timer Are we timing this ?
+ */
+void runner_do_logger(struct runner *r, struct cell *c, int timer) {
+
+#ifdef WITH_LOGGER
+  TIMER_TIC;
+
+  const struct engine *e = r->e;
+  struct part *restrict parts = c->hydro.parts;
+  struct xpart *restrict xparts = c->hydro.xparts;
+  const int count = c->hydro.count;
+
+  /* Skip cells that are neither hydro-active nor gravity-active. */
+  if (!cell_is_active_hydro(c, e) && !cell_is_active_gravity(c, e)) return;
+
+  /* Recurse? Avoid spending too much time in useless cells. */
+  if (c->split) {
+    for (int k = 0; k < 8; k++)
+      if (c->progeny[k] != NULL) runner_do_logger(r, c->progeny[k], 0);
+  } else {
+
+    /* Loop over the parts in this cell. */
+    for (int k = 0; k < count; k++) {
+
+      /* Get a handle on the part and its matching xpart. */
+      struct part *restrict p = &parts[k];
+      struct xpart *restrict xp = &xparts[k];
+
+      /* Does the particle need to be logged? */
+      /* Only particles that are active (part_is_active) are considered;
+       * inactive ones are neither written nor counted. */
+      if (part_is_active(p, e)) {
+
+        if (logger_should_write(&xp->logger_data, e->logger)) {
+          /* Write particle */
+          /* Currently writing all the fields; this should be adapted later */
+          logger_log_part(e->logger, p,
+                          logger_mask_data[logger_x].mask |
+                              logger_mask_data[logger_v].mask |
+                              logger_mask_data[logger_a].mask |
+                              logger_mask_data[logger_u].mask |
+                              logger_mask_data[logger_h].mask |
+                              logger_mask_data[logger_rho].mask |
+                              logger_mask_data[logger_consts].mask,
+                          &xp->logger_data.last_offset);
+
+          /* Set counter back to zero */
+          xp->logger_data.steps_since_last_output = 0;
+        } else
+          /* Not written this step: update the counter */
+          xp->logger_data.steps_since_last_output += 1;
+      }
+    }
+  }
+
+  if (c->grav.count > 0) error("gparts not implemented");
+
+  if (c->stars.count > 0) error("sparts not implemented");
+
+  if (timer) TIMER_TOC(timer_logger);
+
+#else
+  error("Logger disabled, please enable it during configuration");
+#endif
+}
+
+/**
+ * @brief Recursively search for FOF groups in a single cell.
+ *
+ * @param r The #runner thread.
+ * @param c The #cell to search.
+ * @param timer 1 if the time is to be recorded.
+ */
+void runner_do_fof_self(struct runner *r, struct cell *c, int timer) {
+
+#ifdef WITH_FOF
+
+  TIMER_TIC;
+
+  const struct engine *e = r->e;
+  struct space *s = e->s;
+  const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]};
+  const int periodic = s->periodic;
+  const struct gpart *const gparts = s->gparts;
+  const double search_r2 = e->fof_properties->l_x2;
+
+  rec_fof_search_self(e->fof_properties, dim, search_r2, periodic, gparts, c);
+
+  if (timer) TIMER_TOC(timer_fof_self);
+
+#else
+  error("SWIFT was not compiled with FOF enabled!");
+#endif
+}
+
+/**
+ * @brief Recursively search for FOF groups between a pair of cells.
+ *
+ * @param r The #runner thread.
+ * @param ci The first #cell of the pair.
+ * @param cj The second #cell of the pair.
+ * @param timer 1 if the time is to be recorded.
+ */
+void runner_do_fof_pair(struct runner *r, struct cell *ci, struct cell *cj,
+                        int timer) {
+
+#ifdef WITH_FOF
+
+  TIMER_TIC;
+
+  const struct engine *e = r->e;
+  struct space *s = e->s;
+  const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]};
+  const int periodic = s->periodic;
+  const struct gpart *const gparts = s->gparts;
+  const double search_r2 = e->fof_properties->l_x2;
+
+  rec_fof_search_pair(e->fof_properties, dim, search_r2, periodic, gparts, ci,
+                      cj);
+
+  if (timer) TIMER_TOC(timer_fof_pair);
+#else
+  error("SWIFT was not compiled with FOF enabled!");
+#endif
+}
diff --git a/src/runner_recv.c b/src/runner_recv.c
new file mode 100644
index 0000000000000000000000000000000000000000..803e68c2106933684109e798e24952a0dbdfea6e
--- /dev/null
+++ b/src/runner_recv.c
@@ -0,0 +1,368 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
+ *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *               2015 Peter W. Draper (p.w.draper@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+/* Config parameters. */
+#include "../config.h"
+
+/* MPI headers. */
+#ifdef WITH_MPI
+#include <mpi.h>
+#endif
+
+/* This object's header. */
+#include "runner.h"
+
+/* Local headers. */
+#include "engine.h"
+#include "timers.h"
+
+/**
+ * @brief Construct the cell properties from the received #part.
+ *
+ * @param r The runner thread.
+ * @param c The cell.
+ * @param clear_sorts Should we clear the sort flag and hence trigger a sort ?
+ * @param timer Are we timing this ?
+ */
+void runner_do_recv_part(struct runner *r, struct cell *c, int clear_sorts,
+                         int timer) {
+#ifdef WITH_MPI
+
+  const struct part *restrict parts = c->hydro.parts;
+  const size_t nr_parts = c->hydro.count;
+  const integertime_t ti_current = r->e->ti_current;
+
+  TIMER_TIC;
+
+  integertime_t ti_hydro_end_min = max_nr_timesteps;
+  integertime_t ti_hydro_end_max = 0;
+  timebin_t time_bin_min = num_time_bins;
+  timebin_t time_bin_max = 0;
+  float h_max = 0.f;
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (c->nodeID == engine_rank) error("Updating a local cell!");
+#endif
+
+  /* Clear this cell's sorted mask if requested. */
+  if (clear_sorts) c->hydro.sorted = 0;
+
+  /* If this cell is a leaf, collect the particle data. */
+  if (!c->split) {
+
+    /* Collect the time-bin range and h_max, skipping inhibited parts. */
+    for (size_t k = 0; k < nr_parts; k++) {
+      if (parts[k].time_bin == time_bin_inhibited) continue;
+      time_bin_min = min(time_bin_min, parts[k].time_bin);
+      time_bin_max = max(time_bin_max, parts[k].time_bin);
+      h_max = max(h_max, parts[k].h);
+    }
+
+    /* Convert the time bins into integer end times */
+    ti_hydro_end_min = get_integer_time_end(ti_current, time_bin_min);
+    ti_hydro_end_max = get_integer_time_end(ti_current, time_bin_max);
+  }
+
+  /* Otherwise, recurse into the non-empty progeny and collect. */
+  else {
+    for (int k = 0; k < 8; k++) {
+      if (c->progeny[k] != NULL && c->progeny[k]->hydro.count > 0) {
+        runner_do_recv_part(r, c->progeny[k], clear_sorts, 0);
+        ti_hydro_end_min =
+            min(ti_hydro_end_min, c->progeny[k]->hydro.ti_end_min);
+        ti_hydro_end_max =
+            max(ti_hydro_end_max, c->progeny[k]->hydro.ti_end_max);
+        h_max = max(h_max, c->progeny[k]->hydro.h_max);
+      }
+    }
+  }
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (ti_hydro_end_min < ti_current)
+    error(
+        "Received a cell at an incorrect time c->ti_end_min=%lld, "
+        "e->ti_current=%lld.",
+        ti_hydro_end_min, ti_current);
+#endif
+
+  /* ... and store (the end-time updates below are disabled). */
+  // c->hydro.ti_end_min = ti_hydro_end_min;
+  // c->hydro.ti_end_max = ti_hydro_end_max;
+  c->hydro.ti_old_part = ti_current;
+  c->hydro.h_max = h_max;
+
+  if (timer) TIMER_TOC(timer_dorecv_part);
+
+#else
+  error("SWIFT was not compiled with MPI support.");
+#endif
+}
+
+/**
+ * @brief Construct the cell properties from the received #gpart.
+ *
+ * @param r The runner thread.
+ * @param c The cell.
+ * @param timer Are we timing this ?
+ */
+void runner_do_recv_gpart(struct runner *r, struct cell *c, int timer) {
+
+#ifdef WITH_MPI
+
+  const struct gpart *restrict gparts = c->grav.parts;
+  const size_t nr_gparts = c->grav.count;
+  const integertime_t ti_current = r->e->ti_current;
+
+  TIMER_TIC;
+
+  integertime_t ti_gravity_end_min = max_nr_timesteps;
+  integertime_t ti_gravity_end_max = 0;
+  timebin_t time_bin_min = num_time_bins;
+  timebin_t time_bin_max = 0;
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (c->nodeID == engine_rank) error("Updating a local cell!");
+#endif
+
+  /* If this cell is a leaf, collect the particle data. */
+  if (!c->split) {
+
+    /* Collect the time-bin range, skipping inhibited gparts. */
+    for (size_t k = 0; k < nr_gparts; k++) {
+      if (gparts[k].time_bin == time_bin_inhibited) continue;
+      time_bin_min = min(time_bin_min, gparts[k].time_bin);
+      time_bin_max = max(time_bin_max, gparts[k].time_bin);
+    }
+
+    /* Convert the time bins into integer end times */
+    ti_gravity_end_min = get_integer_time_end(ti_current, time_bin_min);
+    ti_gravity_end_max = get_integer_time_end(ti_current, time_bin_max);
+  }
+
+  /* Otherwise, recurse into the non-empty progeny and collect. */
+  else {
+    for (int k = 0; k < 8; k++) {
+      if (c->progeny[k] != NULL && c->progeny[k]->grav.count > 0) {
+        runner_do_recv_gpart(r, c->progeny[k], 0);
+        ti_gravity_end_min =
+            min(ti_gravity_end_min, c->progeny[k]->grav.ti_end_min);
+        ti_gravity_end_max =
+            max(ti_gravity_end_max, c->progeny[k]->grav.ti_end_max);
+      }
+    }
+  }
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (ti_gravity_end_min < ti_current)
+    error(
+        "Received a cell at an incorrect time c->ti_end_min=%lld, "
+        "e->ti_current=%lld.",
+        ti_gravity_end_min, ti_current);
+#endif
+
+  /* ... and store (the end-time updates below are disabled). */
+  // c->grav.ti_end_min = ti_gravity_end_min;
+  // c->grav.ti_end_max = ti_gravity_end_max;
+  c->grav.ti_old_part = ti_current;
+
+  if (timer) TIMER_TOC(timer_dorecv_gpart);
+
+#else
+  error("SWIFT was not compiled with MPI support.");
+#endif
+}
+
+/**
+ * @brief Construct the cell properties from the received #spart.
+ *
+ * @param r The runner thread.
+ * @param c The cell.
+ * @param clear_sorts Should we clear the sort flag and hence trigger a sort ?
+ * @param timer Are we timing this ?
+ */
+void runner_do_recv_spart(struct runner *r, struct cell *c, int clear_sorts,
+                          int timer) {
+
+#ifdef WITH_MPI
+
+  struct spart *restrict sparts = c->stars.parts;
+  const size_t nr_sparts = c->stars.count;
+  const integertime_t ti_current = r->e->ti_current;
+
+  TIMER_TIC;
+
+  integertime_t ti_stars_end_min = max_nr_timesteps;
+  integertime_t ti_stars_end_max = 0;
+  timebin_t time_bin_min = num_time_bins;
+  timebin_t time_bin_max = 0;
+  float h_max = 0.f;
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (c->nodeID == engine_rank) error("Updating a local cell!");
+#endif
+
+  /* Clear this cell's sorted mask if requested. */
+  if (clear_sorts) c->stars.sorted = 0;
+
+  /* If this cell is a leaf, collect the particle data. */
+  if (!c->split) {
+
+    /* Collect the time-bin range and h_max, skipping inhibited sparts. */
+    for (size_t k = 0; k < nr_sparts; k++) {
+#ifdef DEBUG_INTERACTIONS_STARS
+      sparts[k].num_ngb_force = 0;
+#endif
+      if (sparts[k].time_bin == time_bin_inhibited) continue;
+      time_bin_min = min(time_bin_min, sparts[k].time_bin);
+      time_bin_max = max(time_bin_max, sparts[k].time_bin);
+      h_max = max(h_max, sparts[k].h);
+    }
+
+    /* Convert the time bins into integer end times */
+    ti_stars_end_min = get_integer_time_end(ti_current, time_bin_min);
+    ti_stars_end_max = get_integer_time_end(ti_current, time_bin_max);
+  }
+
+  /* Otherwise, recurse into the non-empty progeny and collect. */
+  else {
+    for (int k = 0; k < 8; k++) {
+      if (c->progeny[k] != NULL && c->progeny[k]->stars.count > 0) {
+        runner_do_recv_spart(r, c->progeny[k], clear_sorts, 0);
+        ti_stars_end_min =
+            min(ti_stars_end_min, c->progeny[k]->stars.ti_end_min);
+        ti_stars_end_max =
+            max(ti_stars_end_max, c->progeny[k]->stars.ti_end_max);
+        h_max = max(h_max, c->progeny[k]->stars.h_max);
+      }
+    }
+  }
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (ti_stars_end_min < ti_current &&
+      !(r->e->policy & engine_policy_star_formation))
+    error(
+        "Received a cell at an incorrect time c->ti_end_min=%lld, "
+        "e->ti_current=%lld.",
+        ti_stars_end_min, ti_current);
+#endif
+
+  /* ... and store (the end-time updates below are disabled). */
+  // c->stars.ti_end_min = ti_stars_end_min;
+  // c->stars.ti_end_max = ti_stars_end_max;
+  c->stars.ti_old_part = ti_current;
+  c->stars.h_max = h_max;
+
+  if (timer) TIMER_TOC(timer_dorecv_spart);
+
+#else
+  error("SWIFT was not compiled with MPI support.");
+#endif
+}
+
+/**
+ * @brief Construct the cell properties from the received #bpart.
+ *
+ * Note that we do not need to clear the sorts since we do not sort
+ * the black holes.
+ *
+ * @param r The runner thread.
+ * @param c The cell.
+ * @param clear_sorts Unused here; only passed on to the recursive calls.
+ * @param timer Are we timing this ?
+ */
+void runner_do_recv_bpart(struct runner *r, struct cell *c, int clear_sorts,
+                          int timer) {
+
+#ifdef WITH_MPI
+
+  struct bpart *restrict bparts = c->black_holes.parts;
+  const size_t nr_bparts = c->black_holes.count;
+  const integertime_t ti_current = r->e->ti_current;
+
+  TIMER_TIC;
+
+  integertime_t ti_black_holes_end_min = max_nr_timesteps;
+  integertime_t ti_black_holes_end_max = 0;
+  timebin_t time_bin_min = num_time_bins;
+  timebin_t time_bin_max = 0;
+  float h_max = 0.f;
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (c->nodeID == engine_rank) error("Updating a local cell!");
+#endif
+
+  /* If this cell is a leaf, collect the particle data. */
+  if (!c->split) {
+
+    /* Collect the time-bin range and h_max, skipping inhibited bparts. */
+    for (size_t k = 0; k < nr_bparts; k++) {
+#ifdef DEBUG_INTERACTIONS_BLACK_HOLES
+      bparts[k].num_ngb_force = 0;
+#endif
+
+      /* message("Receiving bparts id=%lld time_bin=%d", */
+      /*         bparts[k].id, bparts[k].time_bin); */
+
+      if (bparts[k].time_bin == time_bin_inhibited) continue;
+      time_bin_min = min(time_bin_min, bparts[k].time_bin);
+      time_bin_max = max(time_bin_max, bparts[k].time_bin);
+      h_max = max(h_max, bparts[k].h);
+    }
+
+    /* Convert the time bins into integer end times */
+    ti_black_holes_end_min = get_integer_time_end(ti_current, time_bin_min);
+    ti_black_holes_end_max = get_integer_time_end(ti_current, time_bin_max);
+  }
+
+  /* Otherwise, recurse into the non-empty progeny and collect. */
+  else {
+    for (int k = 0; k < 8; k++) {
+      if (c->progeny[k] != NULL && c->progeny[k]->black_holes.count > 0) {
+        runner_do_recv_bpart(r, c->progeny[k], clear_sorts, 0);
+        ti_black_holes_end_min =
+            min(ti_black_holes_end_min, c->progeny[k]->black_holes.ti_end_min);
+        ti_black_holes_end_max =
+            max(ti_black_holes_end_max, c->progeny[k]->black_holes.ti_end_max);
+        h_max = max(h_max, c->progeny[k]->black_holes.h_max);
+      }
+    }
+  }
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (ti_black_holes_end_min < ti_current)
+    error(
+        "Received a cell at an incorrect time c->ti_end_min=%lld, "
+        "e->ti_current=%lld.",
+        ti_black_holes_end_min, ti_current);
+#endif
+
+  /* ... and store (the end-time updates below are disabled). */
+  // c->black_holes.ti_end_min = ti_black_holes_end_min;
+  // c->black_holes.ti_end_max = ti_black_holes_end_max;
+  c->black_holes.ti_old_part = ti_current;
+  c->black_holes.h_max = h_max;
+
+  if (timer) TIMER_TOC(timer_dorecv_bpart);
+
+#else
+  error("SWIFT was not compiled with MPI support.");
+#endif
+}
diff --git a/src/runner_sort.c b/src/runner_sort.c
new file mode 100644
index 0000000000000000000000000000000000000000..914b64f93b970000885b1b578d762d3f15455332
--- /dev/null
+++ b/src/runner_sort.c
@@ -0,0 +1,708 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
+ *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *               2015 Peter W. Draper (p.w.draper@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+/* Config parameters. */
+#include "../config.h"
+
+/* This object's header. */
+#include "runner.h"
+
+/* Local headers. */
+#include "active.h"
+#include "cell.h"
+#include "engine.h"
+#include "timers.h"
+
+/**
+ * @brief Re-sorts all the stars in a cell hierarchy if this was requested.
+ *
+ * This is intended to be used after the star formation task has been run
+ * to get the cells back into a state where self/pair star tasks can be run.
+ *
+ * @param r The thread #runner.
+ * @param c The top-level cell to run on.
+ * @param timer If non-zero, record the time spent in this function.
+ */
+void runner_do_stars_resort(struct runner *r, struct cell *c, const int timer) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (c->nodeID != r->e->nodeID) error("Task must be run locally!");
+#endif
+
+  TIMER_TIC;
+
+  /* Was a re-sort of the stars requested? If so, do it and clear the flag. */
+  if (cell_get_flag(c, cell_flag_do_stars_resort)) {
+    runner_do_all_stars_sort(r, c);
+    cell_clear_flag(c, cell_flag_do_stars_resort);
+  }
+
+  if (timer) TIMER_TOC(timer_do_stars_resort);
+}
+
+/**
+ * @brief Sort the entries in ascending order of their d field using QuickSort.
+ *
+ * @param sort The entries, sorted in place.
+ * @param N The number of entries; nothing is done for N <= 1.
+ */
+void runner_do_sort_ascending(struct sort_entry *sort, int N) {
+
+  struct {
+    int lo, hi; /* Inclusive bounds, as wide as N so nothing is truncated. */
+  } qstack[64]; /* Smaller part is sorted first, so depth is O(log N). */
+  int qpos, i, j, lo, hi, imin;
+  struct sort_entry temp;
+  float pivot;
+
+  /* Iterative quicksort, ascending in d, driven by an explicit stack. */
+  qstack[0].lo = 0;
+  qstack[0].hi = N - 1;
+  qpos = 0;
+  while (qpos >= 0) {
+    lo = qstack[qpos].lo;
+    hi = qstack[qpos].hi;
+    qpos -= 1;
+    if (hi - lo < 15) { /* Short range: plain selection sort. */
+      for (i = lo; i < hi; i++) {
+        imin = i;
+        for (j = i + 1; j <= hi; j++)
+          if (sort[j].d < sort[imin].d) imin = j;
+        if (imin != i) {
+          temp = sort[imin];
+          sort[imin] = sort[i];
+          sort[i] = temp;
+        }
+      }
+    } else {
+      pivot = sort[(lo + hi) / 2].d; /* Middle entry is the pivot. */
+      i = lo;
+      j = hi;
+      while (i <= j) {
+        while (sort[i].d < pivot) i++;
+        while (sort[j].d > pivot) j--;
+        if (i <= j) {
+          if (i < j) {
+            temp = sort[i];
+            sort[i] = sort[j];
+            sort[j] = temp;
+          }
+          i += 1;
+          j -= 1;
+        }
+      }
+      if (j > (lo + hi) / 2) { /* Left part is larger: push it first. */
+        if (lo < j) {
+          qpos += 1;
+          qstack[qpos].lo = lo;
+          qstack[qpos].hi = j;
+        }
+        if (i < hi) {
+          qpos += 1;
+          qstack[qpos].lo = i;
+          qstack[qpos].hi = hi;
+        }
+      } else { /* Right part is larger (or equal): push it first. */
+        if (i < hi) {
+          qpos += 1;
+          qstack[qpos].lo = i;
+          qstack[qpos].hi = hi;
+        }
+        if (lo < j) {
+          qpos += 1;
+          qstack[qpos].lo = lo;
+          qstack[qpos].hi = j;
+        }
+      }
+    }
+  }
+}
+
+#ifdef SWIFT_DEBUG_CHECKS
+/**
+ * @brief Recursively checks that the sort flags are consistent in a hierarchy.
+ *
+ * Errors if a bit of flags is unset in c->TYPE.sorted. Flavours: hydro, stars.
+ */
+#define RUNNER_CHECK_SORTS(TYPE)                                               \
+  void runner_check_sorts_##TYPE(struct cell *c, int flags) {                  \
+                                                                               \
+    if (flags & ~c->TYPE.sorted) error("Inconsistent sort flags (downward)!"); \
+    if (c->split)                                                              \
+      for (int k = 0; k < 8; k++)                                              \
+        if (c->progeny[k] != NULL && c->progeny[k]->TYPE.count > 0)            \
+          runner_check_sorts_##TYPE(c->progeny[k], c->TYPE.sorted);            \
+  }
+#else /* Without the debugging checks the function only raises an error. */
+#define RUNNER_CHECK_SORTS(TYPE)                                       \
+  void runner_check_sorts_##TYPE(struct cell *c, int flags) {          \
+    error("Calling debugging code without debugging flag activated."); \
+  }
+#endif /* SWIFT_DEBUG_CHECKS */
+
+RUNNER_CHECK_SORTS(hydro)
+RUNNER_CHECK_SORTS(stars)
+
+/**
+ * @brief Sort the hydro particles of a cell along the requested directions.
+ *
+ * @param r The #runner.
+ * @param c The #cell.
+ * @param flags Bit mask of the sort directions (bits 0 to 12) to build.
+ * @param cleanup If true, re-build the sorts for the selected flags instead
+ *        of just adding them.
+ * @param clock Flag indicating whether to record the timing or not, needed
+ *      for recursive calls.
+ */
+void runner_do_hydro_sort(struct runner *r, struct cell *c, int flags,
+                          int cleanup, int clock) {
+
+  struct sort_entry *fingers[8];
+  const int count = c->hydro.count;
+  const struct part *parts = c->hydro.parts;
+  struct xpart *xparts = c->hydro.xparts;
+  float buff[8];
+
+  TIMER_TIC;
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (c->hydro.super == NULL) error("Task called above the super level!!!");
+#endif
+
+  /* We need to do the local sorts plus whatever was requested further up. */
+  flags |= c->hydro.do_sort;
+  if (cleanup) {
+    c->hydro.sorted = 0;
+  } else {
+    flags &= ~c->hydro.sorted;
+  }
+  if (flags == 0 && !cell_get_flag(c, cell_flag_do_hydro_sub_sort)) return;
+
+  /* Check that the particles have been moved to the current time */
+  if (flags && !cell_are_part_drifted(c, r->e))
+    error("Sorting un-drifted cell c->nodeID=%d", c->nodeID);
+
+#ifdef SWIFT_DEBUG_CHECKS
+  /* Make sure the sort flags are consistent (downward). */
+  runner_check_sorts_hydro(c, c->hydro.sorted);
+
+  /* Make sure the sort flags are consistent (upward). */
+  for (struct cell *finger = c->parent; finger != NULL;
+       finger = finger->parent) {
+    if (finger->hydro.sorted & ~c->hydro.sorted)
+      error("Inconsistent sort flags (upward).");
+  }
+
+  /* Update the sort timer which represents the last time the sorts
+     were re-set. */
+  if (c->hydro.sorted == 0) c->hydro.ti_sort = r->e->ti_current;
+#endif
+
+  /* Allocate memory for sorting. */
+  cell_malloc_hydro_sorts(c, flags);
+
+  /* Does this cell have any progeny? */
+  if (c->split) {
+
+    /* Sort the non-empty progeny first, tracking their maximal dx_max_sort. */
+    float dx_max_sort = 0.0f;
+    float dx_max_sort_old = 0.0f;
+    for (int k = 0; k < 8; k++) {
+      if (c->progeny[k] != NULL) {
+
+        if (c->progeny[k]->hydro.count > 0) {
+
+          /* Only propagate cleanup if the progeny is stale. */
+          runner_do_hydro_sort(
+              r, c->progeny[k], flags,
+              cleanup && (c->progeny[k]->hydro.dx_max_sort_old >
+                          space_maxreldx * c->progeny[k]->dmin),
+              0);
+          dx_max_sort = max(dx_max_sort, c->progeny[k]->hydro.dx_max_sort);
+          dx_max_sort_old =
+              max(dx_max_sort_old, c->progeny[k]->hydro.dx_max_sort_old);
+        } else {
+
+          /* We need to clean up the unused flags that were set in case the
+             number of particles in the cell would change */
+          cell_clear_hydro_sort_flags(c->progeny[k], /*clear_unused_flags=*/1);
+        }
+      }
+    }
+    c->hydro.dx_max_sort = dx_max_sort;
+    c->hydro.dx_max_sort_old = dx_max_sort_old;
+
+    /* Loop over the 13 different sort arrays. */
+    for (int j = 0; j < 13; j++) {
+
+      /* Has this sort array been flagged? */
+      if (!(flags & (1 << j))) continue;
+
+      /* Init the particle index offsets. */
+      int off[8];
+      off[0] = 0;
+      for (int k = 1; k < 8; k++)
+        if (c->progeny[k - 1] != NULL)
+          off[k] = off[k - 1] + c->progeny[k - 1]->hydro.count;
+        else
+          off[k] = off[k - 1];
+
+      /* Init the entries and indices. */
+      int inds[8];
+      for (int k = 0; k < 8; k++) {
+        inds[k] = k;
+        if (c->progeny[k] != NULL && c->progeny[k]->hydro.count > 0) {
+          fingers[k] = c->progeny[k]->hydro.sort[j];
+          buff[k] = fingers[k]->d;
+          /* off[k] already holds this progeny's index offset. */
+        } else
+          buff[k] = FLT_MAX;
+      }
+
+      /* Sort the progeny indices by their buffered (smallest) distance. */
+      for (int i = 0; i < 7; i++)
+        for (int k = i + 1; k < 8; k++)
+          if (buff[inds[k]] < buff[inds[i]]) {
+            int temp_i = inds[i];
+            inds[i] = inds[k];
+            inds[k] = temp_i;
+          }
+
+      /* For each entry in the new sort list. */
+      struct sort_entry *finger = c->hydro.sort[j];
+      for (int ind = 0; ind < count; ind++) {
+
+        /* Copy the minimum into the new sort array. */
+        finger[ind].d = buff[inds[0]];
+        finger[ind].i = fingers[inds[0]]->i + off[inds[0]];
+
+        /* Advance that progeny's finger and refresh its buffered distance. */
+        fingers[inds[0]] += 1;
+        buff[inds[0]] = fingers[inds[0]]->d;
+
+        /* Bubble the updated entry down to keep inds ordered by distance. */
+        for (int k = 1; k < 8 && buff[inds[k]] < buff[inds[k - 1]]; k++) {
+          int temp_i = inds[k - 1];
+          inds[k - 1] = inds[k];
+          inds[k] = temp_i;
+        }
+
+      } /* Merge. */
+
+      /* Add a sentinel. */
+      c->hydro.sort[j][count].d = FLT_MAX;
+      c->hydro.sort[j][count].i = 0;
+
+      /* Mark as sorted. */
+      atomic_or(&c->hydro.sorted, 1 << j);
+
+    } /* loop over sort arrays. */
+
+  } /* progeny? */
+
+  /* Otherwise, just sort. */
+  else {
+
+    /* Reset the sort distance */
+    if (c->hydro.sorted == 0) {
+#ifdef SWIFT_DEBUG_CHECKS
+      if (xparts != NULL && c->nodeID != engine_rank)
+        error("Have non-NULL xparts in foreign cell");
+#endif
+
+      /* And the individual sort distances if we are a local cell */
+      if (xparts != NULL) {
+        for (int k = 0; k < count; k++) {
+          xparts[k].x_diff_sort[0] = 0.0f;
+          xparts[k].x_diff_sort[1] = 0.0f;
+          xparts[k].x_diff_sort[2] = 0.0f;
+        }
+      }
+      c->hydro.dx_max_sort_old = 0.f;
+      c->hydro.dx_max_sort = 0.f;
+    }
+
+    /* Fill the sort arrays: d is the position dotted with runner_shift[j]. */
+    for (int k = 0; k < count; k++) {
+      const double px[3] = {parts[k].x[0], parts[k].x[1], parts[k].x[2]};
+      for (int j = 0; j < 13; j++)
+        if (flags & (1 << j)) {
+          c->hydro.sort[j][k].i = k;
+          c->hydro.sort[j][k].d = px[0] * runner_shift[j][0] +
+                                  px[1] * runner_shift[j][1] +
+                                  px[2] * runner_shift[j][2];
+        }
+    }
+
+    /* Add the sentinel and sort. */
+    for (int j = 0; j < 13; j++)
+      if (flags & (1 << j)) {
+        c->hydro.sort[j][count].d = FLT_MAX;
+        c->hydro.sort[j][count].i = 0;
+        runner_do_sort_ascending(c->hydro.sort[j], count);
+        atomic_or(&c->hydro.sorted, 1 << j);
+      }
+  }
+
+#ifdef SWIFT_DEBUG_CHECKS
+  /* Verify the sorting. */
+  for (int j = 0; j < 13; j++) {
+    if (!(flags & (1 << j))) continue;
+    struct sort_entry *finger = c->hydro.sort[j];
+    for (int k = 1; k < count; k++) {
+      if (finger[k].d < finger[k - 1].d)
+        error("Sorting failed, ascending array.");
+      if (finger[k].i >= count) error("Sorting failed, indices borked.");
+    }
+  }
+
+  /* Make sure the sort flags are consistent (downward). */
+  runner_check_sorts_hydro(c, flags);
+
+  /* Make sure the sort flags are consistent (upward). */
+  for (struct cell *finger = c->parent; finger != NULL;
+       finger = finger->parent) {
+    if (finger->hydro.sorted & ~c->hydro.sorted)
+      error("Inconsistent sort flags.");
+  }
+#endif
+
+  /* Clear the cell's sort flags. */
+  c->hydro.do_sort = 0;
+  cell_clear_flag(c, cell_flag_do_hydro_sub_sort);
+  c->hydro.requires_sorts = 0;
+
+  if (clock) TIMER_TOC(timer_dosort);
+}
+
+/**
+ * @brief Sort the stars particles in the given cell along the requested
+ * directions.
+ *
+ * @param r The #runner.
+ * @param c The #cell.
+ * @param flags Bit mask of the sort directions (bits 0 to 12) to build.
+ * @param cleanup If true, re-build the sorts for the selected flags instead
+ *        of just adding them.
+ * @param clock Flag indicating whether to record the timing or not, needed
+ *      for recursive calls.
+ */
+void runner_do_stars_sort(struct runner *r, struct cell *c, int flags,
+                          int cleanup, int clock) {
+
+  struct sort_entry *fingers[8];
+  const int count = c->stars.count;
+  struct spart *sparts = c->stars.parts;
+  float buff[8];
+
+  TIMER_TIC;
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (c->hydro.super == NULL) error("Task called above the super level!!!");
+#endif
+
+  /* We need to do the local sorts plus whatever was requested further up. */
+  flags |= c->stars.do_sort;
+  if (cleanup) {
+    c->stars.sorted = 0;
+  } else {
+    flags &= ~c->stars.sorted;
+  }
+  if (flags == 0 && !cell_get_flag(c, cell_flag_do_stars_sub_sort)) return;
+
+  /* Check that the particles have been moved to the current time */
+  if (flags && !cell_are_spart_drifted(c, r->e)) {
+    error("Sorting un-drifted cell c->nodeID=%d", c->nodeID);
+  }
+
+#ifdef SWIFT_DEBUG_CHECKS
+  /* Make sure the sort flags are consistent (downward). */
+  runner_check_sorts_stars(c, c->stars.sorted);
+
+  /* Make sure the sort flags are consistent (upward). */
+  for (struct cell *finger = c->parent; finger != NULL;
+       finger = finger->parent) {
+    if (finger->stars.sorted & ~c->stars.sorted)
+      error("Inconsistent sort flags (upward).");
+  }
+
+  /* Update the sort timer which represents the last time the sorts
+     were re-set. */
+  if (c->stars.sorted == 0) c->stars.ti_sort = r->e->ti_current;
+#endif
+
+  /* Start by allocating the entry arrays in the requested dimensions. */
+  cell_malloc_stars_sorts(c, flags);
+
+  /* Does this cell have any progeny? */
+  if (c->split) {
+
+    /* Sort the non-empty progeny first, tracking their maximal dx_max_sort. */
+    float dx_max_sort = 0.0f;
+    float dx_max_sort_old = 0.0f;
+    for (int k = 0; k < 8; k++) {
+      if (c->progeny[k] != NULL) {
+
+        if (c->progeny[k]->stars.count > 0) {
+
+          /* Only propagate cleanup if the progeny is stale. */
+          const int cleanup_prog =
+              cleanup && (c->progeny[k]->stars.dx_max_sort_old >
+                          space_maxreldx * c->progeny[k]->dmin);
+          runner_do_stars_sort(r, c->progeny[k], flags, cleanup_prog, 0);
+          dx_max_sort = max(dx_max_sort, c->progeny[k]->stars.dx_max_sort);
+          dx_max_sort_old =
+              max(dx_max_sort_old, c->progeny[k]->stars.dx_max_sort_old);
+        } else {
+
+          /* We need to clean up the unused flags that were set in case the
+             number of particles in the cell would change */
+          cell_clear_stars_sort_flags(c->progeny[k], /*clear_unused_flags=*/1);
+        }
+      }
+    }
+    c->stars.dx_max_sort = dx_max_sort;
+    c->stars.dx_max_sort_old = dx_max_sort_old;
+
+    /* Loop over the 13 different sort arrays. */
+    for (int j = 0; j < 13; j++) {
+
+      /* Has this sort array been flagged? */
+      if (!(flags & (1 << j))) continue;
+
+      /* Init the particle index offsets. */
+      int off[8];
+      off[0] = 0;
+      for (int k = 1; k < 8; k++)
+        if (c->progeny[k - 1] != NULL)
+          off[k] = off[k - 1] + c->progeny[k - 1]->stars.count;
+        else
+          off[k] = off[k - 1];
+
+      /* Init the entries and indices. */
+      int inds[8];
+      for (int k = 0; k < 8; k++) {
+        inds[k] = k;
+        if (c->progeny[k] != NULL && c->progeny[k]->stars.count > 0) {
+          fingers[k] = c->progeny[k]->stars.sort[j];
+          buff[k] = fingers[k]->d;
+          /* off[k] already holds this progeny's index offset. */
+        } else
+          buff[k] = FLT_MAX;
+      }
+
+      /* Sort the progeny indices by their buffered (smallest) distance. */
+      for (int i = 0; i < 7; i++)
+        for (int k = i + 1; k < 8; k++)
+          if (buff[inds[k]] < buff[inds[i]]) {
+            int temp_i = inds[i];
+            inds[i] = inds[k];
+            inds[k] = temp_i;
+          }
+
+      /* For each entry in the new sort list. */
+      struct sort_entry *finger = c->stars.sort[j];
+      for (int ind = 0; ind < count; ind++) {
+
+        /* Copy the minimum into the new sort array. */
+        finger[ind].d = buff[inds[0]];
+        finger[ind].i = fingers[inds[0]]->i + off[inds[0]];
+
+        /* Advance that progeny's finger and refresh its buffered distance. */
+        fingers[inds[0]] += 1;
+        buff[inds[0]] = fingers[inds[0]]->d;
+
+        /* Bubble the updated entry down to keep inds ordered by distance. */
+        for (int k = 1; k < 8 && buff[inds[k]] < buff[inds[k - 1]]; k++) {
+          int temp_i = inds[k - 1];
+          inds[k - 1] = inds[k];
+          inds[k] = temp_i;
+        }
+
+      } /* Merge. */
+
+      /* Add a sentinel. */
+      c->stars.sort[j][count].d = FLT_MAX;
+      c->stars.sort[j][count].i = 0;
+
+      /* Mark as sorted. */
+      atomic_or(&c->stars.sorted, 1 << j);
+
+    } /* loop over sort arrays. */
+
+  } /* progeny? */
+
+  /* Otherwise, just sort. */
+  else {
+
+    /* Reset the sort distance */
+    if (c->stars.sorted == 0) {
+
+      /* And the individual sort distances of the star particles */
+      for (int k = 0; k < count; k++) {
+        sparts[k].x_diff_sort[0] = 0.0f;
+        sparts[k].x_diff_sort[1] = 0.0f;
+        sparts[k].x_diff_sort[2] = 0.0f;
+      }
+      c->stars.dx_max_sort_old = 0.f;
+      c->stars.dx_max_sort = 0.f;
+    }
+
+    /* Fill the sort arrays: d is the position dotted with runner_shift[j]. */
+    for (int k = 0; k < count; k++) {
+      const double px[3] = {sparts[k].x[0], sparts[k].x[1], sparts[k].x[2]};
+      for (int j = 0; j < 13; j++)
+        if (flags & (1 << j)) {
+          c->stars.sort[j][k].i = k;
+          c->stars.sort[j][k].d = px[0] * runner_shift[j][0] +
+                                  px[1] * runner_shift[j][1] +
+                                  px[2] * runner_shift[j][2];
+        }
+    }
+
+    /* Add the sentinel and sort. */
+    for (int j = 0; j < 13; j++)
+      if (flags & (1 << j)) {
+        c->stars.sort[j][count].d = FLT_MAX;
+        c->stars.sort[j][count].i = 0;
+        runner_do_sort_ascending(c->stars.sort[j], count);
+        atomic_or(&c->stars.sorted, 1 << j);
+      }
+  }
+
+#ifdef SWIFT_DEBUG_CHECKS
+  /* Verify the sorting. */
+  for (int j = 0; j < 13; j++) {
+    if (!(flags & (1 << j))) continue;
+    struct sort_entry *finger = c->stars.sort[j];
+    for (int k = 1; k < count; k++) {
+      if (finger[k].d < finger[k - 1].d)
+        error("Sorting failed, ascending array.");
+      if (finger[k].i >= count) error("Sorting failed, indices borked.");
+    }
+  }
+
+  /* Make sure the sort flags are consistent (downward). */
+  runner_check_sorts_stars(c, flags);
+
+  /* Make sure the sort flags are consistent (upward). */
+  for (struct cell *finger = c->parent; finger != NULL;
+       finger = finger->parent) {
+    if (finger->stars.sorted & ~c->stars.sorted)
+      error("Inconsistent sort flags.");
+  }
+#endif
+
+  /* Clear the cell's sort flags. */
+  c->stars.do_sort = 0;
+  cell_clear_flag(c, cell_flag_do_stars_sub_sort);
+  c->stars.requires_sorts = 0;
+
+  if (clock) TIMER_TOC(timer_do_stars_sort);
+}
+
+/**
+ * @brief Recurse into a cell until reaching the super level and call
+ * the hydro sorting function there.
+ *
+ * This function must be called at or above the super level!
+ *
+ * Sorts in all 13 directions (mask 0x1FFF); hydro-inactive cells are skipped.
+ *
+ * @param r the #runner.
+ * @param c the #cell.
+ */
+void runner_do_all_hydro_sort(struct runner *r, struct cell *c) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (c->nodeID != engine_rank) error("Function called on a foreign cell!");
+#endif
+
+  if (!cell_is_active_hydro(c, r->e)) return;
+
+  /* Shall we sort at this level? */
+  if (c->hydro.super == c) {
+
+    /* Sort everything */
+    runner_do_hydro_sort(r, c, 0x1FFF, /*cleanup=*/0, /*clock=*/0);
+
+  } else {
+
+#ifdef SWIFT_DEBUG_CHECKS
+    if (c->hydro.super != NULL) error("Function called below the super level!");
+#endif
+
+    /* Ok, then, let's try lower */
+    if (c->split) {
+      for (int k = 0; k < 8; ++k) {
+        if (c->progeny[k] != NULL) runner_do_all_hydro_sort(r, c->progeny[k]);
+      }
+    } else {
+#ifdef SWIFT_DEBUG_CHECKS
+      error("Reached a leaf without encountering a hydro super cell!");
+#endif
+    }
+  }
+}
+
+/**
+ * @brief Recurse into a cell until reaching the super level and call
+ * the star sorting function there.
+ *
+ * This function must be called at or above the super level!
+ *
+ * Sorts in all 13 directions (mask 0x1FFF) unless stars and hydro are inactive.
+ *
+ * @param r the #runner.
+ * @param c the #cell.
+ */
+void runner_do_all_stars_sort(struct runner *r, struct cell *c) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (c->nodeID != engine_rank) error("Function called on a foreign cell!");
+#endif
+
+  if (!cell_is_active_stars(c, r->e) && !cell_is_active_hydro(c, r->e)) return;
+
+  /* Shall we sort at this level? (The hydro super level is the one used.) */
+  if (c->hydro.super == c) {
+
+    /* Sort everything */
+    runner_do_stars_sort(r, c, 0x1FFF, /*cleanup=*/0, /*clock=*/0);
+
+  } else {
+
+#ifdef SWIFT_DEBUG_CHECKS
+    if (c->hydro.super != NULL) error("Function called below the super level!");
+#endif
+
+    /* Ok, then, let's try lower */
+    if (c->split) {
+      for (int k = 0; k < 8; ++k) {
+        if (c->progeny[k] != NULL) runner_do_all_stars_sort(r, c->progeny[k]);
+      }
+    } else {
+#ifdef SWIFT_DEBUG_CHECKS
+      error("Reached a leaf without encountering a hydro super cell!");
+#endif
+    }
+  }
+}
diff --git a/src/runner_time_integration.c b/src/runner_time_integration.c
new file mode 100644
index 0000000000000000000000000000000000000000..e1f5de709da804330953b47a647d0f0ce13de7bb
--- /dev/null
+++ b/src/runner_time_integration.c
@@ -0,0 +1,987 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
+ *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *               2015 Peter W. Draper (p.w.draper@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+/* Config parameters. */
+#include "../config.h"
+
+/* This object's header. */
+#include "runner.h"
+
+/* Local headers. */
+#include "active.h"
+#include "black_holes.h"
+#include "cell.h"
+#include "engine.h"
+#include "kick.h"
+#include "timers.h"
+#include "timestep.h"
+#include "timestep_limiter.h"
+#include "tracers.h"
+
+/**
+ * @brief Initialize the multipole field tensors before the gravity calculation.
+ *
+ * @param r The runner thread.
+ * @param c The cell.
+ * @param timer 1 if the time is to be recorded (0 is used when recursing).
+ */
+void runner_do_init_grav(struct runner *r, struct cell *c, int timer) {
+
+  const struct engine *e = r->e;
+
+  TIMER_TIC;
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (!(e->policy & engine_policy_self_gravity))
+    error("Grav-init task called outside of self-gravity calculation");
+#endif
+
+  /* Anything to do here? Gravity-inactive cells are left untouched. */
+  if (!cell_is_active_gravity(c, e)) return;
+
+  /* Reset the gravity acceleration tensors */
+  gravity_field_tensors_init(&c->grav.multipole->pot, e->ti_current);
+
+  /* Recurse into the existing progeny (without timing them separately). */
+  if (c->split) {
+    for (int k = 0; k < 8; k++) {
+      if (c->progeny[k] != NULL) runner_do_init_grav(r, c->progeny[k], 0);
+    }
+  }
+
+  if (timer) TIMER_TOC(timer_init_grav);
+}
+
+/**
+ * @brief Perform the first half-kick on all the starting particles in a cell.
+ *
+ * @param r The runner thread.
+ * @param c The cell.
+ * @param timer If non-zero, record the time spent (0 is used when recursing).
+ */
+void runner_do_kick1(struct runner *r, struct cell *c, int timer) {
+
+  const struct engine *e = r->e;
+  const struct cosmology *cosmo = e->cosmology;
+  const struct hydro_props *hydro_props = e->hydro_properties;
+  const struct entropy_floor_properties *entropy_floor = e->entropy_floor;
+  const int with_cosmology = (e->policy & engine_policy_cosmology);
+  struct part *restrict parts = c->hydro.parts;
+  struct xpart *restrict xparts = c->hydro.xparts;
+  struct gpart *restrict gparts = c->grav.parts;
+  struct spart *restrict sparts = c->stars.parts;
+  const int count = c->hydro.count;
+  const int gcount = c->grav.count;
+  const int scount = c->stars.count;
+  const integertime_t ti_current = e->ti_current;
+  const double time_base = e->time_base;
+
+  TIMER_TIC;
+
+  /* Anything to do here? */
+  if (!cell_is_starting_hydro(c, e) && !cell_is_starting_gravity(c, e) &&
+      !cell_is_starting_stars(c, e) && !cell_is_starting_black_holes(c, e))
+    return;
+
+  /* Recurse into the progeny, or kick the particles of this leaf cell. */
+  if (c->split) {
+    for (int k = 0; k < 8; k++)
+      if (c->progeny[k] != NULL) runner_do_kick1(r, c->progeny[k], 0);
+  } else {
+
+    /* Loop over the parts in this cell. */
+    for (int k = 0; k < count; k++) {
+
+      /* Get a handle on the part. */
+      struct part *restrict p = &parts[k];
+      struct xpart *restrict xp = &xparts[k];
+
+      /* If particle needs to be kicked */
+      if (part_is_starting(p, e)) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+        if (p->wakeup == time_bin_awake)
+          error("Woken-up particle that has not been processed in kick1");
+#endif
+
+        /* Skip particles that have been woken up and treated by the limiter. */
+        if (p->wakeup != time_bin_not_awake) continue;
+
+        const integertime_t ti_step = get_integer_timestep(p->time_bin);
+        const integertime_t ti_begin =
+            get_integer_time_begin(ti_current + 1, p->time_bin);
+
+#ifdef SWIFT_DEBUG_CHECKS
+        const integertime_t ti_end = ti_begin + ti_step;
+
+        if (ti_begin != ti_current)
+          error(
+              "Particle in wrong time-bin, ti_end=%lld, ti_begin=%lld, "
+              "ti_step=%lld time_bin=%d wakeup=%d ti_current=%lld",
+              ti_end, ti_begin, ti_step, p->time_bin, p->wakeup, ti_current);
+#endif
+
+        /* Time interval for this half-kick (first half of the time-step) */
+        double dt_kick_grav, dt_kick_hydro, dt_kick_therm, dt_kick_corr;
+        if (with_cosmology) {
+          dt_kick_hydro = cosmology_get_hydro_kick_factor(
+              cosmo, ti_begin, ti_begin + ti_step / 2);
+          dt_kick_grav = cosmology_get_grav_kick_factor(cosmo, ti_begin,
+                                                        ti_begin + ti_step / 2);
+          dt_kick_therm = cosmology_get_therm_kick_factor(
+              cosmo, ti_begin, ti_begin + ti_step / 2);
+          dt_kick_corr = cosmology_get_corr_kick_factor(cosmo, ti_begin,
+                                                        ti_begin + ti_step / 2);
+        } else {
+          dt_kick_hydro = (ti_step / 2) * time_base;
+          dt_kick_grav = (ti_step / 2) * time_base;
+          dt_kick_therm = (ti_step / 2) * time_base;
+          dt_kick_corr = (ti_step / 2) * time_base;
+        }
+
+        /* Do the kick */
+        kick_part(p, xp, dt_kick_hydro, dt_kick_grav, dt_kick_therm,
+                  dt_kick_corr, cosmo, hydro_props, entropy_floor, ti_begin,
+                  ti_begin + ti_step / 2);
+
+        /* Update the accelerations to be used in the drift for hydro */
+        if (p->gpart != NULL) {
+
+          xp->a_grav[0] = p->gpart->a_grav[0];
+          xp->a_grav[1] = p->gpart->a_grav[1];
+          xp->a_grav[2] = p->gpart->a_grav[2];
+        }
+      }
+    }
+
+    /* Loop over the gparts in this cell. */
+    for (int k = 0; k < gcount; k++) {
+
+      /* Get a handle on the g-part. */
+      struct gpart *restrict gp = &gparts[k];
+
+      /* If the g-particle has no counterpart and needs to be kicked */
+      if ((gp->type == swift_type_dark_matter ||
+           gp->type == swift_type_dark_matter_background) &&
+          gpart_is_starting(gp, e)) {
+
+        const integertime_t ti_step = get_integer_timestep(gp->time_bin);
+        const integertime_t ti_begin =
+            get_integer_time_begin(ti_current + 1, gp->time_bin);
+
+#ifdef SWIFT_DEBUG_CHECKS
+        const integertime_t ti_end =
+            get_integer_time_end(ti_current + 1, gp->time_bin);
+
+        if (ti_begin != ti_current)
+          error(
+              "Particle in wrong time-bin, ti_end=%lld, ti_begin=%lld, "
+              "ti_step=%lld time_bin=%d ti_current=%lld",
+              ti_end, ti_begin, ti_step, gp->time_bin, ti_current);
+#endif
+
+        /* Time interval for this half-kick (first half of the time-step) */
+        double dt_kick_grav;
+        if (with_cosmology) {
+          dt_kick_grav = cosmology_get_grav_kick_factor(cosmo, ti_begin,
+                                                        ti_begin + ti_step / 2);
+        } else {
+          dt_kick_grav = (ti_step / 2) * time_base;
+        }
+
+        /* Do the kick */
+        kick_gpart(gp, dt_kick_grav, ti_begin, ti_begin + ti_step / 2);
+      }
+    }
+
+    /* Loop over the stars particles in this cell. */
+    for (int k = 0; k < scount; k++) {
+
+      /* Get a handle on the s-part. */
+      struct spart *restrict sp = &sparts[k];
+
+      /* If particle needs to be kicked */
+      if (spart_is_starting(sp, e)) {
+
+        const integertime_t ti_step = get_integer_timestep(sp->time_bin);
+        const integertime_t ti_begin =
+            get_integer_time_begin(ti_current + 1, sp->time_bin);
+
+#ifdef SWIFT_DEBUG_CHECKS
+        const integertime_t ti_end =
+            get_integer_time_end(ti_current + 1, sp->time_bin);
+
+        if (ti_begin != ti_current)
+          error(
+              "Particle in wrong time-bin, ti_end=%lld, ti_begin=%lld, "
+              "ti_step=%lld time_bin=%d ti_current=%lld",
+              ti_end, ti_begin, ti_step, sp->time_bin, ti_current);
+#endif
+
+        /* Time interval for this half-kick (first half of the time-step) */
+        double dt_kick_grav;
+        if (with_cosmology) {
+          dt_kick_grav = cosmology_get_grav_kick_factor(cosmo, ti_begin,
+                                                        ti_begin + ti_step / 2);
+        } else {
+          dt_kick_grav = (ti_step / 2) * time_base;
+        }
+
+        /* Do the kick */
+        kick_spart(sp, dt_kick_grav, ti_begin, ti_begin + ti_step / 2);
+      }
+    }
+  }
+
+  if (timer) TIMER_TOC(timer_kick1);
+}
+
+/**
+ * @brief Apply the second half-kick to all the active particles in a cell.
+ *
+ * Active gas, dark-matter and star particles are kicked from the mid-point
+ * of their time-step to its end, after which their predicted values are
+ * reset so that they can be drifted. Split cells only recurse into their
+ * non-empty progeny. No black hole particle is touched in this function.
+ *
+ * @param r The runner thread.
+ * @param c The cell.
+ * @param timer Are we timing this ?
+ */
+void runner_do_kick2(struct runner *r, struct cell *c, int timer) {
+
+  const struct engine *e = r->e;
+  const struct cosmology *cosmo = e->cosmology;
+  const struct hydro_props *hydro_props = e->hydro_properties;
+  const struct entropy_floor_properties *entropy_floor = e->entropy_floor;
+  const int with_cosmology = (e->policy & engine_policy_cosmology);
+  const integertime_t ti_current = e->ti_current;
+  const double time_base = e->time_base;
+  const int count = c->hydro.count;
+  const int gcount = c->grav.count;
+  const int scount = c->stars.count;
+  struct part *restrict parts = c->hydro.parts;
+  struct xpart *restrict xparts = c->hydro.xparts;
+  struct gpart *restrict gparts = c->grav.parts;
+  struct spart *restrict sparts = c->stars.parts;
+
+  TIMER_TIC;
+
+  /* Leave straight away if no particle type is active in this cell */
+  const int any_active =
+      cell_is_active_hydro(c, e) || cell_is_active_gravity(c, e) ||
+      cell_is_active_stars(c, e) || cell_is_active_black_holes(c, e);
+  if (!any_active) return;
+
+  if (!c->split) {
+
+    /* Gas particles. */
+    for (int i = 0; i < count; i++) {
+
+      struct part *restrict p = &parts[i];
+      struct xpart *restrict xp = &xparts[i];
+
+      /* Only the active particles get kicked */
+      if (!part_is_active(p, e)) continue;
+
+#ifdef SWIFT_DEBUG_CHECKS
+      if (p->wakeup == time_bin_awake)
+        error("Woken-up particle that has not been processed in kick1");
+#endif
+
+      integertime_t ti_begin, ti_end, ti_step;
+
+      if (p->wakeup != time_bin_not_awake) {
+
+        /* Step following a wake-up call: it begins on the wake-up bin
+         * and ends on the regular time-bin of the particle */
+        ti_begin = get_integer_time_begin(ti_current, p->wakeup);
+        ti_end = get_integer_time_end(ti_current, p->time_bin);
+        ti_step = ti_end - ti_begin;
+
+        /* Reset the flag. Everything is back to normal from now on. */
+        p->wakeup = time_bin_awake;
+
+      } else {
+
+        /* Regular step, fully set by the time-bin of the particle */
+        ti_step = get_integer_timestep(p->time_bin);
+        ti_begin = get_integer_time_begin(ti_current, p->time_bin);
+        ti_end = ti_begin + ti_step;
+      }
+
+      /* Mid-point of the step, i.e. where this half-kick starts */
+      const integertime_t ti_half = ti_begin + ti_step / 2;
+
+#ifdef SWIFT_DEBUG_CHECKS
+      if (ti_begin + ti_step != ti_current)
+        error(
+            "Particle in wrong time-bin, ti_begin=%lld, ti_step=%lld "
+            "time_bin=%d wakeup=%d ti_current=%lld",
+            ti_begin, ti_step, p->time_bin, p->wakeup, ti_current);
+#endif
+
+      /* Kick factors over [ti_half, ti_end] */
+      double dt_kick_grav, dt_kick_hydro, dt_kick_therm, dt_kick_corr;
+      if (with_cosmology) {
+        dt_kick_hydro = cosmology_get_hydro_kick_factor(cosmo, ti_half, ti_end);
+        dt_kick_grav = cosmology_get_grav_kick_factor(cosmo, ti_half, ti_end);
+        dt_kick_therm = cosmology_get_therm_kick_factor(cosmo, ti_half, ti_end);
+        dt_kick_corr = cosmology_get_corr_kick_factor(cosmo, ti_half, ti_end);
+      } else {
+        /* Without cosmology all four factors are the same time interval */
+        const double dt_kick = (ti_end - ti_half) * time_base;
+        dt_kick_hydro = dt_kick;
+        dt_kick_grav = dt_kick;
+        dt_kick_therm = dt_kick;
+        dt_kick_corr = dt_kick;
+      }
+
+      /* Complete the time-step */
+      kick_part(p, xp, dt_kick_hydro, dt_kick_grav, dt_kick_therm,
+                dt_kick_corr, cosmo, hydro_props, entropy_floor, ti_half,
+                ti_end);
+
+#ifdef SWIFT_DEBUG_CHECKS
+      /* Kick and drift must now sit at the same point in time */
+      if (p->ti_drift != p->ti_kick) error("Error integrating part in time.");
+#endif
+
+      /* Get the particle ready for the next drift */
+      hydro_reset_predicted_values(p, xp, cosmo);
+    }
+
+    /* Gravity particles. */
+    for (int i = 0; i < gcount; i++) {
+
+      struct gpart *restrict gp = &gparts[i];
+
+      /* Only the two dark matter types are handled in this loop */
+      const int is_dark_matter =
+          (gp->type == swift_type_dark_matter ||
+           gp->type == swift_type_dark_matter_background);
+
+      if (!is_dark_matter || !gpart_is_active(gp, e)) continue;
+
+      const integertime_t ti_step = get_integer_timestep(gp->time_bin);
+      const integertime_t ti_begin =
+          get_integer_time_begin(ti_current, gp->time_bin);
+      const integertime_t ti_half = ti_begin + ti_step / 2;
+      const integertime_t ti_end = ti_begin + ti_step;
+
+#ifdef SWIFT_DEBUG_CHECKS
+      if (ti_end != ti_current) error("Particle in wrong time-bin");
+#endif
+
+      /* Kick factor over the second half of the step */
+      double dt_kick_grav;
+      if (with_cosmology) {
+        dt_kick_grav = cosmology_get_grav_kick_factor(cosmo, ti_half, ti_end);
+      } else {
+        dt_kick_grav = (ti_step / 2) * time_base;
+      }
+
+      /* Complete the time-step */
+      kick_gpart(gp, dt_kick_grav, ti_half, ti_end);
+
+#ifdef SWIFT_DEBUG_CHECKS
+      /* Kick and drift must now sit at the same point in time */
+      if (gp->ti_drift != gp->ti_kick)
+        error("Error integrating g-part in time.");
+#endif
+
+      /* Get the particle ready for the next drift */
+      gravity_reset_predicted_values(gp);
+    }
+
+    /* Star particles. */
+    for (int i = 0; i < scount; i++) {
+
+      struct spart *restrict sp = &sparts[i];
+
+      /* Only the active particles get kicked */
+      if (!spart_is_active(sp, e)) continue;
+
+      const integertime_t ti_step = get_integer_timestep(sp->time_bin);
+      const integertime_t ti_begin =
+          get_integer_time_begin(ti_current, sp->time_bin);
+      const integertime_t ti_half = ti_begin + ti_step / 2;
+      const integertime_t ti_end = ti_begin + ti_step;
+
+#ifdef SWIFT_DEBUG_CHECKS
+      if (ti_end != ti_current) error("Particle in wrong time-bin");
+#endif
+
+      /* Kick factor over the second half of the step */
+      double dt_kick_grav;
+      if (with_cosmology) {
+        dt_kick_grav = cosmology_get_grav_kick_factor(cosmo, ti_half, ti_end);
+      } else {
+        dt_kick_grav = (ti_step / 2) * time_base;
+      }
+
+      /* Complete the time-step */
+      kick_spart(sp, dt_kick_grav, ti_half, ti_end);
+
+#ifdef SWIFT_DEBUG_CHECKS
+      /* Kick and drift must now sit at the same point in time */
+      if (sp->ti_drift != sp->ti_kick)
+        error("Error integrating s-part in time.");
+#endif
+
+      /* Get the particle ready for the next drift */
+      stars_reset_predicted_values(sp);
+    }
+
+  } else {
+
+    /* Split cell: the work is done by the existing progeny */
+    for (int i = 0; i < 8; i++) {
+      struct cell *cp = c->progeny[i];
+      if (cp != NULL) runner_do_kick2(r, cp, 0);
+    }
+  }
+
+  if (timer) TIMER_TOC(timer_kick2);
+}
+
+/**
+ * @brief Computes the next time-step of all active particles in this cell
+ * and update the cell's statistics.
+ *
+ * For a leaf cell, every active particle gets a new time-bin, while inactive
+ * (but not inhibited) particles only contribute their current step to the
+ * cell-wide ti_end_min / ti_end_max / ti_beg_max values. For a split cell,
+ * the function recurses into the progeny and reduces their values. The
+ * numbers of updated particles and the time extrema are stored in the cell
+ * separately for hydro, gravity, stars and black holes.
+ *
+ * If nothing is active in the cell, only the updated counters are zeroed.
+ *
+ * @param r The runner thread.
+ * @param c The cell.
+ * @param timer Are we timing this ?
+ */
+void runner_do_timestep(struct runner *r, struct cell *c, int timer) {
+
+  const struct engine *e = r->e;
+  const integertime_t ti_current = e->ti_current;
+  const int with_cosmology = (e->policy & engine_policy_cosmology);
+  const int count = c->hydro.count;
+  const int gcount = c->grav.count;
+  const int scount = c->stars.count;
+  const int bcount = c->black_holes.count;
+  struct part *restrict parts = c->hydro.parts;
+  struct xpart *restrict xparts = c->hydro.xparts;
+  struct gpart *restrict gparts = c->grav.parts;
+  struct spart *restrict sparts = c->stars.parts;
+  struct bpart *restrict bparts = c->black_holes.parts;
+
+  TIMER_TIC;
+
+  /* Anything to do here? */
+  if (!cell_is_active_hydro(c, e) && !cell_is_active_gravity(c, e) &&
+      !cell_is_active_stars(c, e) && !cell_is_active_black_holes(c, e)) {
+    c->hydro.updated = 0;
+    c->grav.updated = 0;
+    c->stars.updated = 0;
+    c->black_holes.updated = 0;
+    return;
+  }
+
+  /* Counters and time extrema accumulated over this cell */
+  int updated = 0, g_updated = 0, s_updated = 0, b_updated = 0;
+  integertime_t ti_hydro_end_min = max_nr_timesteps, ti_hydro_end_max = 0,
+                ti_hydro_beg_max = 0;
+  integertime_t ti_gravity_end_min = max_nr_timesteps, ti_gravity_end_max = 0,
+                ti_gravity_beg_max = 0;
+  integertime_t ti_stars_end_min = max_nr_timesteps, ti_stars_end_max = 0,
+                ti_stars_beg_max = 0;
+  integertime_t ti_black_holes_end_min = max_nr_timesteps,
+                ti_black_holes_end_max = 0, ti_black_holes_beg_max = 0;
+
+  /* No children? */
+  if (!c->split) {
+
+    /* Loop over the gas particles in this cell. */
+    for (int k = 0; k < count; k++) {
+
+      /* Get a handle on the part. */
+      struct part *restrict p = &parts[k];
+      struct xpart *restrict xp = &xparts[k];
+
+      /* If particle needs updating */
+      if (part_is_active(p, e)) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+        /* Current end of time-step */
+        const integertime_t ti_end =
+            get_integer_time_end(ti_current, p->time_bin);
+
+        if (ti_end != ti_current)
+          error("Computing time-step of rogue particle.");
+#endif
+
+        /* Get new time-step */
+        const integertime_t ti_new_step = get_part_timestep(p, xp, e);
+
+        /* Update particle (and its gravity counterpart, if any) */
+        p->time_bin = get_time_bin(ti_new_step);
+        if (p->gpart != NULL) p->gpart->time_bin = p->time_bin;
+
+        /* Update the tracers properties */
+        tracers_after_timestep(p, xp, e->internal_units, e->physical_constants,
+                               with_cosmology, e->cosmology,
+                               e->hydro_properties, e->cooling_func, e->time);
+
+        /* Number of updated particles */
+        updated++;
+        if (p->gpart != NULL) g_updated++;
+
+        /* What is the next sync-point ? */
+        ti_hydro_end_min = min(ti_current + ti_new_step, ti_hydro_end_min);
+        ti_hydro_end_max = max(ti_current + ti_new_step, ti_hydro_end_max);
+
+        /* What is the next starting point for this cell ? */
+        ti_hydro_beg_max = max(ti_current, ti_hydro_beg_max);
+
+        if (p->gpart != NULL) {
+
+          /* What is the next sync-point ? */
+          ti_gravity_end_min =
+              min(ti_current + ti_new_step, ti_gravity_end_min);
+          ti_gravity_end_max =
+              max(ti_current + ti_new_step, ti_gravity_end_max);
+
+          /* What is the next starting point for this cell ? */
+          ti_gravity_beg_max = max(ti_current, ti_gravity_beg_max);
+        }
+      }
+
+      else { /* part is inactive */
+
+        if (!part_is_inhibited(p, e)) {
+
+          const integertime_t ti_end =
+              get_integer_time_end(ti_current, p->time_bin);
+
+          const integertime_t ti_beg =
+              get_integer_time_begin(ti_current + 1, p->time_bin);
+
+          /* What is the next sync-point ? */
+          ti_hydro_end_min = min(ti_end, ti_hydro_end_min);
+          ti_hydro_end_max = max(ti_end, ti_hydro_end_max);
+
+          /* What is the next starting point for this cell ? */
+          ti_hydro_beg_max = max(ti_beg, ti_hydro_beg_max);
+
+          if (p->gpart != NULL) {
+
+            /* What is the next sync-point ? */
+            ti_gravity_end_min = min(ti_end, ti_gravity_end_min);
+            ti_gravity_end_max = max(ti_end, ti_gravity_end_max);
+
+            /* What is the next starting point for this cell ? */
+            ti_gravity_beg_max = max(ti_beg, ti_gravity_beg_max);
+          }
+        }
+      }
+    }
+
+    /* Loop over the g-particles in this cell. */
+    for (int k = 0; k < gcount; k++) {
+
+      /* Get a handle on the part. */
+      struct gpart *restrict gp = &gparts[k];
+
+      /* Only the dark matter types are handled here; the other g-parts are
+       * updated through the gpart pointer of their part, s-part or b-part
+       * in the other loops of this function */
+      if (gp->type == swift_type_dark_matter ||
+          gp->type == swift_type_dark_matter_background) {
+
+        /* need to be updated ? */
+        if (gpart_is_active(gp, e)) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+          /* Current end of time-step */
+          const integertime_t ti_end =
+              get_integer_time_end(ti_current, gp->time_bin);
+
+          if (ti_end != ti_current)
+            error("Computing time-step of rogue particle.");
+#endif
+
+          /* Get new time-step */
+          const integertime_t ti_new_step = get_gpart_timestep(gp, e);
+
+          /* Update particle */
+          gp->time_bin = get_time_bin(ti_new_step);
+
+          /* Number of updated g-particles */
+          g_updated++;
+
+          /* What is the next sync-point ? */
+          ti_gravity_end_min =
+              min(ti_current + ti_new_step, ti_gravity_end_min);
+          ti_gravity_end_max =
+              max(ti_current + ti_new_step, ti_gravity_end_max);
+
+          /* What is the next starting point for this cell ? */
+          ti_gravity_beg_max = max(ti_current, ti_gravity_beg_max);
+
+        } else { /* gpart is inactive */
+
+          if (!gpart_is_inhibited(gp, e)) {
+
+            const integertime_t ti_end =
+                get_integer_time_end(ti_current, gp->time_bin);
+
+            /* What is the next sync-point ? */
+            ti_gravity_end_min = min(ti_end, ti_gravity_end_min);
+            ti_gravity_end_max = max(ti_end, ti_gravity_end_max);
+
+            const integertime_t ti_beg =
+                get_integer_time_begin(ti_current + 1, gp->time_bin);
+
+            /* What is the next starting point for this cell ? */
+            ti_gravity_beg_max = max(ti_beg, ti_gravity_beg_max);
+          }
+        }
+      }
+    }
+
+    /* Loop over the star particles in this cell. */
+    for (int k = 0; k < scount; k++) {
+
+      /* Get a handle on the part. */
+      struct spart *restrict sp = &sparts[k];
+
+      /* need to be updated ? */
+      if (spart_is_active(sp, e)) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+        /* Current end of time-step */
+        const integertime_t ti_end =
+            get_integer_time_end(ti_current, sp->time_bin);
+
+        if (ti_end != ti_current)
+          error("Computing time-step of rogue particle.");
+#endif
+        /* Get new time-step */
+        const integertime_t ti_new_step = get_spart_timestep(sp, e);
+
+        /* Update particle and its gravity counterpart */
+        sp->time_bin = get_time_bin(ti_new_step);
+        sp->gpart->time_bin = get_time_bin(ti_new_step);
+
+        /* Number of updated s-particles */
+        s_updated++;
+        g_updated++;
+
+        /* What is the next sync-point ? */
+        ti_stars_end_min = min(ti_current + ti_new_step, ti_stars_end_min);
+        ti_stars_end_max = max(ti_current + ti_new_step, ti_stars_end_max);
+        ti_gravity_end_min = min(ti_current + ti_new_step, ti_gravity_end_min);
+        ti_gravity_end_max = max(ti_current + ti_new_step, ti_gravity_end_max);
+
+        /* What is the next starting point for this cell ? */
+        ti_stars_beg_max = max(ti_current, ti_stars_beg_max);
+        ti_gravity_beg_max = max(ti_current, ti_gravity_beg_max);
+
+        /* star particle is inactive but not inhibited */
+      } else {
+
+        if (!spart_is_inhibited(sp, e)) {
+
+          const integertime_t ti_end =
+              get_integer_time_end(ti_current, sp->time_bin);
+
+          const integertime_t ti_beg =
+              get_integer_time_begin(ti_current + 1, sp->time_bin);
+
+          /* What is the next sync-point ? */
+          ti_stars_end_min = min(ti_end, ti_stars_end_min);
+          ti_stars_end_max = max(ti_end, ti_stars_end_max);
+          ti_gravity_end_min = min(ti_end, ti_gravity_end_min);
+          ti_gravity_end_max = max(ti_end, ti_gravity_end_max);
+
+          /* What is the next starting point for this cell ? */
+          ti_stars_beg_max = max(ti_beg, ti_stars_beg_max);
+          ti_gravity_beg_max = max(ti_beg, ti_gravity_beg_max);
+        }
+      }
+    }
+
+    /* Loop over the black hole particles in this cell. */
+    for (int k = 0; k < bcount; k++) {
+
+      /* Get a handle on the part. */
+      struct bpart *restrict bp = &bparts[k];
+
+      /* need to be updated ? */
+      if (bpart_is_active(bp, e)) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+        /* Current end of time-step */
+        const integertime_t ti_end =
+            get_integer_time_end(ti_current, bp->time_bin);
+
+        if (ti_end != ti_current)
+          error("Computing time-step of rogue particle.");
+#endif
+        /* Get new time-step */
+        const integertime_t ti_new_step = get_bpart_timestep(bp, e);
+
+        /* Update particle and its gravity counterpart */
+        bp->time_bin = get_time_bin(ti_new_step);
+        bp->gpart->time_bin = get_time_bin(ti_new_step);
+
+        /* Number of updated b-particles */
+        b_updated++;
+        g_updated++;
+
+        /* What is the next sync-point ? */
+        ti_black_holes_end_min =
+            min(ti_current + ti_new_step, ti_black_holes_end_min);
+        ti_black_holes_end_max =
+            max(ti_current + ti_new_step, ti_black_holes_end_max);
+        ti_gravity_end_min = min(ti_current + ti_new_step, ti_gravity_end_min);
+        ti_gravity_end_max = max(ti_current + ti_new_step, ti_gravity_end_max);
+
+        /* What is the next starting point for this cell ? */
+        ti_black_holes_beg_max = max(ti_current, ti_black_holes_beg_max);
+        ti_gravity_beg_max = max(ti_current, ti_gravity_beg_max);
+
+        /* black hole particle is inactive but not inhibited */
+      } else {
+
+        if (!bpart_is_inhibited(bp, e)) {
+
+          const integertime_t ti_end =
+              get_integer_time_end(ti_current, bp->time_bin);
+
+          const integertime_t ti_beg =
+              get_integer_time_begin(ti_current + 1, bp->time_bin);
+
+          /* What is the next sync-point ? */
+          ti_black_holes_end_min = min(ti_end, ti_black_holes_end_min);
+          ti_black_holes_end_max = max(ti_end, ti_black_holes_end_max);
+          ti_gravity_end_min = min(ti_end, ti_gravity_end_min);
+          ti_gravity_end_max = max(ti_end, ti_gravity_end_max);
+
+          /* What is the next starting point for this cell ? */
+          ti_black_holes_beg_max = max(ti_beg, ti_black_holes_beg_max);
+          ti_gravity_beg_max = max(ti_beg, ti_gravity_beg_max);
+        }
+      }
+    }
+
+  } else {
+
+    /* Loop over the progeny. */
+    for (int k = 0; k < 8; k++) {
+      if (c->progeny[k] != NULL) {
+        struct cell *restrict cp = c->progeny[k];
+
+        /* Recurse */
+        runner_do_timestep(r, cp, 0);
+
+        /* And aggregate */
+        updated += cp->hydro.updated;
+        g_updated += cp->grav.updated;
+        s_updated += cp->stars.updated;
+        b_updated += cp->black_holes.updated;
+
+        ti_hydro_end_min = min(cp->hydro.ti_end_min, ti_hydro_end_min);
+        ti_hydro_end_max = max(cp->hydro.ti_end_max, ti_hydro_end_max);
+        ti_hydro_beg_max = max(cp->hydro.ti_beg_max, ti_hydro_beg_max);
+
+        ti_gravity_end_min = min(cp->grav.ti_end_min, ti_gravity_end_min);
+        ti_gravity_end_max = max(cp->grav.ti_end_max, ti_gravity_end_max);
+        ti_gravity_beg_max = max(cp->grav.ti_beg_max, ti_gravity_beg_max);
+
+        /* Each particle type is reduced from the progeny's values for that
+         * same type, so that split cells agree with what the leaf cells
+         * compute from their s-parts and b-parts */
+        ti_stars_end_min = min(cp->stars.ti_end_min, ti_stars_end_min);
+        ti_stars_end_max = max(cp->stars.ti_end_max, ti_stars_end_max);
+        ti_stars_beg_max = max(cp->stars.ti_beg_max, ti_stars_beg_max);
+
+        ti_black_holes_end_min =
+            min(cp->black_holes.ti_end_min, ti_black_holes_end_min);
+        ti_black_holes_end_max =
+            max(cp->black_holes.ti_end_max, ti_black_holes_end_max);
+        ti_black_holes_beg_max =
+            max(cp->black_holes.ti_beg_max, ti_black_holes_beg_max);
+      }
+    }
+  }
+
+  /* Store the values. */
+  c->hydro.updated = updated;
+  c->grav.updated = g_updated;
+  c->stars.updated = s_updated;
+  c->black_holes.updated = b_updated;
+
+  c->hydro.ti_end_min = ti_hydro_end_min;
+  c->hydro.ti_end_max = ti_hydro_end_max;
+  c->hydro.ti_beg_max = ti_hydro_beg_max;
+  c->grav.ti_end_min = ti_gravity_end_min;
+  c->grav.ti_end_max = ti_gravity_end_max;
+  c->grav.ti_beg_max = ti_gravity_beg_max;
+  c->stars.ti_end_min = ti_stars_end_min;
+  c->stars.ti_end_max = ti_stars_end_max;
+  c->stars.ti_beg_max = ti_stars_beg_max;
+  c->black_holes.ti_end_min = ti_black_holes_end_min;
+  c->black_holes.ti_end_max = ti_black_holes_end_max;
+  c->black_holes.ti_beg_max = ti_black_holes_beg_max;
+
+#ifdef SWIFT_DEBUG_CHECKS
+  /* A freshly computed step must end strictly after the current time */
+  if (c->hydro.ti_end_min == e->ti_current &&
+      c->hydro.ti_end_min < max_nr_timesteps)
+    error("End of next hydro step is current time!");
+  if (c->grav.ti_end_min == e->ti_current &&
+      c->grav.ti_end_min < max_nr_timesteps)
+    error("End of next gravity step is current time!");
+  if (c->stars.ti_end_min == e->ti_current &&
+      c->stars.ti_end_min < max_nr_timesteps)
+    error("End of next stars step is current time!");
+  if (c->black_holes.ti_end_min == e->ti_current &&
+      c->black_holes.ti_end_min < max_nr_timesteps)
+    error("End of next black holes step is current time!");
+#endif
+
+  if (timer) TIMER_TOC(timer_timestep);
+}
+
+/**
+ * @brief Apply the time-step limiter to all awaken particles in a cell
+ * hierarchy.
+ *
+ * Split cells recurse into their progeny when forced or when the
+ * sub-limiter flag is set; leaf cells are only processed when forced.
+ * The hydro and gravity ti_end_min / ti_end_max / ti_beg_max of every
+ * processed cell are widened with the newly obtained values, and both
+ * limiter flags of the cell are cleared before returning.
+ *
+ * @param r The task #runner.
+ * @param c The #cell.
+ * @param force Limit the particles irrespective of the #cell flags.
+ * @param timer Are we timing this ?
+ */
+void runner_do_limiter(struct runner *r, struct cell *c, int force, int timer) {
+
+  const struct engine *e = r->e;
+  const integertime_t ti_current = e->ti_current;
+  const int count = c->hydro.count;
+  struct part *restrict parts = c->hydro.parts;
+  struct xpart *restrict xparts = c->hydro.xparts;
+
+  TIMER_TIC;
+
+#ifdef SWIFT_DEBUG_CHECKS
+  /* Check that we only limit local cells. */
+  if (c->nodeID != engine_rank) error("Limiting dt of a foreign cell is nope.");
+#endif
+
+  /* Running extrema, seeded with the neutral elements of min and max */
+  integertime_t hydro_end_min = max_nr_timesteps;
+  integertime_t hydro_end_max = 0;
+  integertime_t hydro_beg_max = 0;
+  integertime_t grav_end_min = max_nr_timesteps;
+  integertime_t grav_end_max = 0;
+  integertime_t grav_beg_max = 0;
+
+  /* The cell's own limiter flag also forces the operation */
+  force = (force || cell_get_flag(c, cell_flag_do_hydro_limiter));
+
+  /* A cell without gas particles only needs its flags cleared */
+  if (count == 0) {
+    cell_clear_flag(
+        c, cell_flag_do_hydro_limiter | cell_flag_do_hydro_sub_limiter);
+    return;
+  }
+
+  /* Does the work happen below this level ? */
+  const int recurse =
+      c->split && (force || cell_get_flag(c, cell_flag_do_hydro_sub_limiter));
+
+  if (recurse) {
+
+    for (int i = 0; i < 8; i++) {
+
+      struct cell *restrict cp = c->progeny[i];
+      if (cp == NULL) continue;
+
+      /* Limit the particles of this progeny... */
+      runner_do_limiter(r, cp, force, 0);
+
+      /* ...and collect the values it ended up with */
+      hydro_end_min = min(cp->hydro.ti_end_min, hydro_end_min);
+      hydro_end_max = max(cp->hydro.ti_end_max, hydro_end_max);
+      hydro_beg_max = max(cp->hydro.ti_beg_max, hydro_beg_max);
+      grav_end_min = min(cp->grav.ti_end_min, grav_end_min);
+      grav_end_max = max(cp->grav.ti_end_max, grav_end_max);
+      grav_beg_max = max(cp->grav.ti_beg_max, grav_beg_max);
+    }
+
+    /* Merge the collected values into this cell */
+    c->hydro.ti_end_min = min(c->hydro.ti_end_min, hydro_end_min);
+    c->hydro.ti_end_max = max(c->hydro.ti_end_max, hydro_end_max);
+    c->hydro.ti_beg_max = max(c->hydro.ti_beg_max, hydro_beg_max);
+    c->grav.ti_end_min = min(c->grav.ti_end_min, grav_end_min);
+    c->grav.ti_end_max = max(c->grav.ti_end_max, grav_end_max);
+    c->grav.ti_beg_max = max(c->grav.ti_beg_max, grav_beg_max);
+
+  } else if (!c->split && force) {
+
+    /* Start from what the cell currently holds */
+    hydro_end_min = c->hydro.ti_end_min;
+    hydro_end_max = c->hydro.ti_end_max;
+    hydro_beg_max = c->hydro.ti_beg_max;
+    grav_end_min = c->grav.ti_end_min;
+    grav_end_max = c->grav.ti_end_max;
+    grav_beg_max = c->grav.ti_beg_max;
+
+    /* Go through the gas particles of this leaf */
+    for (int i = 0; i < count; i++) {
+
+      struct part *restrict p = &parts[i];
+      struct xpart *restrict xp = &xparts[i];
+
+      /* Inhibited particles are ignored altogether */
+      if (part_is_inhibited(p, e)) continue;
+
+      /* An active particle gets its wake-up flag set back to 'not awake' */
+      if (part_is_active(p, e)) {
+        if (p->wakeup != time_bin_not_awake) p->wakeup = time_bin_not_awake;
+      }
+
+      /* Only particles whose flag is at or below time_bin_awake get limited */
+      if (p->wakeup > time_bin_awake) continue;
+
+      /* Apply the limiter and get the new time-step size */
+      const integertime_t ti_new_step = timestep_limit_part(p, xp, e);
+
+      /* Next sync-point and starting point on the hydro side */
+      hydro_end_min = min(ti_current + ti_new_step, hydro_end_min);
+      hydro_end_max = max(ti_current + ti_new_step, hydro_end_max);
+      hydro_beg_max = max(ti_current, hydro_beg_max);
+
+      /* Particles without a gravity counterpart are done */
+      if (p->gpart == NULL) continue;
+
+      /* Copy the time-bin of the part over to its gpart */
+      p->gpart->time_bin = p->time_bin;
+
+      /* Next sync-point and starting point on the gravity side */
+      grav_end_min = min(ti_current + ti_new_step, grav_end_min);
+      grav_end_max = max(ti_current + ti_new_step, grav_end_max);
+      grav_beg_max = max(ti_current, grav_beg_max);
+    }
+
+    /* Merge the collected values into this cell */
+    c->hydro.ti_end_min = min(c->hydro.ti_end_min, hydro_end_min);
+    c->hydro.ti_end_max = max(c->hydro.ti_end_max, hydro_end_max);
+    c->hydro.ti_beg_max = max(c->hydro.ti_beg_max, hydro_beg_max);
+    c->grav.ti_end_min = min(c->grav.ti_end_min, grav_end_min);
+    c->grav.ti_end_max = max(c->grav.ti_end_max, grav_end_max);
+    c->grav.ti_beg_max = max(c->grav.ti_beg_max, grav_beg_max);
+  }
+
+  /* The limiter has run: drop both flags */
+  cell_clear_flag(c,
+                  cell_flag_do_hydro_limiter | cell_flag_do_hydro_sub_limiter);
+
+  if (timer) TIMER_TOC(timer_do_limiter);
+}
diff --git a/src/timestep_limiter.h b/src/timestep_limiter.h
index d8555a352c8e1a799ac13d268932c9d37f30fe33..01b72daea5599b662c38fdc4b3ada8b2ac5b3d11 100644
--- a/src/timestep_limiter.h
+++ b/src/timestep_limiter.h
@@ -22,6 +22,9 @@
 /* Config parameters. */
 #include "../config.h"
 
+/* Local headers. */
+#include "kick.h"
+
 /**
  * @brief Wakes up a particle by rewinding it's kick1 back in time and applying
  * a new one such that the particle becomes active again in the next time-step.