diff --git a/.gitignore b/.gitignore
index 775f2d24b64ecda6a036b0d7b4b2ea62a1a24210..c29fa3e3a48e9846b5c7c422b746589cb740802d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -36,6 +36,7 @@ examples/*/*/*.rst
 examples/*/*/*.hdf5
 examples/*/*/*.csv
 examples/*/*/*.dot
+examples/*/*/cell_hierarchy.html
 examples/*/*/energy.txt
 examples/*/*/task_level.txt
 examples/*/*/timesteps_*.txt
diff --git a/Makefile.am b/Makefile.am
index c71cc8d00c797f0e2afc034cb1abfff7eba14c88..40ba64dcdd1c7270712288bce938ab56e918694d 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -23,6 +23,9 @@ SUBDIRS = src argparse examples doc tests tools
 if HAVEEAGLECOOLING
 SUBDIRS += examples/Cooling/CoolingRates
 endif
+if HAVELOGGER
+SUBDIRS += logger
+endif
 
 # Non-standard files that should be part of the distribution.
 EXTRA_DIST = INSTALL.swift .clang-format format.sh
diff --git a/README b/README
index ee7abd5a5709c81ecef1a89c1a651a925ce2f4a9..8d722a66da5083889e0adfb5af51206509bef53d 100644
--- a/README
+++ b/README
@@ -71,6 +71,8 @@ Parameters:
                                       from all ranks.
     -y, --task-dumps=<int>            Time-step frequency at which task analysis
                                       files and/or tasks are dumped.
+    --cell-dumps=<int>                Time-step frequency at which cell graphs 
+                                      are dumped. 
     -Y, --threadpool-dumps=<int>      Time-step frequency at which threadpool
                                       tasks are dumped.
 
diff --git a/README.md b/README.md
index efffc9b4c43ff8f0821c4d7d49721ff7ff5949d0..f91b03d3f6a9656e33adc3216a15ed41e7b971de 100644
--- a/README.md
+++ b/README.md
@@ -122,6 +122,8 @@ Parameters:
                                       from all ranks.
     -y, --task-dumps=<int>            Time-step frequency at which task analysis
                                       files and/or tasks are dumped.
+    --cell-dumps=<int>                Time-step frequency at which cell graphs 
+                                      are dumped. 
     -Y, --threadpool-dumps=<int>      Time-step frequency at which threadpool
                                       tasks are dumped.
 
diff --git a/configure.ac b/configure.ac
index 338edec60f956c37f666f7592a931b2c20a9f6e8..8d189c1210abf48304ca39b0fc6450323091eb7e 100644
--- a/configure.ac
+++ b/configure.ac
@@ -86,6 +86,7 @@ AC_ARG_ENABLE([logger],
 if test "$with_logger" = "yes"; then
    AC_DEFINE([WITH_LOGGER], 1, [logger enabled])
 fi
+AM_CONDITIONAL([HAVELOGGER],[test "x$with_logger" = "xyes"])
 
 # Interprocedural optimization support. Needs special handling for linking and
 # archiving as well as compilation with Intels, needs to be done before
@@ -996,6 +997,41 @@ fi
 AC_SUBST([TBBMALLOC_LIBS])
 AM_CONDITIONAL([HAVETBBMALLOC],[test -n "$TBBMALLOC_LIBS"])
 
+# Check for python.
+have_python="no"
+AC_ARG_WITH([python],
+    [AS_HELP_STRING([--with-python=PATH],
+       [root directory where python is installed @<:@yes/no@:>@]
+    )],
+    [with_python="$withval"],
+    [with_python="no"]
+)
+if test "x$with_python" != "xno"; then
+   if test "x$with_python" = "x"; then
+      # No path given: fall back to the default Linux prefix.
+      with_python="/usr/"
+   fi
+   AM_PATH_PYTHON([3], [], [AC_MSG_ERROR(python not found)])   
+   AC_ARG_VAR([PYTHON_INCS], [Include flags for python, bypassing python-config])
+   AC_ARG_VAR([PYTHON_CONFIG], [Path to python-config])
+   AS_IF([test -z "$PYTHON_INCS"], [
+      AS_IF([test -z "$PYTHON_CONFIG"], [
+      	AC_PATH_PROGS([PYTHON_CONFIG],
+	          [python$PYTHON_VERSION-config python-config],
+                  [no],
+                  [`dirname $PYTHON`])
+    	AS_IF([test "$PYTHON_CONFIG" = no], [AC_MSG_ERROR([cannot find python-config for $PYTHON.])])
+      ])
+      AC_MSG_CHECKING([python include flags])
+      PYTHON_INCS=`$PYTHON_CONFIG --includes`
+      AC_MSG_RESULT([$PYTHON_INCS])
+  ])
+  have_python="yes"
+fi
+AC_SUBST([PYTHON_INCS])
+AM_CONDITIONAL([HAVEPYTHON],[test -n "$PYTHON_INCS"])
+
+
 # Check for HDF5. This is required.
 AX_LIB_HDF5
 if test "$with_hdf5" != "yes"; then
@@ -1991,7 +2027,7 @@ AM_CONDITIONAL([HAVEEAGLEFEEDBACK], [test $with_feedback = "EAGLE"])
 
 # Handle .in files.
 AC_CONFIG_FILES([Makefile src/Makefile examples/Makefile examples/Cooling/CoolingRates/Makefile doc/Makefile doc/Doxyfile tests/Makefile])
-AC_CONFIG_FILES([argparse/Makefile tools/Makefile])
+AC_CONFIG_FILES([argparse/Makefile tools/Makefile logger/Makefile logger/tests/Makefile])
 AC_CONFIG_FILES([tests/testReading.sh], [chmod +x tests/testReading.sh])
 AC_CONFIG_FILES([tests/testActivePair.sh], [chmod +x tests/testActivePair.sh])
 AC_CONFIG_FILES([tests/test27cells.sh], [chmod +x tests/test27cells.sh])
@@ -2044,7 +2080,6 @@ AC_MSG_RESULT([
    CPU profiler         : $have_profiler
    Pthread barriers     : $have_pthread_barrier
    VELOCIraptor enabled : $have_velociraptor
-   Particle Logger      : $with_logger
    FoF activated:       : $enable_fof
 
    Hydro scheme       : $with_hydro
@@ -2082,4 +2117,7 @@ AC_MSG_RESULT([
    Custom icbrtf               : $enable_custom_icbrtf
    Boundary particles          : $boundary_particles
 
+   Particle Logger      : $with_logger
+   Python enabled       : $have_python
+
  ------------------------])
diff --git a/doc/Doxyfile.in b/doc/Doxyfile.in
index c6b3046d2d3591c937dfd98cf75fb7697b90110f..94424f644e2f9e6dc4c436a42423ba667186e02b 100644
--- a/doc/Doxyfile.in
+++ b/doc/Doxyfile.in
@@ -777,6 +777,7 @@ INPUT		       += @top_srcdir@/src/tracers/EAGLE
 INPUT		       += @top_srcdir@/src/stars/EAGLE
 INPUT		       += @top_srcdir@/src/feedback/EAGLE
 INPUT		       += @top_srcdir@/src/black_holes/EAGLE
+INPUT		       += @top_srcdir@/logger
 
 # This tag can be used to specify the character encoding of the source files
 # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses
diff --git a/doc/RTD/source/AnalysisTools/index.rst b/doc/RTD/source/AnalysisTools/index.rst
index f7f2f979666270ce371b532b6eab7bad3a23c9bd..8b4467f5f36a5e07f0b5446f4f590b2643990731 100644
--- a/doc/RTD/source/AnalysisTools/index.rst
+++ b/doc/RTD/source/AnalysisTools/index.rst
@@ -21,11 +21,13 @@ Cell graph
 ----------
 
 An interactive graph of the cells is available with the configuration option ``--enable-cell-graph``.
-During a run, SWIFT will generate a ``cell_hierarchy_*.csv`` file per MPI rank.
-The command ``tools/make_cell_hierarchy.sh cell_hierarchy_*.csv`` merges the files together and generates the file ``cell_hierarchy.html``
+During a run, SWIFT will generate a ``cell_hierarchy_*.csv`` file per MPI rank at the frequency given by the parameter ``--cell-dumps=n``.
+The command ``tools/make_cell_hierarchy.sh cell_hierarchy_0000_*.csv`` merges the files at time step 0 together and generates the file ``cell_hierarchy.html``
 that contains the graph and can be read with your favorite web browser.
 
-With chrome, you cannot access the files directly, you will need to either access them through an existing server (e.g. public http provided by your university)
+With most web browsers, you cannot access the files directly.
+If this is the case, the cells will never appear (but everything else should be fine).
+To solve this problem, you will need to either access them through an existing server (e.g. public http provided by your university)
 or install ``npm`` and then run the following commands
 
 .. code-block:: bash
@@ -34,6 +36,14 @@ or install ``npm`` and then run the following commands
    http-server .
 
 Now you can open the web page ``http://localhost:8080/cell_hierarchy.html``.
+When running a large simulation, the data loading may take a while (a few seconds for EAGLE_6).
+Your browser should not be hanging, but will seem to be idle.
+
+If you wish to add some information to the graph, you can do it by modifying the files ``src/space.c`` and ``tools/data/cell_hierarchy.html``.
+In the first one, you will need to modify the calls to ``fprintf`` in the functions ``space_write_cell_hierarchy`` and ``space_write_cell``.
+Here the code is simply writing CSV files containing all the required information about the cells.
+In the second one, you will need to find the function ``mouseover`` and add the field that you have created.
+You can also increase the size of the bubble through the style parameter ``height``.
 
 Memory usage reports
 --------------------
diff --git a/doc/RTD/source/CommandLineOptions/index.rst b/doc/RTD/source/CommandLineOptions/index.rst
index 1144477548062bb61e47a88d3a1ee062b89b97cf..5251b36f7394465c59577932155544a755c0ee43 100644
--- a/doc/RTD/source/CommandLineOptions/index.rst
+++ b/doc/RTD/source/CommandLineOptions/index.rst
@@ -11,7 +11,10 @@ For instance, just running the ``swift`` binary will not use any SPH or gravity;
 the particles will just sit still!
 
 Below is a list of the command line options and when they should be used. The same list
-can be found by typing ``./swift -h``::
+can be found by typing ``./swift -h``:
+
+.. code-block:: none
+
 
     -h, --help                        show this help message and exit
 
@@ -65,5 +68,7 @@ can be found by typing ``./swift -h``::
                                       from all ranks.
     -y, --task-dumps=<int>            Time-step frequency at which task analysis
                                       files and/or tasks are dumped.
+    --cell-dumps=<int>                Time-step frequency at which cell graphs 
+                                      are dumped. 
     -Y, --threadpool-dumps=<int>      Time-step frequency at which threadpool
                                       tasks are dumped.
diff --git a/doc/RTD/source/ParameterFiles/parameter_description.rst b/doc/RTD/source/ParameterFiles/parameter_description.rst
index d3a79588f148cfb0b84fc533c7e77cc29891f1bd..4107bb836b20840a2b0fe4473fc816b6cf1dca90 100644
--- a/doc/RTD/source/ParameterFiles/parameter_description.rst
+++ b/doc/RTD/source/ParameterFiles/parameter_description.rst
@@ -370,6 +370,45 @@ The full section to start a typical cosmological run would be:
      H_mass_fraction:          0.755
      H_ionization_temperature: 1e4
 
+.. _Parameters_Stars:
+
+Stars
+-----
+
+The ``Stars`` section is used to set parameters that describe the Stars
+calculations when doing feedback or enrichment. Note that if stars only act
+gravitationally (i.e. SWIFT is run *without* ``--feedback``) no parameters
+in this section are used. 
+
+The first four parameters are related to the neighbour search:
+
+* The (relative) tolerance to converge smoothing lengths within:
+  ``h_tolerance`` (Default: same as SPH scheme)
+* The maximal smoothing length in internal units: ``h_max`` (Default: same
+  as SPH scheme)
+* The minimal allowed smoothing length in terms of the gravitational
+  softening: ``h_min_ratio`` (Default: same as SPH scheme)
+* The maximal (relative) allowed change in volume over one time-step:
+  ``max_volume_change`` (Default: same as SPH scheme)
+
+These four parameters are optional and will default to their SPH equivalent
+if left unspecified. That is, the value specified by the user in the ``SPH``
+section, or the default SPH value if left unspecified there as well.
+
+The two remaining parameters can be used to overwrite the birth time (or
+scale-factor) of the stars that were read from the ICs. This can be useful
+to start a simulation with stars already of a given age. The parameters
+are:
+
+* Whether or not to overwrite anything: ``overwrite_birth_time``
+  (Default: 0)
+* The value to use: ``birth_time``
+
+If the birth time is set to ``-1`` then the stars will never enter any
+feedback or enrichment loop. When these values are not specified, SWIFT
+will start and use the birth times specified in the ICs. If no values are
+given in the ICs, the stars' birth times will be zeroed, which can cause
+issues depending on the type of run performed.
 
 .. _Parameters_time_integration:
 
diff --git a/examples/EAGLE_ICs/EAGLE_12/eagle_12.yml b/examples/EAGLE_ICs/EAGLE_12/eagle_12.yml
index 93c8c740f58efb23a017a7d229f81a685e837b1a..ad20f401d26bde02a6a44299843b25e07a8f83d9 100644
--- a/examples/EAGLE_ICs/EAGLE_12/eagle_12.yml
+++ b/examples/EAGLE_ICs/EAGLE_12/eagle_12.yml
@@ -38,7 +38,7 @@ Statistics:
 # Parameters for the self-gravity scheme
 Gravity:
   eta:                    0.025     # Constant dimensionless multiplier for time integration.
-  theta:                  0.7       # Opening angle (Multipole acceptance criterion)
+  theta:                  0.5       # Opening angle (Multipole acceptance criterion)
   mesh_side_length:       64
   comoving_DM_softening:         0.0026994 # Comoving DM softening length (in internal units).
   max_physical_DM_softening:     0.0007    # Max physical DM softening length (in internal units).
@@ -53,6 +53,11 @@ SPH:
   minimal_temperature:   100.0    # (internal units)
   initial_temperature:   268.7
 
+# Parameters of the stars neighbour search
+Stars:
+  resolution_eta:        1.1642   # Target smoothing length in units of the mean inter-particle separation
+  h_tolerance:           7e-3
+
 # Parameters for the Friends-Of-Friends algorithm
 FOF:
   basename:                        fof_output  # Filename for the FOF outputs.
@@ -64,7 +69,6 @@ FOF:
 
 Scheduler:
   max_top_level_cells:   16
-  cell_split_size:       100
   tasks_per_cell:        5
 
 Restarts:
diff --git a/examples/EAGLE_ICs/EAGLE_25/eagle_25.yml b/examples/EAGLE_ICs/EAGLE_25/eagle_25.yml
index d5307533c0ffccae7644f06dbe33c27bf46f4114..fd902d27daaca3c6c3ca9c5d52fbf43b1283c581 100644
--- a/examples/EAGLE_ICs/EAGLE_25/eagle_25.yml
+++ b/examples/EAGLE_ICs/EAGLE_25/eagle_25.yml
@@ -38,7 +38,7 @@ Statistics:
 # Parameters for the self-gravity scheme
 Gravity:
   eta:                    0.025     # Constant dimensionless multiplier for time integration.
-  theta:                  0.7       # Opening angle (Multipole acceptance criterion)
+  theta:                  0.5       # Opening angle (Multipole acceptance criterion)
   mesh_side_length:       128
   comoving_DM_softening:         0.0026994 # Comoving DM softening length (in internal units).
   max_physical_DM_softening:     0.0007    # Max physical DM softening length (in internal units).
@@ -53,6 +53,11 @@ SPH:
   minimal_temperature:   100.0    # (internal units)
   initial_temperature:   268.7
 
+# Parameters of the stars neighbour search
+Stars:
+  resolution_eta:        1.1642   # Target smoothing length in units of the mean inter-particle separation
+  h_tolerance:           7e-3
+
 # Parameters for the Friends-Of-Friends algorithm
 FOF:
   basename:                        fof_output  # Filename for the FOF outputs.
@@ -64,7 +69,6 @@ FOF:
 
 Scheduler:
   max_top_level_cells:   16
-  cell_split_size:       100
   tasks_per_cell:        5
 
 Restarts:
diff --git a/examples/EAGLE_ICs/EAGLE_50/eagle_50.yml b/examples/EAGLE_ICs/EAGLE_50/eagle_50.yml
index f757dc9dcf104237c6ecc5e472d29f79375a1d53..3091fb0be35111f0e6046fd99f0c426840d00231 100644
--- a/examples/EAGLE_ICs/EAGLE_50/eagle_50.yml
+++ b/examples/EAGLE_ICs/EAGLE_50/eagle_50.yml
@@ -38,7 +38,7 @@ Statistics:
 # Parameters for the self-gravity scheme
 Gravity:
   eta:                    0.025     # Constant dimensionless multiplier for time integration.
-  theta:                  0.7       # Opening angle (Multipole acceptance criterion)
+  theta:                  0.5       # Opening angle (Multipole acceptance criterion)
   mesh_side_length:       256
   comoving_DM_softening:         0.0026994 # Comoving DM softening length (in internal units).
   max_physical_DM_softening:     0.0007    # Max physical DM softening length (in internal units).
@@ -53,6 +53,11 @@ SPH:
   minimal_temperature:   100.0    # (internal units)
   initial_temperature:   268.7
 
+# Parameters of the stars neighbour search
+Stars:
+  resolution_eta:        1.1642   # Target smoothing length in units of the mean inter-particle separation
+  h_tolerance:           7e-3
+
 # Parameters for the Friends-Of-Friends algorithm
 FOF:
   basename:                        fof_output  # Filename for the FOF outputs.
@@ -64,7 +69,6 @@ FOF:
 
 Scheduler:
   max_top_level_cells:   32
-  cell_split_size:       100
   tasks_per_cell:        5
 
 Restarts:
diff --git a/examples/EAGLE_low_z/EAGLE_100/eagle_100.yml b/examples/EAGLE_low_z/EAGLE_100/eagle_100.yml
index f77036b5d55f33b4fd3f42c7bea0ccc124003a40..0cc97babbd2b89a7507808bfcad2648e0c03ce47 100644
--- a/examples/EAGLE_low_z/EAGLE_100/eagle_100.yml
+++ b/examples/EAGLE_low_z/EAGLE_100/eagle_100.yml
@@ -57,6 +57,13 @@ SPH:
   h_min_ratio:           0.1      # Minimal smoothing in units of softening.
   CFL_condition:         0.1      # Courant-Friedrich-Levy condition for time integration.
   minimal_temperature:   100      # (internal units)
+
+# Parameters of the stars neighbour search
+Stars:
+  resolution_eta:        1.1642   # Target smoothing length in units of the mean inter-particle separation
+  h_tolerance:           7e-3
+  overwrite_birth_time:  1
+  birth_time:            0.33333  # Pretend all the stars were born at z = 2
 
 # Parameters related to the initial conditions
 InitialConditions:
diff --git a/examples/EAGLE_low_z/EAGLE_12/eagle_12.yml b/examples/EAGLE_low_z/EAGLE_12/eagle_12.yml
index f470c691a5a76207998f6d854f6e8d44f0a1aebb..73f4e1a8d4269567d4139af6b992754d17494d3d 100644
--- a/examples/EAGLE_low_z/EAGLE_12/eagle_12.yml
+++ b/examples/EAGLE_low_z/EAGLE_12/eagle_12.yml
@@ -59,6 +59,13 @@ SPH:
   CFL_condition:         0.1      # Courant-Friedrich-Levy condition for time integration.
   minimal_temperature:   100      # (internal units)
 
+# Parameters of the stars neighbour search
+Stars:
+  resolution_eta:        1.1642   # Target smoothing length in units of the mean inter-particle separation
+  h_tolerance:           7e-3
+  overwrite_birth_time:  1
+  birth_time:            0.33333  # Pretend all the stars were born at z = 2
+  
 # Parameters for the Friends-Of-Friends algorithm
 FOF:
   basename:                        fof_output  # Filename for the FOF outputs.
diff --git a/examples/EAGLE_low_z/EAGLE_25/eagle_25.yml b/examples/EAGLE_low_z/EAGLE_25/eagle_25.yml
index 07685bf783b34b2872df4a32610fa791db01cded..f7a9394299fbf641a98b2ffc2d7c4bac364c164e 100644
--- a/examples/EAGLE_low_z/EAGLE_25/eagle_25.yml
+++ b/examples/EAGLE_low_z/EAGLE_25/eagle_25.yml
@@ -67,6 +67,13 @@ SPH:
   CFL_condition:         0.1      # Courant-Friedrich-Levy condition for time integration.
   minimal_temperature:   100      # (internal units)
 
+# Parameters of the stars neighbour search
+Stars:
+  resolution_eta:        1.1642   # Target smoothing length in units of the mean inter-particle separation
+  h_tolerance:           7e-3
+  overwrite_birth_time:  1
+  birth_time:            0.33333  # Pretend all the stars were born at z = 2
+
 # Parameters for the Friends-Of-Friends algorithm
 FOF:
   basename:                        fof_output  # Filename for the FOF outputs.
diff --git a/examples/EAGLE_low_z/EAGLE_50/eagle_50.yml b/examples/EAGLE_low_z/EAGLE_50/eagle_50.yml
index 943c64c7a29fd87b9b5f78a4edded6b14e0f3c57..ea46a9ad677d8e37ec48a83645a4501e8bdc842f 100644
--- a/examples/EAGLE_low_z/EAGLE_50/eagle_50.yml
+++ b/examples/EAGLE_low_z/EAGLE_50/eagle_50.yml
@@ -58,6 +58,13 @@ SPH:
   CFL_condition:         0.1      # Courant-Friedrich-Levy condition for time integration.
   minimal_temperature:   100      # (internal units)
 
+# Parameters of the stars neighbour search
+Stars:
+  resolution_eta:        1.1642   # Target smoothing length in units of the mean inter-particle separation
+  h_tolerance:           7e-3
+  overwrite_birth_time:  1
+  birth_time:            0.33333  # Pretend all the stars were born at z = 2
+
 # Parameters for the Friends-Of-Friends algorithm
 FOF:
   basename:                        fof_output  # Filename for the FOF outputs.
diff --git a/examples/EAGLE_low_z/EAGLE_6/eagle_6.yml b/examples/EAGLE_low_z/EAGLE_6/eagle_6.yml
index 48a825750fd2a927ba08dfc5a8a4607a490fe0d8..27082dd0b881279c6631dfdc1edb0ac8ea3d07c6 100644
--- a/examples/EAGLE_low_z/EAGLE_6/eagle_6.yml
+++ b/examples/EAGLE_low_z/EAGLE_6/eagle_6.yml
@@ -68,6 +68,13 @@ SPH:
   CFL_condition:         0.1      # Courant-Friedrich-Levy condition for time integration.
   minimal_temperature:   100      # (internal units)
 
+# Parameters of the stars neighbour search
+Stars:
+  resolution_eta:        1.1642   # Target smoothing length in units of the mean inter-particle separation
+  h_tolerance:           7e-3
+  overwrite_birth_time:  1
+  birth_time:            0.33333  # Pretend all the stars were born at z = 2
+
 # Parameters for the Friends-Of-Friends algorithm
 FOF:
   basename:                        fof_output  # Filename for the FOF outputs.
diff --git a/examples/IsolatedGalaxy/IsolatedGalaxy_feedback/isolated_galaxy.yml b/examples/IsolatedGalaxy/IsolatedGalaxy_feedback/isolated_galaxy.yml
index 79fe5682692347127081f021ed6930df57bbfa02..dcd580243c51b0cbfb24c684709e1e511829f089 100644
--- a/examples/IsolatedGalaxy/IsolatedGalaxy_feedback/isolated_galaxy.yml
+++ b/examples/IsolatedGalaxy/IsolatedGalaxy_feedback/isolated_galaxy.yml
@@ -45,6 +45,11 @@ SPH:
   h_max:                 10.
   minimal_temperature:   100.
 
+# Parameters for the stars neighbour search
+Stars:
+  overwrite_birth_time:    1     # Make sure the stars in the ICs do not do any feedback
+  birth_time:             -1.    # by setting all of their birth times to -1  
+
 # Standard EAGLE cooling options
 EAGLECooling:
   dir_name:                ./coolingtables/  # Location of the Wiersma+08 cooling tables
diff --git a/examples/IsolatedGalaxy/IsolatedGalaxy_starformation/isolated_galaxy.yml b/examples/IsolatedGalaxy/IsolatedGalaxy_starformation/isolated_galaxy.yml
index d917f926724c022cd15524058ddde2a7466acaab..fe57f693b0fcba6d8bc70c0fddf2d9dce2e60b99 100644
--- a/examples/IsolatedGalaxy/IsolatedGalaxy_starformation/isolated_galaxy.yml
+++ b/examples/IsolatedGalaxy/IsolatedGalaxy_starformation/isolated_galaxy.yml
@@ -45,6 +45,11 @@ SPH:
   h_max:                 10.
   minimal_temperature:   10.      # Kelvin
 
+# Parameters for the stars neighbour search
+Stars:
+  overwrite_birth_time:    1     # Make sure the stars in the ICs do not do any feedback
+  birth_time:             -1.    # by setting all of their birth times to -1  
+  
 # Standard EAGLE cooling options
 EAGLECooling:
   dir_name:                ./coolingtables/  # Location of the Wiersma+08 cooling tables
diff --git a/examples/SubgridTests/CosmologicalStellarEvolution/stellar_evolution.yml b/examples/SubgridTests/CosmologicalStellarEvolution/stellar_evolution.yml
index 9b8c3e34dad20eba560a7316f16364b76a088c05..b3d318d68b69d0940d7a37b17ae5331a711b140f 100644
--- a/examples/SubgridTests/CosmologicalStellarEvolution/stellar_evolution.yml
+++ b/examples/SubgridTests/CosmologicalStellarEvolution/stellar_evolution.yml
@@ -40,7 +40,8 @@ SPH:
 
 # Properties of the stars
 Stars:
-  birth_time:  0.00991   # Give the star in the ICs a decent birth time
+  overwrite_birth_time: 1
+  birth_time:           0.00991   # Give the star in the ICs a decent birth time
   
 # Parameters related to the initial conditions
 InitialConditions:
diff --git a/examples/SubgridTests/StellarEvolution/stellar_evolution.yml b/examples/SubgridTests/StellarEvolution/stellar_evolution.yml
index 230ce6c8b8a51603c1dab9a308845be3e984febb..63c7a4d2624793af26bdbaf628715243e2ab511d 100644
--- a/examples/SubgridTests/StellarEvolution/stellar_evolution.yml
+++ b/examples/SubgridTests/StellarEvolution/stellar_evolution.yml
@@ -34,7 +34,8 @@ SPH:
 
 # Properties of the stars
 Stars:
-  birth_time:  0.   # Give the star in the ICs a decent birth time
+  overwrite_birth_time: 1
+  birth_time:           0.   # Give the star in the ICs a decent birth time
   
 # Parameters related to the initial conditions
 InitialConditions:
diff --git a/examples/main.c b/examples/main.c
index 27af0897a4bdd12287fd0460579a4eb9ea3f08c1..9f9c0a471370a208251fe1c3628d3d980b476af4 100644
--- a/examples/main.c
+++ b/examples/main.c
@@ -148,6 +148,7 @@ int main(int argc, char *argv[]) {
   int with_aff = 0;
   int dry_run = 0;
   int dump_tasks = 0;
+  int dump_cells = 0;
   int dump_threadpool = 0;
   int nsteps = -2;
   int restart = 0;
@@ -263,6 +264,9 @@ int main(int argc, char *argv[]) {
       OPT_INTEGER('y', "task-dumps", &dump_tasks,
                   "Time-step frequency at which task graphs are dumped.", NULL,
                   0, 0),
+      OPT_INTEGER(0, "cell-dumps", &dump_cells,
+                  "Time-step frequency at which cell graphs are dumped.", NULL,
+                  0, 0),
       OPT_INTEGER('Y', "threadpool-dumps", &dump_threadpool,
                   "Time-step frequency at which threadpool tasks are dumped.",
                   NULL, 0, 0),
@@ -323,6 +327,16 @@ int main(int argc, char *argv[]) {
   }
 #endif
 
+#ifndef SWIFT_CELL_GRAPH
+  if (dump_cells) {
+    if (myrank == 0) {
+      error(
+          "complete cell dumps are only created when "
+          "configured with --enable-cell-graph.");
+    }
+  }
+#endif
+
 #ifndef SWIFT_DEBUG_THREADPOOL
   if (dump_threadpool) {
     printf(
@@ -542,9 +556,12 @@ int main(int argc, char *argv[]) {
   if (with_mpole_reconstruction && nr_nodes > 1)
     error("Cannot reconstruct m-poles every step over MPI (yet).");
   if (with_limiter) error("Can't run with time-step limiter over MPI (yet)");
+#ifdef WITH_LOGGER
+  error("Can't run with the particle logger over MPI (yet)");
+#endif
 #endif
 
-    /* Temporary early aborts for modes not supported with hand-vec. */
+  /* Temporary early aborts for modes not supported with hand-vec. */
 #if defined(WITH_VECTORIZATION) && defined(GADGET2_SPH) && \
     !defined(CHEMISTRY_NONE)
   error(
@@ -1261,6 +1278,13 @@ int main(int argc, char *argv[]) {
       task_dump_stats(dumpfile, &e, /* header = */ 0, /* allranks = */ 1);
     }
 
+#ifdef SWIFT_CELL_GRAPH
+    /* Dump the cell data using the given frequency. */
+    if (dump_cells && (dump_cells == 1 || j % dump_cells == 1)) {
+      space_write_cell_hierarchy(e.s, j + 1);
+    }
+#endif
+
       /* Dump memory use report if collected. */
 #ifdef SWIFT_MEMUSE_REPORTS
     {
diff --git a/examples/parameter_example.yml b/examples/parameter_example.yml
index 3b2aa8b8eed305051829603b73d6fbe62048e573..c029a0a5b862a2d20188c87873f71179a45f20e8 100644
--- a/examples/parameter_example.yml
+++ b/examples/parameter_example.yml
@@ -54,7 +54,8 @@ Stars:
   h_tolerance:          1e-4     # (Optional) Relative accuracy of the Netwon-Raphson scheme for the smoothing lengths. Defaults to the SPH value.
   max_ghost_iterations: 30       # (Optional) Maximal number of iterations allowed to converge towards the smoothing length. Defaults to the SPH value.
   max_volume_change:    1.4      # (Optional) Maximal allowed change of kernel volume over one time-step. Defaults to the SPH value.
-  birth_time:           -1       # (Optional) Initial birth time of *all* the stars. If not -1, this value will overwrite all the values read from the ICs.
+  overwrite_birth_time:  0       # (Optional) Do we want to overwrite the birth time of the stars read from the ICs? (default: 0).
+  birth_time:           -1       # (Optional) Initial birth times of *all* the stars to be used if we are overwriting them. (-1 means the stars remain inactive feedback-wise throughout the run).
 
 # Parameters for the self-gravity scheme
 Gravity:
@@ -134,9 +135,9 @@ Snapshots:
 # Parameters governing the logger snapshot system
 Logger:
   delta_step:           10     # Update the particle log every this many updates
-  initial_buffer_size:  1      # buffer size in GB
-  buffer_scale:		10     # (Optional) When buffer size is too small, update it with required memory times buffer_scale
   basename:             index  # Common part of the filenames
+  initial_buffer_size:  1      # (Optional) Buffer size in GB
+  buffer_scale:         10     # (Optional) When buffer size is too small, update it with required memory times buffer_scale
   
 # Parameters governing the conserved quantities statistics
 Statistics:
diff --git a/logger/Makefile.am b/logger/Makefile.am
new file mode 100644
index 0000000000000000000000000000000000000000..3bfd5af848c504d50fe201e02f49186287fbfb5a
--- /dev/null
+++ b/logger/Makefile.am
@@ -0,0 +1,73 @@
+# This file is part of SWIFT.
+# Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk),
+#                    Matthieu Schaller (matthieu.schaller@durham.ac.uk).
+#                    Loic Hausammann (loic.hausammann@epfl.ch)
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+# Add the non-standard paths to the included library headers
+AM_CFLAGS = $(PYTHON_INCS) -I$(top_srcdir)/src $(HDF5_CPPFLAGS) $(GSL_INCS) $(FFTW_INCS) $(GRACKLE_INCS)
+
+
+AM_LDFLAGS = $(HDF5_LDFLAGS)
+
+# Assign a "safe" version number
+BIN_LDFLAGS = -version-info 0:0:0
+
+# The git command, if available.
+GIT_CMD = @GIT_CMD@
+
+# Additional dependencies for shared libraries.
+EXTRA_LIBS = $(PROFILER_LIBS) $(TCMALLOC_LIBS) $(JEMALLOC_LIBS) $(TBBMALLOC_LIBS) $(HDF5_LIBS) $(FFTW_LIBS) $(GRACKLE_LIBS) \
+	$(VELOCIRAPTOR_LIBS) $(GSL_LIBS) -L../src/.libs -lswiftsim
+
+# MPI libraries.
+# MPI_LIBS = $(MPI_THREAD_LIBS)
+# MPI_FLAGS = -DWITH_MPI
+
+# Build the liblogger library
+lib_LTLIBRARIES = liblogger.la
+# Build a MPI-enabled version too?
+# if HAVEMPI
+# lib_LTLIBRARIES += liblogger_mpi.la
+# endif
+
+# subdirectories
+SUBDIRS = tests
+
+# List required headers
+include_HEADERS = logger_header.h logger_loader_io.h logger_particle.h logger_time.h logger_tools.h logger_reader.h \
+	logger_logfile.h
+
+# Common source files
+AM_SOURCES = logger_header.c logger_loader_io.c logger_particle.c logger_time.c logger_tools.c logger_reader.c \
+	logger_logfile.c
+if HAVEPYTHON
+AM_SOURCES += logger_python_wrapper.c
+endif
+
+# Include files for distribution, not installation.
+nobase_noinst_HEADERS = 
+
+# Sources and flags for regular library
+liblogger_la_SOURCES = $(AM_SOURCES)
+liblogger_la_CFLAGS = $(AM_CFLAGS)
+liblogger_la_LDFLAGS = $(AM_LDFLAGS) $(EXTRA_LIBS) $(BIN_LDFLAGS)
+
+# Sources and flags for MPI library
+# liblogger_mpi_la_SOURCES = $(AM_SOURCES)
+# liblogger_mpi_la_CFLAGS = $(AM_CFLAGS) $(MPI_FLAGS)
+# liblogger_mpi_la_LDFLAGS = $(AM_LDFLAGS) $(MPI_LIBS) $(EXTRA_LIBS)
+# liblogger_mpi_la_SHORTNAME = mpi
+# liblogger_mpi_la_LIBADD =
diff --git a/logger/logger_header.c b/logger/logger_header.c
new file mode 100644
index 0000000000000000000000000000000000000000..61e5da246c9aa07eeeb42e751832f017fa04ca0a
--- /dev/null
+++ b/logger/logger_header.c
@@ -0,0 +1,196 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2019 Loic Hausammann (loic.hausammann@epfl.ch)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+#include "logger_header.h"
+
+#include "logger_loader_io.h"
+#include "logger_logfile.h"
+#include "logger_reader.h"
+#include "logger_tools.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* Name of each offset direction, indexed by #logger_offset_direction. */
+const char *logger_offset_name[logger_offset_count] = {
+    [logger_offset_backward] = "Backward",
+    [logger_offset_forward] = "Forward",
+    [logger_offset_corrupted] = "Corrupted",
+};
+
+/**
+ * @brief Print the properties of the header through message().
+ *
+ * @param h The #header (its masks must have been read).
+ */
+void header_print(const struct header *h) {
+#ifdef SWIFT_DEBUG_CHECKS
+  message("Debug checks enabled.");
+#endif
+  message("First Offset:     %zu.", h->offset_first_record);
+  message("Offset direction: %s.", logger_offset_name[h->offset_direction]);
+  message("Number masks:     %u.", h->number_mask);
+  /* Describe each mask: its name, its value and the size of its data. */
+  for (size_t i = 0; i < h->number_mask; i++) {
+    message("  Mask:  %s.", h->masks[i].name);
+    message("  Value: %u.", h->masks[i].mask);
+    message("  Size:  %i.", h->masks[i].size);
+    message("");
+  }
+}
+
+/** @brief Free the masks of @p h (allocated by #header_read) and reset the
+ *  pointer so that a repeated call is a harmless free(NULL). */
+void header_free(struct header *h) {
+  free(h->masks);
+  h->masks = NULL;
+}
+
+/**
+ * @brief Look up a field by name among the masks of the header.
+ *
+ * @param h The #header.
+ * @param field name of the requested field.
+ * @return Index of the first mask named @p field (-1 if not found).
+ */
+int header_get_field_index(const struct header *h, const char *field) {
+  const size_t n_masks = h->number_mask;
+  size_t k = 0;
+  while (k < n_masks) {
+    if (!strcmp(h->masks[k].name, field)) return k;
+    k++;
+  }
+  return -1;
+}
+
+/**
+ * @brief Set a new offset direction, both in the in-memory #header and
+ * in the mapped logfile.
+ *
+ * @param h #header file structure.
+ * @param new_value The new value to write.
+ *
+ */
+void header_change_offset_direction(struct header *h,
+                                    enum logger_offset_direction new_value) {
+  /* Skip the file format and the two version numbers. */
+  const size_t skip = LOGGER_VERSION_SIZE + 2 * sizeof(int);
+  void *const target = h->log->log.map + skip;
+
+  h->offset_direction = new_value;
+  logger_loader_io_write_data(target, sizeof(unsigned int), &new_value);
+}
+
+/**
+ * @brief Read the logger header (any inconsistency is reported by error()).
+ *
+ * @param h out: The #header (release its masks with #header_free).
+ * @param log The #logger_logfile (its map and reader must already be set).
+ */
+void header_read(struct header *h, struct logger_logfile *log) {
+  void *map = log->log.map;
+
+  /* Set pointer to log. */
+  h->log = log;
+
+  /* read the file format. */
+  char file_format[STRING_SIZE];
+  map = logger_loader_io_read_data(map, LOGGER_VERSION_SIZE, &file_format);
+  if (strcmp(file_format, "SWIFT_LOGGER"))
+    error("Wrong file format (%s).", file_format);
+
+  /* Read the major then the minor version number. */
+  map = logger_loader_io_read_data(map, sizeof(int), &h->major_version);
+  map = logger_loader_io_read_data(map, sizeof(int), &h->minor_version);
+
+  struct logger_reader *reader = log->reader;
+  if (&reader->log != log) error("Wrong link to the reader.");
+
+  if (reader->verbose > 0)
+    message("File version %i.%i.", h->major_version, h->minor_version);
+
+  /* Read the offset directions. */
+  map = logger_loader_io_read_data(map, sizeof(int), &h->offset_direction);
+
+  if (!header_is_forward(h) && !header_is_backward(h) &&
+      !header_is_corrupted(h))
+    error("Wrong offset value in the header (%i).", h->offset_direction);
+
+  /* Read offset to first record. The file stores only LOGGER_OFFSET_SIZE
+     bytes, so clear the destination first to define its remaining bytes. */
+  h->offset_first_record = 0;
+  map = logger_loader_io_read_data(map, LOGGER_OFFSET_SIZE,
+                                   &h->offset_first_record);
+
+  /* Read the size of the strings. */
+  map =
+      logger_loader_io_read_data(map, sizeof(unsigned int), &h->string_length);
+
+  /* Check if value defined in this file is large enough. */
+  if (STRING_SIZE < h->string_length) {
+    error("Name too large in log file %u.", h->string_length);
+  }
+
+  /* Read the number of masks. */
+  map = logger_loader_io_read_data(map, sizeof(unsigned int), &h->number_mask);
+
+  /* Allocate the masks memory (malloc(0) may legitimately return NULL). */
+  h->masks = malloc(sizeof(struct mask_data) * h->number_mask);
+  if (h->masks == NULL && h->number_mask > 0)
+    error("Failed to allocate the memory for the masks.");
+
+  /* Loop over all masks. */
+  for (size_t i = 0; i < h->number_mask; i++) {
+    /* Read the mask name. */
+    map = logger_loader_io_read_data(map, h->string_length, h->masks[i].name);
+    /* Set the mask value. */
+    h->masks[i].mask = 1 << i;
+    /* Read the mask data size. */
+    map = logger_loader_io_read_data(map, sizeof(unsigned int),
+                                     &h->masks[i].size);
+  }
+
+  /* Check the logfile header's size. */
+  if (map != log->log.map + h->offset_first_record) {
+    header_print(h);
+    size_t offset = map - log->log.map;
+    error("Wrong header size (in header %zu, current %zu).",
+          h->offset_first_record, offset);
+  }
+}
+
+/**
+ * @brief Sum the sizes of the fields selected by a mask (without the record
+ * header).
+ *
+ * @param h #header file structure.
+ * @param mask Mask to compute.
+ * @return Sum of the size of every header mask that shares a bit with @p mask.
+ */
+size_t header_get_record_size_from_mask(const struct header *h,
+                                        const size_t mask) {
+  size_t total = 0;
+  const unsigned int n_masks = h->number_mask;
+  /* Accumulate the size of each selected field. */
+  for (unsigned int k = 0; k < n_masks; k++) {
+    const struct mask_data *m = &h->masks[k];
+    if ((mask & m->mask) != 0) total += m->size;
+  }
+  return total;
+}
diff --git a/logger/logger_header.h b/logger/logger_header.h
new file mode 100644
index 0000000000000000000000000000000000000000..c388ef65cda21d00f53ddc54e97f43671edf1aeb
--- /dev/null
+++ b/logger/logger_header.h
@@ -0,0 +1,119 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2019 Loic Hausammann (loic.hausammann@epfl.ch)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+#ifndef LOGGER_LOGGER_HEADER_H
+#define LOGGER_LOGGER_HEADER_H
+
+#include "logger_tools.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#define LOGGER_VERSION_SIZE 20
+#define LOGGER_OFFSET_SIZE 7
+#define LOGGER_MASK_SIZE 1
+
+enum logger_offset_direction {
+  logger_offset_backward = 0, /* Triggers a reversal when a log is opened. */
+  logger_offset_forward,      /* Written once a reversal is complete. */
+  logger_offset_corrupted,    /* Written while a reversal is in progress. */
+  /* Number of offset types; sizes logger_offset_name, so keep it last. */
+  logger_offset_count,
+};
+
+/**
+ * @brief Names of the offset directions.
+ */
+extern const char *logger_offset_name[];
+
+struct logger_logfile;
+
+/**
+ * @brief This structure contains everything from the file header.
+ *
+ * This structure is initialized by #header_read and needs to be freed
+ * with #header_free.
+ *
+ * The information contained in the header can be easily accessed with
+ * the functions #header_get_record_size_from_mask and #header_get_field_index.
+ *
+ * The only function that modifies the file is #header_change_offset_direction.
+ */
+struct header {
+  /* Dump's major version. */
+  int major_version;
+
+  /* Dump's minor version. */
+  int minor_version;
+
+  /* Offset of the first record (the header ends where it starts). */
+  size_t offset_first_record;
+
+  /* Number of bytes used by each mask name in the file. */
+  unsigned int string_length;
+
+  /* Number of masks. */
+  unsigned int number_mask;
+
+  /* Array of number_mask masks, allocated by #header_read. */
+  struct mask_data *masks;
+
+  /* Direction of the offset in the records. */
+  enum logger_offset_direction offset_direction;
+
+  /* The corresponding log. */
+  struct logger_logfile *log;
+};
+
+void header_print(const struct header *h);
+void header_free(struct header *h);
+int header_get_field_index(const struct header *h, const char *field);
+void header_read(struct header *h, struct logger_logfile *log);
+size_t header_get_record_size_from_mask(const struct header *h,
+                                        const size_t mask);
+void header_change_offset_direction(struct header *h,
+                                    enum logger_offset_direction new_value);
+
+/**
+ * @brief Tell if the offsets are forward (non-zero if so, 0 otherwise).
+ * @param h The #header.
+ */
+__attribute__((always_inline)) INLINE static int header_is_forward(
+    const struct header *h) {
+  return logger_offset_forward == h->offset_direction;
+}
+
+/**
+ * @brief Tell if the offsets are backward (non-zero if so, 0 otherwise).
+ * @param h The #header.
+ */
+__attribute__((always_inline)) INLINE static int header_is_backward(
+    const struct header *h) {
+  return logger_offset_backward == h->offset_direction;
+}
+
+/**
+ * @brief Tell if the offsets are corrupted (non-zero if so, 0 otherwise).
+ * @param h The #header.
+ */
+__attribute__((always_inline)) INLINE static int header_is_corrupted(
+    const struct header *h) {
+  return logger_offset_corrupted == h->offset_direction;
+}
+
+#endif  // LOGGER_LOGGER_HEADER_H
diff --git a/logger/logger_loader_io.c b/logger/logger_loader_io.c
new file mode 100644
index 0000000000000000000000000000000000000000..f18f9bb7eb2eaf88ba11eaf916c0a68a27cfd2d2
--- /dev/null
+++ b/logger/logger_loader_io.c
@@ -0,0 +1,95 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2019 Loic Hausammann (loic.hausammann@epfl.ch)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "logger_header.h"
+#include "logger_loader_io.h"
+#include "logger_tools.h"
+
+/**
+ * @brief Query the size of an open file with fstat.
+ *
+ * @param fd file id.
+ *
+ * @return file size.
+ */
+size_t logger_loader_io_get_file_size(int fd) {
+  struct stat info;
+  if (fstat(fd, &info) != 0)
+    error("Unable to get file size (%s).", strerror(errno));
+  return info.st_size;
+}
+
+/**
+ * @brief Map a whole file in memory.
+ *
+ * The file is opened, its size is queried, the full content is mapped as a
+ * shared mapping and the file descriptor is closed again.
+ * Failures to open or to map the file are reported through error().
+ *
+ * #logger_loader_io_munmap_file should be called to unmap the file.
+ *
+ * @param filename file to read.
+ * @param file_size (out) size of the file.
+ * @param read_only Open the file in read only mode?
+ *
+ * @return The mapped memory.
+ */
+void *logger_loader_io_mmap_file(char *filename, size_t *file_size,
+                                 int read_only) {
+  /* Pick the open flags and the matching mapping protection. */
+  const int flags = read_only ? O_RDONLY : O_RDWR;
+  const int mode = read_only ? PROT_READ : (PROT_READ | PROT_WRITE);
+
+  /* open the file. */
+  const int fd = open(filename, flags);
+  if (fd == -1)
+    error("Unable to open file %s (%s).", filename, strerror(errno));
+
+  /* get the file size. */
+  *file_size = logger_loader_io_get_file_size(fd);
+
+  /* map the memory. */
+  void *map = mmap(NULL, *file_size, mode, MAP_SHARED, fd, 0);
+  if (map == MAP_FAILED)
+    error("Failed to allocate map of size %zi bytes (%s).", *file_size,
+          strerror(errno));
+
+  /* The mapping stays valid after the descriptor is closed. */
+  close(fd);
+
+  return map;
+}
+
+/**
+ * @brief Release a mapping created by #logger_loader_io_mmap_file.
+ *
+ * @param map file mapping.
+ * @param file_size The file size.
+ *
+ */
+void logger_loader_io_munmap_file(void *map, size_t file_size) {
+  /* munmap returns 0 on success. */
+  const int status = munmap(map, file_size);
+  if (status == 0) return;
+  error("Unable to unmap the file (%s).", strerror(errno));
+}
diff --git a/logger/logger_loader_io.h b/logger/logger_loader_io.h
new file mode 100644
index 0000000000000000000000000000000000000000..d44fea673017644306e73261afdbc6dec26948c6
--- /dev/null
+++ b/logger/logger_loader_io.h
@@ -0,0 +1,98 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2019 Loic Hausammann (loic.hausammann@epfl.ch)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+/**
+ * @file logger_loader_io.h
+ * @brief This file contains basic IO function.
+ */
+#ifndef LOGGER_LOGGER_LOADER_IO_H
+#define LOGGER_LOGGER_LOADER_IO_H
+
+#include "logger_header.h"
+#include "logger_tools.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+size_t logger_loader_io_get_file_size(int fd);
+void *logger_loader_io_mmap_file(char *filename, size_t *file_size,
+                                 int read_only);
+void logger_loader_io_munmap_file(void *map, size_t file_size);
+
+/**
+ * @brief Read the header of a record: its mask and its offset.
+ *
+ * @param h #header file structure (not used by this function).
+ * @param data Pointer to the data to read.
+ * @param mask (output) mask read from the data (skipped if NULL).
+ * @param diff_offset (output) offset difference to previous/next corresponding
+ * record (skipped if NULL).
+ *
+ * @return memory after the record header.
+ */
+__attribute__((always_inline)) INLINE static void *logger_loader_io_read_mask(
+    const struct header *h, void *data, size_t *mask, size_t *diff_offset) {
+  /* Record header layout: the mask first, then the offset. */
+  char *const mask_pos = data;
+  char *const offset_pos = mask_pos + LOGGER_MASK_SIZE;
+
+  /* read mask */
+  if (mask != NULL) {
+    *mask = 0;
+    memcpy(mask, mask_pos, LOGGER_MASK_SIZE);
+  }
+  /* read offset */
+  if (diff_offset != NULL) {
+    *diff_offset = 0;
+    memcpy(diff_offset, offset_pos, LOGGER_OFFSET_SIZE);
+  }
+  return offset_pos + LOGGER_OFFSET_SIZE;
+}
+
+/**
+ * @brief Copy a block of bytes out of the mapped data.
+ *
+ * @param data Pointer to the data to read.
+ * @param size size of the data to read.
+ * @param p pointer where to store the data.
+ * @return memory after the data read.
+ */
+__attribute__((always_inline)) INLINE static void *logger_loader_io_read_data(
+    void *data, const size_t size, void *p) {
+  char *const source = data;
+  memcpy(p, source, size);
+  return source + size;
+}
+
+/**
+ * @brief Copy a block of bytes into the mapped data.
+ *
+ * @param data Pointer to the mapped memory to overwrite.
+ * @param size size of the data to write.
+ * @param p pointer to the data.
+ *
+ * @return memory after the data written.
+ */
+__attribute__((always_inline)) INLINE static void *logger_loader_io_write_data(
+    void *data, const size_t size, const void *p) {
+  char *const target = data;
+  memcpy(target, p, size);
+  return target + size;
+}
+
+#endif  // LOGGER_LOGGER_LOADER_IO_H
diff --git a/logger/logger_logfile.c b/logger/logger_logfile.c
new file mode 100644
index 0000000000000000000000000000000000000000..c70068cd24c01a5ba231e97e343a0c076dc0ecb4
--- /dev/null
+++ b/logger/logger_logfile.c
@@ -0,0 +1,175 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2019 Loic Hausammann (loic.hausammann@epfl.ch)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+#include "logger_logfile.h"
+#include "logger_loader_io.h"
+#include "logger_reader.h"
+
+/**
+ * @brief Initialize the #logger_logfile from a file on disk.
+ *
+ * The file is mapped read only and its header is read (and printed if the
+ * reader is verbose). Unless only the header is requested, a file flagged
+ * as corrupted is refused, backward offsets are reversed with
+ * #logger_logfile_reverse_offset (which modifies the file) and the time
+ * array is populated.
+ * Free the structure with #logger_logfile_free.
+ *
+ * @param log The #logger_logfile, it must be the `log` member of @p reader.
+ * @param filename the log's filename.
+ * @param reader The #logger_reader.
+ * @param only_header Read only the header.
+ */
+void logger_logfile_init_from_file(struct logger_logfile *log, char *filename,
+                                   struct logger_reader *reader,
+                                   int only_header) {
+
+  /* Link the log to its reader and verify the reverse link. */
+  log->reader = reader;
+  if (&reader->log != log) error("Wrong link to the reader.");
+
+  /* Start from an empty time array. */
+  time_array_init(&log->times);
+
+  /* Map the whole file (read only) and record its size. */
+  if (reader->verbose > 1) message("Mapping the log file.");
+  size_t *const size = &log->log.file_size;
+  log->log.map = logger_loader_io_mmap_file(filename, size, /* read_only */ 1);
+
+  /* Parse the header. */
+  struct header *const header = &log->header;
+  if (reader->verbose > 1) message("Reading the header.");
+  header_read(header, log);
+
+  /* Show it if asked. */
+  if (reader->verbose > 0) header_print(header);
+
+  /* Nothing else to do when only the header is requested. */
+  if (only_header) return;
+
+  /* Refuse a file whose offsets are flagged as corrupted. */
+  if (header_is_corrupted(header)) error("The offsets have been corrupted.");
+
+  /* Backward offsets are turned into forward ones. */
+  if (header_is_backward(header)) logger_logfile_reverse_offset(log, filename);
+
+  /* Fill the time array from the log. */
+  if (reader->verbose > 1) message("Reading the time stamps.");
+  time_array_populate(&log->times, log);
+
+  /* Show it if asked. */
+  if (reader->verbose > 0) {
+    time_array_print(&log->times);
+  }
+}
+
+/** @brief Unmap the log file, then free the time array of @p log. */
+void logger_logfile_free(struct logger_logfile *log) {
+  /* NOTE(review): the header masks are not released here, confirm who does. */
+  void *const mapped = log->log.map;
+  const size_t mapped_size = log->log.file_size;
+
+  logger_loader_io_munmap_file(mapped, mapped_size);
+  /* Release the time records. */
+  time_array_free(&log->times);
+}
+
+/**
+ * @brief Reverse the record offsets of the log file, in place (backward to
+ * forward). The header is marked corrupted while the file is being modified.
+ * @param log The #logger_logfile (its header must have backward offsets).
+ * @param filename The log's filename (remapped writable, then read only).
+ */
+void logger_logfile_reverse_offset(struct logger_logfile *log, char *filename) {
+
+  /* Unmap the file and map it again, writable this time. */
+  logger_loader_io_munmap_file(log->log.map, log->log.file_size);
+  log->log.map = logger_loader_io_mmap_file(filename, &log->log.file_size,
+                                            /* read_only */ 0);
+
+  /* Get pointers to the header and the reader. */
+  struct header *header = &log->header;
+  const struct logger_reader *reader = log->reader;
+  if (&reader->log != log) error("Wrong link to the reader.");
+
+  /* Only backward offsets can be reversed. */
+  if (!header_is_backward(header)) {
+    error("The offsets are already reversed.");
+  }
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (reader->verbose > 0) {
+    message("Check record's headers...");
+  }
+
+  /* Walk the records before the reversal and check each of them. */
+  for (size_t offset_debug = header->offset_first_record;
+       offset_debug < log->log.file_size;
+       offset_debug = tools_check_record_consistency(reader, offset_debug)) {
+  }
+
+  if (reader->verbose > 0) {
+    message("Record's headers are correct.");
+  }
+#endif
+
+  message("WARNING: Modifying the logfile, do not kill the job!");
+
+  /* Flag the file as corrupted for as long as the offsets are inconsistent. */
+  header_change_offset_direction(header, logger_offset_corrupted);
+
+  if (reader->verbose > 0) {
+    message("Reversing offsets...");
+  }
+
+  /* Reverse the offset of every record, from the first one to the end. */
+  for (size_t offset = header->offset_first_record; offset < log->log.file_size;
+       offset = tools_reverse_offset(header, log->log.map, offset)) {
+  }
+
+  if (reader->verbose > 0) {
+    message("Reversing done.");
+  }
+
+  /* Now that the offsets are effectively reversed, the direction can be set
+     to forward. */
+  header_change_offset_direction(header, logger_offset_forward);
+
+  message("WARNING: Modification done, you can now safely kill the job.");
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (reader->verbose > 0) {
+    message("Check record's headers...");
+  }
+
+  /* Walk the records again, after the reversal, and check each of them. */
+  for (size_t offset_debug = header->offset_first_record;
+       offset_debug < log->log.file_size;
+       offset_debug = tools_check_record_consistency(reader, offset_debug)) {
+  }
+
+  if (reader->verbose > 0) {
+    message("Record's headers are correct.");
+  }
+#endif
+
+  /* Unmap the file and map it again, read only. */
+  logger_loader_io_munmap_file(log->log.map, log->log.file_size);
+  log->log.map = logger_loader_io_mmap_file(filename, &log->log.file_size,
+                                            /* read_only */ 1);
+}
diff --git a/logger/logger_logfile.h b/logger/logger_logfile.h
new file mode 100644
index 0000000000000000000000000000000000000000..0b6ef728d524bb104b83fc28b9250c51a764dfd4
--- /dev/null
+++ b/logger/logger_logfile.h
@@ -0,0 +1,69 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2019 Loic Hausammann (loic.hausammann@epfl.ch)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+/**
+ * @file logger_logfile.h
+ * @brief This file contains the high level function for the log.
+ */
+#ifndef LOGGER_LOGGER_LOGFILE_H
+#define LOGGER_LOGGER_LOGFILE_H
+
+#include "logger_header.h"
+#include "logger_time.h"
+
+struct logger_reader;
+
+/**
+ * @brief This structure deals with the log file.
+ *
+ * This structure is initialized by the #logger_reader
+ * and deals with the log file.
+ * It maps the file, reverses the offsets (if required) and unmaps it.
+ *
+ * The structure is initialized with #logger_logfile_init_from_file and
+ * freed with #logger_logfile_free.
+ */
+struct logger_logfile {
+
+  /* Information contained in the file header. */
+  struct header header;
+
+  /* The reader that is using this log file. */
+  struct logger_reader *reader;
+
+  /* Information about the time records. */
+  struct time_array times;
+
+  /* The log's variables. */
+  struct {
+    /* Start of the mapped file. */
+    void *map;
+
+    /* Size of the file, which is also the size of the mapping. */
+    size_t file_size;
+
+  } log;
+};
+
+void logger_logfile_init_from_file(struct logger_logfile *log, char *filename,
+                                   struct logger_reader *reader,
+                                   int only_header);
+void logger_logfile_reverse_offset(struct logger_logfile *log, char *filename);
+void logger_logfile_free(struct logger_logfile *log);
+
+#endif  // LOGGER_LOGGER_LOGFILE_H
diff --git a/logger/logger_particle.c b/logger/logger_particle.c
new file mode 100644
index 0000000000000000000000000000000000000000..6809e0edf6125e66cbb8807cc98eeb31b5e04ecd
--- /dev/null
+++ b/logger/logger_particle.c
@@ -0,0 +1,253 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2019 Loic Hausammann (loic.hausammann@epfl.ch)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+#include "logger_particle.h"
+#include "logger_header.h"
+#include "logger_loader_io.h"
+#include "logger_reader.h"
+#include "logger_time.h"
+#include "logger_tools.h"
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+/**
+ * @brief Print the properties of a #logger_particle, one message() call each.
+ * NOTE(review): the id is printed with %lu; confirm it matches its type.
+ * @param p The #logger_particle to print.
+ */
+void logger_particle_print(const struct logger_particle *p) {
+  message("ID:            %lu.", p->id);
+  message("Mass:          %g", p->mass);
+  message("Time:          %g.", p->time);
+  message("Cutoff Radius: %g.", p->h);
+  message("Positions:     (%g, %g, %g).", p->pos[0], p->pos[1], p->pos[2]);
+  message("Velocities:    (%g, %g, %g).", p->vel[0], p->vel[1], p->vel[2]);
+  message("Accelerations: (%g, %g, %g).", p->acc[0], p->acc[1], p->acc[2]);
+  message("Entropy:       %g.", p->entropy);
+  message("Density:       %g.", p->density);
+}
+
+/**
+ * @brief Reset a logger_particle: null vectors, -1 scalars, SIZE_MAX id.
+ *
+ * @param part The #logger_particle to initialize (its time is not modified).
+ */
+void logger_particle_init(struct logger_particle *part) {
+  /* Scalar fields first. */
+  part->id = SIZE_MAX;
+  part->mass = -1;
+  part->h = -1;
+  part->density = -1;
+  part->entropy = -1;
+
+  /* Then each component of the vectors. */
+  for (size_t dim = 0; dim < DIM; dim++) {
+    part->pos[dim] = part->vel[dim] = part->acc[dim] = 0;
+  }
+}
+
+/**
+ * @brief Read a single named entry for a particle.
+ *
+ * @param part The #logger_particle to update.
+ * @param map The mapped data.
+ * @param field field to read.
+ * @param size number of bytes to read.
+ *
+ * @return mapped data after the block read.
+ */
+void *logger_particle_read_field(struct logger_particle *part, void *map,
+                                 const char *field, const size_t size) {
+  /* "consts" packs the mass (a float) directly followed by the id: split
+     them straight from the mapped data, no temporary copy is needed. */
+  if (strcmp("consts", field) == 0) {
+    /* Refuse a block too short to hold both values (would over-read). */
+    if (size < sizeof(float) + sizeof(size_t))
+      error("Field consts is too small (%zu bytes).", size);
+
+    part->mass = 0;
+    part->id = 0;
+    memcpy(&part->mass, map, sizeof(float));
+    memcpy(&part->id, (char *)map + sizeof(float), sizeof(size_t));
+
+    return (char *)map + size;
+  }
+
+  /* Get the correct pointer. */
+  void *p = NULL;
+  if (strcmp("positions", field) == 0) {
+    p = &part->pos;
+  } else if (strcmp("velocities", field) == 0) {
+    p = &part->vel;
+  } else if (strcmp("accelerations", field) == 0) {
+    p = &part->acc;
+  } else if (strcmp("entropy", field) == 0) {
+    p = &part->entropy;
+  } else if (strcmp("smoothing length", field) == 0) {
+    p = &part->h;
+  } else if (strcmp("density", field) == 0) {
+    p = &part->density;
+  } else {
+    error("Type %s not defined.", field);
+  }
+
+  /* read the data. */
+  map = logger_loader_io_read_data(map, size, p);
+  return map;
+}
+
+/**
+ * @brief Read a particle entry in the log file.
+ *
+ * @param part The #logger_particle to update.
+ * @param reader The #logger_reader.
+ * @param offset offset of the record to read.
+ * @param time time to interpolate (not used with logger_reader_const).
+ * @param reader_type #logger_reader_type.
+ *
+ * @return offset just after the record read at @p offset.
+ */
+size_t logger_particle_read(struct logger_particle *part,
+                            const struct logger_reader *reader, size_t offset,
+                            const double time,
+                            const enum logger_reader_type reader_type) {
+
+  /* Get a few pointers. */
+  const struct header *h = &reader->log.header;
+  void *map = reader->log.log.map;
+
+  const struct time_array *times = &reader->log.times;
+
+  size_t mask = 0;
+  size_t h_offset = 0;
+
+  logger_particle_init(part);
+
+  /* Read the record's mask. */
+  map = logger_loader_io_read_mask(h, map + offset, &mask, &h_offset);
+
+  /* A time record is not a particle (presumably mask 128, to confirm). */
+  if (mask == 128) error("Unexpected mask: %lu.", mask);
+
+  /* Read all the fields. */
+  for (size_t i = 0; i < h->number_mask; i++) {
+    if (mask & h->masks[i].mask) {
+      map = logger_particle_read_field(part, map, h->masks[i].name,
+                                       h->masks[i].size);
+    }
+  }
+
+  /* Get the time of the current record.
+     The size check is required when the file is manipulated before
+     the initialization of the time_array. */
+  if (times->size != 0) {
+    part->time = time_array_get_time(times, offset);
+  } else
+    part->time = -1;
+
+  /* The offset now points just after the record. */
+  offset = (size_t)(map - reader->log.log.map);
+
+  /* Without interpolation, the record alone is the result. */
+  if (reader_type == logger_reader_const) return offset;
+
+  /* Start reading next record. */
+  struct logger_particle part_next;
+
+  /* Check that the offset are in the correct direction. */
+  if (!header_is_forward(h)) {
+    error("Cannot read a particle with non forward offsets.");
+  }
+
+  /* No next record for this particle: nothing to interpolate with. */
+  if (h_offset == 0) return (size_t)(map - reader->log.log.map);
+
+  /* Rewind to the start of this record to get the absolute next offset. */
+  h_offset += offset - header_get_record_size_from_mask(h, mask) -
+              LOGGER_MASK_SIZE - LOGGER_OFFSET_SIZE;
+
+  /* Get time of next record. */
+  part_next.time = time_array_get_time(times, h_offset);
+
+  /* Read next record (as is, without a further interpolation). */
+  h_offset = logger_particle_read(&part_next, reader, h_offset, part_next.time,
+                                  logger_reader_const);
+
+  /* Interpolate the two particles. */
+  logger_particle_interpolate(part, &part_next, time);
+
+  return offset;
+}
+
+/**
+ * @brief Linearly interpolate a particle between two records.
+ *
+ * Positions, velocities, accelerations and the entropy of @p part_curr are
+ * moved towards the values of @p part_next and its time is set to @p time,
+ * the other fields are kept.
+ * Both particle times must differ (only checked with SWIFT_DEBUG_CHECKS).
+ *
+ * @param part_curr #logger_particle In: current particle (before time), Out:
+ * interpolated particle
+ * @param part_next #logger_particle next particle (after time)
+ * @param time interpolation time
+ *
+ */
+void logger_particle_interpolate(struct logger_particle *part_curr,
+                                 const struct logger_particle *part_next,
+                                 const double time) {
+
+  /* Check that a particle is provided. */
+  if (part_curr == NULL) error("part_curr is NULL.");
+  if (part_next == NULL) error("part_next is NULL.");
+
+#ifdef SWIFT_DEBUG_CHECKS
+  /* Check the particle order. */
+  if (part_curr->time >= part_next->time)
+    error("Wrong particle order (next before current).");
+  if ((time < part_curr->time) || (part_next->time < time))
+    error(
+        "Cannot extrapolate (particle time: %f, "
+        "interpolating time: %f, next particle time: %f).",
+        part_curr->time, time, part_next->time);
+#endif
+
+  /* Fraction of the interval between the two records elapsed at time. */
+  const double interval = part_next->time - part_curr->time;
+  const double scaling = (time - part_curr->time) / interval;
+
+  /* interpolate vectors. */
+  for (size_t k = 0; k < DIM; k++) {
+    const double dpos = part_next->pos[k] - part_curr->pos[k];
+    const float dvel = part_next->vel[k] - part_curr->vel[k];
+    const float dacc = part_next->acc[k] - part_curr->acc[k];
+
+    part_curr->pos[k] += dpos * scaling;
+    part_curr->vel[k] += dvel * scaling;
+    part_curr->acc[k] += dacc * scaling;
+  }
+
+  /* interpolate scalars (only the entropy is interpolated). */
+  const float dentropy = part_next->entropy - part_curr->entropy;
+  part_curr->entropy += dentropy * scaling;
+
+  /* set time. */
+  part_curr->time = time;
+}
diff --git a/logger/logger_particle.h b/logger/logger_particle.h
new file mode 100644
index 0000000000000000000000000000000000000000..addd23564b65a734152ae8f538596d79019dd36f
--- /dev/null
+++ b/logger/logger_particle.h
@@ -0,0 +1,107 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2019 Loic Hausammann (loic.hausammann@epfl.ch)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+#ifndef LOGGER_LOGGER_PARTICLE_H
+#define LOGGER_LOGGER_PARTICLE_H
+
+#include "logger_header.h"
+#include "logger_time.h"
+#include "logger_tools.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#if defined(HYDRO_DIMENSION_1D)
+#define DIM 1
+#elif defined(HYDRO_DIMENSION_2D)
+#define DIM 2
+#elif defined(HYDRO_DIMENSION_3D)
+#define DIM 3
+#endif
+
+struct logger_reader;
+
+/**
+ * @brief Store the data from a record.
+ *
+ * This structure contains all the required fields
+ * present in a file.
+ *
+ * Only a few particles are needed at a time, so it does not need to be small.
+ *
+ * The particle is initialized with #logger_particle_init
+ * and can be updated with a record through #logger_particle_read.
+ *
+ * In #logger_particle_read, we use #logger_particle_read_field on
+ * each field and #logger_particle_interpolate if a linear
+ * interpolation is required.
+ */
+struct logger_particle {
+  /* Position (DIM components, in double precision). */
+  double pos[DIM];
+
+  /* Velocity (DIM components). */
+  float vel[DIM];
+
+  /* Acceleration (DIM components). */
+  float acc[DIM];
+
+  /* Entropy. */
+  float entropy;
+
+  /* Smoothing length. */
+  float h;
+
+  /* Density. */
+  float density;
+
+  /* Mass. */
+  float mass;
+
+  /* Unique id of the particle. */
+  size_t id;
+
+  /* Time of the record (the interpolation time once interpolated). */
+  double time;
+};
+
+/**
+ * @brief Interpolation scheme requested when reading a particle.
+ */
+enum logger_reader_type {
+  logger_reader_const, /* Constant interpolation. */
+  logger_reader_lin,   /* Linear interpolation. */
+};
+
+void logger_particle_print(const struct logger_particle *p);
+
+size_t logger_particle_read(struct logger_particle *part,
+                            const struct logger_reader *reader, size_t offset,
+                            const double time,
+                            const enum logger_reader_type reader_type);
+
+void logger_particle_init(struct logger_particle *part);
+
+void *logger_particle_read_field(struct logger_particle *part, void *map,
+                                 const char *field, const size_t size);
+
+void logger_particle_interpolate(struct logger_particle *part_curr,
+                                 const struct logger_particle *part_next,
+                                 const double time);
+
+#endif  // LOGGER_LOGGER_PARTICLE_H
diff --git a/logger/logger_python_wrapper.c b/logger/logger_python_wrapper.c
new file mode 100644
index 0000000000000000000000000000000000000000..07c87b4989896977c56ddff4df243a5310d393a7
--- /dev/null
+++ b/logger/logger_python_wrapper.c
@@ -0,0 +1,290 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2019 Loic Hausammann (loic.hausammann@epfl.ch)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+#include "logger_header.h"
+#include "logger_loader_io.h"
+#include "logger_particle.h"
+#include "logger_reader.h"
+#include "logger_time.h"
+
+#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
+
+#include <Python.h>
+#include <errno.h>
+#include <numpy/arrayobject.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+/**
+ * @brief Load the particles stored at the given offsets.
+ *
+ * Each particle is read with #logger_particle_read using the linear
+ * interpolation and the time found in the time array for time_offset.
+ *
+ * <b>offset</b> PyArrayObject list of offset for each particle
+ * (1 dimensional array of unsigned 64 bits integers).
+ *
+ * <b>filename</b> string filename of the log file.
+ *
+ * <b>time_offset</b> integer offset in the log file used to look up the time
+ * at which the particles are read.
+ *
+ * <b>verbose</b> Verbose level (optional, 2 by default).
+ *
+ * <b>returns</b> dictionary containing the data read. Only the fields
+ * present in the header of the log file are included, with the keys
+ * positions, velocities, accelerations, entropy, rho, h_sph, mass and id.
+ */
+static PyObject *loadFromIndex(__attribute__((unused)) PyObject *self,
+                               PyObject *args) {
+
+  /* input variables. */
+  PyArrayObject *offset = NULL;
+  char *filename = NULL;
+
+  /* output variables. */
+  PyArrayObject *pos = NULL;
+  PyArrayObject *vel = NULL;
+  PyArrayObject *acc = NULL;
+  PyArrayObject *entropy = NULL;
+  PyArrayObject *h_sph = NULL;
+  PyArrayObject *rho = NULL;
+  PyArrayObject *mass = NULL;
+  PyArrayObject *id = NULL;
+
+  /* The "L" format of PyArg_ParseTuple stores a long long. */
+  long long time_offset = 0;
+  int verbose = 2;
+
+  /* parse arguments. */
+  if (!PyArg_ParseTuple(args, "OsL|i", &offset, &filename, &time_offset,
+                        &verbose))
+    return NULL;
+
+  /* Check the input array: the offsets are read below as size_t from a
+   * 1 dimensional array. */
+  if (!PyArray_Check(offset)) {
+    error("Offset is not a numpy array.");
+  }
+  if (PyArray_NDIM(offset) != 1) {
+    error("Offset is not a 1 dimensional array.");
+  }
+  if (PyArray_TYPE(offset) != NPY_UINT64) {
+    error("Offset does not contain unsigned int.");
+  }
+
+  /* initialize the reader. */
+  struct logger_reader reader;
+  logger_reader_init(&reader, filename, verbose);
+  struct header *h = &reader.log.header;
+
+  /* init array. */
+  npy_intp dim[2];
+  dim[0] = PyArray_DIMS(offset)[0];
+  dim[1] = DIM;
+
+  /* Get required time. */
+  double time = time_array_get_time(&reader.log.times, (size_t)time_offset);
+
+  /* init output. */
+  if (header_get_field_index(h, "positions") != -1) {
+    pos = (PyArrayObject *)PyArray_SimpleNew(2, dim, NPY_DOUBLE);
+  }
+
+  if (header_get_field_index(h, "velocities") != -1) {
+    vel = (PyArrayObject *)PyArray_SimpleNew(2, dim, NPY_FLOAT);
+  }
+
+  if (header_get_field_index(h, "accelerations") != -1) {
+    acc = (PyArrayObject *)PyArray_SimpleNew(2, dim, NPY_FLOAT);
+  }
+
+  if (header_get_field_index(h, "entropy") != -1) {
+    entropy =
+        (PyArrayObject *)PyArray_SimpleNew(1, PyArray_DIMS(offset), NPY_FLOAT);
+  }
+
+  if (header_get_field_index(h, "smoothing length") != -1) {
+    h_sph =
+        (PyArrayObject *)PyArray_SimpleNew(1, PyArray_DIMS(offset), NPY_FLOAT);
+  }
+
+  if (header_get_field_index(h, "density") != -1) {
+    rho =
+        (PyArrayObject *)PyArray_SimpleNew(1, PyArray_DIMS(offset), NPY_FLOAT);
+  }
+
+  if (header_get_field_index(h, "consts") != -1) {
+    mass =
+        (PyArrayObject *)PyArray_SimpleNew(1, PyArray_DIMS(offset), NPY_FLOAT);
+    id = (PyArrayObject *)PyArray_SimpleNew(1, PyArray_DIMS(offset), NPY_ULONG);
+  }
+
+  if (verbose > 1) message("Reading particles.");
+
+  /* loop over all particles. */
+  for (npy_intp i = 0; i < PyArray_DIMS(offset)[0]; i++) {
+    struct logger_particle part;
+
+    /* Get the offset. */
+    size_t offset_particle = *(size_t *)PyArray_GETPTR1(offset, i);
+
+    /* Read the particle. */
+    logger_particle_read(&part, &reader, offset_particle, time,
+                         logger_reader_lin);
+
+    double *dtmp;
+    float *ftmp;
+    size_t *stmp;
+
+    /* copy the data. */
+    for (size_t k = 0; k < DIM; k++) {
+      if (pos) {
+        dtmp = PyArray_GETPTR2(pos, i, k);
+        *dtmp = part.pos[k];
+      }
+
+      if (vel) {
+        ftmp = PyArray_GETPTR2(vel, i, k);
+        *ftmp = part.vel[k];
+      }
+
+      if (acc) {
+        ftmp = PyArray_GETPTR2(acc, i, k);
+        *ftmp = part.acc[k];
+      }
+    }
+
+    if (entropy) {
+      ftmp = PyArray_GETPTR1(entropy, i);
+      *ftmp = part.entropy;
+    }
+
+    if (rho) {
+      ftmp = PyArray_GETPTR1(rho, i);
+      *ftmp = part.density;
+    }
+
+    if (h_sph) {
+      ftmp = PyArray_GETPTR1(h_sph, i);
+      *ftmp = part.h;
+    }
+
+    if (mass) {
+      ftmp = PyArray_GETPTR1(mass, i);
+      *ftmp = part.mass;
+    }
+
+    if (id) {
+      stmp = PyArray_GETPTR1(id, i);
+      *stmp = part.id;
+    }
+  }
+
+  /* Free the memory. */
+  logger_reader_free(&reader);
+
+  /* construct return value: the names and the arrays below are listed in
+   * the same order. */
+  const char *names[] = {"positions", "velocities", "accelerations",
+                         "entropy",   "rho",        "h_sph",
+                         "mass",      "id"};
+  PyArrayObject *arrays[] = {pos, vel, acc, entropy, rho, h_sph, mass, id};
+  const size_t n_arrays = sizeof(arrays) / sizeof(arrays[0]);
+
+  /* If the dictionary cannot be created, NULL is returned below and the
+   * exception set by python is raised. */
+  PyObject *dict = PyDict_New();
+
+  for (size_t k = 0; k < n_arrays; k++) {
+    /* Skip the fields that are not present in the log file. */
+    if (arrays[k] == NULL) continue;
+
+    /* The arrays have at least one dimension, so they are stored as they
+     * are (PyArray_Return only converts 0 dimensional arrays). Give up on
+     * the dictionary if an insertion fails. */
+    if (dict != NULL &&
+        PyDict_SetItemString(dict, names[k], (PyObject *)arrays[k]) != 0)
+      Py_CLEAR(dict);
+
+    /* The dictionary holds its own reference to the array: release ours so
+     * that the array is freed along with the dictionary. */
+    Py_DECREF(arrays[k]);
+  }
+
+  return dict;
+}
+
+/**
+ * @brief Reverse the offsets of a log file.
+ *
+ * <b>filename</b> string filename of the log file
+ * <b>verbose</b> Verbose level (optional, 0 by default)
+ */
+static PyObject *pyReverseOffset(__attribute__((unused)) PyObject *self,
+                                 PyObject *args) {
+  /* Arguments received from python. */
+  char *filename = NULL;
+  int verbose = 0;
+
+  /* Stop here if the arguments cannot be parsed. */
+  const int parsed = PyArg_ParseTuple(args, "s|i", &filename, &verbose);
+  if (!parsed) return NULL;
+
+  /* Initializing the reader reverses the offsets if necessary. */
+  struct logger_reader reader;
+  logger_reader_init(&reader, filename, verbose);
+
+  /* Nothing else is needed from the reader. */
+  logger_reader_free(&reader);
+
+  Py_RETURN_NONE;
+}
+
+/* Methods exposed to python: name, C function, call flags and docstring. */
+
+static PyMethodDef libloggerMethods[] = {
+    {"loadFromIndex", loadFromIndex, METH_VARARGS,
+     "Load snapshot directly from the offset in an index file."},
+    {"reverseOffset", pyReverseOffset, METH_VARARGS,
+     "Reverse the offset (from pointing backward to forward)."},
+
+    {NULL, NULL, 0, NULL} /* Sentinel marking the end of the table */
+};
+
+static struct PyModuleDef libloggermodule = {
+    PyModuleDef_HEAD_INIT,
+    "liblogger", /* m_name */
+    "Module reading a SWIFTsim logger snapshot", /* m_doc */
+    -1, /* m_size */
+    libloggerMethods, /* m_methods */
+    NULL, /* m_slots */
+    NULL, /* m_traverse */
+    NULL, /* m_clear */
+    NULL  /* m_free */
+};
+
+PyMODINIT_FUNC PyInit_liblogger(void) {
+  /* Create the module object; give up if python could not build it. */
+  PyObject *module = PyModule_Create(&libloggermodule);
+  if (!module) return NULL;
+
+  /* Load the numpy C API (this macro returns from the function on failure). */
+  import_array();
+  return module;
+}
diff --git a/logger/logger_reader.c b/logger/logger_reader.c
new file mode 100644
index 0000000000000000000000000000000000000000..0954b9c5a8e56213de4d5b2a445aeeb9105e327c
--- /dev/null
+++ b/logger/logger_reader.c
@@ -0,0 +1,90 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2019 Loic Hausammann (loic.hausammann@epfl.ch)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+#include "logger_reader.h"
+
+/**
+ * @brief Set up a #logger_reader from a log file.
+ *
+ * @param reader The #logger_reader to initialize.
+ * @param filename The name of the log file.
+ * @param verbose The verbose level (progress is reported above 1).
+ */
+void logger_reader_init(struct logger_reader *reader, char *filename,
+                        int verbose) {
+  /* Only report the progress when the user is asking for it. */
+  const int report = verbose > 1;
+  if (report) message("Initializing the reader.");
+
+  /* Keep the verbosity before loading the file, which receives the reader. */
+  reader->verbose = verbose;
+  struct logger_logfile *log = &reader->log;
+  logger_logfile_init_from_file(log, filename, reader, /* only_header */ 0);
+
+  if (report) message("Initialization done.");
+}
+
+/**
+ * @brief Free the reader.
+ *
+ * @param reader The #logger_reader (only its log file is cleaned here).
+ */
+void logger_reader_free(struct logger_reader *reader) {
+  struct logger_logfile *log = &reader->log;
+  logger_logfile_free(log);
+}
+
+/**
+ * @brief Read a record (timestamp or particle)
+ *
+ * @param reader The #logger_reader.
+ * @param lp (out) The #logger_particle (if the record is a particle).
+ * @param time (in/out) The time read (if the record is a timestamp),
+ * otherwise the time given to #logger_particle_read.
+ * @param is_particle (out) Is the record a particle (or a timestamp)?
+ * @param offset The offset in the file.
+ *
+ * @return The offset after this record.
+ */
+size_t reader_read_record(struct logger_reader *reader,
+                          struct logger_particle *lp, double *time,
+                          int *is_particle, size_t offset) {
+
+  struct logger_logfile *log = &reader->log;
+  const struct header *h = &log->header;
+
+  /* Extract the mask of the record. */
+  size_t record_mask = 0;
+  logger_loader_io_read_mask(h, log->log.map + offset, &record_mask, NULL);
+
+  /* Look for the mask of the time records. */
+  const int time_index = header_get_field_index(h, "timestamp");
+  if (time_index == -1) {
+    error("File header does not contain a mask for time.");
+  }
+
+  /* Any record without the time mask is read as a particle. */
+  *is_particle = (h->masks[time_index].mask != record_mask);
+  if (*is_particle)
+    return logger_particle_read(lp, reader, offset, *time, logger_reader_const);
+
+  /* Otherwise, this is a timestamp; only the double time is given back. */
+  integertime_t int_time = 0;
+  return time_read(&int_time, time, reader, offset);
+}
diff --git a/logger/logger_reader.h b/logger/logger_reader.h
new file mode 100644
index 0000000000000000000000000000000000000000..124d271f57587a26dbfb59299678f0ce5cfbdf79
--- /dev/null
+++ b/logger/logger_reader.h
@@ -0,0 +1,81 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2019 Loic Hausammann (loic.hausammann@epfl.ch)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+/**
+ * @file logger_reader.h
+ * @brief This file contains the C functions shown to the external user.
+ *
+ * Here is a quick summary of our different elements:
+ *
+ * The logger is a time adaptive way to write snapshots.
+ * It consists of a set of files: the log file, the parameter file and the index
+ * files.
+ *
+ * The <b>parameter file</b> contains all the information related to the code
+ * (e.g. boxsize).
+ *
+ * The <b>index files</b> are optional files that indicate the position of
+ * the particles in the log file at a given time step. They are useful to
+ * speed up the reading.
+ *
+ * The <b>log file</b> consists of a large file where the particles are logged
+ * one after the other. It contains a <b>log file header</b> at the beginning of
+ * the file and a large collection of <b>records</b>.
+ *
+ * The records are logged one after the other and each contains a <b>record
+ * header</b> and then a list of <b>named entries</b>. In the record header, a
+ * <b>mask</b> is provided that corresponds to the type of named entries present
+ * in this record. It also contains the <b>offset</b> to the previous or next
+ * record for this particle.
+ */
+
+#ifndef LOGGER_LOGGER_READER_H
+#define LOGGER_LOGGER_READER_H
+
+#include "logger_loader_io.h"
+#include "logger_logfile.h"
+#include "logger_particle.h"
+
+/**
+ * @brief Main structure of the logger.
+ *
+ * This structure contains all the variables required for the logger.
+ * It should be the only structure that the user sees.
+ *
+ * It is initialized with #logger_reader_init and freed with
+ * #logger_reader_free.
+ */
+struct logger_reader {
+
+  /* Time of each index file. #TODO */
+  double *times;
+
+  /* The log file (gives access to its header and its time records). */
+  struct logger_logfile log;
+
+  /* Level of verbosity (messages are printed above 1). */
+  int verbose;
+};
+
+void logger_reader_init(struct logger_reader *reader, char *filename,
+                        int verbose);
+void logger_reader_free(struct logger_reader *reader);
+size_t reader_read_record(struct logger_reader *reader,
+                          struct logger_particle *lp, double *time,
+                          int *is_particle, size_t offset);
+#endif  // LOGGER_LOGGER_READER_H
diff --git a/logger/logger_time.c b/logger/logger_time.c
new file mode 100644
index 0000000000000000000000000000000000000000..d2c6ebc3f9e3171ba7fdec6c6a63eb23d7001df6
--- /dev/null
+++ b/logger/logger_time.c
@@ -0,0 +1,315 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2019 Loic Hausammann (loic.hausammann@epfl.ch)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+#include "logger_time.h"
+#include "logger_loader_io.h"
+#include "logger_logfile.h"
+#include "logger_reader.h"
+
+/**
+ * @brief Check if enough space is available and increase it if required.
+ *
+ * @param t The #time_array (its records are preserved when it grows).
+ */
+void time_array_ensure_size(struct time_array *t) {
+  /* Check if we still have some place. */
+  if (t->size < t->capacity) return;
+
+  /* Double the capacity. An array without any capacity (e.g. after
+   * #time_array_free) restarts from the initial size, as doubling zero
+   * would never provide any room. */
+  const size_t new_capacity =
+      (t->capacity > 0) ? 2 * t->capacity : LOGGER_TIME_INIT_SIZE;
+
+  /* Grow the array, keeping the existing records. The result goes through a
+   * temporary as realloc leaves the old array untouched on failure. */
+  struct time_record *tmp =
+      realloc(t->records, sizeof(struct time_record) * new_capacity);
+  if (tmp == NULL) error("Failed to allocate the time records.");
+
+  /* Commit the new array and its capacity. */
+  t->records = tmp;
+  t->capacity = new_capacity;
+}
+
+/**
+ * @brief Append a time record at the end of the #time_array.
+ *
+ * @param t The #time_array.
+ * @param int_time The time in integer.
+ * @param time The time in double.
+ * @param offset The offset of the record.
+ */
+void time_array_append(struct time_array *t, const integertime_t int_time,
+                       const double time, const size_t offset) {
+  /* Make sure that one more record fits in the array. */
+  time_array_ensure_size(t);
+
+  /* Fill the first unused record. */
+  struct time_record *record = &t->records[t->size];
+  record->int_time = int_time;
+  record->time = time;
+  record->offset = offset;
+
+  /* The array now contains one more element. */
+  t->size++;
+}
+
+/**
+ * @brief read a time record.
+ *
+ * @param int_time (out) integer time read.
+ * @param time (out) time read.
+ * @param reader The #logger_reader.
+ * @param offset position of the record in the file.
+ *
+ * @return The position in the file after the data read.
+ */
+size_t time_read(integertime_t *int_time, double *time,
+                 const struct logger_reader *reader, size_t offset) {
+
+  /* The file is accessed through the header of the log. */
+  const struct header *h = &reader->log.header;
+
+  /* Reset the outputs. */
+  *int_time = 0;
+  *time = 0;
+
+  /* read record header. */
+  size_t record_mask = 0;
+  size_t prev_offset = 0;
+  void *cursor = logger_loader_io_read_mask(h, h->log->log.map + offset,
+                                            &record_mask, &prev_offset);
+
+#ifdef SWIFT_DEBUG_CHECKS
+
+  /* The header must know the time mask and the record must carry it. */
+  const int time_index = header_get_field_index(h, "timestamp");
+  if (time_index == -1) error("File header does not contain a mask for time.");
+  if (h->masks[time_index].mask != record_mask) error("Not a time record.");
+#endif
+
+  /* read the record. */
+  cursor = logger_loader_io_read_data(cursor, sizeof(unsigned long long int),
+                                      int_time);
+  cursor = logger_loader_io_read_data(cursor, sizeof(double), time);
+
+  return cursor - h->log->log.map;
+}
+
+/**
+ * @brief get offset of first time record
+ *
+ * @param h file #header
+ * @return offset of first time record
+ *
+ */
+size_t time_offset_first_record(const struct header *h) {
+
+  /* The first record of the file is expected to be a time record. */
+  const size_t first_offset = h->offset_first_record;
+
+  /* Get the mask of the time records. */
+  const int time_index = header_get_field_index(h, "timestamp");
+  if (time_index == -1) error("Time mask not present in the log file header.");
+
+  /* Compare it with the mask of the first record. */
+  size_t first_mask = 0;
+  void *map = h->log->log.map;
+  logger_loader_io_read_mask(h, map + first_offset, &first_mask, NULL);
+  if (first_mask != h->masks[time_index].mask)
+    error("Log file should begin by timestep.");
+
+  return first_offset;
+}
+
+/**
+ * @brief Set up an empty time array with its initial capacity.
+ *
+ * @param t #time_array to initialize.
+ */
+void time_array_init(struct time_array *t) {
+  /* Reserve the room for the initial number of records. */
+  const size_t bytes = sizeof(struct time_record) * LOGGER_TIME_INIT_SIZE;
+  t->records = malloc(bytes);
+  if (t->records == NULL) error("Failed to initialize the time records.");
+  /* The array starts empty. */
+  t->capacity = LOGGER_TIME_INIT_SIZE;
+  t->size = 0;
+}
+
+/**
+ * @brief Initialize a time array from a file.
+ *
+ * Starting from the first record of the file, each time record read is
+ * appended to the array until no next record is found.
+ *
+ * @param t #time_array to initialize.
+ * @param log The #logger_logfile.
+ */
+void time_array_populate(struct time_array *t, struct logger_logfile *log) {
+
+  /* Size of the file, used to stop the search. */
+  const size_t file_size = log->log.file_size;
+
+  /* Start from the first time stamp. */
+  size_t offset = time_offset_first_record(&log->header);
+  while (offset < file_size) {
+    /* Read the time record at the current offset and store it. */
+    integertime_t int_time = 0;
+    double time = 0;
+    time_read(&int_time, &time, log->reader, offset);
+    time_array_append(t, int_time, time, offset);
+
+    /* Move to the next record; a return of -1 means there is none left. */
+    const int status =
+        tools_get_next_record(&log->header, log->log.map, &offset, file_size);
+    if (status == -1) break;
+  }
+}
+
+/**
+ * @brief Get the integer time of the time record matching an offset.
+ *
+ * @param t #time_array to access.
+ * @param offset offset of the record.
+ *
+ * @return integer time of the record found by #time_array_get_index.
+ */
+integertime_t time_array_get_integertime(struct time_array *t,
+                                         const size_t offset) {
+  const struct time_record *records = t->records;
+  return records[time_array_get_index(t, offset)].int_time;
+}
+
+/**
+ * @brief Get the time (in double) of the time record matching an offset.
+ *
+ * @param t #time_array to access.
+ * @param offset offset of the record.
+ *
+ * @return time of the record found by #time_array_get_index.
+ */
+double time_array_get_time(const struct time_array *t, const size_t offset) {
+  const struct time_record *records = t->records;
+  return records[time_array_get_index(t, offset)].time;
+}
+
+/**
+ * @brief Find the index of the last time record written before a given offset.
+ *
+ * @param t #time_array to access.
+ * @param offset offset of the record.
+ *
+ * @return The index of the last time record with an offset not larger than
+ * the given one (0 if there is no such record).
+ */
+size_t time_array_get_index(const struct time_array *t, const size_t offset) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (!t) error("NULL pointer.");
+
+  if (offset < t->records[0].offset || offset > t->records[t->size - 1].offset)
+    error("Offset outside of range.");
+#endif
+
+  /* Bisect on the half-open range [low, high): the records before low have
+   * an offset <= the requested one and the records from high on a larger
+   * one. No bound is ever decremented, so the unsigned indices cannot wrap. */
+  size_t low = 0;
+  size_t high = t->size;
+
+  while (low < high) {
+    const size_t center = low + (high - low) / 2;
+    if (t->records[center].offset <= offset) {
+      low = center + 1;
+    } else {
+      high = center;
+    }
+  }
+
+  /* low is the number of records with an offset <= the requested one. Fall
+   * back to the first record when the offset precedes all of them. */
+  return (low > 0) ? low - 1 : 0;
+}
+
+/**
+ * @brief Release the records of a #time_array and reset it.
+ *
+ * @param t #time_array to free
+ */
+void time_array_free(struct time_array *t) {
+  /* Mark the array as empty. */
+  t->capacity = 0;
+  t->size = 0;
+
+  /* Release the records and forget about them. */
+  free(t->records);
+  t->records = NULL;
+}
+
+/**
+ * @brief print a #time_array (integer and double times)
+ *
+ * @param t #time_array to print
+ */
+void time_array_print(const struct time_array *t) {
+  const size_t threshold = 4;
+  const size_t n = t->size;
+  /* An empty array has no first record to print. */
+  if (n == 0) {
+    printf("Times (size 0): []\n");
+    return;
+  }
+  printf("Times (size %zu): [%lli (%g)", n, t->records[0].int_time,
+         t->records[0].time);
+
+  /* Print the first and the last elements; the skipped ones become "...". */
+  for (size_t i = 1; i < n; i++) {
+    if (i < threshold || i + threshold > n)
+      printf(", %lli (%g)", t->records[i].int_time, t->records[i].time);
+    else if (i == threshold)
+      printf(", ...");
+  }
+  printf("]\n");
+}
+
+/**
+ * @brief print a #time_array (offset)
+ *
+ * @param t #time_array to print
+ */
+void time_array_print_offset(const struct time_array *t) {
+  const size_t threshold = 4;
+  const size_t n = t->size;
+  /* An empty array has no first record to print. */
+  if (n == 0) {
+    printf("Times (size 0): []\n");
+    return;
+  }
+  printf("Times (size %zu): [%zu", n, t->records[0].offset);
+
+  /* Print the first and the last offsets; the skipped ones become "...". */
+  for (size_t i = 1; i < n; i++) {
+    if (i < threshold || i + threshold > n)
+      printf(", %zu", t->records[i].offset);
+    else if (i == threshold)
+      printf(", ...");
+  }
+  printf("]\n");
+}
diff --git a/logger/logger_time.h b/logger/logger_time.h
new file mode 100644
index 0000000000000000000000000000000000000000..b27abffb9c1b3aa02c82c1739d1206b43f3ac431
--- /dev/null
+++ b/logger/logger_time.h
@@ -0,0 +1,95 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2019 Loic Hausammann (loic.hausammann@epfl.ch)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+#ifndef LOGGER_LOGGER_TIMELINE_H
+#define LOGGER_LOGGER_TIMELINE_H
+
+#include "logger_header.h"
+#include "logger_tools.h"
+
+typedef int8_t timebin_t;
+typedef long long integertime_t;
+
+struct logger_reader;
+
+#define LOGGER_TIME_INIT_SIZE 1024
+
+/**
+ * @brief Content of a time record along with its offset in the log file.
+ */
+struct time_record {
+  /* Integer time of the record. */
+  integertime_t int_time;
+
+  /* Time of the record in double precision. */
+  double time;
+
+  /* Offset of the time record in the file. */
+  size_t offset;
+};
+
+/**
+ * @brief This structure contains all the time records.
+ *
+ * This structure is required in order to easily obtain the time step
+ * of a record. It contains all the time steps
+ * with their integer time, double time and position in the file.
+ *
+ * This structure is initialized with #time_array_init and #time_array_populate,
+ * and freed with #time_array_free.
+ *
+ * The time step of an offset can be obtained with
+ * #time_array_get_integertime, #time_array_get_time and
+ * #time_array_get_index.
+ */
+struct time_array {
+
+  /* The complete list of time records. */
+  struct time_record *records;
+
+  /* Number of elements currently stored in the array. */
+  size_t size;
+
+  /* Number of elements that fit in the allocated array. */
+  size_t capacity;
+};
+
+void time_array_append(struct time_array *t, const integertime_t int_time,
+                       const double time, const size_t offset);
+size_t time_read(integertime_t *int_time, double *time,
+                 const struct logger_reader *reader, size_t offset);
+
+void time_array_init(struct time_array *t);
+void time_array_populate(struct time_array *t, struct logger_logfile *log);
+
+integertime_t time_array_get_integertime(struct time_array *t,
+                                         const size_t offset);
+
+double time_array_get_time(const struct time_array *t, const size_t offset);
+
+size_t time_array_get_index(const struct time_array *t, const size_t offset);
+
+void time_array_free(struct time_array *t);
+
+void time_array_print(const struct time_array *t);
+
+void time_array_print_offset(const struct time_array *t);
+
+size_t time_offset_first_record(const struct header *h);
+
+#endif  // LOGGER_LOGGER_TIMELINE_H
diff --git a/logger/logger_tools.c b/logger/logger_tools.c
new file mode 100644
index 0000000000000000000000000000000000000000..a9a6ecfcb0acf72b11898d00fdfeff90fd70406d
--- /dev/null
+++ b/logger/logger_tools.c
@@ -0,0 +1,231 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2019 Loic Hausammann (loic.hausammann@epfl.ch)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+#include "logger_tools.h"
+#include "logger_header.h"
+#include "logger_loader_io.h"
+#include "logger_reader.h"
+
+#include "logger_particle.h"
+
+#include <stdio.h>
+
+/**
+ * @brief Get the offset of the next corresponding record.
+ *
+ * @param h #header structure of the file (selects the offset direction).
+ * @param map file mapping
+ * @param offset In: initial offset, Out: offset of the next record
+ * @param file_size The file size (only used with backward offsets).
+ *
+ * @return -1 if no next record, otherwise 0
+ */
+int tools_get_next_record(const struct header *h, void *map, size_t *offset,
+                          size_t file_size) {
+  if (header_is_forward(h))
+    return _tools_get_next_record_forward(h, map, offset);
+  if (header_is_backward(h))
+    return _tools_get_next_record_backward(h, map, offset, file_size);
+  error("Offsets are corrupted.");
+  return -1; /* Keeps every path returning a value if error() comes back. */
+}
+
+/**
+ * @brief Internal helper of #tools_get_next_record for forward offsets.
+ * Should not be used outside.
+ *
+ * @param h #header structure of the file
+ * @param map file mapping
+ * @param offset In: offset of the current record, Out: offset of the next one
+ *
+ * @return 0 on success, -1 if there is no next record
+ */
+int _tools_get_next_record_forward(const struct header *h, void *map,
+                                   size_t *offset) {
+  /* Relative distance to the next record (zero if there is none). */
+  size_t delta = 0;
+
+  /* Only the offset is requested (NULL is passed for the mask). */
+  logger_loader_io_read_mask(h, map + *offset, NULL, &delta);
+  if (delta == 0) return -1;
+
+  /* Turn the relative distance into an absolute offset. */
+  *offset += delta;
+  return 0;
+}
+
+/**
+ * @brief Internal helper of #tools_get_next_record for backward offsets.
+ * Should not be used (very slow): it scans the records following the offset.
+ *
+ * @param h #header structure of the file
+ * @param map file mapping
+ * @param offset In: initial offset, Out: offset of the next record
+ * @param file_size The file size.
+ *
+ * @return 0 if a next record was found, -1 otherwise
+ */
+int _tools_get_next_record_backward(const struct header *h, void *map,
+                                    size_t *offset, size_t file_size) {
+#ifndef SWIFT_DEBUG_CHECKS
+  error("Should not be used, method too slow");
+#endif
+  size_t current_offset = *offset;
+  size_t record_header = LOGGER_MASK_SIZE + LOGGER_OFFSET_SIZE;
+
+  while (current_offset < file_size) {
+    size_t mask = 0;
+    size_t prev_offset;
+    logger_loader_io_read_mask(h, map + current_offset, &mask, &prev_offset);
+    /* Check if this record points back to the initial one. */
+    prev_offset = current_offset - prev_offset - record_header;
+    if (*offset == prev_offset) {
+      *offset = current_offset - record_header;
+      return 0;
+    }
+    /* Otherwise, advance by the record size given by the mask. */
+    current_offset += header_get_record_size_from_mask(h, mask);
+  }
+
+  return -1;
+}
+
+/**
+ * @brief Reverse the direction of the offset linking two records.
+ *
+ * Zero the offset of the current record and make the previous one point to it.
+ * @param h #header structure of the file.
+ * @param file_map file mapping (modified in place).
+ * @param offset position of the record.
+ *
+ * @return position after the record.
+ */
+size_t tools_reverse_offset(const struct header *h, void *file_map,
+                            size_t offset) {
+  size_t mask = 0;
+  size_t prev_offset = 0;
+  const size_t cur_offset = offset;
+  void *map = file_map;
+
+  /* read mask + offset. */
+  map = logger_loader_io_read_mask(h, map + offset, &mask, &prev_offset);
+
+  /* write offset of zero (in case it is the last record). */
+  const size_t zero = 0;
+  map -= LOGGER_OFFSET_SIZE;
+  map = logger_loader_io_write_data(map, LOGGER_OFFSET_SIZE, &zero);
+
+  /* set offset after current record. */
+  map += header_get_record_size_from_mask(h, mask);
+  size_t after_current_record = (size_t)(map - file_map);
+
+  /* first records do not have a previous partner. */
+  if (prev_offset == cur_offset) return after_current_record;
+
+  if (prev_offset > cur_offset)
+    error("Unexpected offset: header %zu, current %zu.", prev_offset,
+          cur_offset);
+
+  /* store the (relative) offset in the offset field of the previous record. */
+  map = file_map + cur_offset - prev_offset + LOGGER_MASK_SIZE;
+  map = logger_loader_io_write_data(map, LOGGER_OFFSET_SIZE, &prev_offset);
+
+#ifdef SWIFT_DEBUG_CHECKS
+  size_t prev_mask = 0;
+  map -= LOGGER_MASK_SIZE + LOGGER_OFFSET_SIZE;
+  logger_loader_io_read_mask(h, map, &prev_mask, NULL);
+
+  /* Check if we are not mixing time stamp and particles */
+  if ((prev_mask != 128 && mask == 128) || (prev_mask == 128 && mask != 128))
+    error("Unexpected mask: %zu, got %zu.", mask, prev_mask);
+
+#endif  // SWIFT_DEBUG_CHECKS
+
+  return after_current_record;
+}
+
+/**
+ * @brief Debugging function checking the offset and the mask of a record.
+ *
+ * Compare the mask with the one of the record pointed to by its offset.
+ * If the record is a particle, check that both records share the same id.
+ *
+ * @param reader The #logger_reader.
+ * @param offset position of the record.
+ *
+ * @return position after the record.
+ */
+size_t tools_check_record_consistency(const struct logger_reader *reader,
+                                      size_t offset) {
+#ifndef SWIFT_DEBUG_CHECKS
+  error("Should not check in non debug mode.");
+#endif
+
+  const struct header *h = &reader->log.header;
+  void *file_init = reader->log.log.map;
+  void *map = file_init + offset;
+
+  size_t mask = 0;
+  size_t pointed_offset = 0;
+
+  /* read mask + offset. */
+  map = logger_loader_io_read_mask(h, map, &mask, &pointed_offset);
+
+  /* get absolute offset. */
+  if (header_is_forward(h))
+    pointed_offset += offset;
+  else if (header_is_backward(h)) {
+    if (offset < pointed_offset)
+      error("Offset too large (%zu) at %zu with mask %zu.", pointed_offset,
+            offset, mask);
+    pointed_offset = offset - pointed_offset;
+  } else {
+    error("Offset are corrupted.");
+  }
+
+  /* set offset after current record. */
+  map += header_get_record_size_from_mask(h, mask);
+
+  /* nothing to compare with if the record does not point to another one. */
+  if (pointed_offset == offset || pointed_offset == 0)
+    return (size_t)(map - file_init);
+
+  /* read mask of the pointed record. */
+  size_t pointed_mask = 0;
+  logger_loader_io_read_mask(h, file_init + pointed_offset, &pointed_mask,
+                             NULL);
+
+  /* check if not mixing time stamp and particles. */
+  if ((pointed_mask != 128 && mask == 128) ||
+      (pointed_mask == 128 && mask != 128))
+    error("Error in the offset (mask %zu at %zu != %zu at %zu).", mask, offset,
+          pointed_mask, pointed_offset);
+
+  if (pointed_mask == 128) return (size_t)(map - file_init);
+
+  struct logger_particle part;
+  logger_particle_read(&part, reader, offset, 0, logger_reader_const);
+
+  size_t id = part.id;
+  logger_particle_read(&part, reader, pointed_offset, 0, logger_reader_const);
+
+  if (id != part.id)
+    error("Offset wrong, id incorrect (%zu != %zu) at %zu.", id,
+          (size_t)part.id, pointed_offset);
+
+  return (size_t)(map - file_init);
+}
diff --git a/logger/logger_tools.h b/logger/logger_tools.h
new file mode 100644
index 0000000000000000000000000000000000000000..21a59e42fca144a0381b15e8771ca14ceed46b33
--- /dev/null
+++ b/logger/logger_tools.h
@@ -0,0 +1,59 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2019 Loic Hausammann (loic.hausammann@epfl.ch)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+/**
+ * @brief This file contains functions that help to navigate in the logs.
+ */
+#ifndef LOGGER_LOGGER_TOOLS_H
+#define LOGGER_LOGGER_TOOLS_H
+
+#include "../config.h"
+
+/* Swift include */
+#include "../src/dimension.h"
+#include "../src/error.h"
+#include "../src/inline.h"
+#include "../src/logger.h"
+#include "../src/part_type.h"
+
+#ifdef HAVE_PYTHON
+#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
+#include <Python.h>
+#endif
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define STRING_SIZE 200
+
+struct header;
+struct logger_reader;
+
+int tools_get_next_record(const struct header *h, void *map, size_t *offset,
+                          size_t file_size);
+int _tools_get_next_record_backward(const struct header *h, void *map,
+                                    size_t *offset, size_t file_size);
+int _tools_get_next_record_forward(const struct header *h, void *map,
+                                   size_t *offset);
+size_t tools_reverse_offset(const struct header *h, void *map, size_t offset);
+size_t tools_check_record_consistency(const struct logger_reader *reader,
+                                      size_t offset);
+
+#endif  // LOGGER_LOGGER_TOOLS_H
diff --git a/logger/python/reader_example.py b/logger/python/reader_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..6ace309c5b68b4fc4f1088b6206cd1ae3ccd69a5
--- /dev/null
+++ b/logger/python/reader_example.py
@@ -0,0 +1,55 @@
+#!/usr/bin/env python3
+"""
+Read a logger file by using an index.
+Example: ./reader_example.py ../../examples/HydroTests/SedovBlast_3D/index.dump ../../examples/HydroTests/SedovBlast_3D/index_0002.hdf5
+"""
+import sys
+from h5py import File
+import numpy as np
+import matplotlib.pyplot as plt
+sys.path.append("../.libs/")
+
+import liblogger as logger
+
+# Get filenames (first argument: dump file, second argument: index file)
+if len(sys.argv) != 3:
+    print("WARNING missing arguments. Will use the default ones")
+    index = "../../examples/HydroTests/SedovBlast_3D/index_0002.hdf5"
+    dump = "../../examples/HydroTests/SedovBlast_3D/index.dump"
+else:
+    index = sys.argv[-1]
+    dump = sys.argv[-2]
+
+# Names of the dataset, group and attribute read from the index file
+offset_name = "PartType0/Offset"
+header = "Header"
+time_name = "Time Offset"
+
+# Read index file
+with File(index, "r") as f:
+    if offset_name not in f:
+        raise Exception("Unable to find the offset dataset")
+    offset = f[offset_name][:]
+
+    if header not in f:
+        raise Exception("Unable to find the header")
+    if time_name not in f[header].attrs:
+        raise Exception("Unable to find the time offset")
+    time_offset = f[header].attrs[time_name]
+
+# read dump
+data = logger.loadFromIndex(offset, dump, time_offset)
+
+# Compute distance from center (mean position, taken along each axis)
+pos = data["positions"]
+center = pos.mean(axis=0)
+r2 = np.sum((pos - center)**2, axis=1)
+
+# plot entropy vs distance
+plt.plot(np.sqrt(r2), data["entropy"], '.')
+
+plt.xlim(0., 0.5)
+plt.ylim(-5, 50)
+plt.xlabel("Radius")
+plt.ylabel("Entropy")
+plt.show()
diff --git a/logger/tests/Makefile.am b/logger/tests/Makefile.am
new file mode 100644
index 0000000000000000000000000000000000000000..dd94462b8b98b0a089d0f959b81c603c29911a76
--- /dev/null
+++ b/logger/tests/Makefile.am
@@ -0,0 +1,37 @@
+# This file is part of SWIFT.
+# Copyright (c) 2019 loic.hausammann@epfl.ch.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+# Add the source directory and the non-standard paths to the included library headers to CFLAGS
+AM_CFLAGS = -I$(top_srcdir)/src -I$(top_srcdir)/logger $(HDF5_CPPFLAGS) $(GSL_INCS) $(FFTW_INCS)
+
+AM_LDFLAGS = ../../src/.libs/libswiftsim.a ../.libs/liblogger.a $(HDF5_LDFLAGS) $(HDF5_LIBS) $(FFTW_LIBS) $(TCMALLOC_LIBS) $(JEMALLOC_LIBS) $(TBBMALLOC_LIBS) $(GRACKLE_LIBS) $(GSL_LIBS) $(PROFILER_LIBS)
+
+# List of programs and scripts to run in the test suite
+TESTS = testLogfileHeader testLogfileReader testTimeArray
+
+# List of test programs to compile
+check_PROGRAMS = testLogfileHeader testLogfileReader testTimeArray
+
+# Rebuild tests when SWIFT is updated.
+$(check_PROGRAMS): ../../src/.libs/libswiftsim.a ../.libs/liblogger.a
+
+# Sources for the individual programs
+testLogfileHeader_SOURCES = testLogfileHeader.c
+testLogfileReader_SOURCES = testLogfileReader.c
+testTimeArray_SOURCES = testTimeArray.c
+
+# Files necessary for distribution
+EXTRA_DIST = testLogfileHeader.yml testLogfileReader.yml
diff --git a/logger/tests/testLogfileHeader.c b/logger/tests/testLogfileHeader.c
new file mode 100644
index 0000000000000000000000000000000000000000..0f2c8a5df7942d50cbb641b99e3173a05fe1d539
--- /dev/null
+++ b/logger/tests/testLogfileHeader.c
@@ -0,0 +1,95 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (C) 2019 Loic Hausammann (loic.hausammann@epfl.ch)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+#include "logger_header.h"
+#include "logger_logfile.h"
+#include "logger_reader.h"
+#include "swift.h"
+
+int main(int argc, char *argv[]) {
+
+  /*
+    First generate the file.
+  */
+
+  message("Generating the dump.");
+  /* Create required structures. */
+  struct logger_writer log;
+  struct swift_params params;
+  char filename[200] = "testLogfileHeader.yml";
+
+  /* Read parameters. */
+  parser_read_file(filename, &params);
+
+  /* Initialize the logger. */
+  logger_init(&log, &params);
+
+  /* Build the dump filename: base name of the logger + ".dump". */
+  char dump_filename[PARSER_MAX_LINE_SIZE];
+  strcpy(dump_filename, log.base_name);
+  strcat(dump_filename, ".dump");
+
+  /* Write file header. */
+  logger_write_file_header(&log);
+
+  /* Clean the memory of the writer. */
+  logger_free(&log);
+  /*
+    Then read the file.
+  */
+
+  message("Reading the header.");
+  /* Generate required structure for reading. */
+  struct logger_reader reader;
+  struct logger_logfile *logfile = &reader.log;
+  logfile->reader = &reader;
+
+  /* Set verbose level. */
+  reader.verbose = 1;
+
+  /* Read only the header of the dump file. */
+  logger_logfile_init_from_file(logfile, dump_filename, &reader,
+                                /* only_header */ 1);
+  /*
+    Finally check everything.
+  */
+
+  struct header *h = &logfile->header;
+  message("Checking versions.");
+  assert(h->major_version == logger_major_version);
+  assert(h->minor_version == logger_minor_version);
+
+  message("Checking offset of first record");
+  assert(h->offset_first_record == logfile->log.file_size);
+
+  message("Checking number of masks");
+  assert(h->number_mask == logger_count_mask);
+
+  message("Checking masks");
+  for (int i = 0; i < logger_count_mask; i++) {
+    assert(logger_mask_data[i].size == h->masks[i].size);
+    assert(logger_mask_data[i].mask == h->masks[i].mask);
+    assert(strcmp(logger_mask_data[i].name, h->masks[i].name) == 0);
+  }
+
+  message("Checking offset direction");
+  assert(h->offset_direction == logger_offset_backward);
+
+  return 0;
+}
diff --git a/logger/tests/testLogfileHeader.yml b/logger/tests/testLogfileHeader.yml
new file mode 100644
index 0000000000000000000000000000000000000000..b97c513fa9ee1c3d9816b54afed38f4124dc3957
--- /dev/null
+++ b/logger/tests/testLogfileHeader.yml
@@ -0,0 +1,6 @@
+# Parameter file for the tests
+Logger:
+  delta_step: 10
+  initial_buffer_size: 0.1 # in GB
+  buffer_scale: 10
+  basename: test_header
\ No newline at end of file
diff --git a/logger/tests/testLogfileReader.c b/logger/tests/testLogfileReader.c
new file mode 100644
index 0000000000000000000000000000000000000000..751c6b7d628fcd1191e8deba9135cddd8cd04bf8
--- /dev/null
+++ b/logger/tests/testLogfileReader.c
@@ -0,0 +1,311 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (C) 2019 Loic Hausammann (loic.hausammann@epfl.ch).
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+#include "logger_header.h"
+#include "logger_loader_io.h"
+#include "logger_particle.h"
+#include "logger_reader.h"
+#include "swift.h"
+
+#define number_parts 100
+/* Not all the fields are written at every step.
+ * Here we define how often a few fields are written.
+ */
+#define period_rho 2
+#define period_h 4
+
+/**
+ * @brief Fill the particle arrays with the values used by the test.
+ *
+ * @param p The array of #part (number_parts elements).
+ * @param xp The array of #xpart (number_parts elements).
+ */
+void init_particles(struct part *p, struct xpart *xp) {
+  struct hydro_space hs;
+
+  for (int i = 0; i < number_parts; i++) {
+    struct part *pi = &p[i];
+
+    /* Set the internal energy, then run the hydro initializations. */
+    hydro_set_init_internal_energy(pi, 100);
+    hydro_first_init_part(pi, &xp[i]);
+    hydro_init_part(pi, &hs);
+
+    for (int j = 0; j < 3; j++) {
+      pi->x[j] = i;
+      pi->v[j] = (j == 0) ? -1 : 0;
+      pi->a_hydro[j] = (j == 1) ? 1e-2 : 0;
+    }
+    pi->h = 15;
+    pi->rho = 50;
+    pi->id = i;
+    hydro_set_mass(pi, 1.5);
+    xp[i].logger_data.last_offset = 0;
+
+    /* Time bins cycle through 1..10 so that some particles get skipped. */
+    pi->time_bin = (i % 10) + 1;
+  }
+}
+
+/** Maps a step number onto its integer time (the step itself). */
+integertime_t get_integer_time(int step) { return (integertime_t)step; }
+
+/** Maps a step number onto its double time (step times 1e-4). */
+double get_double_time(int step) {
+  const double dt = 1e-4;
+  return step * dt;
+}
+
+/**
+ * @brief Write the particles over multiple steps (without evolving them).
+ *
+ * @param log The #logger_writer.
+ * @param parts The array of #part (x[0] is overwritten with the step).
+ * @param xparts The array of #xpart (provides logger_data.last_offset).
+ */
+void write_particles(struct logger_writer *log, struct part *parts,
+                     struct xpart *xparts) {
+
+  const int number_steps = 100;
+
+  /* Loop over all the steps. */
+  for (int i = 0; i < number_steps; i++) {
+    integertime_t ti_int = get_integer_time(i);
+    double ti_double = get_double_time(i);
+
+    /* Mark the current time step in the particle logger file. */
+    logger_log_timestamp(log, ti_int, ti_double, &log->timestamp_offset);
+    /* Make sure that we have enough space in the particle logger file
+     * to store the particles in current time step. */
+    logger_ensure_size(log, number_parts, /* number gpart */ 0, 0);
+
+    /* Loop over all the particles. */
+    for (int j = 0; j < number_parts; j++) {
+
+      /* Skip the particles whose time bin does not divide the step. */
+      if (i % parts[j].time_bin != 0) continue;
+
+      /* Store the step in x[0] to check that the correct record is read. */
+      parts[j].x[0] = i;
+
+      /* Fields written at every step of the particle. */
+      unsigned int mask =
+          logger_mask_data[logger_x].mask | logger_mask_data[logger_v].mask |
+          logger_mask_data[logger_a].mask | logger_mask_data[logger_u].mask |
+          logger_mask_data[logger_consts].mask;
+
+      int number_particle_step = i / parts[j].time_bin;
+
+      if (number_particle_step % period_h == 0)
+        mask |= logger_mask_data[logger_h].mask;
+      if (number_particle_step % period_rho == 0)
+        mask |= logger_mask_data[logger_rho].mask;
+
+      logger_log_part(log, &parts[j], mask, &xparts[j].logger_data.last_offset);
+    }
+
+    // TODO write index files.
+  }
+
+  /* Write a final time stamp after the last step. */
+  integertime_t ti_int = get_integer_time(number_steps);
+  double ti_double = get_double_time(number_steps);
+  logger_log_timestamp(log, ti_int, ti_double, &log->timestamp_offset);
+}
+
+/** Count the particles whose time bin divides the given step. */
+int get_number_active_particles(int step, struct part *p) {
+  int n_active = 0;
+  const struct part *const end = p + number_parts;
+  for (const struct part *q = p; q != end; q++)
+    n_active += (step % q->time_bin == 0);
+  return n_active;
+}
+/**
+ * @brief Check that the reader contains the correct data
+ * @param reader The #logger_reader.
+ * @param parts The array of #part used to write the file.
+ * @param xparts The array of #xpart (not used by the checks).
+ */
+void check_data(struct logger_reader *reader, struct part *parts,
+                struct xpart *xparts) {
+
+  /* No need to check the header, this is already done in testLogfileHeader.c */
+
+  /* Get required structures. */
+  struct logger_logfile *logfile = &reader->log;
+
+  struct logger_particle lp;
+  logger_particle_init(&lp);
+
+  /* Define a few variables */
+  double time = get_double_time(0);
+  int is_particle = 0;
+  int step = -1;
+
+  /* Number of particles found during this time step. */
+  int count = 0;
+  /* Set it to an impossible value in order to flag it. */
+  const size_t id_flag = 5 * number_parts;
+  size_t previous_id = id_flag;
+
+  /* Loop over each record. */
+  for (size_t offset = reader_read_record(reader, &lp, &time, &is_particle,
+                                          logfile->header.offset_first_record);
+       offset < logfile->log.file_size;
+       offset = reader_read_record(reader, &lp, &time, &is_particle, offset)) {
+
+    /* Do the particle case */
+    if (is_particle) {
+      count += 1;
+
+      /*
+        Check that we are really increasing the id in the logfile.
+        See the writing part to see that we are always increasing the id.
+      */
+      if (previous_id != id_flag && previous_id >= lp.id)
+        error("Wrong particle found");
+      previous_id = lp.id;
+
+      /* Get the corresponding particle */
+      if (lp.id >= number_parts) error("Wrong id %zi", lp.id);
+
+      struct part *p = &parts[lp.id];
+
+      /* Check the record's data. */
+      for (int i = 0; i < 3; i++) {
+        /* in the first index, we are storing the step information. */
+        if (i == 0)
+          assert(step == lp.pos[i]);
+        else
+          assert(p->x[i] == lp.pos[i]);
+        assert(p->v[i] == lp.vel[i]);
+        assert(p->a_hydro[i] == lp.acc[i]);
+      }
+
+      assert(p->entropy == lp.entropy);
+      assert(p->mass == lp.mass);
+
+      /* Check optional fields. */
+      int number_steps = step / p->time_bin;
+      if (number_steps % period_h == 0) {
+        assert(p->h == lp.h);
+      } else {
+        assert(-1 == lp.h);
+      }
+      if (number_steps % period_rho == 0) {
+        assert(p->rho == lp.density);
+      } else {
+        assert(-1 == lp.density);
+      }
+    }
+    /* Time stamp case. */
+    else {
+
+      /* Check if we have the current amount of particles in previous step. */
+      if (step != -1 && count != get_number_active_particles(step, parts))
+        error(
+            "The reader did not find the correct number of particles during "
+            "step %i",
+            step);
+
+      step += 1;
+
+      /* Reset some variables. */
+      previous_id = id_flag;
+      count = 0;
+
+      /* Check the record's data. */
+      assert(time == get_double_time(step));
+    }
+  }
+}
+
+int main(int argc, char *argv[]) {
+
+  /*
+    First generate the file.
+  */
+
+  message("Generating the dump.");
+
+  /* Create required structures. */
+  struct logger_writer log;
+  struct swift_params params;
+  char filename[200] = "testLogfileReader.yml";
+
+  /* Read parameters. */
+  parser_read_file(filename, &params);
+
+  /* Allocate and initialize the particles. */
+  struct part *parts;
+  if ((parts = (struct part *)malloc(sizeof(struct part) * number_parts)) ==
+      NULL)
+    error("Failed to allocate particles array.");
+
+  struct xpart *xparts;
+  if ((xparts = (struct xpart *)malloc(sizeof(struct xpart) * number_parts)) ==
+      NULL)
+    error("Failed to allocate xparticles array.");
+
+  init_particles(parts, xparts);
+
+  /* Initialize the logger. */
+  logger_init(&log, &params);
+
+  /* Build the dump filename: base name of the logger + ".dump". */
+  char dump_filename[PARSER_MAX_LINE_SIZE];
+  message("%s", log.base_name);
+  strcpy(dump_filename, log.base_name);
+  strcat(dump_filename, ".dump");
+
+  /* Write file header. */
+  logger_write_file_header(&log);
+
+  /* Write particles. */
+  write_particles(&log, parts, xparts);
+
+  /* Clean the memory of the writer. */
+  logger_free(&log);
+  /*
+    Then read the file.
+  */
+
+  message("Reading the header.");
+
+  /* Generate required structure for reading. */
+  struct logger_reader reader;
+
+  /* NOTE(review): verbose is also passed to logger_reader_init below. */
+  reader.verbose = 1;
+
+  /* Initialize the reader from the dump file. */
+  logger_reader_init(&reader, dump_filename, /* verbose */ 1);
+
+  /*
+    Finally check everything.
+  */
+
+  check_data(&reader, parts, xparts);
+
+  /* Do some cleanup. */
+  free(parts);
+  free(xparts);
+
+  return 0;
+}
diff --git a/logger/tests/testLogfileReader.yml b/logger/tests/testLogfileReader.yml
new file mode 100644
index 0000000000000000000000000000000000000000..1ac5e2da909f1fe53cba052bbd24c5c3ce98dfed
--- /dev/null
+++ b/logger/tests/testLogfileReader.yml
@@ -0,0 +1,6 @@
+# Parameter file for the tests
+Logger:
+  delta_step: 10
+  initial_buffer_size: 0.01 # in GB
+  buffer_scale: 10
+  basename: test_reader
\ No newline at end of file
diff --git a/logger/tests/testTimeArray.c b/logger/tests/testTimeArray.c
new file mode 100644
index 0000000000000000000000000000000000000000..929a7124baa8ab05fd3452f87076d95c88c2f3b2
--- /dev/null
+++ b/logger/tests/testTimeArray.c
@@ -0,0 +1,78 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (C) 2019 Loic Hausammann (loic.hausammann@epfl.ch)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+#include <stdlib.h>
+#include <time.h>
+#include "logger_time.h"
+
+#define NUMBER_OF_ELEMENT 10000
+#define TIME_BASE 0.04
+#define OFFSET_BASE 1000
+
+int main(int argc, char *argv[]) {
+
+  /* Check that we are really testing the reallocation */
+  if (NUMBER_OF_ELEMENT < LOGGER_TIME_INIT_SIZE) {
+    error("Not testing the reallocation.");
+  }
+
+  /* Fix the random seed in order to reproduce the results */
+  srand(100);
+
+  /* Initialize the time array */
+  struct time_array times;
+  time_array_init(&times);
+
+  /* Add elements */
+  for (size_t i = 0; i < NUMBER_OF_ELEMENT; i++) {
+    integertime_t int_time = i;
+    double time = i * TIME_BASE;
+    size_t offset = i * OFFSET_BASE;
+
+    time_array_append(&times, int_time, time, offset);
+  }
+
+  /* Check the elements */
+  for (size_t i = 0; i < NUMBER_OF_ELEMENT; i++) {
+    integertime_t int_time = i;
+    double time = i * TIME_BASE;
+    size_t offset = i * OFFSET_BASE;
+
+    /* Ensure that we can get the correct offset when looking
+       in between the records. */
+    int r = rand() % OFFSET_BASE;
+    size_t read_offset = offset + r;
+
+    /* The offset cannot be larger than the largest one */
+    if (i == NUMBER_OF_ELEMENT - 1) read_offset = offset;
+
+    /* Get the index from the offset */
+    size_t ind = time_array_get_index(&times, read_offset);
+
+    /* Check the values obtained */
+    assert(i == ind);
+    assert(int_time == times.records[ind].int_time);
+    assert(time == times.records[ind].time);
+    assert(offset == times.records[ind].offset);
+  }
+
+  /* Release the memory held by the time array. */
+  time_array_free(&times);
+  return 0;
+}
diff --git a/src/Makefile.am b/src/Makefile.am
index 947fd1a82c487a514c7f1556fa0c1b469a408d8c..665aa4b24c94162fb8f772edd346f3c95a1d7ddb 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -44,7 +44,7 @@ include_HEADERS = space.h runner.h queue.h task.h lock.h cell.h part.h const.h \
     common_io.h single_io.h multipole.h map.h tools.h partition.h partition_fixed_costs.h \
     clocks.h parser.h physical_constants.h physical_constants_cgs.h potential.h version.h \
     hydro_properties.h riemann.h threadpool.h cooling_io.h cooling.h cooling_struct.h \
-    statistics.h memswap.h cache.h runner_doiact_vec.h profiler.h entropy_floor.h \
+    statistics.h memswap.h cache.h runner_doiact_hydro_vec.h profiler.h entropy_floor.h \
     dump.h logger.h active.h timeline.h xmf.h gravity_properties.h gravity_derivatives.h \
     gravity_softened_derivatives.h vector_power.h collectgroup.h hydro_space.h sort_part.h \
     chemistry.h chemistry_io.h chemistry_struct.h cosmology.h restart.h space_getsid.h utilities.h \
@@ -69,13 +69,18 @@ EAGLE_FEEDBACK_SOURCES += feedback/EAGLE/feedback.c
 endif
 
 # Common source files
-AM_SOURCES = space.c runner.c queue.c task.c cell.c engine.c engine_maketasks.c \
-    engine_marktasks.c engine_drift.c serial_io.c timers.c debug.c scheduler.c \
+AM_SOURCES = space.c runner_main.c runner_doiact_hydro.c runner_doiact_grav.c \
+    runner_doiact_stars.c runner_doiact_black_holes.c runner_ghost.c runner_recv.c \
+    runner_sort.c runner_drift.c runner_black_holes.c runner_time_integration.c \
+    runner_doiact_hydro_vec.c runner_others.c\
+    queue.c task.c cell.c engine.c engine_maketasks.c \
+    engine_marktasks.c engine_drift.c engine_unskip.c engine_collect_end_of_step.c \
+    engine_redistribute.c engine_fof.c serial_io.c timers.c debug.c scheduler.c \
     proxy.c parallel_io.c units.c common_io.c single_io.c multipole.c version.c map.c \
     kernel_hydro.c tools.c part.c partition.c clocks.c parser.c \
     physical_constants.c potential.c hydro_properties.c \
     threadpool.c cooling.c star_formation.c \
-    statistics.c runner_doiact_vec.c profiler.c dump.c logger.c \
+    statistics.c profiler.c dump.c logger.c \
     part_type.c xmf.c gravity_properties.c gravity.c \
     collectgroup.c hydro_space.c equation_of_state.c \
     chemistry.c cosmology.c restart.c mesh_gravity.c velociraptor_interface.c \
@@ -85,8 +90,10 @@ AM_SOURCES = space.c runner.c queue.c task.c cell.c engine.c engine_maketasks.c
 
 # Include files for distribution, not installation.
 nobase_noinst_HEADERS = align.h approx_math.h atomic.h barrier.h cycle.h error.h inline.h kernel_hydro.h kernel_gravity.h \
-		 gravity_iact.h kernel_long_gravity.h vector.h cache.h runner_doiact.h runner_doiact_vec.h runner_doiact_grav.h  \
-                 runner_doiact_nosort.h runner_doiact_stars.h runner_doiact_black_holes.h units.h intrinsics.h minmax.h \
+		 gravity_iact.h kernel_long_gravity.h vector.h cache.h \
+	         runner_doiact_nosort.h runner_doiact_hydro.h runner_doiact_stars.h runner_doiact_black_holes.h runner_doiact_grav.h \
+                 runner_doiact_functions_hydro.h runner_doiact_functions_stars.h runner_doiact_functions_black_holes.h \
+		 units.h intrinsics.h minmax.h \
                  kick.h timestep.h drift.h adiabatic_index.h io_properties.h dimension.h part_type.h periodic.h memswap.h \
                  dump.h logger.h sign.h logger_io.h timestep_limiter.h hashmap.h \
 		 gravity.h gravity_io.h gravity_cache.h \
diff --git a/src/cell.c b/src/cell.c
index 4b9746e92e31f2a5ae6c4b5d01ade8d83939e412..92f53d7ca8499457248baa6b1bd0fb86380e4159 100644
--- a/src/cell.c
+++ b/src/cell.c
@@ -2443,7 +2443,8 @@ void cell_activate_star_resort_tasks(struct cell *c, struct scheduler *s) {
 
   /* The resort tasks are at either the chosen depth or the super level,
    * whichever comes first. */
-  if (c->depth == engine_star_resort_task_depth || c->hydro.super == c) {
+  if ((c->depth == engine_star_resort_task_depth || c->hydro.super == c) &&
+      c->hydro.count > 0) {
     scheduler_activate(s, c->hydro.stars_resort);
   } else {
     for (int k = 0; k < 8; ++k) {
@@ -2478,6 +2479,50 @@ void cell_activate_star_formation_tasks(struct cell *c, struct scheduler *s) {
   cell_activate_star_resort_tasks(c, s);
 }
 
+/**
+ * @brief Recursively activate the hydro ghosts (and implicit links) in a cell
+ * hierarchy, skipping cells with hydro.count == 0 or inactive for hydro.
+ *
+ * @param c The #cell to start from.
+ * @param s The #scheduler in which the ghost tasks are activated.
+ * @param e The #engine (used to test whether the cell is hydro-active).
+ */
+void cell_recursively_activate_hydro_ghosts(struct cell *c, struct scheduler *s,
+                                            const struct engine *e) {
+  /* Nothing to activate in empty or hydro-inactive cells */
+  if ((c->hydro.count == 0) || !cell_is_active_hydro(c, e)) return;
+
+  /* Ghost attached to this cell? Activate it and stop descending */
+  if (c->hydro.ghost != NULL) {
+    scheduler_activate(s, c->hydro.ghost);
+  } else {
+
+#ifdef SWIFT_DEBUG_CHECKS
+    if (!c->split)
+      error("Reached the leaf level without finding a hydro ghost!");
+#endif
+
+    /* No ghost at this level: look for it in the existing progeny */
+    for (int k = 0; k < 8; k++)
+      if (c->progeny[k] != NULL)
+        cell_recursively_activate_hydro_ghosts(c->progeny[k], s, e);
+  }
+}
+
+/**
+ * @brief Activate ghost_in, ghost_out and the hydro ghosts in a cell hierarchy.
+ *
+ * @param c The #cell (ghost_in and ghost_out are used without NULL checks).
+ * @param s The #scheduler in which the tasks are activated.
+ * @param e The #engine (passed on to the recursive ghost activation).
+ */
+void cell_activate_hydro_ghosts(struct cell *c, struct scheduler *s,
+                                const struct engine *e) {
+  scheduler_activate(s, c->hydro.ghost_in);
+  scheduler_activate(s, c->hydro.ghost_out);
+  cell_recursively_activate_hydro_ghosts(c, s, e);
+}
+
 /**
  * @brief Recurse down in a cell hierarchy until the hydro.super level is
  * reached and activate the spart drift at that level.
@@ -2486,6 +2531,10 @@ void cell_activate_star_formation_tasks(struct cell *c, struct scheduler *s) {
  * @param s The #scheduler.
  */
 void cell_activate_super_spart_drifts(struct cell *c, struct scheduler *s) {
+
+  /* Early abort? */
+  if (c->hydro.count == 0) return;
+
   if (c == c->hydro.super) {
     cell_activate_drift_spart(c, s);
   } else {
@@ -3500,9 +3549,7 @@ int cell_unskip_hydro_tasks(struct cell *c, struct scheduler *s) {
 
     if (c->hydro.extra_ghost != NULL)
       scheduler_activate(s, c->hydro.extra_ghost);
-    if (c->hydro.ghost_in != NULL) scheduler_activate(s, c->hydro.ghost_in);
-    if (c->hydro.ghost_out != NULL) scheduler_activate(s, c->hydro.ghost_out);
-    if (c->hydro.ghost != NULL) scheduler_activate(s, c->hydro.ghost);
+    if (c->hydro.ghost_in != NULL) cell_activate_hydro_ghosts(c, s, e);
     if (c->kick1 != NULL) scheduler_activate(s, c->kick1);
     if (c->kick2 != NULL) scheduler_activate(s, c->kick2);
     if (c->timestep != NULL) scheduler_activate(s, c->timestep);
diff --git a/src/cell.h b/src/cell.h
index 8067a3189818ab8738de848ea698fbf25c78ebba..10a3e2bddfebd907b8efaffa472f94c421e4966c 100644
--- a/src/cell.h
+++ b/src/cell.h
@@ -273,8 +273,10 @@ struct pcell_sf {
   } stars;
 };
 
-/** Bitmasks for the cell flags. Beware when adding flags that you don't exceed
-    the size of the flags variable in the struct cell. */
+/**
+ * @brief Bitmasks for the cell flags. Beware when adding flags that you don't
+ * exceed the size of the flags variable in the struct cell.
+ */
 enum cell_flags {
   cell_flag_split = (1UL << 0),
   cell_flag_do_hydro_drift = (1UL << 1),
@@ -289,7 +291,8 @@ enum cell_flags {
   cell_flag_do_stars_sub_drift = (1UL << 10),
   cell_flag_do_bh_drift = (1UL << 11),
   cell_flag_do_bh_sub_drift = (1UL << 12),
-  cell_flag_do_stars_resort = (1UL << 13)
+  cell_flag_do_stars_resort = (1UL << 13),
+  cell_flag_has_tasks = (1UL << 14),
 };
 
 /**
diff --git a/src/cooling/EAGLE/cooling_tables.c b/src/cooling/EAGLE/cooling_tables.c
index 4261e9ac0a6fee9f77c03afe22b7a9b66ade487d..1de3265df6298eeb955758e272c7e17afb64de00 100644
--- a/src/cooling/EAGLE/cooling_tables.c
+++ b/src/cooling/EAGLE/cooling_tables.c
@@ -293,7 +293,10 @@ void read_cooling_header(const char *fname,
     cooling->nH[i] = log10(cooling->nH[i]);
   }
 
-  /* Compute inverse of solar mass fractions */
+    /* Compute inverse of solar mass fractions */
+#if defined(__ICC)
+#pragma novector
+#endif
   for (int i = 0; i < N_SolarAbundances; ++i) {
     cooling->SolarAbundances_inv[i] = 1.f / cooling->SolarAbundances[i];
   }
diff --git a/src/engine.c b/src/engine.c
index 6784e8b271353eae2a238e5c7a90a9aeb9fc06db..1b5a409cdf0488a545448c8bbcec562d04a748bc 100644
--- a/src/engine.c
+++ b/src/engine.c
@@ -44,11 +44,6 @@
 #include <numa.h>
 #endif
 
-/* Load the profiler header, if needed. */
-#ifdef WITH_PROFILER
-#include <gperftools/profiler.h>
-#endif
-
 /* This object's header. */
 #include "engine.h"
 
@@ -72,7 +67,6 @@
 #include "logger.h"
 #include "logger_io.h"
 #include "map.h"
-#include "memswap.h"
 #include "memuse.h"
 #include "minmax.h"
 #include "outputlist.h"
@@ -133,22 +127,6 @@ int engine_current_step;
 extern int engine_max_parts_per_ghost;
 extern int engine_max_sparts_per_ghost;
 
-/**
- * @brief Data collected from the cells at the end of a time-step
- */
-struct end_of_step_data {
-
-  size_t updated, g_updated, s_updated, b_updated;
-  size_t inhibited, g_inhibited, s_inhibited, b_inhibited;
-  integertime_t ti_hydro_end_min, ti_hydro_end_max, ti_hydro_beg_max;
-  integertime_t ti_gravity_end_min, ti_gravity_end_max, ti_gravity_beg_max;
-  integertime_t ti_stars_end_min, ti_stars_end_max, ti_stars_beg_max;
-  integertime_t ti_black_holes_end_min, ti_black_holes_end_max,
-      ti_black_holes_beg_max;
-  struct engine *e;
-  struct star_formation_history sfh;
-};
-
 /**
  * @brief Link a density/force task to a cell.
  *
@@ -2777,544 +2755,6 @@ void engine_barrier(struct engine *e) {
   swift_barrier_wait(&e->run_barrier);
 }
 
-/**
- * @brief Recursive function gathering end-of-step data.
- *
- * We recurse until we encounter a timestep or time-step MPI recv task
- * as the values will have been set at that level. We then bring these
- * values upwards.
- *
- * @param c The #cell to recurse into.
- * @param e The #engine.
- */
-void engine_collect_end_of_step_recurse_hydro(struct cell *c,
-                                              const struct engine *e) {
-
-  /* Skip super-cells (Their values are already set) */
-  if (c->timestep != NULL) return;
-#ifdef WITH_MPI
-  if (cell_get_recv(c, task_subtype_tend_part) != NULL) return;
-#endif /* WITH_MPI */
-
-#ifdef SWIFT_DEBUG_CHECKS
-    /* if (!c->split) error("Reached a leaf without finding a time-step task!
-     * c->depth=%d c->maxdepth=%d c->count=%d c->node=%d", */
-    /* 		       c->depth, c->maxdepth, c->hydro.count, c->nodeID); */
-#endif
-
-  /* Counters for the different quantities. */
-  size_t updated = 0;
-  integertime_t ti_hydro_end_min = max_nr_timesteps, ti_hydro_end_max = 0,
-                ti_hydro_beg_max = 0;
-
-  /* Local Star formation history properties */
-  struct star_formation_history sfh_updated;
-
-  /* Initialize the star formation structs */
-  star_formation_logger_init(&sfh_updated);
-
-  /* Collect the values from the progeny. */
-  for (int k = 0; k < 8; k++) {
-    struct cell *cp = c->progeny[k];
-    if (cp != NULL && cp->hydro.count > 0) {
-
-      /* Recurse */
-      engine_collect_end_of_step_recurse_hydro(cp, e);
-
-      /* And update */
-      ti_hydro_end_min = min(ti_hydro_end_min, cp->hydro.ti_end_min);
-      ti_hydro_end_max = max(ti_hydro_end_max, cp->hydro.ti_end_max);
-      ti_hydro_beg_max = max(ti_hydro_beg_max, cp->hydro.ti_beg_max);
-
-      updated += cp->hydro.updated;
-
-      /* Check if the cell is inactive and in that case reorder the SFH */
-      if (!cell_is_starting_hydro(cp, e)) {
-        star_formation_logger_log_inactive_cell(&cp->stars.sfh);
-      }
-
-      /* Add the star formation history in this cell to sfh_updated */
-      star_formation_logger_add(&sfh_updated, &cp->stars.sfh);
-
-      /* Collected, so clear for next time. */
-      cp->hydro.updated = 0;
-    }
-  }
-
-  /* Store the collected values in the cell. */
-  c->hydro.ti_end_min = ti_hydro_end_min;
-  c->hydro.ti_end_max = ti_hydro_end_max;
-  c->hydro.ti_beg_max = ti_hydro_beg_max;
-  c->hydro.updated = updated;
-  // c->hydro.inhibited = inhibited;
-
-  /* Store the star formation history in the parent cell */
-  star_formation_logger_add(&c->stars.sfh, &sfh_updated);
-}
-
-/**
- * @brief Recursive function gathering end-of-step data.
- *
- * We recurse until we encounter a timestep or time-step MPI recv task
- * as the values will have been set at that level. We then bring these
- * values upwards.
- *
- * @param c The #cell to recurse into.
- * @param e The #engine.
- */
-void engine_collect_end_of_step_recurse_grav(struct cell *c,
-                                             const struct engine *e) {
-
-  /* Skip super-cells (Their values are already set) */
-  if (c->timestep != NULL) return;
-#ifdef WITH_MPI
-  if (cell_get_recv(c, task_subtype_tend_gpart) != NULL) return;
-#endif /* WITH_MPI */
-
-#ifdef SWIFT_DEBUG_CHECKS
-    //  if (!c->split) error("Reached a leaf without finding a time-step
-    //  task!");
-#endif
-
-  /* Counters for the different quantities. */
-  size_t updated = 0;
-  integertime_t ti_grav_end_min = max_nr_timesteps, ti_grav_end_max = 0,
-                ti_grav_beg_max = 0;
-
-  /* Collect the values from the progeny. */
-  for (int k = 0; k < 8; k++) {
-    struct cell *cp = c->progeny[k];
-    if (cp != NULL && cp->grav.count > 0) {
-
-      /* Recurse */
-      engine_collect_end_of_step_recurse_grav(cp, e);
-
-      /* And update */
-      ti_grav_end_min = min(ti_grav_end_min, cp->grav.ti_end_min);
-      ti_grav_end_max = max(ti_grav_end_max, cp->grav.ti_end_max);
-      ti_grav_beg_max = max(ti_grav_beg_max, cp->grav.ti_beg_max);
-
-      updated += cp->grav.updated;
-
-      /* Collected, so clear for next time. */
-      cp->grav.updated = 0;
-    }
-  }
-
-  /* Store the collected values in the cell. */
-  c->grav.ti_end_min = ti_grav_end_min;
-  c->grav.ti_end_max = ti_grav_end_max;
-  c->grav.ti_beg_max = ti_grav_beg_max;
-  c->grav.updated = updated;
-}
-
-/**
- * @brief Recursive function gathering end-of-step data.
- *
- * We recurse until we encounter a timestep or time-step MPI recv task
- * as the values will have been set at that level. We then bring these
- * values upwards.
- *
- * @param c The #cell to recurse into.
- * @param e The #engine.
- */
-void engine_collect_end_of_step_recurse_stars(struct cell *c,
-                                              const struct engine *e) {
-
-  /* Skip super-cells (Their values are already set) */
-  if (c->timestep != NULL) return;
-#ifdef WITH_MPI
-  if (cell_get_recv(c, task_subtype_tend_spart) != NULL) return;
-#endif /* WITH_MPI */
-
-#ifdef SWIFT_DEBUG_CHECKS
-    // if (!c->split) error("Reached a leaf without finding a time-step task!");
-#endif
-
-  /* Counters for the different quantities. */
-  size_t updated = 0;
-  integertime_t ti_stars_end_min = max_nr_timesteps, ti_stars_end_max = 0,
-                ti_stars_beg_max = 0;
-
-  /* Collect the values from the progeny. */
-  for (int k = 0; k < 8; k++) {
-    struct cell *cp = c->progeny[k];
-    if (cp != NULL && cp->stars.count > 0) {
-
-      /* Recurse */
-      engine_collect_end_of_step_recurse_stars(cp, e);
-
-      /* And update */
-      ti_stars_end_min = min(ti_stars_end_min, cp->stars.ti_end_min);
-      ti_stars_end_max = max(ti_stars_end_max, cp->stars.ti_end_max);
-      ti_stars_beg_max = max(ti_stars_beg_max, cp->stars.ti_beg_max);
-
-      updated += cp->stars.updated;
-
-      /* Collected, so clear for next time. */
-      cp->stars.updated = 0;
-    }
-  }
-
-  /* Store the collected values in the cell. */
-  c->stars.ti_end_min = ti_stars_end_min;
-  c->stars.ti_end_max = ti_stars_end_max;
-  c->stars.ti_beg_max = ti_stars_beg_max;
-  c->stars.updated = updated;
-}
-
-/**
- * @brief Recursive function gathering end-of-step data.
- *
- * We recurse until we encounter a timestep or time-step MPI recv task
- * as the values will have been set at that level. We then bring these
- * values upwards.
- *
- * @param c The #cell to recurse into.
- * @param e The #engine.
- */
-void engine_collect_end_of_step_recurse_black_holes(struct cell *c,
-                                                    const struct engine *e) {
-
-  /* Skip super-cells (Their values are already set) */
-  if (c->timestep != NULL) return;
-#ifdef WITH_MPI
-  if (cell_get_recv(c, task_subtype_tend_bpart) != NULL) return;
-#endif /* WITH_MPI */
-
-#ifdef SWIFT_DEBUG_CHECKS
-    // if (!c->split) error("Reached a leaf without finding a time-step task!");
-#endif
-
-  /* Counters for the different quantities. */
-  size_t updated = 0;
-  integertime_t ti_black_holes_end_min = max_nr_timesteps,
-                ti_black_holes_end_max = 0, ti_black_holes_beg_max = 0;
-
-  /* Collect the values from the progeny. */
-  for (int k = 0; k < 8; k++) {
-    struct cell *cp = c->progeny[k];
-    if (cp != NULL && cp->black_holes.count > 0) {
-
-      /* Recurse */
-      engine_collect_end_of_step_recurse_black_holes(cp, e);
-
-      /* And update */
-      ti_black_holes_end_min =
-          min(ti_black_holes_end_min, cp->black_holes.ti_end_min);
-      ti_black_holes_end_max =
-          max(ti_black_holes_end_max, cp->black_holes.ti_end_max);
-      ti_black_holes_beg_max =
-          max(ti_black_holes_beg_max, cp->black_holes.ti_beg_max);
-
-      updated += cp->black_holes.updated;
-
-      /* Collected, so clear for next time. */
-      cp->black_holes.updated = 0;
-    }
-  }
-
-  /* Store the collected values in the cell. */
-  c->black_holes.ti_end_min = ti_black_holes_end_min;
-  c->black_holes.ti_end_max = ti_black_holes_end_max;
-  c->black_holes.ti_beg_max = ti_black_holes_beg_max;
-  c->black_holes.updated = updated;
-}
-
-/**
- * @brief Mapping function to collect the data from the end of the step
- *
- * This function will call a recursive function on all the top-level cells
- * to collect the information we are after.
- *
- * @param map_data The list of cells with tasks on this node.
- * @param num_elements The number of elements in the list this thread will work
- * on.
- * @param extra_data The #engine.
- */
-void engine_collect_end_of_step_mapper(void *map_data, int num_elements,
-                                       void *extra_data) {
-
-  struct end_of_step_data *data = (struct end_of_step_data *)extra_data;
-  const struct engine *e = data->e;
-  const int with_hydro = (e->policy & engine_policy_hydro);
-  const int with_self_grav = (e->policy & engine_policy_self_gravity);
-  const int with_ext_grav = (e->policy & engine_policy_external_gravity);
-  const int with_grav = (with_self_grav || with_ext_grav);
-  const int with_stars = (e->policy & engine_policy_stars);
-  const int with_black_holes = (e->policy & engine_policy_black_holes);
-  struct space *s = e->s;
-  int *local_cells = (int *)map_data;
-  struct star_formation_history *sfh_top = &data->sfh;
-
-  /* Local collectible */
-  size_t updated = 0, g_updated = 0, s_updated = 0, b_updated = 0;
-  integertime_t ti_hydro_end_min = max_nr_timesteps, ti_hydro_end_max = 0,
-                ti_hydro_beg_max = 0;
-  integertime_t ti_gravity_end_min = max_nr_timesteps, ti_gravity_end_max = 0,
-                ti_gravity_beg_max = 0;
-  integertime_t ti_stars_end_min = max_nr_timesteps, ti_stars_end_max = 0,
-                ti_stars_beg_max = 0;
-  integertime_t ti_black_holes_end_min = max_nr_timesteps,
-                ti_black_holes_end_max = 0, ti_black_holes_beg_max = 0;
-
-  /* Local Star formation history properties */
-  struct star_formation_history sfh_updated;
-
-  /* Initialize the star formation structs for this engine to zero */
-  star_formation_logger_init(&sfh_updated);
-
-  for (int ind = 0; ind < num_elements; ind++) {
-    struct cell *c = &s->cells_top[local_cells[ind]];
-
-    if (c->hydro.count > 0 || c->grav.count > 0 || c->stars.count > 0 ||
-        c->black_holes.count > 0) {
-
-      /* Make the top-cells recurse */
-      if (with_hydro) {
-        engine_collect_end_of_step_recurse_hydro(c, e);
-      }
-      if (with_grav) {
-        engine_collect_end_of_step_recurse_grav(c, e);
-      }
-      if (with_stars) {
-        engine_collect_end_of_step_recurse_stars(c, e);
-      }
-      if (with_black_holes) {
-        engine_collect_end_of_step_recurse_black_holes(c, e);
-      }
-
-      /* And aggregate */
-      if (c->hydro.ti_end_min > e->ti_current)
-        ti_hydro_end_min = min(ti_hydro_end_min, c->hydro.ti_end_min);
-      ti_hydro_end_max = max(ti_hydro_end_max, c->hydro.ti_end_max);
-      ti_hydro_beg_max = max(ti_hydro_beg_max, c->hydro.ti_beg_max);
-
-      if (c->grav.ti_end_min > e->ti_current)
-        ti_gravity_end_min = min(ti_gravity_end_min, c->grav.ti_end_min);
-      ti_gravity_end_max = max(ti_gravity_end_max, c->grav.ti_end_max);
-      ti_gravity_beg_max = max(ti_gravity_beg_max, c->grav.ti_beg_max);
-
-      if (c->stars.ti_end_min > e->ti_current)
-        ti_stars_end_min = min(ti_stars_end_min, c->stars.ti_end_min);
-      ti_stars_end_max = max(ti_stars_end_max, c->stars.ti_end_max);
-      ti_stars_beg_max = max(ti_stars_beg_max, c->stars.ti_beg_max);
-
-      if (c->black_holes.ti_end_min > e->ti_current)
-        ti_black_holes_end_min =
-            min(ti_black_holes_end_min, c->black_holes.ti_end_min);
-      ti_black_holes_end_max =
-          max(ti_black_holes_end_max, c->black_holes.ti_end_max);
-      ti_black_holes_beg_max =
-          max(ti_black_holes_beg_max, c->black_holes.ti_beg_max);
-
-      updated += c->hydro.updated;
-      g_updated += c->grav.updated;
-      s_updated += c->stars.updated;
-      b_updated += c->black_holes.updated;
-
-      /* Check if the cell is inactive and in that case reorder the SFH */
-      if (!cell_is_starting_hydro(c, e)) {
-        star_formation_logger_log_inactive_cell(&c->stars.sfh);
-      }
-
-      /* Get the star formation history from the current cell and store it in
-       * the star formation history struct */
-      star_formation_logger_add(&sfh_updated, &c->stars.sfh);
-
-      /* Collected, so clear for next time. */
-      c->hydro.updated = 0;
-      c->grav.updated = 0;
-      c->stars.updated = 0;
-      c->black_holes.updated = 0;
-    }
-  }
-
-  /* Let's write back to the global data.
-   * We use the space lock to garanty single access*/
-  if (lock_lock(&s->lock) == 0) {
-    data->updated += updated;
-    data->g_updated += g_updated;
-    data->s_updated += s_updated;
-    data->b_updated += b_updated;
-
-    /* Add the SFH information from this engine to the global data */
-    star_formation_logger_add(sfh_top, &sfh_updated);
-
-    if (ti_hydro_end_min > e->ti_current)
-      data->ti_hydro_end_min = min(ti_hydro_end_min, data->ti_hydro_end_min);
-    data->ti_hydro_end_max = max(ti_hydro_end_max, data->ti_hydro_end_max);
-    data->ti_hydro_beg_max = max(ti_hydro_beg_max, data->ti_hydro_beg_max);
-
-    if (ti_gravity_end_min > e->ti_current)
-      data->ti_gravity_end_min =
-          min(ti_gravity_end_min, data->ti_gravity_end_min);
-    data->ti_gravity_end_max =
-        max(ti_gravity_end_max, data->ti_gravity_end_max);
-    data->ti_gravity_beg_max =
-        max(ti_gravity_beg_max, data->ti_gravity_beg_max);
-
-    if (ti_stars_end_min > e->ti_current)
-      data->ti_stars_end_min = min(ti_stars_end_min, data->ti_stars_end_min);
-    data->ti_stars_end_max = max(ti_stars_end_max, data->ti_stars_end_max);
-    data->ti_stars_beg_max = max(ti_stars_beg_max, data->ti_stars_beg_max);
-
-    if (ti_black_holes_end_min > e->ti_current)
-      data->ti_black_holes_end_min =
-          min(ti_black_holes_end_min, data->ti_black_holes_end_min);
-    data->ti_black_holes_end_max =
-        max(ti_black_holes_end_max, data->ti_black_holes_end_max);
-    data->ti_black_holes_beg_max =
-        max(ti_black_holes_beg_max, data->ti_black_holes_beg_max);
-  }
-
-  if (lock_unlock(&s->lock) != 0) error("Failed to unlock the space");
-}
-
-/**
- * @brief Collects the next time-step and rebuild flag.
- *
- * The next time-step is determined by making each super-cell recurse to
- * collect the minimal of ti_end and the number of updated particles.  When in
- * MPI mode this routines reduces these across all nodes and also collects the
- * forcerebuild flag -- this is so that we only use a single collective MPI
- * call per step for all these values.
- *
- * Note that the results are stored in e->collect_group1 struct not in the
- * engine fields, unless apply is true. These can be applied field-by-field
- * or all at once using collectgroup1_copy();
- *
- * @param e The #engine.
- * @param apply whether to apply the results to the engine or just keep in the
- *              group1 struct.
- */
-void engine_collect_end_of_step(struct engine *e, int apply) {
-
-  const ticks tic = getticks();
-  struct space *s = e->s;
-  struct end_of_step_data data;
-  data.updated = 0, data.g_updated = 0, data.s_updated = 0, data.b_updated = 0;
-  data.ti_hydro_end_min = max_nr_timesteps, data.ti_hydro_end_max = 0,
-  data.ti_hydro_beg_max = 0;
-  data.ti_gravity_end_min = max_nr_timesteps, data.ti_gravity_end_max = 0,
-  data.ti_gravity_beg_max = 0;
-  data.ti_stars_end_min = max_nr_timesteps, data.ti_stars_end_max = 0,
-  data.ti_stars_beg_max = 0;
-  data.ti_black_holes_end_min = max_nr_timesteps,
-  data.ti_black_holes_end_max = 0, data.ti_black_holes_beg_max = 0;
-  data.e = e;
-
-  /* Initialize the total SFH of the simulation to zero */
-  star_formation_logger_init(&data.sfh);
-
-  /* Collect information from the local top-level cells */
-  threadpool_map(&e->threadpool, engine_collect_end_of_step_mapper,
-                 s->local_cells_with_tasks_top, s->nr_local_cells_with_tasks,
-                 sizeof(int), 0, &data);
-
-  /* Get the number of inhibited particles from the space-wide counters
-   * since these have been updated atomically during the time-steps. */
-  data.inhibited = s->nr_inhibited_parts;
-  data.g_inhibited = s->nr_inhibited_gparts;
-  data.s_inhibited = s->nr_inhibited_sparts;
-  data.b_inhibited = s->nr_inhibited_bparts;
-
-  /* Store these in the temporary collection group. */
-  collectgroup1_init(
-      &e->collect_group1, data.updated, data.g_updated, data.s_updated,
-      data.b_updated, data.inhibited, data.g_inhibited, data.s_inhibited,
-      data.b_inhibited, data.ti_hydro_end_min, data.ti_hydro_end_max,
-      data.ti_hydro_beg_max, data.ti_gravity_end_min, data.ti_gravity_end_max,
-      data.ti_gravity_beg_max, data.ti_stars_end_min, data.ti_stars_end_max,
-      data.ti_stars_beg_max, data.ti_black_holes_end_min,
-      data.ti_black_holes_end_max, data.ti_black_holes_beg_max, e->forcerebuild,
-      e->s->tot_cells, e->sched.nr_tasks,
-      (float)e->sched.nr_tasks / (float)e->s->tot_cells, data.sfh);
-
-/* Aggregate collective data from the different nodes for this step. */
-#ifdef WITH_MPI
-  collectgroup1_reduce(&e->collect_group1);
-
-#ifdef SWIFT_DEBUG_CHECKS
-  {
-    /* Check the above using the original MPI calls. */
-    integertime_t in_i[2], out_i[2];
-    in_i[0] = 0;
-    in_i[1] = 0;
-    out_i[0] = data.ti_hydro_end_min;
-    out_i[1] = data.ti_gravity_end_min;
-    if (MPI_Allreduce(out_i, in_i, 2, MPI_LONG_LONG_INT, MPI_MIN,
-                      MPI_COMM_WORLD) != MPI_SUCCESS)
-      error("Failed to aggregate ti_end_min.");
-    if (in_i[0] != (long long)e->collect_group1.ti_hydro_end_min)
-      error("Failed to get same ti_hydro_end_min, is %lld, should be %lld",
-            in_i[0], e->collect_group1.ti_hydro_end_min);
-    if (in_i[1] != (long long)e->collect_group1.ti_gravity_end_min)
-      error("Failed to get same ti_gravity_end_min, is %lld, should be %lld",
-            in_i[1], e->collect_group1.ti_gravity_end_min);
-
-    long long in_ll[4], out_ll[4];
-    out_ll[0] = data.updated;
-    out_ll[1] = data.g_updated;
-    out_ll[2] = data.s_updated;
-    out_ll[3] = data.b_updated;
-    if (MPI_Allreduce(out_ll, in_ll, 4, MPI_LONG_LONG_INT, MPI_SUM,
-                      MPI_COMM_WORLD) != MPI_SUCCESS)
-      error("Failed to aggregate particle counts.");
-    if (in_ll[0] != (long long)e->collect_group1.updated)
-      error("Failed to get same updated, is %lld, should be %lld", in_ll[0],
-            e->collect_group1.updated);
-    if (in_ll[1] != (long long)e->collect_group1.g_updated)
-      error("Failed to get same g_updated, is %lld, should be %lld", in_ll[1],
-            e->collect_group1.g_updated);
-    if (in_ll[2] != (long long)e->collect_group1.s_updated)
-      error("Failed to get same s_updated, is %lld, should be %lld", in_ll[2],
-            e->collect_group1.s_updated);
-    if (in_ll[3] != (long long)e->collect_group1.b_updated)
-      error("Failed to get same b_updated, is %lld, should be %lld", in_ll[3],
-            e->collect_group1.b_updated);
-
-    out_ll[0] = data.inhibited;
-    out_ll[1] = data.g_inhibited;
-    out_ll[2] = data.s_inhibited;
-    out_ll[3] = data.b_inhibited;
-    if (MPI_Allreduce(out_ll, in_ll, 4, MPI_LONG_LONG_INT, MPI_SUM,
-                      MPI_COMM_WORLD) != MPI_SUCCESS)
-      error("Failed to aggregate particle counts.");
-    if (in_ll[0] != (long long)e->collect_group1.inhibited)
-      error("Failed to get same inhibited, is %lld, should be %lld", in_ll[0],
-            e->collect_group1.inhibited);
-    if (in_ll[1] != (long long)e->collect_group1.g_inhibited)
-      error("Failed to get same g_inhibited, is %lld, should be %lld", in_ll[1],
-            e->collect_group1.g_inhibited);
-    if (in_ll[2] != (long long)e->collect_group1.s_inhibited)
-      error("Failed to get same s_inhibited, is %lld, should be %lld", in_ll[2],
-            e->collect_group1.s_inhibited);
-    if (in_ll[3] != (long long)e->collect_group1.b_inhibited)
-      error("Failed to get same b_inhibited, is %lld, should be %lld", in_ll[3],
-            e->collect_group1.b_inhibited);
-
-    int buff = 0;
-    if (MPI_Allreduce(&e->forcerebuild, &buff, 1, MPI_INT, MPI_MAX,
-                      MPI_COMM_WORLD) != MPI_SUCCESS)
-      error("Failed to aggregate the rebuild flag across nodes.");
-    if (!!buff != !!e->collect_group1.forcerebuild)
-      error(
-          "Failed to get same rebuild flag from all nodes, is %d,"
-          "should be %d",
-          buff, e->collect_group1.forcerebuild);
-  }
-#endif
-#endif
-
-  /* Apply to the engine, if requested. */
-  if (apply) collectgroup1_apply(&e->collect_group1, e);
-
-  if (e->verbose)
-    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
-            clocks_getunit());
-}
-
 /**
  * @brief Print the conserved quantities statistics to a log file
  *
@@ -3642,7 +3082,6 @@ void engine_init_particles(struct engine *e, int flag_entropy_ICs,
 #endif
 
   scheduler_write_dependencies(&e->sched, e->verbose);
-  space_write_cell_hierarchy(e->s);
   if (e->nodeID == 0) scheduler_write_task_level(&e->sched);
 
   /* Run the 0th time-step */
@@ -4217,64 +3656,6 @@ int engine_is_done(struct engine *e) {
   return !(e->ti_current < max_nr_timesteps);
 }
 
-/**
- * @brief Unskip all the tasks that act on active cells at this time.
- *
- * @param e The #engine.
- */
-void engine_unskip(struct engine *e) {
-
-  const ticks tic = getticks();
-  struct space *s = e->s;
-  const int nodeID = e->nodeID;
-
-  const int with_hydro = e->policy & engine_policy_hydro;
-  const int with_self_grav = e->policy & engine_policy_self_gravity;
-  const int with_ext_grav = e->policy & engine_policy_external_gravity;
-  const int with_stars = e->policy & engine_policy_stars;
-  const int with_feedback = e->policy & engine_policy_feedback;
-  const int with_black_holes = e->policy & engine_policy_black_holes;
-
-#ifdef WITH_PROFILER
-  static int count = 0;
-  char filename[100];
-  sprintf(filename, "/tmp/swift_runner_do_usnkip_mapper_%06i.prof", count++);
-  ProfilerStart(filename);
-#endif  // WITH_PROFILER
-
-  /* Move the active local cells to the top of the list. */
-  int *local_cells = e->s->local_cells_with_tasks_top;
-  int num_active_cells = 0;
-  for (int k = 0; k < s->nr_local_cells_with_tasks; k++) {
-    struct cell *c = &s->cells_top[local_cells[k]];
-
-    if ((with_hydro && cell_is_active_hydro(c, e)) ||
-        (with_self_grav && cell_is_active_gravity(c, e)) ||
-        (with_ext_grav && c->nodeID == nodeID &&
-         cell_is_active_gravity(c, e)) ||
-        (with_feedback && cell_is_active_stars(c, e)) ||
-        (with_stars && c->nodeID == nodeID && cell_is_active_stars(c, e)) ||
-        (with_black_holes && cell_is_active_black_holes(c, e))) {
-
-      if (num_active_cells != k)
-        memswap(&local_cells[k], &local_cells[num_active_cells], sizeof(int));
-      num_active_cells += 1;
-    }
-  }
-
-  /* Activate all the regular tasks */
-  threadpool_map(&e->threadpool, runner_do_unskip_mapper, local_cells,
-                 num_active_cells, sizeof(int), 1, e);
-
-#ifdef WITH_PROFILER
-  ProfilerStop();
-#endif  // WITH_PROFILER
-
-  if (e->verbose)
-    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
-            clocks_getunit());
-}
-
 void engine_do_reconstruct_multipoles_mapper(void *map_data, int num_elements,
                                              void *extra_data) {
 
@@ -4877,7 +4258,7 @@ void engine_dump_snapshot(struct engine *e) {
  */
 void engine_dump_index(struct engine *e) {
 
-#if defined(WITH_LOGGER)
+#if defined(WITH_LOGGER) && !defined(WITH_MPI)
   struct clocks_time time1, time2;
   clocks_gettime(&time1);
 
@@ -5094,7 +4475,7 @@ void engine_init(struct engine *e, struct space *s, struct swift_params *params,
   e->total_nr_tasks = 0;
 
 #if defined(WITH_LOGGER)
-  e->logger = (struct logger *)malloc(sizeof(struct logger));
+  e->logger = (struct logger_writer *)malloc(sizeof(struct logger_writer));
   logger_init(e->logger, params);
 #endif
 
@@ -5842,7 +5223,7 @@ void engine_config(int restart, int fof, struct engine *e,
 
 #ifdef WITH_LOGGER
   /* Write the particle logger header */
-  logger_write_file_header(e->logger, e);
+  logger_write_file_header(e->logger);
 #endif
 
   /* Initialise the structure finder */
@@ -6360,7 +5741,7 @@ void engine_clean(struct engine *e, const int fof) {
 
   swift_free("links", e->links);
 #if defined(WITH_LOGGER)
-  logger_clean(e->logger);
+  logger_free(e->logger);
   free(e->logger);
 #endif
   scheduler_clean(&e->sched);
@@ -6577,127 +5958,3 @@ void engine_struct_restore(struct engine *e, FILE *stream) {
   e->forcerebuild = 1;
   e->forcerepart = 0;
 }
-
-/**
- * @brief Activate all the #gpart communications in preparation
- * fof a call to FOF.
- *
- * @param e The #engine to act on.
- */
-void engine_activate_gpart_comms(struct engine *e) {
-
-#ifdef WITH_MPI
-
-  const ticks tic = getticks();
-
-  struct scheduler *s = &e->sched;
-  const int nr_tasks = s->nr_tasks;
-  struct task *tasks = s->tasks;
-
-  for (int k = 0; k < nr_tasks; ++k) {
-
-    struct task *t = &tasks[k];
-
-    if ((t->type == task_type_send) && (t->subtype == task_subtype_gpart)) {
-      scheduler_activate(s, t);
-    } else if ((t->type == task_type_recv) &&
-               (t->subtype == task_subtype_gpart)) {
-      scheduler_activate(s, t);
-    } else {
-      t->skip = 1;
-    }
-  }
-
-  if (e->verbose)
-    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
-            clocks_getunit());
-
-#else
-  error("Calling an MPI function in non-MPI mode.");
-#endif
-}
-
-/**
- * @brief Activate all the FOF tasks.
- *
- * Marks all the other task types to be skipped.
- *
- * @param e The #engine to act on.
- */
-void engine_activate_fof_tasks(struct engine *e) {
-
-  const ticks tic = getticks();
-
-  struct scheduler *s = &e->sched;
-  const int nr_tasks = s->nr_tasks;
-  struct task *tasks = s->tasks;
-
-  for (int k = 0; k < nr_tasks; k++) {
-
-    struct task *t = &tasks[k];
-
-    if (t->type == task_type_fof_self || t->type == task_type_fof_pair)
-      scheduler_activate(s, t);
-    else
-      t->skip = 1;
-  }
-
-  if (e->verbose)
-    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
-            clocks_getunit());
-}
-
-/**
- * @brief Run a FOF search.
- *
- * @param e the engine
- * @param dump_results Are we writing group catalogues to output files?
- * @param seed_black_holes Are we seeding black holes?
- */
-void engine_fof(struct engine *e, const int dump_results,
-                const int seed_black_holes) {
-
-#ifdef WITH_FOF
-
-  ticks tic = getticks();
-
-  /* Compute number of DM particles */
-  const long long total_nr_baryons =
-      e->total_nr_parts + e->total_nr_sparts + e->total_nr_bparts;
-  const long long total_nr_dmparts =
-      e->total_nr_gparts - e->total_nr_DM_background_gparts - total_nr_baryons;
-
-  /* Initialise FOF parameters and allocate FOF arrays. */
-  fof_allocate(e->s, total_nr_dmparts, e->fof_properties);
-
-  /* Make FOF tasks */
-  engine_make_fof_tasks(e);
-
-  /* and activate them. */
-  engine_activate_fof_tasks(e);
-
-  /* Perform local FOF tasks. */
-  engine_launch(e);
-
-  /* Perform FOF search over foreign particles and
-   * find groups which require black hole seeding.  */
-  fof_search_tree(e->fof_properties, e->black_holes_properties,
-                  e->physical_constants, e->cosmology, e->s, dump_results,
-                  seed_black_holes);
-
-  /* Reset flag. */
-  e->run_fof = 0;
-
-  /* Flag that a FOF has taken place */
-  e->step_props |= engine_step_prop_fof;
-
-  /* ... and find the next FOF time */
-  if (seed_black_holes) engine_compute_next_fof_time(e);
-
-  if (engine_rank == 0)
-    message("Complete FOF search took: %.3f %s.",
-            clocks_from_ticks(getticks() - tic), clocks_getunit());
-#else
-  error("SWIFT was not compiled with FOF enabled!");
-#endif
-}
diff --git a/src/engine.h b/src/engine.h
index d7da2942dac7c03dba9d66ea1499e8a4fd2202d9..68a4df10c08325d5d810b361dab863bf9ee68ea6 100644
--- a/src/engine.h
+++ b/src/engine.h
@@ -383,7 +383,7 @@ struct engine {
   struct repartition *reparttype;
 
 #ifdef WITH_LOGGER
-  struct logger *logger;
+  struct logger_writer *logger;
 #endif
 
   /* How many steps have we done with the same set of tasks? */
@@ -494,6 +494,7 @@ void engine_reconstruct_multipoles(struct engine *e);
 void engine_allocate_foreign_particles(struct engine *e);
 void engine_print_stats(struct engine *e);
 void engine_check_for_dumps(struct engine *e);
+void engine_collect_end_of_step(struct engine *e, int apply);
 void engine_dump_snapshot(struct engine *e);
 void engine_init_output_lists(struct engine *e, struct swift_params *params);
 void engine_init(struct engine *e, struct space *s, struct swift_params *params,
diff --git a/src/engine_collect_end_of_step.c b/src/engine_collect_end_of_step.c
new file mode 100644
index 0000000000000000000000000000000000000000..ec02acfefdf65aca13d44a7cf90d48f31b99778f
--- /dev/null
+++ b/src/engine_collect_end_of_step.c
@@ -0,0 +1,584 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
+ *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *               2015 Peter W. Draper (p.w.draper@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+/* Config parameters. */
+#include "../config.h"
+
+/* This object's header. */
+#include "engine.h"
+
+/* Local headers. */
+#include "active.h"
+#include "timeline.h"
+
+/**
+ * @brief Data collected from the cells at the end of a time-step.
+ */
+struct end_of_step_data {
+  /* Prefixes: none = hydro, g_ = gravity, s_ = stars, b_ = black holes. */
+  size_t updated, g_updated, s_updated, b_updated;
+  size_t inhibited, g_inhibited, s_inhibited, b_inhibited;
+  integertime_t ti_hydro_end_min, ti_hydro_end_max, ti_hydro_beg_max;
+  integertime_t ti_gravity_end_min, ti_gravity_end_max, ti_gravity_beg_max;
+  integertime_t ti_stars_end_min, ti_stars_end_max, ti_stars_beg_max;
+  integertime_t ti_black_holes_end_min, ti_black_holes_end_max,
+      ti_black_holes_beg_max;
+  struct engine *e;
+  struct star_formation_history sfh;
+};
+
+/**
+ * @brief Recursive function gathering the hydro end-of-step data.
+ *
+ * We recurse until we encounter a timestep or time-step MPI recv task
+ * as the values will have been set at that level. We then bring these
+ * values (time-line extrema, update count and SFH) upwards.
+ *
+ * @param c The #cell to recurse into.
+ * @param e The #engine.
+ */
+void engine_collect_end_of_step_recurse_hydro(struct cell *c,
+                                              const struct engine *e) {
+
+  /* Skip super-cells (their values are already set). */
+  if (c->timestep != NULL) return;
+#ifdef WITH_MPI
+  if (cell_get_recv(c, task_subtype_tend_part) != NULL) return;
+#endif /* WITH_MPI */
+
+#ifdef SWIFT_DEBUG_CHECKS
+  /* Disabled check: a leaf (!c->split) reached without a time-step task
+   * (the message also printed c->depth, c->maxdepth, the count and nodeID).
+   * NOTE(review): left commented out in this patch; re-enable or remove. */
+#endif
+
+  /* Accumulators for the quantities gathered from the progeny. */
+  size_t updated = 0;
+  integertime_t ti_hydro_end_min = max_nr_timesteps, ti_hydro_end_max = 0,
+                ti_hydro_beg_max = 0;
+
+  /* Local star formation history accumulator */
+  struct star_formation_history sfh_updated;
+
+  /* Initialize the local star formation history */
+  star_formation_logger_init(&sfh_updated);
+
+  /* Collect the values from the progeny that contain hydro particles. */
+  for (int k = 0; k < 8; k++) {
+    struct cell *cp = c->progeny[k];
+    if (cp != NULL && cp->hydro.count > 0) {
+
+      /* Recurse */
+      engine_collect_end_of_step_recurse_hydro(cp, e);
+
+      /* And update */
+      ti_hydro_end_min = min(ti_hydro_end_min, cp->hydro.ti_end_min);
+      ti_hydro_end_max = max(ti_hydro_end_max, cp->hydro.ti_end_max);
+      ti_hydro_beg_max = max(ti_hydro_beg_max, cp->hydro.ti_beg_max);
+
+      updated += cp->hydro.updated;
+
+      /* If the cell is not starting a hydro step, log it as inactive */
+      if (!cell_is_starting_hydro(cp, e)) {
+        star_formation_logger_log_inactive_cell(&cp->stars.sfh);
+      }
+
+      /* Add the star formation history in this cell to sfh_updated */
+      star_formation_logger_add(&sfh_updated, &cp->stars.sfh);
+
+      /* Collected, so clear for next time. */
+      cp->hydro.updated = 0;
+    }
+  }
+
+  /* Store the collected values in the cell. */
+  c->hydro.ti_end_min = ti_hydro_end_min;
+  c->hydro.ti_end_max = ti_hydro_end_max;
+  c->hydro.ti_beg_max = ti_hydro_beg_max;
+  c->hydro.updated = updated;
+  /* (The inhibited count is not gathered by this recursion.) */
+
+  /* Add the gathered star formation history to the parent cell's one */
+  star_formation_logger_add(&c->stars.sfh, &sfh_updated);
+}
+
+/**
+ * @brief Recursive function gathering the gravity end-of-step data.
+ *
+ * We recurse until we encounter a timestep or time-step MPI recv task
+ * as the values will have been set at that level. We then bring these
+ * values (time-line extrema and update count) upwards.
+ *
+ * @param c The #cell to recurse into.
+ * @param e The #engine.
+ */
+void engine_collect_end_of_step_recurse_grav(struct cell *c,
+                                             const struct engine *e) {
+
+  /* Skip super-cells (their values are already set). */
+  if (c->timestep != NULL) return;
+#ifdef WITH_MPI
+  if (cell_get_recv(c, task_subtype_tend_gpart) != NULL) return;
+#endif /* WITH_MPI */
+
+#ifdef SWIFT_DEBUG_CHECKS
+  /* Disabled check: a leaf (!c->split) reached without a time-step task.
+   * NOTE(review): left commented out in this patch; re-enable or remove. */
+#endif
+
+  /* Accumulators for the quantities gathered from the progeny. */
+  size_t updated = 0;
+  integertime_t ti_grav_end_min = max_nr_timesteps, ti_grav_end_max = 0,
+                ti_grav_beg_max = 0;
+
+  /* Collect the values from the progeny that contain gravity particles. */
+  for (int k = 0; k < 8; k++) {
+    struct cell *cp = c->progeny[k];
+    if (cp != NULL && cp->grav.count > 0) {
+
+      /* Recurse */
+      engine_collect_end_of_step_recurse_grav(cp, e);
+
+      /* And update */
+      ti_grav_end_min = min(ti_grav_end_min, cp->grav.ti_end_min);
+      ti_grav_end_max = max(ti_grav_end_max, cp->grav.ti_end_max);
+      ti_grav_beg_max = max(ti_grav_beg_max, cp->grav.ti_beg_max);
+
+      updated += cp->grav.updated;
+
+      /* Collected, so clear for next time. */
+      cp->grav.updated = 0;
+    }
+  }
+
+  /* Store the collected values in the cell. */
+  c->grav.ti_end_min = ti_grav_end_min;
+  c->grav.ti_end_max = ti_grav_end_max;
+  c->grav.ti_beg_max = ti_grav_beg_max;
+  c->grav.updated = updated;
+}
+
+/**
+ * @brief Recursive function gathering the stars end-of-step data.
+ *
+ * We recurse until we encounter a timestep or time-step MPI recv task
+ * as the values will have been set at that level. We then bring these
+ * values (time-line extrema and update count) upwards.
+ *
+ * @param c The #cell to recurse into.
+ * @param e The #engine.
+ */
+void engine_collect_end_of_step_recurse_stars(struct cell *c,
+                                              const struct engine *e) {
+
+  /* Skip super-cells (their values are already set). */
+  if (c->timestep != NULL) return;
+#ifdef WITH_MPI
+  if (cell_get_recv(c, task_subtype_tend_spart) != NULL) return;
+#endif /* WITH_MPI */
+
+#ifdef SWIFT_DEBUG_CHECKS
+  /* Disabled check: a leaf (!c->split) reached without a time-step task. */
+#endif
+
+  /* Accumulators for the quantities gathered from the progeny. */
+  size_t updated = 0;
+  integertime_t ti_stars_end_min = max_nr_timesteps, ti_stars_end_max = 0,
+                ti_stars_beg_max = 0;
+
+  /* Collect the values from the progeny that contain star particles. */
+  for (int k = 0; k < 8; k++) {
+    struct cell *cp = c->progeny[k];
+    if (cp != NULL && cp->stars.count > 0) {
+
+      /* Recurse */
+      engine_collect_end_of_step_recurse_stars(cp, e);
+
+      /* And update */
+      ti_stars_end_min = min(ti_stars_end_min, cp->stars.ti_end_min);
+      ti_stars_end_max = max(ti_stars_end_max, cp->stars.ti_end_max);
+      ti_stars_beg_max = max(ti_stars_beg_max, cp->stars.ti_beg_max);
+
+      updated += cp->stars.updated;
+
+      /* Collected, so clear for next time. */
+      cp->stars.updated = 0;
+    }
+  }
+
+  /* Store the collected values in the cell. */
+  c->stars.ti_end_min = ti_stars_end_min;
+  c->stars.ti_end_max = ti_stars_end_max;
+  c->stars.ti_beg_max = ti_stars_beg_max;
+  c->stars.updated = updated;
+}
+
+/**
+ * @brief Recursive function gathering the black holes end-of-step data.
+ *
+ * We recurse until we encounter a timestep or time-step MPI recv task
+ * as the values will have been set at that level. We then bring these
+ * values (time-line extrema and update count) upwards.
+ *
+ * @param c The #cell to recurse into.
+ * @param e The #engine.
+ */
+void engine_collect_end_of_step_recurse_black_holes(struct cell *c,
+                                                    const struct engine *e) {
+
+  /* Skip super-cells (their values are already set). */
+  if (c->timestep != NULL) return;
+#ifdef WITH_MPI
+  if (cell_get_recv(c, task_subtype_tend_bpart) != NULL) return;
+#endif /* WITH_MPI */
+
+#ifdef SWIFT_DEBUG_CHECKS
+  /* Disabled check: a leaf (!c->split) reached without a time-step task. */
+#endif
+
+  /* Accumulators for the quantities gathered from the progeny. */
+  size_t updated = 0;
+  integertime_t ti_black_holes_end_min = max_nr_timesteps,
+                ti_black_holes_end_max = 0, ti_black_holes_beg_max = 0;
+
+  /* Collect the values from the progeny that contain black holes. */
+  for (int k = 0; k < 8; k++) {
+    struct cell *cp = c->progeny[k];
+    if (cp != NULL && cp->black_holes.count > 0) {
+
+      /* Recurse */
+      engine_collect_end_of_step_recurse_black_holes(cp, e);
+
+      /* And update */
+      ti_black_holes_end_min =
+          min(ti_black_holes_end_min, cp->black_holes.ti_end_min);
+      ti_black_holes_end_max =
+          max(ti_black_holes_end_max, cp->black_holes.ti_end_max);
+      ti_black_holes_beg_max =
+          max(ti_black_holes_beg_max, cp->black_holes.ti_beg_max);
+
+      updated += cp->black_holes.updated;
+
+      /* Collected, so clear for next time. */
+      cp->black_holes.updated = 0;
+    }
+  }
+
+  /* Store the collected values in the cell. */
+  c->black_holes.ti_end_min = ti_black_holes_end_min;
+  c->black_holes.ti_end_max = ti_black_holes_end_max;
+  c->black_holes.ti_beg_max = ti_black_holes_beg_max;
+  c->black_holes.updated = updated;
+}
+
+/**
+ * @brief Mapping function to collect the data from the end of the step.
+ *
+ * This function calls the per-type recursive functions on the top-level cells
+ * it is given and merges the results into the shared #end_of_step_data.
+ *
+ * @param map_data Indices (into s->cells_top) of the cells to process.
+ * @param num_elements The number of elements in the list this thread will work
+ * on.
+ * @param extra_data The #end_of_step_data to update (it holds the #engine).
+ */
+void engine_collect_end_of_step_mapper(void *map_data, int num_elements,
+                                       void *extra_data) {
+
+  struct end_of_step_data *data = (struct end_of_step_data *)extra_data;
+  const struct engine *e = data->e;
+  const int with_hydro = (e->policy & engine_policy_hydro);
+  const int with_self_grav = (e->policy & engine_policy_self_gravity);
+  const int with_ext_grav = (e->policy & engine_policy_external_gravity);
+  const int with_grav = (with_self_grav || with_ext_grav);
+  const int with_stars = (e->policy & engine_policy_stars);
+  const int with_black_holes = (e->policy & engine_policy_black_holes);
+  struct space *s = e->s;
+  int *local_cells = (int *)map_data;
+  struct star_formation_history *sfh_top = &data->sfh;
+
+  /* Local accumulators for this chunk of cells */
+  size_t updated = 0, g_updated = 0, s_updated = 0, b_updated = 0;
+  integertime_t ti_hydro_end_min = max_nr_timesteps, ti_hydro_end_max = 0,
+                ti_hydro_beg_max = 0;
+  integertime_t ti_gravity_end_min = max_nr_timesteps, ti_gravity_end_max = 0,
+                ti_gravity_beg_max = 0;
+  integertime_t ti_stars_end_min = max_nr_timesteps, ti_stars_end_max = 0,
+                ti_stars_beg_max = 0;
+  integertime_t ti_black_holes_end_min = max_nr_timesteps,
+                ti_black_holes_end_max = 0, ti_black_holes_beg_max = 0;
+
+  /* Local star formation history accumulator */
+  struct star_formation_history sfh_updated;
+
+  /* Initialize the local star formation history */
+  star_formation_logger_init(&sfh_updated);
+
+  for (int ind = 0; ind < num_elements; ind++) {
+    struct cell *c = &s->cells_top[local_cells[ind]];
+
+    if (c->hydro.count > 0 || c->grav.count > 0 || c->stars.count > 0 ||
+        c->black_holes.count > 0) {
+
+      /* Make the top-level cell recurse, once per enabled particle type */
+      if (with_hydro) {
+        engine_collect_end_of_step_recurse_hydro(c, e);
+      }
+      if (with_grav) {
+        engine_collect_end_of_step_recurse_grav(c, e);
+      }
+      if (with_stars) {
+        engine_collect_end_of_step_recurse_stars(c, e);
+      }
+      if (with_black_holes) {
+        engine_collect_end_of_step_recurse_black_holes(c, e);
+      }
+
+      /* And aggregate (end_min values not after e->ti_current are ignored) */
+      if (c->hydro.ti_end_min > e->ti_current)
+        ti_hydro_end_min = min(ti_hydro_end_min, c->hydro.ti_end_min);
+      ti_hydro_end_max = max(ti_hydro_end_max, c->hydro.ti_end_max);
+      ti_hydro_beg_max = max(ti_hydro_beg_max, c->hydro.ti_beg_max);
+
+      if (c->grav.ti_end_min > e->ti_current)
+        ti_gravity_end_min = min(ti_gravity_end_min, c->grav.ti_end_min);
+      ti_gravity_end_max = max(ti_gravity_end_max, c->grav.ti_end_max);
+      ti_gravity_beg_max = max(ti_gravity_beg_max, c->grav.ti_beg_max);
+
+      if (c->stars.ti_end_min > e->ti_current)
+        ti_stars_end_min = min(ti_stars_end_min, c->stars.ti_end_min);
+      ti_stars_end_max = max(ti_stars_end_max, c->stars.ti_end_max);
+      ti_stars_beg_max = max(ti_stars_beg_max, c->stars.ti_beg_max);
+
+      if (c->black_holes.ti_end_min > e->ti_current)
+        ti_black_holes_end_min =
+            min(ti_black_holes_end_min, c->black_holes.ti_end_min);
+      ti_black_holes_end_max =
+          max(ti_black_holes_end_max, c->black_holes.ti_end_max);
+      ti_black_holes_beg_max =
+          max(ti_black_holes_beg_max, c->black_holes.ti_beg_max);
+
+      updated += c->hydro.updated;
+      g_updated += c->grav.updated;
+      s_updated += c->stars.updated;
+      b_updated += c->black_holes.updated;
+
+      /* If the cell is not starting a hydro step, log it as inactive */
+      if (!cell_is_starting_hydro(c, e)) {
+        star_formation_logger_log_inactive_cell(&c->stars.sfh);
+      }
+
+      /* Get the star formation history from the current cell and add it to
+       * the local star formation history accumulator */
+      star_formation_logger_add(&sfh_updated, &c->stars.sfh);
+
+      /* Collected, so clear for next time. */
+      c->hydro.updated = 0;
+      c->grav.updated = 0;
+      c->stars.updated = 0;
+      c->black_holes.updated = 0;
+    }
+  }
+
+  /* Write back to the global data; the space lock guarantees single access.
+   * NOTE(review): a failed lock_lock() skips the merge but still unlocks. */
+  if (lock_lock(&s->lock) == 0) {
+    data->updated += updated;
+    data->g_updated += g_updated;
+    data->s_updated += s_updated;
+    data->b_updated += b_updated;
+
+    /* Add the SFH information from this chunk to the global data */
+    star_formation_logger_add(sfh_top, &sfh_updated);
+
+    if (ti_hydro_end_min > e->ti_current)
+      data->ti_hydro_end_min = min(ti_hydro_end_min, data->ti_hydro_end_min);
+    data->ti_hydro_end_max = max(ti_hydro_end_max, data->ti_hydro_end_max);
+    data->ti_hydro_beg_max = max(ti_hydro_beg_max, data->ti_hydro_beg_max);
+
+    if (ti_gravity_end_min > e->ti_current)
+      data->ti_gravity_end_min =
+          min(ti_gravity_end_min, data->ti_gravity_end_min);
+    data->ti_gravity_end_max =
+        max(ti_gravity_end_max, data->ti_gravity_end_max);
+    data->ti_gravity_beg_max =
+        max(ti_gravity_beg_max, data->ti_gravity_beg_max);
+
+    if (ti_stars_end_min > e->ti_current)
+      data->ti_stars_end_min = min(ti_stars_end_min, data->ti_stars_end_min);
+    data->ti_stars_end_max = max(ti_stars_end_max, data->ti_stars_end_max);
+    data->ti_stars_beg_max = max(ti_stars_beg_max, data->ti_stars_beg_max);
+
+    if (ti_black_holes_end_min > e->ti_current)
+      data->ti_black_holes_end_min =
+          min(ti_black_holes_end_min, data->ti_black_holes_end_min);
+    data->ti_black_holes_end_max =
+        max(ti_black_holes_end_max, data->ti_black_holes_end_max);
+    data->ti_black_holes_beg_max =
+        max(ti_black_holes_beg_max, data->ti_black_holes_beg_max);
+  }
+
+  if (lock_unlock(&s->lock) != 0) error("Failed to unlock the space");
+}
+
+/**
+ * @brief Collects the next time-step and rebuild flag.
+ *
+ * The next time-step is determined by making each super-cell recurse to
+ * collect the minimum of ti_end and the number of updated particles.  When in
+ * MPI mode this routine reduces these across all nodes and also collects the
+ * forcerebuild flag -- this is so that we only use a single collective MPI
+ * call per step for all these values.
+ *
+ * Note that the results are stored in the e->collect_group1 struct, not in
+ * the engine fields, unless apply is true. They can be applied field-by-field
+ * or all at once using collectgroup1_apply().
+ *
+ * @param e The #engine.
+ * @param apply whether to apply the results to the engine or just keep in the
+ *              group1 struct.
+ */
+void engine_collect_end_of_step(struct engine *e, int apply) {
+
+  const ticks tic = getticks();
+  struct space *s = e->s;
+  struct end_of_step_data data;
+  data.updated = 0, data.g_updated = 0, data.s_updated = 0, data.b_updated = 0;
+  data.ti_hydro_end_min = max_nr_timesteps, data.ti_hydro_end_max = 0,
+  data.ti_hydro_beg_max = 0;
+  data.ti_gravity_end_min = max_nr_timesteps, data.ti_gravity_end_max = 0,
+  data.ti_gravity_beg_max = 0;
+  data.ti_stars_end_min = max_nr_timesteps, data.ti_stars_end_max = 0,
+  data.ti_stars_beg_max = 0;
+  data.ti_black_holes_end_min = max_nr_timesteps,
+  data.ti_black_holes_end_max = 0, data.ti_black_holes_beg_max = 0;
+  data.e = e;
+
+  /* Initialize the SFH accumulator of the simulation */
+  star_formation_logger_init(&data.sfh);
+
+  /* Collect information from the local top-level cells that have tasks */
+  threadpool_map(&e->threadpool, engine_collect_end_of_step_mapper,
+                 s->local_cells_with_tasks_top, s->nr_local_cells_with_tasks,
+                 sizeof(int), 0, &data);
+
+  /* Get the number of inhibited particles from the space-wide counters
+   * since these have been updated atomically during the time-steps. */
+  data.inhibited = s->nr_inhibited_parts;
+  data.g_inhibited = s->nr_inhibited_gparts;
+  data.s_inhibited = s->nr_inhibited_sparts;
+  data.b_inhibited = s->nr_inhibited_bparts;
+
+  /* Store these (and the tasks-per-cell ratio) in the collection group. */
+  collectgroup1_init(
+      &e->collect_group1, data.updated, data.g_updated, data.s_updated,
+      data.b_updated, data.inhibited, data.g_inhibited, data.s_inhibited,
+      data.b_inhibited, data.ti_hydro_end_min, data.ti_hydro_end_max,
+      data.ti_hydro_beg_max, data.ti_gravity_end_min, data.ti_gravity_end_max,
+      data.ti_gravity_beg_max, data.ti_stars_end_min, data.ti_stars_end_max,
+      data.ti_stars_beg_max, data.ti_black_holes_end_min,
+      data.ti_black_holes_end_max, data.ti_black_holes_beg_max, e->forcerebuild,
+      e->s->tot_cells, e->sched.nr_tasks,
+      (float)e->sched.nr_tasks / (float)e->s->tot_cells, data.sfh);
+
+/* Aggregate collective data from the different nodes for this step. */
+#ifdef WITH_MPI
+  collectgroup1_reduce(&e->collect_group1);
+
+#ifdef SWIFT_DEBUG_CHECKS
+  {
+    /* Cross-check the reduction above with plain MPI_Allreduce calls. */
+    integertime_t in_i[2], out_i[2];
+    in_i[0] = 0;
+    in_i[1] = 0;
+    out_i[0] = data.ti_hydro_end_min;
+    out_i[1] = data.ti_gravity_end_min;
+    if (MPI_Allreduce(out_i, in_i, 2, MPI_LONG_LONG_INT, MPI_MIN,
+                      MPI_COMM_WORLD) != MPI_SUCCESS)
+      error("Failed to aggregate ti_end_min.");
+    if (in_i[0] != (long long)e->collect_group1.ti_hydro_end_min)
+      error("Failed to get same ti_hydro_end_min, is %lld, should be %lld",
+            in_i[0], e->collect_group1.ti_hydro_end_min);
+    if (in_i[1] != (long long)e->collect_group1.ti_gravity_end_min)
+      error("Failed to get same ti_gravity_end_min, is %lld, should be %lld",
+            in_i[1], e->collect_group1.ti_gravity_end_min);
+
+    long long in_ll[4], out_ll[4];
+    out_ll[0] = data.updated;
+    out_ll[1] = data.g_updated;
+    out_ll[2] = data.s_updated;
+    out_ll[3] = data.b_updated;
+    if (MPI_Allreduce(out_ll, in_ll, 4, MPI_LONG_LONG_INT, MPI_SUM,
+                      MPI_COMM_WORLD) != MPI_SUCCESS)
+      error("Failed to aggregate particle counts.");
+    if (in_ll[0] != (long long)e->collect_group1.updated)
+      error("Failed to get same updated, is %lld, should be %lld", in_ll[0],
+            e->collect_group1.updated);
+    if (in_ll[1] != (long long)e->collect_group1.g_updated)
+      error("Failed to get same g_updated, is %lld, should be %lld", in_ll[1],
+            e->collect_group1.g_updated);
+    if (in_ll[2] != (long long)e->collect_group1.s_updated)
+      error("Failed to get same s_updated, is %lld, should be %lld", in_ll[2],
+            e->collect_group1.s_updated);
+    if (in_ll[3] != (long long)e->collect_group1.b_updated)
+      error("Failed to get same b_updated, is %lld, should be %lld", in_ll[3],
+            e->collect_group1.b_updated);
+
+    out_ll[0] = data.inhibited;
+    out_ll[1] = data.g_inhibited;
+    out_ll[2] = data.s_inhibited;
+    out_ll[3] = data.b_inhibited;
+    if (MPI_Allreduce(out_ll, in_ll, 4, MPI_LONG_LONG_INT, MPI_SUM,
+                      MPI_COMM_WORLD) != MPI_SUCCESS)
+      error("Failed to aggregate particle counts.");
+    if (in_ll[0] != (long long)e->collect_group1.inhibited)
+      error("Failed to get same inhibited, is %lld, should be %lld", in_ll[0],
+            e->collect_group1.inhibited);
+    if (in_ll[1] != (long long)e->collect_group1.g_inhibited)
+      error("Failed to get same g_inhibited, is %lld, should be %lld", in_ll[1],
+            e->collect_group1.g_inhibited);
+    if (in_ll[2] != (long long)e->collect_group1.s_inhibited)
+      error("Failed to get same s_inhibited, is %lld, should be %lld", in_ll[2],
+            e->collect_group1.s_inhibited);
+    if (in_ll[3] != (long long)e->collect_group1.b_inhibited)
+      error("Failed to get same b_inhibited, is %lld, should be %lld", in_ll[3],
+            e->collect_group1.b_inhibited);
+
+    int buff = 0;
+    if (MPI_Allreduce(&e->forcerebuild, &buff, 1, MPI_INT, MPI_MAX,
+                      MPI_COMM_WORLD) != MPI_SUCCESS)
+      error("Failed to aggregate the rebuild flag across nodes.");
+    if (!!buff != !!e->collect_group1.forcerebuild)
+      error(
+          "Failed to get same rebuild flag from all nodes, is %d,"
+          "should be %d",
+          buff, e->collect_group1.forcerebuild);
+  }
+#endif
+#endif
+
+  /* Apply to the engine, if requested. */
+  if (apply) collectgroup1_apply(&e->collect_group1, e);
+
+  if (e->verbose)
+    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
+            clocks_getunit());
+}
diff --git a/src/engine_fof.c b/src/engine_fof.c
new file mode 100644
index 0000000000000000000000000000000000000000..f1bb5b452104642f68b4a9987a1ab8d8e3b0162b
--- /dev/null
+++ b/src/engine_fof.c
@@ -0,0 +1,150 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
+ *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *               2015 Peter W. Draper (p.w.draper@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+/* Config parameters. */
+#include "../config.h"
+
+/* This object's header. */
+#include "engine.h"
+
+/**
+ * @brief Activate all the #gpart communications in preparation
+ * for a call to FOF; every other task is flagged to be skipped.
+ *
+ * @param e The #engine to act on.
+ */
+void engine_activate_gpart_comms(struct engine *e) {
+
+#ifdef WITH_MPI
+
+  const ticks tic = getticks();
+
+  struct scheduler *s = &e->sched;
+  const int nr_tasks = s->nr_tasks;
+  struct task *tasks = s->tasks;
+
+  for (int k = 0; k < nr_tasks; ++k) {
+
+    struct task *t = &tasks[k];
+
+    if ((t->type == task_type_send) && (t->subtype == task_subtype_gpart)) {
+      scheduler_activate(s, t);
+    } else if ((t->type == task_type_recv) &&
+               (t->subtype == task_subtype_gpart)) {
+      scheduler_activate(s, t);
+    } else {
+      t->skip = 1;
+    }
+  }
+
+  if (e->verbose)
+    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
+            clocks_getunit());
+
+#else
+  error("Calling an MPI function in non-MPI mode.");
+#endif
+}
+
+/**
+ * @brief Activate all the FOF tasks.
+ *
+ * Every task that is not a fof_self or fof_pair task gets its skip flag set.
+ *
+ * @param e The #engine to act on.
+ */
+void engine_activate_fof_tasks(struct engine *e) {
+
+  const ticks tic = getticks();
+
+  struct scheduler *s = &e->sched;
+  const int nr_tasks = s->nr_tasks;
+  struct task *tasks = s->tasks;
+
+  for (int k = 0; k < nr_tasks; k++) {
+
+    struct task *t = &tasks[k];
+
+    if (t->type == task_type_fof_self || t->type == task_type_fof_pair)
+      scheduler_activate(s, t);
+    else
+      t->skip = 1;
+  }
+
+  if (e->verbose)
+    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
+            clocks_getunit());
+}
+
+/**
+ * @brief Run a FOF search (errors out unless compiled with WITH_FOF).
+ *
+ * @param e the engine
+ * @param dump_results Are we writing group catalogues to output files?
+ * @param seed_black_holes Are we seeding black holes?
+ */
+void engine_fof(struct engine *e, const int dump_results,
+                const int seed_black_holes) {
+
+#ifdef WITH_FOF
+
+  ticks tic = getticks();
+
+  /* Compute number of DM particles (gparts minus background and baryons) */
+  const long long total_nr_baryons =
+      e->total_nr_parts + e->total_nr_sparts + e->total_nr_bparts;
+  const long long total_nr_dmparts =
+      e->total_nr_gparts - e->total_nr_DM_background_gparts - total_nr_baryons;
+
+  /* Initialise FOF parameters and allocate FOF arrays. */
+  fof_allocate(e->s, total_nr_dmparts, e->fof_properties);
+
+  /* Make FOF tasks */
+  engine_make_fof_tasks(e);
+
+  /* and activate them. */
+  engine_activate_fof_tasks(e);
+
+  /* Perform local FOF tasks. */
+  engine_launch(e);
+
+  /* Perform FOF search over foreign particles and
+   * find groups which require black hole seeding.  */
+  fof_search_tree(e->fof_properties, e->black_holes_properties,
+                  e->physical_constants, e->cosmology, e->s, dump_results,
+                  seed_black_holes);
+
+  /* Reset flag. */
+  e->run_fof = 0;
+
+  /* Flag that a FOF has taken place */
+  e->step_props |= engine_step_prop_fof;
+
+  /* ... and find the next FOF time (only when seeding black holes) */
+  if (seed_black_holes) engine_compute_next_fof_time(e);
+
+  if (engine_rank == 0)
+    message("Complete FOF search took: %.3f %s.",
+            clocks_from_ticks(getticks() - tic), clocks_getunit());
+#else
+  error("SWIFT was not compiled with FOF enabled!");
+#endif
+}
diff --git a/src/engine_maketasks.c b/src/engine_maketasks.c
index 05bde9091dd55904063133c1a70cc004f0a05512..42590cb5f41539d11ca39639c369ea68472e9826 100644
--- a/src/engine_maketasks.c
+++ b/src/engine_maketasks.c
@@ -74,6 +74,9 @@ void engine_addtasks_send_gravity(struct engine *e, struct cell *ci,
   struct scheduler *s = &e->sched;
   const int nodeID = cj->nodeID;
 
+  /* Early abort (are we below the level where tasks are)? */
+  if (!cell_get_flag(ci, cell_flag_has_tasks)) return;
+
   /* Check if any of the gravity tasks are for the target node. */
   for (l = ci->grav.grav; l != NULL; l = l->next)
     if (l->t->ci->nodeID == nodeID ||
@@ -141,6 +144,9 @@ void engine_addtasks_send_hydro(struct engine *e, struct cell *ci,
   struct scheduler *s = &e->sched;
   const int nodeID = cj->nodeID;
 
+  /* Early abort (are we below the level where tasks are)? */
+  if (!cell_get_flag(ci, cell_flag_has_tasks)) return;
+
   /* Check if any of the density tasks are for the target node. */
   for (l = ci->hydro.density; l != NULL; l = l->next)
     if (l->t->ci->nodeID == nodeID ||
@@ -248,6 +254,9 @@ void engine_addtasks_send_stars(struct engine *e, struct cell *ci,
   struct scheduler *s = &e->sched;
   const int nodeID = cj->nodeID;
 
+  /* Early abort (are we below the level where tasks are)? */
+  if (!cell_get_flag(ci, cell_flag_has_tasks)) return;
+
   if (t_sf_counts == NULL && with_star_formation && ci->hydro.count > 0) {
 #ifdef SWIFT_DEBUG_CHECKS
     if (ci->depth != 0)
@@ -339,6 +348,9 @@ void engine_addtasks_send_black_holes(struct engine *e, struct cell *ci,
   struct scheduler *s = &e->sched;
   const int nodeID = cj->nodeID;
 
+  /* Early abort (are we below the level where tasks are)? */
+  if (!cell_get_flag(ci, cell_flag_has_tasks)) return;
+
   /* Check if any of the density tasks are for the target node. */
   for (l = ci->black_holes.density; l != NULL; l = l->next)
     if (l->t->ci->nodeID == nodeID ||
@@ -434,6 +446,9 @@ void engine_addtasks_recv_hydro(struct engine *e, struct cell *c,
 #ifdef WITH_MPI
   struct scheduler *s = &e->sched;
 
+  /* Early abort (are we below the level where tasks are)? */
+  if (!cell_get_flag(c, cell_flag_has_tasks)) return;
+
   /* Have we reached a level where there are any hydro tasks ? */
   if (t_xv == NULL && c->hydro.density != NULL) {
 
@@ -533,6 +548,9 @@ void engine_addtasks_recv_stars(struct engine *e, struct cell *c,
 #ifdef WITH_MPI
   struct scheduler *s = &e->sched;
 
+  /* Early abort (are we below the level where tasks are)? */
+  if (!cell_get_flag(c, cell_flag_has_tasks)) return;
+
   if (t_sf_counts == NULL && with_star_formation && c->hydro.count > 0) {
 #ifdef SWIFT_DEBUG_CHECKS
     if (c->depth != 0)
@@ -624,6 +642,9 @@ void engine_addtasks_recv_black_holes(struct engine *e, struct cell *c,
 #ifdef WITH_MPI
   struct scheduler *s = &e->sched;
 
+  /* Early abort (are we below the level where tasks are)? */
+  if (!cell_get_flag(c, cell_flag_has_tasks)) return;
+
   /* Have we reached a level where there are any black_holes tasks ? */
   if (t_rho == NULL && c->black_holes.density != NULL) {
 
@@ -714,6 +735,9 @@ void engine_addtasks_recv_gravity(struct engine *e, struct cell *c,
 #ifdef WITH_MPI
   struct scheduler *s = &e->sched;
 
+  /* Early abort (are we below the level where tasks are)? */
+  if (!cell_get_flag(c, cell_flag_has_tasks)) return;
+
   /* Have we reached a level where there are any gravity tasks ? */
   if (t_grav == NULL && c->grav.grav != NULL) {
 
diff --git a/src/engine_redistribute.c b/src/engine_redistribute.c
new file mode 100644
index 0000000000000000000000000000000000000000..3132ad2665c67cd244ae1ec9ece75726788c1506
--- /dev/null
+++ b/src/engine_redistribute.c
@@ -0,0 +1,1031 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
+ *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *               2015 Peter W. Draper (p.w.draper@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+/* Config parameters. */
+#include "../config.h"
+
+/* This object's header. */
+#include "engine.h"
+
+/* Local headers. */
+#include "memswap.h"
+
+#ifdef WITH_MPI
+
+/**
+ * @brief Exchange one type of particles with all the other nodes.
+ *
+ * @param label a label for the memory allocations of this particle type.
+ * @param counts flattened nr_nodes x nr_nodes array; counts[i * nr_nodes + j]
+ *               is the number of particles node i sends to node j.
+ * @param parts the particle data to exchange, grouped by destination node.
+ * @param new_nr_parts the number of particles this node will have after all
+ *                     exchanges have completed.
+ * @param sizeofparts sizeof the particle struct.
+ * @param alignsize the memory alignment required for this particle type.
+ * @param mpi_type the MPI_Datatype for these particles.
+ * @param nr_nodes the number of nodes to exchange with.
+ * @param nodeID the id of this node.
+ *
+ * @result new particle data constructed from all the exchanges with the
+ *         given alignment.
+ */
+static void *engine_do_redistribute(const char *label, int *counts, char *parts,
+                                    size_t new_nr_parts, size_t sizeofparts,
+                                    size_t alignsize, MPI_Datatype mpi_type,
+                                    int nr_nodes, int nodeID) {
+
+  /* Allocate a new particle array with some extra margin */
+  char *parts_new = NULL;
+  if (swift_memalign(
+          label, (void **)&parts_new, alignsize,
+          sizeofparts * new_nr_parts * engine_redistribute_alloc_margin) != 0)
+    error("Failed to allocate new particle data.");
+
+  /* Prepare MPI requests for the asynchronous communications */
+  MPI_Request *reqs;
+  if ((reqs = (MPI_Request *)malloc(sizeof(MPI_Request) * 2 * nr_nodes)) ==
+      NULL)
+    error("Failed to allocate MPI request list.");
+
+  /* Send and receive at most "chunk" particles per request, looping as many
+   * times as necessary. chunk is INT_MAX / sizeofparts particles, so that a
+   * single request never carries more than about 2GB. */
+  const int chunk = INT_MAX / sizeofparts;
+  int sent = 0;
+  int recvd = 0;
+
+  int activenodes = 1;
+  while (activenodes) {
+
+    for (int k = 0; k < 2 * nr_nodes; k++) reqs[k] = MPI_REQUEST_NULL;
+
+    /* Emit the sends and recvs for the data. */
+    size_t offset_send = sent;
+    size_t offset_recv = recvd;
+    activenodes = 0;
+
+    for (int k = 0; k < nr_nodes; k++) {
+
+      /* Indices in the count arrays of the node of interest */
+      const int ind_send = nodeID * nr_nodes + k;
+      const int ind_recv = k * nr_nodes + nodeID;
+
+      /* Are we sending any data this loop? */
+      int sending = counts[ind_send] - sent;
+      if (sending > 0) {
+        activenodes++;
+        if (sending > chunk) sending = chunk;
+
+        /* If the send and receive is local then just copy. */
+        if (k == nodeID) {
+          int receiving = counts[ind_recv] - recvd;
+          if (receiving > chunk) receiving = chunk;
+          memcpy(&parts_new[offset_recv * sizeofparts],
+                 &parts[offset_send * sizeofparts], sizeofparts * receiving);
+        } else {
+          /* Otherwise send it. */
+          int res =
+              MPI_Isend(&parts[offset_send * sizeofparts], sending, mpi_type, k,
+                        ind_send, MPI_COMM_WORLD, &reqs[2 * k + 0]);
+          if (res != MPI_SUCCESS)
+            mpi_error(res, "Failed to isend parts to node %i.", k);
+        }
+      }
+
+      /* If we're sending to this node, then move past it to next. */
+      if (counts[ind_send] > 0) offset_send += counts[ind_send];
+
+      /* Are we receiving any data from this node? Note already done if coming
+       * from this node. */
+      if (k != nodeID) {
+        int receiving = counts[ind_recv] - recvd;
+        if (receiving > 0) {
+          activenodes++;
+          if (receiving > chunk) receiving = chunk;
+          int res = MPI_Irecv(&parts_new[offset_recv * sizeofparts], receiving,
+                              mpi_type, k, ind_recv, MPI_COMM_WORLD,
+                              &reqs[2 * k + 1]);
+          if (res != MPI_SUCCESS)
+            mpi_error(res, "Failed to emit irecv of parts from node %i.", k);
+        }
+      }
+
+      /* If we're receiving from this node, then move past it to next. */
+      if (counts[ind_recv] > 0) offset_recv += counts[ind_recv];
+    }
+
+    /* Wait for all the sends and recvs of this round to complete. */
+    MPI_Status stats[2 * nr_nodes];
+    int res;
+    if ((res = MPI_Waitall(2 * nr_nodes, reqs, stats)) != MPI_SUCCESS) {
+      for (int k = 0; k < 2 * nr_nodes; k++) {
+        char buff[MPI_MAX_ERROR_STRING];
+        MPI_Error_string(stats[k].MPI_ERROR, buff, &res);
+        message("request from source %i, tag %i has error '%s'.",
+                stats[k].MPI_SOURCE, stats[k].MPI_TAG, buff);
+      }
+      error("Failed during waitall for part data.");
+    }
+
+    /* Move to next chunks. */
+    sent += chunk;
+    recvd += chunk;
+  }
+
+  /* Free temps. */
+  free(reqs);
+
+  /* And return new memory (it is not freed in this function). */
+  return parts_new;
+}
+#endif
+
+#ifdef WITH_MPI /* redist_mapper */
+
+/* Support for engine_redistribute threadpool dest mappers. */
+struct redist_mapper_data {
+  int *counts;        /* nr_nodes * nr_nodes counts, updated with atomic_add */
+  int *dest;          /* destination node of each particle */
+  int nodeID;         /* this node's ID, selects the row of counts */
+  int nr_nodes;       /* number of nodes */
+  struct cell *cells; /* not read by the dest mappers */
+  struct space *s;    /* space providing dim, cdim, iwidth and cells_top */
+  void *base;         /* start of the particle array that dest is indexed by */
+};
+
+/* Generic mapper for TYPE parts: shifts positions outside the box by s->dim,
+ * stores each particle's destination node and accumulates the counts. Note
+ * we use a local counts array to avoid the atomic_add in the parts loop. */
+#define ENGINE_REDISTRIBUTE_DEST_MAPPER(TYPE)                              \
+  engine_redistribute_dest_mapper_##TYPE(void *map_data, int num_elements, \
+                                         void *extra_data) {               \
+    struct TYPE *parts = (struct TYPE *)map_data;                          \
+    struct redist_mapper_data *mydata =                                    \
+        (struct redist_mapper_data *)extra_data;                           \
+    struct space *s = mydata->s;                                           \
+    int *dest =                                                            \
+        mydata->dest + (ptrdiff_t)(parts - (struct TYPE *)mydata->base);   \
+    int *lcounts = NULL;                                                   \
+    if ((lcounts = (int *)calloc(                                          \
+             sizeof(int), mydata->nr_nodes * mydata->nr_nodes)) == NULL)   \
+      error("Failed to allocate counts thread-specific buffer");           \
+    for (int k = 0; k < num_elements; k++) {                               \
+      for (int j = 0; j < 3; j++) {                                        \
+        if (parts[k].x[j] < 0.0)                                           \
+          parts[k].x[j] += s->dim[j];                                      \
+        else if (parts[k].x[j] >= s->dim[j])                               \
+          parts[k].x[j] -= s->dim[j];                                      \
+      }                                                                    \
+      const int cid = cell_getid(s->cdim, parts[k].x[0] * s->iwidth[0],    \
+                                 parts[k].x[1] * s->iwidth[1],             \
+                                 parts[k].x[2] * s->iwidth[2]);            \
+      dest[k] = s->cells_top[cid].nodeID;                                  \
+      size_t ind = mydata->nodeID * mydata->nr_nodes + dest[k];            \
+      lcounts[ind] += 1;                                                   \
+    }                                                                      \
+    for (int k = 0; k < (mydata->nr_nodes * mydata->nr_nodes); k++)        \
+      atomic_add(&mydata->counts[k], lcounts[k]);                          \
+    free(lcounts);                                                         \
+  }
+
+/**
+ * @brief Accumulate the counts of particles per destination node.
+ * Threadpool helper that also records the destination node of each particle.
+ *
+ * part version.
+ */
+static void ENGINE_REDISTRIBUTE_DEST_MAPPER(part);
+
+/**
+ * @brief Accumulate the counts of star particles per destination node.
+ * Threadpool helper that also records the destination node of each particle.
+ *
+ * spart version.
+ */
+static void ENGINE_REDISTRIBUTE_DEST_MAPPER(spart);
+
+/**
+ * @brief Accumulate the counts of gravity particles per destination node.
+ * Threadpool helper that also records the destination node of each particle.
+ *
+ * gpart version.
+ */
+static void ENGINE_REDISTRIBUTE_DEST_MAPPER(gpart);
+
+/**
+ * @brief Accumulate the counts of black holes particles per destination node.
+ * Threadpool helper that also records the destination node of each particle.
+ *
+ * bpart version.
+ */
+static void ENGINE_REDISTRIBUTE_DEST_MAPPER(bpart);
+
+#endif /* redist_mapper_data */
+
+#ifdef WITH_MPI /* savelink_mapper_data */
+
+/* Support for saving the linkage between gparts and parts/sparts/bparts. */
+struct savelink_mapper_data {
+  int nr_nodes; /* number of nodes */
+  int *counts;  /* particle counts, read at [nodeID * nr_nodes + node] */
+  void *parts;  /* particle array, cast to struct TYPE * by the mapper */
+  int nodeID;   /* this node's ID, selects the row of counts */
+};
+
+/**
+ * @brief Save the offset of each gravity partner of a part, spart or bpart.
+ *
+ * The offset is from the start of the sorted particles to be sent to a node
+ * and is stored negated in the gpart's id_or_neg_offset field.
+ * These offsets are used to restore the pointers on the receiving node.
+ *
+ * CHECKS should be eliminated as dead code when optimizing.
+ */
+#define ENGINE_REDISTRIBUTE_SAVELINK_MAPPER(TYPE, CHECKS)                      \
+  engine_redistribute_savelink_mapper_##TYPE(void *map_data, int num_elements, \
+                                             void *extra_data) {               \
+    int *nodes = (int *)map_data;                                              \
+    struct savelink_mapper_data *mydata =                                      \
+        (struct savelink_mapper_data *)extra_data;                             \
+    int nodeID = mydata->nodeID;                                               \
+    int nr_nodes = mydata->nr_nodes;                                           \
+    int *counts = mydata->counts;                                              \
+    struct TYPE *parts = (struct TYPE *)mydata->parts;                         \
+                                                                               \
+    for (int j = 0; j < num_elements; j++) {                                   \
+      int node = nodes[j];                                                     \
+      int count = 0;                                                           \
+      size_t offset = 0;                                                       \
+      for (int i = 0; i < node; i++) offset += counts[nodeID * nr_nodes + i];  \
+                                                                               \
+      for (int k = 0; k < counts[nodeID * nr_nodes + node]; k++) {             \
+        if (parts[k + offset].gpart != NULL) {                                 \
+          if (CHECKS)                                                          \
+            if (parts[k + offset].gpart->id_or_neg_offset > 0)                 \
+              error("Trying to link a partnerless " #TYPE "!");                \
+          parts[k + offset].gpart->id_or_neg_offset = -count;                  \
+          count++;                                                             \
+        }                                                                      \
+      }                                                                        \
+    }                                                                          \
+  }
+
+/**
+ * @brief Save position of part-gpart links.
+ * Threadpool helper mapped over the list of destination nodes.
+ */
+#ifdef SWIFT_DEBUG_CHECKS
+static void ENGINE_REDISTRIBUTE_SAVELINK_MAPPER(part, 1);
+#else
+static void ENGINE_REDISTRIBUTE_SAVELINK_MAPPER(part, 0);
+#endif
+
+/**
+ * @brief Save position of spart-gpart links.
+ * Threadpool helper mapped over the list of destination nodes.
+ */
+#ifdef SWIFT_DEBUG_CHECKS
+static void ENGINE_REDISTRIBUTE_SAVELINK_MAPPER(spart, 1);
+#else
+static void ENGINE_REDISTRIBUTE_SAVELINK_MAPPER(spart, 0);
+#endif
+
+/**
+ * @brief Save position of bpart-gpart links.
+ * Threadpool helper mapped over the list of destination nodes.
+ */
+#ifdef SWIFT_DEBUG_CHECKS
+static void ENGINE_REDISTRIBUTE_SAVELINK_MAPPER(bpart, 1);
+#else
+static void ENGINE_REDISTRIBUTE_SAVELINK_MAPPER(bpart, 0);
+#endif
+
+#endif /* savelink_mapper_data */
+
+#ifdef WITH_MPI /* relink_mapper_data */
+
+/* Support for relinking parts, gparts, sparts and bparts after moving between
+ * nodes. */
+struct relink_mapper_data {
+  int nodeID;      /* this node's ID */
+  int nr_nodes;    /* number of nodes */
+  int *counts;     /* part counts, read at [sender * nr_nodes + nodeID] */
+  int *s_counts;   /* same layout, for sparts */
+  int *g_counts;   /* same layout, for gparts */
+  int *b_counts;   /* same layout, for bparts */
+  struct space *s; /* space whose particle arrays get relinked */
+};
+
+/**
+ * @brief Restore the gpart links of parts, sparts and bparts for some nodes.
+ *
+ * @param map_data address of nodes to process.
+ * @param num_elements the number nodes to process.
+ * @param extra_data additional data defining the context (a
+ * relink_mapper_data).
+ */
+static void engine_redistribute_relink_mapper(void *map_data, int num_elements,
+                                              void *extra_data) {
+
+  int *nodes = (int *)map_data;
+  struct relink_mapper_data *mydata = (struct relink_mapper_data *)extra_data;
+
+  int nodeID = mydata->nodeID;
+  int nr_nodes = mydata->nr_nodes;
+  int *counts = mydata->counts;
+  int *g_counts = mydata->g_counts;
+  int *s_counts = mydata->s_counts;
+  int *b_counts = mydata->b_counts;
+  struct space *s = mydata->s;
+
+  for (int i = 0; i < num_elements; i++) {
+
+    int node = nodes[i];
+
+    /* Offsets into the particle arrays: totals sent to us by nodes < node. */
+    size_t offset_parts = 0;
+    size_t offset_gparts = 0;
+    size_t offset_sparts = 0;
+    size_t offset_bparts = 0;
+    for (int n = 0; n < node; n++) {
+      int ind_recv = n * nr_nodes + nodeID;
+      offset_parts += counts[ind_recv];
+      offset_gparts += g_counts[ind_recv];
+      offset_sparts += s_counts[ind_recv];
+      offset_bparts += b_counts[ind_recv];
+    }
+
+    /* Number of gparts sent from this node. */
+    int ind_recv = node * nr_nodes + nodeID;
+    const size_t count_gparts = g_counts[ind_recv];
+
+    /* Loop over the gparts received from this node */
+    for (size_t k = offset_gparts; k < offset_gparts + count_gparts; k++) {
+
+      /* Does this gpart have a gas partner ? */
+      if (s->gparts[k].type == swift_type_gas) {
+
+        const ptrdiff_t partner_index =
+            offset_parts - s->gparts[k].id_or_neg_offset;
+
+        /* Re-link: store the negated local index and point the part back */
+        s->gparts[k].id_or_neg_offset = -partner_index;
+        s->parts[partner_index].gpart = &s->gparts[k];
+      }
+
+      /* Does this gpart have a star partner ? */
+      else if (s->gparts[k].type == swift_type_stars) {
+
+        const ptrdiff_t partner_index =
+            offset_sparts - s->gparts[k].id_or_neg_offset;
+
+        /* Re-link: store the negated local index and point the spart back */
+        s->gparts[k].id_or_neg_offset = -partner_index;
+        s->sparts[partner_index].gpart = &s->gparts[k];
+      }
+
+      /* Does this gpart have a black hole partner ? */
+      else if (s->gparts[k].type == swift_type_black_hole) {
+
+        const ptrdiff_t partner_index =
+            offset_bparts - s->gparts[k].id_or_neg_offset;
+
+        /* Re-link: store the negated local index and point the bpart back */
+        s->gparts[k].id_or_neg_offset = -partner_index;
+        s->bparts[partner_index].gpart = &s->gparts[k];
+      }
+    }
+  }
+}
+
+#endif /* relink_mapper_data */
+
+/**
+ * @brief Redistribute the particles amongst the nodes according
+ *      to their cell's node IDs.
+ *
+ * The strategy here is as follows:
+ * 1) Each node counts the number of particles it has to send to each other
+ * node.
+ * 2) The number of particles of each type is then exchanged.
+ * 3) The particles to send are placed in a temporary buffer in which the
+ * part-gpart links are preserved.
+ * 4) Each node allocates enough space for the new particles.
+ * 5) (Asynchronous) communications are issued to transfer the data.
+ *
+ *
+ * @param e The #engine.
+ */
+void engine_redistribute(struct engine *e) {
+
+#ifdef WITH_MPI
+
+  const int nr_nodes = e->nr_nodes;
+  const int nodeID = e->nodeID;
+  struct space *s = e->s;
+  struct cell *cells = s->cells_top;
+  const int nr_cells = s->nr_cells;
+  struct xpart *xparts = s->xparts;
+  struct part *parts = s->parts;
+  struct gpart *gparts = s->gparts;
+  struct spart *sparts = s->sparts;
+  struct bpart *bparts = s->bparts;
+  ticks tic = getticks();
+
+  size_t nr_parts = s->nr_parts;
+  size_t nr_gparts = s->nr_gparts;
+  size_t nr_sparts = s->nr_sparts;
+  size_t nr_bparts = s->nr_bparts;
+
+  /* Start by moving inhibited particles to the end of the arrays */
+  for (size_t k = 0; k < nr_parts; /* void */) {
+    if (parts[k].time_bin == time_bin_inhibited ||
+        parts[k].time_bin == time_bin_not_created) {
+      nr_parts -= 1;
+
+      /* Swap the particle */
+      memswap(&parts[k], &parts[nr_parts], sizeof(struct part));
+
+      /* Swap the xpart */
+      memswap(&xparts[k], &xparts[nr_parts], sizeof(struct xpart));
+
+      /* Swap the link with the gpart */
+      if (parts[k].gpart != NULL) {
+        parts[k].gpart->id_or_neg_offset = -k;
+      }
+      if (parts[nr_parts].gpart != NULL) {
+        parts[nr_parts].gpart->id_or_neg_offset = -nr_parts;
+      }
+    } else {
+      k++;
+    }
+  }
+
+  /* Now move inhibited star particles to the end of the arrays */
+  for (size_t k = 0; k < nr_sparts; /* void */) {
+    if (sparts[k].time_bin == time_bin_inhibited ||
+        sparts[k].time_bin == time_bin_not_created) {
+      nr_sparts -= 1;
+
+      /* Swap the particle */
+      memswap(&s->sparts[k], &s->sparts[nr_sparts], sizeof(struct spart));
+
+      /* Swap the link with the gpart */
+      if (s->sparts[k].gpart != NULL) {
+        s->sparts[k].gpart->id_or_neg_offset = -k;
+      }
+      if (s->sparts[nr_sparts].gpart != NULL) {
+        s->sparts[nr_sparts].gpart->id_or_neg_offset = -nr_sparts;
+      }
+    } else {
+      k++;
+    }
+  }
+
+  /* Now move inhibited black hole particles to the end of the arrays */
+  for (size_t k = 0; k < nr_bparts; /* void */) {
+    if (bparts[k].time_bin == time_bin_inhibited ||
+        bparts[k].time_bin == time_bin_not_created) {
+      nr_bparts -= 1;
+
+      /* Swap the particle */
+      memswap(&s->bparts[k], &s->bparts[nr_bparts], sizeof(struct bpart));
+
+      /* Swap the link with the gpart */
+      if (s->bparts[k].gpart != NULL) {
+        s->bparts[k].gpart->id_or_neg_offset = -k;
+      }
+      if (s->bparts[nr_bparts].gpart != NULL) {
+        s->bparts[nr_bparts].gpart->id_or_neg_offset = -nr_bparts;
+      }
+    } else {
+      k++;
+    }
+  }
+
+  /* Finally do the same with the gravity particles */
+  for (size_t k = 0; k < nr_gparts; /* void */) {
+    if (gparts[k].time_bin == time_bin_inhibited ||
+        gparts[k].time_bin == time_bin_not_created) {
+      nr_gparts -= 1;
+
+      /* Swap the particle */
+      memswap(&s->gparts[k], &s->gparts[nr_gparts], sizeof(struct gpart));
+
+      /* Swap the link with part/spart */
+      if (s->gparts[k].type == swift_type_gas) {
+        s->parts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k];
+      } else if (s->gparts[k].type == swift_type_stars) {
+        s->sparts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k];
+      } else if (s->gparts[k].type == swift_type_black_hole) {
+        s->bparts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k];
+      }
+
+      if (s->gparts[nr_gparts].type == swift_type_gas) {
+        s->parts[-s->gparts[nr_gparts].id_or_neg_offset].gpart =
+            &s->gparts[nr_gparts];
+      } else if (s->gparts[nr_gparts].type == swift_type_stars) {
+        s->sparts[-s->gparts[nr_gparts].id_or_neg_offset].gpart =
+            &s->gparts[nr_gparts];
+      } else if (s->gparts[nr_gparts].type == swift_type_black_hole) {
+        s->bparts[-s->gparts[nr_gparts].id_or_neg_offset].gpart =
+            &s->gparts[nr_gparts];
+      }
+    } else {
+      k++;
+    }
+  }
+
+  /* Now we are ready to deal with real particles and can start the exchange. */
+
+  /* Allocate temporary arrays to store the counts of particles to be sent
+   * and the destination of each particle */
+  int *counts;
+  if ((counts = (int *)calloc(sizeof(int), nr_nodes * nr_nodes)) == NULL)
+    error("Failed to allocate counts temporary buffer.");
+
+  int *dest;
+  if ((dest = (int *)swift_malloc("dest", sizeof(int) * nr_parts)) == NULL)
+    error("Failed to allocate dest temporary buffer.");
+
+  /* Simple index of node IDs, used for mappers over nodes. */
+  int *nodes = NULL;
+  if ((nodes = (int *)malloc(sizeof(int) * nr_nodes)) == NULL)
+    error("Failed to allocate nodes temporary buffer.");
+  for (int k = 0; k < nr_nodes; k++) nodes[k] = k;
+
+  /* Get destination of each particle */
+  struct redist_mapper_data redist_data;
+  redist_data.s = s;
+  redist_data.nodeID = nodeID;
+  redist_data.nr_nodes = nr_nodes;
+
+  redist_data.counts = counts;
+  redist_data.dest = dest;
+  redist_data.base = (void *)parts;
+
+  threadpool_map(&e->threadpool, engine_redistribute_dest_mapper_part, parts,
+                 nr_parts, sizeof(struct part), 0, &redist_data);
+
+  /* Sort the particles according to their cell index. */
+  if (nr_parts > 0)
+    space_parts_sort(s->parts, s->xparts, dest, &counts[nodeID * nr_nodes],
+                     nr_nodes, 0);
+
+#ifdef SWIFT_DEBUG_CHECKS
+  /* Verify that the part have been sorted correctly. */
+  for (size_t k = 0; k < nr_parts; k++) {
+    const struct part *p = &s->parts[k];
+
+    if (p->time_bin == time_bin_inhibited)
+      error("Inhibited particle found after sorting!");
+
+    if (p->time_bin == time_bin_not_created)
+      error("Inhibited particle found after sorting!");
+
+    /* New cell index */
+    const int new_cid =
+        cell_getid(s->cdim, p->x[0] * s->iwidth[0], p->x[1] * s->iwidth[1],
+                   p->x[2] * s->iwidth[2]);
+
+    /* New cell of this part */
+    const struct cell *c = &s->cells_top[new_cid];
+    const int new_node = c->nodeID;
+
+    if (dest[k] != new_node)
+      error("part's new node index not matching sorted index.");
+
+    if (p->x[0] < c->loc[0] || p->x[0] > c->loc[0] + c->width[0] ||
+        p->x[1] < c->loc[1] || p->x[1] > c->loc[1] + c->width[1] ||
+        p->x[2] < c->loc[2] || p->x[2] > c->loc[2] + c->width[2])
+      error("part not sorted into the right top-level cell!");
+  }
+#endif
+
+  /* We will need to re-link the gpart partners of parts, so save their
+   * relative positions in the sent lists. */
+  if (nr_parts > 0 && nr_gparts > 0) {
+
+    struct savelink_mapper_data savelink_data;
+    savelink_data.nr_nodes = nr_nodes;
+    savelink_data.counts = counts;
+    savelink_data.parts = (void *)parts;
+    savelink_data.nodeID = nodeID;
+    threadpool_map(&e->threadpool, engine_redistribute_savelink_mapper_part,
+                   nodes, nr_nodes, sizeof(int), 0, &savelink_data);
+  }
+  swift_free("dest", dest);
+
+  /* Get destination of each s-particle */
+  int *s_counts;
+  if ((s_counts = (int *)calloc(sizeof(int), nr_nodes * nr_nodes)) == NULL)
+    error("Failed to allocate s_counts temporary buffer.");
+
+  int *s_dest;
+  if ((s_dest = (int *)swift_malloc("s_dest", sizeof(int) * nr_sparts)) == NULL)
+    error("Failed to allocate s_dest temporary buffer.");
+
+  redist_data.counts = s_counts;
+  redist_data.dest = s_dest;
+  redist_data.base = (void *)sparts;
+
+  threadpool_map(&e->threadpool, engine_redistribute_dest_mapper_spart, sparts,
+                 nr_sparts, sizeof(struct spart), 0, &redist_data);
+
+  /* Sort the particles according to their cell index. */
+  if (nr_sparts > 0)
+    space_sparts_sort(s->sparts, s_dest, &s_counts[nodeID * nr_nodes], nr_nodes,
+                      0);
+
+#ifdef SWIFT_DEBUG_CHECKS
+  /* Verify that the spart have been sorted correctly. */
+  for (size_t k = 0; k < nr_sparts; k++) {
+    const struct spart *sp = &s->sparts[k];
+
+    if (sp->time_bin == time_bin_inhibited)
+      error("Inhibited particle found after sorting!");
+
+    if (sp->time_bin == time_bin_not_created)
+      error("Inhibited particle found after sorting!");
+
+    /* New cell index */
+    const int new_cid =
+        cell_getid(s->cdim, sp->x[0] * s->iwidth[0], sp->x[1] * s->iwidth[1],
+                   sp->x[2] * s->iwidth[2]);
+
+    /* New cell of this spart */
+    const struct cell *c = &s->cells_top[new_cid];
+    const int new_node = c->nodeID;
+
+    if (s_dest[k] != new_node)
+      error("spart's new node index not matching sorted index.");
+
+    if (sp->x[0] < c->loc[0] || sp->x[0] > c->loc[0] + c->width[0] ||
+        sp->x[1] < c->loc[1] || sp->x[1] > c->loc[1] + c->width[1] ||
+        sp->x[2] < c->loc[2] || sp->x[2] > c->loc[2] + c->width[2])
+      error("spart not sorted into the right top-level cell!");
+  }
+#endif
+
+  /* We need to re-link the gpart partners of sparts. */
+  if (nr_sparts > 0) {
+
+    struct savelink_mapper_data savelink_data;
+    savelink_data.nr_nodes = nr_nodes;
+    savelink_data.counts = s_counts;
+    savelink_data.parts = (void *)sparts;
+    savelink_data.nodeID = nodeID;
+    threadpool_map(&e->threadpool, engine_redistribute_savelink_mapper_spart,
+                   nodes, nr_nodes, sizeof(int), 0, &savelink_data);
+  }
+  swift_free("s_dest", s_dest);
+
+  /* Get destination of each b-particle */
+  int *b_counts;
+  if ((b_counts = (int *)calloc(sizeof(int), nr_nodes * nr_nodes)) == NULL)
+    error("Failed to allocate b_counts temporary buffer.");
+
+  int *b_dest;
+  if ((b_dest = (int *)swift_malloc("b_dest", sizeof(int) * nr_bparts)) == NULL)
+    error("Failed to allocate b_dest temporary buffer.");
+
+  redist_data.counts = b_counts;
+  redist_data.dest = b_dest;
+  redist_data.base = (void *)bparts;
+
+  threadpool_map(&e->threadpool, engine_redistribute_dest_mapper_bpart, bparts,
+                 nr_bparts, sizeof(struct bpart), 0, &redist_data);
+
+  /* Sort the particles according to their cell index. */
+  if (nr_bparts > 0)
+    space_bparts_sort(s->bparts, b_dest, &b_counts[nodeID * nr_nodes], nr_nodes,
+                      0);
+
+#ifdef SWIFT_DEBUG_CHECKS
+  /* Verify that the bpart have been sorted correctly. */
+  for (size_t k = 0; k < nr_bparts; k++) {
+    const struct bpart *bp = &s->bparts[k];
+
+    if (bp->time_bin == time_bin_inhibited)
+      error("Inhibited particle found after sorting!");
+
+    if (bp->time_bin == time_bin_not_created)
+      error("Inhibited particle found after sorting!");
+
+    /* New cell index */
+    const int new_cid =
+        cell_getid(s->cdim, bp->x[0] * s->iwidth[0], bp->x[1] * s->iwidth[1],
+                   bp->x[2] * s->iwidth[2]);
+
+    /* New cell of this bpart */
+    const struct cell *c = &s->cells_top[new_cid];
+    const int new_node = c->nodeID;
+
+    if (b_dest[k] != new_node)
+      error("bpart's new node index not matching sorted index.");
+
+    if (bp->x[0] < c->loc[0] || bp->x[0] > c->loc[0] + c->width[0] ||
+        bp->x[1] < c->loc[1] || bp->x[1] > c->loc[1] + c->width[1] ||
+        bp->x[2] < c->loc[2] || bp->x[2] > c->loc[2] + c->width[2])
+      error("bpart not sorted into the right top-level cell!");
+  }
+#endif
+
+  /* We need to re-link the gpart partners of bparts. */
+  if (nr_bparts > 0) {
+
+    struct savelink_mapper_data savelink_data;
+    savelink_data.nr_nodes = nr_nodes;
+    savelink_data.counts = b_counts;
+    savelink_data.parts = (void *)bparts;
+    savelink_data.nodeID = nodeID;
+    threadpool_map(&e->threadpool, engine_redistribute_savelink_mapper_bpart,
+                   nodes, nr_nodes, sizeof(int), 0, &savelink_data);
+  }
+  swift_free("b_dest", b_dest);
+
+  /* Get destination of each g-particle */
+  int *g_counts;
+  if ((g_counts = (int *)calloc(sizeof(int), nr_nodes * nr_nodes)) == NULL)
+    error("Failed to allocate g_gcount temporary buffer.");
+
+  int *g_dest;
+  if ((g_dest = (int *)swift_malloc("g_dest", sizeof(int) * nr_gparts)) == NULL)
+    error("Failed to allocate g_dest temporary buffer.");
+
+  redist_data.counts = g_counts;
+  redist_data.dest = g_dest;
+  redist_data.base = (void *)gparts;
+
+  threadpool_map(&e->threadpool, engine_redistribute_dest_mapper_gpart, gparts,
+                 nr_gparts, sizeof(struct gpart), 0, &redist_data);
+
+  /* Sort the gparticles according to their cell index. */
+  if (nr_gparts > 0)
+    space_gparts_sort(s->gparts, s->parts, s->sparts, s->bparts, g_dest,
+                      &g_counts[nodeID * nr_nodes], nr_nodes);
+
+#ifdef SWIFT_DEBUG_CHECKS
+  /* Verify that the gpart have been sorted correctly. */
+  for (size_t k = 0; k < nr_gparts; k++) {
+    const struct gpart *gp = &s->gparts[k];
+
+    if (gp->time_bin == time_bin_inhibited)
+      error("Inhibited particle found after sorting!");
+
+    if (gp->time_bin == time_bin_not_created)
+      error("Inhibited particle found after sorting!");
+
+    /* New cell index */
+    const int new_cid =
+        cell_getid(s->cdim, gp->x[0] * s->iwidth[0], gp->x[1] * s->iwidth[1],
+                   gp->x[2] * s->iwidth[2]);
+
+    /* New cell of this gpart */
+    const struct cell *c = &s->cells_top[new_cid];
+    const int new_node = c->nodeID;
+
+    if (g_dest[k] != new_node)
+      error("gpart's new node index not matching sorted index (%d != %d).",
+            g_dest[k], new_node);
+
+    if (gp->x[0] < c->loc[0] || gp->x[0] > c->loc[0] + c->width[0] ||
+        gp->x[1] < c->loc[1] || gp->x[1] > c->loc[1] + c->width[1] ||
+        gp->x[2] < c->loc[2] || gp->x[2] > c->loc[2] + c->width[2])
+      error("gpart not sorted into the right top-level cell!");
+  }
+#endif
+
+  swift_free("g_dest", g_dest);
+
+  /* Get all the counts from all the nodes. */
+  if (MPI_Allreduce(MPI_IN_PLACE, counts, nr_nodes * nr_nodes, MPI_INT, MPI_SUM,
+                    MPI_COMM_WORLD) != MPI_SUCCESS)
+    error("Failed to allreduce particle transfer counts.");
+
+  /* Get all the g_counts from all the nodes. */
+  if (MPI_Allreduce(MPI_IN_PLACE, g_counts, nr_nodes * nr_nodes, MPI_INT,
+                    MPI_SUM, MPI_COMM_WORLD) != MPI_SUCCESS)
+    error("Failed to allreduce gparticle transfer counts.");
+
+  /* Get all the s_counts from all the nodes. */
+  if (MPI_Allreduce(MPI_IN_PLACE, s_counts, nr_nodes * nr_nodes, MPI_INT,
+                    MPI_SUM, MPI_COMM_WORLD) != MPI_SUCCESS)
+    error("Failed to allreduce sparticle transfer counts.");
+
+  /* Get all the b_counts from all the nodes. */
+  if (MPI_Allreduce(MPI_IN_PLACE, b_counts, nr_nodes * nr_nodes, MPI_INT,
+                    MPI_SUM, MPI_COMM_WORLD) != MPI_SUCCESS)
+    error("Failed to allreduce bparticle transfer counts.");
+
+  /* Report how many particles will be moved. */
+  if (e->verbose) {
+    if (e->nodeID == 0) {
+      size_t total = 0, g_total = 0, s_total = 0, b_total = 0;
+      size_t unmoved = 0, g_unmoved = 0, s_unmoved = 0, b_unmoved = 0;
+      for (int p = 0, r = 0; p < nr_nodes; p++) {
+        for (int n = 0; n < nr_nodes; n++) {
+          total += counts[r];
+          g_total += g_counts[r];
+          s_total += s_counts[r];
+          b_total += b_counts[r];
+          if (p == n) {
+            unmoved += counts[r];
+            g_unmoved += g_counts[r];
+            s_unmoved += s_counts[r];
+            b_unmoved += b_counts[r];
+          }
+          r++;
+        }
+      }
+      if (total > 0)
+        message("%zu of %zu (%.2f%%) of particles moved", total - unmoved,
+                total, 100.0 * (double)(total - unmoved) / (double)total);
+      if (g_total > 0)
+        message("%zu of %zu (%.2f%%) of g-particles moved", g_total - g_unmoved,
+                g_total,
+                100.0 * (double)(g_total - g_unmoved) / (double)g_total);
+      if (s_total > 0)
+        message("%zu of %zu (%.2f%%) of s-particles moved", s_total - s_unmoved,
+                s_total,
+                100.0 * (double)(s_total - s_unmoved) / (double)s_total);
+      if (b_total > 0)
+        message("%ld of %ld (%.2f%%) of b-particles moved", b_total - b_unmoved,
+                b_total,
+                100.0 * (double)(b_total - b_unmoved) / (double)b_total);
+    }
+  }
+
+  /* Now each node knows how many parts, sparts, bparts, and gparts will be
+   * transferred to every other node. Get the new numbers of particles for this
+   * node. */
+  size_t nr_parts_new = 0, nr_gparts_new = 0, nr_sparts_new = 0,
+         nr_bparts_new = 0;
+  for (int k = 0; k < nr_nodes; k++)
+    nr_parts_new += counts[k * nr_nodes + nodeID];
+  for (int k = 0; k < nr_nodes; k++)
+    nr_gparts_new += g_counts[k * nr_nodes + nodeID];
+  for (int k = 0; k < nr_nodes; k++)
+    nr_sparts_new += s_counts[k * nr_nodes + nodeID];
+  for (int k = 0; k < nr_nodes; k++)
+    nr_bparts_new += b_counts[k * nr_nodes + nodeID];
+
+  /* Now exchange the particles, type by type to keep the memory required
+   * under control. */
+
+  /* SPH particles. */
+  void *new_parts = engine_do_redistribute(
+      "parts", counts, (char *)s->parts, nr_parts_new, sizeof(struct part),
+      part_align, part_mpi_type, nr_nodes, nodeID);
+  swift_free("parts", s->parts);
+  s->parts = (struct part *)new_parts;
+  s->nr_parts = nr_parts_new;
+  s->size_parts = engine_redistribute_alloc_margin * nr_parts_new;
+
+  /* Extra SPH particle properties. */
+  new_parts = engine_do_redistribute(
+      "xparts", counts, (char *)s->xparts, nr_parts_new, sizeof(struct xpart),
+      xpart_align, xpart_mpi_type, nr_nodes, nodeID);
+  swift_free("xparts", s->xparts);
+  s->xparts = (struct xpart *)new_parts;
+
+  /* Gravity particles. */
+  new_parts = engine_do_redistribute(
+      "gparts", g_counts, (char *)s->gparts, nr_gparts_new,
+      sizeof(struct gpart), gpart_align, gpart_mpi_type, nr_nodes, nodeID);
+  swift_free("gparts", s->gparts);
+  s->gparts = (struct gpart *)new_parts;
+  s->nr_gparts = nr_gparts_new;
+  s->size_gparts = engine_redistribute_alloc_margin * nr_gparts_new;
+
+  /* Star particles. */
+  new_parts = engine_do_redistribute(
+      "sparts", s_counts, (char *)s->sparts, nr_sparts_new,
+      sizeof(struct spart), spart_align, spart_mpi_type, nr_nodes, nodeID);
+  swift_free("sparts", s->sparts);
+  s->sparts = (struct spart *)new_parts;
+  s->nr_sparts = nr_sparts_new;
+  s->size_sparts = engine_redistribute_alloc_margin * nr_sparts_new;
+
+  /* Black holes particles. */
+  new_parts = engine_do_redistribute(
+      "bparts", b_counts, (char *)s->bparts, nr_bparts_new,
+      sizeof(struct bpart), bpart_align, bpart_mpi_type, nr_nodes, nodeID);
+  swift_free("bparts", s->bparts);
+  s->bparts = (struct bpart *)new_parts;
+  s->nr_bparts = nr_bparts_new;
+  s->size_bparts = engine_redistribute_alloc_margin * nr_bparts_new;
+
+  /* All particles have now arrived. Time for some final operations on the
+     stuff we just received */
+
+  /* Restore the part<->gpart and spart<->gpart links.
+   * Generate indices and counts for threadpool tasks. Note we process a node
+   * at a time. */
+  struct relink_mapper_data relink_data;
+  relink_data.s = s;
+  relink_data.counts = counts;
+  relink_data.g_counts = g_counts;
+  relink_data.s_counts = s_counts;
+  relink_data.b_counts = b_counts;
+  relink_data.nodeID = nodeID;
+  relink_data.nr_nodes = nr_nodes;
+
+  threadpool_map(&e->threadpool, engine_redistribute_relink_mapper, nodes,
+                 nr_nodes, sizeof(int), 1, &relink_data);
+  free(nodes);
+
+  /* Clean up the counts now we are done. */
+  free(counts);
+  free(g_counts);
+  free(s_counts);
+  free(b_counts);
+
+#ifdef SWIFT_DEBUG_CHECKS
+  /* Verify that all parts are in the right place. */
+  for (size_t k = 0; k < nr_parts_new; k++) {
+    const int cid = cell_getid(s->cdim, s->parts[k].x[0] * s->iwidth[0],
+                               s->parts[k].x[1] * s->iwidth[1],
+                               s->parts[k].x[2] * s->iwidth[2]);
+    if (cells[cid].nodeID != nodeID)
+      error("Received particle (%zu) that does not belong here (nodeID=%i).", k,
+            cells[cid].nodeID);
+  }
+  for (size_t k = 0; k < nr_gparts_new; k++) {
+    const int cid = cell_getid(s->cdim, s->gparts[k].x[0] * s->iwidth[0],
+                               s->gparts[k].x[1] * s->iwidth[1],
+                               s->gparts[k].x[2] * s->iwidth[2]);
+    if (cells[cid].nodeID != nodeID)
+      error("Received g-particle (%zu) that does not belong here (nodeID=%i).",
+            k, cells[cid].nodeID);
+  }
+  for (size_t k = 0; k < nr_sparts_new; k++) {
+    const int cid = cell_getid(s->cdim, s->sparts[k].x[0] * s->iwidth[0],
+                               s->sparts[k].x[1] * s->iwidth[1],
+                               s->sparts[k].x[2] * s->iwidth[2]);
+    if (cells[cid].nodeID != nodeID)
+      error("Received s-particle (%zu) that does not belong here (nodeID=%i).",
+            k, cells[cid].nodeID);
+  }
+  for (size_t k = 0; k < nr_bparts_new; k++) {
+    const int cid = cell_getid(s->cdim, s->bparts[k].x[0] * s->iwidth[0],
+                               s->bparts[k].x[1] * s->iwidth[1],
+                               s->bparts[k].x[2] * s->iwidth[2]);
+    if (cells[cid].nodeID != nodeID)
+      error("Received b-particle (%zu) that does not belong here (nodeID=%i).",
+            k, cells[cid].nodeID);
+  }
+
+  /* Verify that the links are correct */
+  part_verify_links(s->parts, s->gparts, s->sparts, s->bparts, nr_parts_new,
+                    nr_gparts_new, nr_sparts_new, nr_bparts_new, e->verbose);
+
+#endif
+
+  /* Be verbose about what just happened. */
+  if (e->verbose) {
+    int my_cells = 0;
+    for (int k = 0; k < nr_cells; k++)
+      if (cells[k].nodeID == nodeID) my_cells += 1;
+    message(
+        "node %i now has %zu parts, %zu sparts, %zu bparts and %zu gparts in "
+        "%i cells.",
+        nodeID, nr_parts_new, nr_sparts_new, nr_bparts_new, nr_gparts_new,
+        my_cells);
+  }
+
+  /* Flag that we do not have any extra particles any more */
+  s->nr_extra_parts = 0;
+  s->nr_extra_gparts = 0;
+  s->nr_extra_sparts = 0;
+  s->nr_extra_bparts = 0;
+
+  /* Flag that a redistribute has taken place */
+  e->step_props |= engine_step_prop_redistribute;
+
+  if (e->verbose)
+    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
+            clocks_getunit());
+#else
+  error("SWIFT was not compiled with MPI support.");
+#endif
+}
diff --git a/src/engine_unskip.c b/src/engine_unskip.c
new file mode 100644
index 0000000000000000000000000000000000000000..dfadfa5ca1a6aebd0d7a277164eca9707ac97a62
--- /dev/null
+++ b/src/engine_unskip.c
@@ -0,0 +1,400 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
+ *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+/* Config parameters. */
+#include "../config.h"
+
+/* This object's header. */
+#include "engine.h"
+
+/* Local headers. */
+#include "active.h"
+#include "cell.h"
+#include "memswap.h"
+
+/* Load the profiler header, if needed. */
+#ifdef WITH_PROFILER
+#include <gperftools/profiler.h>
+#endif
+
+/**
+ * @brief Broad categories of tasks.
+ *
+ * Each category is unskipped independently of the others. The numbering
+ * starts at 1, so a zeroed task_types[] entry matches none of them.
+ */
+enum task_broad_types {
+  task_broad_types_hydro = 1,
+  task_broad_types_gravity,
+  task_broad_types_stars,
+  task_broad_types_black_holes,
+  task_broad_types_count,
+};
+
+/**
+ * @brief Meta-data for the unskipping
+ */
+struct unskip_data {
+
+  /*! The #engine */
+  struct engine *e;
+
+  /*! Pointer to the start of the list of top-level cell indices to unskip */
+  int *list_base;
+
+  /*! Number of copies of the list (one per broad task type in use) */
+  int multiplier;
+
+  /*! The number of active cells (without duplication) */
+  int num_active_cells;
+
+  /*! The #task_broad_types corresponding to each copy of the list */
+  enum task_broad_types task_types[task_broad_types_count];
+};
+
+/**
+ * @brief Recursively unskip the hydro tasks of a cell and of its progeny.
+ *
+ * The recursion stops at cells that do not have the has-tasks flag set, that
+ * have a zero hydro count or that are not active for hydro. Whenever
+ * cell_unskip_hydro_tasks() returns a non-zero value, the engine's
+ * forcerebuild counter is incremented with atomic_inc().
+ *
+ * @param c The #cell to process.
+ * @param e The #engine.
+ */
+static void engine_do_unskip_hydro(struct cell *c, struct engine *e) {
+
+  /* Nothing to do for cells without tasks, with a zero hydro count
+   * or that are not active. */
+  if (!cell_get_flag(c, cell_flag_has_tasks)) return;
+  if (c->hydro.count == 0) return;
+  if (!cell_is_active_hydro(c, e)) return;
+
+  /* Deal with the progeny first */
+  if (c->split) {
+    for (int child = 0; child < 8; child++) {
+      struct cell *sub = c->progeny[child];
+      if (sub != NULL) engine_do_unskip_hydro(sub, e);
+    }
+  }
+
+  /* Then unskip the tasks attached to this cell itself */
+  const int needs_rebuild = cell_unskip_hydro_tasks(c, &e->sched);
+  if (needs_rebuild) atomic_inc(&e->forcerebuild);
+}
+
+/**
+ * @brief Recursively unskip the stars tasks of a cell and of its progeny.
+ *
+ * When star formation is switched on, the hydro count and the hydro
+ * activity of the cell are taken into account as well. A non-zero return
+ * of cell_unskip_stars_tasks() increments the engine's forcerebuild counter.
+ *
+ * @param c The #cell to process.
+ * @param e The #engine.
+ * @param with_star_formation Are we running with star formation switched on?
+ */
+static void engine_do_unskip_stars(struct cell *c, struct engine *e,
+                                   const int with_star_formation) {
+
+  /* Nothing to do below the level where the tasks are */
+  if (!cell_get_flag(c, cell_flag_has_tasks)) return;
+
+  /* Nothing to do either if the cell holds nothing of interest */
+  const int has_stars = c->stars.count > 0;
+  const int has_hydro = with_star_formation && c->hydro.count > 0;
+  if (!has_stars && !has_hydro) return;
+
+  /* ... or if nothing of interest is active in it */
+  if (!cell_is_active_stars(c, e) &&
+      !(with_star_formation && cell_is_active_hydro(c, e)))
+    return;
+
+  /* Deal with the progeny first */
+  if (c->split) {
+    for (int child = 0; child < 8; child++) {
+      struct cell *sub = c->progeny[child];
+      if (sub != NULL) engine_do_unskip_stars(sub, e, with_star_formation);
+    }
+  }
+
+  /* Then unskip the tasks attached to this cell itself */
+  const int needs_rebuild =
+      cell_unskip_stars_tasks(c, &e->sched, with_star_formation);
+  if (needs_rebuild) atomic_inc(&e->forcerebuild);
+}
+
+/**
+ * @brief Recursively unskip the black hole tasks of a cell and its progeny.
+ *
+ * The recursion stops at cells that do not have the has-tasks flag set, that
+ * have a zero black hole count or that are not active for black holes.
+ * Whenever cell_unskip_black_holes_tasks() returns a non-zero value, the
+ * engine's forcerebuild counter is incremented with atomic_inc().
+ *
+ * @param c The #cell to process.
+ * @param e The #engine.
+ */
+static void engine_do_unskip_black_holes(struct cell *c, struct engine *e) {
+
+  /* Nothing to do for cells without tasks, with a zero black hole count
+   * or that are not active. */
+  if (!cell_get_flag(c, cell_flag_has_tasks)) return;
+  if (c->black_holes.count == 0) return;
+  if (!cell_is_active_black_holes(c, e)) return;
+
+  /* Deal with the progeny first */
+  if (c->split) {
+    for (int child = 0; child < 8; child++) {
+      struct cell *sub = c->progeny[child];
+      if (sub != NULL) engine_do_unskip_black_holes(sub, e);
+    }
+  }
+
+  /* Then unskip the tasks attached to this cell itself */
+  const int needs_rebuild = cell_unskip_black_holes_tasks(c, &e->sched);
+  if (needs_rebuild) atomic_inc(&e->forcerebuild);
+}
+
+/**
+ * @brief Recursively unskip the gravity tasks of a cell and of its progeny.
+ *
+ * The recursion stops at cells that do not have the has-tasks flag set, that
+ * have a zero gravity count or that are not active for gravity. It only
+ * descends while (maxdepth - depth) is at least space_subdepth_diff_grav.
+ *
+ * @param c The #cell to process.
+ * @param e The #engine.
+ */
+static void engine_do_unskip_gravity(struct cell *c, struct engine *e) {
+
+  /* Nothing to do for cells without tasks, with a zero gravity count
+   * or that are not active. */
+  if (!cell_get_flag(c, cell_flag_has_tasks)) return;
+  if (c->grav.count == 0) return;
+  if (!cell_is_active_gravity(c, e)) return;
+
+  /* Deal with the progeny first, but only while the difference between
+   * the cell's maxdepth and its own depth is large enough */
+  if (c->split && ((c->maxdepth - c->depth) >= space_subdepth_diff_grav)) {
+    for (int child = 0; child < 8; child++) {
+      struct cell *sub = c->progeny[child];
+      if (sub != NULL) engine_do_unskip_gravity(sub, e);
+    }
+  }
+
+  /* Then unskip the tasks attached to this cell itself */
+  cell_unskip_gravity_tasks(c, &e->sched);
+}
+
+/**
+ * @brief Mapper function to unskip active tasks.
+ *
+ * @param map_data A chunk of the list of indices of active top-level cells.
+ * @param num_elements Chunk size.
+ * @param extra_data Pointer to an #unskip_data structure.
+ */
+void engine_do_unskip_mapper(void *map_data, int num_elements,
+                             void *extra_data) {
+
+  /* Unpack the meta data */
+  struct unskip_data *data = (struct unskip_data *)extra_data;
+  const int num_active_cells = data->num_active_cells;
+  const enum task_broad_types *const task_types = data->task_types;
+  const int *const list_base = data->list_base;
+  struct engine *e = data->e;
+  struct cell *const cells_top = e->s->cells_top;
+
+  /* What policies are we running? */
+  const int with_star_formation = e->policy & engine_policy_star_formation;
+
+  /* The current chunk of the list of active cell indices */
+  const int *const local_cells = (int *)map_data;
+
+  /* Loop over this thread's chunk of cells to unskip */
+  for (int ind = 0; ind < num_elements; ind++) {
+
+    /* Handle on the top-level cell this list entry refers to */
+    struct cell *const c = &cells_top[local_cells[ind]];
+
+    /* In what copy of the global list are we? The offset of this entry from
+     * the list base, divided by the length of one copy, gives the copy. */
+    const ptrdiff_t delta = &local_cells[ind] - list_base;
+    const int type = delta / num_active_cells;
+
+#ifdef SWIFT_DEBUG_CHECKS
+    if (type >= data->multiplier) error("Invalid broad task type!");
+    if (c == NULL) error("Got an invalid cell index!");
+#endif
+
+    /* What broad type of tasks are we unskipping? */
+    switch (task_types[type]) {
+      case task_broad_types_hydro:
+#ifdef SWIFT_DEBUG_CHECKS
+        if (!(e->policy & engine_policy_hydro))
+          error("Trying to unskip hydro tasks in a non-hydro run!");
+#endif
+        engine_do_unskip_hydro(c, e);
+        break;
+      case task_broad_types_gravity:
+#ifdef SWIFT_DEBUG_CHECKS
+        if (!(e->policy & engine_policy_self_gravity) &&
+            !(e->policy & engine_policy_external_gravity))
+          error("Trying to unskip gravity tasks in a non-gravity run!");
+#endif
+        engine_do_unskip_gravity(c, e);
+        break;
+      case task_broad_types_stars:
+#ifdef SWIFT_DEBUG_CHECKS
+        if (!(e->policy & engine_policy_stars))
+          error("Trying to unskip star tasks in a non-stars run!");
+#endif
+        engine_do_unskip_stars(c, e, with_star_formation);
+        break;
+      case task_broad_types_black_holes:
+#ifdef SWIFT_DEBUG_CHECKS
+        if (!(e->policy & engine_policy_black_holes))
+          error("Trying to unskip black holes tasks in a non-BH run!");
+#endif
+        engine_do_unskip_black_holes(c, e);
+        break;
+      default:
+#ifdef SWIFT_DEBUG_CHECKS
+        error("Invalid broad task type!");
+#endif
+        continue;
+    }
+  }
+}
+
+/**
+ * @brief Unskip all the tasks that act on active cells at this time.
+ *
+ * The list of active cells is duplicated once per broad task type in use so
+ * that the threads can unskip the different types of tasks independently.
+ *
+ * @param e The #engine.
+ */
+void engine_unskip(struct engine *e) {
+
+  const ticks tic = getticks();
+  struct space *s = e->s;
+  const int nodeID = e->nodeID;
+
+  const int with_hydro = e->policy & engine_policy_hydro;
+  const int with_self_grav = e->policy & engine_policy_self_gravity;
+  const int with_ext_grav = e->policy & engine_policy_external_gravity;
+  const int with_stars = e->policy & engine_policy_stars;
+  const int with_feedback = e->policy & engine_policy_feedback;
+  const int with_black_holes = e->policy & engine_policy_black_holes;
+
+#ifdef WITH_PROFILER
+  static int count = 0;
+  char filename[100];
+  sprintf(filename, "/tmp/swift_engine_do_usnkip_mapper_%06i.prof", count++);
+  ProfilerStart(filename);
+#endif  // WITH_PROFILER
+
+  /* Move the active local cells to the top of the list. */
+  int *local_cells = s->local_cells_with_tasks_top;
+  int num_active_cells = 0;
+  for (int k = 0; k < s->nr_local_cells_with_tasks; k++) {
+    struct cell *c = &s->cells_top[local_cells[k]];
+
+    if ((with_hydro && cell_is_active_hydro(c, e)) ||
+        (with_self_grav && cell_is_active_gravity(c, e)) ||
+        (with_ext_grav && c->nodeID == nodeID &&
+         cell_is_active_gravity(c, e)) ||
+        (with_feedback && cell_is_active_stars(c, e)) ||
+        (with_stars && c->nodeID == nodeID && cell_is_active_stars(c, e)) ||
+        (with_black_holes && cell_is_active_black_holes(c, e))) {
+
+      if (num_active_cells != k)
+        memswap(&local_cells[k], &local_cells[num_active_cells], sizeof(int));
+      num_active_cells += 1;
+    }
+  }
+
+  /* What kind of tasks do we have? */
+  struct unskip_data data;
+  bzero(&data, sizeof(struct unskip_data));
+  int multiplier = 0;
+  if (with_hydro) {
+    data.task_types[multiplier] = task_broad_types_hydro;
+    multiplier++;
+  }
+  if (with_self_grav || with_ext_grav) {
+    data.task_types[multiplier] = task_broad_types_gravity;
+    multiplier++;
+  }
+  if (with_feedback || with_stars) {
+    data.task_types[multiplier] = task_broad_types_stars;
+    multiplier++;
+  }
+  if (with_black_holes) {
+    data.task_types[multiplier] = task_broad_types_black_holes;
+    multiplier++;
+  }
+
+  /* Duplicate the active cell list to better parallelise the unskip over the
+     threads. With no active cell we skip this as malloc(0) may return NULL. */
+  int *local_active_cells = local_cells;
+  const int duplicate_list = (multiplier > 1) && (num_active_cells > 0);
+  if (duplicate_list) {
+
+    /* Make space for copies of the list (size computed as a size_t) */
+    local_active_cells = (int *)malloc((size_t)multiplier * num_active_cells *
+                                       sizeof(int));
+    if (local_active_cells == NULL)
+      error(
+          "Couldn't allocate memory for duplicated list of local active "
+          "cells.");
+
+    /* Make blind copies of the list */
+    for (int m = 0; m < multiplier; m++) {
+      memcpy(local_active_cells + m * num_active_cells, local_cells,
+             num_active_cells * sizeof(int));
+    }
+  }
+
+  /* Pack the meta-data describing the list for the mapper */
+  data.e = e;
+  data.list_base = local_active_cells;
+  data.num_active_cells = num_active_cells;
+  data.multiplier = multiplier;
+
+  /* Activate all the regular tasks */
+  threadpool_map(&e->threadpool, engine_do_unskip_mapper, local_active_cells,
+                 num_active_cells * multiplier, sizeof(int), 1, &data);
+
+#ifdef WITH_PROFILER
+  ProfilerStop();
+#endif  // WITH_PROFILER
+
+  /* Free the duplicated list, if we made one */
+  if (duplicate_list) {
+    free(local_active_cells);
+  }
+
+  if (e->verbose)
+    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
+            clocks_getunit());
+}
diff --git a/src/logger.c b/src/logger.c
index 8be521b27f949ea0d496a5207335f1ec68208489..762eb516077ef82f08b6c34da09cd7bc9eb6a280 100644
--- a/src/logger.c
+++ b/src/logger.c
@@ -44,44 +44,41 @@
 /*
  * Thoses are definitions from the format and therefore should not be changed!
  */
-/* number of bytes for a mask */
+/* Number of bytes for a mask. */
 // TODO change this to number of bits
 #define logger_mask_size 1
 
-/* number of bits for chunk header */
+/* Number of bytes for chunk header. */
 #define logger_header_bytes 8
 
-/* number bytes for an offset */
+/* Number of bytes for an offset. */
 #define logger_offset_size logger_header_bytes - logger_mask_size
 
-/* number of bytes for the version information */
-#define logger_version_size 20
+/* Number of bytes for the file format information. */
+#define logger_format_size 20
 
-/* number of bytes for the labels in the header */
+/* Number of bytes for the labels in the header. */
 #define logger_label_size 20
 
-/* number of bytes for the number in the header */
-#define logger_number_size 4
-
-char logger_version[logger_version_size] = "0.1";
+char logger_file_format[logger_format_size] = "SWIFT_LOGGER";
 
 const struct mask_data logger_mask_data[logger_count_mask] = {
-    /* Particle's position */
+    /* Particle's position. */
     {3 * sizeof(double), 1 << logger_x, "positions"},
-    /* Particle's velocity */
+    /* Particle's velocity. */
     {3 * sizeof(float), 1 << logger_v, "velocities"},
-    /* Particle's acceleration */
+    /* Particle's acceleration. */
     {3 * sizeof(float), 1 << logger_a, "accelerations"},
-    /* Particle's entropy */
+    /* Particle's entropy. */
     {sizeof(float), 1 << logger_u, "entropy"},
-    /* Particle's smoothing length */
+    /* Particle's smoothing length. */
     {sizeof(float), 1 << logger_h, "smoothing length"},
-    /* Particle's density */
+    /* Particle's density. */
     {sizeof(float), 1 << logger_rho, "density"},
-    /* Particle's constants: mass (float) and ID (long long) */
+    /* Particle's constants: mass (float) and ID (long long). */
     {sizeof(float) + sizeof(long long), 1 << logger_consts, "consts"},
     /* Simulation time stamp: integertime and double time (e.g. scale
-       factor or time) */
+       factor or time). */
     {sizeof(integertime_t) + sizeof(double), 1 << logger_timestamp,
      "timestamp"}};
 
@@ -99,11 +96,11 @@ const struct mask_data logger_mask_data[logger_count_mask] = {
  */
 char *logger_write_chunk_header(char *buff, const unsigned int *mask,
                                 const size_t *offset, const size_t offset_new) {
-  /* write mask */
+  /* write mask. */
   memcpy(buff, mask, logger_mask_size);
   buff += logger_mask_size;
 
-  /* write offset */
+  /* write offset. */
   size_t diff_offset = offset_new - *offset;
   memcpy(buff, &diff_offset, logger_offset_size);
   buff += logger_offset_size;
@@ -112,7 +109,7 @@ char *logger_write_chunk_header(char *buff, const unsigned int *mask,
 }
 
 /**
- * @brief Write to the dump
+ * @brief Write to the dump.
  *
  * @param d #dump file
  * @param offset (return) offset of the data
@@ -121,13 +118,13 @@ char *logger_write_chunk_header(char *buff, const unsigned int *mask,
  */
 void logger_write_data(struct dump *d, size_t *offset, size_t size,
                        const void *p) {
-  /* get buffer */
+  /* get buffer. */
   char *buff = dump_get(d, size, offset);
 
-  /* write data to the buffer */
+  /* write data to the buffer. */
   memcpy(buff, p, size);
 
-  /* Update offset to end of chunk */
+  /* Update offset to end of chunk. */
   *offset += size;
 }
 
@@ -171,15 +168,15 @@ int logger_compute_chunk_size(unsigned int mask) {
  * @param log The #logger
  * @param e The #engine
  */
-void logger_log_all(struct logger *log, const struct engine *e) {
+void logger_log_all(struct logger_writer *log, const struct engine *e) {
 
-  /* Ensure that enough space is available */
+  /* Ensure that enough space is available. */
   logger_ensure_size(log, e->total_nr_parts, e->total_nr_gparts, 0);
 #ifdef SWIFT_DEBUG_CHECKS
   message("Need to implement stars");
 #endif
 
-  /* some constants */
+  /* some constants. */
   const struct space *s = e->s;
   const unsigned int mask =
       logger_mask_data[logger_x].mask | logger_mask_data[logger_v].mask |
@@ -187,17 +184,17 @@ void logger_log_all(struct logger *log, const struct engine *e) {
       logger_mask_data[logger_h].mask | logger_mask_data[logger_rho].mask |
       logger_mask_data[logger_consts].mask;
 
-  /* loop over all parts */
+  /* loop over all parts. */
   for (long long i = 0; i < e->total_nr_parts; i++) {
     logger_log_part(log, &s->parts[i], mask,
                     &s->xparts[i].logger_data.last_offset);
     s->xparts[i].logger_data.steps_since_last_output = 0;
   }
 
-  /* loop over all gparts */
+  /* loop over all gparts. */
   if (e->total_nr_gparts > 0) error("Not implemented");
 
-  /* loop over all sparts */
+  /* loop over all sparts. */
   // TODO
 }
 
@@ -210,7 +207,7 @@ void logger_log_all(struct logger *log, const struct engine *e) {
  * @param offset Pointer to the offset of the previous log of this particle;
  * (return) offset of this log.
  */
-void logger_log_part(struct logger *log, const struct part *p,
+void logger_log_part(struct logger_writer *log, const struct part *p,
                      unsigned int mask, size_t *offset) {
 
   /* Make sure we're not writing a timestamp. */
@@ -289,7 +286,7 @@ void logger_log_part(struct logger *log, const struct part *p,
  * @param offset Pointer to the offset of the previous log of this particle;
  * (return) offset of this log.
  */
-void logger_log_gpart(struct logger *log, const struct gpart *p,
+void logger_log_gpart(struct logger_writer *log, const struct gpart *p,
                       unsigned int mask, size_t *offset) {
 
   /* Make sure we're not writing a timestamp. */
@@ -331,7 +328,7 @@ void logger_log_gpart(struct logger *log, const struct gpart *p,
 
   /* Particle constants, which is a bit more complicated. */
   if (mask & logger_mask_data[logger_consts].mask) {
-    // TODO make it dependent of logger_mask_data
+    // TODO make it dependent of logger_mask_data.
     memcpy(buff, &p->mass, sizeof(float));
     buff += sizeof(float);
     memcpy(buff, &p->id_or_neg_offset, sizeof(long long));
@@ -351,7 +348,7 @@ void logger_log_gpart(struct logger *log, const struct gpart *p,
  * @param offset Pointer to the offset of the previous log of this particle;
  * (return) offset of this log.
  */
-void logger_log_timestamp(struct logger *log, integertime_t timestamp,
+void logger_log_timestamp(struct logger_writer *log, integertime_t timestamp,
                           double time, size_t *offset) {
   struct dump *dump = &log->dump;
 
@@ -368,11 +365,11 @@ void logger_log_timestamp(struct logger *log, integertime_t timestamp,
   buff = logger_write_chunk_header(buff, &mask, offset, offset_new);
 
   /* Store the timestamp. */
-  // TODO make it dependent of logger_mask_data
+  // TODO make it dependent of logger_mask_data.
   memcpy(buff, &timestamp, sizeof(integertime_t));
   buff += sizeof(integertime_t);
 
-  /* Store the time */
+  /* Store the time. */
   memcpy(buff, &time, sizeof(double));
 
   /* Update the log message offset. */
@@ -390,21 +387,21 @@ void logger_log_timestamp(struct logger *log, integertime_t timestamp,
  * @param total_nr_gparts total number of gpart
  * @param total_nr_sparts total number of spart
  */
-void logger_ensure_size(struct logger *log, size_t total_nr_parts,
+void logger_ensure_size(struct logger_writer *log, size_t total_nr_parts,
                         size_t total_nr_gparts, size_t total_nr_sparts) {
 
-  /* count part memory */
+  /* count part memory. */
   size_t limit = log->max_chunk_size;
 
   limit *= total_nr_parts;
 
-  /* count gpart memory */
+  /* count gpart memory. */
   if (total_nr_gparts > 0) error("Not implemented");
 
-  /* count spart memory */
+  /* count spart memory. */
   if (total_nr_sparts > 0) error("Not implemented");
 
-  /* ensure enough space in dump */
+  /* ensure enough space in dump. */
   dump_ensure(&log->dump, limit, log->buffer_scale * limit);
 }
 
@@ -414,8 +411,8 @@ void logger_ensure_size(struct logger *log, size_t total_nr_parts,
  * @param log The #logger
  * @param params The #swift_params
  */
-void logger_init(struct logger *log, struct swift_params *params) {
-  /* read parameters */
+void logger_init(struct logger_writer *log, struct swift_params *params) {
+  /* read parameters. */
   log->delta_step = parser_get_param_int(params, "Logger:delta_step");
   size_t buffer_size =
       parser_get_opt_param_float(params, "Logger:initial_buffer_size", 0.5) *
@@ -424,24 +421,24 @@ void logger_init(struct logger *log, struct swift_params *params) {
       parser_get_opt_param_float(params, "Logger:buffer_scale", 10);
   parser_get_param_string(params, "Logger:basename", log->base_name);
 
-  /* set initial value of parameters */
+  /* set initial value of parameters. */
   log->timestamp_offset = 0;
 
-  /* generate dump filename */
+  /* generate dump filename. */
   char logger_name_file[PARSER_MAX_LINE_SIZE];
   strcpy(logger_name_file, log->base_name);
   strcat(logger_name_file, ".dump");
 
-  /* Compute max size for a particle chunk */
+  /* Compute max size for a particle chunk. */
   int max_size = logger_offset_size + logger_mask_size;
 
-  /* Loop over all fields except timestamp */
+  /* Loop over all fields except timestamp. */
   for (int i = 0; i < logger_count_mask - 1; i++) {
     max_size += logger_mask_data[i].size;
   }
   log->max_chunk_size = max_size;
 
-  /* init dump */
+  /* init dump. */
   dump_init(&log->dump, logger_name_file, buffer_size);
 }
 
@@ -450,18 +447,17 @@ void logger_init(struct logger *log, struct swift_params *params) {
  *
  * @param log The #logger
  */
-void logger_clean(struct logger *log) { dump_close(&log->dump); }
+void logger_free(struct logger_writer *log) { dump_close(&log->dump); }
 
 /**
  * @brief Write a file header to a logger file
  *
  * @param log The #logger
- * @param dump The #dump in which to log the particle data.
  *
  */
-void logger_write_file_header(struct logger *log, const struct engine *e) {
+void logger_write_file_header(struct logger_writer *log) {
 
-  /* get required variables */
+  /* get required variables. */
   struct dump *dump = &log->dump;
 
   size_t file_offset = dump->file_offset;
@@ -471,37 +467,46 @@ void logger_write_file_header(struct logger *log, const struct engine *e) {
         "The logger is not empty."
         "This function should be called before writing anything in the logger");
 
-  /* Write version information */
-  logger_write_data(dump, &file_offset, logger_version_size, &logger_version);
+  /* Write format information. */
+  logger_write_data(dump, &file_offset, logger_format_size,
+                    &logger_file_format);
+
+  /* Write the major version number. */
+  int major = logger_major_version;
+  logger_write_data(dump, &file_offset, sizeof(int), &major);
 
-  /* write offset direction */
+  /* Write the minor version number. */
+  int minor = logger_minor_version;
+  logger_write_data(dump, &file_offset, sizeof(int), &minor);
+
+  /* write offset direction. */
   const int reversed = 0;
-  logger_write_data(dump, &file_offset, logger_number_size, &reversed);
+  logger_write_data(dump, &file_offset, sizeof(int), &reversed);
 
-  /* placeholder to write the offset of the first log here */
+  /* placeholder to write the offset of the first log here. */
   char *skip_header = dump_get(dump, logger_offset_size, &file_offset);
 
-  /* write number of bytes used for names */
-  const int label_size = logger_label_size;
-  logger_write_data(dump, &file_offset, logger_number_size, &label_size);
+  /* write number of bytes used for names. */
+  const unsigned int label_size = logger_label_size;
+  logger_write_data(dump, &file_offset, sizeof(unsigned int), &label_size);
 
-  /* write number of masks */
-  int count_mask = logger_count_mask;
-  logger_write_data(dump, &file_offset, logger_number_size, &count_mask);
+  /* write number of masks. */
+  const unsigned int count_mask = logger_count_mask;
+  logger_write_data(dump, &file_offset, sizeof(unsigned int), &count_mask);
 
-  /* write masks */
-  // loop over all mask type
+  /* write masks. */
+  // loop over all mask types.
   for (int i = 0; i < logger_count_mask; i++) {
-    // mask name
+    // mask name.
     logger_write_data(dump, &file_offset, logger_label_size,
                       &logger_mask_data[i].name);
 
-    // mask size
-    logger_write_data(dump, &file_offset, logger_number_size,
+    // mask size.
+    logger_write_data(dump, &file_offset, sizeof(unsigned int),
                       &logger_mask_data[i].size);
   }
 
-  /* last step: write first offset */
+  /* last step: write first offset. */
   memcpy(skip_header, &file_offset, logger_offset_size);
 }
 
@@ -591,7 +596,7 @@ int logger_read_part(struct part *p, size_t *offset, const char *buff) {
 
   /* Particle constants, which is a bit more complicated. */
   if (mask & logger_mask_data[logger_rho].mask) {
-    // TODO make it dependent of logger_mask_data
+    // TODO make it dependent on logger_mask_data.
     memcpy(&p->mass, buff, sizeof(float));
     buff += sizeof(float);
     memcpy(&p->id, buff, sizeof(long long));
@@ -694,7 +699,7 @@ int logger_read_timestamp(unsigned long long int *t, double *time,
     error("Timestamp message contains extra fields.");
 
   /* Copy the timestamp value from the buffer. */
-  // TODO make it dependent of logger_mask_data
+  // TODO make it dependent on logger_mask_data.
   memcpy(t, buff, sizeof(unsigned long long int));
   buff += sizeof(unsigned long long int);
 
diff --git a/src/logger.h b/src/logger.h
index 56e2c8ab94c66b24df1800877bb9cfb129c3e645..ed2d6374fa9031f526e79e790572c89f6176df4b 100644
--- a/src/logger.h
+++ b/src/logger.h
@@ -28,13 +28,15 @@
 #include "timeline.h"
 #include "units.h"
 
-/* Forward declaration */
+/* Forward declaration. */
 struct dump;
 struct gpart;
 struct part;
-/* TODO remove dependency */
 struct engine;
 
+#define logger_major_version 0
+#define logger_minor_version 1
+
 /**
  * Logger entries contain messages representing the particle data at a given
  * point in time during the simulation.
@@ -82,16 +84,18 @@ enum logger_masks_number {
   logger_h = 4,
   logger_rho = 5,
   logger_consts = 6,
-  logger_timestamp = 7,  /* expect it to be before count */
-  logger_count_mask = 8, /* Need to be the last */
+  logger_timestamp = 7,  /* expect it to be before count. */
+  logger_count_mask = 8, /* Need to be the last. */
 } __attribute__((packed));
 
 struct mask_data {
-  /* Number of bytes for a mask */
+  /* Number of bytes for a mask. */
   int size;
-  /* Mask value */
+
+  /* Mask value. */
   unsigned int mask;
-  /* name of the mask */
+
+  /* Name of the mask. */
   char name[100];
 };
 
@@ -100,51 +104,52 @@ extern const struct mask_data logger_mask_data[logger_count_mask];
 /* Size of the strings. */
 #define logger_string_length 200
 
-/* structure containing global data */
-struct logger {
-  /* Number of particle steps between dumping a chunk of data */
+/* structure containing global data. */
+struct logger_writer {
+  /* Number of particle steps between dumping a chunk of data. */
   short int delta_step;
 
-  /* Logger basename */
+  /* Logger basename. */
   char base_name[logger_string_length];
 
-  /* Dump file */
+  /*  Dump file (In the reader, the dump is cleaned, therefore it is renamed
+   * logfile). */
   struct dump dump;
 
-  /* timestamp offset for logger*/
+  /* timestamp offset for logger. */
   size_t timestamp_offset;
 
-  /* scaling factor when buffer is too small */
+  /* scaling factor when buffer is too small. */
   float buffer_scale;
 
-  /* Size of a chunk if every mask are activated */
+  /* Size of a chunk if every mask is activated. */
   int max_chunk_size;
 
 } SWIFT_STRUCT_ALIGN;
 
-/* required structure for each particle type */
+/* required structure for each particle type. */
 struct logger_part_data {
-  /* Number of particle updates since last output */
+  /* Number of particle updates since last output. */
   int steps_since_last_output;
 
-  /* offset of last particle log entry */
+  /* offset of last particle log entry. */
   size_t last_offset;
 };
 
 /* Function prototypes. */
 int logger_compute_chunk_size(unsigned int mask);
-void logger_log_all(struct logger *log, const struct engine *e);
-void logger_log_part(struct logger *log, const struct part *p,
+void logger_log_all(struct logger_writer *log, const struct engine *e);
+void logger_log_part(struct logger_writer *log, const struct part *p,
                      unsigned int mask, size_t *offset);
-void logger_log_gpart(struct logger *log, const struct gpart *p,
+void logger_log_gpart(struct logger_writer *log, const struct gpart *p,
                       unsigned int mask, size_t *offset);
-void logger_init(struct logger *log, struct swift_params *params);
-void logger_clean(struct logger *log);
-void logger_log_timestamp(struct logger *log, integertime_t t, double time,
-                          size_t *offset);
-void logger_ensure_size(struct logger *log, size_t total_nr_parts,
+void logger_init(struct logger_writer *log, struct swift_params *params);
+void logger_free(struct logger_writer *log);
+void logger_log_timestamp(struct logger_writer *log, integertime_t t,
+                          double time, size_t *offset);
+void logger_ensure_size(struct logger_writer *log, size_t total_nr_parts,
                         size_t total_nr_gparts, size_t total_nr_sparts);
-void logger_write_file_header(struct logger *log, const struct engine *e);
+void logger_write_file_header(struct logger_writer *log);
 
 int logger_read_part(struct part *p, size_t *offset, const char *buff);
 int logger_read_gpart(struct gpart *p, size_t *offset, const char *buff);
@@ -164,12 +169,14 @@ INLINE static void logger_part_data_init(struct logger_part_data *logger) {
 /**
  * @brief Should this particle write its data now ?
  *
- * @param xp The #xpart.
- * @param e The #engine containing information about the current time.
- * @return 1 if the #part should write, 0 otherwise.
+ * @param logger_data The #logger_part_data of a particle.
+ * @param log The #logger_writer.
+ *
+ * @return 1 if the particle should be written, 0 otherwise.
  */
 __attribute__((always_inline)) INLINE static int logger_should_write(
-    const struct logger_part_data *logger_data, const struct logger *log) {
+    const struct logger_part_data *logger_data,
+    const struct logger_writer *log) {
 
   return (logger_data->steps_since_last_output > log->delta_step);
 }
diff --git a/src/logger_io.c b/src/logger_io.c
index 3cef3497b2912411cea6763f5418bc76a7f5ece0..c6be1f292434c759e20064542e91caa2cd238a4d 100644
--- a/src/logger_io.c
+++ b/src/logger_io.c
@@ -21,7 +21,7 @@
 /* Config parameters. */
 #include "../config.h"
 
-#ifdef WITH_LOGGER
+#if defined(WITH_LOGGER) && defined(HAVE_HDF5) && !defined(WITH_MPI)
 
 /* Some standard headers. */
 #include <hdf5.h>
@@ -87,7 +87,7 @@ void write_index_single(struct engine* e, const char* baseName,
   // struct spart* sparts = e->s->sparts;
   static int outputCount = 0;
 
-  struct logger* log = e->logger;
+  struct logger_writer* log = e->logger;
 
   /* Number of unassociated gparts */
   const size_t Ndm = Ntot > 0 ? Ntot - (Ngas + Nstars) : 0;
@@ -296,4 +296,4 @@ void write_index_single(struct engine* e, const char* baseName,
   ++outputCount;
 }
 
-#endif /* HAVE_HDF5 */
+#endif /* WITH_LOGGER && HAVE_HDF5 && !WITH_MPI */
diff --git a/src/logger_io.h b/src/logger_io.h
index f5b1274fb7b957d5b48bc8425bf784c586ac6a08..a424c5c104b9f1090c69f7e0bb37e72635636f82 100644
--- a/src/logger_io.h
+++ b/src/logger_io.h
@@ -50,11 +50,13 @@ __attribute__((always_inline)) INLINE static void hydro_write_index(
   *num_fields = 2;
 
   /* List what we want to write */
-  list[0] = io_make_output_field("ParticleIDs", ULONGLONG, 1,
-                                 UNIT_CONV_NO_UNITS, parts, id);
+  list[0] =
+      io_make_output_field("ParticleIDs", ULONGLONG, 1, UNIT_CONV_NO_UNITS, 0.f,
+                           parts, id, "will be erased");
 
-  list[1] = io_make_output_field("Offset", ULONGLONG, 1, UNIT_CONV_NO_UNITS,
-                                 xparts, logger_data.last_offset);
+  list[1] =
+      io_make_output_field("Offset", ULONGLONG, 1, UNIT_CONV_NO_UNITS, 0.f,
+                           xparts, logger_data.last_offset, "will be erased");
 }
 #endif
 
diff --git a/src/parallel_io.c b/src/parallel_io.c
index ccba33d07500f4e22e365942622f6392cfbb0166..d469de729bd08c79889b031e9d25d796cabad28e 100644
--- a/src/parallel_io.c
+++ b/src/parallel_io.c
@@ -680,6 +680,8 @@ void writeArray(struct engine* e, hid_t grp, char* fileName,
  * @param bparts (output) The array of #bpart read from the file.
  * @param Ngas (output) The number of particles read from the file.
  * @param Ngparts (output) The number of particles read from the file.
+ * @param Ngparts_background (output) The number of background DM particles read
+ * from the file.
  * @param Nstars (output) The number of particles read from the file.
  * @param Nblackholes (output) The number of particles read from the file.
  * @param flag_entropy (output) 1 if the ICs contained Entropy in the
diff --git a/src/runner.c b/src/runner.c
deleted file mode 100644
index db7e512873b51a7329e19e75a763b69521efb0eb..0000000000000000000000000000000000000000
--- a/src/runner.c
+++ /dev/null
@@ -1,5048 +0,0 @@
-/*******************************************************************************
- * This file is part of SWIFT.
- * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
- *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
- *               2015 Peter W. Draper (p.w.draper@durham.ac.uk)
- *               2016 John A. Regan (john.a.regan@durham.ac.uk)
- *                    Tom Theuns (tom.theuns@durham.ac.uk)
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published
- * by the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- ******************************************************************************/
-
-/* Config parameters. */
-#include "../config.h"
-
-/* Some standard headers. */
-#include <float.h>
-#include <limits.h>
-#include <stdlib.h>
-
-/* MPI headers. */
-#ifdef WITH_MPI
-#include <mpi.h>
-#endif
-
-/* This object's header. */
-#include "runner.h"
-
-/* Local headers. */
-#include "active.h"
-#include "approx_math.h"
-#include "atomic.h"
-#include "black_holes.h"
-#include "black_holes_properties.h"
-#include "cell.h"
-#include "chemistry.h"
-#include "const.h"
-#include "cooling.h"
-#include "debug.h"
-#include "drift.h"
-#include "engine.h"
-#include "entropy_floor.h"
-#include "error.h"
-#include "feedback.h"
-#include "gravity.h"
-#include "hydro.h"
-#include "hydro_properties.h"
-#include "kick.h"
-#include "logger.h"
-#include "memuse.h"
-#include "minmax.h"
-#include "pressure_floor.h"
-#include "pressure_floor_iact.h"
-#include "runner_doiact_vec.h"
-#include "scheduler.h"
-#include "sort_part.h"
-#include "space.h"
-#include "space_getsid.h"
-#include "star_formation.h"
-#include "star_formation_logger.h"
-#include "stars.h"
-#include "task.h"
-#include "timers.h"
-#include "timestep.h"
-#include "timestep_limiter.h"
-#include "tracers.h"
-
-/* Unique identifier of loop types */
-#define TASK_LOOP_DENSITY 0
-#define TASK_LOOP_GRADIENT 1
-#define TASK_LOOP_FORCE 2
-#define TASK_LOOP_LIMITER 3
-#define TASK_LOOP_FEEDBACK 4
-#define TASK_LOOP_SWALLOW 5
-
-/* Import the density loop functions. */
-#define FUNCTION density
-#define FUNCTION_TASK_LOOP TASK_LOOP_DENSITY
-#include "runner_doiact.h"
-#undef FUNCTION
-#undef FUNCTION_TASK_LOOP
-
-/* Import the gradient loop functions (if required). */
-#ifdef EXTRA_HYDRO_LOOP
-#define FUNCTION gradient
-#define FUNCTION_TASK_LOOP TASK_LOOP_GRADIENT
-#include "runner_doiact.h"
-#undef FUNCTION
-#undef FUNCTION_TASK_LOOP
-#endif
-
-/* Import the force loop functions. */
-#define FUNCTION force
-#define FUNCTION_TASK_LOOP TASK_LOOP_FORCE
-#include "runner_doiact.h"
-#undef FUNCTION
-#undef FUNCTION_TASK_LOOP
-
-/* Import the limiter loop functions. */
-#define FUNCTION limiter
-#define FUNCTION_TASK_LOOP TASK_LOOP_LIMITER
-#include "runner_doiact.h"
-#undef FUNCTION
-#undef FUNCTION_TASK_LOOP
-
-/* Import the gravity loop functions. */
-#include "runner_doiact_grav.h"
-
-/* Import the stars density loop functions. */
-#define FUNCTION density
-#define FUNCTION_TASK_LOOP TASK_LOOP_DENSITY
-#include "runner_doiact_stars.h"
-#undef FUNCTION_TASK_LOOP
-#undef FUNCTION
-
-/* Import the stars feedback loop functions. */
-#define FUNCTION feedback
-#define FUNCTION_TASK_LOOP TASK_LOOP_FEEDBACK
-#include "runner_doiact_stars.h"
-#undef FUNCTION_TASK_LOOP
-#undef FUNCTION
-
-/* Import the black hole density loop functions. */
-#define FUNCTION density
-#define FUNCTION_TASK_LOOP TASK_LOOP_DENSITY
-#include "runner_doiact_black_holes.h"
-#undef FUNCTION_TASK_LOOP
-#undef FUNCTION
-
-/* Import the black hole feedback loop functions. */
-#define FUNCTION swallow
-#define FUNCTION_TASK_LOOP TASK_LOOP_SWALLOW
-#include "runner_doiact_black_holes.h"
-#undef FUNCTION_TASK_LOOP
-#undef FUNCTION
-
-/* Import the black hole feedback loop functions. */
-#define FUNCTION feedback
-#define FUNCTION_TASK_LOOP TASK_LOOP_FEEDBACK
-#include "runner_doiact_black_holes.h"
-#undef FUNCTION_TASK_LOOP
-#undef FUNCTION
-
-/**
- * @brief Intermediate task after the density to check that the smoothing
- * lengths are correct.
- *
- * @param r The runner thread.
- * @param c The cell.
- * @param timer Are we timing this ?
- */
-void runner_do_stars_ghost(struct runner *r, struct cell *c, int timer) {
-
-  struct spart *restrict sparts = c->stars.parts;
-  const struct engine *e = r->e;
-  const struct unit_system *us = e->internal_units;
-  const int with_cosmology = (e->policy & engine_policy_cosmology);
-  const struct cosmology *cosmo = e->cosmology;
-  const struct feedback_props *feedback_props = e->feedback_props;
-  const float stars_h_max = e->hydro_properties->h_max;
-  const float stars_h_min = e->hydro_properties->h_min;
-  const float eps = e->stars_properties->h_tolerance;
-  const float stars_eta_dim =
-      pow_dimension(e->stars_properties->eta_neighbours);
-  const int max_smoothing_iter = e->stars_properties->max_smoothing_iterations;
-  int redo = 0, scount = 0;
-
-  /* Running value of the maximal smoothing length */
-  double h_max = c->stars.h_max;
-
-  TIMER_TIC;
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (c->nodeID != e->nodeID)
-    error("Running the star ghost on a foreign node!");
-#endif
-
-  /* Anything to do here? */
-  if (c->stars.count == 0) return;
-  if (!cell_is_active_stars(c, e)) return;
-
-  /* Recurse? */
-  if (c->split) {
-    for (int k = 0; k < 8; k++) {
-      if (c->progeny[k] != NULL) {
-        runner_do_stars_ghost(r, c->progeny[k], 0);
-
-        /* Update h_max */
-        h_max = max(h_max, c->progeny[k]->stars.h_max);
-      }
-    }
-  } else {
-
-    /* Init the list of active particles that have to be updated. */
-    int *sid = NULL;
-    float *h_0 = NULL;
-    float *left = NULL;
-    float *right = NULL;
-    if ((sid = (int *)malloc(sizeof(int) * c->stars.count)) == NULL)
-      error("Can't allocate memory for sid.");
-    if ((h_0 = (float *)malloc(sizeof(float) * c->stars.count)) == NULL)
-      error("Can't allocate memory for h_0.");
-    if ((left = (float *)malloc(sizeof(float) * c->stars.count)) == NULL)
-      error("Can't allocate memory for left.");
-    if ((right = (float *)malloc(sizeof(float) * c->stars.count)) == NULL)
-      error("Can't allocate memory for right.");
-    for (int k = 0; k < c->stars.count; k++)
-      if (spart_is_active(&sparts[k], e) &&
-          feedback_is_active(&sparts[k], e->time, cosmo, with_cosmology)) {
-        sid[scount] = k;
-        h_0[scount] = sparts[k].h;
-        left[scount] = 0.f;
-        right[scount] = stars_h_max;
-        ++scount;
-      }
-
-    /* While there are particles that need to be updated... */
-    for (int num_reruns = 0; scount > 0 && num_reruns < max_smoothing_iter;
-         num_reruns++) {
-
-      /* Reset the redo-count. */
-      redo = 0;
-
-      /* Loop over the remaining active parts in this cell. */
-      for (int i = 0; i < scount; i++) {
-
-        /* Get a direct pointer on the part. */
-        struct spart *sp = &sparts[sid[i]];
-
-#ifdef SWIFT_DEBUG_CHECKS
-        /* Is this part within the timestep? */
-        if (!spart_is_active(sp, e))
-          error("Ghost applied to inactive particle");
-#endif
-
-        /* Get some useful values */
-        const float h_init = h_0[i];
-        const float h_old = sp->h;
-        const float h_old_dim = pow_dimension(h_old);
-        const float h_old_dim_minus_one = pow_dimension_minus_one(h_old);
-
-        float h_new;
-        int has_no_neighbours = 0;
-
-        if (sp->density.wcount == 0.f) { /* No neighbours case */
-
-          /* Flag that there were no neighbours */
-          has_no_neighbours = 1;
-
-          /* Double h and try again */
-          h_new = 2.f * h_old;
-
-        } else {
-
-          /* Finish the density calculation */
-          stars_end_density(sp, cosmo);
-
-          /* Compute one step of the Newton-Raphson scheme */
-          const float n_sum = sp->density.wcount * h_old_dim;
-          const float n_target = stars_eta_dim;
-          const float f = n_sum - n_target;
-          const float f_prime =
-              sp->density.wcount_dh * h_old_dim +
-              hydro_dimension * sp->density.wcount * h_old_dim_minus_one;
-
-          /* Improve the bisection bounds */
-          if (n_sum < n_target)
-            left[i] = max(left[i], h_old);
-          else if (n_sum > n_target)
-            right[i] = min(right[i], h_old);
-
-#ifdef SWIFT_DEBUG_CHECKS
-          /* Check the validity of the left and right bounds */
-          if (left[i] > right[i])
-            error("Invalid left (%e) and right (%e)", left[i], right[i]);
-#endif
-
-          /* Skip if h is already h_max and we don't have enough neighbours */
-          /* Same if we are below h_min */
-          if (((sp->h >= stars_h_max) && (f < 0.f)) ||
-              ((sp->h <= stars_h_min) && (f > 0.f))) {
-
-            stars_reset_feedback(sp);
-
-            /* Only do feedback if stars have a reasonable birth time */
-            if (feedback_do_feedback(sp)) {
-
-              const integertime_t ti_step = get_integer_timestep(sp->time_bin);
-              const integertime_t ti_begin =
-                  get_integer_time_begin(e->ti_current - 1, sp->time_bin);
-
-              /* Get particle time-step */
-              double dt;
-              if (with_cosmology) {
-                dt = cosmology_get_delta_time(e->cosmology, ti_begin,
-                                              ti_begin + ti_step);
-              } else {
-                dt = get_timestep(sp->time_bin, e->time_base);
-              }
-
-              /* Calculate age of the star at current time */
-              double star_age_end_of_step;
-              if (with_cosmology) {
-                star_age_end_of_step =
-                    cosmology_get_delta_time_from_scale_factors(
-                        cosmo, (double)sp->birth_scale_factor, cosmo->a);
-              } else {
-                star_age_end_of_step = (float)e->time - sp->birth_time;
-              }
-
-              /* Has this star been around for a while ? */
-              if (star_age_end_of_step > 0.) {
-
-                /* Age of the star at the start of the step */
-                const double star_age_beg_of_step =
-                    max(star_age_end_of_step - dt, 0.);
-
-                /* Compute the stellar evolution  */
-                feedback_evolve_spart(sp, feedback_props, cosmo, us,
-                                      star_age_beg_of_step, dt);
-              } else {
-
-                /* Reset the feedback fields of the star particle */
-                feedback_reset_feedback(sp, feedback_props);
-              }
-            } else {
-
-              feedback_reset_feedback(sp, feedback_props);
-            }
-
-            /* Ok, we are done with this particle */
-            continue;
-          }
-
-          /* Normal case: Use Newton-Raphson to get a better value of h */
-
-          /* Avoid floating point exception from f_prime = 0 */
-          h_new = h_old - f / (f_prime + FLT_MIN);
-
-          /* Be verbose about the particles that struggle to converge */
-          if (num_reruns > max_smoothing_iter - 10) {
-
-            message(
-                "Smoothing length convergence problem: iter=%d p->id=%lld "
-                "h_init=%12.8e h_old=%12.8e h_new=%12.8e f=%f f_prime=%f "
-                "n_sum=%12.8e n_target=%12.8e left=%12.8e right=%12.8e",
-                num_reruns, sp->id, h_init, h_old, h_new, f, f_prime, n_sum,
-                n_target, left[i], right[i]);
-          }
-
-          /* Safety check: truncate to the range [ h_old/2 , 2h_old ]. */
-          h_new = min(h_new, 2.f * h_old);
-          h_new = max(h_new, 0.5f * h_old);
-
-          /* Verify that we are actually progrssing towards the answer */
-          h_new = max(h_new, left[i]);
-          h_new = min(h_new, right[i]);
-        }
-
-        /* Check whether the particle has an inappropriate smoothing length */
-        if (fabsf(h_new - h_old) > eps * h_old) {
-
-          /* Ok, correct then */
-
-          /* Case where we have been oscillating around the solution */
-          if ((h_new == left[i] && h_old == right[i]) ||
-              (h_old == left[i] && h_new == right[i])) {
-
-            /* Bissect the remaining interval */
-            sp->h = pow_inv_dimension(
-                0.5f * (pow_dimension(left[i]) + pow_dimension(right[i])));
-
-          } else {
-
-            /* Normal case */
-            sp->h = h_new;
-          }
-
-          /* If below the absolute maximum, try again */
-          if (sp->h < stars_h_max && sp->h > stars_h_min) {
-
-            /* Flag for another round of fun */
-            sid[redo] = sid[i];
-            h_0[redo] = h_0[i];
-            left[redo] = left[i];
-            right[redo] = right[i];
-            redo += 1;
-
-            /* Re-initialise everything */
-            stars_init_spart(sp);
-            feedback_init_spart(sp);
-
-            /* Off we go ! */
-            continue;
-
-          } else if (sp->h <= stars_h_min) {
-
-            /* Ok, this particle is a lost cause... */
-            sp->h = stars_h_min;
-
-          } else if (sp->h >= stars_h_max) {
-
-            /* Ok, this particle is a lost cause... */
-            sp->h = stars_h_max;
-
-            /* Do some damage control if no neighbours at all were found */
-            if (has_no_neighbours) {
-              stars_spart_has_no_neighbours(sp, cosmo);
-            }
-
-          } else {
-            error(
-                "Fundamental problem with the smoothing length iteration "
-                "logic.");
-          }
-        }
-
-        /* We now have a particle whose smoothing length has converged */
-
-        /* Check if h_max has increased */
-        h_max = max(h_max, sp->h);
-
-        stars_reset_feedback(sp);
-
-        /* Only do feedback if stars have a reasonable birth time */
-        if (feedback_do_feedback(sp)) {
-
-          const integertime_t ti_step = get_integer_timestep(sp->time_bin);
-          const integertime_t ti_begin =
-              get_integer_time_begin(e->ti_current - 1, sp->time_bin);
-
-          /* Get particle time-step */
-          double dt;
-          if (with_cosmology) {
-            dt = cosmology_get_delta_time(e->cosmology, ti_begin,
-                                          ti_begin + ti_step);
-          } else {
-            dt = get_timestep(sp->time_bin, e->time_base);
-          }
-
-          /* Calculate age of the star at current time */
-          double star_age_end_of_step;
-          if (with_cosmology) {
-            star_age_end_of_step = cosmology_get_delta_time_from_scale_factors(
-                cosmo, sp->birth_scale_factor, (float)cosmo->a);
-          } else {
-            star_age_end_of_step = (float)e->time - sp->birth_time;
-          }
-
-          /* Has this star been around for a while ? */
-          if (star_age_end_of_step > 0.) {
-
-            /* Age of the star at the start of the step */
-            const double star_age_beg_of_step =
-                max(star_age_end_of_step - dt, 0.);
-
-            /* Compute the stellar evolution  */
-            feedback_evolve_spart(sp, feedback_props, cosmo, us,
-                                  star_age_beg_of_step, dt);
-          } else {
-
-            /* Reset the feedback fields of the star particle */
-            feedback_reset_feedback(sp, feedback_props);
-          }
-        } else {
-
-          /* Reset the feedback fields of the star particle */
-          feedback_reset_feedback(sp, feedback_props);
-        }
-      }
-
-      /* We now need to treat the particles whose smoothing length had not
-       * converged again */
-
-      /* Re-set the counter for the next loop (potentially). */
-      scount = redo;
-      if (scount > 0) {
-
-        /* Climb up the cell hierarchy. */
-        for (struct cell *finger = c; finger != NULL; finger = finger->parent) {
-
-          /* Run through this cell's density interactions. */
-          for (struct link *l = finger->stars.density; l != NULL; l = l->next) {
-
-#ifdef SWIFT_DEBUG_CHECKS
-            if (l->t->ti_run < r->e->ti_current)
-              error("Density task should have been run.");
-#endif
-
-            /* Self-interaction? */
-            if (l->t->type == task_type_self)
-              runner_doself_subset_branch_stars_density(r, finger, sparts, sid,
-                                                        scount);
-
-            /* Otherwise, pair interaction? */
-            else if (l->t->type == task_type_pair) {
-
-              /* Left or right? */
-              if (l->t->ci == finger)
-                runner_dopair_subset_branch_stars_density(
-                    r, finger, sparts, sid, scount, l->t->cj);
-              else
-                runner_dopair_subset_branch_stars_density(
-                    r, finger, sparts, sid, scount, l->t->ci);
-            }
-
-            /* Otherwise, sub-self interaction? */
-            else if (l->t->type == task_type_sub_self)
-              runner_dosub_subset_stars_density(r, finger, sparts, sid, scount,
-                                                NULL, 1);
-
-            /* Otherwise, sub-pair interaction? */
-            else if (l->t->type == task_type_sub_pair) {
-
-              /* Left or right? */
-              if (l->t->ci == finger)
-                runner_dosub_subset_stars_density(r, finger, sparts, sid,
-                                                  scount, l->t->cj, 1);
-              else
-                runner_dosub_subset_stars_density(r, finger, sparts, sid,
-                                                  scount, l->t->ci, 1);
-            }
-          }
-        }
-      }
-    }
-
-    if (scount) {
-      error("Smoothing length failed to converge on %i particles.", scount);
-    }
-
-    /* Be clean */
-    free(left);
-    free(right);
-    free(sid);
-    free(h_0);
-  }
-
-  /* Update h_max */
-  c->stars.h_max = h_max;
-
-  /* The ghost may not always be at the top level.
-   * Therefore we need to update h_max between the super- and top-levels */
-  if (c->stars.ghost) {
-    for (struct cell *tmp = c->parent; tmp != NULL; tmp = tmp->parent) {
-      atomic_max_d(&tmp->stars.h_max, h_max);
-    }
-  }
-
-  if (timer) TIMER_TOC(timer_do_stars_ghost);
-}
-
-/**
- * @brief Intermediate task after the density to check that the smoothing
- * lengths are correct.
- *
- * @param r The runner thread.
- * @param c The cell.
- * @param timer Are we timing this ?
- */
-void runner_do_black_holes_density_ghost(struct runner *r, struct cell *c,
-                                         int timer) {
-
-  struct bpart *restrict bparts = c->black_holes.parts;
-  const struct engine *e = r->e;
-  const struct cosmology *cosmo = e->cosmology;
-  const float black_holes_h_max = e->hydro_properties->h_max;
-  const float black_holes_h_min = e->hydro_properties->h_min;
-  const float eps = e->black_holes_properties->h_tolerance;
-  const float black_holes_eta_dim =
-      pow_dimension(e->black_holes_properties->eta_neighbours);
-  const int max_smoothing_iter = e->hydro_properties->max_smoothing_iterations;
-  int redo = 0, bcount = 0;
-
-  /* Running value of the maximal smoothing length */
-  double h_max = c->black_holes.h_max;
-
-  TIMER_TIC;
-
-  /* Anything to do here? */
-  if (c->black_holes.count == 0) return;
-  if (!cell_is_active_black_holes(c, e)) return;
-
-  /* Recurse? */
-  if (c->split) {
-    for (int k = 0; k < 8; k++) {
-      if (c->progeny[k] != NULL) {
-        runner_do_black_holes_density_ghost(r, c->progeny[k], 0);
-
-        /* Update h_max */
-        h_max = max(h_max, c->progeny[k]->black_holes.h_max);
-      }
-    }
-  } else {
-
-    /* Init the list of active particles that have to be updated. */
-    int *sid = NULL;
-    float *h_0 = NULL;
-    float *left = NULL;
-    float *right = NULL;
-    if ((sid = (int *)malloc(sizeof(int) * c->black_holes.count)) == NULL)
-      error("Can't allocate memory for sid.");
-    if ((h_0 = (float *)malloc(sizeof(float) * c->black_holes.count)) == NULL)
-      error("Can't allocate memory for h_0.");
-    if ((left = (float *)malloc(sizeof(float) * c->black_holes.count)) == NULL)
-      error("Can't allocate memory for left.");
-    if ((right = (float *)malloc(sizeof(float) * c->black_holes.count)) == NULL)
-      error("Can't allocate memory for right.");
-    for (int k = 0; k < c->black_holes.count; k++)
-      if (bpart_is_active(&bparts[k], e)) {
-        sid[bcount] = k;
-        h_0[bcount] = bparts[k].h;
-        left[bcount] = 0.f;
-        right[bcount] = black_holes_h_max;
-        ++bcount;
-      }
-
-    /* While there are particles that need to be updated... */
-    for (int num_reruns = 0; bcount > 0 && num_reruns < max_smoothing_iter;
-         num_reruns++) {
-
-      /* Reset the redo-count. */
-      redo = 0;
-
-      /* Loop over the remaining active parts in this cell. */
-      for (int i = 0; i < bcount; i++) {
-
-        /* Get a direct pointer on the part. */
-        struct bpart *bp = &bparts[sid[i]];
-
-#ifdef SWIFT_DEBUG_CHECKS
-        /* Is this part within the timestep? */
-        if (!bpart_is_active(bp, e))
-          error("Ghost applied to inactive particle");
-#endif
-
-        /* Get some useful values */
-        const float h_init = h_0[i];
-        const float h_old = bp->h;
-        const float h_old_dim = pow_dimension(h_old);
-        const float h_old_dim_minus_one = pow_dimension_minus_one(h_old);
-
-        float h_new;
-        int has_no_neighbours = 0;
-
-        if (bp->density.wcount == 0.f) { /* No neighbours case */
-
-          /* Flag that there were no neighbours */
-          has_no_neighbours = 1;
-
-          /* Double h and try again */
-          h_new = 2.f * h_old;
-
-        } else {
-
-          /* Finish the density calculation */
-          black_holes_end_density(bp, cosmo);
-
-          /* Compute one step of the Newton-Raphson scheme */
-          const float n_sum = bp->density.wcount * h_old_dim;
-          const float n_target = black_holes_eta_dim;
-          const float f = n_sum - n_target;
-          const float f_prime =
-              bp->density.wcount_dh * h_old_dim +
-              hydro_dimension * bp->density.wcount * h_old_dim_minus_one;
-
-          /* Improve the bisection bounds */
-          if (n_sum < n_target)
-            left[i] = max(left[i], h_old);
-          else if (n_sum > n_target)
-            right[i] = min(right[i], h_old);
-
-#ifdef SWIFT_DEBUG_CHECKS
-          /* Check the validity of the left and right bounds */
-          if (left[i] > right[i])
-            error("Invalid left (%e) and right (%e)", left[i], right[i]);
-#endif
-
-          /* Skip if h is already h_max and we don't have enough neighbours */
-          /* Same if we are below h_min */
-          if (((bp->h >= black_holes_h_max) && (f < 0.f)) ||
-              ((bp->h <= black_holes_h_min) && (f > 0.f))) {
-
-            black_holes_reset_feedback(bp);
-
-            /* Ok, we are done with this particle */
-            continue;
-          }
-
-          /* Normal case: Use Newton-Raphson to get a better value of h */
-
-          /* Avoid floating point exception from f_prime = 0 */
-          h_new = h_old - f / (f_prime + FLT_MIN);
-
-          /* Be verbose about the particles that struggle to converge */
-          if (num_reruns > max_smoothing_iter - 10) {
-
-            message(
-                "Smoothing length convergence problem: iter=%d p->id=%lld "
-                "h_init=%12.8e h_old=%12.8e h_new=%12.8e f=%f f_prime=%f "
-                "n_sum=%12.8e n_target=%12.8e left=%12.8e right=%12.8e",
-                num_reruns, bp->id, h_init, h_old, h_new, f, f_prime, n_sum,
-                n_target, left[i], right[i]);
-          }
-
-          /* Safety check: truncate to the range [ h_old/2 , 2h_old ]. */
-          h_new = min(h_new, 2.f * h_old);
-          h_new = max(h_new, 0.5f * h_old);
-
-          /* Verify that we are actually progrssing towards the answer */
-          h_new = max(h_new, left[i]);
-          h_new = min(h_new, right[i]);
-        }
-
-        /* Check whether the particle has an inappropriate smoothing length */
-        if (fabsf(h_new - h_old) > eps * h_old) {
-
-          /* Ok, correct then */
-
-          /* Case where we have been oscillating around the solution */
-          if ((h_new == left[i] && h_old == right[i]) ||
-              (h_old == left[i] && h_new == right[i])) {
-
-            /* Bissect the remaining interval */
-            bp->h = pow_inv_dimension(
-                0.5f * (pow_dimension(left[i]) + pow_dimension(right[i])));
-
-          } else {
-
-            /* Normal case */
-            bp->h = h_new;
-          }
-
-          /* If below the absolute maximum, try again */
-          if (bp->h < black_holes_h_max && bp->h > black_holes_h_min) {
-
-            /* Flag for another round of fun */
-            sid[redo] = sid[i];
-            h_0[redo] = h_0[i];
-            left[redo] = left[i];
-            right[redo] = right[i];
-            redo += 1;
-
-            /* Re-initialise everything */
-            black_holes_init_bpart(bp);
-
-            /* Off we go ! */
-            continue;
-
-          } else if (bp->h <= black_holes_h_min) {
-
-            /* Ok, this particle is a lost cause... */
-            bp->h = black_holes_h_min;
-
-          } else if (bp->h >= black_holes_h_max) {
-
-            /* Ok, this particle is a lost cause... */
-            bp->h = black_holes_h_max;
-
-            /* Do some damage control if no neighbours at all were found */
-            if (has_no_neighbours) {
-              black_holes_bpart_has_no_neighbours(bp, cosmo);
-            }
-
-          } else {
-            error(
-                "Fundamental problem with the smoothing length iteration "
-                "logic.");
-          }
-        }
-
-        /* We now have a particle whose smoothing length has converged */
-
-        black_holes_reset_feedback(bp);
-
-        /* Check if h_max has increased */
-        h_max = max(h_max, bp->h);
-      }
-
-      /* We now need to treat the particles whose smoothing length had not
-       * converged again */
-
-      /* Re-set the counter for the next loop (potentially). */
-      bcount = redo;
-      if (bcount > 0) {
-
-        /* Climb up the cell hierarchy. */
-        for (struct cell *finger = c; finger != NULL; finger = finger->parent) {
-
-          /* Run through this cell's density interactions. */
-          for (struct link *l = finger->black_holes.density; l != NULL;
-               l = l->next) {
-
-#ifdef SWIFT_DEBUG_CHECKS
-            if (l->t->ti_run < r->e->ti_current)
-              error("Density task should have been run.");
-#endif
-
-            /* Self-interaction? */
-            if (l->t->type == task_type_self)
-              runner_doself_subset_branch_bh_density(r, finger, bparts, sid,
-                                                     bcount);
-
-            /* Otherwise, pair interaction? */
-            else if (l->t->type == task_type_pair) {
-
-              /* Left or right? */
-              if (l->t->ci == finger)
-                runner_dopair_subset_branch_bh_density(r, finger, bparts, sid,
-                                                       bcount, l->t->cj);
-              else
-                runner_dopair_subset_branch_bh_density(r, finger, bparts, sid,
-                                                       bcount, l->t->ci);
-            }
-
-            /* Otherwise, sub-self interaction? */
-            else if (l->t->type == task_type_sub_self)
-              runner_dosub_subset_bh_density(r, finger, bparts, sid, bcount,
-                                             NULL, 1);
-
-            /* Otherwise, sub-pair interaction? */
-            else if (l->t->type == task_type_sub_pair) {
-
-              /* Left or right? */
-              if (l->t->ci == finger)
-                runner_dosub_subset_bh_density(r, finger, bparts, sid, bcount,
-                                               l->t->cj, 1);
-              else
-                runner_dosub_subset_bh_density(r, finger, bparts, sid, bcount,
-                                               l->t->ci, 1);
-            }
-          }
-        }
-      }
-    }
-
-    if (bcount) {
-      error("Smoothing length failed to converge on %i particles.", bcount);
-    }
-
-    /* Be clean */
-    free(left);
-    free(right);
-    free(sid);
-    free(h_0);
-  }
-
-  /* Update h_max */
-  c->black_holes.h_max = h_max;
-
-  /* The ghost may not always be at the top level.
-   * Therefore we need to update h_max between the super- and top-levels */
-  if (c->black_holes.density_ghost) {
-    for (struct cell *tmp = c->parent; tmp != NULL; tmp = tmp->parent) {
-      atomic_max_d(&tmp->black_holes.h_max, h_max);
-    }
-  }
-
-  if (timer) TIMER_TOC(timer_do_black_holes_ghost);
-}
-
-/**
- * @brief Intermediate task after the BHs have done their swallowing step.
- * This is used to update the BH quantities if necessary.
- *
- * @param r The runner thread.
- * @param c The cell.
- * @param timer Are we timing this ?
- */
-void runner_do_black_holes_swallow_ghost(struct runner *r, struct cell *c,
-                                         int timer) {
-
-  struct bpart *restrict bparts = c->black_holes.parts;
-  const int count = c->black_holes.count;
-  const struct engine *e = r->e;
-  const int with_cosmology = e->policy & engine_policy_cosmology;
-
-  TIMER_TIC;
-
-  /* Anything to do here? */
-  if (!cell_is_active_hydro(c, e)) return;
-
-  /* Recurse? */
-  if (c->split) {
-    for (int k = 0; k < 8; k++)
-      if (c->progeny[k] != NULL)
-        runner_do_black_holes_swallow_ghost(r, c->progeny[k], 0);
-  } else {
-
-    /* Loop over the parts in this cell. */
-    for (int i = 0; i < count; i++) {
-
-      /* Get a direct pointer on the part. */
-      struct bpart *bp = &bparts[i];
-
-      if (bpart_is_active(bp, e)) {
-
-        /* Compute the final operations for repositioning of this BH */
-        black_holes_end_reposition(bp, e->black_holes_properties,
-                                   e->physical_constants, e->cosmology);
-
-        /* Get particle time-step */
-        double dt;
-        if (with_cosmology) {
-          const integertime_t ti_step = get_integer_timestep(bp->time_bin);
-          const integertime_t ti_begin =
-              get_integer_time_begin(e->ti_current - 1, bp->time_bin);
-
-          dt = cosmology_get_delta_time(e->cosmology, ti_begin,
-                                        ti_begin + ti_step);
-        } else {
-          dt = get_timestep(bp->time_bin, e->time_base);
-        }
-
-        /* Compute variables required for the feedback loop */
-        black_holes_prepare_feedback(bp, e->black_holes_properties,
-                                     e->physical_constants, e->cosmology, dt);
-      }
-    }
-  }
-
-  if (timer) TIMER_TOC(timer_do_black_holes_ghost);
-}
-
-/**
- * @brief Calculate gravity acceleration from external potential
- *
- * @param r runner task
- * @param c cell
- * @param timer 1 if the time is to be recorded.
- */
-void runner_do_grav_external(struct runner *r, struct cell *c, int timer) {
-
-  struct gpart *restrict gparts = c->grav.parts;
-  const int gcount = c->grav.count;
-  const struct engine *e = r->e;
-  const struct external_potential *potential = e->external_potential;
-  const struct phys_const *constants = e->physical_constants;
-  const double time = r->e->time;
-
-  TIMER_TIC;
-
-  /* Anything to do here? */
-  if (!cell_is_active_gravity(c, e)) return;
-
-  /* Recurse? */
-  if (c->split) {
-    for (int k = 0; k < 8; k++)
-      if (c->progeny[k] != NULL) runner_do_grav_external(r, c->progeny[k], 0);
-  } else {
-
-    /* Loop over the gparts in this cell. */
-    for (int i = 0; i < gcount; i++) {
-
-      /* Get a direct pointer on the part. */
-      struct gpart *restrict gp = &gparts[i];
-
-      /* Is this part within the time step? */
-      if (gpart_is_active(gp, e)) {
-        external_gravity_acceleration(time, potential, constants, gp);
-      }
-    }
-  }
-
-  if (timer) TIMER_TOC(timer_dograv_external);
-}
-
-/**
- * @brief Calculate gravity accelerations from the periodic mesh
- *
- * @param r runner task
- * @param c cell
- * @param timer 1 if the time is to be recorded.
- */
-void runner_do_grav_mesh(struct runner *r, struct cell *c, int timer) {
-
-  struct gpart *restrict gparts = c->grav.parts;
-  const int gcount = c->grav.count;
-  const struct engine *e = r->e;
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (!e->s->periodic) error("Calling mesh forces in non-periodic mode.");
-#endif
-
-  TIMER_TIC;
-
-  /* Anything to do here? */
-  if (!cell_is_active_gravity(c, e)) return;
-
-  /* Recurse? */
-  if (c->split) {
-    for (int k = 0; k < 8; k++)
-      if (c->progeny[k] != NULL) runner_do_grav_mesh(r, c->progeny[k], 0);
-  } else {
-
-    /* Get the forces from the gravity mesh */
-    pm_mesh_interpolate_forces(e->mesh, e, gparts, gcount);
-  }
-
-  if (timer) TIMER_TOC(timer_dograv_mesh);
-}
-
-/**
- * @brief Calculate change in thermal state of particles induced
- * by radiative cooling and heating.
- *
- * @param r runner task
- * @param c cell
- * @param timer 1 if the time is to be recorded.
- */
-void runner_do_cooling(struct runner *r, struct cell *c, int timer) {
-
-  const struct engine *e = r->e;
-  const struct cosmology *cosmo = e->cosmology;
-  const int with_cosmology = (e->policy & engine_policy_cosmology);
-  const struct cooling_function_data *cooling_func = e->cooling_func;
-  const struct phys_const *constants = e->physical_constants;
-  const struct unit_system *us = e->internal_units;
-  const struct hydro_props *hydro_props = e->hydro_properties;
-  const struct entropy_floor_properties *entropy_floor_props = e->entropy_floor;
-  const double time_base = e->time_base;
-  const integertime_t ti_current = e->ti_current;
-  struct part *restrict parts = c->hydro.parts;
-  struct xpart *restrict xparts = c->hydro.xparts;
-  const int count = c->hydro.count;
-
-  TIMER_TIC;
-
-  /* Anything to do here? */
-  if (!cell_is_active_hydro(c, e)) return;
-
-  /* Recurse? */
-  if (c->split) {
-    for (int k = 0; k < 8; k++)
-      if (c->progeny[k] != NULL) runner_do_cooling(r, c->progeny[k], 0);
-  } else {
-
-    /* Loop over the parts in this cell. */
-    for (int i = 0; i < count; i++) {
-
-      /* Get a direct pointer on the part. */
-      struct part *restrict p = &parts[i];
-      struct xpart *restrict xp = &xparts[i];
-
-      if (part_is_active(p, e)) {
-
-        double dt_cool, dt_therm;
-        if (with_cosmology) {
-          const integertime_t ti_step = get_integer_timestep(p->time_bin);
-          const integertime_t ti_begin =
-              get_integer_time_begin(ti_current - 1, p->time_bin);
-
-          dt_cool =
-              cosmology_get_delta_time(cosmo, ti_begin, ti_begin + ti_step);
-          dt_therm = cosmology_get_therm_kick_factor(e->cosmology, ti_begin,
-                                                     ti_begin + ti_step);
-
-        } else {
-          dt_cool = get_timestep(p->time_bin, time_base);
-          dt_therm = get_timestep(p->time_bin, time_base);
-        }
-
-        /* Let's cool ! */
-        cooling_cool_part(constants, us, cosmo, hydro_props,
-                          entropy_floor_props, cooling_func, p, xp, dt_cool,
-                          dt_therm);
-      }
-    }
-  }
-
-  if (timer) TIMER_TOC(timer_do_cooling);
-}
-
-/**
- *
- */
-void runner_do_star_formation(struct runner *r, struct cell *c, int timer) {
-
-  struct engine *e = r->e;
-  const struct cosmology *cosmo = e->cosmology;
-  const struct star_formation *sf_props = e->star_formation;
-  const struct phys_const *phys_const = e->physical_constants;
-  const int count = c->hydro.count;
-  struct part *restrict parts = c->hydro.parts;
-  struct xpart *restrict xparts = c->hydro.xparts;
-  const int with_cosmology = (e->policy & engine_policy_cosmology);
-  const int with_feedback = (e->policy & engine_policy_feedback);
-  const struct hydro_props *restrict hydro_props = e->hydro_properties;
-  const struct unit_system *restrict us = e->internal_units;
-  struct cooling_function_data *restrict cooling = e->cooling_func;
-  const struct entropy_floor_properties *entropy_floor = e->entropy_floor;
-  const double time_base = e->time_base;
-  const integertime_t ti_current = e->ti_current;
-  const int current_stars_count = c->stars.count;
-
-  TIMER_TIC;
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (c->nodeID != e->nodeID)
-    error("Running star formation task on a foreign node!");
-#endif
-
-  /* Anything to do here? */
-  if (c->hydro.count == 0 || !cell_is_active_hydro(c, e)) {
-    star_formation_logger_log_inactive_cell(&c->stars.sfh);
-    return;
-  }
-
-  /* Reset the SFR */
-  star_formation_logger_init(&c->stars.sfh);
-
-  /* Recurse? */
-  if (c->split) {
-    for (int k = 0; k < 8; k++)
-      if (c->progeny[k] != NULL) {
-        /* Load the child cell */
-        struct cell *restrict cp = c->progeny[k];
-
-        /* Do the recursion */
-        runner_do_star_formation(r, cp, 0);
-
-        /* Update current cell using child cells */
-        star_formation_logger_add(&c->stars.sfh, &cp->stars.sfh);
-      }
-  } else {
-
-    /* Loop over the gas particles in this cell. */
-    for (int k = 0; k < count; k++) {
-
-      /* Get a handle on the part. */
-      struct part *restrict p = &parts[k];
-      struct xpart *restrict xp = &xparts[k];
-
-      /* Only work on active particles */
-      if (part_is_active(p, e)) {
-
-        /* Is this particle star forming? */
-        if (star_formation_is_star_forming(p, xp, sf_props, phys_const, cosmo,
-                                           hydro_props, us, cooling,
-                                           entropy_floor)) {
-
-          /* Time-step size for this particle */
-          double dt_star;
-          if (with_cosmology) {
-            const integertime_t ti_step = get_integer_timestep(p->time_bin);
-            const integertime_t ti_begin =
-                get_integer_time_begin(ti_current - 1, p->time_bin);
-
-            dt_star =
-                cosmology_get_delta_time(cosmo, ti_begin, ti_begin + ti_step);
-
-          } else {
-            dt_star = get_timestep(p->time_bin, time_base);
-          }
-
-          /* Compute the SF rate of the particle */
-          star_formation_compute_SFR(p, xp, sf_props, phys_const, cosmo,
-                                     dt_star);
-
-          /* Add the SFR and SFR*dt to the SFH struct of this cell */
-          star_formation_logger_log_active_part(p, xp, &c->stars.sfh, dt_star);
-
-          /* Are we forming a star particle from this SF rate? */
-          if (star_formation_should_convert_to_star(p, xp, sf_props, e,
-                                                    dt_star)) {
-
-            /* Convert the gas particle to a star particle */
-            struct spart *sp = cell_convert_part_to_spart(e, c, p, xp);
-
-            /* Did we get a star? (Or did we run out of spare ones?) */
-            if (sp != NULL) {
-
-              /* message("We formed a star id=%lld cellID=%d", sp->id,
-               * c->cellID); */
-
-              /* Copy the properties of the gas particle to the star particle */
-              star_formation_copy_properties(p, xp, sp, e, sf_props, cosmo,
-                                             with_cosmology, phys_const,
-                                             hydro_props, us, cooling);
-
-              /* Update the Star formation history */
-              star_formation_logger_log_new_spart(sp, &c->stars.sfh);
-            }
-          }
-
-        } else { /* Are we not star-forming? */
-
-          /* Update the particle to flag it as not star-forming */
-          star_formation_update_part_not_SFR(p, xp, e, sf_props,
-                                             with_cosmology);
-
-        } /* Not Star-forming? */
-
-      } else { /* is active? */
-
-        /* Check if the particle is not inhibited */
-        if (!part_is_inhibited(p, e)) {
-          star_formation_logger_log_inactive_part(p, xp, &c->stars.sfh);
-        }
-      }
-    } /* Loop over particles */
-  }
-
-  /* If we formed any stars, the star sorts are now invalid. We need to
-   * re-compute them. */
-  if (with_feedback && (c == c->top) &&
-      (current_stars_count != c->stars.count)) {
-    cell_set_star_resort_flag(c);
-  }
-
-  if (timer) TIMER_TOC(timer_do_star_formation);
-}
-
-/**
- * @brief Sorts again all the stars in a given cell hierarchy.
- *
- * This is intended to be used after the star formation task has been run
- * to get the cells back into a state where self/pair star tasks can be run.
- *
- * @param r The thread #runner.
- * @param c The top-level cell to run on.
- * @param timer Are we timing this?
- */
-void runner_do_stars_resort(struct runner *r, struct cell *c, const int timer) {
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (c->nodeID != r->e->nodeID) error("Task must be run locally!");
-#endif
-
-  TIMER_TIC;
-
-  /* Did we demand a recalculation of the stars'sorts? */
-  if (cell_get_flag(c, cell_flag_do_stars_resort)) {
-    runner_do_all_stars_sort(r, c);
-    cell_clear_flag(c, cell_flag_do_stars_resort);
-  }
-
-  if (timer) TIMER_TOC(timer_do_stars_resort);
-}
-
-/**
- * @brief Sort the entries in ascending order using QuickSort.
- *
- * @param sort The entries
- * @param N The number of entries.
- */
-void runner_do_sort_ascending(struct sort_entry *sort, int N) {
-
-  struct {
-    short int lo, hi;
-  } qstack[10];
-  int qpos, i, j, lo, hi, imin;
-  struct sort_entry temp;
-  float pivot;
-
-  /* Sort parts in cell_i in decreasing order with quicksort */
-  qstack[0].lo = 0;
-  qstack[0].hi = N - 1;
-  qpos = 0;
-  while (qpos >= 0) {
-    lo = qstack[qpos].lo;
-    hi = qstack[qpos].hi;
-    qpos -= 1;
-    if (hi - lo < 15) {
-      for (i = lo; i < hi; i++) {
-        imin = i;
-        for (j = i + 1; j <= hi; j++)
-          if (sort[j].d < sort[imin].d) imin = j;
-        if (imin != i) {
-          temp = sort[imin];
-          sort[imin] = sort[i];
-          sort[i] = temp;
-        }
-      }
-    } else {
-      pivot = sort[(lo + hi) / 2].d;
-      i = lo;
-      j = hi;
-      while (i <= j) {
-        while (sort[i].d < pivot) i++;
-        while (sort[j].d > pivot) j--;
-        if (i <= j) {
-          if (i < j) {
-            temp = sort[i];
-            sort[i] = sort[j];
-            sort[j] = temp;
-          }
-          i += 1;
-          j -= 1;
-        }
-      }
-      if (j > (lo + hi) / 2) {
-        if (lo < j) {
-          qpos += 1;
-          qstack[qpos].lo = lo;
-          qstack[qpos].hi = j;
-        }
-        if (i < hi) {
-          qpos += 1;
-          qstack[qpos].lo = i;
-          qstack[qpos].hi = hi;
-        }
-      } else {
-        if (i < hi) {
-          qpos += 1;
-          qstack[qpos].lo = i;
-          qstack[qpos].hi = hi;
-        }
-        if (lo < j) {
-          qpos += 1;
-          qstack[qpos].lo = lo;
-          qstack[qpos].hi = j;
-        }
-      }
-    }
-  }
-}
-
-#ifdef SWIFT_DEBUG_CHECKS
-/**
- * @brief Recursively checks that the flags are consistent in a cell hierarchy.
- *
- * Debugging function. Exists in two flavours: hydro & stars.
- */
-#define RUNNER_CHECK_SORTS(TYPE)                                               \
-  void runner_check_sorts_##TYPE(struct cell *c, int flags) {                  \
-                                                                               \
-    if (flags & ~c->TYPE.sorted) error("Inconsistent sort flags (downward)!"); \
-    if (c->split)                                                              \
-      for (int k = 0; k < 8; k++)                                              \
-        if (c->progeny[k] != NULL && c->progeny[k]->TYPE.count > 0)            \
-          runner_check_sorts_##TYPE(c->progeny[k], c->TYPE.sorted);            \
-  }
-#else
-#define RUNNER_CHECK_SORTS(TYPE)                                       \
-  void runner_check_sorts_##TYPE(struct cell *c, int flags) {          \
-    error("Calling debugging code without debugging flag activated."); \
-  }
-#endif
-
-RUNNER_CHECK_SORTS(hydro)
-RUNNER_CHECK_SORTS(stars)
-
-/**
- * @brief Sort the particles in the given cell along all cardinal directions.
- *
- * @param r The #runner.
- * @param c The #cell.
- * @param flags Cell flag.
- * @param cleanup If true, re-build the sorts for the selected flags instead
- *        of just adding them.
- * @param clock Flag indicating whether to record the timing or not, needed
- *      for recursive calls.
- */
-void runner_do_hydro_sort(struct runner *r, struct cell *c, int flags,
-                          int cleanup, int clock) {
-
-  struct sort_entry *fingers[8];
-  const int count = c->hydro.count;
-  const struct part *parts = c->hydro.parts;
-  struct xpart *xparts = c->hydro.xparts;
-  float buff[8];
-
-  TIMER_TIC;
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (c->hydro.super == NULL) error("Task called above the super level!!!");
-#endif
-
-  /* We need to do the local sorts plus whatever was requested further up. */
-  flags |= c->hydro.do_sort;
-  if (cleanup) {
-    c->hydro.sorted = 0;
-  } else {
-    flags &= ~c->hydro.sorted;
-  }
-  if (flags == 0 && !cell_get_flag(c, cell_flag_do_hydro_sub_sort)) return;
-
-  /* Check that the particles have been moved to the current time */
-  if (flags && !cell_are_part_drifted(c, r->e))
-    error("Sorting un-drifted cell c->nodeID=%d", c->nodeID);
-
-#ifdef SWIFT_DEBUG_CHECKS
-  /* Make sure the sort flags are consistent (downward). */
-  runner_check_sorts_hydro(c, c->hydro.sorted);
-
-  /* Make sure the sort flags are consistent (upard). */
-  for (struct cell *finger = c->parent; finger != NULL;
-       finger = finger->parent) {
-    if (finger->hydro.sorted & ~c->hydro.sorted)
-      error("Inconsistent sort flags (upward).");
-  }
-
-  /* Update the sort timer which represents the last time the sorts
-     were re-set. */
-  if (c->hydro.sorted == 0) c->hydro.ti_sort = r->e->ti_current;
-#endif
-
-  /* Allocate memory for sorting. */
-  cell_malloc_hydro_sorts(c, flags);
-
-  /* Does this cell have any progeny? */
-  if (c->split) {
-
-    /* Fill in the gaps within the progeny. */
-    float dx_max_sort = 0.0f;
-    float dx_max_sort_old = 0.0f;
-    for (int k = 0; k < 8; k++) {
-      if (c->progeny[k] != NULL) {
-
-        if (c->progeny[k]->hydro.count > 0) {
-
-          /* Only propagate cleanup if the progeny is stale. */
-          runner_do_hydro_sort(
-              r, c->progeny[k], flags,
-              cleanup && (c->progeny[k]->hydro.dx_max_sort_old >
-                          space_maxreldx * c->progeny[k]->dmin),
-              0);
-          dx_max_sort = max(dx_max_sort, c->progeny[k]->hydro.dx_max_sort);
-          dx_max_sort_old =
-              max(dx_max_sort_old, c->progeny[k]->hydro.dx_max_sort_old);
-        } else {
-
-          /* We need to clean up the unused flags that were in case the
-             number of particles in the cell would change */
-          cell_clear_hydro_sort_flags(c->progeny[k], /*clear_unused_flags=*/1);
-        }
-      }
-    }
-    c->hydro.dx_max_sort = dx_max_sort;
-    c->hydro.dx_max_sort_old = dx_max_sort_old;
-
-    /* Loop over the 13 different sort arrays. */
-    for (int j = 0; j < 13; j++) {
-
-      /* Has this sort array been flagged? */
-      if (!(flags & (1 << j))) continue;
-
-      /* Init the particle index offsets. */
-      int off[8];
-      off[0] = 0;
-      for (int k = 1; k < 8; k++)
-        if (c->progeny[k - 1] != NULL)
-          off[k] = off[k - 1] + c->progeny[k - 1]->hydro.count;
-        else
-          off[k] = off[k - 1];
-
-      /* Init the entries and indices. */
-      int inds[8];
-      for (int k = 0; k < 8; k++) {
-        inds[k] = k;
-        if (c->progeny[k] != NULL && c->progeny[k]->hydro.count > 0) {
-          fingers[k] = c->progeny[k]->hydro.sort[j];
-          buff[k] = fingers[k]->d;
-          off[k] = off[k];
-        } else
-          buff[k] = FLT_MAX;
-      }
-
-      /* Sort the buffer. */
-      for (int i = 0; i < 7; i++)
-        for (int k = i + 1; k < 8; k++)
-          if (buff[inds[k]] < buff[inds[i]]) {
-            int temp_i = inds[i];
-            inds[i] = inds[k];
-            inds[k] = temp_i;
-          }
-
-      /* For each entry in the new sort list. */
-      struct sort_entry *finger = c->hydro.sort[j];
-      for (int ind = 0; ind < count; ind++) {
-
-        /* Copy the minimum into the new sort array. */
-        finger[ind].d = buff[inds[0]];
-        finger[ind].i = fingers[inds[0]]->i + off[inds[0]];
-
-        /* Update the buffer. */
-        fingers[inds[0]] += 1;
-        buff[inds[0]] = fingers[inds[0]]->d;
-
-        /* Find the smallest entry. */
-        for (int k = 1; k < 8 && buff[inds[k]] < buff[inds[k - 1]]; k++) {
-          int temp_i = inds[k - 1];
-          inds[k - 1] = inds[k];
-          inds[k] = temp_i;
-        }
-
-      } /* Merge. */
-
-      /* Add a sentinel. */
-      c->hydro.sort[j][count].d = FLT_MAX;
-      c->hydro.sort[j][count].i = 0;
-
-      /* Mark as sorted. */
-      atomic_or(&c->hydro.sorted, 1 << j);
-
-    } /* loop over sort arrays. */
-
-  } /* progeny? */
-
-  /* Otherwise, just sort. */
-  else {
-
-    /* Reset the sort distance */
-    if (c->hydro.sorted == 0) {
-#ifdef SWIFT_DEBUG_CHECKS
-      if (xparts != NULL && c->nodeID != engine_rank)
-        error("Have non-NULL xparts in foreign cell");
-#endif
-
-      /* And the individual sort distances if we are a local cell */
-      if (xparts != NULL) {
-        for (int k = 0; k < count; k++) {
-          xparts[k].x_diff_sort[0] = 0.0f;
-          xparts[k].x_diff_sort[1] = 0.0f;
-          xparts[k].x_diff_sort[2] = 0.0f;
-        }
-      }
-      c->hydro.dx_max_sort_old = 0.f;
-      c->hydro.dx_max_sort = 0.f;
-    }
-
-    /* Fill the sort array. */
-    for (int k = 0; k < count; k++) {
-      const double px[3] = {parts[k].x[0], parts[k].x[1], parts[k].x[2]};
-      for (int j = 0; j < 13; j++)
-        if (flags & (1 << j)) {
-          c->hydro.sort[j][k].i = k;
-          c->hydro.sort[j][k].d = px[0] * runner_shift[j][0] +
-                                  px[1] * runner_shift[j][1] +
-                                  px[2] * runner_shift[j][2];
-        }
-    }
-
-    /* Add the sentinel and sort. */
-    for (int j = 0; j < 13; j++)
-      if (flags & (1 << j)) {
-        c->hydro.sort[j][count].d = FLT_MAX;
-        c->hydro.sort[j][count].i = 0;
-        runner_do_sort_ascending(c->hydro.sort[j], count);
-        atomic_or(&c->hydro.sorted, 1 << j);
-      }
-  }
-
-#ifdef SWIFT_DEBUG_CHECKS
-  /* Verify the sorting. */
-  for (int j = 0; j < 13; j++) {
-    if (!(flags & (1 << j))) continue;
-    struct sort_entry *finger = c->hydro.sort[j];
-    for (int k = 1; k < count; k++) {
-      if (finger[k].d < finger[k - 1].d)
-        error("Sorting failed, ascending array.");
-      if (finger[k].i >= count) error("Sorting failed, indices borked.");
-    }
-  }
-
-  /* Make sure the sort flags are consistent (downward). */
-  runner_check_sorts_hydro(c, flags);
-
-  /* Make sure the sort flags are consistent (upward). */
-  for (struct cell *finger = c->parent; finger != NULL;
-       finger = finger->parent) {
-    if (finger->hydro.sorted & ~c->hydro.sorted)
-      error("Inconsistent sort flags.");
-  }
-#endif
-
-  /* Clear the cell's sort flags. */
-  c->hydro.do_sort = 0;
-  cell_clear_flag(c, cell_flag_do_hydro_sub_sort);
-  c->hydro.requires_sorts = 0;
-
-  if (clock) TIMER_TOC(timer_dosort);
-}
-
-/**
- * @brief Sort the stars particles in the given cell along all cardinal
- * directions.
- *
- * @param r The #runner.
- * @param c The #cell.
- * @param flags Cell flag.
- * @param cleanup If true, re-build the sorts for the selected flags instead
- *        of just adding them.
- * @param clock Flag indicating whether to record the timing or not, needed
- *      for recursive calls.
- */
-void runner_do_stars_sort(struct runner *r, struct cell *c, int flags,
-                          int cleanup, int clock) {
-
-  struct sort_entry *fingers[8];
-  const int count = c->stars.count;
-  struct spart *sparts = c->stars.parts;
-  float buff[8];
-
-  TIMER_TIC;
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (c->hydro.super == NULL) error("Task called above the super level!!!");
-#endif
-
-  /* We need to do the local sorts plus whatever was requested further up. */
-  flags |= c->stars.do_sort;
-  if (cleanup) {
-    c->stars.sorted = 0;
-  } else {
-    flags &= ~c->stars.sorted;
-  }
-  if (flags == 0 && !cell_get_flag(c, cell_flag_do_stars_sub_sort)) return;
-
-  /* Check that the particles have been moved to the current time */
-  if (flags && !cell_are_spart_drifted(c, r->e)) {
-    error("Sorting un-drifted cell c->nodeID=%d", c->nodeID);
-  }
-
-#ifdef SWIFT_DEBUG_CHECKS
-  /* Make sure the sort flags are consistent (downward). */
-  runner_check_sorts_stars(c, c->stars.sorted);
-
-  /* Make sure the sort flags are consistent (upward). */
-  for (struct cell *finger = c->parent; finger != NULL;
-       finger = finger->parent) {
-    if (finger->stars.sorted & ~c->stars.sorted)
-      error("Inconsistent sort flags (upward).");
-  }
-
-  /* Update the sort timer which represents the last time the sorts
-     were re-set. */
-  if (c->stars.sorted == 0) c->stars.ti_sort = r->e->ti_current;
-#endif
-
-  /* start by allocating the entry arrays in the requested dimensions. */
-  cell_malloc_stars_sorts(c, flags);
-
-  /* Does this cell have any progeny? */
-  if (c->split) {
-
-    /* Fill in the gaps within the progeny. */
-    float dx_max_sort = 0.0f;
-    float dx_max_sort_old = 0.0f;
-    for (int k = 0; k < 8; k++) {
-      if (c->progeny[k] != NULL) {
-
-        if (c->progeny[k]->stars.count > 0) {
-
-          /* Only propagate cleanup if the progeny is stale. */
-          const int cleanup_prog =
-              cleanup && (c->progeny[k]->stars.dx_max_sort_old >
-                          space_maxreldx * c->progeny[k]->dmin);
-          runner_do_stars_sort(r, c->progeny[k], flags, cleanup_prog, 0);
-          dx_max_sort = max(dx_max_sort, c->progeny[k]->stars.dx_max_sort);
-          dx_max_sort_old =
-              max(dx_max_sort_old, c->progeny[k]->stars.dx_max_sort_old);
-        } else {
-
-          /* We need to clean up the unused flags that were in case the
-             number of particles in the cell would change */
-          cell_clear_stars_sort_flags(c->progeny[k], /*clear_unused_flags=*/1);
-        }
-      }
-    }
-    c->stars.dx_max_sort = dx_max_sort;
-    c->stars.dx_max_sort_old = dx_max_sort_old;
-
-    /* Loop over the 13 different sort arrays. */
-    for (int j = 0; j < 13; j++) {
-
-      /* Has this sort array been flagged? */
-      if (!(flags & (1 << j))) continue;
-
-      /* Init the particle index offsets. */
-      int off[8];
-      off[0] = 0;
-      for (int k = 1; k < 8; k++)
-        if (c->progeny[k - 1] != NULL)
-          off[k] = off[k - 1] + c->progeny[k - 1]->stars.count;
-        else
-          off[k] = off[k - 1];
-
-      /* Init the entries and indices. */
-      int inds[8];
-      for (int k = 0; k < 8; k++) {
-        inds[k] = k;
-        if (c->progeny[k] != NULL && c->progeny[k]->stars.count > 0) {
-          fingers[k] = c->progeny[k]->stars.sort[j];
-          buff[k] = fingers[k]->d;
-          off[k] = off[k];
-        } else
-          buff[k] = FLT_MAX;
-      }
-
-      /* Sort the buffer. */
-      for (int i = 0; i < 7; i++)
-        for (int k = i + 1; k < 8; k++)
-          if (buff[inds[k]] < buff[inds[i]]) {
-            int temp_i = inds[i];
-            inds[i] = inds[k];
-            inds[k] = temp_i;
-          }
-
-      /* For each entry in the new sort list. */
-      struct sort_entry *finger = c->stars.sort[j];
-      for (int ind = 0; ind < count; ind++) {
-
-        /* Copy the minimum into the new sort array. */
-        finger[ind].d = buff[inds[0]];
-        finger[ind].i = fingers[inds[0]]->i + off[inds[0]];
-
-        /* Update the buffer. */
-        fingers[inds[0]] += 1;
-        buff[inds[0]] = fingers[inds[0]]->d;
-
-        /* Find the smallest entry. */
-        for (int k = 1; k < 8 && buff[inds[k]] < buff[inds[k - 1]]; k++) {
-          int temp_i = inds[k - 1];
-          inds[k - 1] = inds[k];
-          inds[k] = temp_i;
-        }
-
-      } /* Merge. */
-
-      /* Add a sentinel. */
-      c->stars.sort[j][count].d = FLT_MAX;
-      c->stars.sort[j][count].i = 0;
-
-      /* Mark as sorted. */
-      atomic_or(&c->stars.sorted, 1 << j);
-
-    } /* loop over sort arrays. */
-
-  } /* progeny? */
-
-  /* Otherwise, just sort. */
-  else {
-
-    /* Reset the sort distance */
-    if (c->stars.sorted == 0) {
-
-      /* And the individual sort distances if we are a local cell */
-      for (int k = 0; k < count; k++) {
-        sparts[k].x_diff_sort[0] = 0.0f;
-        sparts[k].x_diff_sort[1] = 0.0f;
-        sparts[k].x_diff_sort[2] = 0.0f;
-      }
-      c->stars.dx_max_sort_old = 0.f;
-      c->stars.dx_max_sort = 0.f;
-    }
-
-    /* Fill the sort array. */
-    for (int k = 0; k < count; k++) {
-      const double px[3] = {sparts[k].x[0], sparts[k].x[1], sparts[k].x[2]};
-      for (int j = 0; j < 13; j++)
-        if (flags & (1 << j)) {
-          c->stars.sort[j][k].i = k;
-          c->stars.sort[j][k].d = px[0] * runner_shift[j][0] +
-                                  px[1] * runner_shift[j][1] +
-                                  px[2] * runner_shift[j][2];
-        }
-    }
-
-    /* Add the sentinel and sort. */
-    for (int j = 0; j < 13; j++)
-      if (flags & (1 << j)) {
-        c->stars.sort[j][count].d = FLT_MAX;
-        c->stars.sort[j][count].i = 0;
-        runner_do_sort_ascending(c->stars.sort[j], count);
-        atomic_or(&c->stars.sorted, 1 << j);
-      }
-  }
-
-#ifdef SWIFT_DEBUG_CHECKS
-  /* Verify the sorting. */
-  for (int j = 0; j < 13; j++) {
-    if (!(flags & (1 << j))) continue;
-    struct sort_entry *finger = c->stars.sort[j];
-    for (int k = 1; k < count; k++) {
-      if (finger[k].d < finger[k - 1].d)
-        error("Sorting failed, ascending array.");
-      if (finger[k].i >= count) error("Sorting failed, indices borked.");
-    }
-  }
-
-  /* Make sure the sort flags are consistent (downward). */
-  runner_check_sorts_stars(c, flags);
-
-  /* Make sure the sort flags are consistent (upward). */
-  for (struct cell *finger = c->parent; finger != NULL;
-       finger = finger->parent) {
-    if (finger->stars.sorted & ~c->stars.sorted)
-      error("Inconsistent sort flags.");
-  }
-#endif
-
-  /* Clear the cell's sort flags. */
-  c->stars.do_sort = 0;
-  cell_clear_flag(c, cell_flag_do_stars_sub_sort);
-  c->stars.requires_sorts = 0;
-
-  if (clock) TIMER_TOC(timer_do_stars_sort);
-}
-
-/**
- * @brief Recurse into a cell until reaching the super level and call
- * the hydro sorting function there.
- *
- * This function must be called at or above the super level!
- *
- * This function will sort the particles in all 13 directions.
- *
- * @param r the #runner.
- * @param c the #cell.
- */
-void runner_do_all_hydro_sort(struct runner *r, struct cell *c) {
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (c->nodeID != engine_rank) error("Function called on a foreign cell!");
-#endif
-
-  if (!cell_is_active_hydro(c, r->e)) return;
-
-  /* Shall we sort at this level? */
-  if (c->hydro.super == c) {
-
-    /* Sort everything */
-    runner_do_hydro_sort(r, c, 0x1FFF, /*cleanup=*/0, /*timer=*/0);
-
-  } else {
-
-#ifdef SWIFT_DEBUG_CHECKS
-    if (c->hydro.super != NULL) error("Function called below the super level!");
-#endif
-
-    /* Ok, then, let's try lower */
-    if (c->split) {
-      for (int k = 0; k < 8; ++k) {
-        if (c->progeny[k] != NULL) runner_do_all_hydro_sort(r, c->progeny[k]);
-      }
-    } else {
-#ifdef SWIFT_DEBUG_CHECKS
-      error("Reached a leaf without encountering a hydro super cell!");
-#endif
-    }
-  }
-}
-
-/**
- * @brief Recurse into a cell until reaching the super level and call
- * the star sorting function there.
- *
- * This function must be called at or above the super level!
- *
- * This function will sort the particles in all 13 directions.
- *
- * @param r the #runner.
- * @param c the #cell.
- */
-void runner_do_all_stars_sort(struct runner *r, struct cell *c) {
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (c->nodeID != engine_rank) error("Function called on a foreign cell!");
-#endif
-
-  if (!cell_is_active_stars(c, r->e) && !cell_is_active_hydro(c, r->e)) return;
-
-  /* Shall we sort at this level? */
-  if (c->hydro.super == c) {
-
-    /* Sort everything */
-    runner_do_stars_sort(r, c, 0x1FFF, /*cleanup=*/0, /*timer=*/0);
-
-  } else {
-
-#ifdef SWIFT_DEBUG_CHECKS
-    if (c->hydro.super != NULL) error("Function called below the super level!");
-#endif
-
-    /* Ok, then, let's try lower */
-    if (c->split) {
-      for (int k = 0; k < 8; ++k) {
-        if (c->progeny[k] != NULL) runner_do_all_stars_sort(r, c->progeny[k]);
-      }
-    } else {
-#ifdef SWIFT_DEBUG_CHECKS
-      error("Reached a leaf without encountering a hydro super cell!");
-#endif
-    }
-  }
-}
-
-/**
- * @brief Initialize the multipoles before the gravity calculation.
- *
- * @param r The runner thread.
- * @param c The cell.
- * @param timer 1 if the time is to be recorded.
- */
-void runner_do_init_grav(struct runner *r, struct cell *c, int timer) {
-
-  const struct engine *e = r->e;
-
-  TIMER_TIC;
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (!(e->policy & engine_policy_self_gravity))
-    error("Grav-init task called outside of self-gravity calculation");
-#endif
-
-  /* Anything to do here? */
-  if (!cell_is_active_gravity(c, e)) return;
-
-  /* Reset the gravity acceleration tensors */
-  gravity_field_tensors_init(&c->grav.multipole->pot, e->ti_current);
-
-  /* Recurse? */
-  if (c->split) {
-    for (int k = 0; k < 8; k++) {
-      if (c->progeny[k] != NULL) runner_do_init_grav(r, c->progeny[k], 0);
-    }
-  }
-
-  if (timer) TIMER_TOC(timer_init_grav);
-}
-
-/**
- * @brief Intermediate task after the gradient loop that does final operations
- * on the gradient quantities and optionally slope limits the gradients
- *
- * @param r The runner thread.
- * @param c The cell.
- * @param timer Are we timing this ?
- */
-void runner_do_extra_ghost(struct runner *r, struct cell *c, int timer) {
-
-#ifdef EXTRA_HYDRO_LOOP
-
-  struct part *restrict parts = c->hydro.parts;
-  struct xpart *restrict xparts = c->hydro.xparts;
-  const int count = c->hydro.count;
-  const struct engine *e = r->e;
-  const integertime_t ti_current = e->ti_current;
-  const int with_cosmology = (e->policy & engine_policy_cosmology);
-  const double time_base = e->time_base;
-  const struct cosmology *cosmo = e->cosmology;
-  const struct hydro_props *hydro_props = e->hydro_properties;
-
-  TIMER_TIC;
-
-  /* Anything to do here? */
-  if (!cell_is_active_hydro(c, e)) return;
-
-  /* Recurse? */
-  if (c->split) {
-    for (int k = 0; k < 8; k++)
-      if (c->progeny[k] != NULL) runner_do_extra_ghost(r, c->progeny[k], 0);
-  } else {
-
-    /* Loop over the parts in this cell. */
-    for (int i = 0; i < count; i++) {
-
-      /* Get a direct pointer on the part. */
-      struct part *restrict p = &parts[i];
-      struct xpart *restrict xp = &xparts[i];
-
-      if (part_is_active(p, e)) {
-
-        /* Finish the gradient calculation */
-        hydro_end_gradient(p);
-
-        /* As of here, particle force variables will be set. */
-
-        /* Calculate the time-step for passing to hydro_prepare_force.
-         * This is the physical time between the start and end of the time-step
-         * without any scale-factor powers. */
-        double dt_alpha;
-
-        if (with_cosmology) {
-          const integertime_t ti_step = get_integer_timestep(p->time_bin);
-          const integertime_t ti_begin =
-              get_integer_time_begin(ti_current - 1, p->time_bin);
-
-          dt_alpha =
-              cosmology_get_delta_time(cosmo, ti_begin, ti_begin + ti_step);
-        } else {
-          dt_alpha = get_timestep(p->time_bin, time_base);
-        }
-
-        /* Compute variables required for the force loop */
-        hydro_prepare_force(p, xp, cosmo, hydro_props, dt_alpha);
-
-        /* The particle force values are now set.  Do _NOT_
-           try to read any particle density variables! */
-
-        /* Prepare the particle for the force loop over neighbours */
-        hydro_reset_acceleration(p);
-      }
-    }
-  }
-
-  if (timer) TIMER_TOC(timer_do_extra_ghost);
-
-#else
-  error("SWIFT was not compiled with the extra hydro loop activated.");
-#endif
-}
-
-/**
- * @brief Intermediate task after the density to check that the smoothing
- * lengths are correct.
- *
- * @param r The runner thread.
- * @param c The cell.
- * @param timer Are we timing this ?
- */
-void runner_do_ghost(struct runner *r, struct cell *c, int timer) {
-
-  struct part *restrict parts = c->hydro.parts;
-  struct xpart *restrict xparts = c->hydro.xparts;
-  const struct engine *e = r->e;
-  const struct space *s = e->s;
-  const struct hydro_space *hs = &s->hs;
-  const struct cosmology *cosmo = e->cosmology;
-  const struct chemistry_global_data *chemistry = e->chemistry;
-
-  const int with_cosmology = (e->policy & engine_policy_cosmology);
-
-  const float hydro_h_max = e->hydro_properties->h_max;
-  const float hydro_h_min = e->hydro_properties->h_min;
-  const float eps = e->hydro_properties->h_tolerance;
-  const float hydro_eta_dim =
-      pow_dimension(e->hydro_properties->eta_neighbours);
-  const int max_smoothing_iter = e->hydro_properties->max_smoothing_iterations;
-  int redo = 0, count = 0;
-
-  /* Running value of the maximal smoothing length */
-  double h_max = c->hydro.h_max;
-
-  TIMER_TIC;
-
-  /* Anything to do here? */
-  if (c->hydro.count == 0) return;
-  if (!cell_is_active_hydro(c, e)) return;
-
-  /* Recurse? */
-  if (c->split) {
-    for (int k = 0; k < 8; k++) {
-      if (c->progeny[k] != NULL) {
-        runner_do_ghost(r, c->progeny[k], 0);
-
-        /* Update h_max */
-        h_max = max(h_max, c->progeny[k]->hydro.h_max);
-      }
-    }
-  } else {
-
-    /* Init the list of active particles that have to be updated and their
-     * current smoothing lengths. */
-    int *pid = NULL;
-    float *h_0 = NULL;
-    float *left = NULL;
-    float *right = NULL;
-    if ((pid = (int *)malloc(sizeof(int) * c->hydro.count)) == NULL)
-      error("Can't allocate memory for pid.");
-    if ((h_0 = (float *)malloc(sizeof(float) * c->hydro.count)) == NULL)
-      error("Can't allocate memory for h_0.");
-    if ((left = (float *)malloc(sizeof(float) * c->hydro.count)) == NULL)
-      error("Can't allocate memory for left.");
-    if ((right = (float *)malloc(sizeof(float) * c->hydro.count)) == NULL)
-      error("Can't allocate memory for right.");
-    for (int k = 0; k < c->hydro.count; k++)
-      if (part_is_active(&parts[k], e)) {
-        pid[count] = k;
-        h_0[count] = parts[k].h;
-        left[count] = 0.f;
-        right[count] = hydro_h_max;
-        ++count;
-      }
-
-    /* While there are particles that need to be updated... */
-    for (int num_reruns = 0; count > 0 && num_reruns < max_smoothing_iter;
-         num_reruns++) {
-
-      /* Reset the redo-count. */
-      redo = 0;
-
-      /* Loop over the remaining active parts in this cell. */
-      for (int i = 0; i < count; i++) {
-
-        /* Get a direct pointer on the part. */
-        struct part *p = &parts[pid[i]];
-        struct xpart *xp = &xparts[pid[i]];
-
-#ifdef SWIFT_DEBUG_CHECKS
-        /* Is this part within the timestep? */
-        if (!part_is_active(p, e)) error("Ghost applied to inactive particle");
-#endif
-
-        /* Get some useful values */
-        const float h_init = h_0[i];
-        const float h_old = p->h;
-        const float h_old_dim = pow_dimension(h_old);
-        const float h_old_dim_minus_one = pow_dimension_minus_one(h_old);
-
-        float h_new;
-        int has_no_neighbours = 0;
-
-        if (p->density.wcount == 0.f) { /* No neighbours case */
-
-          /* Flag that there were no neighbours */
-          has_no_neighbours = 1;
-
-          /* Double h and try again */
-          h_new = 2.f * h_old;
-
-        } else {
-
-          /* Finish the density calculation */
-          hydro_end_density(p, cosmo);
-          chemistry_end_density(p, chemistry, cosmo);
-          pressure_floor_end_density(p, cosmo);
-
-          /* Compute one step of the Newton-Raphson scheme */
-          const float n_sum = p->density.wcount * h_old_dim;
-          const float n_target = hydro_eta_dim;
-          const float f = n_sum - n_target;
-          const float f_prime =
-              p->density.wcount_dh * h_old_dim +
-              hydro_dimension * p->density.wcount * h_old_dim_minus_one;
-
-          /* Improve the bisection bounds */
-          if (n_sum < n_target)
-            left[i] = max(left[i], h_old);
-          else if (n_sum > n_target)
-            right[i] = min(right[i], h_old);
-
-#ifdef SWIFT_DEBUG_CHECKS
-          /* Check the validity of the left and right bounds */
-          if (left[i] > right[i])
-            error("Invalid left (%e) and right (%e)", left[i], right[i]);
-#endif
-
-          /* Skip if h is already h_max and we don't have enough neighbours */
-          /* Same if we are below h_min */
-          if (((p->h >= hydro_h_max) && (f < 0.f)) ||
-              ((p->h <= hydro_h_min) && (f > 0.f))) {
-
-          /* We have a particle whose smoothing length is already set (wants
-           * to be larger but has already hit the maximum OR wants to be
-           * smaller but has already reached the minimum). So, just tidy up as
-           * if the smoothing length had converged correctly  */
-
-#ifdef EXTRA_HYDRO_LOOP
-
-            /* As of here, particle gradient variables will be set. */
-            /* The force variables are set in the extra ghost. */
-
-            /* Compute variables required for the gradient loop */
-            hydro_prepare_gradient(p, xp, cosmo);
-
-            /* The particle gradient values are now set.  Do _NOT_
-               try to read any particle density variables! */
-
-            /* Prepare the particle for the gradient loop over neighbours */
-            hydro_reset_gradient(p);
-
-#else
-            const struct hydro_props *hydro_props = e->hydro_properties;
-
-            /* Calculate the time-step for passing to hydro_prepare_force, used
-             * for the evolution of alpha factors (i.e. those involved in the
-             * artificial viscosity and thermal conduction terms) */
-            const double time_base = e->time_base;
-            const integertime_t ti_current = e->ti_current;
-            double dt_alpha;
-
-            if (with_cosmology) {
-              const integertime_t ti_step = get_integer_timestep(p->time_bin);
-              const integertime_t ti_begin =
-                  get_integer_time_begin(ti_current - 1, p->time_bin);
-
-              dt_alpha =
-                  cosmology_get_delta_time(cosmo, ti_begin, ti_begin + ti_step);
-            } else {
-              dt_alpha = get_timestep(p->time_bin, time_base);
-            }
-
-            /* As of here, particle force variables will be set. */
-
-            /* Compute variables required for the force loop */
-            hydro_prepare_force(p, xp, cosmo, hydro_props, dt_alpha);
-
-            /* The particle force values are now set.  Do _NOT_
-               try to read any particle density variables! */
-
-            /* Prepare the particle for the force loop over neighbours */
-            hydro_reset_acceleration(p);
-
-#endif /* EXTRA_HYDRO_LOOP */
-
-            /* Ok, we are done with this particle */
-            continue;
-          }
-
-          /* Normal case: Use Newton-Raphson to get a better value of h */
-
-          /* Avoid floating point exception from f_prime = 0 */
-          h_new = h_old - f / (f_prime + FLT_MIN);
-
-          /* Be verbose about the particles that struggle to converge */
-          if (num_reruns > max_smoothing_iter - 10) {
-
-            message(
-                "Smoothing length convergence problem: iter=%d p->id=%lld "
-                "h_init=%12.8e h_old=%12.8e h_new=%12.8e f=%f f_prime=%f "
-                "n_sum=%12.8e n_target=%12.8e left=%12.8e right=%12.8e",
-                num_reruns, p->id, h_init, h_old, h_new, f, f_prime, n_sum,
-                n_target, left[i], right[i]);
-          }
-
-#ifdef SWIFT_DEBUG_CHECKS
-          if ((f > 0.f && h_new > h_old) || (f < 0.f && h_new < h_old))
-            error(
-                "Smoothing length correction not going in the right direction");
-#endif
-
-          /* Safety check: truncate to the range [ h_old/2 , 2h_old ]. */
-          h_new = min(h_new, 2.f * h_old);
-          h_new = max(h_new, 0.5f * h_old);
-
-          /* Verify that we are actually progrssing towards the answer */
-          h_new = max(h_new, left[i]);
-          h_new = min(h_new, right[i]);
-        }
-
-        /* Check whether the particle has an inappropriate smoothing length */
-        if (fabsf(h_new - h_old) > eps * h_old) {
-
-          /* Ok, correct then */
-
-          /* Case where we have been oscillating around the solution */
-          if ((h_new == left[i] && h_old == right[i]) ||
-              (h_old == left[i] && h_new == right[i])) {
-
-            /* Bissect the remaining interval */
-            p->h = pow_inv_dimension(
-                0.5f * (pow_dimension(left[i]) + pow_dimension(right[i])));
-
-          } else {
-
-            /* Normal case */
-            p->h = h_new;
-          }
-
-          /* If within the allowed range, try again */
-          if (p->h < hydro_h_max && p->h > hydro_h_min) {
-
-            /* Flag for another round of fun */
-            pid[redo] = pid[i];
-            h_0[redo] = h_0[i];
-            left[redo] = left[i];
-            right[redo] = right[i];
-            redo += 1;
-
-            /* Re-initialise everything */
-            hydro_init_part(p, hs);
-            chemistry_init_part(p, chemistry);
-            pressure_floor_init_part(p, xp);
-            tracers_after_init(p, xp, e->internal_units, e->physical_constants,
-                               with_cosmology, e->cosmology,
-                               e->hydro_properties, e->cooling_func, e->time);
-
-            /* Off we go ! */
-            continue;
-
-          } else if (p->h <= hydro_h_min) {
-
-            /* Ok, this particle is a lost cause... */
-            p->h = hydro_h_min;
-
-          } else if (p->h >= hydro_h_max) {
-
-            /* Ok, this particle is a lost cause... */
-            p->h = hydro_h_max;
-
-            /* Do some damage control if no neighbours at all were found */
-            if (has_no_neighbours) {
-              hydro_part_has_no_neighbours(p, xp, cosmo);
-              chemistry_part_has_no_neighbours(p, xp, chemistry, cosmo);
-              pressure_floor_part_has_no_neighbours(p, xp, cosmo);
-            }
-
-          } else {
-            error(
-                "Fundamental problem with the smoothing length iteration "
-                "logic.");
-          }
-        }
-
-        /* We now have a particle whose smoothing length has converged */
-
-        /* Check if h_max is increased */
-        h_max = max(h_max, p->h);
-
-#ifdef EXTRA_HYDRO_LOOP
-
-        /* As of here, particle gradient variables will be set. */
-        /* The force variables are set in the extra ghost. */
-
-        /* Compute variables required for the gradient loop */
-        hydro_prepare_gradient(p, xp, cosmo);
-
-        /* The particle gradient values are now set.  Do _NOT_
-           try to read any particle density variables! */
-
-        /* Prepare the particle for the gradient loop over neighbours */
-        hydro_reset_gradient(p);
-
-#else
-        const struct hydro_props *hydro_props = e->hydro_properties;
-
-        /* Calculate the time-step for passing to hydro_prepare_force, used for
-         * the evolution of alpha factors (i.e. those involved in the artificial
-         * viscosity and thermal conduction terms) */
-        const double time_base = e->time_base;
-        const integertime_t ti_current = e->ti_current;
-        double dt_alpha;
-
-        if (with_cosmology) {
-          const integertime_t ti_step = get_integer_timestep(p->time_bin);
-          const integertime_t ti_begin =
-              get_integer_time_begin(ti_current - 1, p->time_bin);
-
-          dt_alpha =
-              cosmology_get_delta_time(cosmo, ti_begin, ti_begin + ti_step);
-        } else {
-          dt_alpha = get_timestep(p->time_bin, time_base);
-        }
-
-        /* As of here, particle force variables will be set. */
-
-        /* Compute variables required for the force loop */
-        hydro_prepare_force(p, xp, cosmo, hydro_props, dt_alpha);
-
-        /* The particle force values are now set.  Do _NOT_
-           try to read any particle density variables! */
-
-        /* Prepare the particle for the force loop over neighbours */
-        hydro_reset_acceleration(p);
-
-#endif /* EXTRA_HYDRO_LOOP */
-      }
-
-      /* We now need to treat the particles whose smoothing length had not
-       * converged again */
-
-      /* Re-set the counter for the next loop (potentially). */
-      count = redo;
-      if (count > 0) {
-
-        /* Climb up the cell hierarchy. */
-        for (struct cell *finger = c; finger != NULL; finger = finger->parent) {
-
-          /* Run through this cell's density interactions. */
-          for (struct link *l = finger->hydro.density; l != NULL; l = l->next) {
-
-#ifdef SWIFT_DEBUG_CHECKS
-            if (l->t->ti_run < r->e->ti_current)
-              error("Density task should have been run.");
-#endif
-
-            /* Self-interaction? */
-            if (l->t->type == task_type_self)
-              runner_doself_subset_branch_density(r, finger, parts, pid, count);
-
-            /* Otherwise, pair interaction? */
-            else if (l->t->type == task_type_pair) {
-
-              /* Left or right? */
-              if (l->t->ci == finger)
-                runner_dopair_subset_branch_density(r, finger, parts, pid,
-                                                    count, l->t->cj);
-              else
-                runner_dopair_subset_branch_density(r, finger, parts, pid,
-                                                    count, l->t->ci);
-            }
-
-            /* Otherwise, sub-self interaction? */
-            else if (l->t->type == task_type_sub_self)
-              runner_dosub_subset_density(r, finger, parts, pid, count, NULL,
-                                          1);
-
-            /* Otherwise, sub-pair interaction? */
-            else if (l->t->type == task_type_sub_pair) {
-
-              /* Left or right? */
-              if (l->t->ci == finger)
-                runner_dosub_subset_density(r, finger, parts, pid, count,
-                                            l->t->cj, 1);
-              else
-                runner_dosub_subset_density(r, finger, parts, pid, count,
-                                            l->t->ci, 1);
-            }
-          }
-        }
-      }
-    }
-
-    if (count) {
-      error("Smoothing length failed to converge on %i particles.", count);
-    }
-
-    /* Be clean */
-    free(left);
-    free(right);
-    free(pid);
-    free(h_0);
-  }
-
-  /* Update h_max */
-  c->hydro.h_max = h_max;
-
-  /* The ghost may not always be at the top level.
-   * Therefore we need to update h_max between the super- and top-levels */
-  if (c->hydro.ghost) {
-    for (struct cell *tmp = c->parent; tmp != NULL; tmp = tmp->parent) {
-      atomic_max_d(&tmp->hydro.h_max, h_max);
-    }
-  }
-
-  if (timer) TIMER_TOC(timer_do_ghost);
-}
-
-/**
- * @brief Unskip any hydro tasks associated with active cells.
- *
- * @param c The cell.
- * @param e The engine.
- */
-static void runner_do_unskip_hydro(struct cell *c, struct engine *e) {
-
-  /* Ignore empty cells. */
-  if (c->hydro.count == 0) return;
-
-  /* Skip inactive cells. */
-  if (!cell_is_active_hydro(c, e)) return;
-
-  /* Recurse */
-  if (c->split) {
-    for (int k = 0; k < 8; k++) {
-      if (c->progeny[k] != NULL) {
-        struct cell *cp = c->progeny[k];
-        runner_do_unskip_hydro(cp, e);
-      }
-    }
-  }
-
-  /* Unskip any active tasks. */
-  const int forcerebuild = cell_unskip_hydro_tasks(c, &e->sched);
-  if (forcerebuild) atomic_inc(&e->forcerebuild);
-}
-
-/**
- * @brief Unskip any stars tasks associated with active cells.
- *
- * @param c The cell.
- * @param e The engine.
- * @param with_star_formation Are we running with star formation switched on?
- */
-static void runner_do_unskip_stars(struct cell *c, struct engine *e,
-                                   const int with_star_formation) {
-
-  const int non_empty =
-      c->stars.count > 0 || (with_star_formation && c->hydro.count > 0);
-
-  /* Ignore empty cells. */
-  if (!non_empty) return;
-
-  const int ci_active = cell_is_active_stars(c, e) ||
-                        (with_star_formation && cell_is_active_hydro(c, e));
-
-  /* Skip inactive cells. */
-  if (!ci_active) return;
-
-  /* Recurse */
-  if (c->split) {
-    for (int k = 0; k < 8; k++) {
-      if (c->progeny[k] != NULL) {
-        struct cell *cp = c->progeny[k];
-        runner_do_unskip_stars(cp, e, with_star_formation);
-      }
-    }
-  }
-
-  /* Unskip any active tasks. */
-  const int forcerebuild =
-      cell_unskip_stars_tasks(c, &e->sched, with_star_formation);
-  if (forcerebuild) atomic_inc(&e->forcerebuild);
-}
-
-/**
- * @brief Unskip any black hole tasks associated with active cells.
- *
- * @param c The cell.
- * @param e The engine.
- */
-static void runner_do_unskip_black_holes(struct cell *c, struct engine *e) {
-
-  /* Ignore empty cells. */
-  if (c->black_holes.count == 0) return;
-
-  /* Skip inactive cells. */
-  if (!cell_is_active_black_holes(c, e)) return;
-
-  /* Recurse */
-  if (c->split) {
-    for (int k = 0; k < 8; k++) {
-      if (c->progeny[k] != NULL) {
-        struct cell *cp = c->progeny[k];
-        runner_do_unskip_black_holes(cp, e);
-      }
-    }
-  }
-
-  /* Unskip any active tasks. */
-  const int forcerebuild = cell_unskip_black_holes_tasks(c, &e->sched);
-  if (forcerebuild) atomic_inc(&e->forcerebuild);
-}
-
-/**
- * @brief Unskip any gravity tasks associated with active cells.
- *
- * @param c The cell.
- * @param e The engine.
- */
-static void runner_do_unskip_gravity(struct cell *c, struct engine *e) {
-
-  /* Ignore empty cells. */
-  if (c->grav.count == 0) return;
-
-  /* Skip inactive cells. */
-  if (!cell_is_active_gravity(c, e)) return;
-
-  /* Recurse */
-  if (c->split && ((c->maxdepth - c->depth) >= space_subdepth_diff_grav)) {
-    for (int k = 0; k < 8; k++) {
-      if (c->progeny[k] != NULL) {
-        struct cell *cp = c->progeny[k];
-        runner_do_unskip_gravity(cp, e);
-      }
-    }
-  }
-
-  /* Unskip any active tasks. */
-  cell_unskip_gravity_tasks(c, &e->sched);
-}
-
-/**
- * @brief Mapper function to unskip active tasks.
- *
- * @param map_data An array of #cell%s.
- * @param num_elements Chunk size.
- * @param extra_data Pointer to an #engine.
- */
-void runner_do_unskip_mapper(void *map_data, int num_elements,
-                             void *extra_data) {
-
-  struct engine *e = (struct engine *)extra_data;
-  const int with_star_formation = e->policy & engine_policy_star_formation;
-  const int nodeID = e->nodeID;
-  struct space *s = e->s;
-  int *local_cells = (int *)map_data;
-
-  for (int ind = 0; ind < num_elements; ind++) {
-    struct cell *c = &s->cells_top[local_cells[ind]];
-    if (c != NULL) {
-
-      /* Hydro tasks */
-      if (e->policy & engine_policy_hydro) runner_do_unskip_hydro(c, e);
-
-      /* All gravity tasks */
-      if ((e->policy & engine_policy_self_gravity) ||
-          ((e->policy & engine_policy_external_gravity) && c->nodeID == nodeID))
-        runner_do_unskip_gravity(c, e);
-
-      /* Stars tasks */
-      if (e->policy & engine_policy_stars)
-        runner_do_unskip_stars(c, e, with_star_formation);
-
-      /* Black hole tasks */
-      if (e->policy & engine_policy_black_holes)
-        runner_do_unskip_black_holes(c, e);
-    }
-  }
-}
-
-/**
- * @brief Drift all part in a cell.
- *
- * @param r The runner thread.
- * @param c The cell.
- * @param timer Are we timing this ?
- */
-void runner_do_drift_part(struct runner *r, struct cell *c, int timer) {
-
-  TIMER_TIC;
-
-  cell_drift_part(c, r->e, 0);
-
-  if (timer) TIMER_TOC(timer_drift_part);
-}
-
-/**
- * @brief Drift all gpart in a cell.
- *
- * @param r The runner thread.
- * @param c The cell.
- * @param timer Are we timing this ?
- */
-void runner_do_drift_gpart(struct runner *r, struct cell *c, int timer) {
-
-  TIMER_TIC;
-
-  cell_drift_gpart(c, r->e, 0);
-
-  if (timer) TIMER_TOC(timer_drift_gpart);
-}
-
-/**
- * @brief Drift all spart in a cell.
- *
- * @param r The runner thread.
- * @param c The cell.
- * @param timer Are we timing this ?
- */
-void runner_do_drift_spart(struct runner *r, struct cell *c, int timer) {
-
-  TIMER_TIC;
-
-  cell_drift_spart(c, r->e, 0);
-
-  if (timer) TIMER_TOC(timer_drift_spart);
-}
-
-/**
- * @brief Drift all bpart in a cell.
- *
- * @param r The runner thread.
- * @param c The cell.
- * @param timer Are we timing this ?
- */
-void runner_do_drift_bpart(struct runner *r, struct cell *c, int timer) {
-
-  TIMER_TIC;
-
-  cell_drift_bpart(c, r->e, 0);
-
-  if (timer) TIMER_TOC(timer_drift_bpart);
-}
-
-/**
- * @brief Perform the first half-kick on all the active particles in a cell.
- *
- * @param r The runner thread.
- * @param c The cell.
- * @param timer Are we timing this ?
- */
-void runner_do_kick1(struct runner *r, struct cell *c, int timer) {
-
-  const struct engine *e = r->e;
-  const struct cosmology *cosmo = e->cosmology;
-  const struct hydro_props *hydro_props = e->hydro_properties;
-  const struct entropy_floor_properties *entropy_floor = e->entropy_floor;
-  const int with_cosmology = (e->policy & engine_policy_cosmology);
-  struct part *restrict parts = c->hydro.parts;
-  struct xpart *restrict xparts = c->hydro.xparts;
-  struct gpart *restrict gparts = c->grav.parts;
-  struct spart *restrict sparts = c->stars.parts;
-  const int count = c->hydro.count;
-  const int gcount = c->grav.count;
-  const int scount = c->stars.count;
-  const integertime_t ti_current = e->ti_current;
-  const double time_base = e->time_base;
-
-  TIMER_TIC;
-
-  /* Anything to do here? */
-  if (!cell_is_starting_hydro(c, e) && !cell_is_starting_gravity(c, e) &&
-      !cell_is_starting_stars(c, e) && !cell_is_starting_black_holes(c, e))
-    return;
-
-  /* Recurse? */
-  if (c->split) {
-    for (int k = 0; k < 8; k++)
-      if (c->progeny[k] != NULL) runner_do_kick1(r, c->progeny[k], 0);
-  } else {
-
-    /* Loop over the parts in this cell. */
-    for (int k = 0; k < count; k++) {
-
-      /* Get a handle on the part. */
-      struct part *restrict p = &parts[k];
-      struct xpart *restrict xp = &xparts[k];
-
-      /* If particle needs to be kicked */
-      if (part_is_starting(p, e)) {
-
-#ifdef SWIFT_DEBUG_CHECKS
-        if (p->wakeup == time_bin_awake)
-          error("Woken-up particle that has not been processed in kick1");
-#endif
-
-        /* Skip particles that have been woken up and treated by the limiter. */
-        if (p->wakeup != time_bin_not_awake) continue;
-
-        const integertime_t ti_step = get_integer_timestep(p->time_bin);
-        const integertime_t ti_begin =
-            get_integer_time_begin(ti_current + 1, p->time_bin);
-
-#ifdef SWIFT_DEBUG_CHECKS
-        const integertime_t ti_end = ti_begin + ti_step;
-
-        if (ti_begin != ti_current)
-          error(
-              "Particle in wrong time-bin, ti_end=%lld, ti_begin=%lld, "
-              "ti_step=%lld time_bin=%d wakeup=%d ti_current=%lld",
-              ti_end, ti_begin, ti_step, p->time_bin, p->wakeup, ti_current);
-#endif
-
-        /* Time interval for this half-kick */
-        double dt_kick_grav, dt_kick_hydro, dt_kick_therm, dt_kick_corr;
-        if (with_cosmology) {
-          dt_kick_hydro = cosmology_get_hydro_kick_factor(
-              cosmo, ti_begin, ti_begin + ti_step / 2);
-          dt_kick_grav = cosmology_get_grav_kick_factor(cosmo, ti_begin,
-                                                        ti_begin + ti_step / 2);
-          dt_kick_therm = cosmology_get_therm_kick_factor(
-              cosmo, ti_begin, ti_begin + ti_step / 2);
-          dt_kick_corr = cosmology_get_corr_kick_factor(cosmo, ti_begin,
-                                                        ti_begin + ti_step / 2);
-        } else {
-          dt_kick_hydro = (ti_step / 2) * time_base;
-          dt_kick_grav = (ti_step / 2) * time_base;
-          dt_kick_therm = (ti_step / 2) * time_base;
-          dt_kick_corr = (ti_step / 2) * time_base;
-        }
-
-        /* do the kick */
-        kick_part(p, xp, dt_kick_hydro, dt_kick_grav, dt_kick_therm,
-                  dt_kick_corr, cosmo, hydro_props, entropy_floor, ti_begin,
-                  ti_begin + ti_step / 2);
-
-        /* Update the accelerations to be used in the drift for hydro */
-        if (p->gpart != NULL) {
-
-          xp->a_grav[0] = p->gpart->a_grav[0];
-          xp->a_grav[1] = p->gpart->a_grav[1];
-          xp->a_grav[2] = p->gpart->a_grav[2];
-        }
-      }
-    }
-
-    /* Loop over the gparts in this cell. */
-    for (int k = 0; k < gcount; k++) {
-
-      /* Get a handle on the part. */
-      struct gpart *restrict gp = &gparts[k];
-
-      /* If the g-particle has no counterpart and needs to be kicked */
-      if ((gp->type == swift_type_dark_matter ||
-           gp->type == swift_type_dark_matter_background) &&
-          gpart_is_starting(gp, e)) {
-
-        const integertime_t ti_step = get_integer_timestep(gp->time_bin);
-        const integertime_t ti_begin =
-            get_integer_time_begin(ti_current + 1, gp->time_bin);
-
-#ifdef SWIFT_DEBUG_CHECKS
-        const integertime_t ti_end =
-            get_integer_time_end(ti_current + 1, gp->time_bin);
-
-        if (ti_begin != ti_current)
-          error(
-              "Particle in wrong time-bin, ti_end=%lld, ti_begin=%lld, "
-              "ti_step=%lld time_bin=%d ti_current=%lld",
-              ti_end, ti_begin, ti_step, gp->time_bin, ti_current);
-#endif
-
-        /* Time interval for this half-kick */
-        double dt_kick_grav;
-        if (with_cosmology) {
-          dt_kick_grav = cosmology_get_grav_kick_factor(cosmo, ti_begin,
-                                                        ti_begin + ti_step / 2);
-        } else {
-          dt_kick_grav = (ti_step / 2) * time_base;
-        }
-
-        /* do the kick */
-        kick_gpart(gp, dt_kick_grav, ti_begin, ti_begin + ti_step / 2);
-      }
-    }
-
-    /* Loop over the stars particles in this cell. */
-    for (int k = 0; k < scount; k++) {
-
-      /* Get a handle on the s-part. */
-      struct spart *restrict sp = &sparts[k];
-
-      /* If particle needs to be kicked */
-      if (spart_is_starting(sp, e)) {
-
-        const integertime_t ti_step = get_integer_timestep(sp->time_bin);
-        const integertime_t ti_begin =
-            get_integer_time_begin(ti_current + 1, sp->time_bin);
-
-#ifdef SWIFT_DEBUG_CHECKS
-        const integertime_t ti_end =
-            get_integer_time_end(ti_current + 1, sp->time_bin);
-
-        if (ti_begin != ti_current)
-          error(
-              "Particle in wrong time-bin, ti_end=%lld, ti_begin=%lld, "
-              "ti_step=%lld time_bin=%d ti_current=%lld",
-              ti_end, ti_begin, ti_step, sp->time_bin, ti_current);
-#endif
-
-        /* Time interval for this half-kick */
-        double dt_kick_grav;
-        if (with_cosmology) {
-          dt_kick_grav = cosmology_get_grav_kick_factor(cosmo, ti_begin,
-                                                        ti_begin + ti_step / 2);
-        } else {
-          dt_kick_grav = (ti_step / 2) * time_base;
-        }
-
-        /* do the kick */
-        kick_spart(sp, dt_kick_grav, ti_begin, ti_begin + ti_step / 2);
-      }
-    }
-  }
-
-  if (timer) TIMER_TOC(timer_kick1);
-}
-
-/**
- * @brief Perform the second half-kick on all the active particles in a cell.
- *
- * Also prepares particles to be drifted.
- *
- * @param r The runner thread.
- * @param c The cell.
- * @param timer Are we timing this ?
- */
-void runner_do_kick2(struct runner *r, struct cell *c, int timer) {
-
-  const struct engine *e = r->e;
-  const struct cosmology *cosmo = e->cosmology;
-  const struct hydro_props *hydro_props = e->hydro_properties;
-  const struct entropy_floor_properties *entropy_floor = e->entropy_floor;
-  const int with_cosmology = (e->policy & engine_policy_cosmology);
-  const int count = c->hydro.count;
-  const int gcount = c->grav.count;
-  const int scount = c->stars.count;
-  struct part *restrict parts = c->hydro.parts;
-  struct xpart *restrict xparts = c->hydro.xparts;
-  struct gpart *restrict gparts = c->grav.parts;
-  struct spart *restrict sparts = c->stars.parts;
-  const integertime_t ti_current = e->ti_current;
-  const double time_base = e->time_base;
-
-  TIMER_TIC;
-
-  /* Anything to do here? */
-  if (!cell_is_active_hydro(c, e) && !cell_is_active_gravity(c, e) &&
-      !cell_is_active_stars(c, e) && !cell_is_active_black_holes(c, e))
-    return;
-
-  /* Recurse? */
-  if (c->split) {
-    for (int k = 0; k < 8; k++)
-      if (c->progeny[k] != NULL) runner_do_kick2(r, c->progeny[k], 0);
-  } else {
-
-    /* Loop over the particles in this cell. */
-    for (int k = 0; k < count; k++) {
-
-      /* Get a handle on the part. */
-      struct part *restrict p = &parts[k];
-      struct xpart *restrict xp = &xparts[k];
-
-      /* If particle needs to be kicked */
-      if (part_is_active(p, e)) {
-
-        integertime_t ti_begin, ti_end, ti_step;
-
-#ifdef SWIFT_DEBUG_CHECKS
-        if (p->wakeup == time_bin_awake)
-          error("Woken-up particle that has not been processed in kick1");
-#endif
-
-        if (p->wakeup == time_bin_not_awake) {
-
-          /* Time-step from a regular kick */
-          ti_step = get_integer_timestep(p->time_bin);
-          ti_begin = get_integer_time_begin(ti_current, p->time_bin);
-          ti_end = ti_begin + ti_step;
-
-        } else {
-
-          /* Time-step that follows a wake-up call */
-          ti_begin = get_integer_time_begin(ti_current, p->wakeup);
-          ti_end = get_integer_time_end(ti_current, p->time_bin);
-          ti_step = ti_end - ti_begin;
-
-          /* Reset the flag. Everything is back to normal from now on. */
-          p->wakeup = time_bin_awake;
-        }
-
-#ifdef SWIFT_DEBUG_CHECKS
-        if (ti_begin + ti_step != ti_current)
-          error(
-              "Particle in wrong time-bin, ti_begin=%lld, ti_step=%lld "
-              "time_bin=%d wakeup=%d ti_current=%lld",
-              ti_begin, ti_step, p->time_bin, p->wakeup, ti_current);
-#endif
-        /* Time interval for this half-kick */
-        double dt_kick_grav, dt_kick_hydro, dt_kick_therm, dt_kick_corr;
-        if (with_cosmology) {
-          dt_kick_hydro = cosmology_get_hydro_kick_factor(
-              cosmo, ti_begin + ti_step / 2, ti_end);
-          dt_kick_grav = cosmology_get_grav_kick_factor(
-              cosmo, ti_begin + ti_step / 2, ti_end);
-          dt_kick_therm = cosmology_get_therm_kick_factor(
-              cosmo, ti_begin + ti_step / 2, ti_end);
-          dt_kick_corr = cosmology_get_corr_kick_factor(
-              cosmo, ti_begin + ti_step / 2, ti_end);
-        } else {
-          dt_kick_hydro = (ti_end - (ti_begin + ti_step / 2)) * time_base;
-          dt_kick_grav = (ti_end - (ti_begin + ti_step / 2)) * time_base;
-          dt_kick_therm = (ti_end - (ti_begin + ti_step / 2)) * time_base;
-          dt_kick_corr = (ti_end - (ti_begin + ti_step / 2)) * time_base;
-        }
-
-        /* Finish the time-step with a second half-kick */
-        kick_part(p, xp, dt_kick_hydro, dt_kick_grav, dt_kick_therm,
-                  dt_kick_corr, cosmo, hydro_props, entropy_floor,
-                  ti_begin + ti_step / 2, ti_end);
-
-#ifdef SWIFT_DEBUG_CHECKS
-        /* Check that kick and the drift are synchronized */
-        if (p->ti_drift != p->ti_kick) error("Error integrating part in time.");
-#endif
-
-        /* Prepare the values to be drifted */
-        hydro_reset_predicted_values(p, xp, cosmo);
-      }
-    }
-
-    /* Loop over the g-particles in this cell. */
-    for (int k = 0; k < gcount; k++) {
-
-      /* Get a handle on the part. */
-      struct gpart *restrict gp = &gparts[k];
-
-      /* If the g-particle has no counterpart and needs to be kicked */
-      if ((gp->type == swift_type_dark_matter ||
-           gp->type == swift_type_dark_matter_background) &&
-          gpart_is_active(gp, e)) {
-
-        const integertime_t ti_step = get_integer_timestep(gp->time_bin);
-        const integertime_t ti_begin =
-            get_integer_time_begin(ti_current, gp->time_bin);
-
-#ifdef SWIFT_DEBUG_CHECKS
-        if (ti_begin + ti_step != ti_current)
-          error("Particle in wrong time-bin");
-#endif
-
-        /* Time interval for this half-kick */
-        double dt_kick_grav;
-        if (with_cosmology) {
-          dt_kick_grav = cosmology_get_grav_kick_factor(
-              cosmo, ti_begin + ti_step / 2, ti_begin + ti_step);
-        } else {
-          dt_kick_grav = (ti_step / 2) * time_base;
-        }
-
-        /* Finish the time-step with a second half-kick */
-        kick_gpart(gp, dt_kick_grav, ti_begin + ti_step / 2,
-                   ti_begin + ti_step);
-
-#ifdef SWIFT_DEBUG_CHECKS
-        /* Check that kick and the drift are synchronized */
-        if (gp->ti_drift != gp->ti_kick)
-          error("Error integrating g-part in time.");
-#endif
-
-        /* Prepare the values to be drifted */
-        gravity_reset_predicted_values(gp);
-      }
-    }
-
-    /* Loop over the particles in this cell. */
-    for (int k = 0; k < scount; k++) {
-
-      /* Get a handle on the part. */
-      struct spart *restrict sp = &sparts[k];
-
-      /* If particle needs to be kicked */
-      if (spart_is_active(sp, e)) {
-
-        const integertime_t ti_step = get_integer_timestep(sp->time_bin);
-        const integertime_t ti_begin =
-            get_integer_time_begin(ti_current, sp->time_bin);
-
-#ifdef SWIFT_DEBUG_CHECKS
-        if (ti_begin + ti_step != ti_current)
-          error("Particle in wrong time-bin");
-#endif
-
-        /* Time interval for this half-kick */
-        double dt_kick_grav;
-        if (with_cosmology) {
-          dt_kick_grav = cosmology_get_grav_kick_factor(
-              cosmo, ti_begin + ti_step / 2, ti_begin + ti_step);
-        } else {
-          dt_kick_grav = (ti_step / 2) * time_base;
-        }
-
-        /* Finish the time-step with a second half-kick */
-        kick_spart(sp, dt_kick_grav, ti_begin + ti_step / 2,
-                   ti_begin + ti_step);
-
-#ifdef SWIFT_DEBUG_CHECKS
-        /* Check that kick and the drift are synchronized */
-        if (sp->ti_drift != sp->ti_kick)
-          error("Error integrating s-part in time.");
-#endif
-
-        /* Prepare the values to be drifted */
-        stars_reset_predicted_values(sp);
-      }
-    }
-  }
-  if (timer) TIMER_TOC(timer_kick2);
-}
-
-/**
- * @brief Computes the next time-step of all active particles in this cell
- * and update the cell's statistics.
- *
- * @param r The runner thread.
- * @param c The cell.
- * @param timer Are we timing this ?
- */
-void runner_do_timestep(struct runner *r, struct cell *c, int timer) {
-
-  const struct engine *e = r->e;
-  const integertime_t ti_current = e->ti_current;
-  const int with_cosmology = (e->policy & engine_policy_cosmology);
-  const int count = c->hydro.count;
-  const int gcount = c->grav.count;
-  const int scount = c->stars.count;
-  const int bcount = c->black_holes.count;
-  struct part *restrict parts = c->hydro.parts;
-  struct xpart *restrict xparts = c->hydro.xparts;
-  struct gpart *restrict gparts = c->grav.parts;
-  struct spart *restrict sparts = c->stars.parts;
-  struct bpart *restrict bparts = c->black_holes.parts;
-
-  TIMER_TIC;
-
-  /* Anything to do here? */
-  if (!cell_is_active_hydro(c, e) && !cell_is_active_gravity(c, e) &&
-      !cell_is_active_stars(c, e) && !cell_is_active_black_holes(c, e)) {
-    c->hydro.updated = 0;
-    c->grav.updated = 0;
-    c->stars.updated = 0;
-    c->black_holes.updated = 0;
-    return;
-  }
-
-  int updated = 0, g_updated = 0, s_updated = 0, b_updated = 0;
-  integertime_t ti_hydro_end_min = max_nr_timesteps, ti_hydro_end_max = 0,
-                ti_hydro_beg_max = 0;
-  integertime_t ti_gravity_end_min = max_nr_timesteps, ti_gravity_end_max = 0,
-                ti_gravity_beg_max = 0;
-  integertime_t ti_stars_end_min = max_nr_timesteps, ti_stars_end_max = 0,
-                ti_stars_beg_max = 0;
-  integertime_t ti_black_holes_end_min = max_nr_timesteps,
-                ti_black_holes_end_max = 0, ti_black_holes_beg_max = 0;
-
-  /* No children? */
-  if (!c->split) {
-
-    /* Loop over the particles in this cell. */
-    for (int k = 0; k < count; k++) {
-
-      /* Get a handle on the part. */
-      struct part *restrict p = &parts[k];
-      struct xpart *restrict xp = &xparts[k];
-
-      /* If particle needs updating */
-      if (part_is_active(p, e)) {
-
-#ifdef SWIFT_DEBUG_CHECKS
-        /* Current end of time-step */
-        const integertime_t ti_end =
-            get_integer_time_end(ti_current, p->time_bin);
-
-        if (ti_end != ti_current)
-          error("Computing time-step of rogue particle.");
-#endif
-
-        /* Get new time-step */
-        const integertime_t ti_new_step = get_part_timestep(p, xp, e);
-
-        /* Update particle */
-        p->time_bin = get_time_bin(ti_new_step);
-        if (p->gpart != NULL) p->gpart->time_bin = p->time_bin;
-
-        /* Update the tracers properties */
-        tracers_after_timestep(p, xp, e->internal_units, e->physical_constants,
-                               with_cosmology, e->cosmology,
-                               e->hydro_properties, e->cooling_func, e->time);
-
-        /* Number of updated particles */
-        updated++;
-        if (p->gpart != NULL) g_updated++;
-
-        /* What is the next sync-point ? */
-        ti_hydro_end_min = min(ti_current + ti_new_step, ti_hydro_end_min);
-        ti_hydro_end_max = max(ti_current + ti_new_step, ti_hydro_end_max);
-
-        /* What is the next starting point for this cell ? */
-        ti_hydro_beg_max = max(ti_current, ti_hydro_beg_max);
-
-        if (p->gpart != NULL) {
-
-          /* What is the next sync-point ? */
-          ti_gravity_end_min =
-              min(ti_current + ti_new_step, ti_gravity_end_min);
-          ti_gravity_end_max =
-              max(ti_current + ti_new_step, ti_gravity_end_max);
-
-          /* What is the next starting point for this cell ? */
-          ti_gravity_beg_max = max(ti_current, ti_gravity_beg_max);
-        }
-      }
-
-      else { /* part is inactive */
-
-        if (!part_is_inhibited(p, e)) {
-
-          const integertime_t ti_end =
-              get_integer_time_end(ti_current, p->time_bin);
-
-          const integertime_t ti_beg =
-              get_integer_time_begin(ti_current + 1, p->time_bin);
-
-          /* What is the next sync-point ? */
-          ti_hydro_end_min = min(ti_end, ti_hydro_end_min);
-          ti_hydro_end_max = max(ti_end, ti_hydro_end_max);
-
-          /* What is the next starting point for this cell ? */
-          ti_hydro_beg_max = max(ti_beg, ti_hydro_beg_max);
-
-          if (p->gpart != NULL) {
-
-            /* What is the next sync-point ? */
-            ti_gravity_end_min = min(ti_end, ti_gravity_end_min);
-            ti_gravity_end_max = max(ti_end, ti_gravity_end_max);
-
-            /* What is the next starting point for this cell ? */
-            ti_gravity_beg_max = max(ti_beg, ti_gravity_beg_max);
-          }
-        }
-      }
-    }
-
-    /* Loop over the g-particles in this cell. */
-    for (int k = 0; k < gcount; k++) {
-
-      /* Get a handle on the part. */
-      struct gpart *restrict gp = &gparts[k];
-
-      /* If the g-particle has no counterpart */
-      if (gp->type == swift_type_dark_matter ||
-          gp->type == swift_type_dark_matter_background) {
-
-        /* need to be updated ? */
-        if (gpart_is_active(gp, e)) {
-
-#ifdef SWIFT_DEBUG_CHECKS
-          /* Current end of time-step */
-          const integertime_t ti_end =
-              get_integer_time_end(ti_current, gp->time_bin);
-
-          if (ti_end != ti_current)
-            error("Computing time-step of rogue particle.");
-#endif
-
-          /* Get new time-step */
-          const integertime_t ti_new_step = get_gpart_timestep(gp, e);
-
-          /* Update particle */
-          gp->time_bin = get_time_bin(ti_new_step);
-
-          /* Number of updated g-particles */
-          g_updated++;
-
-          /* What is the next sync-point ? */
-          ti_gravity_end_min =
-              min(ti_current + ti_new_step, ti_gravity_end_min);
-          ti_gravity_end_max =
-              max(ti_current + ti_new_step, ti_gravity_end_max);
-
-          /* What is the next starting point for this cell ? */
-          ti_gravity_beg_max = max(ti_current, ti_gravity_beg_max);
-
-        } else { /* gpart is inactive */
-
-          if (!gpart_is_inhibited(gp, e)) {
-
-            const integertime_t ti_end =
-                get_integer_time_end(ti_current, gp->time_bin);
-
-            /* What is the next sync-point ? */
-            ti_gravity_end_min = min(ti_end, ti_gravity_end_min);
-            ti_gravity_end_max = max(ti_end, ti_gravity_end_max);
-
-            const integertime_t ti_beg =
-                get_integer_time_begin(ti_current + 1, gp->time_bin);
-
-            /* What is the next starting point for this cell ? */
-            ti_gravity_beg_max = max(ti_beg, ti_gravity_beg_max);
-          }
-        }
-      }
-    }
-
-    /* Loop over the star particles in this cell. */
-    for (int k = 0; k < scount; k++) {
-
-      /* Get a handle on the part. */
-      struct spart *restrict sp = &sparts[k];
-
-      /* need to be updated ? */
-      if (spart_is_active(sp, e)) {
-
-#ifdef SWIFT_DEBUG_CHECKS
-        /* Current end of time-step */
-        const integertime_t ti_end =
-            get_integer_time_end(ti_current, sp->time_bin);
-
-        if (ti_end != ti_current)
-          error("Computing time-step of rogue particle.");
-#endif
-        /* Get new time-step */
-        const integertime_t ti_new_step = get_spart_timestep(sp, e);
-
-        /* Update particle */
-        sp->time_bin = get_time_bin(ti_new_step);
-        sp->gpart->time_bin = get_time_bin(ti_new_step);
-
-        /* Number of updated s-particles */
-        s_updated++;
-        g_updated++;
-
-        ti_stars_end_min = min(ti_current + ti_new_step, ti_stars_end_min);
-        ti_stars_end_max = max(ti_current + ti_new_step, ti_stars_end_max);
-        ti_gravity_end_min = min(ti_current + ti_new_step, ti_gravity_end_min);
-        ti_gravity_end_max = max(ti_current + ti_new_step, ti_gravity_end_max);
-
-        /* What is the next starting point for this cell ? */
-        ti_stars_beg_max = max(ti_current, ti_stars_beg_max);
-        ti_gravity_beg_max = max(ti_current, ti_gravity_beg_max);
-
-        /* star particle is inactive but not inhibited */
-      } else {
-
-        if (!spart_is_inhibited(sp, e)) {
-
-          const integertime_t ti_end =
-              get_integer_time_end(ti_current, sp->time_bin);
-
-          const integertime_t ti_beg =
-              get_integer_time_begin(ti_current + 1, sp->time_bin);
-
-          ti_stars_end_min = min(ti_end, ti_stars_end_min);
-          ti_stars_end_max = max(ti_end, ti_stars_end_max);
-          ti_gravity_end_min = min(ti_end, ti_gravity_end_min);
-          ti_gravity_end_max = max(ti_end, ti_gravity_end_max);
-
-          /* What is the next starting point for this cell ? */
-          ti_stars_beg_max = max(ti_beg, ti_stars_beg_max);
-          ti_gravity_beg_max = max(ti_beg, ti_gravity_beg_max);
-        }
-      }
-    }
-
-    /* Loop over the star particles in this cell. */
-    for (int k = 0; k < bcount; k++) {
-
-      /* Get a handle on the part. */
-      struct bpart *restrict bp = &bparts[k];
-
-      /* need to be updated ? */
-      if (bpart_is_active(bp, e)) {
-
-#ifdef SWIFT_DEBUG_CHECKS
-        /* Current end of time-step */
-        const integertime_t ti_end =
-            get_integer_time_end(ti_current, bp->time_bin);
-
-        if (ti_end != ti_current)
-          error("Computing time-step of rogue particle.");
-#endif
-        /* Get new time-step */
-        const integertime_t ti_new_step = get_bpart_timestep(bp, e);
-
-        /* Update particle */
-        bp->time_bin = get_time_bin(ti_new_step);
-        bp->gpart->time_bin = get_time_bin(ti_new_step);
-
-        /* Number of updated s-particles */
-        b_updated++;
-        g_updated++;
-
-        ti_black_holes_end_min =
-            min(ti_current + ti_new_step, ti_black_holes_end_min);
-        ti_black_holes_end_max =
-            max(ti_current + ti_new_step, ti_black_holes_end_max);
-        ti_gravity_end_min = min(ti_current + ti_new_step, ti_gravity_end_min);
-        ti_gravity_end_max = max(ti_current + ti_new_step, ti_gravity_end_max);
-
-        /* What is the next starting point for this cell ? */
-        ti_black_holes_beg_max = max(ti_current, ti_black_holes_beg_max);
-        ti_gravity_beg_max = max(ti_current, ti_gravity_beg_max);
-
-        /* star particle is inactive but not inhibited */
-      } else {
-
-        if (!bpart_is_inhibited(bp, e)) {
-
-          const integertime_t ti_end =
-              get_integer_time_end(ti_current, bp->time_bin);
-
-          const integertime_t ti_beg =
-              get_integer_time_begin(ti_current + 1, bp->time_bin);
-
-          ti_black_holes_end_min = min(ti_end, ti_black_holes_end_min);
-          ti_black_holes_end_max = max(ti_end, ti_black_holes_end_max);
-          ti_gravity_end_min = min(ti_end, ti_gravity_end_min);
-          ti_gravity_end_max = max(ti_end, ti_gravity_end_max);
-
-          /* What is the next starting point for this cell ? */
-          ti_black_holes_beg_max = max(ti_beg, ti_black_holes_beg_max);
-          ti_gravity_beg_max = max(ti_beg, ti_gravity_beg_max);
-        }
-      }
-    }
-
-  } else {
-
-    /* Loop over the progeny. */
-    for (int k = 0; k < 8; k++) {
-      if (c->progeny[k] != NULL) {
-        struct cell *restrict cp = c->progeny[k];
-
-        /* Recurse */
-        runner_do_timestep(r, cp, 0);
-
-        /* And aggregate */
-        updated += cp->hydro.updated;
-        g_updated += cp->grav.updated;
-        s_updated += cp->stars.updated;
-        b_updated += cp->black_holes.updated;
-
-        ti_hydro_end_min = min(cp->hydro.ti_end_min, ti_hydro_end_min);
-        ti_hydro_end_max = max(cp->hydro.ti_end_max, ti_hydro_end_max);
-        ti_hydro_beg_max = max(cp->hydro.ti_beg_max, ti_hydro_beg_max);
-
-        ti_gravity_end_min = min(cp->grav.ti_end_min, ti_gravity_end_min);
-        ti_gravity_end_max = max(cp->grav.ti_end_max, ti_gravity_end_max);
-        ti_gravity_beg_max = max(cp->grav.ti_beg_max, ti_gravity_beg_max);
-
-        ti_stars_end_min = min(cp->stars.ti_end_min, ti_stars_end_min);
-        ti_stars_end_max = max(cp->grav.ti_end_max, ti_stars_end_max);
-        ti_stars_beg_max = max(cp->grav.ti_beg_max, ti_stars_beg_max);
-
-        ti_black_holes_end_min =
-            min(cp->black_holes.ti_end_min, ti_black_holes_end_min);
-        ti_black_holes_end_max =
-            max(cp->grav.ti_end_max, ti_black_holes_end_max);
-        ti_black_holes_beg_max =
-            max(cp->grav.ti_beg_max, ti_black_holes_beg_max);
-      }
-    }
-  }
-
-  /* Store the values. */
-  c->hydro.updated = updated;
-  c->grav.updated = g_updated;
-  c->stars.updated = s_updated;
-  c->black_holes.updated = b_updated;
-
-  c->hydro.ti_end_min = ti_hydro_end_min;
-  c->hydro.ti_end_max = ti_hydro_end_max;
-  c->hydro.ti_beg_max = ti_hydro_beg_max;
-  c->grav.ti_end_min = ti_gravity_end_min;
-  c->grav.ti_end_max = ti_gravity_end_max;
-  c->grav.ti_beg_max = ti_gravity_beg_max;
-  c->stars.ti_end_min = ti_stars_end_min;
-  c->stars.ti_end_max = ti_stars_end_max;
-  c->stars.ti_beg_max = ti_stars_beg_max;
-  c->black_holes.ti_end_min = ti_black_holes_end_min;
-  c->black_holes.ti_end_max = ti_black_holes_end_max;
-  c->black_holes.ti_beg_max = ti_black_holes_beg_max;
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (c->hydro.ti_end_min == e->ti_current &&
-      c->hydro.ti_end_min < max_nr_timesteps)
-    error("End of next hydro step is current time!");
-  if (c->grav.ti_end_min == e->ti_current &&
-      c->grav.ti_end_min < max_nr_timesteps)
-    error("End of next gravity step is current time!");
-  if (c->stars.ti_end_min == e->ti_current &&
-      c->stars.ti_end_min < max_nr_timesteps)
-    error("End of next stars step is current time!");
-  if (c->black_holes.ti_end_min == e->ti_current &&
-      c->black_holes.ti_end_min < max_nr_timesteps)
-    error("End of next black holes step is current time!");
-#endif
-
-  if (timer) TIMER_TOC(timer_timestep);
-}
-
-/**
- * @brief Apply the time-step limiter to all awaken particles in a cell
- * hierarchy.
- *
- * @param r The task #runner.
- * @param c The #cell.
- * @param force Limit the particles irrespective of the #cell flags.
- * @param timer Are we timing this ?
- */
-void runner_do_limiter(struct runner *r, struct cell *c, int force, int timer) {
-
-  const struct engine *e = r->e;
-  const integertime_t ti_current = e->ti_current;
-  const int count = c->hydro.count;
-  struct part *restrict parts = c->hydro.parts;
-  struct xpart *restrict xparts = c->hydro.xparts;
-
-  TIMER_TIC;
-
-#ifdef SWIFT_DEBUG_CHECKS
-  /* Check that we only limit local cells. */
-  if (c->nodeID != engine_rank) error("Limiting dt of a foreign cell is nope.");
-#endif
-
-  integertime_t ti_hydro_end_min = max_nr_timesteps, ti_hydro_end_max = 0,
-                ti_hydro_beg_max = 0;
-  integertime_t ti_gravity_end_min = max_nr_timesteps, ti_gravity_end_max = 0,
-                ti_gravity_beg_max = 0;
-
-  /* Limit irrespective of cell flags? */
-  force = (force || cell_get_flag(c, cell_flag_do_hydro_limiter));
-
-  /* Early abort? */
-  if (c->hydro.count == 0) {
-
-    /* Clear the limiter flags. */
-    cell_clear_flag(
-        c, cell_flag_do_hydro_limiter | cell_flag_do_hydro_sub_limiter);
-    return;
-  }
-
-  /* Loop over the progeny ? */
-  if (c->split && (force || cell_get_flag(c, cell_flag_do_hydro_sub_limiter))) {
-    for (int k = 0; k < 8; k++) {
-      if (c->progeny[k] != NULL) {
-        struct cell *restrict cp = c->progeny[k];
-
-        /* Recurse */
-        runner_do_limiter(r, cp, force, 0);
-
-        /* And aggregate */
-        ti_hydro_end_min = min(cp->hydro.ti_end_min, ti_hydro_end_min);
-        ti_hydro_end_max = max(cp->hydro.ti_end_max, ti_hydro_end_max);
-        ti_hydro_beg_max = max(cp->hydro.ti_beg_max, ti_hydro_beg_max);
-        ti_gravity_end_min = min(cp->grav.ti_end_min, ti_gravity_end_min);
-        ti_gravity_end_max = max(cp->grav.ti_end_max, ti_gravity_end_max);
-        ti_gravity_beg_max = max(cp->grav.ti_beg_max, ti_gravity_beg_max);
-      }
-    }
-
-    /* Store the updated values */
-    c->hydro.ti_end_min = min(c->hydro.ti_end_min, ti_hydro_end_min);
-    c->hydro.ti_end_max = max(c->hydro.ti_end_max, ti_hydro_end_max);
-    c->hydro.ti_beg_max = max(c->hydro.ti_beg_max, ti_hydro_beg_max);
-    c->grav.ti_end_min = min(c->grav.ti_end_min, ti_gravity_end_min);
-    c->grav.ti_end_max = max(c->grav.ti_end_max, ti_gravity_end_max);
-    c->grav.ti_beg_max = max(c->grav.ti_beg_max, ti_gravity_beg_max);
-
-  } else if (!c->split && force) {
-
-    ti_hydro_end_min = c->hydro.ti_end_min;
-    ti_hydro_end_max = c->hydro.ti_end_max;
-    ti_hydro_beg_max = c->hydro.ti_beg_max;
-    ti_gravity_end_min = c->grav.ti_end_min;
-    ti_gravity_end_max = c->grav.ti_end_max;
-    ti_gravity_beg_max = c->grav.ti_beg_max;
-
-    /* Loop over the gas particles in this cell. */
-    for (int k = 0; k < count; k++) {
-
-      /* Get a handle on the part. */
-      struct part *restrict p = &parts[k];
-      struct xpart *restrict xp = &xparts[k];
-
-      /* Avoid inhibited particles */
-      if (part_is_inhibited(p, e)) continue;
-
-      /* If the particle will be active no need to wake it up */
-      if (part_is_active(p, e) && p->wakeup != time_bin_not_awake)
-        p->wakeup = time_bin_not_awake;
-
-      /* Bip, bip, bip... wake-up time */
-      if (p->wakeup <= time_bin_awake) {
-
-        /* Apply the limiter and get the new time-step size */
-        const integertime_t ti_new_step = timestep_limit_part(p, xp, e);
-
-        /* What is the next sync-point ? */
-        ti_hydro_end_min = min(ti_current + ti_new_step, ti_hydro_end_min);
-        ti_hydro_end_max = max(ti_current + ti_new_step, ti_hydro_end_max);
-
-        /* What is the next starting point for this cell ? */
-        ti_hydro_beg_max = max(ti_current, ti_hydro_beg_max);
-
-        /* Also limit the gpart counter-part */
-        if (p->gpart != NULL) {
-
-          /* Register the time-bin */
-          p->gpart->time_bin = p->time_bin;
-
-          /* What is the next sync-point ? */
-          ti_gravity_end_min =
-              min(ti_current + ti_new_step, ti_gravity_end_min);
-          ti_gravity_end_max =
-              max(ti_current + ti_new_step, ti_gravity_end_max);
-
-          /* What is the next starting point for this cell ? */
-          ti_gravity_beg_max = max(ti_current, ti_gravity_beg_max);
-        }
-      }
-    }
-
-    /* Store the updated values */
-    c->hydro.ti_end_min = min(c->hydro.ti_end_min, ti_hydro_end_min);
-    c->hydro.ti_end_max = max(c->hydro.ti_end_max, ti_hydro_end_max);
-    c->hydro.ti_beg_max = max(c->hydro.ti_beg_max, ti_hydro_beg_max);
-    c->grav.ti_end_min = min(c->grav.ti_end_min, ti_gravity_end_min);
-    c->grav.ti_end_max = max(c->grav.ti_end_max, ti_gravity_end_max);
-    c->grav.ti_beg_max = max(c->grav.ti_beg_max, ti_gravity_beg_max);
-  }
-
-  /* Clear the limiter flags. */
-  cell_clear_flag(c,
-                  cell_flag_do_hydro_limiter | cell_flag_do_hydro_sub_limiter);
-
-  if (timer) TIMER_TOC(timer_do_limiter);
-}
-
-/**
- * @brief End the hydro force calculation of all active particles in a cell
- * by multiplying the acccelerations by the relevant constants
- *
- * @param r The #runner thread.
- * @param c The #cell.
- * @param timer Are we timing this ?
- */
-void runner_do_end_hydro_force(struct runner *r, struct cell *c, int timer) {
-
-  const struct engine *e = r->e;
-
-  TIMER_TIC;
-
-  /* Anything to do here? */
-  if (!cell_is_active_hydro(c, e)) return;
-
-  /* Recurse? */
-  if (c->split) {
-    for (int k = 0; k < 8; k++)
-      if (c->progeny[k] != NULL) runner_do_end_hydro_force(r, c->progeny[k], 0);
-  } else {
-
-    const struct cosmology *cosmo = e->cosmology;
-    const int count = c->hydro.count;
-    struct part *restrict parts = c->hydro.parts;
-
-    /* Loop over the gas particles in this cell. */
-    for (int k = 0; k < count; k++) {
-
-      /* Get a handle on the part. */
-      struct part *restrict p = &parts[k];
-
-      if (part_is_active(p, e)) {
-
-        /* Finish the force loop */
-        hydro_end_force(p, cosmo);
-        chemistry_end_force(p, cosmo);
-
-#ifdef SWIFT_BOUNDARY_PARTICLES
-
-        /* Get the ID of the part */
-        const long long id = p->id;
-
-        /* Cancel hdyro forces of these particles */
-        if (id < SWIFT_BOUNDARY_PARTICLES) {
-
-          /* Don't move ! */
-          hydro_reset_acceleration(p);
-
-#if defined(GIZMO_MFV_SPH) || defined(GIZMO_MFM_SPH)
-
-          /* Some values need to be reset in the Gizmo case. */
-          hydro_prepare_force(p, &c->hydro.xparts[k], cosmo,
-                              e->hydro_properties, 0);
-#endif
-        }
-#endif
-      }
-    }
-  }
-
-  if (timer) TIMER_TOC(timer_end_hydro_force);
-}
-
-/**
- * @brief End the gravity force calculation of all active particles in a cell
- * by multiplying the acccelerations by the relevant constants
- *
- * @param r The #runner thread.
- * @param c The #cell.
- * @param timer Are we timing this ?
- */
-void runner_do_end_grav_force(struct runner *r, struct cell *c, int timer) {
-
-  const struct engine *e = r->e;
-
-  TIMER_TIC;
-
-  /* Anything to do here? */
-  if (!cell_is_active_gravity(c, e)) return;
-
-  /* Recurse? */
-  if (c->split) {
-    for (int k = 0; k < 8; k++)
-      if (c->progeny[k] != NULL) runner_do_end_grav_force(r, c->progeny[k], 0);
-  } else {
-
-    const struct space *s = e->s;
-    const int periodic = s->periodic;
-    const float G_newton = e->physical_constants->const_newton_G;
-
-    /* Potential normalisation in the case of periodic gravity */
-    float potential_normalisation = 0.;
-    if (periodic && (e->policy & engine_policy_self_gravity)) {
-      const double volume = s->dim[0] * s->dim[1] * s->dim[2];
-      const double r_s = e->mesh->r_s;
-      potential_normalisation = 4. * M_PI * e->total_mass * r_s * r_s / volume;
-    }
-
-    const int gcount = c->grav.count;
-    struct gpart *restrict gparts = c->grav.parts;
-
-    /* Loop over the g-particles in this cell. */
-    for (int k = 0; k < gcount; k++) {
-
-      /* Get a handle on the gpart. */
-      struct gpart *restrict gp = &gparts[k];
-
-      if (gpart_is_active(gp, e)) {
-
-        /* Finish the force calculation */
-        gravity_end_force(gp, G_newton, potential_normalisation, periodic);
-
-#ifdef SWIFT_MAKE_GRAVITY_GLASS
-
-        /* Negate the gravity forces */
-        gp->a_grav[0] *= -1.f;
-        gp->a_grav[1] *= -1.f;
-        gp->a_grav[2] *= -1.f;
-#endif
-
-#ifdef SWIFT_NO_GRAVITY_BELOW_ID
-
-        /* Get the ID of the gpart */
-        long long id = 0;
-        if (gp->type == swift_type_gas)
-          id = e->s->parts[-gp->id_or_neg_offset].id;
-        else if (gp->type == swift_type_stars)
-          id = e->s->sparts[-gp->id_or_neg_offset].id;
-        else if (gp->type == swift_type_black_hole)
-          error("Unexisting type");
-        else
-          id = gp->id_or_neg_offset;
-
-        /* Cancel gravity forces of these particles */
-        if (id < SWIFT_NO_GRAVITY_BELOW_ID) {
-
-          /* Don't move ! */
-          gp->a_grav[0] = 0.f;
-          gp->a_grav[1] = 0.f;
-          gp->a_grav[2] = 0.f;
-        }
-#endif
-
-#ifdef SWIFT_DEBUG_CHECKS
-        if ((e->policy & engine_policy_self_gravity) &&
-            !(e->policy & engine_policy_black_holes)) {
-
-          /* Let's add a self interaction to simplify the count */
-          gp->num_interacted++;
-
-          /* Check that this gpart has interacted with all the other
-           * particles (via direct or multipoles) in the box */
-          if (gp->num_interacted !=
-              e->total_nr_gparts - e->count_inhibited_gparts) {
-
-            /* Get the ID of the gpart */
-            long long my_id = 0;
-            if (gp->type == swift_type_gas)
-              my_id = e->s->parts[-gp->id_or_neg_offset].id;
-            else if (gp->type == swift_type_stars)
-              my_id = e->s->sparts[-gp->id_or_neg_offset].id;
-            else if (gp->type == swift_type_black_hole)
-              error("Unexisting type");
-            else
-              my_id = gp->id_or_neg_offset;
-
-            error(
-                "g-particle (id=%lld, type=%s) did not interact "
-                "gravitationally with all other gparts "
-                "gp->num_interacted=%lld, total_gparts=%lld (local "
-                "num_gparts=%zd inhibited_gparts=%lld)",
-                my_id, part_type_names[gp->type], gp->num_interacted,
-                e->total_nr_gparts, e->s->nr_gparts, e->count_inhibited_gparts);
-          }
-        }
-#endif
-      }
-    }
-  }
-  if (timer) TIMER_TOC(timer_end_grav_force);
-}
-
-/**
- * @brief Process all the gas particles in a cell that have been flagged for
- * swallowing by a black hole.
- *
- * This is done by recursing down to the leaf-level and skipping the sub-cells
- * that have not been drifted as they would not have any particles with
- * swallowing flag. We then loop over the particles with a flag and look into
- * the space-wide list of black holes for the particle with the corresponding
- * ID. If found, the BH swallows the gas particle and the gas particle is
- * removed. If the cell is local, we may be looking for a foreign BH, in which
- * case, we do not update the BH (that will be done on its node) but just remove
- * the gas particle.
- *
- * @param r The thread #runner.
- * @param c The #cell.
- * @param timer Are we timing this?
- */
-void runner_do_gas_swallow(struct runner *r, struct cell *c, int timer) {
-
-  struct engine *e = r->e;
-  struct space *s = e->s;
-  struct bpart *bparts = s->bparts;
-  const size_t nr_bpart = s->nr_bparts;
-#ifdef WITH_MPI
-  struct bpart *bparts_foreign = s->bparts_foreign;
-  const size_t nr_bparts_foreign = s->nr_bparts_foreign;
-#endif
-
-  struct part *parts = c->hydro.parts;
-  struct xpart *xparts = c->hydro.xparts;
-
-  /* Early abort?
-   * (We only want cells for which we drifted the gas as these are
-   * the only ones that could have gas particles that have been flagged
-   * for swallowing) */
-  if (c->hydro.count == 0 || c->hydro.ti_old_part != e->ti_current) {
-    return;
-  }
-
-  /* Loop over the progeny ? */
-  if (c->split) {
-    for (int k = 0; k < 8; k++) {
-      if (c->progeny[k] != NULL) {
-        struct cell *restrict cp = c->progeny[k];
-
-        runner_do_gas_swallow(r, cp, 0);
-      }
-    }
-  } else {
-
-    /* Loop over all the gas particles in the cell
-     * Note that the cell (and hence the parts) may be local or foreign. */
-    const size_t nr_parts = c->hydro.count;
-    for (size_t k = 0; k < nr_parts; k++) {
-
-      /* Get a handle on the part. */
-      struct part *const p = &parts[k];
-      struct xpart *const xp = &xparts[k];
-
-      /* Ignore inhibited particles (they have already been removed!) */
-      if (part_is_inhibited(p, e)) continue;
-
-      /* Get the ID of the black holes that will swallow this part */
-      const long long swallow_id =
-          black_holes_get_part_swallow_id(&p->black_holes_data);
-
-      /* Has this particle been flagged for swallowing? */
-      if (swallow_id >= 0) {
-
-#ifdef SWIFT_DEBUG_CHECKS
-        if (p->ti_drift != e->ti_current)
-          error("Trying to swallow an un-drifted particle.");
-#endif
-
-        /* ID of the BH swallowing this particle */
-        const long long BH_id = swallow_id;
-
-        /* Have we found this particle's BH already? */
-        int found = 0;
-
-        /* Let's look for the hungry black hole in the local list */
-        for (size_t i = 0; i < nr_bpart; ++i) {
-
-          /* Get a handle on the bpart. */
-          struct bpart *bp = &bparts[i];
-
-          if (bp->id == BH_id) {
-
-            /* Lock the space as we are going to work directly on the bpart list
-             */
-            lock_lock(&s->lock);
-
-            /* Swallow the gas particle (i.e. update the BH properties) */
-            black_holes_swallow_part(bp, p, xp, e->cosmology);
-
-            /* Release the space as we are done updating the bpart */
-            if (lock_unlock(&s->lock) != 0)
-              error("Failed to unlock the space.");
-
-            message("BH %lld swallowing gas particle %lld", bp->id, p->id);
-
-            /* If the gas particle is local, remove it */
-            if (c->nodeID == e->nodeID) {
-
-              message("BH %lld removing gas particle %lld", bp->id, p->id);
-
-              lock_lock(&e->s->lock);
-
-              /* Re-check that the particle has not been removed
-               * by another thread before we do the deed. */
-              if (!part_is_inhibited(p, e)) {
-
-                /* Finally, remove the gas particle from the system
-                 * Recall that the gpart associated with it is also removed
-                 * at the same time. */
-                cell_remove_part(e, c, p, xp);
-              }
-
-              if (lock_unlock(&e->s->lock) != 0)
-                error("Failed to unlock the space!");
-            }
-
-            /* In any case, prevent the particle from being re-swallowed */
-            black_holes_mark_part_as_swallowed(&p->black_holes_data);
-
-            found = 1;
-            break;
-          }
-
-        } /* Loop over local BHs */
-
-#ifdef WITH_MPI
-
-        /* We could also be in the case of a local gas particle being
-         * swallowed by a foreign BH. In this case, we won't update the
-         * BH but just remove the particle from the local list. */
-        if (c->nodeID == e->nodeID && !found) {
-
-          /* Let's look for the foreign hungry black hole */
-          for (size_t i = 0; i < nr_bparts_foreign; ++i) {
-
-            /* Get a handle on the bpart. */
-            struct bpart *bp = &bparts_foreign[i];
-
-            if (bp->id == BH_id) {
-
-              message("BH %lld removing gas particle %lld (foreign BH case)",
-                      bp->id, p->id);
-
-              lock_lock(&e->s->lock);
-
-              /* Re-check that the particle has not been removed
-               * by another thread before we do the deed. */
-              if (!part_is_inhibited(p, e)) {
-
-                /* Finally, remove the gas particle from the system */
-                cell_remove_part(e, c, p, xp);
-              }
-
-              if (lock_unlock(&e->s->lock) != 0)
-                error("Failed to unlock the space!");
-
-              found = 1;
-              break;
-            }
-          } /* Loop over foreign BHs */
-        }   /* Is the cell local? */
-#endif
-
-        /* If we have a local particle, we must have found the BH in one
-         * of our list of black holes. */
-        if (c->nodeID == e->nodeID && !found) {
-          error("Gas particle %lld could not find BH %lld to be swallowed",
-                p->id, swallow_id);
-        }
-      } /* Part was flagged for swallowing */
-    }   /* Loop over the parts */
-  }     /* Cell is not split */
-}
-
-/**
- * @brief Processing of gas particles to swallow - self task case.
- *
- * @param r The thread #runner.
- * @param c The #cell.
- * @param timer Are we timing this?
- */
-void runner_do_gas_swallow_self(struct runner *r, struct cell *c, int timer) {
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (c->nodeID != r->e->nodeID) error("Running self task on foreign node");
-  if (!cell_is_active_black_holes(c, r->e))
-    error("Running self task on inactive cell");
-#endif
-
-  runner_do_gas_swallow(r, c, timer);
-}
-
-/**
- * @brief Processing of gas particles to swallow - pair task case.
- *
- * @param r The thread #runner.
- * @param ci First #cell.
- * @param cj Second #cell.
- * @param timer Are we timing this?
- */
-void runner_do_gas_swallow_pair(struct runner *r, struct cell *ci,
-                                struct cell *cj, int timer) {
-
-  const struct engine *e = r->e;
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (ci->nodeID != e->nodeID && cj->nodeID != e->nodeID)
-    error("Running pair task on foreign node");
-#endif
-
-  /* Run the swallowing loop only in the cell that is the neighbour of the
-   * active BH */
-  if (cell_is_active_black_holes(cj, e)) runner_do_gas_swallow(r, ci, timer);
-  if (cell_is_active_black_holes(ci, e)) runner_do_gas_swallow(r, cj, timer);
-}
-
-/**
- * @brief Process all the BH particles in a cell that have been flagged for
- * swallowing by a black hole.
- *
- * This is done by recursing down to the leaf-level and skipping the sub-cells
- * that have not been drifted as they would not have any particles with
- * swallowing flag. We then loop over the particles with a flag and look into
- * the space-wide list of black holes for the particle with the corresponding
- * ID. If found, the BH swallows the BH particle and the BH particle is
- * removed. If the cell is local, we may be looking for a foreign BH, in which
- * case, we do not update the BH (that will be done on its node) but just remove
- * the BH particle.
- *
- * @param r The thread #runner.
- * @param c The #cell.
- * @param timer Are we timing this?
- */
-void runner_do_bh_swallow(struct runner *r, struct cell *c, int timer) {
-
-  struct engine *e = r->e;
-  struct space *s = e->s;
-  struct bpart *bparts = s->bparts;
-  const size_t nr_bpart = s->nr_bparts;
-#ifdef WITH_MPI
-  struct bpart *bparts_foreign = s->bparts_foreign;
-  const size_t nr_bparts_foreign = s->nr_bparts_foreign;
-#endif
-
-  struct bpart *cell_bparts = c->black_holes.parts;
-
-  /* Early abort?
-   * (We only want cells for which we drifted the BH as these are
-   * the only ones that could have BH particles that have been flagged
-   * for swallowing) */
-  if (c->black_holes.count == 0 ||
-      c->black_holes.ti_old_part != e->ti_current) {
-    return;
-  }
-
-  /* Loop over the progeny ? */
-  if (c->split) {
-    for (int k = 0; k < 8; k++) {
-      if (c->progeny[k] != NULL) {
-        struct cell *restrict cp = c->progeny[k];
-
-        runner_do_bh_swallow(r, cp, 0);
-      }
-    }
-  } else {
-
-    /* Loop over all the gas particles in the cell
-     * Note that the cell (and hence the bparts) may be local or foreign. */
-    const size_t nr_cell_bparts = c->black_holes.count;
-    for (size_t k = 0; k < nr_cell_bparts; k++) {
-
-      /* Get a handle on the part. */
-      struct bpart *const cell_bp = &cell_bparts[k];
-
-      /* Ignore inhibited particles (they have already been removed!) */
-      if (bpart_is_inhibited(cell_bp, e)) continue;
-
-      /* Get the ID of the black holes that will swallow this part */
-      const long long swallow_id =
-          black_holes_get_bpart_swallow_id(&cell_bp->merger_data);
-
-      /* message("OO id=%lld swallow_id = %lld", cell_bp->id, */
-      /* 	      swallow_id); */
-
-      /* Has this particle been flagged for swallowing? */
-      if (swallow_id >= 0) {
-
-#ifdef SWIFT_DEBUG_CHECKS
-        if (cell_bp->ti_drift != e->ti_current)
-          error("Trying to swallow an un-drifted particle.");
-#endif
-
-        /* ID of the BH swallowing this particle */
-        const long long BH_id = swallow_id;
-
-        /* Have we found this particle's BH already? */
-        int found = 0;
-
-        /* Let's look for the hungry black hole in the local list */
-        for (size_t i = 0; i < nr_bpart; ++i) {
-
-          /* Get a handle on the bpart. */
-          struct bpart *bp = &bparts[i];
-
-          if (bp->id == BH_id) {
-
-            /* Lock the space as we are going to work directly on the bpart list
-             */
-            lock_lock(&s->lock);
-
-            /* Swallow the gas particle (i.e. update the BH properties) */
-            black_holes_swallow_bpart(bp, cell_bp, e->cosmology);
-
-            /* Release the space as we are done updating the bpart */
-            if (lock_unlock(&s->lock) != 0)
-              error("Failed to unlock the space.");
-
-            message("BH %lld swallowing BH particle %lld", bp->id, cell_bp->id);
-
-            /* If the gas particle is local, remove it */
-            if (c->nodeID == e->nodeID) {
-
-              message("BH %lld removing BH particle %lld", bp->id, cell_bp->id);
-
-              /* Finally, remove the gas particle from the system
-               * Recall that the gpart associated with it is also removed
-               * at the same time. */
-              cell_remove_bpart(e, c, cell_bp);
-            }
-
-            /* In any case, prevent the particle from being re-swallowed */
-            black_holes_mark_bpart_as_merged(&cell_bp->merger_data);
-
-            found = 1;
-            break;
-          }
-
-        } /* Loop over local BHs */
-
-#ifdef WITH_MPI
-
-        /* We could also be in the case of a local BH particle being
-         * swallowed by a foreign BH. In this case, we won't update the
-         * foreign BH but just remove the particle from the local list. */
-        if (c->nodeID == e->nodeID && !found) {
-
-          /* Let's look for the foreign hungry black hole */
-          for (size_t i = 0; i < nr_bparts_foreign; ++i) {
-
-            /* Get a handle on the bpart. */
-            struct bpart *bp = &bparts_foreign[i];
-
-            if (bp->id == BH_id) {
-
-              message("BH %lld removing BH particle %lld (foreign BH case)",
-                      bp->id, cell_bp->id);
-
-              /* Finally, remove the gas particle from the system */
-              cell_remove_bpart(e, c, cell_bp);
-
-              found = 1;
-              break;
-            }
-          } /* Loop over foreign BHs */
-        }   /* Is the cell local? */
-#endif
-
-        /* If we have a local particle, we must have found the BH in one
-         * of our list of black holes. */
-        if (c->nodeID == e->nodeID && !found) {
-          error("BH particle %lld could not find BH %lld to be swallowed",
-                cell_bp->id, swallow_id);
-        }
-      } /* Part was flagged for swallowing */
-    }   /* Loop over the parts */
-  }     /* Cell is not split */
-}
-
-/**
- * @brief Processing of bh particles to swallow - self task case.
- *
- * @param r The thread #runner.
- * @param c The #cell.
- * @param timer Are we timing this?
- */
-void runner_do_bh_swallow_self(struct runner *r, struct cell *c, int timer) {
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (c->nodeID != r->e->nodeID) error("Running self task on foreign node");
-  if (!cell_is_active_black_holes(c, r->e))
-    error("Running self task on inactive cell");
-#endif
-
-  runner_do_bh_swallow(r, c, timer);
-}
-
-/**
- * @brief Processing of bh particles to swallow - pair task case.
- *
- * @param r The thread #runner.
- * @param ci First #cell.
- * @param cj Second #cell.
- * @param timer Are we timing this?
- */
-void runner_do_bh_swallow_pair(struct runner *r, struct cell *ci,
-                               struct cell *cj, int timer) {
-
-  const struct engine *e = r->e;
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (ci->nodeID != e->nodeID && cj->nodeID != e->nodeID)
-    error("Running pair task on foreign node");
-#endif
-
-  /* Run the swallowing loop only in the cell that is the neighbour of the
-   * active BH */
-  if (cell_is_active_black_holes(cj, e)) runner_do_bh_swallow(r, ci, timer);
-  if (cell_is_active_black_holes(ci, e)) runner_do_bh_swallow(r, cj, timer);
-}
-
-/**
- * @brief Construct the cell properties from the received #part.
- *
- * @param r The runner thread.
- * @param c The cell.
- * @param clear_sorts Should we clear the sort flag and hence trigger a sort ?
- * @param timer Are we timing this ?
- */
-void runner_do_recv_part(struct runner *r, struct cell *c, int clear_sorts,
-                         int timer) {
-#ifdef WITH_MPI
-
-  const struct part *restrict parts = c->hydro.parts;
-  const size_t nr_parts = c->hydro.count;
-  const integertime_t ti_current = r->e->ti_current;
-
-  TIMER_TIC;
-
-  integertime_t ti_hydro_end_min = max_nr_timesteps;
-  integertime_t ti_hydro_end_max = 0;
-  timebin_t time_bin_min = num_time_bins;
-  timebin_t time_bin_max = 0;
-  float h_max = 0.f;
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (c->nodeID == engine_rank) error("Updating a local cell!");
-#endif
-
-  /* Clear this cell's sorted mask. */
-  if (clear_sorts) c->hydro.sorted = 0;
-
-  /* If this cell is a leaf, collect the particle data. */
-  if (!c->split) {
-
-    /* Collect everything... */
-    for (size_t k = 0; k < nr_parts; k++) {
-      if (parts[k].time_bin == time_bin_inhibited) continue;
-      time_bin_min = min(time_bin_min, parts[k].time_bin);
-      time_bin_max = max(time_bin_max, parts[k].time_bin);
-      h_max = max(h_max, parts[k].h);
-    }
-
-    /* Convert into a time */
-    ti_hydro_end_min = get_integer_time_end(ti_current, time_bin_min);
-    ti_hydro_end_max = get_integer_time_end(ti_current, time_bin_max);
-  }
-
-  /* Otherwise, recurse and collect. */
-  else {
-    for (int k = 0; k < 8; k++) {
-      if (c->progeny[k] != NULL && c->progeny[k]->hydro.count > 0) {
-        runner_do_recv_part(r, c->progeny[k], clear_sorts, 0);
-        ti_hydro_end_min =
-            min(ti_hydro_end_min, c->progeny[k]->hydro.ti_end_min);
-        ti_hydro_end_max =
-            max(ti_hydro_end_max, c->progeny[k]->hydro.ti_end_max);
-        h_max = max(h_max, c->progeny[k]->hydro.h_max);
-      }
-    }
-  }
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (ti_hydro_end_min < ti_current)
-    error(
-        "Received a cell at an incorrect time c->ti_end_min=%lld, "
-        "e->ti_current=%lld.",
-        ti_hydro_end_min, ti_current);
-#endif
-
-  /* ... and store. */
-  // c->hydro.ti_end_min = ti_hydro_end_min;
-  // c->hydro.ti_end_max = ti_hydro_end_max;
-  c->hydro.ti_old_part = ti_current;
-  c->hydro.h_max = h_max;
-
-  if (timer) TIMER_TOC(timer_dorecv_part);
-
-#else
-  error("SWIFT was not compiled with MPI support.");
-#endif
-}
-
-/**
- * @brief Construct the cell properties from the received #gpart.
- *
- * @param r The runner thread.
- * @param c The cell.
- * @param timer Are we timing this ?
- */
-void runner_do_recv_gpart(struct runner *r, struct cell *c, int timer) {
-
-#ifdef WITH_MPI
-
-  const struct gpart *restrict gparts = c->grav.parts;
-  const size_t nr_gparts = c->grav.count;
-  const integertime_t ti_current = r->e->ti_current;
-
-  TIMER_TIC;
-
-  integertime_t ti_gravity_end_min = max_nr_timesteps;
-  integertime_t ti_gravity_end_max = 0;
-  timebin_t time_bin_min = num_time_bins;
-  timebin_t time_bin_max = 0;
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (c->nodeID == engine_rank) error("Updating a local cell!");
-#endif
-
-  /* If this cell is a leaf, collect the particle data. */
-  if (!c->split) {
-
-    /* Collect everything... */
-    for (size_t k = 0; k < nr_gparts; k++) {
-      if (gparts[k].time_bin == time_bin_inhibited) continue;
-      time_bin_min = min(time_bin_min, gparts[k].time_bin);
-      time_bin_max = max(time_bin_max, gparts[k].time_bin);
-    }
-
-    /* Convert into a time */
-    ti_gravity_end_min = get_integer_time_end(ti_current, time_bin_min);
-    ti_gravity_end_max = get_integer_time_end(ti_current, time_bin_max);
-  }
-
-  /* Otherwise, recurse and collect. */
-  else {
-    for (int k = 0; k < 8; k++) {
-      if (c->progeny[k] != NULL && c->progeny[k]->grav.count > 0) {
-        runner_do_recv_gpart(r, c->progeny[k], 0);
-        ti_gravity_end_min =
-            min(ti_gravity_end_min, c->progeny[k]->grav.ti_end_min);
-        ti_gravity_end_max =
-            max(ti_gravity_end_max, c->progeny[k]->grav.ti_end_max);
-      }
-    }
-  }
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (ti_gravity_end_min < ti_current)
-    error(
-        "Received a cell at an incorrect time c->ti_end_min=%lld, "
-        "e->ti_current=%lld.",
-        ti_gravity_end_min, ti_current);
-#endif
-
-  /* ... and store. */
-  // c->grav.ti_end_min = ti_gravity_end_min;
-  // c->grav.ti_end_max = ti_gravity_end_max;
-  c->grav.ti_old_part = ti_current;
-
-  if (timer) TIMER_TOC(timer_dorecv_gpart);
-
-#else
-  error("SWIFT was not compiled with MPI support.");
-#endif
-}
-
-/**
- * @brief Construct the cell properties from the received #spart.
- *
- * @param r The runner thread.
- * @param c The cell.
- * @param clear_sorts Should we clear the sort flag and hence trigger a sort ?
- * @param timer Are we timing this ?
- */
-void runner_do_recv_spart(struct runner *r, struct cell *c, int clear_sorts,
-                          int timer) {
-
-#ifdef WITH_MPI
-
-  struct spart *restrict sparts = c->stars.parts;
-  const size_t nr_sparts = c->stars.count;
-  const integertime_t ti_current = r->e->ti_current;
-
-  TIMER_TIC;
-
-  integertime_t ti_stars_end_min = max_nr_timesteps;
-  integertime_t ti_stars_end_max = 0;
-  timebin_t time_bin_min = num_time_bins;
-  timebin_t time_bin_max = 0;
-  float h_max = 0.f;
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (c->nodeID == engine_rank) error("Updating a local cell!");
-#endif
-
-  /* Clear this cell's sorted mask. */
-  if (clear_sorts) c->stars.sorted = 0;
-
-  /* If this cell is a leaf, collect the particle data. */
-  if (!c->split) {
-
-    /* Collect everything... */
-    for (size_t k = 0; k < nr_sparts; k++) {
-#ifdef DEBUG_INTERACTIONS_STARS
-      sparts[k].num_ngb_force = 0;
-#endif
-      if (sparts[k].time_bin == time_bin_inhibited) continue;
-      time_bin_min = min(time_bin_min, sparts[k].time_bin);
-      time_bin_max = max(time_bin_max, sparts[k].time_bin);
-      h_max = max(h_max, sparts[k].h);
-    }
-
-    /* Convert into a time */
-    ti_stars_end_min = get_integer_time_end(ti_current, time_bin_min);
-    ti_stars_end_max = get_integer_time_end(ti_current, time_bin_max);
-  }
-
-  /* Otherwise, recurse and collect. */
-  else {
-    for (int k = 0; k < 8; k++) {
-      if (c->progeny[k] != NULL && c->progeny[k]->stars.count > 0) {
-        runner_do_recv_spart(r, c->progeny[k], clear_sorts, 0);
-        ti_stars_end_min =
-            min(ti_stars_end_min, c->progeny[k]->stars.ti_end_min);
-        ti_stars_end_max =
-            max(ti_stars_end_max, c->progeny[k]->stars.ti_end_max);
-        h_max = max(h_max, c->progeny[k]->stars.h_max);
-      }
-    }
-  }
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (ti_stars_end_min < ti_current &&
-      !(r->e->policy & engine_policy_star_formation))
-    error(
-        "Received a cell at an incorrect time c->ti_end_min=%lld, "
-        "e->ti_current=%lld.",
-        ti_stars_end_min, ti_current);
-#endif
-
-  /* ... and store. */
-  // c->grav.ti_end_min = ti_gravity_end_min;
-  // c->grav.ti_end_max = ti_gravity_end_max;
-  c->stars.ti_old_part = ti_current;
-  c->stars.h_max = h_max;
-
-  if (timer) TIMER_TOC(timer_dorecv_spart);
-
-#else
-  error("SWIFT was not compiled with MPI support.");
-#endif
-}
-
-/**
- * @brief Construct the cell properties from the received #bpart.
- *
- * Note that we do not need to clear the sorts since we do not sort
- * the black holes.
- *
- * @param r The runner thread.
- * @param c The cell.
- * @param clear_sorts Should we clear the sort flag and hence trigger a sort ?
- * @param timer Are we timing this ?
- */
-void runner_do_recv_bpart(struct runner *r, struct cell *c, int clear_sorts,
-                          int timer) {
-
-#ifdef WITH_MPI
-
-  struct bpart *restrict bparts = c->black_holes.parts;
-  const size_t nr_bparts = c->black_holes.count;
-  const integertime_t ti_current = r->e->ti_current;
-
-  TIMER_TIC;
-
-  integertime_t ti_black_holes_end_min = max_nr_timesteps;
-  integertime_t ti_black_holes_end_max = 0;
-  timebin_t time_bin_min = num_time_bins;
-  timebin_t time_bin_max = 0;
-  float h_max = 0.f;
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (c->nodeID == engine_rank) error("Updating a local cell!");
-#endif
-
-  /* If this cell is a leaf, collect the particle data. */
-  if (!c->split) {
-
-    /* Collect everything... */
-    for (size_t k = 0; k < nr_bparts; k++) {
-#ifdef DEBUG_INTERACTIONS_BLACK_HOLES
-      bparts[k].num_ngb_force = 0;
-#endif
-
-      /* message("Receiving bparts id=%lld time_bin=%d", */
-      /* 	      bparts[k].id, bparts[k].time_bin); */
-
-      if (bparts[k].time_bin == time_bin_inhibited) continue;
-      time_bin_min = min(time_bin_min, bparts[k].time_bin);
-      time_bin_max = max(time_bin_max, bparts[k].time_bin);
-      h_max = max(h_max, bparts[k].h);
-    }
-
-    /* Convert into a time */
-    ti_black_holes_end_min = get_integer_time_end(ti_current, time_bin_min);
-    ti_black_holes_end_max = get_integer_time_end(ti_current, time_bin_max);
-  }
-
-  /* Otherwise, recurse and collect. */
-  else {
-    for (int k = 0; k < 8; k++) {
-      if (c->progeny[k] != NULL && c->progeny[k]->black_holes.count > 0) {
-        runner_do_recv_bpart(r, c->progeny[k], clear_sorts, 0);
-        ti_black_holes_end_min =
-            min(ti_black_holes_end_min, c->progeny[k]->black_holes.ti_end_min);
-        ti_black_holes_end_max =
-            max(ti_black_holes_end_max, c->progeny[k]->black_holes.ti_end_max);
-        h_max = max(h_max, c->progeny[k]->black_holes.h_max);
-      }
-    }
-  }
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (ti_black_holes_end_min < ti_current)
-    error(
-        "Received a cell at an incorrect time c->ti_end_min=%lld, "
-        "e->ti_current=%lld.",
-        ti_black_holes_end_min, ti_current);
-#endif
-
-  /* ... and store. */
-  // c->grav.ti_end_min = ti_gravity_end_min;
-  // c->grav.ti_end_max = ti_gravity_end_max;
-  c->black_holes.ti_old_part = ti_current;
-  c->black_holes.h_max = h_max;
-
-  if (timer) TIMER_TOC(timer_dorecv_bpart);
-
-#else
-  error("SWIFT was not compiled with MPI support.");
-#endif
-}
-
-/**
- * @brief The #runner main thread routine.
- *
- * @param data A pointer to this thread's data.
- */
-void *runner_main(void *data) {
-
-  struct runner *r = (struct runner *)data;
-  struct engine *e = r->e;
-  struct scheduler *sched = &e->sched;
-  unsigned int seed = r->id;
-  pthread_setspecific(sched->local_seed_pointer, &seed);
-  /* Main loop. */
-  while (1) {
-
-    /* Wait at the barrier. */
-    engine_barrier(e);
-
-    /* Can we go home yet? */
-    if (e->step_props & engine_step_prop_done) break;
-
-    /* Re-set the pointer to the previous task, as there is none. */
-    struct task *t = NULL;
-    struct task *prev = NULL;
-
-    /* Loop while there are tasks... */
-    while (1) {
-
-      /* If there's no old task, try to get a new one. */
-      if (t == NULL) {
-
-        /* Get the task. */
-        TIMER_TIC
-        t = scheduler_gettask(sched, r->qid, prev);
-        TIMER_TOC(timer_gettask);
-
-        /* Did I get anything? */
-        if (t == NULL) break;
-      }
-
-      /* Get the cells. */
-      struct cell *ci = t->ci;
-      struct cell *cj = t->cj;
-
-#ifdef SWIFT_DEBUG_TASKS
-      /* Mark the thread we run on */
-      t->rid = r->cpuid;
-
-      /* And recover the pair direction */
-      if (t->type == task_type_pair || t->type == task_type_sub_pair) {
-        struct cell *ci_temp = ci;
-        struct cell *cj_temp = cj;
-        double shift[3];
-        t->sid = space_getsid(e->s, &ci_temp, &cj_temp, shift);
-      } else {
-        t->sid = -1;
-      }
-#endif
-
-#ifdef SWIFT_DEBUG_CHECKS
-      /* Check that we haven't scheduled an inactive task */
-      t->ti_run = e->ti_current;
-      /* Store the task that will be running (for debugging only) */
-      r->t = t;
-#endif
-
-      /* Different types of tasks... */
-      switch (t->type) {
-        case task_type_self:
-          if (t->subtype == task_subtype_density)
-            runner_doself1_branch_density(r, ci);
-#ifdef EXTRA_HYDRO_LOOP
-          else if (t->subtype == task_subtype_gradient)
-            runner_doself1_branch_gradient(r, ci);
-#endif
-          else if (t->subtype == task_subtype_force)
-            runner_doself2_branch_force(r, ci);
-          else if (t->subtype == task_subtype_limiter)
-            runner_doself2_branch_limiter(r, ci);
-          else if (t->subtype == task_subtype_grav)
-            runner_doself_recursive_grav(r, ci, 1);
-          else if (t->subtype == task_subtype_external_grav)
-            runner_do_grav_external(r, ci, 1);
-          else if (t->subtype == task_subtype_stars_density)
-            runner_doself_branch_stars_density(r, ci);
-          else if (t->subtype == task_subtype_stars_feedback)
-            runner_doself_branch_stars_feedback(r, ci);
-          else if (t->subtype == task_subtype_bh_density)
-            runner_doself_branch_bh_density(r, ci);
-          else if (t->subtype == task_subtype_bh_swallow)
-            runner_doself_branch_bh_swallow(r, ci);
-          else if (t->subtype == task_subtype_do_gas_swallow)
-            runner_do_gas_swallow_self(r, ci, 1);
-          else if (t->subtype == task_subtype_do_bh_swallow)
-            runner_do_bh_swallow_self(r, ci, 1);
-          else if (t->subtype == task_subtype_bh_feedback)
-            runner_doself_branch_bh_feedback(r, ci);
-          else
-            error("Unknown/invalid task subtype (%s).",
-                  subtaskID_names[t->subtype]);
-          break;
-
-        case task_type_pair:
-          if (t->subtype == task_subtype_density)
-            runner_dopair1_branch_density(r, ci, cj);
-#ifdef EXTRA_HYDRO_LOOP
-          else if (t->subtype == task_subtype_gradient)
-            runner_dopair1_branch_gradient(r, ci, cj);
-#endif
-          else if (t->subtype == task_subtype_force)
-            runner_dopair2_branch_force(r, ci, cj);
-          else if (t->subtype == task_subtype_limiter)
-            runner_dopair2_branch_limiter(r, ci, cj);
-          else if (t->subtype == task_subtype_grav)
-            runner_dopair_recursive_grav(r, ci, cj, 1);
-          else if (t->subtype == task_subtype_stars_density)
-            runner_dopair_branch_stars_density(r, ci, cj);
-          else if (t->subtype == task_subtype_stars_feedback)
-            runner_dopair_branch_stars_feedback(r, ci, cj);
-          else if (t->subtype == task_subtype_bh_density)
-            runner_dopair_branch_bh_density(r, ci, cj);
-          else if (t->subtype == task_subtype_bh_swallow)
-            runner_dopair_branch_bh_swallow(r, ci, cj);
-          else if (t->subtype == task_subtype_do_gas_swallow)
-            runner_do_gas_swallow_pair(r, ci, cj, 1);
-          else if (t->subtype == task_subtype_do_bh_swallow)
-            runner_do_bh_swallow_pair(r, ci, cj, 1);
-          else if (t->subtype == task_subtype_bh_feedback)
-            runner_dopair_branch_bh_feedback(r, ci, cj);
-          else
-            error("Unknown/invalid task subtype (%s/%s).",
-                  taskID_names[t->type], subtaskID_names[t->subtype]);
-          break;
-
-        case task_type_sub_self:
-          if (t->subtype == task_subtype_density)
-            runner_dosub_self1_density(r, ci, 1);
-#ifdef EXTRA_HYDRO_LOOP
-          else if (t->subtype == task_subtype_gradient)
-            runner_dosub_self1_gradient(r, ci, 1);
-#endif
-          else if (t->subtype == task_subtype_force)
-            runner_dosub_self2_force(r, ci, 1);
-          else if (t->subtype == task_subtype_limiter)
-            runner_dosub_self2_limiter(r, ci, 1);
-          else if (t->subtype == task_subtype_stars_density)
-            runner_dosub_self_stars_density(r, ci, 1);
-          else if (t->subtype == task_subtype_stars_feedback)
-            runner_dosub_self_stars_feedback(r, ci, 1);
-          else if (t->subtype == task_subtype_bh_density)
-            runner_dosub_self_bh_density(r, ci, 1);
-          else if (t->subtype == task_subtype_bh_swallow)
-            runner_dosub_self_bh_swallow(r, ci, 1);
-          else if (t->subtype == task_subtype_do_gas_swallow)
-            runner_do_gas_swallow_self(r, ci, 1);
-          else if (t->subtype == task_subtype_do_bh_swallow)
-            runner_do_bh_swallow_self(r, ci, 1);
-          else if (t->subtype == task_subtype_bh_feedback)
-            runner_dosub_self_bh_feedback(r, ci, 1);
-          else
-            error("Unknown/invalid task subtype (%s/%s).",
-                  taskID_names[t->type], subtaskID_names[t->subtype]);
-          break;
-
-        case task_type_sub_pair:
-          if (t->subtype == task_subtype_density)
-            runner_dosub_pair1_density(r, ci, cj, 1);
-#ifdef EXTRA_HYDRO_LOOP
-          else if (t->subtype == task_subtype_gradient)
-            runner_dosub_pair1_gradient(r, ci, cj, 1);
-#endif
-          else if (t->subtype == task_subtype_force)
-            runner_dosub_pair2_force(r, ci, cj, 1);
-          else if (t->subtype == task_subtype_limiter)
-            runner_dosub_pair2_limiter(r, ci, cj, 1);
-          else if (t->subtype == task_subtype_stars_density)
-            runner_dosub_pair_stars_density(r, ci, cj, 1);
-          else if (t->subtype == task_subtype_stars_feedback)
-            runner_dosub_pair_stars_feedback(r, ci, cj, 1);
-          else if (t->subtype == task_subtype_bh_density)
-            runner_dosub_pair_bh_density(r, ci, cj, 1);
-          else if (t->subtype == task_subtype_bh_swallow)
-            runner_dosub_pair_bh_swallow(r, ci, cj, 1);
-          else if (t->subtype == task_subtype_do_gas_swallow)
-            runner_do_gas_swallow_pair(r, ci, cj, 1);
-          else if (t->subtype == task_subtype_do_bh_swallow)
-            runner_do_bh_swallow_pair(r, ci, cj, 1);
-          else if (t->subtype == task_subtype_bh_feedback)
-            runner_dosub_pair_bh_feedback(r, ci, cj, 1);
-          else
-            error("Unknown/invalid task subtype (%s/%s).",
-                  taskID_names[t->type], subtaskID_names[t->subtype]);
-          break;
-
-        case task_type_sort:
-          /* Cleanup only if any of the indices went stale. */
-          runner_do_hydro_sort(
-              r, ci, t->flags,
-              ci->hydro.dx_max_sort_old > space_maxreldx * ci->dmin, 1);
-          /* Reset the sort flags as our work here is done. */
-          t->flags = 0;
-          break;
-        case task_type_stars_sort:
-          /* Cleanup only if any of the indices went stale. */
-          runner_do_stars_sort(
-              r, ci, t->flags,
-              ci->stars.dx_max_sort_old > space_maxreldx * ci->dmin, 1);
-          /* Reset the sort flags as our work here is done. */
-          t->flags = 0;
-          break;
-        case task_type_init_grav:
-          runner_do_init_grav(r, ci, 1);
-          break;
-        case task_type_ghost:
-          runner_do_ghost(r, ci, 1);
-          break;
-#ifdef EXTRA_HYDRO_LOOP
-        case task_type_extra_ghost:
-          runner_do_extra_ghost(r, ci, 1);
-          break;
-#endif
-        case task_type_stars_ghost:
-          runner_do_stars_ghost(r, ci, 1);
-          break;
-        case task_type_bh_density_ghost:
-          runner_do_black_holes_density_ghost(r, ci, 1);
-          break;
-        case task_type_bh_swallow_ghost3:
-          runner_do_black_holes_swallow_ghost(r, ci, 1);
-          break;
-        case task_type_drift_part:
-          runner_do_drift_part(r, ci, 1);
-          break;
-        case task_type_drift_spart:
-          runner_do_drift_spart(r, ci, 1);
-          break;
-        case task_type_drift_bpart:
-          runner_do_drift_bpart(r, ci, 1);
-          break;
-        case task_type_drift_gpart:
-          runner_do_drift_gpart(r, ci, 1);
-          break;
-        case task_type_kick1:
-          runner_do_kick1(r, ci, 1);
-          break;
-        case task_type_kick2:
-          runner_do_kick2(r, ci, 1);
-          break;
-        case task_type_end_hydro_force:
-          runner_do_end_hydro_force(r, ci, 1);
-          break;
-        case task_type_end_grav_force:
-          runner_do_end_grav_force(r, ci, 1);
-          break;
-        case task_type_logger:
-          runner_do_logger(r, ci, 1);
-          break;
-        case task_type_timestep:
-          runner_do_timestep(r, ci, 1);
-          break;
-        case task_type_timestep_limiter:
-          runner_do_limiter(r, ci, 0, 1);
-          break;
-#ifdef WITH_MPI
-        case task_type_send:
-          if (t->subtype == task_subtype_tend_part) {
-            free(t->buff);
-          } else if (t->subtype == task_subtype_tend_gpart) {
-            free(t->buff);
-          } else if (t->subtype == task_subtype_tend_spart) {
-            free(t->buff);
-          } else if (t->subtype == task_subtype_tend_bpart) {
-            free(t->buff);
-          } else if (t->subtype == task_subtype_sf_counts) {
-            free(t->buff);
-          } else if (t->subtype == task_subtype_part_swallow) {
-            free(t->buff);
-          } else if (t->subtype == task_subtype_bpart_merger) {
-            free(t->buff);
-          }
-          break;
-        case task_type_recv:
-          if (t->subtype == task_subtype_tend_part) {
-            cell_unpack_end_step_hydro(ci, (struct pcell_step_hydro *)t->buff);
-            free(t->buff);
-          } else if (t->subtype == task_subtype_tend_gpart) {
-            cell_unpack_end_step_grav(ci, (struct pcell_step_grav *)t->buff);
-            free(t->buff);
-          } else if (t->subtype == task_subtype_tend_spart) {
-            cell_unpack_end_step_stars(ci, (struct pcell_step_stars *)t->buff);
-            free(t->buff);
-          } else if (t->subtype == task_subtype_tend_bpart) {
-            cell_unpack_end_step_black_holes(
-                ci, (struct pcell_step_black_holes *)t->buff);
-            free(t->buff);
-          } else if (t->subtype == task_subtype_sf_counts) {
-            cell_unpack_sf_counts(ci, (struct pcell_sf *)t->buff);
-            cell_clear_stars_sort_flags(ci, /*clear_unused_flags=*/0);
-            free(t->buff);
-          } else if (t->subtype == task_subtype_xv) {
-            runner_do_recv_part(r, ci, 1, 1);
-          } else if (t->subtype == task_subtype_rho) {
-            runner_do_recv_part(r, ci, 0, 1);
-          } else if (t->subtype == task_subtype_gradient) {
-            runner_do_recv_part(r, ci, 0, 1);
-          } else if (t->subtype == task_subtype_part_swallow) {
-            cell_unpack_part_swallow(ci,
-                                     (struct black_holes_part_data *)t->buff);
-            free(t->buff);
-          } else if (t->subtype == task_subtype_bpart_merger) {
-            cell_unpack_bpart_swallow(ci,
-                                      (struct black_holes_bpart_data *)t->buff);
-            free(t->buff);
-          } else if (t->subtype == task_subtype_limiter) {
-            runner_do_recv_part(r, ci, 0, 1);
-          } else if (t->subtype == task_subtype_gpart) {
-            runner_do_recv_gpart(r, ci, 1);
-          } else if (t->subtype == task_subtype_spart) {
-            runner_do_recv_spart(r, ci, 1, 1);
-          } else if (t->subtype == task_subtype_bpart_rho) {
-            runner_do_recv_bpart(r, ci, 1, 1);
-          } else if (t->subtype == task_subtype_bpart_swallow) {
-            runner_do_recv_bpart(r, ci, 0, 1);
-          } else if (t->subtype == task_subtype_bpart_feedback) {
-            runner_do_recv_bpart(r, ci, 0, 1);
-          } else if (t->subtype == task_subtype_multipole) {
-            cell_unpack_multipoles(ci, (struct gravity_tensors *)t->buff);
-            free(t->buff);
-          } else {
-            error("Unknown/invalid task subtype (%d).", t->subtype);
-          }
-          break;
-#endif
-        case task_type_grav_down:
-          runner_do_grav_down(r, t->ci, 1);
-          break;
-        case task_type_grav_mesh:
-          runner_do_grav_mesh(r, t->ci, 1);
-          break;
-        case task_type_grav_long_range:
-          runner_do_grav_long_range(r, t->ci, 1);
-          break;
-        case task_type_grav_mm:
-          runner_dopair_grav_mm_progenies(r, t->flags, t->ci, t->cj);
-          break;
-        case task_type_cooling:
-          runner_do_cooling(r, t->ci, 1);
-          break;
-        case task_type_star_formation:
-          runner_do_star_formation(r, t->ci, 1);
-          break;
-        case task_type_stars_resort:
-          runner_do_stars_resort(r, t->ci, 1);
-          break;
-        case task_type_fof_self:
-          runner_do_fof_self(r, t->ci, 1);
-          break;
-        case task_type_fof_pair:
-          runner_do_fof_pair(r, t->ci, t->cj, 1);
-          break;
-        default:
-          error("Unknown/invalid task type (%d).", t->type);
-      }
-
-/* Mark that we have run this task on these cells */
-#ifdef SWIFT_DEBUG_CHECKS
-      if (ci != NULL) {
-        ci->tasks_executed[t->type]++;
-        ci->subtasks_executed[t->subtype]++;
-      }
-      if (cj != NULL) {
-        cj->tasks_executed[t->type]++;
-        cj->subtasks_executed[t->subtype]++;
-      }
-
-      /* This runner is not doing a task anymore */
-      r->t = NULL;
-#endif
-
-      /* We're done with this task, see if we get a next one. */
-      prev = t;
-      t = scheduler_done(sched, t);
-
-    } /* main loop. */
-  }
-
-  /* Be kind, rewind. */
-  return NULL;
-}
-
-/**
- * @brief Write the required particles through the logger.
- *
- * @param r The runner thread.
- * @param c The cell.
- * @param timer Are we timing this ?
- */
-void runner_do_logger(struct runner *r, struct cell *c, int timer) {
-
-#ifdef WITH_LOGGER
-  TIMER_TIC;
-
-  const struct engine *e = r->e;
-  struct part *restrict parts = c->hydro.parts;
-  struct xpart *restrict xparts = c->hydro.xparts;
-  const int count = c->hydro.count;
-
-  /* Anything to do here? */
-  if (!cell_is_active_hydro(c, e) && !cell_is_active_gravity(c, e)) return;
-
-  /* Recurse? Avoid spending too much time in useless cells. */
-  if (c->split) {
-    for (int k = 0; k < 8; k++)
-      if (c->progeny[k] != NULL) runner_do_logger(r, c->progeny[k], 0);
-  } else {
-
-    /* Loop over the parts in this cell. */
-    for (int k = 0; k < count; k++) {
-
-      /* Get a handle on the part. */
-      struct part *restrict p = &parts[k];
-      struct xpart *restrict xp = &xparts[k];
-
-      /* If particle needs to be log */
-      /* This is the same function than part_is_active, except for
-       * debugging checks */
-      if (part_is_active(p, e)) {
-
-        if (logger_should_write(&xp->logger_data, e->logger)) {
-          /* Write particle */
-          /* Currently writing everything, should adapt it through time */
-          logger_log_part(e->logger, p,
-                          logger_mask_data[logger_x].mask |
-                              logger_mask_data[logger_v].mask |
-                              logger_mask_data[logger_a].mask |
-                              logger_mask_data[logger_u].mask |
-                              logger_mask_data[logger_h].mask |
-                              logger_mask_data[logger_rho].mask |
-                              logger_mask_data[logger_consts].mask,
-                          &xp->logger_data.last_offset);
-
-          /* Set counter back to zero */
-          xp->logger_data.steps_since_last_output = 0;
-        } else
-          /* Update counter */
-          xp->logger_data.steps_since_last_output += 1;
-      }
-    }
-  }
-
-  if (c->grav.count > 0) error("gparts not implemented");
-
-  if (c->stars.count > 0) error("sparts not implemented");
-
-  if (timer) TIMER_TOC(timer_logger);
-
-#else
-  error("Logger disabled, please enable it during configuration");
-#endif
-}
-
-/**
- * @brief Recursively search for FOF groups in a single cell.
- *
- * @param r runner task
- * @param c cell
- * @param timer 1 if the time is to be recorded.
- */
-void runner_do_fof_self(struct runner *r, struct cell *c, int timer) {
-
-#ifdef WITH_FOF
-
-  TIMER_TIC;
-
-  const struct engine *e = r->e;
-  struct space *s = e->s;
-  const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]};
-  const int periodic = s->periodic;
-  const struct gpart *const gparts = s->gparts;
-  const double search_r2 = e->fof_properties->l_x2;
-
-  rec_fof_search_self(e->fof_properties, dim, search_r2, periodic, gparts, c);
-
-  if (timer) TIMER_TOC(timer_fof_self);
-
-#else
-  error("SWIFT was not compiled with FOF enabled!");
-#endif
-}
-
-/**
- * @brief Recursively search for FOF groups between a pair of cells.
- *
- * @param r runner task
- * @param ci cell i
- * @param cj cell j
- * @param timer 1 if the time is to be recorded.
- */
-void runner_do_fof_pair(struct runner *r, struct cell *ci, struct cell *cj,
-                        int timer) {
-
-#ifdef WITH_FOF
-
-  TIMER_TIC;
-
-  const struct engine *e = r->e;
-  struct space *s = e->s;
-  const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]};
-  const int periodic = s->periodic;
-  const struct gpart *const gparts = s->gparts;
-  const double search_r2 = e->fof_properties->l_x2;
-
-  rec_fof_search_pair(e->fof_properties, dim, search_r2, periodic, gparts, ci,
-                      cj);
-
-  if (timer) TIMER_TOC(timer_fof_pair);
-#else
-  error("SWIFT was not compiled with FOF enabled!");
-#endif
-}
diff --git a/src/runner.h b/src/runner.h
index 1dc62ad6f5dc1c92851cf841a4ab55836d084bac..7e8d0459efb5485ea1301c923e8c7a3396b6fc7e 100644
--- a/src/runner.h
+++ b/src/runner.h
@@ -26,13 +26,21 @@
 /* Config parameters. */
 #include "../config.h"
 
-/* Includes. */
+/* Local headers. */
 #include "cache.h"
 #include "gravity_cache.h"
-#include "task.h"
 
 struct cell;
 struct engine;
+struct task;
+
+/* Unique identifiers of the loop types */
+#define TASK_LOOP_DENSITY 0
+#define TASK_LOOP_GRADIENT 1
+#define TASK_LOOP_FORCE 2
+#define TASK_LOOP_LIMITER 3
+#define TASK_LOOP_FEEDBACK 4
+#define TASK_LOOP_SWALLOW 5
 
 /**
  * @brief A struct representing a runner's thread and its data.
@@ -75,6 +83,12 @@ struct runner {
 /* Function prototypes. */
 void runner_do_ghost(struct runner *r, struct cell *c, int timer);
 void runner_do_extra_ghost(struct runner *r, struct cell *c, int timer);
+void runner_do_stars_ghost(struct runner *r, struct cell *c, int timer);
+void runner_do_black_holes_density_ghost(struct runner *r, struct cell *c,
+                                         int timer);
+void runner_do_black_holes_swallow_ghost(struct runner *r, struct cell *c,
+                                         int timer);
+void runner_do_init_grav(struct runner *r, struct cell *c, int timer);
 void runner_do_hydro_sort(struct runner *r, struct cell *c, int flag,
                           int cleanup, int clock);
 void runner_do_stars_sort(struct runner *r, struct cell *c, int flag,
@@ -84,21 +98,38 @@ void runner_do_all_stars_sort(struct runner *r, struct cell *c);
 void runner_do_drift_part(struct runner *r, struct cell *c, int timer);
 void runner_do_drift_gpart(struct runner *r, struct cell *c, int timer);
 void runner_do_drift_spart(struct runner *r, struct cell *c, int timer);
+void runner_do_drift_bpart(struct runner *r, struct cell *c, int timer);
 void runner_do_kick1(struct runner *r, struct cell *c, int timer);
 void runner_do_kick2(struct runner *r, struct cell *c, int timer);
+void runner_do_timestep(struct runner *r, struct cell *c, int timer);
 void runner_do_end_hydro_force(struct runner *r, struct cell *c, int timer);
+void runner_do_end_grav_force(struct runner *r, struct cell *c, int timer);
 void runner_do_init(struct runner *r, struct cell *c, int timer);
 void runner_do_cooling(struct runner *r, struct cell *c, int timer);
+void runner_do_limiter(struct runner *r, struct cell *c, int force, int timer);
+void runner_do_grav_mesh(struct runner *r, struct cell *c, int timer);
 void runner_do_grav_external(struct runner *r, struct cell *c, int timer);
 void runner_do_grav_fft(struct runner *r, int timer);
 void runner_do_logger(struct runner *r, struct cell *c, int timer);
 void runner_do_fof_self(struct runner *r, struct cell *c, int timer);
 void runner_do_fof_pair(struct runner *r, struct cell *ci, struct cell *cj,
                         int timer);
+void runner_do_gas_swallow_self(struct runner *r, struct cell *c, int timer);
+void runner_do_bh_swallow_self(struct runner *r, struct cell *c, int timer);
+void runner_do_gas_swallow_pair(struct runner *r, struct cell *ci,
+                                struct cell *cj, int timer);
+void runner_do_bh_swallow_pair(struct runner *r, struct cell *ci,
+                               struct cell *cj, int timer);
+void runner_do_star_formation(struct runner *r, struct cell *c, int timer);
+void runner_do_stars_resort(struct runner *r, struct cell *c, const int timer);
+
+void runner_do_recv_gpart(struct runner *r, struct cell *c, int timer);
+void runner_do_recv_part(struct runner *r, struct cell *c, int clear_sorts,
+                         int timer);
+void runner_do_recv_spart(struct runner *r, struct cell *c, int clear_sorts,
+                          int timer);
+void runner_do_recv_bpart(struct runner *r, struct cell *c, int clear_sorts,
+                          int timer);
 void *runner_main(void *data);
-void runner_do_unskip_mapper(void *map_data, int num_elements,
-                             void *extra_data);
-void runner_do_drift_all_mapper(void *map_data, int num_elements,
-                                void *extra_data);
 
 #endif /* SWIFT_RUNNER_H */
diff --git a/src/runner_black_holes.c b/src/runner_black_holes.c
new file mode 100644
index 0000000000000000000000000000000000000000..d9bb62201d7b087670aef0ce2346a51bf61a3868
--- /dev/null
+++ b/src/runner_black_holes.c
@@ -0,0 +1,459 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
+ *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *               2015 Peter W. Draper (p.w.draper@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+/* Config parameters. */
+#include "../config.h"
+
+/* This object's header. */
+#include "runner.h"
+
+/* Local headers. */
+#include "active.h"
+#include "black_holes.h"
+#include "cell.h"
+#include "engine.h"
+#include "timers.h"
+
+/**
+ * @brief Process all the gas particles in a cell that have been flagged for
+ * swallowing by a black hole.
+ *
+ * This is done by recursing down to the leaf-level and skipping the sub-cells
+ * that have not been drifted as they would not have any particles with a
+ * swallowing flag. We then loop over the particles with a flag and look into
+ * the space-wide list of black holes for the particle with the corresponding
+ * ID. If found, the BH swallows the gas particle and, if the cell is local,
+ * the gas particle is removed. A local gas particle may also be claimed by a
+ * foreign BH (MPI only), in which case we do not update the BH (that will be
+ * done on its node) but just remove the gas particle.
+ *
+ * @param r The thread #runner.
+ * @param c The #cell.
+ * @param timer Are we timing this? (Not read by this function.)
+ */
+void runner_do_gas_swallow(struct runner *r, struct cell *c, int timer) {
+
+  struct engine *e = r->e;
+  struct space *s = e->s;
+  struct bpart *bparts = s->bparts;
+  const size_t nr_bpart = s->nr_bparts;
+#ifdef WITH_MPI
+  struct bpart *bparts_foreign = s->bparts_foreign;
+  const size_t nr_bparts_foreign = s->nr_bparts_foreign;
+#endif
+
+  struct part *parts = c->hydro.parts;
+  struct xpart *xparts = c->hydro.xparts;
+
+  /* Early abort?
+   * (We only want cells for which we drifted the gas as these are
+   * the only ones that could have gas particles that have been flagged
+   * for swallowing) */
+  if (c->hydro.count == 0 || c->hydro.ti_old_part != e->ti_current) {
+    return;
+  }
+
+  /* Split cell: recurse into the existing progeny (timing flag off). */
+  if (c->split) {
+    for (int k = 0; k < 8; k++) {
+      if (c->progeny[k] != NULL) {
+        struct cell *restrict cp = c->progeny[k];
+
+        runner_do_gas_swallow(r, cp, 0);
+      }
+    }
+  } else {
+
+    /* Loop over all the gas particles in the cell.
+     * Note that the cell (and hence the parts) may be local or foreign. */
+    const size_t nr_parts = c->hydro.count;
+    for (size_t k = 0; k < nr_parts; k++) {
+
+      /* Get a handle on the part and its matching xpart. */
+      struct part *const p = &parts[k];
+      struct xpart *const xp = &xparts[k];
+
+      /* Ignore inhibited particles (they have already been removed!) */
+      if (part_is_inhibited(p, e)) continue;
+
+      /* Get the ID of the black hole that will swallow this part */
+      const long long swallow_id =
+          black_holes_get_part_swallow_id(&p->black_holes_data);
+
+      /* Has this particle been flagged for swallowing? */
+      if (swallow_id >= 0) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+        if (p->ti_drift != e->ti_current)
+          error("Trying to swallow an un-drifted particle.");
+#endif
+
+        /* ID of the BH swallowing this particle */
+        const long long BH_id = swallow_id;
+
+        /* Have we found this particle's BH already? */
+        int found = 0;
+
+        /* Let's look for the hungry black hole in the local list */
+        for (size_t i = 0; i < nr_bpart; ++i) {
+
+          /* Get a handle on the bpart. */
+          struct bpart *bp = &bparts[i];
+
+          if (bp->id == BH_id) {
+
+            /* Lock the space as we are going to work directly on the
+             * space-wide bpart list */
+            lock_lock(&s->lock);
+
+            /* Swallow the gas particle (i.e. update the BH properties) */
+            black_holes_swallow_part(bp, p, xp, e->cosmology);
+
+            /* Release the space as we are done updating the bpart */
+            if (lock_unlock(&s->lock) != 0)
+              error("Failed to unlock the space.");
+
+            message("BH %lld swallowing gas particle %lld", bp->id, p->id);
+
+            /* If the gas particle is local, remove it */
+            if (c->nodeID == e->nodeID) {
+
+              message("BH %lld removing gas particle %lld", bp->id, p->id);
+
+              lock_lock(&e->s->lock);
+
+              /* Re-check that the particle has not been removed
+               * by another thread before we do the deed. */
+              if (!part_is_inhibited(p, e)) {
+
+                /* Finally, remove the gas particle from the system
+                 * Recall that the gpart associated with it is also removed
+                 * at the same time. */
+                cell_remove_part(e, c, p, xp);
+              }
+
+              if (lock_unlock(&e->s->lock) != 0)
+                error("Failed to unlock the space!");
+            }
+
+            /* In any case, prevent the particle from being re-swallowed */
+            black_holes_mark_part_as_swallowed(&p->black_holes_data);
+
+            found = 1;
+            break;
+          }
+
+        } /* Loop over local BHs */
+
+#ifdef WITH_MPI
+
+        /* We could also be in the case of a local gas particle being
+         * swallowed by a foreign BH. In this case, we won't update the
+         * BH but just remove the particle from the local list. */
+        if (c->nodeID == e->nodeID && !found) {
+
+          /* Let's look for the foreign hungry black hole */
+          for (size_t i = 0; i < nr_bparts_foreign; ++i) {
+
+            /* Get a handle on the bpart. */
+            struct bpart *bp = &bparts_foreign[i];
+
+            if (bp->id == BH_id) {
+
+              message("BH %lld removing gas particle %lld (foreign BH case)",
+                      bp->id, p->id);
+
+              lock_lock(&e->s->lock);
+
+              /* Re-check that the particle has not been removed
+               * by another thread before we do the deed. */
+              if (!part_is_inhibited(p, e)) {
+
+                /* Finally, remove the gas particle from the system */
+                cell_remove_part(e, c, p, xp);
+              }
+
+              if (lock_unlock(&e->s->lock) != 0)
+                error("Failed to unlock the space!");
+
+              found = 1;
+              break;
+            }
+          } /* Loop over foreign BHs */
+        }   /* Is the cell local? */
+#endif
+
+        /* If we have a local particle, we must have found the BH in one
+         * of our lists of black holes. */
+        if (c->nodeID == e->nodeID && !found) {
+          error("Gas particle %lld could not find BH %lld to be swallowed",
+                p->id, swallow_id);
+        }
+      } /* Part was flagged for swallowing */
+    }   /* Loop over the parts */
+  }     /* Cell is not split */
+}
+
+/**
+ * @brief Processing of gas particles to swallow - self task case.
+ *
+ * @param r The thread #runner.
+ * @param c The #cell (checked to be local and BH-active in debug builds).
+ * @param timer Timing flag, forwarded to runner_do_gas_swallow().
+ */
+void runner_do_gas_swallow_self(struct runner *r, struct cell *c, int timer) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (c->nodeID != r->e->nodeID) error("Running self task on foreign node");
+  if (!cell_is_active_black_holes(c, r->e))
+    error("Running self task on inactive cell");
+#endif
+
+  runner_do_gas_swallow(r, c, timer);
+}
+
+/**
+ * @brief Processing of gas particles to swallow - pair task case.
+ *
+ * @param r The thread #runner.
+ * @param ci First #cell.
+ * @param cj Second #cell.
+ * @param timer Timing flag, forwarded to runner_do_gas_swallow().
+ */
+void runner_do_gas_swallow_pair(struct runner *r, struct cell *ci,
+                                struct cell *cj, int timer) {
+
+  const struct engine *e = r->e;
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (ci->nodeID != e->nodeID && cj->nodeID != e->nodeID)
+    error("Running pair task on foreign node");
+#endif
+
+  /* Process the gas of one cell only when the *other* cell of the pair has
+   * active BHs, i.e. only in the cell that is the neighbour of the active BH */
+  if (cell_is_active_black_holes(cj, e)) runner_do_gas_swallow(r, ci, timer);
+  if (cell_is_active_black_holes(ci, e)) runner_do_gas_swallow(r, cj, timer);
+}
+
+/**
+ * @brief Process all the BH particles in a cell that have been flagged for
+ * swallowing by a black hole.
+ *
+ * This is done by recursing down to the leaf-level and skipping the sub-cells
+ * that have not been drifted as they would not have any particles with a
+ * swallowing flag. We then loop over the particles with a flag and look into
+ * the space-wide list of black holes for the particle with the corresponding
+ * ID. If found, the BH swallows the BH particle and, if the cell is local,
+ * the BH particle is removed. A local BH particle may also be claimed by a
+ * foreign BH (MPI only), in which case we do not update the BH (that will be
+ * done on its node) but just remove the BH particle.
+ *
+ * @param r The thread #runner.
+ * @param c The #cell.
+ * @param timer Are we timing this? (Not read by this function.)
+ */
+void runner_do_bh_swallow(struct runner *r, struct cell *c, int timer) {
+
+  struct engine *e = r->e;
+  struct space *s = e->s;
+  struct bpart *bparts = s->bparts;
+  const size_t nr_bpart = s->nr_bparts;
+#ifdef WITH_MPI
+  struct bpart *bparts_foreign = s->bparts_foreign;
+  const size_t nr_bparts_foreign = s->nr_bparts_foreign;
+#endif
+
+  struct bpart *cell_bparts = c->black_holes.parts;
+
+  /* Early abort?
+   * (We only want cells for which we drifted the BH as these are
+   * the only ones that could have BH particles that have been flagged
+   * for swallowing) */
+  if (c->black_holes.count == 0 ||
+      c->black_holes.ti_old_part != e->ti_current) {
+    return;
+  }
+
+  /* Split cell: recurse into the existing progeny (timing flag off). */
+  if (c->split) {
+    for (int k = 0; k < 8; k++) {
+      if (c->progeny[k] != NULL) {
+        struct cell *restrict cp = c->progeny[k];
+
+        runner_do_bh_swallow(r, cp, 0);
+      }
+    }
+  } else {
+
+    /* Loop over all the BH particles in the cell.
+     * Note that the cell (and hence the bparts) may be local or foreign. */
+    const size_t nr_cell_bparts = c->black_holes.count;
+    for (size_t k = 0; k < nr_cell_bparts; k++) {
+
+      /* Get a handle on the bpart. */
+      struct bpart *const cell_bp = &cell_bparts[k];
+
+      /* Ignore inhibited particles (they have already been removed!) */
+      if (bpart_is_inhibited(cell_bp, e)) continue;
+
+      /* Get the ID of the black hole that will swallow this bpart */
+      const long long swallow_id =
+          black_holes_get_bpart_swallow_id(&cell_bp->merger_data);
+
+      /* A non-negative swallow_id marks a bpart flagged for swallowing;
+       * negative values are skipped by the test below. */
+
+      /* Has this particle been flagged for swallowing? */
+      if (swallow_id >= 0) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+        if (cell_bp->ti_drift != e->ti_current)
+          error("Trying to swallow an un-drifted particle.");
+#endif
+
+        /* ID of the BH swallowing this particle */
+        const long long BH_id = swallow_id;
+
+        /* Have we found this particle's BH already? */
+        int found = 0;
+
+        /* Let's look for the hungry black hole in the local list */
+        for (size_t i = 0; i < nr_bpart; ++i) {
+
+          /* Get a handle on the bpart. */
+          struct bpart *bp = &bparts[i];
+
+          if (bp->id == BH_id) {
+
+            /* Lock the space as we are going to work directly on the
+             * space-wide bpart list */
+            lock_lock(&s->lock);
+
+            /* Swallow the BH particle (i.e. update the BH properties) */
+            black_holes_swallow_bpart(bp, cell_bp, e->cosmology);
+
+            /* Release the space as we are done updating the bpart */
+            if (lock_unlock(&s->lock) != 0)
+              error("Failed to unlock the space.");
+
+            message("BH %lld swallowing BH particle %lld", bp->id, cell_bp->id);
+
+            /* If the BH particle is local, remove it */
+            if (c->nodeID == e->nodeID) {
+
+              message("BH %lld removing BH particle %lld", bp->id, cell_bp->id);
+
+              /* Finally, remove the BH particle from the system (its gpart
+               * is presumably removed at the same time - TODO confirm).
+               * NOTE(review): no space lock here, unlike the gas case. */
+              cell_remove_bpart(e, c, cell_bp);
+            }
+
+            /* In any case, prevent the particle from being re-swallowed */
+            black_holes_mark_bpart_as_merged(&cell_bp->merger_data);
+
+            found = 1;
+            break;
+          }
+
+        } /* Loop over local BHs */
+
+#ifdef WITH_MPI
+
+        /* We could also be in the case of a local BH particle being
+         * swallowed by a foreign BH. In this case, we won't update the
+         * foreign BH but just remove the particle from the local list. */
+        if (c->nodeID == e->nodeID && !found) {
+
+          /* Let's look for the foreign hungry black hole */
+          for (size_t i = 0; i < nr_bparts_foreign; ++i) {
+
+            /* Get a handle on the bpart. */
+            struct bpart *bp = &bparts_foreign[i];
+
+            if (bp->id == BH_id) {
+
+              message("BH %lld removing BH particle %lld (foreign BH case)",
+                      bp->id, cell_bp->id);
+
+              /* Finally, remove the BH particle from the system */
+              cell_remove_bpart(e, c, cell_bp);
+
+              found = 1;
+              break;
+            }
+          } /* Loop over foreign BHs */
+        }   /* Is the cell local? */
+#endif
+
+        /* If we have a local particle, we must have found the BH in one
+         * of our lists of black holes. */
+        if (c->nodeID == e->nodeID && !found) {
+          error("BH particle %lld could not find BH %lld to be swallowed",
+                cell_bp->id, swallow_id);
+        }
+      } /* Bpart was flagged for swallowing */
+    }   /* Loop over the bparts */
+  }     /* Cell is not split */
+}
+
+/**
+ * @brief Processing of BH particles to swallow - self task case.
+ *
+ * @param r The thread #runner.
+ * @param c The #cell (checked to be local and BH-active in debug builds).
+ * @param timer Timing flag, forwarded to runner_do_bh_swallow().
+ */
+void runner_do_bh_swallow_self(struct runner *r, struct cell *c, int timer) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (c->nodeID != r->e->nodeID) error("Running self task on foreign node");
+  if (!cell_is_active_black_holes(c, r->e))
+    error("Running self task on inactive cell");
+#endif
+
+  runner_do_bh_swallow(r, c, timer);
+}
+
+/**
+ * @brief Processing of BH particles to swallow - pair task case.
+ *
+ * @param r The thread #runner.
+ * @param ci First #cell.
+ * @param cj Second #cell.
+ * @param timer Timing flag, forwarded to runner_do_bh_swallow().
+ */
+void runner_do_bh_swallow_pair(struct runner *r, struct cell *ci,
+                               struct cell *cj, int timer) {
+
+  const struct engine *e = r->e;
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (ci->nodeID != e->nodeID && cj->nodeID != e->nodeID)
+    error("Running pair task on foreign node");
+#endif
+
+  /* Process the BHs of one cell only when the *other* cell of the pair has
+   * active BHs, i.e. only in the cell that is the neighbour of the active BH */
+  if (cell_is_active_black_holes(cj, e)) runner_do_bh_swallow(r, ci, timer);
+  if (cell_is_active_black_holes(ci, e)) runner_do_bh_swallow(r, cj, timer);
+}
diff --git a/src/runner_doiact_black_holes.c b/src/runner_doiact_black_holes.c
new file mode 100644
index 0000000000000000000000000000000000000000..5c139eada6cf7403076194c42261948db5e0f7f4
--- /dev/null
+++ b/src/runner_doiact_black_holes.c
@@ -0,0 +1,53 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
+ *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *               2015 Peter W. Draper (p.w.draper@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+/* Config parameters. */
+#include "../config.h"
+
+/* Local headers. */
+#include "active.h"
+#include "black_holes.h"
+#include "cell.h"
+#include "engine.h"
+#include "runner.h"
+#include "space_getsid.h"
+#include "timers.h"
+
+/* Import the black hole density loop functions. */
+#define FUNCTION density
+#define FUNCTION_TASK_LOOP TASK_LOOP_DENSITY
+#include "runner_doiact_functions_black_holes.h"
+#undef FUNCTION_TASK_LOOP
+#undef FUNCTION
+
+/* Import the black hole swallow loop functions. */
+#define FUNCTION swallow
+#define FUNCTION_TASK_LOOP TASK_LOOP_SWALLOW
+#include "runner_doiact_functions_black_holes.h"
+#undef FUNCTION_TASK_LOOP
+#undef FUNCTION
+
+/* Import the black hole feedback loop functions. */
+#define FUNCTION feedback
+#define FUNCTION_TASK_LOOP TASK_LOOP_FEEDBACK
+#include "runner_doiact_functions_black_holes.h"
+#undef FUNCTION_TASK_LOOP
+#undef FUNCTION
diff --git a/src/runner_doiact_black_holes.h b/src/runner_doiact_black_holes.h
index ce159c7ac24a508bc625070ed50b3aad7dd9fa8d..763e557babb9ca94a05a28d1ea5ed0f1141684ff 100644
--- a/src/runner_doiact_black_holes.h
+++ b/src/runner_doiact_black_holes.h
@@ -85,852 +85,20 @@
 #define _IACT_BH_BH(f) PASTE(runner_iact_nonsym_bh_bh, f)
 #define IACT_BH_BH _IACT_BH_BH(FUNCTION)
 
-/**
- * @brief Calculate the number density of #part around the #bpart
- *
- * @param r runner task
- * @param c cell
- * @param timer 1 if the time is to be recorded.
- */
-void DOSELF1_BH(struct runner *r, struct cell *c, int timer) {
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (c->nodeID != engine_rank) error("Should be run on a different node");
-#endif
-
-  TIMER_TIC;
-
-  const struct engine *e = r->e;
-  const integertime_t ti_current = e->ti_current;
-  const struct cosmology *cosmo = e->cosmology;
-
-  /* Anything to do here? */
-  if (c->black_holes.count == 0) return;
-  if (!cell_is_active_black_holes(c, e)) return;
-
-  const int bcount = c->black_holes.count;
-  const int count = c->hydro.count;
-  struct bpart *restrict bparts = c->black_holes.parts;
-  struct part *restrict parts = c->hydro.parts;
-  struct xpart *restrict xparts = c->hydro.xparts;
-
-  /* Do we actually have any gas neighbours? */
-  if (c->hydro.count != 0) {
-
-    /* Loop over the bparts in ci. */
-    for (int bid = 0; bid < bcount; bid++) {
-
-      /* Get a hold of the ith bpart in ci. */
-      struct bpart *restrict bi = &bparts[bid];
-
-      /* Skip inactive particles */
-      if (!bpart_is_active(bi, e)) continue;
-
-      const float hi = bi->h;
-      const float hig2 = hi * hi * kernel_gamma2;
-      const float bix[3] = {(float)(bi->x[0] - c->loc[0]),
-                            (float)(bi->x[1] - c->loc[1]),
-                            (float)(bi->x[2] - c->loc[2])};
-
-      /* Loop over the parts in cj. */
-      for (int pjd = 0; pjd < count; pjd++) {
-
-        /* Get a pointer to the jth particle. */
-        struct part *restrict pj = &parts[pjd];
-        struct xpart *restrict xpj = &xparts[pjd];
-        const float hj = pj->h;
-
-        /* Early abort? */
-        if (part_is_inhibited(pj, e)) continue;
-
-        /* Compute the pairwise distance. */
-        const float pjx[3] = {(float)(pj->x[0] - c->loc[0]),
-                              (float)(pj->x[1] - c->loc[1]),
-                              (float)(pj->x[2] - c->loc[2])};
-        float dx[3] = {bix[0] - pjx[0], bix[1] - pjx[1], bix[2] - pjx[2]};
-        const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
-
-#ifdef SWIFT_DEBUG_CHECKS
-        /* Check that particles have been drifted to the current time */
-        if (bi->ti_drift != e->ti_current)
-          error("Particle bi not drifted to current time");
-        if (pj->ti_drift != e->ti_current)
-          error("Particle pj not drifted to current time");
-#endif
-
-        if (r2 < hig2) {
-          IACT_BH_GAS(r2, dx, hi, hj, bi, pj, xpj, cosmo, e->gravity_properties,
-                      ti_current);
-        }
-      } /* loop over the parts in ci. */
-    }   /* loop over the bparts in ci. */
-  }     /* Do we have gas particles in the cell? */
-
-    /* When doing BH swallowing, we need a quick loop also over the BH
-     * neighbours */
-#if (FUNCTION_TASK_LOOP == TASK_LOOP_SWALLOW)
-
-  /* Loop over the bparts in ci. */
-  for (int bid = 0; bid < bcount; bid++) {
-
-    /* Get a hold of the ith bpart in ci. */
-    struct bpart *restrict bi = &bparts[bid];
-
-    /* Skip inactive particles */
-    if (!bpart_is_active(bi, e)) continue;
-
-    const float hi = bi->h;
-    const float hig2 = hi * hi * kernel_gamma2;
-    const float bix[3] = {(float)(bi->x[0] - c->loc[0]),
-                          (float)(bi->x[1] - c->loc[1]),
-                          (float)(bi->x[2] - c->loc[2])};
-
-    /* Loop over the parts in cj. */
-    for (int bjd = 0; bjd < bcount; bjd++) {
-
-      /* Skip self interaction */
-      if (bid == bjd) continue;
-
-      /* Get a pointer to the jth particle. */
-      struct bpart *restrict bj = &bparts[bjd];
-      const float hj = bj->h;
-
-      /* Early abort? */
-      if (bpart_is_inhibited(bj, e)) continue;
-
-      /* Compute the pairwise distance. */
-      const float bjx[3] = {(float)(bj->x[0] - c->loc[0]),
-                            (float)(bj->x[1] - c->loc[1]),
-                            (float)(bj->x[2] - c->loc[2])};
-      float dx[3] = {bix[0] - bjx[0], bix[1] - bjx[1], bix[2] - bjx[2]};
-      const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
-
-#ifdef SWIFT_DEBUG_CHECKS
-      /* Check that particles have been drifted to the current time */
-      if (bi->ti_drift != e->ti_current)
-        error("Particle bi not drifted to current time");
-      if (bj->ti_drift != e->ti_current)
-        error("Particle bj not drifted to current time");
-#endif
-
-      if (r2 < hig2) {
-        IACT_BH_BH(r2, dx, hi, hj, bi, bj, cosmo, e->gravity_properties,
-                   ti_current);
-      }
-    } /* loop over the bparts in ci. */
-  }   /* loop over the bparts in ci. */
-
-#endif /* (FUNCTION_TASK_LOOP == TASK_LOOP_SWALLOW) */
-
-  TIMER_TOC(TIMER_DOSELF_BH);
-}
-
-/**
- * @brief Calculate the number density of cj #part around the ci #bpart
- *
- * @param r runner task
- * @param ci The first #cell
- * @param cj The second #cell
- */
-void DO_NONSYM_PAIR1_BH_NAIVE(struct runner *r, struct cell *restrict ci,
-                              struct cell *restrict cj) {
-
-#ifdef SWIFT_DEBUG_CHECKS
-#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
-  if (ci->nodeID != engine_rank) error("Should be run on a different node");
-#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK)
-  if (cj->nodeID != engine_rank) error("Should be run on a different node");
-#endif
-#endif
-
-  const struct engine *e = r->e;
-  const integertime_t ti_current = e->ti_current;
-  const struct cosmology *cosmo = e->cosmology;
-
-  /* Anything to do here? */
-  if (ci->black_holes.count == 0) return;
-  if (!cell_is_active_black_holes(ci, e)) return;
-
-  const int bcount_i = ci->black_holes.count;
-  const int count_j = cj->hydro.count;
-  struct bpart *restrict bparts_i = ci->black_holes.parts;
-  struct part *restrict parts_j = cj->hydro.parts;
-  struct xpart *restrict xparts_j = cj->hydro.xparts;
-
-  /* Get the relative distance between the pairs, wrapping. */
-  double shift[3] = {0.0, 0.0, 0.0};
-  for (int k = 0; k < 3; k++) {
-    if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2)
-      shift[k] = e->s->dim[k];
-    else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2)
-      shift[k] = -e->s->dim[k];
-  }
-
-  /* Do we actually have any gas neighbours? */
-  if (cj->hydro.count != 0) {
-
-    /* Loop over the bparts in ci. */
-    for (int bid = 0; bid < bcount_i; bid++) {
-
-      /* Get a hold of the ith bpart in ci. */
-      struct bpart *restrict bi = &bparts_i[bid];
-
-      /* Skip inactive particles */
-      if (!bpart_is_active(bi, e)) continue;
-
-      const float hi = bi->h;
-      const float hig2 = hi * hi * kernel_gamma2;
-      const float bix[3] = {(float)(bi->x[0] - (cj->loc[0] + shift[0])),
-                            (float)(bi->x[1] - (cj->loc[1] + shift[1])),
-                            (float)(bi->x[2] - (cj->loc[2] + shift[2]))};
-
-      /* Loop over the parts in cj. */
-      for (int pjd = 0; pjd < count_j; pjd++) {
-
-        /* Get a pointer to the jth particle. */
-        struct part *restrict pj = &parts_j[pjd];
-        struct xpart *restrict xpj = &xparts_j[pjd];
-        const float hj = pj->h;
-
-        /* Skip inhibited particles. */
-        if (part_is_inhibited(pj, e)) continue;
-
-        /* Compute the pairwise distance. */
-        const float pjx[3] = {(float)(pj->x[0] - cj->loc[0]),
-                              (float)(pj->x[1] - cj->loc[1]),
-                              (float)(pj->x[2] - cj->loc[2])};
-        float dx[3] = {bix[0] - pjx[0], bix[1] - pjx[1], bix[2] - pjx[2]};
-        const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
-
-#ifdef SWIFT_DEBUG_CHECKS
-        /* Check that particles have been drifted to the current time */
-        if (bi->ti_drift != e->ti_current)
-          error("Particle bi not drifted to current time");
-        if (pj->ti_drift != e->ti_current)
-          error("Particle pj not drifted to current time");
-#endif
-
-        if (r2 < hig2) {
-          IACT_BH_GAS(r2, dx, hi, hj, bi, pj, xpj, cosmo, e->gravity_properties,
-                      ti_current);
-        }
-      } /* loop over the parts in cj. */
-    }   /* loop over the bparts in ci. */
-  }     /* Do we have gas particles in the cell? */
-
-    /* When doing BH swallowing, we need a quick loop also over the BH
-     * neighbours */
-#if (FUNCTION_TASK_LOOP == TASK_LOOP_SWALLOW)
-
-  const int bcount_j = cj->black_holes.count;
-  struct bpart *restrict bparts_j = cj->black_holes.parts;
-
-  /* Loop over the bparts in ci. */
-  for (int bid = 0; bid < bcount_i; bid++) {
-
-    /* Get a hold of the ith bpart in ci. */
-    struct bpart *restrict bi = &bparts_i[bid];
-
-    /* Skip inactive particles */
-    if (!bpart_is_active(bi, e)) continue;
-
-    const float hi = bi->h;
-    const float hig2 = hi * hi * kernel_gamma2;
-    const float bix[3] = {(float)(bi->x[0] - (cj->loc[0] + shift[0])),
-                          (float)(bi->x[1] - (cj->loc[1] + shift[1])),
-                          (float)(bi->x[2] - (cj->loc[2] + shift[2]))};
-
-    /* Loop over the bparts in cj. */
-    for (int bjd = 0; bjd < bcount_j; bjd++) {
-
-      /* Get a pointer to the jth particle. */
-      struct bpart *restrict bj = &bparts_j[bjd];
-      const float hj = bj->h;
-
-      /* Skip inhibited particles. */
-      if (bpart_is_inhibited(bj, e)) continue;
-
-      /* Compute the pairwise distance. */
-      const float bjx[3] = {(float)(bj->x[0] - cj->loc[0]),
-                            (float)(bj->x[1] - cj->loc[1]),
-                            (float)(bj->x[2] - cj->loc[2])};
-      float dx[3] = {bix[0] - bjx[0], bix[1] - bjx[1], bix[2] - bjx[2]};
-      const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
-
-#ifdef SWIFT_DEBUG_CHECKS
-      /* Check that particles have been drifted to the current time */
-      if (bi->ti_drift != e->ti_current)
-        error("Particle bi not drifted to current time");
-      if (bj->ti_drift != e->ti_current)
-        error("Particle bj not drifted to current time");
-#endif
-
-      if (r2 < hig2) {
-        IACT_BH_BH(r2, dx, hi, hj, bi, bj, cosmo, e->gravity_properties,
-                   ti_current);
-      }
-    } /* loop over the bparts in cj. */
-  }   /* loop over the bparts in ci. */
-
-#endif /* (FUNCTION_TASK_LOOP == TASK_LOOP_SWALLOW) */
-}
-
-void DOPAIR1_BH_NAIVE(struct runner *r, struct cell *restrict ci,
-                      struct cell *restrict cj, int timer) {
-
-  TIMER_TIC;
-
-#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
-  const int do_ci_bh = ci->nodeID == r->e->nodeID;
-  const int do_cj_bh = cj->nodeID == r->e->nodeID;
-#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK)
-  /* here we are updating the hydro -> switch ci, cj */
-  const int do_ci_bh = cj->nodeID == r->e->nodeID;
-  const int do_cj_bh = ci->nodeID == r->e->nodeID;
-#else
-  /* The swallow task is executed on both sides */
-  const int do_ci_bh = 1;
-  const int do_cj_bh = 1;
-#endif
-
-  if (do_ci_bh) DO_NONSYM_PAIR1_BH_NAIVE(r, ci, cj);
-  if (do_cj_bh) DO_NONSYM_PAIR1_BH_NAIVE(r, cj, ci);
-
-  TIMER_TOC(TIMER_DOPAIR_BH);
-}
-
-/**
- * @brief Compute the interactions between a cell pair, but only for the
- *      given indices in ci.
- *
- * Version using a brute-force algorithm.
- *
- * @param r The #runner.
- * @param ci The first #cell.
- * @param bparts_i The #bpart to interact with @c cj.
- * @param ind The list of indices of particles in @c ci to interact with.
- * @param bcount The number of particles in @c ind.
- * @param cj The second #cell.
- * @param shift The shift vector to apply to the particles in ci.
- */
-void DOPAIR1_SUBSET_BH_NAIVE(struct runner *r, struct cell *restrict ci,
-                             struct bpart *restrict bparts_i, int *restrict ind,
-                             const int bcount, struct cell *restrict cj,
-                             const double *shift) {
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (ci->nodeID != engine_rank) error("Should be run on a different node");
-#endif
-
-  const struct engine *e = r->e;
-  const integertime_t ti_current = e->ti_current;
-  const struct cosmology *cosmo = e->cosmology;
-
-  const int count_j = cj->hydro.count;
-  struct part *restrict parts_j = cj->hydro.parts;
-  struct xpart *restrict xparts_j = cj->hydro.xparts;
-
-  /* Early abort? */
-  if (count_j == 0) return;
-
-  /* Loop over the parts_i. */
-  for (int bid = 0; bid < bcount; bid++) {
-
-    /* Get a hold of the ith part in ci. */
-    struct bpart *restrict bi = &bparts_i[ind[bid]];
-
-    const double bix = bi->x[0] - (shift[0]);
-    const double biy = bi->x[1] - (shift[1]);
-    const double biz = bi->x[2] - (shift[2]);
-    const float hi = bi->h;
-    const float hig2 = hi * hi * kernel_gamma2;
-
-#ifdef SWIFT_DEBUG_CHECKS
-    if (!bpart_is_active(bi, e))
-      error("Trying to correct smoothing length of inactive particle !");
-#endif
-
-    /* Loop over the parts in cj. */
-    for (int pjd = 0; pjd < count_j; pjd++) {
-
-      /* Get a pointer to the jth particle. */
-      struct part *restrict pj = &parts_j[pjd];
-      struct xpart *restrict xpj = &xparts_j[pjd];
-
-      /* Skip inhibited particles */
-      if (part_is_inhibited(pj, e)) continue;
-
-      const double pjx = pj->x[0];
-      const double pjy = pj->x[1];
-      const double pjz = pj->x[2];
-      const float hj = pj->h;
-
-      /* Compute the pairwise distance. */
-      float dx[3] = {(float)(bix - pjx), (float)(biy - pjy),
-                     (float)(biz - pjz)};
-      const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
-
-#ifdef SWIFT_DEBUG_CHECKS
-      /* Check that particles have been drifted to the current time */
-      if (pj->ti_drift != e->ti_current)
-        error("Particle pj not drifted to current time");
-#endif
-      /* Hit or miss? */
-      if (r2 < hig2) {
-        IACT_BH_GAS(r2, dx, hi, hj, bi, pj, xpj, cosmo, e->gravity_properties,
-                    ti_current);
-      }
-    } /* loop over the parts in cj. */
-  }   /* loop over the parts in ci. */
-}
-
-/**
- * @brief Compute the interactions between a cell pair, but only for the
- *      given indices in ci.
- *
- * @param r The #runner.
- * @param ci The first #cell.
- * @param bparts The #bpart to interact.
- * @param ind The list of indices of particles in @c ci to interact with.
- * @param bcount The number of particles in @c ind.
- */
-void DOSELF1_SUBSET_BH(struct runner *r, struct cell *restrict ci,
-                       struct bpart *restrict bparts, int *restrict ind,
-                       const int bcount) {
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (ci->nodeID != engine_rank) error("Should be run on a different node");
-#endif
-
-  const struct engine *e = r->e;
-  const integertime_t ti_current = e->ti_current;
-  const struct cosmology *cosmo = e->cosmology;
-
-  const int count_i = ci->hydro.count;
-  struct part *restrict parts_j = ci->hydro.parts;
-  struct xpart *restrict xparts_j = ci->hydro.xparts;
+void DOSELF1_BRANCH_BH(struct runner *r, struct cell *c);
+void DOPAIR1_BRANCH_BH(struct runner *r, struct cell *ci, struct cell *cj);
 
-  /* Early abort? */
-  if (count_i == 0) return;
-
-  /* Loop over the parts in ci. */
-  for (int bid = 0; bid < bcount; bid++) {
-
-    /* Get a hold of the ith part in ci. */
-    struct bpart *bi = &bparts[ind[bid]];
-    const float bix[3] = {(float)(bi->x[0] - ci->loc[0]),
-                          (float)(bi->x[1] - ci->loc[1]),
-                          (float)(bi->x[2] - ci->loc[2])};
-    const float hi = bi->h;
-    const float hig2 = hi * hi * kernel_gamma2;
-
-#ifdef SWIFT_DEBUG_CHECKS
-    if (!bpart_is_active(bi, e)) error("Inactive particle in subset function!");
-#endif
-
-    /* Loop over the parts in cj. */
-    for (int pjd = 0; pjd < count_i; pjd++) {
-
-      /* Get a pointer to the jth particle. */
-      struct part *restrict pj = &parts_j[pjd];
-      struct xpart *restrict xpj = &xparts_j[pjd];
-
-      /* Early abort? */
-      if (part_is_inhibited(pj, e)) continue;
-
-      /* Compute the pairwise distance. */
-      const float pjx[3] = {(float)(pj->x[0] - ci->loc[0]),
-                            (float)(pj->x[1] - ci->loc[1]),
-                            (float)(pj->x[2] - ci->loc[2])};
-      float dx[3] = {bix[0] - pjx[0], bix[1] - pjx[1], bix[2] - pjx[2]};
-      const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
-
-#ifdef SWIFT_DEBUG_CHECKS
-      /* Check that particles have been drifted to the current time */
-      if (pj->ti_drift != e->ti_current)
-        error("Particle pj not drifted to current time");
-#endif
-
-      /* Hit or miss? */
-      if (r2 < hig2) {
-        IACT_BH_GAS(r2, dx, hi, pj->h, bi, pj, xpj, cosmo,
-                    e->gravity_properties, ti_current);
-      }
-    } /* loop over the parts in cj. */
-  }   /* loop over the parts in ci. */
-}
+void DOSUB_SELF1_BH(struct runner *r, struct cell *ci, int gettimer);
+void DOSUB_PAIR1_BH(struct runner *r, struct cell *ci, struct cell *cj,
+                    int gettimer);
 
-/**
- * @brief Determine which version of DOSELF1_SUBSET_BH needs to be called
- * depending on the optimisation level.
- *
- * @param r The #runner.
- * @param ci The first #cell.
- * @param bparts The #bpart to interact.
- * @param ind The list of indices of particles in @c ci to interact with.
- * @param bcount The number of particles in @c ind.
- */
 void DOSELF1_SUBSET_BRANCH_BH(struct runner *r, struct cell *restrict ci,
                               struct bpart *restrict bparts, int *restrict ind,
-                              const int bcount) {
-
-  DOSELF1_SUBSET_BH(r, ci, bparts, ind, bcount);
-}
-
-/**
- * @brief Determine which version of DOPAIR1_SUBSET_BH needs to be called
- * depending on the orientation of the cells or whether DOPAIR1_SUBSET_BH
- * needs to be called at all.
- *
- * @param r The #runner.
- * @param ci The first #cell.
- * @param bparts_i The #bpart to interact with @c cj.
- * @param ind The list of indices of particles in @c ci to interact with.
- * @param bcount The number of particles in @c ind.
- * @param cj The second #cell.
- */
+                              const int bcount);
 void DOPAIR1_SUBSET_BRANCH_BH(struct runner *r, struct cell *restrict ci,
                               struct bpart *restrict bparts_i,
                               int *restrict ind, int const bcount,
-                              struct cell *restrict cj) {
-
-  const struct engine *e = r->e;
-
-  /* Anything to do here? */
-  if (cj->hydro.count == 0) return;
-
-  /* Get the relative distance between the pairs, wrapping. */
-  double shift[3] = {0.0, 0.0, 0.0};
-  for (int k = 0; k < 3; k++) {
-    if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2)
-      shift[k] = e->s->dim[k];
-    else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2)
-      shift[k] = -e->s->dim[k];
-  }
-
-  DOPAIR1_SUBSET_BH_NAIVE(r, ci, bparts_i, ind, bcount, cj, shift);
-}
+                              struct cell *restrict cj);
 
 void DOSUB_SUBSET_BH(struct runner *r, struct cell *ci, struct bpart *bparts,
-                     int *ind, const int bcount, struct cell *cj,
-                     int gettimer) {
-
-  const struct engine *e = r->e;
-  struct space *s = e->s;
-
-  /* Should we even bother? */
-  if (!cell_is_active_black_holes(ci, e) &&
-      (cj == NULL || !cell_is_active_black_holes(cj, e)))
-    return;
-
-  /* Find out in which sub-cell of ci the parts are. */
-  struct cell *sub = NULL;
-  if (ci->split) {
-    for (int k = 0; k < 8; k++) {
-      if (ci->progeny[k] != NULL) {
-        if (&bparts[ind[0]] >= &ci->progeny[k]->black_holes.parts[0] &&
-            &bparts[ind[0]] <
-                &ci->progeny[k]
-                     ->black_holes.parts[ci->progeny[k]->black_holes.count]) {
-          sub = ci->progeny[k];
-          break;
-        }
-      }
-    }
-  }
-
-  /* Is this a single cell? */
-  if (cj == NULL) {
-
-    /* Recurse? */
-    if (cell_can_recurse_in_self_black_holes_task(ci)) {
-
-      /* Loop over all progeny. */
-      DOSUB_SUBSET_BH(r, sub, bparts, ind, bcount, NULL, 0);
-      for (int j = 0; j < 8; j++)
-        if (ci->progeny[j] != sub && ci->progeny[j] != NULL)
-          DOSUB_SUBSET_BH(r, sub, bparts, ind, bcount, ci->progeny[j], 0);
-
-    }
-
-    /* Otherwise, compute self-interaction. */
-    else
-      DOSELF1_SUBSET_BRANCH_BH(r, ci, bparts, ind, bcount);
-  } /* self-interaction. */
-
-  /* Otherwise, it's a pair interaction. */
-  else {
-
-    /* Recurse? */
-    if (cell_can_recurse_in_pair_black_holes_task(ci, cj) &&
-        cell_can_recurse_in_pair_black_holes_task(cj, ci)) {
-
-      /* Get the type of pair and flip ci/cj if needed. */
-      double shift[3] = {0.0, 0.0, 0.0};
-      const int sid = space_getsid(s, &ci, &cj, shift);
-
-      struct cell_split_pair *csp = &cell_split_pairs[sid];
-      for (int k = 0; k < csp->count; k++) {
-        const int pid = csp->pairs[k].pid;
-        const int pjd = csp->pairs[k].pjd;
-        if (ci->progeny[pid] == sub && cj->progeny[pjd] != NULL)
-          DOSUB_SUBSET_BH(r, ci->progeny[pid], bparts, ind, bcount,
-                          cj->progeny[pjd], 0);
-        if (ci->progeny[pid] != NULL && cj->progeny[pjd] == sub)
-          DOSUB_SUBSET_BH(r, cj->progeny[pjd], bparts, ind, bcount,
-                          ci->progeny[pid], 0);
-      }
-    }
-
-    /* Otherwise, compute the pair directly. */
-    else if (cell_is_active_black_holes(ci, e) && cj->hydro.count > 0) {
-
-      /* Do any of the cells need to be drifted first? */
-      if (cell_is_active_black_holes(ci, e)) {
-        if (!cell_are_bpart_drifted(ci, e)) error("Cell should be drifted!");
-        if (!cell_are_part_drifted(cj, e)) error("Cell should be drifted!");
-      }
-
-      DOPAIR1_SUBSET_BRANCH_BH(r, ci, bparts, ind, bcount, cj);
-    }
-
-  } /* otherwise, pair interaction. */
-}
-
-/**
- * @brief Determine which version of DOSELF1_BH needs to be called depending
- * on the optimisation level.
- *
- * @param r #runner
- * @param c #cell c
- *
- */
-void DOSELF1_BRANCH_BH(struct runner *r, struct cell *c) {
-
-  const struct engine *restrict e = r->e;
-
-  /* Anything to do here? */
-  if (c->black_holes.count == 0) return;
-
-  /* Anything to do here? */
-  if (!cell_is_active_black_holes(c, e)) return;
-
-  /* Did we mess up the recursion? */
-  if (c->black_holes.h_max_old * kernel_gamma > c->dmin)
-    error("Cell smaller than smoothing length");
-
-  DOSELF1_BH(r, c, 1);
-}
-
-/**
- * @brief Determine which version of DOPAIR1_BH needs to be called depending
- * on the orientation of the cells or whether DOPAIR1_BH needs to be called
- * at all.
- *
- * @param r #runner
- * @param ci #cell ci
- * @param cj #cell cj
- *
- */
-void DOPAIR1_BRANCH_BH(struct runner *r, struct cell *ci, struct cell *cj) {
-
-  const struct engine *restrict e = r->e;
-
-  const int ci_active = cell_is_active_black_holes(ci, e);
-  const int cj_active = cell_is_active_black_holes(cj, e);
-#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
-  const int do_ci_bh = ci->nodeID == e->nodeID;
-  const int do_cj_bh = cj->nodeID == e->nodeID;
-#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK)
-  /* here we are updating the hydro -> switch ci, cj */
-  const int do_ci_bh = cj->nodeID == e->nodeID;
-  const int do_cj_bh = ci->nodeID == e->nodeID;
-#else
-  /* The swallow task is executed on both sides */
-  const int do_ci_bh = 1;
-  const int do_cj_bh = 1;
-#endif
-
-  const int do_ci = (ci->black_holes.count != 0 && cj->hydro.count != 0 &&
-                     ci_active && do_ci_bh);
-  const int do_cj = (cj->black_holes.count != 0 && ci->hydro.count != 0 &&
-                     cj_active && do_cj_bh);
-
-  /* Anything to do here? */
-  if (!do_ci && !do_cj) return;
-
-  /* Check that cells are drifted. */
-  if (do_ci &&
-      (!cell_are_bpart_drifted(ci, e) || !cell_are_part_drifted(cj, e)))
-    error("Interacting undrifted cells.");
-
-  if (do_cj &&
-      (!cell_are_part_drifted(ci, e) || !cell_are_bpart_drifted(cj, e)))
-    error("Interacting undrifted cells.");
-
-  /* No sorted intreactions here -> use the naive ones */
-  DOPAIR1_BH_NAIVE(r, ci, cj, 1);
-}
-
-/**
- * @brief Compute grouped sub-cell interactions for pairs
- *
- * @param r The #runner.
- * @param ci The first #cell.
- * @param cj The second #cell.
- * @param gettimer Do we have a timer ?
- *
- * @todo Hard-code the sid on the recursive calls to avoid the
- * redundant computations to find the sid on-the-fly.
- */
-void DOSUB_PAIR1_BH(struct runner *r, struct cell *ci, struct cell *cj,
-                    int gettimer) {
-
-  TIMER_TIC;
-
-  struct space *s = r->e->s;
-  const struct engine *e = r->e;
-
-  /* Should we even bother?
-   * In the swallow case we care about BH-BH and BH-gas
-   * interactions.
-   * In all other cases only BH-gas so we can abort if there is
-   * is no gas in the cell */
-#if (FUNCTION_TASK_LOOP == TASK_LOOP_SWALLOW)
-  const int should_do_ci =
-      ci->black_holes.count != 0 && cell_is_active_black_holes(ci, e);
-  const int should_do_cj =
-      cj->black_holes.count != 0 && cell_is_active_black_holes(cj, e);
-#else
-  const int should_do_ci = ci->black_holes.count != 0 && cj->hydro.count != 0 &&
-                           cell_is_active_black_holes(ci, e);
-  const int should_do_cj = cj->black_holes.count != 0 && ci->hydro.count != 0 &&
-                           cell_is_active_black_holes(cj, e);
-
-#endif
-
-  if (!should_do_ci && !should_do_cj) return;
-
-  /* Get the type of pair and flip ci/cj if needed. */
-  double shift[3];
-  const int sid = space_getsid(s, &ci, &cj, shift);
-
-  /* Recurse? */
-  if (cell_can_recurse_in_pair_black_holes_task(ci, cj) &&
-      cell_can_recurse_in_pair_black_holes_task(cj, ci)) {
-    struct cell_split_pair *csp = &cell_split_pairs[sid];
-    for (int k = 0; k < csp->count; k++) {
-      const int pid = csp->pairs[k].pid;
-      const int pjd = csp->pairs[k].pjd;
-      if (ci->progeny[pid] != NULL && cj->progeny[pjd] != NULL)
-        DOSUB_PAIR1_BH(r, ci->progeny[pid], cj->progeny[pjd], 0);
-    }
-  }
-
-  /* Otherwise, compute the pair directly. */
-  else {
-
-#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
-    const int do_ci_bh = ci->nodeID == e->nodeID;
-    const int do_cj_bh = cj->nodeID == e->nodeID;
-#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK)
-    /* Here we are updating the hydro -> switch ci, cj */
-    const int do_ci_bh = cj->nodeID == e->nodeID;
-    const int do_cj_bh = ci->nodeID == e->nodeID;
-#else
-    /* Here we perform the task on both sides */
-    const int do_ci_bh = 1;
-    const int do_cj_bh = 1;
-#endif
-
-    const int do_ci = ci->black_holes.count != 0 &&
-                      cell_is_active_black_holes(ci, e) && do_ci_bh;
-    const int do_cj = cj->black_holes.count != 0 &&
-                      cell_is_active_black_holes(cj, e) && do_cj_bh;
-
-    if (do_ci) {
-
-      /* Make sure both cells are drifted to the current timestep. */
-      if (!cell_are_bpart_drifted(ci, e))
-        error("Interacting undrifted cells (bparts).");
-
-      if (cj->hydro.count != 0 && !cell_are_part_drifted(cj, e))
-        error("Interacting undrifted cells (parts).");
-    }
-
-    if (do_cj) {
-
-      /* Make sure both cells are drifted to the current timestep. */
-      if (ci->hydro.count != 0 && !cell_are_part_drifted(ci, e))
-        error("Interacting undrifted cells (parts).");
-
-      if (!cell_are_bpart_drifted(cj, e))
-        error("Interacting undrifted cells (bparts).");
-    }
-
-    if (do_ci || do_cj) DOPAIR1_BRANCH_BH(r, ci, cj);
-  }
-
-  TIMER_TOC(TIMER_DOSUB_PAIR_BH);
-}
-
-/**
- * @brief Compute grouped sub-cell interactions for self tasks
- *
- * @param r The #runner.
- * @param ci The first #cell.
- * @param gettimer Do we have a timer ?
- */
-void DOSUB_SELF1_BH(struct runner *r, struct cell *ci, int gettimer) {
-
-  TIMER_TIC;
-
-  const struct engine *e = r->e;
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (ci->nodeID != engine_rank)
-    error("This function should not be called on foreign cells");
-#endif
-
-    /* Should we even bother?
-     * In the swallow case we care about BH-BH and BH-gas
-     * interactions.
-     * In all other cases only BH-gas so we can abort if there is
-     * is no gas in the cell */
-#if (FUNCTION_TASK_LOOP == TASK_LOOP_SWALLOW)
-  const int should_do_ci =
-      ci->black_holes.count != 0 && cell_is_active_black_holes(ci, e);
-#else
-  const int should_do_ci = ci->black_holes.count != 0 && ci->hydro.count != 0 &&
-                           cell_is_active_black_holes(ci, e);
-#endif
-
-  if (!should_do_ci) return;
-
-  /* Recurse? */
-  if (cell_can_recurse_in_self_black_holes_task(ci)) {
-
-    /* Loop over all progeny. */
-    for (int k = 0; k < 8; k++)
-      if (ci->progeny[k] != NULL) {
-        DOSUB_SELF1_BH(r, ci->progeny[k], 0);
-        for (int j = k + 1; j < 8; j++)
-          if (ci->progeny[j] != NULL)
-            DOSUB_PAIR1_BH(r, ci->progeny[k], ci->progeny[j], 0);
-      }
-  }
-
-  /* Otherwise, compute self-interaction. */
-  else {
-
-    /* Check we did drift to the current time */
-    if (!cell_are_bpart_drifted(ci, e)) error("Interacting undrifted cell.");
-
-    if (ci->hydro.count != 0 && !cell_are_part_drifted(ci, e))
-      error("Interacting undrifted cells (bparts).");
-
-    DOSELF1_BRANCH_BH(r, ci);
-  }
-
-  TIMER_TOC(TIMER_DOSUB_SELF_BH);
-}
+                     int *ind, const int bcount, struct cell *cj, int gettimer);
diff --git a/src/runner_doiact_functions_black_holes.h b/src/runner_doiact_functions_black_holes.h
new file mode 100644
index 0000000000000000000000000000000000000000..f8af37c751a9f7a89455ae5c9a7ef72ec55a1c64
--- /dev/null
+++ b/src/runner_doiact_functions_black_holes.h
@@ -0,0 +1,877 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
+ *               2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+/* Before including this file, define FUNCTION, which is the
+   name of the interaction function. This creates the interaction functions
+   runner_dopair_FUNCTION, runner_dopair_FUNCTION_naive, runner_doself_FUNCTION,
+   and runner_dosub_FUNCTION calling the pairwise interaction function
+   runner_iact_FUNCTION. */
+
+#include "runner_doiact_black_holes.h"
+
+/**
+ * @brief Self task: interact each active #bpart of @c c with the gas #part of
+ * @c c inside its search radius; the swallow loop also does BH-BH pairs.
+ * @param r The #runner; its engine provides the time and physics structs.
+ * @param c The #cell whose black holes and gas interact.
+ * @param timer 1 if the time is to be recorded.
+ */
+void DOSELF1_BH(struct runner *r, struct cell *c, int timer) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (c->nodeID != engine_rank) error("Should be run on a different node");
+#endif
+
+  TIMER_TIC;
+
+  const struct engine *e = r->e;
+  const integertime_t ti_current = e->ti_current;
+  const struct cosmology *cosmo = e->cosmology;
+
+  /* Nothing to do without black holes or if the cell is not BH-active. */
+  if (c->black_holes.count == 0) return;
+  if (!cell_is_active_black_holes(c, e)) return;
+
+  const int bcount = c->black_holes.count;
+  const int count = c->hydro.count;
+  struct bpart *restrict bparts = c->black_holes.parts;
+  struct part *restrict parts = c->hydro.parts;
+  struct xpart *restrict xparts = c->hydro.xparts;
+
+  /* Do we actually have any gas neighbours? */
+  if (c->hydro.count != 0) {
+
+    /* Loop over the bparts in c. */
+    for (int bid = 0; bid < bcount; bid++) {
+
+      /* Get a hold of the ith bpart in c. */
+      struct bpart *restrict bi = &bparts[bid];
+
+      /* Skip inactive particles */
+      if (!bpart_is_active(bi, e)) continue;
+
+      const float hi = bi->h;
+      const float hig2 = hi * hi * kernel_gamma2;
+      const float bix[3] = {(float)(bi->x[0] - c->loc[0]),
+                            (float)(bi->x[1] - c->loc[1]),
+                            (float)(bi->x[2] - c->loc[2])};
+
+      /* Loop over the gas parts in c. */
+      for (int pjd = 0; pjd < count; pjd++) {
+
+        /* Get a pointer to the jth gas particle and its xpart. */
+        struct part *restrict pj = &parts[pjd];
+        struct xpart *restrict xpj = &xparts[pjd];
+        const float hj = pj->h;
+
+        /* Skip inhibited gas particles. */
+        if (part_is_inhibited(pj, e)) continue;
+
+        /* Compute the pairwise distance (positions relative to c->loc). */
+        const float pjx[3] = {(float)(pj->x[0] - c->loc[0]),
+                              (float)(pj->x[1] - c->loc[1]),
+                              (float)(pj->x[2] - c->loc[2])};
+        float dx[3] = {bix[0] - pjx[0], bix[1] - pjx[1], bix[2] - pjx[2]};
+        const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
+
+#ifdef SWIFT_DEBUG_CHECKS
+        /* Check that particles have been drifted to the current time */
+        if (bi->ti_drift != e->ti_current)
+          error("Particle bi not drifted to current time");
+        if (pj->ti_drift != e->ti_current)
+          error("Particle pj not drifted to current time");
+#endif
+        /* Hit or miss? Only the BH's own search radius (hig2) is tested. */
+        if (r2 < hig2) {
+          IACT_BH_GAS(r2, dx, hi, hj, bi, pj, xpj, cosmo, e->gravity_properties,
+                      ti_current);
+        }
+      } /* loop over the gas parts in c. */
+    }   /* loop over the bparts in c. */
+  }     /* Do we have gas particles in the cell? */
+
+    /* When doing BH swallowing, we need a quick loop also over the BH
+     * neighbours */
+#if (FUNCTION_TASK_LOOP == TASK_LOOP_SWALLOW)
+
+  /* Loop over the bparts in c. */
+  for (int bid = 0; bid < bcount; bid++) {
+
+    /* Get a hold of the ith bpart in c. */
+    struct bpart *restrict bi = &bparts[bid];
+
+    /* Skip inactive particles */
+    if (!bpart_is_active(bi, e)) continue;
+
+    const float hi = bi->h;
+    const float hig2 = hi * hi * kernel_gamma2;
+    const float bix[3] = {(float)(bi->x[0] - c->loc[0]),
+                          (float)(bi->x[1] - c->loc[1]),
+                          (float)(bi->x[2] - c->loc[2])};
+
+    /* Loop over all the bparts in c as potential neighbours. */
+    for (int bjd = 0; bjd < bcount; bjd++) {
+
+      /* Skip self interaction */
+      if (bid == bjd) continue;
+
+      /* Get a pointer to the jth bpart. */
+      struct bpart *restrict bj = &bparts[bjd];
+      const float hj = bj->h;
+
+      /* Skip inhibited black holes. */
+      if (bpart_is_inhibited(bj, e)) continue;
+
+      /* Compute the pairwise distance (positions relative to c->loc). */
+      const float bjx[3] = {(float)(bj->x[0] - c->loc[0]),
+                            (float)(bj->x[1] - c->loc[1]),
+                            (float)(bj->x[2] - c->loc[2])};
+      float dx[3] = {bix[0] - bjx[0], bix[1] - bjx[1], bix[2] - bjx[2]};
+      const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
+
+#ifdef SWIFT_DEBUG_CHECKS
+      /* Check that particles have been drifted to the current time */
+      if (bi->ti_drift != e->ti_current)
+        error("Particle bi not drifted to current time");
+      if (bj->ti_drift != e->ti_current)
+        error("Particle bj not drifted to current time");
+#endif
+      /* Hit or miss? Only the search radius of bi (hig2) is tested. */
+      if (r2 < hig2) {
+        IACT_BH_BH(r2, dx, hi, hj, bi, bj, cosmo, e->gravity_properties,
+                   ti_current);
+      }
+    } /* loop over the neighbour bparts in c. */
+  }   /* loop over the bparts in c. */
+
+#endif /* (FUNCTION_TASK_LOOP == TASK_LOOP_SWALLOW) */
+
+  TIMER_TOC(TIMER_DOSELF_BH);
+}
+
+/**
+ * @brief One-sided naive pair loop: active #bpart of @c ci interact with the
+ * #part of @c cj (and, in the swallow loop, with the #bpart of @c cj).
+ * @param r The #runner; its engine provides the time and physics structs.
+ * @param ci The #cell providing the black holes.
+ * @param cj The #cell providing the neighbours (gas, and BHs when swallowing).
+ */
+void DO_NONSYM_PAIR1_BH_NAIVE(struct runner *r, struct cell *restrict ci,
+                              struct cell *restrict cj) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
+  if (ci->nodeID != engine_rank) error("Should be run on a different node");
+#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK)
+  if (cj->nodeID != engine_rank) error("Should be run on a different node");
+#endif
+#endif
+
+  const struct engine *e = r->e;
+  const integertime_t ti_current = e->ti_current;
+  const struct cosmology *cosmo = e->cosmology;
+
+  /* Nothing to do if ci has no black holes or is not BH-active. */
+  if (ci->black_holes.count == 0) return;
+  if (!cell_is_active_black_holes(ci, e)) return;
+
+  const int bcount_i = ci->black_holes.count;
+  const int count_j = cj->hydro.count;
+  struct bpart *restrict bparts_i = ci->black_holes.parts;
+  struct part *restrict parts_j = cj->hydro.parts;
+  struct xpart *restrict xparts_j = cj->hydro.xparts;
+
+  /* Box-length shift per axis when the cells are > dim/2 apart (wrapping). */
+  double shift[3] = {0.0, 0.0, 0.0};
+  for (int k = 0; k < 3; k++) {
+    if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2)
+      shift[k] = e->s->dim[k];
+    else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2)
+      shift[k] = -e->s->dim[k];
+  }
+
+  /* Do we actually have any gas neighbours? */
+  if (cj->hydro.count != 0) {
+
+    /* Loop over the bparts in ci. */
+    for (int bid = 0; bid < bcount_i; bid++) {
+
+      /* Get a hold of the ith bpart in ci. */
+      struct bpart *restrict bi = &bparts_i[bid];
+
+      /* Skip inactive particles */
+      if (!bpart_is_active(bi, e)) continue;
+
+      const float hi = bi->h;
+      const float hig2 = hi * hi * kernel_gamma2;
+      const float bix[3] = {(float)(bi->x[0] - (cj->loc[0] + shift[0])),
+                            (float)(bi->x[1] - (cj->loc[1] + shift[1])),
+                            (float)(bi->x[2] - (cj->loc[2] + shift[2]))};
+
+      /* Loop over the gas parts in cj. */
+      for (int pjd = 0; pjd < count_j; pjd++) {
+
+        /* Get a pointer to the jth gas particle and its xpart. */
+        struct part *restrict pj = &parts_j[pjd];
+        struct xpart *restrict xpj = &xparts_j[pjd];
+        const float hj = pj->h;
+
+        /* Skip inhibited particles. */
+        if (part_is_inhibited(pj, e)) continue;
+
+        /* Compute the pairwise distance (both relative to cj->loc). */
+        const float pjx[3] = {(float)(pj->x[0] - cj->loc[0]),
+                              (float)(pj->x[1] - cj->loc[1]),
+                              (float)(pj->x[2] - cj->loc[2])};
+        float dx[3] = {bix[0] - pjx[0], bix[1] - pjx[1], bix[2] - pjx[2]};
+        const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
+
+#ifdef SWIFT_DEBUG_CHECKS
+        /* Check that particles have been drifted to the current time */
+        if (bi->ti_drift != e->ti_current)
+          error("Particle bi not drifted to current time");
+        if (pj->ti_drift != e->ti_current)
+          error("Particle pj not drifted to current time");
+#endif
+        /* Hit or miss? Only the BH's own search radius (hig2) is tested. */
+        if (r2 < hig2) {
+          IACT_BH_GAS(r2, dx, hi, hj, bi, pj, xpj, cosmo, e->gravity_properties,
+                      ti_current);
+        }
+      } /* loop over the parts in cj. */
+    }   /* loop over the bparts in ci. */
+  }     /* Do we have gas particles in the cell? */
+
+    /* When doing BH swallowing, we need a quick loop also over the BH
+     * neighbours */
+#if (FUNCTION_TASK_LOOP == TASK_LOOP_SWALLOW)
+
+  const int bcount_j = cj->black_holes.count;
+  struct bpart *restrict bparts_j = cj->black_holes.parts;
+
+  /* Loop over the bparts in ci. */
+  for (int bid = 0; bid < bcount_i; bid++) {
+
+    /* Get a hold of the ith bpart in ci. */
+    struct bpart *restrict bi = &bparts_i[bid];
+
+    /* Skip inactive particles */
+    if (!bpart_is_active(bi, e)) continue;
+
+    const float hi = bi->h;
+    const float hig2 = hi * hi * kernel_gamma2;
+    const float bix[3] = {(float)(bi->x[0] - (cj->loc[0] + shift[0])),
+                          (float)(bi->x[1] - (cj->loc[1] + shift[1])),
+                          (float)(bi->x[2] - (cj->loc[2] + shift[2]))};
+
+    /* Loop over the bparts in cj. */
+    for (int bjd = 0; bjd < bcount_j; bjd++) {
+
+      /* Get a pointer to the jth bpart. */
+      struct bpart *restrict bj = &bparts_j[bjd];
+      const float hj = bj->h;
+
+      /* Skip inhibited particles. */
+      if (bpart_is_inhibited(bj, e)) continue;
+
+      /* Compute the pairwise distance (both relative to cj->loc). */
+      const float bjx[3] = {(float)(bj->x[0] - cj->loc[0]),
+                            (float)(bj->x[1] - cj->loc[1]),
+                            (float)(bj->x[2] - cj->loc[2])};
+      float dx[3] = {bix[0] - bjx[0], bix[1] - bjx[1], bix[2] - bjx[2]};
+      const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
+
+#ifdef SWIFT_DEBUG_CHECKS
+      /* Check that particles have been drifted to the current time */
+      if (bi->ti_drift != e->ti_current)
+        error("Particle bi not drifted to current time");
+      if (bj->ti_drift != e->ti_current)
+        error("Particle bj not drifted to current time");
+#endif
+      /* Hit or miss? Only the search radius of bi (hig2) is tested. */
+      if (r2 < hig2) {
+        IACT_BH_BH(r2, dx, hi, hj, bi, bj, cosmo, e->gravity_properties,
+                   ti_current);
+      }
+    } /* loop over the bparts in cj. */
+  }   /* loop over the bparts in ci. */
+
+#endif /* (FUNCTION_TASK_LOOP == TASK_LOOP_SWALLOW) */
+}
+
+void DOPAIR1_BH_NAIVE(struct runner *r, struct cell *restrict ci,
+                      struct cell *restrict cj, int timer) {
+  /* Naive BH pair task: run the one-sided loop in each enabled direction. */
+  TIMER_TIC;
+
+#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
+  const int do_ci_bh = ci->nodeID == r->e->nodeID;
+  const int do_cj_bh = cj->nodeID == r->e->nodeID;
+#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK)
+  /* Here we are updating the hydro -> switch ci, cj */
+  const int do_ci_bh = cj->nodeID == r->e->nodeID;
+  const int do_cj_bh = ci->nodeID == r->e->nodeID;
+#else
+  /* The swallow task is executed on both sides */
+  const int do_ci_bh = 1;
+  const int do_cj_bh = 1;
+#endif
+  /* BHs of ci with neighbours in cj, then BHs of cj with neighbours in ci. */
+  if (do_ci_bh) DO_NONSYM_PAIR1_BH_NAIVE(r, ci, cj);
+  if (do_cj_bh) DO_NONSYM_PAIR1_BH_NAIVE(r, cj, ci);
+
+  TIMER_TOC(TIMER_DOPAIR_BH);
+}
+
+/**
+ * @brief Compute the BH-gas interactions between a cell pair, but only for
+ *      the #bpart of ci selected by the given indices.
+ *
+ * Version using a brute-force algorithm.
+ *
+ * @param r The #runner.
+ * @param ci The first #cell.
+ * @param bparts_i The #bpart array that @c ind indexes into.
+ * @param ind The list of indices into @c bparts_i of the #bpart to process.
+ * @param bcount The number of entries in @c ind.
+ * @param cj The second #cell, providing the gas #part neighbours.
+ * @param shift The shift vector subtracted from the #bpart positions.
+ */
+void DOPAIR1_SUBSET_BH_NAIVE(struct runner *r, struct cell *restrict ci,
+                             struct bpart *restrict bparts_i, int *restrict ind,
+                             const int bcount, struct cell *restrict cj,
+                             const double *shift) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (ci->nodeID != engine_rank) error("Should be run on a different node");
+#endif
+
+  const struct engine *e = r->e;
+  const integertime_t ti_current = e->ti_current;
+  const struct cosmology *cosmo = e->cosmology;
+
+  const int count_j = cj->hydro.count;
+  struct part *restrict parts_j = cj->hydro.parts;
+  struct xpart *restrict xparts_j = cj->hydro.xparts;
+
+  /* Early abort if cj holds no gas. */
+  if (count_j == 0) return;
+
+  /* Loop over the selected bparts. */
+  for (int bid = 0; bid < bcount; bid++) {
+
+    /* Get a hold of the bpart selected by the current index. */
+    struct bpart *restrict bi = &bparts_i[ind[bid]];
+
+    const double bix = bi->x[0] - (shift[0]);
+    const double biy = bi->x[1] - (shift[1]);
+    const double biz = bi->x[2] - (shift[2]);
+    const float hi = bi->h;
+    const float hig2 = hi * hi * kernel_gamma2;
+
+#ifdef SWIFT_DEBUG_CHECKS
+    if (!bpart_is_active(bi, e))
+      error("Trying to correct smoothing length of inactive particle !");
+#endif
+
+    /* Loop over the gas parts in cj. */
+    for (int pjd = 0; pjd < count_j; pjd++) {
+
+      /* Get a pointer to the jth gas particle and its xpart. */
+      struct part *restrict pj = &parts_j[pjd];
+      struct xpart *restrict xpj = &xparts_j[pjd];
+
+      /* Skip inhibited particles */
+      if (part_is_inhibited(pj, e)) continue;
+
+      const double pjx = pj->x[0];
+      const double pjy = pj->x[1];
+      const double pjz = pj->x[2];
+      const float hj = pj->h;
+
+      /* Compute the pairwise distance (difference in double, then float). */
+      float dx[3] = {(float)(bix - pjx), (float)(biy - pjy),
+                     (float)(biz - pjz)};
+      const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
+
+#ifdef SWIFT_DEBUG_CHECKS
+      /* Check that particles have been drifted to the current time */
+      if (pj->ti_drift != e->ti_current)
+        error("Particle pj not drifted to current time");
+#endif
+      /* Hit or miss? */
+      if (r2 < hig2) {
+        IACT_BH_GAS(r2, dx, hi, hj, bi, pj, xpj, cosmo, e->gravity_properties,
+                    ti_current);
+      }
+    } /* loop over the parts in cj. */
+  }   /* loop over the selected bparts. */
+}
+
+/**
+ * @brief Interact a subset of the black holes of a cell with all the gas
+ *      particles of that same cell.
+ *
+ * @param r The #runner.
+ * @param ci The #cell providing the gas particles.
+ * @param bparts The #bpart array that @c ind indexes into.
+ * @param ind The list of indices into @c bparts of the #bpart to interact.
+ * @param bcount The number of entries in @c ind.
+ */
+void DOSELF1_SUBSET_BH(struct runner *r, struct cell *restrict ci,
+                       struct bpart *restrict bparts, int *restrict ind,
+                       const int bcount) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (ci->nodeID != engine_rank) error("Should be run on a different node");
+#endif
+
+  const struct engine *e = r->e;
+  const struct cosmology *cosmo = e->cosmology;
+  const integertime_t ti_current = e->ti_current;
+
+  struct part *restrict gas = ci->hydro.parts;
+  struct xpart *restrict xgas = ci->hydro.xparts;
+  const int gas_count = ci->hydro.count;
+
+  /* No gas in the cell: nothing to interact with. */
+  if (gas_count == 0) return;
+
+  /* Outer loop: the black holes selected through ind. */
+  for (int n = 0; n < bcount; n++) {
+
+    /* Selected black hole, with its position relative to the cell. */
+    struct bpart *bh = &bparts[ind[n]];
+    const float bh_x[3] = {(float)(bh->x[0] - ci->loc[0]),
+                           (float)(bh->x[1] - ci->loc[1]),
+                           (float)(bh->x[2] - ci->loc[2])};
+    const float h_bh = bh->h;
+    const float h_bh_g2 = h_bh * h_bh * kernel_gamma2;
+
+#ifdef SWIFT_DEBUG_CHECKS
+    if (!bpart_is_active(bh, e)) error("Inactive particle in subset function!");
+#endif
+
+    /* Inner loop: all the gas particles of ci. */
+    for (int k = 0; k < gas_count; k++) {
+
+      /* Gas particle and its matching xpart. */
+      struct part *restrict gp = &gas[k];
+      struct xpart *restrict xgp = &xgas[k];
+
+      /* Skip inhibited particles. */
+      if (part_is_inhibited(gp, e)) continue;
+
+      /* Separation, with both positions taken relative to the cell. */
+      const float gp_x[3] = {(float)(gp->x[0] - ci->loc[0]),
+                             (float)(gp->x[1] - ci->loc[1]),
+                             (float)(gp->x[2] - ci->loc[2])};
+      float dx[3] = {bh_x[0] - gp_x[0], bh_x[1] - gp_x[1], bh_x[2] - gp_x[2]};
+      const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
+
+#ifdef SWIFT_DEBUG_CHECKS
+      /* The gas particle must have been drifted to the current time. */
+      if (gp->ti_drift != e->ti_current)
+        error("Particle pj not drifted to current time");
+#endif
+
+      /* Interact only when r2 is below h^2 * kernel_gamma2. */
+      if (r2 < h_bh_g2) {
+        IACT_BH_GAS(r2, dx, h_bh, gp->h, bh, gp, xgp, cosmo,
+                    e->gravity_properties, ti_current);
+      }
+    } /* loop over the gas particles of ci. */
+  }   /* loop over the selected black holes. */
+}
+
+/**
+ * @brief Branch function for DOSELF1_SUBSET_BH. It currently forwards all
+ * of its arguments to DOSELF1_SUBSET_BH without any further test.
+ *
+ * @param r The #runner.
+ * @param ci The #cell containing the particles.
+ * @param bparts The #bpart to interact.
+ * @param ind The list of indices into @c bparts of the #bpart to interact.
+ * @param bcount The number of entries in @c ind.
+ */
+void DOSELF1_SUBSET_BRANCH_BH(struct runner *r, struct cell *restrict ci,
+                              struct bpart *restrict bparts, int *restrict ind,
+                              const int bcount) {
+
+  DOSELF1_SUBSET_BH(r, ci, bparts, ind, bcount);
+}
+
+/**
+ * @brief Compute the periodic wrapping between two cells and run the naive
+ * subset pair loop (DOPAIR1_SUBSET_BH_NAIVE). Nothing is done if @c cj
+ * contains no gas.
+ *
+ * @param r The #runner.
+ * @param ci The first #cell.
+ * @param bparts_i The #bpart to interact with @c cj.
+ * @param ind The list of indices of the particles to interact.
+ * @param bcount The number of entries in @c ind.
+ * @param cj The second #cell, whose gas count is tested.
+ */
+void DOPAIR1_SUBSET_BRANCH_BH(struct runner *r, struct cell *restrict ci,
+                              struct bpart *restrict bparts_i,
+                              int *restrict ind, int const bcount,
+                              struct cell *restrict cj) {
+  /* No gas on the other side means no work. */
+  if (cj->hydro.count == 0) return;
+
+  const struct space *s = r->e->s;
+
+  /* Work out, axis by axis, whether the pair wraps around the box. */
+  double shift[3] = {0.0, 0.0, 0.0};
+  for (int k = 0; k < 3; k++) {
+    if (cj->loc[k] - ci->loc[k] < -s->dim[k] / 2) {
+      shift[k] = s->dim[k];
+    } else if (cj->loc[k] - ci->loc[k] > s->dim[k] / 2) {
+      shift[k] = -s->dim[k];
+    }
+  }
+
+  DOPAIR1_SUBSET_BH_NAIVE(r, ci, bparts_i, ind, bcount, cj, shift);
+}
+
+/**
+ * @brief Recurse through the cell hierarchy to interact a subset of the
+ * black holes of @c ci with the gas of @c ci (self) or of @c cj (pair).
+ *
+ * @param r The #runner.
+ * @param ci The #cell containing the black holes of the subset.
+ * @param bparts The #bpart array that @c ind indexes into.
+ * @param ind The list of indices into @c bparts of the #bpart to interact.
+ * @param bcount The number of entries in @c ind.
+ * @param cj The second #cell, or NULL for a self-interaction.
+ * @param gettimer Not read by this function.
+ */
+void DOSUB_SUBSET_BH(struct runner *r, struct cell *ci, struct bpart *bparts,
+                     int *ind, const int bcount, struct cell *cj,
+                     int gettimer) {
+  const struct engine *e = r->e;
+  struct space *s = e->s;
+
+  /* Should we even bother? */
+  if (!cell_is_active_black_holes(ci, e) &&
+      (cj == NULL || !cell_is_active_black_holes(cj, e)))
+    return;
+
+  /* Find the progeny of ci holding the subset (only ind[0] is tested). */
+  struct cell *sub = NULL;
+  if (ci->split) {
+    for (int k = 0; k < 8; k++) {
+      if (ci->progeny[k] != NULL) {
+        if (&bparts[ind[0]] >= &ci->progeny[k]->black_holes.parts[0] &&
+            &bparts[ind[0]] <
+                &ci->progeny[k]
+                     ->black_holes.parts[ci->progeny[k]->black_holes.count]) {
+          sub = ci->progeny[k];
+          break;
+        }
+      }
+    }
+  }
+
+  /* Is this a single cell? */
+  if (cj == NULL) {
+    if (cell_can_recurse_in_self_black_holes_task(ci)) {
+      /* Recursing without a matching progeny would dereference NULL. */
+      if (sub == NULL) error("Subset not found in any progeny of the cell!");
+      /* Self-interaction in sub, then sub against each of its siblings. */
+      DOSUB_SUBSET_BH(r, sub, bparts, ind, bcount, NULL, 0);
+      for (int j = 0; j < 8; j++)
+        if (ci->progeny[j] != sub && ci->progeny[j] != NULL)
+          DOSUB_SUBSET_BH(r, sub, bparts, ind, bcount, ci->progeny[j], 0);
+    }
+    /* Otherwise, compute self-interaction. */
+    else
+      DOSELF1_SUBSET_BRANCH_BH(r, ci, bparts, ind, bcount);
+  } /* self-interaction. */
+  /* Otherwise, it's a pair interaction. */
+  else {
+    if (cell_can_recurse_in_pair_black_holes_task(ci, cj) &&
+        cell_can_recurse_in_pair_black_holes_task(cj, ci)) {
+      /* A NULL sub would match the empty progeny slots tested below. */
+      if (sub == NULL) error("Subset not found in any progeny of the cell!");
+      /* Get the type of pair and flip ci/cj if needed. */
+      double shift[3] = {0.0, 0.0, 0.0};
+      const int sid = space_getsid(s, &ci, &cj, shift);
+      /* After a flip sub may belong to either cell: test both sides. */
+      struct cell_split_pair *csp = &cell_split_pairs[sid];
+      for (int k = 0; k < csp->count; k++) {
+        const int pid = csp->pairs[k].pid;
+        const int pjd = csp->pairs[k].pjd;
+        if (ci->progeny[pid] == sub && cj->progeny[pjd] != NULL)
+          DOSUB_SUBSET_BH(r, ci->progeny[pid], bparts, ind, bcount,
+                          cj->progeny[pjd], 0);
+        if (ci->progeny[pid] != NULL && cj->progeny[pjd] == sub)
+          DOSUB_SUBSET_BH(r, cj->progeny[pjd], bparts, ind, bcount,
+                          ci->progeny[pid], 0);
+      }
+    }
+    /* Otherwise, compute the pair directly. */
+    else if (cell_is_active_black_holes(ci, e) && cj->hydro.count > 0) {
+      /* ci is active here, so both cells must already be drifted. */
+      if (!cell_are_bpart_drifted(ci, e)) error("Cell should be drifted!");
+      if (!cell_are_part_drifted(cj, e)) error("Cell should be drifted!");
+      DOPAIR1_SUBSET_BRANCH_BH(r, ci, bparts, ind, bcount, cj);
+    }
+  } /* otherwise, pair interaction. */
+}
+
+/**
+ * @brief Guarded entry point for the black hole self-interaction of a cell.
+ * Cells without black holes, or whose black holes are not active, are
+ * skipped; all the others are handed over to DOSELF1_BH.
+ *
+ * @param r The #runner.
+ * @param c The #cell to process.
+ */
+void DOSELF1_BRANCH_BH(struct runner *r, struct cell *c) {
+
+  const struct engine *restrict e = r->e;
+
+  /* Nothing to do without black holes, or if none of them is active. */
+  const int has_bh = (c->black_holes.count != 0);
+  if (!has_bh || !cell_is_active_black_holes(c, e)) return;
+
+  /* Did we mess up the recursion? kernel_gamma times the old maximal
+   * smoothing length of the black holes must not exceed c->dmin. */
+  const int too_small = (c->black_holes.h_max_old * kernel_gamma > c->dmin);
+  if (too_small) error("Cell smaller than smoothing length");
+
+  /* Everything is in order: run the interaction loop. */
+  DOSELF1_BH(r, c, 1);
+}
+
+/**
+ * @brief Work out which side(s) of a cell pair have black hole work to do
+ * in the current task loop and, if any, run the naive pair interaction.
+ * Aborts if a cell taking part has not been drifted.
+ *
+ * @param r The #runner.
+ * @param ci The first #cell.
+ * @param cj The second #cell.
+ *
+ */
+void DOPAIR1_BRANCH_BH(struct runner *r, struct cell *ci, struct cell *cj) {
+
+  const struct engine *restrict e = r->e;
+
+  const int active_i = cell_is_active_black_holes(ci, e);
+  const int active_j = cell_is_active_black_holes(cj, e);
+#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
+  const int allow_i = (ci->nodeID == e->nodeID);
+  const int allow_j = (cj->nodeID == e->nodeID);
+#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK)
+  /* Here the hydro particles are updated -> the roles of ci, cj swap */
+  const int allow_i = (cj->nodeID == e->nodeID);
+  const int allow_j = (ci->nodeID == e->nodeID);
+#else
+  /* The swallow task is executed on both sides */
+  const int allow_i = 1;
+  const int allow_j = 1;
+#endif
+
+  const int run_i = active_i && allow_i && ci->black_holes.count != 0 &&
+                    cj->hydro.count != 0;
+  const int run_j = active_j && allow_j && cj->black_holes.count != 0 &&
+                    ci->hydro.count != 0;
+
+  /* Neither direction has anything to do. */
+  if (!(run_i || run_j)) return;
+
+  /* Both the black holes and the gas they act on must be drifted. */
+  if (run_i &&
+      !(cell_are_bpart_drifted(ci, e) && cell_are_part_drifted(cj, e)))
+    error("Interacting undrifted cells.");
+
+  if (run_j &&
+      !(cell_are_part_drifted(ci, e) && cell_are_bpart_drifted(cj, e)))
+    error("Interacting undrifted cells.");
+
+  /* No sorted interactions here -> use the naive ones */
+  DOPAIR1_BH_NAIVE(r, ci, cj, 1);
+}
+
+/**
+ * @brief Black hole pair task, recursing into progeny pairs when possible.
+ *
+ * @param r The #runner.
+ * @param ci The first #cell.
+ * @param cj The second #cell.
+ * @param gettimer Do we have a timer ?
+ *
+ * @todo Hard-code the sid on the recursive calls to avoid the
+ * redundant computations to find the sid on-the-fly.
+ */
+void DOSUB_PAIR1_BH(struct runner *r, struct cell *ci, struct cell *cj,
+                    int gettimer) {
+
+  TIMER_TIC;
+
+  const struct engine *e = r->e;
+  struct space *s = e->s;
+
+  /* Is there any work at all for this pair?
+   * The swallow loop cares about both BH-BH and BH-gas interactions, so
+   * only the black hole side is tested.
+   * Every other loop is BH-gas only, so the partner cell must also
+   * contain gas. */
+#if (FUNCTION_TASK_LOOP == TASK_LOOP_SWALLOW)
+  const int work_i =
+      ci->black_holes.count != 0 && cell_is_active_black_holes(ci, e);
+  const int work_j =
+      cj->black_holes.count != 0 && cell_is_active_black_holes(cj, e);
+#else
+  const int work_i = ci->black_holes.count != 0 && cj->hydro.count != 0 &&
+                     cell_is_active_black_holes(ci, e);
+  const int work_j = cj->black_holes.count != 0 && ci->hydro.count != 0 &&
+                     cell_is_active_black_holes(cj, e);
+
+#endif
+
+  if (!(work_i || work_j)) return;
+
+  /* Get the type of pair and flip ci/cj if needed. */
+  double shift[3];
+  const int sid = space_getsid(s, &ci, &cj, shift);
+
+  /* Recursion needs the agreement of both cells. */
+  const int recurse = cell_can_recurse_in_pair_black_holes_task(ci, cj) &&
+                      cell_can_recurse_in_pair_black_holes_task(cj, ci);
+  if (recurse) {
+    struct cell_split_pair *split = &cell_split_pairs[sid];
+    for (int n = 0; n < split->count; n++) {
+      struct cell *cpi = ci->progeny[split->pairs[n].pid];
+      struct cell *cpj = cj->progeny[split->pairs[n].pjd];
+      if (cpi != NULL && cpj != NULL) DOSUB_PAIR1_BH(r, cpi, cpj, 0);
+    }
+  }
+
+  /* Otherwise, compute the pair directly. */
+  else {
+
+#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
+    const int allow_i = (ci->nodeID == e->nodeID);
+    const int allow_j = (cj->nodeID == e->nodeID);
+#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK)
+    /* Here we are updating the hydro -> switch ci, cj */
+    const int allow_i = (cj->nodeID == e->nodeID);
+    const int allow_j = (ci->nodeID == e->nodeID);
+#else
+    /* Here we perform the task on both sides */
+    const int allow_i = 1;
+    const int allow_j = 1;
+#endif
+
+    const int run_i = ci->black_holes.count != 0 &&
+                      cell_is_active_black_holes(ci, e) && allow_i;
+    const int run_j = cj->black_holes.count != 0 &&
+                      cell_is_active_black_holes(cj, e) && allow_j;
+
+    if (run_i) {
+
+      /* Black holes of ci and gas of cj (if any) must be drifted. */
+      if (!cell_are_bpart_drifted(ci, e))
+        error("Interacting undrifted cells (bparts).");
+
+      if (cj->hydro.count != 0 && !cell_are_part_drifted(cj, e))
+        error("Interacting undrifted cells (parts).");
+    }
+
+    if (run_j) {
+
+      /* Gas of ci (if any) and black holes of cj must be drifted. */
+      if (ci->hydro.count != 0 && !cell_are_part_drifted(ci, e))
+        error("Interacting undrifted cells (parts).");
+
+      if (!cell_are_bpart_drifted(cj, e))
+        error("Interacting undrifted cells (bparts).");
+    }
+
+    if (run_i || run_j) DOPAIR1_BRANCH_BH(r, ci, cj);
+  }
+
+  TIMER_TOC(TIMER_DOSUB_PAIR_BH);
+}
+
+/**
+ * @brief Black hole self task, recursing into the progeny when possible.
+ *
+ * @param r The #runner.
+ * @param ci The #cell to process.
+ * @param gettimer Do we have a timer ?
+ */
+void DOSUB_SELF1_BH(struct runner *r, struct cell *ci, int gettimer) {
+
+  TIMER_TIC;
+
+  const struct engine *e = r->e;
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (ci->nodeID != engine_rank)
+    error("This function should not be called on foreign cells");
+#endif
+
+  /* Should we even bother?
+   * In the swallow case we care about BH-BH and BH-gas
+   * interactions.
+   * In all other cases only BH-gas so we can abort if there
+   * is no gas in the cell */
+#if (FUNCTION_TASK_LOOP == TASK_LOOP_SWALLOW)
+  const int should_do_ci =
+      ci->black_holes.count != 0 && cell_is_active_black_holes(ci, e);
+#else
+  const int should_do_ci = ci->black_holes.count != 0 && ci->hydro.count != 0 &&
+                           cell_is_active_black_holes(ci, e);
+#endif
+
+  if (!should_do_ci) return;
+
+  /* Recurse? */
+  if (cell_can_recurse_in_self_black_holes_task(ci)) {
+
+    /* Each progeny interacts with itself and with every later sibling. */
+    for (int k = 0; k < 8; k++)
+      if (ci->progeny[k] != NULL) {
+        DOSUB_SELF1_BH(r, ci->progeny[k], 0);
+        for (int j = k + 1; j < 8; j++)
+          if (ci->progeny[j] != NULL)
+            DOSUB_PAIR1_BH(r, ci->progeny[k], ci->progeny[j], 0);
+      }
+  }
+
+  /* Otherwise, compute self-interaction. */
+  else {
+
+    /* Check we did drift to the current time: black holes, then gas. */
+    if (!cell_are_bpart_drifted(ci, e)) error("Interacting undrifted cell.");
+
+    if (ci->hydro.count != 0 && !cell_are_part_drifted(ci, e))
+      error("Interacting undrifted cells (parts).");
+
+    DOSELF1_BRANCH_BH(r, ci);
+  }
+
+  TIMER_TOC(TIMER_DOSUB_SELF_BH);
+}
diff --git a/src/runner_doiact.h b/src/runner_doiact_functions_hydro.h
similarity index 96%
rename from src/runner_doiact.h
rename to src/runner_doiact_functions_hydro.h
index 8aabb05d177385c6bbee1a91eb2ea231ccbca3e4..c324c759b5acc9db75cf0849d0e417b2141978f4 100644
--- a/src/runner_doiact.h
+++ b/src/runner_doiact_functions_hydro.h
@@ -24,106 +24,7 @@
    and runner_dosub_FUNCTION calling the pairwise interaction function
    runner_iact_FUNCTION. */
 
-#define PASTE(x, y) x##_##y
-
-#define _DOPAIR1_BRANCH(f) PASTE(runner_dopair1_branch, f)
-#define DOPAIR1_BRANCH _DOPAIR1_BRANCH(FUNCTION)
-
-#define _DOPAIR1(f) PASTE(runner_dopair1, f)
-#define DOPAIR1 _DOPAIR1(FUNCTION)
-
-#define _DOPAIR2_BRANCH(f) PASTE(runner_dopair2_branch, f)
-#define DOPAIR2_BRANCH _DOPAIR2_BRANCH(FUNCTION)
-
-#define _DOPAIR2(f) PASTE(runner_dopair2, f)
-#define DOPAIR2 _DOPAIR2(FUNCTION)
-
-#define _DOPAIR_SUBSET(f) PASTE(runner_dopair_subset, f)
-#define DOPAIR_SUBSET _DOPAIR_SUBSET(FUNCTION)
-
-#define _DOPAIR_SUBSET_BRANCH(f) PASTE(runner_dopair_subset_branch, f)
-#define DOPAIR_SUBSET_BRANCH _DOPAIR_SUBSET_BRANCH(FUNCTION)
-
-#define _DOPAIR_SUBSET_NOSORT(f) PASTE(runner_dopair_subset_nosort, f)
-#define DOPAIR_SUBSET_NOSORT _DOPAIR_SUBSET_NOSORT(FUNCTION)
-
-#define _DOPAIR_SUBSET_NAIVE(f) PASTE(runner_dopair_subset_naive, f)
-#define DOPAIR_SUBSET_NAIVE _DOPAIR_SUBSET_NAIVE(FUNCTION)
-
-#define _DOPAIR1_NAIVE(f) PASTE(runner_dopair1_naive, f)
-#define DOPAIR1_NAIVE _DOPAIR1_NAIVE(FUNCTION)
-
-#define _DOPAIR2_NAIVE(f) PASTE(runner_dopair2_naive, f)
-#define DOPAIR2_NAIVE _DOPAIR2_NAIVE(FUNCTION)
-
-#define _DOSELF1_NAIVE(f) PASTE(runner_doself1_naive, f)
-#define DOSELF1_NAIVE _DOSELF1_NAIVE(FUNCTION)
-
-#define _DOSELF2_NAIVE(f) PASTE(runner_doself2_naive, f)
-#define DOSELF2_NAIVE _DOSELF2_NAIVE(FUNCTION)
-
-#define _DOSELF1_BRANCH(f) PASTE(runner_doself1_branch, f)
-#define DOSELF1_BRANCH _DOSELF1_BRANCH(FUNCTION)
-
-#define _DOSELF1(f) PASTE(runner_doself1, f)
-#define DOSELF1 _DOSELF1(FUNCTION)
-
-#define _DOSELF2_BRANCH(f) PASTE(runner_doself2_branch, f)
-#define DOSELF2_BRANCH _DOSELF2_BRANCH(FUNCTION)
-
-#define _DOSELF2(f) PASTE(runner_doself2, f)
-#define DOSELF2 _DOSELF2(FUNCTION)
-
-#define _DOSELF_SUBSET(f) PASTE(runner_doself_subset, f)
-#define DOSELF_SUBSET _DOSELF_SUBSET(FUNCTION)
-
-#define _DOSELF_SUBSET_BRANCH(f) PASTE(runner_doself_subset_branch, f)
-#define DOSELF_SUBSET_BRANCH _DOSELF_SUBSET_BRANCH(FUNCTION)
-
-#define _DOSUB_SELF1(f) PASTE(runner_dosub_self1, f)
-#define DOSUB_SELF1 _DOSUB_SELF1(FUNCTION)
-
-#define _DOSUB_PAIR1(f) PASTE(runner_dosub_pair1, f)
-#define DOSUB_PAIR1 _DOSUB_PAIR1(FUNCTION)
-
-#define _DOSUB_SELF2(f) PASTE(runner_dosub_self2, f)
-#define DOSUB_SELF2 _DOSUB_SELF2(FUNCTION)
-
-#define _DOSUB_PAIR2(f) PASTE(runner_dosub_pair2, f)
-#define DOSUB_PAIR2 _DOSUB_PAIR2(FUNCTION)
-
-#define _DOSUB_SUBSET(f) PASTE(runner_dosub_subset, f)
-#define DOSUB_SUBSET _DOSUB_SUBSET(FUNCTION)
-
-#define _IACT_NONSYM(f) PASTE(runner_iact_nonsym, f)
-#define IACT_NONSYM _IACT_NONSYM(FUNCTION)
-
-#define _IACT(f) PASTE(runner_iact, f)
-#define IACT _IACT(FUNCTION)
-
-#define _IACT_NONSYM_VEC(f) PASTE(runner_iact_nonsym_vec, f)
-#define IACT_NONSYM_VEC _IACT_NONSYM_VEC(FUNCTION)
-
-#define _IACT_VEC(f) PASTE(runner_iact_vec, f)
-#define IACT_VEC _IACT_VEC(FUNCTION)
-
-#define _TIMER_DOSELF(f) PASTE(timer_doself, f)
-#define TIMER_DOSELF _TIMER_DOSELF(FUNCTION)
-
-#define _TIMER_DOPAIR(f) PASTE(timer_dopair, f)
-#define TIMER_DOPAIR _TIMER_DOPAIR(FUNCTION)
-
-#define _TIMER_DOSUB_SELF(f) PASTE(timer_dosub_self, f)
-#define TIMER_DOSUB_SELF _TIMER_DOSUB_SELF(FUNCTION)
-
-#define _TIMER_DOSUB_PAIR(f) PASTE(timer_dosub_pair, f)
-#define TIMER_DOSUB_PAIR _TIMER_DOSUB_PAIR(FUNCTION)
-
-#define _TIMER_DOSELF_SUBSET(f) PASTE(timer_doself_subset, f)
-#define TIMER_DOSELF_SUBSET _TIMER_DOSELF_SUBSET(FUNCTION)
-
-#define _TIMER_DOPAIR_SUBSET(f) PASTE(timer_dopair_subset, f)
-#define TIMER_DOPAIR_SUBSET _TIMER_DOPAIR_SUBSET(FUNCTION)
+#include "runner_doiact_hydro.h"
 
 /**
  * @brief Compute the interactions between a cell pair (non-symmetric case).
diff --git a/src/runner_doiact_functions_stars.h b/src/runner_doiact_functions_stars.h
new file mode 100644
index 0000000000000000000000000000000000000000..b0d731857e9b4b0474e47c3ac3fca540eecb1cbb
--- /dev/null
+++ b/src/runner_doiact_functions_stars.h
@@ -0,0 +1,1332 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
+ *               2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+/* Before including this file, define FUNCTION, which is the
+   name of the interaction function. This creates the interaction functions
+   runner_dopair_FUNCTION, runner_dopair_FUNCTION_naive, runner_doself_FUNCTION,
+   and runner_dosub_FUNCTION calling the pairwise interaction function
+   runner_iact_FUNCTION. */
+
+#include "runner_doiact_stars.h"
+
+/**
+ * @brief Interact the active #spart of a cell with the #part of that cell.
+ *
+ * @param r The #runner.
+ * @param c The #cell holding both the stars and the gas.
+ * @param timer 1 if the time is to be recorded.
+ */
+void DOSELF1_STARS(struct runner *r, struct cell *c, int timer) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (c->nodeID != engine_rank) error("Should be run on a different node");
+#endif
+
+  TIMER_TIC;
+
+  const struct engine *e = r->e;
+  const struct cosmology *cosmo = e->cosmology;
+  const integertime_t ti_current = e->ti_current;
+  const int with_cosmology = e->policy & engine_policy_cosmology;
+
+  /* Both stars and gas are needed, and the cell must have active stars. */
+  if (c->hydro.count == 0 || c->stars.count == 0) return;
+  if (!cell_is_active_stars(c, e)) return;
+
+  /* Cosmological terms */
+  const float a = cosmo->a;
+  const float H = cosmo->H;
+
+  struct spart *restrict stars = c->stars.parts;
+  struct part *restrict gas = c->hydro.parts;
+  struct xpart *restrict xgas = c->hydro.xparts;
+  const int n_stars = c->stars.count;
+  const int n_gas = c->hydro.count;
+
+  /* Outer loop: the star particles of the cell. */
+  for (int is = 0; is < n_stars; is++) {
+
+    /* Get a hold of the star. */
+    struct spart *restrict si = &stars[is];
+
+    /* Only stars that are active and whose feedback is active take part. */
+    if (!spart_is_active(si, e)) continue;
+    if (!feedback_is_active(si, e->time, cosmo, with_cosmology)) continue;
+
+    /* Smoothing length and position relative to the cell. */
+    const float hi = si->h;
+    const float hig2 = hi * hi * kernel_gamma2;
+    const float six[3] = {(float)(si->x[0] - c->loc[0]),
+                          (float)(si->x[1] - c->loc[1]),
+                          (float)(si->x[2] - c->loc[2])};
+
+    /* Inner loop: the gas particles of the same cell. */
+    for (int ig = 0; ig < n_gas; ig++) {
+
+      /* Gas particle, its xpart and its smoothing length. */
+      struct part *restrict pj = &gas[ig];
+      struct xpart *restrict xpj = &xgas[ig];
+      const float hj = pj->h;
+
+      /* Skip inhibited particles. */
+      if (part_is_inhibited(pj, e)) continue;
+
+      /* Separation, with both positions taken relative to the cell. */
+      const float pjx[3] = {(float)(pj->x[0] - c->loc[0]),
+                            (float)(pj->x[1] - c->loc[1]),
+                            (float)(pj->x[2] - c->loc[2])};
+      float dx[3] = {six[0] - pjx[0], six[1] - pjx[1], six[2] - pjx[2]};
+      const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
+
+#ifdef SWIFT_DEBUG_CHECKS
+      /* The gas particle must have been drifted to the current time. */
+      if (pj->ti_drift != e->ti_current)
+        error("Particle pj not drifted to current time");
+#endif
+
+      /* Interact only when r2 is below h^2 * kernel_gamma2. */
+      if (r2 < hig2) {
+        IACT_STARS(r2, dx, hi, hj, si, pj, a, H);
+#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
+        runner_iact_nonsym_feedback_density(r2, dx, hi, hj, si, pj, xpj, cosmo,
+                                            ti_current);
+#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK)
+        runner_iact_nonsym_feedback_apply(r2, dx, hi, hj, si, pj, xpj, cosmo,
+                                          ti_current);
+#endif
+      }
+    } /* loop over the gas particles of the cell. */
+  }   /* loop over the star particles of the cell. */
+
+  TIMER_TOC(TIMER_DOSELF_STARS);
+}
+
+/**
+ * @brief Interact the active #spart of @c ci with the #part of @c cj.
+ *
+ * @param r The #runner.
+ * @param ci The #cell providing the stars.
+ * @param cj The #cell providing the gas.
+ */
+void DO_NONSYM_PAIR1_STARS_NAIVE(struct runner *r, struct cell *restrict ci,
+                                 struct cell *restrict cj) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
+  if (ci->nodeID != engine_rank) error("Should be run on a different node");
+#else
+  if (cj->nodeID != engine_rank) error("Should be run on a different node");
+#endif
+#endif
+
+  const struct engine *e = r->e;
+  const struct space *s = e->s;
+  const struct cosmology *cosmo = e->cosmology;
+  const integertime_t ti_current = e->ti_current;
+  const int with_cosmology = e->policy & engine_policy_cosmology;
+
+  /* Stars in ci and gas in cj are needed, and ci must have active stars. */
+  if (cj->hydro.count == 0 || ci->stars.count == 0) return;
+  if (!cell_is_active_stars(ci, e)) return;
+
+  /* Cosmological terms */
+  const float a = cosmo->a;
+  const float H = cosmo->H;
+
+  struct spart *restrict stars_i = ci->stars.parts;
+  struct part *restrict gas_j = cj->hydro.parts;
+  struct xpart *restrict xgas_j = cj->hydro.xparts;
+  const int n_stars_i = ci->stars.count;
+  const int n_gas_j = cj->hydro.count;
+
+  /* Work out, axis by axis, whether the pair wraps around the box. */
+  double shift[3] = {0.0, 0.0, 0.0};
+  for (int k = 0; k < 3; k++) {
+    if (cj->loc[k] - ci->loc[k] < -s->dim[k] / 2) {
+      shift[k] = s->dim[k];
+    } else if (cj->loc[k] - ci->loc[k] > s->dim[k] / 2) {
+      shift[k] = -s->dim[k];
+    }
+  }
+
+  /* Outer loop: the star particles of ci. */
+  for (int is = 0; is < n_stars_i; is++) {
+
+    /* Get a hold of the star. */
+    struct spart *restrict si = &stars_i[is];
+
+    /* Only stars that are active and whose feedback is active take part. */
+    if (!spart_is_active(si, e)) continue;
+    if (!feedback_is_active(si, e->time, cosmo, with_cosmology)) continue;
+
+    const float hi = si->h;
+    const float hig2 = hi * hi * kernel_gamma2;
+    const float six[3] = {(float)(si->x[0] - (cj->loc[0] + shift[0])),
+                          (float)(si->x[1] - (cj->loc[1] + shift[1])),
+                          (float)(si->x[2] - (cj->loc[2] + shift[2]))};
+
+    /* Inner loop: the gas particles of cj. */
+    for (int ig = 0; ig < n_gas_j; ig++) {
+
+      /* Gas particle, its xpart and its smoothing length. */
+      struct part *restrict pj = &gas_j[ig];
+      struct xpart *restrict xpj = &xgas_j[ig];
+      const float hj = pj->h;
+
+      /* Skip inhibited particles. */
+      if (part_is_inhibited(pj, e)) continue;
+
+      /* Separation, with the gas position taken relative to cj. */
+      const float pjx[3] = {(float)(pj->x[0] - cj->loc[0]),
+                            (float)(pj->x[1] - cj->loc[1]),
+                            (float)(pj->x[2] - cj->loc[2])};
+      float dx[3] = {six[0] - pjx[0], six[1] - pjx[1], six[2] - pjx[2]};
+      const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
+
+#ifdef SWIFT_DEBUG_CHECKS
+      /* The gas particle must have been drifted to the current time. */
+      if (pj->ti_drift != e->ti_current)
+        error("Particle pj not drifted to current time");
+#endif
+
+      if (r2 < hig2) {
+        IACT_STARS(r2, dx, hi, hj, si, pj, a, H);
+
+#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
+        runner_iact_nonsym_feedback_density(r2, dx, hi, hj, si, pj, xpj, cosmo,
+                                            ti_current);
+#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK)
+        runner_iact_nonsym_feedback_apply(r2, dx, hi, hj, si, pj, xpj, cosmo,
+                                          ti_current);
+#endif
+      }
+    } /* loop over the gas particles of cj. */
+  }   /* loop over the star particles of ci. */
+}
+
+/**
+ * @brief Compute the interactions between a cell pair.
+ *
+ * @param r The #runner.
+ * @param ci The first #cell.
+ * @param cj The second #cell.
+ * @param sid The direction of the pair.
+ * @param shift The shift vector to apply to the particles in ci.
+ */
+void DO_SYM_PAIR1_STARS(struct runner *r, struct cell *ci, struct cell *cj,
+                        const int sid, const double *shift) {
+
+  TIMER_TIC;
+
+  const struct engine *e = r->e;
+  const int with_cosmology = e->policy & engine_policy_cosmology;
+  const integertime_t ti_current = e->ti_current;
+  const struct cosmology *cosmo = e->cosmology;
+
+  /* Cosmological terms */
+  const float a = cosmo->a;
+  const float H = cosmo->H;
+
+  /* Get the cutoff shift. */
+  double rshift = 0.0;
+  for (int k = 0; k < 3; k++) rshift += shift[k] * runner_shift[sid][k];
+
+#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
+  const int do_ci_stars = (ci->nodeID == e->nodeID) && (ci->stars.count != 0) &&
+                          (cj->hydro.count != 0) && cell_is_active_stars(ci, e);
+  const int do_cj_stars = (cj->nodeID == e->nodeID) && (cj->stars.count != 0) &&
+                          (ci->hydro.count != 0) && cell_is_active_stars(cj, e);
+#else
+  /* here we are updating the hydro -> switch ci, cj for local */
+  const int do_ci_stars = (cj->nodeID == e->nodeID) && (ci->stars.count != 0) &&
+                          (cj->hydro.count != 0) && cell_is_active_stars(ci, e);
+  const int do_cj_stars = (ci->nodeID == e->nodeID) && (cj->stars.count != 0) &&
+                          (ci->hydro.count != 0) && cell_is_active_stars(cj, e);
+#endif
+
+  if (do_ci_stars) {
+
+    /* Pick-out the sorted lists. */
+    const struct sort_entry *restrict sort_j = cj->hydro.sort[sid];
+    const struct sort_entry *restrict sort_i = ci->stars.sort[sid];
+
+#ifdef SWIFT_DEBUG_CHECKS
+    /* Some constants used to checks that the parts are in the right frame */
+    const float shift_threshold_x =
+        2. * ci->width[0] +
+        2. * max(ci->stars.dx_max_part, cj->hydro.dx_max_part);
+    const float shift_threshold_y =
+        2. * ci->width[1] +
+        2. * max(ci->stars.dx_max_part, cj->hydro.dx_max_part);
+    const float shift_threshold_z =
+        2. * ci->width[2] +
+        2. * max(ci->stars.dx_max_part, cj->hydro.dx_max_part);
+#endif /* SWIFT_DEBUG_CHECKS */
+
+    /* Get some other useful values. */
+    const double hi_max = ci->stars.h_max * kernel_gamma - rshift;
+    const int count_i = ci->stars.count;
+    const int count_j = cj->hydro.count;
+    struct spart *restrict sparts_i = ci->stars.parts;
+    struct part *restrict parts_j = cj->hydro.parts;
+    struct xpart *restrict xparts_j = cj->hydro.xparts;
+    const double dj_min = sort_j[0].d;
+    const float dx_max_rshift =
+        (ci->stars.dx_max_sort + cj->hydro.dx_max_sort) - rshift;
+    const float dx_max = (ci->stars.dx_max_sort + cj->hydro.dx_max_sort);
+
+    /* Loop over the sparts in ci. */
+    for (int pid = count_i - 1;
+         pid >= 0 && sort_i[pid].d + hi_max + dx_max > dj_min; pid--) {
+
+      /* Get a hold of the ith part in ci. */
+      struct spart *restrict spi = &sparts_i[sort_i[pid].i];
+      const float hi = spi->h;
+
+      /* Skip inactive particles */
+      if (!spart_is_active(spi, e)) continue;
+
+      /* Skip inactive particles */
+      if (!feedback_is_active(spi, e->time, cosmo, with_cosmology)) continue;
+
+      /* Compute distance from the other cell. */
+      const double px[3] = {spi->x[0], spi->x[1], spi->x[2]};
+      float dist = px[0] * runner_shift[sid][0] + px[1] * runner_shift[sid][1] +
+                   px[2] * runner_shift[sid][2];
+
+      /* Is there anything we need to interact with ? */
+      const double di = dist + hi * kernel_gamma + dx_max_rshift;
+      if (di < dj_min) continue;
+
+      /* Get some additional information about pi */
+      const float hig2 = hi * hi * kernel_gamma2;
+      const float pix = spi->x[0] - (cj->loc[0] + shift[0]);
+      const float piy = spi->x[1] - (cj->loc[1] + shift[1]);
+      const float piz = spi->x[2] - (cj->loc[2] + shift[2]);
+
+      /* Loop over the parts in cj. */
+      for (int pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) {
+
+        /* Recover pj */
+        struct part *pj = &parts_j[sort_j[pjd].i];
+        struct xpart *xpj = &xparts_j[sort_j[pjd].i];
+
+        /* Skip inhibited particles. */
+        if (part_is_inhibited(pj, e)) continue;
+
+        const float hj = pj->h;
+        const float pjx = pj->x[0] - cj->loc[0];
+        const float pjy = pj->x[1] - cj->loc[1];
+        const float pjz = pj->x[2] - cj->loc[2];
+
+        /* Compute the pairwise distance. */
+        float dx[3] = {pix - pjx, piy - pjy, piz - pjz};
+        const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
+
+#ifdef SWIFT_DEBUG_CHECKS
+        /* Check that particles are in the correct frame after the shifts */
+        if (pix > shift_threshold_x || pix < -shift_threshold_x)
+          error(
+              "Invalid particle position in X for pi (pix=%e ci->width[0]=%e)",
+              pix, ci->width[0]);
+        if (piy > shift_threshold_y || piy < -shift_threshold_y)
+          error(
+              "Invalid particle position in Y for pi (piy=%e ci->width[1]=%e)",
+              piy, ci->width[1]);
+        if (piz > shift_threshold_z || piz < -shift_threshold_z)
+          error(
+              "Invalid particle position in Z for pi (piz=%e ci->width[2]=%e)",
+              piz, ci->width[2]);
+        if (pjx > shift_threshold_x || pjx < -shift_threshold_x)
+          error(
+              "Invalid particle position in X for pj (pjx=%e ci->width[0]=%e)",
+              pjx, ci->width[0]);
+        if (pjy > shift_threshold_y || pjy < -shift_threshold_y)
+          error(
+              "Invalid particle position in Y for pj (pjy=%e ci->width[1]=%e)",
+              pjy, ci->width[1]);
+        if (pjz > shift_threshold_z || pjz < -shift_threshold_z)
+          error(
+              "Invalid particle position in Z for pj (pjz=%e ci->width[2]=%e)",
+              pjz, ci->width[2]);
+
+        /* Check that particles have been drifted to the current time */
+        if (spi->ti_drift != e->ti_current)
+          error("Particle spi not drifted to current time");
+        if (pj->ti_drift != e->ti_current)
+          error("Particle pj not drifted to current time");
+#endif
+
+        /* Hit or miss? */
+        if (r2 < hig2) {
+          IACT_STARS(r2, dx, hi, hj, spi, pj, a, H);
+
+#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
+          runner_iact_nonsym_feedback_density(r2, dx, hi, hj, spi, pj, xpj,
+                                              cosmo, ti_current);
+#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK)
+          runner_iact_nonsym_feedback_apply(r2, dx, hi, hj, spi, pj, xpj, cosmo,
+                                            ti_current);
+#endif
+        }
+      } /* loop over the parts in cj. */
+    }   /* loop over the parts in ci. */
+  }     /* do_ci_stars */
+
+  if (do_cj_stars) {
+    /* Pick-out the sorted lists. */
+    const struct sort_entry *restrict sort_i = ci->hydro.sort[sid];
+    const struct sort_entry *restrict sort_j = cj->stars.sort[sid];
+
+#ifdef SWIFT_DEBUG_CHECKS
+    /* Some constants used to checks that the parts are in the right frame */
+    const float shift_threshold_x =
+        2. * ci->width[0] +
+        2. * max(ci->hydro.dx_max_part, cj->stars.dx_max_part);
+    const float shift_threshold_y =
+        2. * ci->width[1] +
+        2. * max(ci->hydro.dx_max_part, cj->stars.dx_max_part);
+    const float shift_threshold_z =
+        2. * ci->width[2] +
+        2. * max(ci->hydro.dx_max_part, cj->stars.dx_max_part);
+#endif /* SWIFT_DEBUG_CHECKS */
+
+    /* Get some other useful values. */
+    const double hj_max = cj->hydro.h_max * kernel_gamma;
+    const int count_i = ci->hydro.count;
+    const int count_j = cj->stars.count;
+    struct part *restrict parts_i = ci->hydro.parts;
+    struct xpart *restrict xparts_i = ci->hydro.xparts;
+    struct spart *restrict sparts_j = cj->stars.parts;
+    const double di_max = sort_i[count_i - 1].d - rshift;
+    const float dx_max_rshift =
+        (ci->hydro.dx_max_sort + cj->stars.dx_max_sort) + rshift;
+    const float dx_max = (ci->hydro.dx_max_sort + cj->stars.dx_max_sort);
+
+    /* Loop over the parts in cj. */
+    for (int pjd = 0; pjd < count_j && sort_j[pjd].d - hj_max - dx_max < di_max;
+         pjd++) {
+
+      /* Get a hold of the jth part in cj. */
+      struct spart *spj = &sparts_j[sort_j[pjd].i];
+      const float hj = spj->h;
+
+      /* Skip inactive particles */
+      if (!spart_is_active(spj, e)) continue;
+
+      /* Skip inactive particles */
+      if (!feedback_is_active(spj, e->time, cosmo, with_cosmology)) continue;
+
+      /* Compute distance from the other cell. */
+      const double px[3] = {spj->x[0], spj->x[1], spj->x[2]};
+      float dist = px[0] * runner_shift[sid][0] + px[1] * runner_shift[sid][1] +
+                   px[2] * runner_shift[sid][2];
+
+      /* Is there anything we need to interact with ? */
+      const double dj = dist - hj * kernel_gamma - dx_max_rshift;
+      if (dj - rshift > di_max) continue;
+
+      /* Get some additional information about pj */
+      const float hjg2 = hj * hj * kernel_gamma2;
+      const float pjx = spj->x[0] - cj->loc[0];
+      const float pjy = spj->x[1] - cj->loc[1];
+      const float pjz = spj->x[2] - cj->loc[2];
+
+      /* Loop over the parts in ci. */
+      for (int pid = count_i - 1; pid >= 0 && sort_i[pid].d > dj; pid--) {
+
+        /* Recover pi */
+        struct part *pi = &parts_i[sort_i[pid].i];
+        struct xpart *xpi = &xparts_i[sort_i[pid].i];
+
+        /* Skip inhibited particles. */
+        if (part_is_inhibited(pi, e)) continue;
+
+        const float hi = pi->h;
+        const float pix = pi->x[0] - (cj->loc[0] + shift[0]);
+        const float piy = pi->x[1] - (cj->loc[1] + shift[1]);
+        const float piz = pi->x[2] - (cj->loc[2] + shift[2]);
+
+        /* Compute the pairwise distance. */
+        float dx[3] = {pjx - pix, pjy - piy, pjz - piz};
+        const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
+
+#ifdef SWIFT_DEBUG_CHECKS
+        /* Check that particles are in the correct frame after the shifts */
+        if (pix > shift_threshold_x || pix < -shift_threshold_x)
+          error(
+              "Invalid particle position in X for pi (pix=%e ci->width[0]=%e)",
+              pix, ci->width[0]);
+        if (piy > shift_threshold_y || piy < -shift_threshold_y)
+          error(
+              "Invalid particle position in Y for pi (piy=%e ci->width[1]=%e)",
+              piy, ci->width[1]);
+        if (piz > shift_threshold_z || piz < -shift_threshold_z)
+          error(
+              "Invalid particle position in Z for pi (piz=%e ci->width[2]=%e)",
+              piz, ci->width[2]);
+        if (pjx > shift_threshold_x || pjx < -shift_threshold_x)
+          error(
+              "Invalid particle position in X for pj (pjx=%e ci->width[0]=%e)",
+              pjx, ci->width[0]);
+        if (pjy > shift_threshold_y || pjy < -shift_threshold_y)
+          error(
+              "Invalid particle position in Y for pj (pjy=%e ci->width[1]=%e)",
+              pjy, ci->width[1]);
+        if (pjz > shift_threshold_z || pjz < -shift_threshold_z)
+          error(
+              "Invalid particle position in Z for pj (pjz=%e ci->width[2]=%e)",
+              pjz, ci->width[2]);
+
+        /* Check that particles have been drifted to the current time */
+        if (pi->ti_drift != e->ti_current)
+          error("Particle pi not drifted to current time");
+        if (spj->ti_drift != e->ti_current)
+          error("Particle spj not drifted to current time");
+#endif
+
+        /* Hit or miss? */
+        if (r2 < hjg2) {
+
+          IACT_STARS(r2, dx, hj, hi, spj, pi, a, H);
+
+#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
+          runner_iact_nonsym_feedback_density(r2, dx, hj, hi, spj, pi, xpi,
+                                              cosmo, ti_current);
+#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK)
+          runner_iact_nonsym_feedback_apply(r2, dx, hj, hi, spj, pi, xpi, cosmo,
+                                            ti_current);
+#endif
+        }
+      } /* loop over the parts in ci. */
+    }   /* loop over the parts in cj. */
+  }     /* Cell cj is active */
+
+  TIMER_TOC(TIMER_DOPAIR_STARS);
+}
+
+/**
+ * @brief Run the naive star-gas pair interaction in both directions.
+ *
+ * DO_NONSYM_PAIR1_STARS_NAIVE is called for (ci, cj) and then for (cj, ci).
+ * Each call is made only if that direction is enabled for this node and the
+ * first cell of the call holds sparts while the second one holds parts.
+ *
+ * @param r The #runner.
+ * @param ci The first #cell.
+ * @param cj The second #cell.
+ * @param timer Not used in this function.
+ */
+void DOPAIR1_STARS_NAIVE(struct runner *r, struct cell *restrict ci,
+                         struct cell *restrict cj, int timer) {
+
+  TIMER_TIC;
+
+  /* Which of the two cells belongs to the node running this task? */
+  const int ci_local = (ci->nodeID == r->e->nodeID);
+  const int cj_local = (cj->nodeID == r->e->nodeID);
+
+#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
+  const int run_ci = ci_local;
+  const int run_cj = cj_local;
+#else
+  /* Here the hydro side is the one being updated: swap the roles. */
+  const int run_ci = cj_local;
+  const int run_cj = ci_local;
+#endif
+
+  /* Direction (ci, cj): needs sparts in ci and parts in cj. */
+  if (run_ci && ci->stars.count != 0 && cj->hydro.count != 0) {
+    DO_NONSYM_PAIR1_STARS_NAIVE(r, ci, cj);
+  }
+
+  /* Direction (cj, ci): needs sparts in cj and parts in ci. */
+  if (run_cj && cj->stars.count != 0 && ci->hydro.count != 0) {
+    DO_NONSYM_PAIR1_STARS_NAIVE(r, cj, ci);
+  }
+
+  TIMER_TOC(TIMER_DOPAIR_STARS);
+}
+
+/**
+ * @brief Compute the interactions between a cell pair, but only for the
+ *      given indices in ci.
+ *
+ * Version using the sorted list of the parts in cj: for each selected spart
+ * the loop over cj walks the sort entries of direction @c sid and stops as
+ * soon as an entry lies beyond the bound computed for that spart
+ * (its projected position extended by kernel_gamma * h and by the
+ * dx_max_sort of cj).
+ *
+ * @param r The #runner.
+ * @param ci The first #cell (not accessed in this function).
+ * @param sparts_i The #spart to interact with @c cj.
+ * @param ind The list of indices into @c sparts_i of the particles to
+ *      interact with.
+ * @param scount The number of particles in @c ind.
+ * @param cj The second #cell.
+ * @param sid The direction of the pair.
+ * @param flipped Flag to check whether the cells have been flipped or not.
+ *      It selects from which end the sorted list of cj is walked.
+ * @param shift The shift vector subtracted from the positions of the sparts.
+ */
+void DOPAIR1_SUBSET_STARS(struct runner *r, struct cell *restrict ci,
+                          struct spart *restrict sparts_i, int *restrict ind,
+                          int scount, struct cell *restrict cj, const int sid,
+                          const int flipped, const double *shift) {
+
+  const struct engine *e = r->e;
+  const integertime_t ti_current = e->ti_current;
+  const struct cosmology *cosmo = e->cosmology;
+
+  /* Cosmological terms */
+  const float a = cosmo->a;
+  const float H = cosmo->H;
+
+  const int count_j = cj->hydro.count;
+  struct part *restrict parts_j = cj->hydro.parts;
+  struct xpart *restrict xparts_j = cj->hydro.xparts;
+
+  /* Nothing to interact with if cj holds no gas particles. */
+  if (count_j == 0) return;
+
+  /* Pick-out the sorted list of cj for this direction, together with the
+   * dx_max_sort of cj that is used to widen the search bound below. */
+  const struct sort_entry *restrict sort_j = cj->hydro.sort[sid];
+  const float dxj = cj->hydro.dx_max_sort;
+
+  /* Sparts are on the left? */
+  if (!flipped) {
+
+    /* Loop over the sparts_i. */
+    for (int pid = 0; pid < scount; pid++) {
+
+      /* Get a hold of the ith selected spart, shift its position and compute
+       * di: the projection of the shifted position onto runner_shift[sid],
+       * increased by hi * kernel_gamma and by dxj. */
+      struct spart *restrict spi = &sparts_i[ind[pid]];
+      const double pix = spi->x[0] - (shift[0]);
+      const double piy = spi->x[1] - (shift[1]);
+      const double piz = spi->x[2] - (shift[2]);
+      const float hi = spi->h;
+      const float hig2 = hi * hi * kernel_gamma2;
+      const double di = hi * kernel_gamma + dxj + pix * runner_shift[sid][0] +
+                        piy * runner_shift[sid][1] + piz * runner_shift[sid][2];
+
+      /* Loop over the parts in cj, from the start of the sorted list and
+       * only while the sort entries stay below di. */
+      for (int pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) {
+
+        /* Get a pointer to the jth particle. */
+        struct part *restrict pj = &parts_j[sort_j[pjd].i];
+        struct xpart *restrict xpj = &xparts_j[sort_j[pjd].i];
+
+        /* Skip inhibited particles. */
+        if (part_is_inhibited(pj, e)) continue;
+
+        const double pjx = pj->x[0];
+        const double pjy = pj->x[1];
+        const double pjz = pj->x[2];
+        const float hj = pj->h;
+
+        /* Compute the pairwise distance (difference taken in double
+         * precision, then truncated to float). */
+        float dx[3] = {(float)(pix - pjx), (float)(piy - pjy),
+                       (float)(piz - pjz)};
+        const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
+
+#ifdef SWIFT_DEBUG_CHECKS
+        /* Check that particles have been drifted to the current time */
+        if (spi->ti_drift != e->ti_current)
+          error("Particle pi not drifted to current time");
+        if (pj->ti_drift != e->ti_current)
+          error("Particle pj not drifted to current time");
+#endif
+
+        /* Hit or miss? Only the kernel support of the spart is tested. */
+        if (r2 < hig2) {
+          IACT_STARS(r2, dx, hi, hj, spi, pj, a, H);
+
+#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
+          runner_iact_nonsym_feedback_density(r2, dx, hi, hj, spi, pj, xpj,
+                                              cosmo, ti_current);
+#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK)
+          runner_iact_nonsym_feedback_apply(r2, dx, hi, hj, spi, pj, xpj, cosmo,
+                                            ti_current);
+#endif
+        }
+      } /* loop over the parts in cj. */
+    }   /* loop over the sparts in ci. */
+  }
+
+  /* Sparts are on the right. */
+  else {
+
+    /* Loop over the sparts_i. */
+    for (int pid = 0; pid < scount; pid++) {
+
+      /* Get a hold of the ith selected spart, shift its position and compute
+       * di: the projection of the shifted position onto runner_shift[sid],
+       * decreased by hi * kernel_gamma and by dxj. */
+      struct spart *restrict spi = &sparts_i[ind[pid]];
+      const double pix = spi->x[0] - (shift[0]);
+      const double piy = spi->x[1] - (shift[1]);
+      const double piz = spi->x[2] - (shift[2]);
+      const float hi = spi->h;
+      const float hig2 = hi * hi * kernel_gamma2;
+      const double di = -hi * kernel_gamma - dxj + pix * runner_shift[sid][0] +
+                        piy * runner_shift[sid][1] + piz * runner_shift[sid][2];
+
+      /* Loop over the parts in cj, from the end of the sorted list and
+       * only while the sort entries stay above di. */
+      for (int pjd = count_j - 1; pjd >= 0 && di < sort_j[pjd].d; pjd--) {
+
+        /* Get a pointer to the jth particle. */
+        struct part *restrict pj = &parts_j[sort_j[pjd].i];
+        struct xpart *restrict xpj = &xparts_j[sort_j[pjd].i];
+
+        /* Skip inhibited particles. */
+        if (part_is_inhibited(pj, e)) continue;
+
+        const double pjx = pj->x[0];
+        const double pjy = pj->x[1];
+        const double pjz = pj->x[2];
+        const float hj = pj->h;
+
+        /* Compute the pairwise distance (difference taken in double
+         * precision, then truncated to float). */
+        float dx[3] = {(float)(pix - pjx), (float)(piy - pjy),
+                       (float)(piz - pjz)};
+        const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
+
+#ifdef SWIFT_DEBUG_CHECKS
+        /* Check that particles have been drifted to the current time */
+        if (spi->ti_drift != e->ti_current)
+          error("Particle pi not drifted to current time");
+        if (pj->ti_drift != e->ti_current)
+          error("Particle pj not drifted to current time");
+#endif
+
+        /* Hit or miss? Only the kernel support of the spart is tested. */
+        if (r2 < hig2) {
+          IACT_STARS(r2, dx, hi, hj, spi, pj, a, H);
+
+#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
+          runner_iact_nonsym_feedback_density(r2, dx, hi, hj, spi, pj, xpj,
+                                              cosmo, ti_current);
+#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK)
+          runner_iact_nonsym_feedback_apply(r2, dx, hi, hj, spi, pj, xpj, cosmo,
+                                            ti_current);
+#endif
+        }
+      } /* loop over the parts in cj. */
+    }   /* loop over the sparts in ci. */
+  }
+}
+
+/**
+ * @brief Interact a subset of the sparts of one cell with the gas of
+ *      another cell.
+ *
+ * Brute-force version: every selected spart is tested against every
+ * non-inhibited part of @c cj, without using the sorted lists.
+ *
+ * @param r The #runner.
+ * @param ci The first #cell.
+ * @param sparts_i The #spart array that @c ind indexes into.
+ * @param ind The list of indices of the sparts to interact with.
+ * @param scount The number of entries in @c ind.
+ * @param cj The second #cell, providing the gas parts.
+ * @param shift The shift vector subtracted from the spart positions.
+ */
+void DOPAIR1_SUBSET_STARS_NAIVE(struct runner *r, struct cell *restrict ci,
+                                struct spart *restrict sparts_i,
+                                int *restrict ind, int scount,
+                                struct cell *restrict cj, const double *shift) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (ci->nodeID != engine_rank) error("Should be run on a different node");
+#endif
+
+  const struct engine *e = r->e;
+  const integertime_t ti_current = e->ti_current;
+  const struct cosmology *cosmo = e->cosmology;
+
+  /* Cosmological terms */
+  const float a = cosmo->a;
+  const float H = cosmo->H;
+
+  /* The gas particles on the other side of the pair. */
+  const int n_gas = cj->hydro.count;
+  struct part *restrict gas = cj->hydro.parts;
+  struct xpart *restrict xgas = cj->hydro.xparts;
+
+  /* Without gas there is nothing to interact with. */
+  if (n_gas == 0) return;
+
+  /* Outer loop: the selected sparts. */
+  for (int s = 0; s < scount; s++) {
+
+    struct spart *restrict sp = &sparts_i[ind[s]];
+
+    /* Shifted position and squared kernel support of this spart. */
+    const double sx[3] = {sp->x[0] - (shift[0]), sp->x[1] - (shift[1]),
+                          sp->x[2] - (shift[2])};
+    const float hi = sp->h;
+    const float hig2 = hi * hi * kernel_gamma2;
+
+#ifdef SWIFT_DEBUG_CHECKS
+    if (!spart_is_active(sp, e))
+      error("Trying to correct smoothing length of inactive particle !");
+#endif
+
+    /* Inner loop: all the gas particles, in storage order. */
+    for (int g = 0; g < n_gas; g++) {
+
+      struct part *restrict p = &gas[g];
+      struct xpart *restrict xp = &xgas[g];
+
+      /* Inhibited particles do not take part. */
+      if (part_is_inhibited(p, e)) continue;
+
+      /* Separation in double precision, truncated to float. */
+      float dx[3];
+      for (int k = 0; k < 3; k++) dx[k] = (float)(sx[k] - p->x[k]);
+      const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
+
+#ifdef SWIFT_DEBUG_CHECKS
+      /* The gas particle must be at the current time. */
+      if (p->ti_drift != e->ti_current)
+        error("Particle pj not drifted to current time");
+#endif
+
+      /* Outside of the spart's kernel: no interaction. */
+      if (!(r2 < hig2)) continue;
+
+      IACT_STARS(r2, dx, hi, p->h, sp, p, a, H);
+
+#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
+      runner_iact_nonsym_feedback_density(r2, dx, hi, p->h, sp, p, xp, cosmo,
+                                          ti_current);
+#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK)
+      runner_iact_nonsym_feedback_apply(r2, dx, hi, p->h, sp, p, xp, cosmo,
+                                        ti_current);
+#endif
+    } /* loop over the gas of cj. */
+  }   /* loop over the selected sparts. */
+}
+
+/**
+ * @brief Interact a subset of the sparts of a cell with the gas of that
+ *      same cell.
+ *
+ * Every selected spart is tested against every non-inhibited part of the
+ * cell; positions are taken relative to the cell location.
+ *
+ * @param r The #runner.
+ * @param ci The #cell.
+ * @param sparts The #spart array that @c ind indexes into.
+ * @param ind The list of indices of the sparts to interact with.
+ * @param scount The number of entries in @c ind.
+ */
+void DOSELF1_SUBSET_STARS(struct runner *r, struct cell *restrict ci,
+                          struct spart *restrict sparts, int *restrict ind,
+                          int scount) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (ci->nodeID != engine_rank) error("Should be run on a different node");
+#endif
+
+  const struct engine *e = r->e;
+  const integertime_t ti_current = e->ti_current;
+  const struct cosmology *cosmo = e->cosmology;
+
+  /* Cosmological terms */
+  const float a = cosmo->a;
+  const float H = cosmo->H;
+
+  /* The gas particles of the cell. */
+  const int n_gas = ci->hydro.count;
+  struct part *restrict gas = ci->hydro.parts;
+  struct xpart *restrict xgas = ci->hydro.xparts;
+
+  /* Without gas there is nothing to interact with. */
+  if (n_gas == 0) return;
+
+  /* Outer loop: the selected sparts. */
+  for (int s = 0; s < scount; s++) {
+
+    struct spart *sp = &sparts[ind[s]];
+
+    /* Position in the frame of the cell, in single precision. */
+    float sx[3];
+    for (int k = 0; k < 3; k++) sx[k] = (float)(sp->x[k] - ci->loc[k]);
+
+    const float hi = sp->h;
+    const float hig2 = hi * hi * kernel_gamma2;
+
+#ifdef SWIFT_DEBUG_CHECKS
+    if (!spart_is_active(sp, e))
+      error("Inactive particle in subset function!");
+#endif
+
+    /* Inner loop: all the gas particles, in storage order. */
+    for (int g = 0; g < n_gas; g++) {
+
+      struct part *restrict p = &gas[g];
+      struct xpart *restrict xp = &xgas[g];
+
+      /* Inhibited particles do not take part. */
+      if (part_is_inhibited(p, e)) continue;
+
+      /* Separation between the two cell-frame positions. */
+      float dx[3];
+      for (int k = 0; k < 3; k++)
+        dx[k] = sx[k] - (float)(p->x[k] - ci->loc[k]);
+      const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
+
+#ifdef SWIFT_DEBUG_CHECKS
+      /* The gas particle must be at the current time. */
+      if (p->ti_drift != e->ti_current)
+        error("Particle pj not drifted to current time");
+#endif
+
+      /* Outside of the spart's kernel: no interaction. */
+      if (!(r2 < hig2)) continue;
+
+      IACT_STARS(r2, dx, hi, p->h, sp, p, a, H);
+#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
+      runner_iact_nonsym_feedback_density(r2, dx, hi, p->h, sp, p, xp, cosmo,
+                                          ti_current);
+#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK)
+      runner_iact_nonsym_feedback_apply(r2, dx, hi, p->h, sp, p, xp, cosmo,
+                                        ti_current);
+#endif
+    } /* loop over the gas of the cell. */
+  }   /* loop over the selected sparts. */
+}
+
+/**
+ * @brief Determine which version of DOSELF1_SUBSET_STARS needs to be called
+ * depending on the optimisation level.
+ *
+ * At present no selection takes place: all the arguments are forwarded
+ * unchanged to DOSELF1_SUBSET_STARS.
+ *
+ * @param r The #runner.
+ * @param ci The first #cell.
+ * @param sparts The #spart to interact.
+ * @param ind The list of indices of particles in @c ci to interact with.
+ * @param scount The number of particles in @c ind.
+ */
+void DOSELF1_SUBSET_BRANCH_STARS(struct runner *r, struct cell *restrict ci,
+                                 struct spart *restrict sparts,
+                                 int *restrict ind, int scount) {
+
+  DOSELF1_SUBSET_STARS(r, ci, sparts, ind, scount);
+}
+
+/**
+ * @brief Prepare and launch the interaction of a subset of the sparts of
+ * one cell with the gas of another cell.
+ *
+ * Computes the periodic shift between the two cells and then either calls
+ * DOPAIR1_SUBSET_STARS_NAIVE (when SWIFT_USE_NAIVE_INTERACTIONS_STARS is
+ * defined) or derives the sort direction, checks that @c cj is sorted along
+ * it and calls DOPAIR1_SUBSET_STARS. Nothing is done if @c cj holds no gas.
+ *
+ * @param r The #runner.
+ * @param ci The first #cell.
+ * @param sparts_i The #spart to interact with @c cj.
+ * @param ind The list of indices of particles in @c ci to interact with.
+ * @param scount The number of particles in @c ind.
+ * @param cj The second #cell.
+ */
+void DOPAIR1_SUBSET_BRANCH_STARS(struct runner *r, struct cell *restrict ci,
+                                 struct spart *restrict sparts_i,
+                                 int *restrict ind, int scount,
+                                 struct cell *restrict cj) {
+
+  const struct engine *e = r->e;
+
+  /* No gas on the other side: nothing to interact with. */
+  if (cj->hydro.count == 0) return;
+
+  /* Periodic wrapping: if the cells are more than half a box apart along
+   * an axis, shift by a full box length along that axis. */
+  double shift[3] = {0.0, 0.0, 0.0};
+  for (int k = 0; k < 3; k++) {
+    const double sep = cj->loc[k] - ci->loc[k];
+    const double box = e->s->dim[k];
+    if (sep < -box / 2)
+      shift[k] = box;
+    else if (sep > box / 2)
+      shift[k] = -box;
+  }
+
+#ifdef SWIFT_USE_NAIVE_INTERACTIONS_STARS
+  DOPAIR1_SUBSET_STARS_NAIVE(r, ci, sparts_i, ind, scount, cj, shift);
+#else
+  /* Build the sorting index as a base-3 number, one digit per axis:
+   * 0, 1 or 2 for a negative, zero or positive wrapped separation. */
+  int sid = 0;
+  for (int k = 0; k < 3; k++) {
+    const double sep = cj->loc[k] - ci->loc[k] + shift[k];
+    int digit = 1;
+    if (sep < 0)
+      digit = 0;
+    else if (sep > 0)
+      digit = 2;
+    sid = 3 * sid + digit;
+  }
+
+  /* Look up whether the pair is flipped and the sort list to use. */
+  const int flipped = runner_flip[sid];
+  sid = sortlistID[sid];
+
+  /* The sort of cj along this direction must exist and be recent enough. */
+  const int is_sorted = (cj->hydro.sorted & (1 << sid)) != 0;
+  const int is_stale =
+      cj->hydro.dx_max_sort_old > space_maxreldx * cj->dmin;
+  if (!is_sorted || is_stale) error("Interacting unsorted cells.");
+
+  DOPAIR1_SUBSET_STARS(r, ci, sparts_i, ind, scount, cj, sid, flipped, shift);
+#endif
+}
+
+/**
+ * @brief Recursively dispatch the interactions of a subset of the sparts
+ *      of a cell, either with the cell itself or with a second cell.
+ *
+ * The progeny of @c ci holding the first selected spart is located first.
+ * Depending on whether the task can recurse, the function then calls itself
+ * on that progeny or calls the self / pair subset function directly.
+ *
+ * @param r The #runner.
+ * @param ci The #cell containing the sparts.
+ * @param sparts The #spart array that @c ind indexes into.
+ * @param ind The list of indices of the sparts to interact with.
+ * @param scount The number of entries in @c ind.
+ * @param cj The second #cell, or NULL for a self-interaction.
+ * @param gettimer Not used in this function.
+ */
+void DOSUB_SUBSET_STARS(struct runner *r, struct cell *ci, struct spart *sparts,
+                        int *ind, int scount, struct cell *cj, int gettimer) {
+
+  const struct engine *e = r->e;
+  struct space *s = e->s;
+
+  /* Should we even bother? */
+  if (!cell_is_active_stars(ci, e) &&
+      (cj == NULL || !cell_is_active_stars(cj, e)))
+    return;
+
+  /* Find out in which sub-cell of ci the parts are. Only the address of the
+   * first selected spart is compared with the spart range of each progeny;
+   * sub stays NULL if ci is not split or if no progeny matches. */
+  struct cell *sub = NULL;
+  if (ci->split) {
+    for (int k = 0; k < 8; k++) {
+      if (ci->progeny[k] != NULL) {
+        if (&sparts[ind[0]] >= &ci->progeny[k]->stars.parts[0] &&
+            &sparts[ind[0]] <
+                &ci->progeny[k]->stars.parts[ci->progeny[k]->stars.count]) {
+          sub = ci->progeny[k];
+          break;
+        }
+      }
+    }
+  }
+
+  /* Is this a single cell? */
+  if (cj == NULL) {
+
+    /* Recurse? */
+    if (cell_can_recurse_in_self_stars_task(ci)) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+      /* sub becomes the cell of the recursive calls: it must exist. */
+      if (sub == NULL)
+        error("Subset of sparts not found in any progeny of the cell.");
+#endif
+
+      /* The sub-cell with itself, then with each of its siblings. */
+      DOSUB_SUBSET_STARS(r, sub, sparts, ind, scount, NULL, 0);
+      for (int j = 0; j < 8; j++)
+        if (ci->progeny[j] != sub && ci->progeny[j] != NULL)
+          DOSUB_SUBSET_STARS(r, sub, sparts, ind, scount, ci->progeny[j], 0);
+
+    }
+
+    /* Otherwise, compute self-interaction. */
+    else
+      DOSELF1_SUBSET_BRANCH_STARS(r, ci, sparts, ind, scount);
+  } /* self-interaction. */
+
+  /* Otherwise, it's a pair interaction. */
+  else {
+
+    /* Recurse? */
+    if (cell_can_recurse_in_pair_stars_task(ci, cj) &&
+        cell_can_recurse_in_pair_stars_task(cj, ci)) {
+
+      /* Get the type of pair and flip ci/cj if needed. */
+      double shift[3] = {0.0, 0.0, 0.0};
+      const int sid = space_getsid(s, &ci, &cj, shift);
+
+      /* Because of the possible flip, sub may now be a progeny of either
+       * cell: both cases are tested and sub is always passed first. */
+      struct cell_split_pair *csp = &cell_split_pairs[sid];
+      for (int k = 0; k < csp->count; k++) {
+        const int pid = csp->pairs[k].pid;
+        const int pjd = csp->pairs[k].pjd;
+        if (ci->progeny[pid] == sub && cj->progeny[pjd] != NULL)
+          DOSUB_SUBSET_STARS(r, ci->progeny[pid], sparts, ind, scount,
+                             cj->progeny[pjd], 0);
+        if (ci->progeny[pid] != NULL && cj->progeny[pjd] == sub)
+          DOSUB_SUBSET_STARS(r, cj->progeny[pjd], sparts, ind, scount,
+                             ci->progeny[pid], 0);
+      }
+    }
+
+    /* Otherwise, compute the pair directly. */
+    else if (cell_is_active_stars(ci, e) && cj->hydro.count > 0) {
+
+      /* ci is active here (tested in the branch condition), so both the
+       * sparts of ci and the parts of cj must have been drifted. */
+      if (!cell_are_spart_drifted(ci, e)) error("Cell should be drifted!");
+      if (!cell_are_part_drifted(cj, e)) error("Cell should be drifted!");
+
+      DOPAIR1_SUBSET_BRANCH_STARS(r, ci, sparts, ind, scount, cj);
+    }
+
+  } /* otherwise, pair interaction. */
+}
+
+/**
+ * @brief Check whether a cell needs a star self-interaction and, if so,
+ * call DOSELF1_STARS on it.
+ *
+ * Cells without sparts and cells that are not active for stars are left
+ * untouched.
+ *
+ * @param r #runner
+ * @param c #cell c
+ *
+ */
+void DOSELF1_BRANCH_STARS(struct runner *r, struct cell *c) {
+
+  const struct engine *restrict e = r->e;
+
+  /* Empty or inactive cells need no work. */
+  if (c->stars.count == 0 || !cell_is_active_stars(c, e)) return;
+
+  /* The recursion should never hand us a cell smaller than the largest
+   * kernel support recorded in h_max_old. */
+  const int support_exceeds_cell =
+      c->stars.h_max_old * kernel_gamma > c->dmin;
+  if (support_exceeds_cell) error("Cell smaller than smoothing length");
+
+  DOSELF1_STARS(r, c, 1);
+}
+
+/**
+ * @brief Debugging check of the sort entries of a cell along one direction.
+ *
+ * For every non-inhibited particle of @c cj, the position is projected onto
+ * runner_shift[sid] and compared with the value stored in the matching sort
+ * entry. error() is called when the mismatch exceeds the cell's dx_max_sort
+ * by more than both a relative and an absolute tolerance.
+ *
+ * The expansion uses a variable named e taken from the calling scope.
+ *
+ * @param TYPE The cell member to check (e.g. stars).
+ * @param PART The particle type stored in that member (e.g. spart).
+ * @param cj The #cell whose sort is checked.
+ * @param ci The other #cell of the pair (only its nodeID is reported).
+ * @param sid The sort direction.
+ */
+#define RUNNER_CHECK_SORT(TYPE, PART, cj, ci, sid)                          \
+  ({                                                                        \
+    const struct sort_entry *restrict sort_j = cj->TYPE.sort[sid];          \
+                                                                            \
+    for (int pjd = 0; pjd < cj->TYPE.count; pjd++) {                        \
+      const struct PART *p = &cj->TYPE.parts[sort_j[pjd].i];                \
+      if (PART##_is_inhibited(p, e)) continue;                              \
+                                                                            \
+      const float d = p->x[0] * runner_shift[sid][0] +                      \
+                      p->x[1] * runner_shift[sid][1] +                      \
+                      p->x[2] * runner_shift[sid][2];                       \
+      const float excess =                                                  \
+          fabsf(d - sort_j[pjd].d) - cj->TYPE.dx_max_sort;                  \
+      if (excess > 1.0e-4 * max(fabsf(d), cj->TYPE.dx_max_sort_old) &&      \
+          excess > cj->width[0] * 1.0e-10)                                  \
+        error(                                                              \
+            "particle shift diff exceeds dx_max_sort in cell cj. "          \
+            "cj->nodeID=%d "                                                \
+            "ci->nodeID=%d d=%e sort_j[pjd].d=%e cj->" #TYPE                \
+            ".dx_max_sort=%e "                                              \
+            "cj->" #TYPE                                                    \
+            ".dx_max_sort_old=%e, cellID=%i super->cellID=%i "              \
+            "cj->depth=%d cj->maxdepth=%d",                                 \
+            cj->nodeID, ci->nodeID, d, sort_j[pjd].d, cj->TYPE.dx_max_sort, \
+            cj->TYPE.dx_max_sort_old, cj->cellID, cj->hydro.super->cellID,  \
+            cj->depth, cj->maxdepth);                                       \
+    }                                                                       \
+  })
+
+/**
+ * @brief Validate a pair of cells and launch the star-gas pair interaction.
+ *
+ * Works out the sort direction of the pair (which may flip @c ci and
+ * @c cj), decides for each direction whether there is anything to do and,
+ * if so, verifies that the cells involved are drifted and sorted before
+ * calling the pair function.
+ *
+ * @param r #runner
+ * @param ci #cell ci
+ * @param cj #cell cj
+ *
+ */
+void DOPAIR1_BRANCH_STARS(struct runner *r, struct cell *ci, struct cell *cj) {
+
+  const struct engine *restrict e = r->e;
+
+  /* Sort direction of the pair; ci and cj may come back flipped. */
+  double shift[3] = {0.0, 0.0, 0.0};
+  const int sid = space_getsid(e->s, &ci, &cj, shift);
+
+  const int ci_active = cell_is_active_stars(ci, e);
+  const int cj_active = cell_is_active_stars(cj, e);
+
+  /* Which of the two cells belongs to this node? */
+  const int ci_local = (ci->nodeID == e->nodeID);
+  const int cj_local = (cj->nodeID == e->nodeID);
+#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
+  const int do_ci_stars = ci_local;
+  const int do_cj_stars = cj_local;
+#else
+  /* Here the hydro side is the one being updated: swap the roles. */
+  const int do_ci_stars = cj_local;
+  const int do_cj_stars = ci_local;
+#endif
+
+  /* A direction is needed if it has sparts on one side, gas on the other,
+   * an active star cell and is enabled for this node. */
+  const int do_ci = (ci->stars.count != 0 && cj->hydro.count != 0 &&
+                     ci_active && do_ci_stars);
+  const int do_cj = (cj->stars.count != 0 && ci->hydro.count != 0 &&
+                     cj_active && do_cj_stars);
+
+  /* Neither direction has work: leave. */
+  if (!do_ci && !do_cj) return;
+
+  /* Direction sparts(ci) -> parts(cj): drift state, then both sorts. */
+  if (do_ci) {
+    if (!cell_are_spart_drifted(ci, e) || !cell_are_part_drifted(cj, e))
+      error("Interacting undrifted cells.");
+
+    if (!(ci->stars.sorted & (1 << sid)) ||
+        ci->stars.dx_max_sort_old > space_maxreldx * ci->dmin)
+      error("Interacting unsorted cells.");
+
+    if (!(cj->hydro.sorted & (1 << sid)) ||
+        cj->hydro.dx_max_sort_old > space_maxreldx * cj->dmin)
+      error("Interacting unsorted cells.");
+  }
+
+  /* Direction sparts(cj) -> parts(ci): drift state, then both sorts. */
+  if (do_cj) {
+    if (!cell_are_part_drifted(ci, e) || !cell_are_spart_drifted(cj, e))
+      error("Interacting undrifted cells.");
+
+    if (!(ci->hydro.sorted & (1 << sid)) ||
+        ci->hydro.dx_max_sort_old > space_maxreldx * ci->dmin)
+      error("Interacting unsorted cells.");
+
+    if (!(cj->stars.sorted & (1 << sid)) ||
+        cj->stars.dx_max_sort_old > space_maxreldx * cj->dmin)
+      error("Interacting unsorted cells.");
+  }
+
+#ifdef SWIFT_DEBUG_CHECKS
+  /* Only the star sorts are verified. The matching hydro checks,
+   * RUNNER_CHECK_SORT(hydro, part, cj, ci, sid) and
+   * RUNNER_CHECK_SORT(hydro, part, ci, cj, sid), are disabled.
+   * MATTHIEU: This test is faulty. To be fixed... */
+  if (do_ci) {
+    RUNNER_CHECK_SORT(stars, spart, ci, cj, sid);
+  }
+
+  if (do_cj) {
+    RUNNER_CHECK_SORT(stars, spart, cj, ci, sid);
+  }
+#endif /* SWIFT_DEBUG_CHECKS */
+
+#ifdef SWIFT_USE_NAIVE_INTERACTIONS_STARS
+  DOPAIR1_STARS_NAIVE(r, ci, cj, 1);
+#else
+  DO_SYM_PAIR1_STARS(r, ci, cj, sid, shift);
+#endif
+}
+
+/**
+ * @brief Compute grouped sub-cell interactions for pairs
+ *
+ * @param r The #runner.
+ * @param ci The first #cell.
+ * @param cj The second #cell.
+ * @param gettimer Should the elapsed time be recorded (1) or not (0) ?
+ *
+ * @todo Hard-code the sid on the recursive calls to avoid the
+ * redundant computations to find the sid on-the-fly.
+ */
+void DOSUB_PAIR1_STARS(struct runner *r, struct cell *ci, struct cell *cj,
+                       int gettimer) {
+
+  TIMER_TIC;
+
+  struct space *s = r->e->s;
+  const struct engine *e = r->e;
+
+  /* Should we even bother? */
+  const int should_do_ci = ci->stars.count != 0 && cj->hydro.count != 0 &&
+                           cell_is_active_stars(ci, e);
+  const int should_do_cj = cj->stars.count != 0 && ci->hydro.count != 0 &&
+                           cell_is_active_stars(cj, e);
+  if (!should_do_ci && !should_do_cj) return;
+
+  /* Get the type of pair and flip ci/cj if needed. */
+  double shift[3];
+  const int sid = space_getsid(s, &ci, &cj, shift);
+
+  /* Recurse? */
+  if (cell_can_recurse_in_pair_stars_task(ci, cj) &&
+      cell_can_recurse_in_pair_stars_task(cj, ci)) {
+    struct cell_split_pair *csp = &cell_split_pairs[sid];
+    for (int k = 0; k < csp->count; k++) {
+      const int pid = csp->pairs[k].pid;
+      const int pjd = csp->pairs[k].pjd;
+      if (ci->progeny[pid] != NULL && cj->progeny[pjd] != NULL)
+        DOSUB_PAIR1_STARS(r, ci->progeny[pid], cj->progeny[pjd], 0);
+    }
+  }
+
+  /* Otherwise, compute the pair directly. */
+  else {
+
+#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
+    const int do_ci_stars = ci->nodeID == e->nodeID;
+    const int do_cj_stars = cj->nodeID == e->nodeID;
+#else
+    /* here we are updating the hydro -> switch ci, cj */
+    const int do_ci_stars = cj->nodeID == e->nodeID;
+    const int do_cj_stars = ci->nodeID == e->nodeID;
+#endif
+    const int do_ci = ci->stars.count != 0 && cj->hydro.count != 0 &&
+                      cell_is_active_stars(ci, e) && do_ci_stars;
+    const int do_cj = cj->stars.count != 0 && ci->hydro.count != 0 &&
+                      cell_is_active_stars(cj, e) && do_cj_stars;
+
+    if (do_ci) {
+
+      /* Make sure both cells are drifted to the current timestep. */
+      if (!cell_are_spart_drifted(ci, e))
+        error("Interacting undrifted cells (sparts).");
+
+      if (!cell_are_part_drifted(cj, e))
+        error("Interacting undrifted cells (parts).");
+
+      /* Both cells must already be sorted along this sid. */
+      if (!(ci->stars.sorted & (1 << sid)) ||
+          ci->stars.dx_max_sort_old > ci->dmin * space_maxreldx) {
+        error("Interacting unsorted cell (sparts).");
+      }
+
+      if (!(cj->hydro.sorted & (1 << sid)) ||
+          cj->hydro.dx_max_sort_old > cj->dmin * space_maxreldx)
+        error("Interacting unsorted cell (parts). %i", cj->nodeID);
+    }
+
+    if (do_cj) {
+
+      /* Make sure both cells are drifted to the current timestep. */
+      if (!cell_are_part_drifted(ci, e))
+        error("Interacting undrifted cells (parts).");
+
+      if (!cell_are_spart_drifted(cj, e))
+        error("Interacting undrifted cells (sparts).");
+
+      /* Both cells must already be sorted along this sid. */
+      if (!(ci->hydro.sorted & (1 << sid)) ||
+          ci->hydro.dx_max_sort_old > ci->dmin * space_maxreldx) {
+        error("Interacting unsorted cell (parts).");
+      }
+
+      if (!(cj->stars.sorted & (1 << sid)) ||
+          cj->stars.dx_max_sort_old > cj->dmin * space_maxreldx) {
+        error("Interacting unsorted cell (sparts).");
+      }
+    }
+
+    if (do_ci || do_cj) DOPAIR1_BRANCH_STARS(r, ci, cj);
+  }
+  /* Recursive calls pass gettimer = 0 so their time is not counted twice. */
+  if (gettimer) TIMER_TOC(TIMER_DOSUB_PAIR_STARS);
+}
+
+/**
+ * @brief Compute grouped sub-cell interactions for self tasks
+ *
+ * @param r The #runner.
+ * @param ci The first #cell.
+ * @param gettimer Should the elapsed time be recorded (1) or not (0) ?
+ */
+void DOSUB_SELF1_STARS(struct runner *r, struct cell *ci, int gettimer) {
+
+  TIMER_TIC;
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (ci->nodeID != engine_rank)
+    error("This function should not be called on foreign cells");
+#endif
+
+  /* Should we even bother? */
+  if (ci->hydro.count == 0 || ci->stars.count == 0 ||
+      !cell_is_active_stars(ci, r->e))
+    return;
+
+  /* Recurse? */
+  if (cell_can_recurse_in_self_stars_task(ci)) {
+
+    /* Loop over all progeny: self on each, pair on each distinct couple. */
+    for (int k = 0; k < 8; k++)
+      if (ci->progeny[k] != NULL) {
+        DOSUB_SELF1_STARS(r, ci->progeny[k], 0);
+        for (int j = k + 1; j < 8; j++)
+          if (ci->progeny[j] != NULL)
+            DOSUB_PAIR1_STARS(r, ci->progeny[k], ci->progeny[j], 0);
+      }
+  }
+
+  /* Otherwise, compute self-interaction. */
+  else {
+
+    /* The sparts must already be drifted: this is an error, not a drift. */
+    if (!cell_are_spart_drifted(ci, r->e)) error("Interacting undrifted cell.");
+
+    DOSELF1_BRANCH_STARS(r, ci);
+  }
+  /* Recursive calls pass gettimer = 0 so their time is not counted twice. */
+  if (gettimer) TIMER_TOC(TIMER_DOSUB_SELF_STARS);
+}
diff --git a/src/runner_doiact_grav.c b/src/runner_doiact_grav.c
new file mode 100644
index 0000000000000000000000000000000000000000..d4b71b7e94ad1d5731cd81747e296a0aed05e520
--- /dev/null
+++ b/src/runner_doiact_grav.c
@@ -0,0 +1,1824 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2013 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
+ *               2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+#include "../config.h"
+
+/* This object's header. */
+#include "runner_doiact_grav.h"
+
+/* Local includes. */
+#include "active.h"
+#include "cell.h"
+#include "gravity.h"
+#include "gravity_cache.h"
+#include "gravity_iact.h"
+#include "inline.h"
+#include "part.h"
+#include "space_getsid.h"
+#include "timers.h"
+
+/**
+ * @brief Recursively propagate the field tensors down the tree by applying
+ * the L2L kernel on split cells and the L2P kernel on the leaves.
+ *
+ * @param r The #runner.
+ * @param c The #cell we are working on.
+ * @param timer Should the elapsed time be recorded (1) or not (0) ?
+ */
+void runner_do_grav_down(struct runner *r, struct cell *c, int timer) {
+
+  /* Engine this runner works for */
+  const struct engine *e = r->e;
+
+  TIMER_TIC;
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (c->grav.ti_old_multipole != e->ti_current)
+    error("c->multipole not drifted.");
+  if (c->grav.multipole->pot.ti_init != e->ti_current)
+    error("c->field tensor not initialised");
+#endif
+
+  if (c->split) {
+
+    /* Node case: hand the field tensor down to each of the 8 progeny */
+    for (int k = 0; k < 8; ++k) {
+
+      struct cell *child = c->progeny[k];
+
+      /* Skip missing progeny and those without active g-particles */
+      if (child == NULL) continue;
+      if (!cell_is_active_gravity(child, e)) continue;
+
+#ifdef SWIFT_DEBUG_CHECKS
+      if (child->grav.ti_old_multipole != e->ti_current)
+        error("cp->multipole not drifted.");
+      if (child->grav.multipole->pot.ti_init != e->ti_current)
+        error("cp->field tensor not initialised");
+#endif
+
+      /* Only push the tensor down if it received any contribution */
+      if (c->grav.multipole->pot.interacted) {
+
+        /* Shift this cell's field tensor to the progeny's CoM... */
+        struct grav_tensor shifted_tensor;
+        gravity_L2L(&shifted_tensor, &c->grav.multipole->pot,
+                    child->grav.multipole->CoM, c->grav.multipole->CoM);
+
+        /* ... and add it to the progeny's own tensor */
+        gravity_field_tensors_add(&child->grav.multipole->pot,
+                                  &shifted_tensor);
+      }
+
+      /* Recurse (the nested calls are never timed) */
+      runner_do_grav_down(r, child, 0);
+    }
+
+  } else {
+
+    /* Leaf case */
+
+    /* Nothing to apply if no multipole interaction reached this cell */
+    if (!c->grav.multipole->pot.interacted) return;
+
+    if (!cell_are_gpart_drifted(c, e)) error("Un-drifted gparts");
+
+    /* Local handles on the cell's data */
+    const struct grav_tensor *pot = &c->grav.multipole->pot;
+    const double CoM[3] = {c->grav.multipole->CoM[0], c->grav.multipole->CoM[1],
+                           c->grav.multipole->CoM[2]};
+    struct gpart *gparts = c->grav.parts;
+    const int gcount = c->grav.count;
+
+    /* Apply the field tensor to every active particle */
+    for (int i = 0; i < gcount; ++i) {
+
+      struct gpart *gp = &gparts[i];
+
+      /* Inactive particles are left untouched */
+      if (!gpart_is_active(gp, e)) continue;
+
+#ifdef SWIFT_DEBUG_CHECKS
+      /* Check that particles have been drifted to the current time */
+      if (gp->ti_drift != e->ti_current)
+        error("gpart not drifted to current time");
+      if (c->grav.multipole->pot.ti_init != e->ti_current)
+        error("c->field tensor not initialised");
+
+      /* Check that we are not updating an inhibited particle */
+      if (gpart_is_inhibited(gp, e)) error("Updating an inhibited particle!");
+
+      /* Check that the particle was initialised */
+      if (gp->initialised == 0)
+        error("Adding forces to an un-initialised gpart.");
+#endif
+
+      /* Apply the kernel */
+      gravity_L2P(pot, CoM, gp);
+    }
+  }
+
+  /* Record the elapsed time only when asked to */
+  if (timer) TIMER_TOC(timer_dograv_down);
+}
+
+/**
+ * @brief Compute the non-truncated gravity interactions between all particles
+ * of a cell and the particles of the other cell.
+ *
+ * The calculation is performed non-symmetrically using the pre-filled
+ * #gravity_cache structures. The loop over the j cache should auto-vectorize.
+ *
+ * @param ci_cache #gravity_cache containing the particles to be updated.
+ * @param cj_cache #gravity_cache containing the source particles.
+ * @param gcount_i The number of particles in the cell i.
+ * @param gcount_padded_j The number of particles in the cell j padded to the
+ * vector length.
+ * @param periodic Is the calculation using periodic BCs ?
+ * @param dim The size of the simulation volume.
+ *
+ * @param e The #engine (for debugging checks only).
+ * @param gparts_i The #gpart in cell i (for debugging checks only).
+ * @param gparts_j The #gpart in cell j (for debugging checks only).
+ * @param gcount_j The number of particles in the cell j (for debugging checks
+ * only).
+ */
+static INLINE void runner_dopair_grav_pp_full(
+    struct gravity_cache *restrict ci_cache,
+    struct gravity_cache *restrict cj_cache, const int gcount_i,
+    const int gcount_j, const int gcount_padded_j, const int periodic,
+    const float dim[3], const struct engine *restrict e,
+    struct gpart *restrict gparts_i, const struct gpart *restrict gparts_j) {
+
+  /* Loop over all particles in ci... */
+  for (int pid = 0; pid < gcount_i; pid++) {
+
+    /* Skip inactive particles */
+    if (!ci_cache->active[pid]) continue;
+
+    /* Skip particles that can use the multipole instead */
+    if (ci_cache->use_mpole[pid]) continue;
+
+#ifdef SWIFT_DEBUG_CHECKS
+    if (!gpart_is_active(&gparts_i[pid], e))
+      error("Inactive particle went through the cache");
+#endif
+
+    const float x_i = ci_cache->x[pid];
+    const float y_i = ci_cache->y[pid];
+    const float z_i = ci_cache->z[pid];
+    const float h_i = ci_cache->epsilon[pid];
+
+    /* Local accumulators for the acceleration and potential */
+    float a_x = 0.f, a_y = 0.f, a_z = 0.f, pot = 0.f;
+
+    /* Make the compiler understand we are in happy vectorization land */
+    swift_align_information(float, cj_cache->x, SWIFT_CACHE_ALIGNMENT);
+    swift_align_information(float, cj_cache->y, SWIFT_CACHE_ALIGNMENT);
+    swift_align_information(float, cj_cache->z, SWIFT_CACHE_ALIGNMENT);
+    swift_align_information(float, cj_cache->m, SWIFT_CACHE_ALIGNMENT);
+    swift_align_information(float, cj_cache->epsilon, SWIFT_CACHE_ALIGNMENT);
+    swift_assume_size(gcount_padded_j, VEC_SIZE);
+
+    /* Loop over every (padded) entry of the j cache. */
+    for (int pjd = 0; pjd < gcount_padded_j; pjd++) {
+
+      /* Get info about j */
+      const float x_j = cj_cache->x[pjd];
+      const float y_j = cj_cache->y[pjd];
+      const float z_j = cj_cache->z[pjd];
+      const float mass_j = cj_cache->m[pjd];
+      const float h_j = cj_cache->epsilon[pjd];
+
+      /* Compute the pairwise distance. */
+      float dx = x_j - x_i;
+      float dy = y_j - y_i;
+      float dz = z_j - z_i;
+
+      /* Correct for periodic BCs */
+      if (periodic) {
+        dx = nearestf(dx, dim[0]);
+        dy = nearestf(dy, dim[1]);
+        dz = nearestf(dz, dim[2]);
+      }
+
+      const float r2 = dx * dx + dy * dy + dz * dz;
+
+      /* Pick the maximal softening length of i and j */
+      const float h = max(h_i, h_j);
+      const float h2 = h * h;
+      const float h_inv = 1.f / h;
+      const float h_inv_3 = h_inv * h_inv * h_inv;
+
+#ifdef SWIFT_DEBUG_CHECKS
+      if (r2 == 0.f && h2 == 0.)
+        error("Interacting particles with 0 distance and 0 softening.");
+
+      /* Check that particles have been drifted to the current time */
+      if (gparts_i[pid].ti_drift != e->ti_current)
+        error("gpi not drifted to current time");
+      if (pjd < gcount_j && gparts_j[pjd].ti_drift != e->ti_current &&
+          !gpart_is_inhibited(&gparts_j[pjd], e))
+        error("gpj not drifted to current time");
+
+      /* Check that we are not updating an inhibited particle */
+      if (gpart_is_inhibited(&gparts_i[pid], e))
+        error("Updating an inhibited particle!");
+
+      /* Check that the particle we interact with was not inhibited */
+      if (pjd < gcount_j && gpart_is_inhibited(&gparts_j[pjd], e) &&
+          mass_j != 0.f)
+        error("Inhibited particle used as gravity source.");
+
+      /* Check that the particle was initialised */
+      if (gparts_i[pid].initialised == 0)
+        error("Adding forces to an un-initialised gpart.");
+#endif
+
+      /* Interact! */
+      float f_ij, pot_ij;
+      runner_iact_grav_pp_full(r2, h2, h_inv, h_inv_3, mass_j, &f_ij, &pot_ij);
+
+      /* Accumulate the contribution of j in the local sums */
+      a_x += f_ij * dx;
+      a_y += f_ij * dy;
+      a_z += f_ij * dz;
+      pot += pot_ij;
+
+#ifdef SWIFT_DEBUG_CHECKS
+      /* Update the interaction counter if it's not a padded gpart */
+      if (pjd < gcount_j && !gpart_is_inhibited(&gparts_j[pjd], e))
+        gparts_i[pid].num_interacted++;
+#endif
+    }
+
+    /* Add the local sums to the i cache */
+    ci_cache->a_x[pid] += a_x;
+    ci_cache->a_y[pid] += a_y;
+    ci_cache->a_z[pid] += a_z;
+    ci_cache->pot[pid] += pot;
+  }
+}
+
+/**
+ * @brief Compute the truncated gravity interactions between all particles
+ * of a cell and the particles of the other cell.
+ *
+ * The calculation is performed non-symmetrically using the pre-filled
+ * #gravity_cache structures. The loop over the j cache should auto-vectorize.
+ *
+ * This function only makes sense in periodic BCs.
+ *
+ * @param ci_cache #gravity_cache containing the particles to be updated.
+ * @param cj_cache #gravity_cache containing the source particles.
+ * @param gcount_i The number of particles in the cell i.
+ * @param gcount_padded_j The number of particles in the cell j padded to the
+ * vector length.
+ * @param dim The size of the simulation volume.
+ * @param r_s_inv The inverse of the gravity-mesh smoothing-scale.
+ *
+ * @param e The #engine (for debugging checks only).
+ * @param gparts_i The #gpart in cell i (for debugging checks only).
+ * @param gparts_j The #gpart in cell j (for debugging checks only).
+ * @param gcount_j The number of particles in the cell j (for debugging checks
+ * only).
+ */
+static INLINE void runner_dopair_grav_pp_truncated(
+    struct gravity_cache *restrict ci_cache,
+    struct gravity_cache *restrict cj_cache, const int gcount_i,
+    const int gcount_j, const int gcount_padded_j, const float dim[3],
+    const float r_s_inv, const struct engine *restrict e,
+    struct gpart *restrict gparts_i, const struct gpart *restrict gparts_j) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (!e->s->periodic)
+    error("Calling truncated PP function in non-periodic setup.");
+#endif
+
+  /* Loop over all particles in ci... */
+  for (int pid = 0; pid < gcount_i; pid++) {
+
+    /* Skip inactive particles */
+    if (!ci_cache->active[pid]) continue;
+
+    /* Skip particles that can use the multipole instead */
+    if (ci_cache->use_mpole[pid]) continue;
+
+#ifdef SWIFT_DEBUG_CHECKS
+    if (!gpart_is_active(&gparts_i[pid], e))
+      error("Inactive particle went through the cache");
+#endif
+
+    const float x_i = ci_cache->x[pid];
+    const float y_i = ci_cache->y[pid];
+    const float z_i = ci_cache->z[pid];
+    const float h_i = ci_cache->epsilon[pid];
+
+    /* Local accumulators for the acceleration and potential */
+    float a_x = 0.f, a_y = 0.f, a_z = 0.f, pot = 0.f;
+
+    /* Make the compiler understand we are in happy vectorization land */
+    swift_align_information(float, cj_cache->x, SWIFT_CACHE_ALIGNMENT);
+    swift_align_information(float, cj_cache->y, SWIFT_CACHE_ALIGNMENT);
+    swift_align_information(float, cj_cache->z, SWIFT_CACHE_ALIGNMENT);
+    swift_align_information(float, cj_cache->m, SWIFT_CACHE_ALIGNMENT);
+    swift_align_information(float, cj_cache->epsilon, SWIFT_CACHE_ALIGNMENT);
+    swift_assume_size(gcount_padded_j, VEC_SIZE);
+
+    /* Loop over every (padded) entry of the j cache. */
+    for (int pjd = 0; pjd < gcount_padded_j; pjd++) {
+
+      /* Get info about j */
+      const float x_j = cj_cache->x[pjd];
+      const float y_j = cj_cache->y[pjd];
+      const float z_j = cj_cache->z[pjd];
+      const float mass_j = cj_cache->m[pjd];
+      const float h_j = cj_cache->epsilon[pjd];
+
+      /* Compute the pairwise distance. */
+      float dx = x_j - x_i;
+      float dy = y_j - y_i;
+      float dz = z_j - z_i;
+
+      /* Correct for periodic BCs (applied unconditionally in this variant) */
+      dx = nearestf(dx, dim[0]);
+      dy = nearestf(dy, dim[1]);
+      dz = nearestf(dz, dim[2]);
+
+      const float r2 = dx * dx + dy * dy + dz * dz;
+
+      /* Pick the maximal softening length of i and j */
+      const float h = max(h_i, h_j);
+      const float h2 = h * h;
+      const float h_inv = 1.f / h;
+      const float h_inv_3 = h_inv * h_inv * h_inv;
+
+#ifdef SWIFT_DEBUG_CHECKS
+      if (r2 == 0.f && h2 == 0.)
+        error("Interacting particles with 0 distance and 0 softening.");
+
+      /* Check that particles have been drifted to the current time */
+      if (gparts_i[pid].ti_drift != e->ti_current)
+        error("gpi not drifted to current time");
+      if (pjd < gcount_j && gparts_j[pjd].ti_drift != e->ti_current &&
+          !gpart_is_inhibited(&gparts_j[pjd], e))
+        error("gpj not drifted to current time");
+
+      /* Check that we are not updating an inhibited particle */
+      if (gpart_is_inhibited(&gparts_i[pid], e))
+        error("Updating an inhibited particle!");
+
+      /* Check that the particle we interact with was not inhibited */
+      if (pjd < gcount_j && gpart_is_inhibited(&gparts_j[pjd], e) &&
+          mass_j != 0.f)
+        error("Inhibited particle used as gravity source.");
+
+      /* Check that the particle was initialised */
+      if (gparts_i[pid].initialised == 0)
+        error("Adding forces to an un-initialised gpart.");
+#endif
+
+      /* Interact! */
+      float f_ij, pot_ij;
+      runner_iact_grav_pp_truncated(r2, h2, h_inv, h_inv_3, mass_j, r_s_inv,
+                                    &f_ij, &pot_ij);
+
+      /* Accumulate the contribution of j in the local sums */
+      a_x += f_ij * dx;
+      a_y += f_ij * dy;
+      a_z += f_ij * dz;
+      pot += pot_ij;
+
+#ifdef SWIFT_DEBUG_CHECKS
+      /* Update the interaction counter if it's not a padded gpart */
+      if (pjd < gcount_j && !gpart_is_inhibited(&gparts_j[pjd], e))
+        gparts_i[pid].num_interacted++;
+#endif
+    }
+
+    /* Add the local sums to the i cache */
+    ci_cache->a_x[pid] += a_x;
+    ci_cache->a_y[pid] += a_y;
+    ci_cache->a_z[pid] += a_z;
+    ci_cache->pot[pid] += pot;
+  }
+}
+
+/**
+ * @brief Compute the gravity interactions between all particles
+ * of a cell and the multipole of the other cell.
+ *
+ * The calculation is performed using the pre-filled
+ * #gravity_cache structure. The loop over the i cache should auto-vectorize.
+ *
+ * @param ci_cache #gravity_cache containing the particles to be updated.
+ * @param gcount_padded_i The number of particles in the cell i padded to the
+ * vector length.
+ * @param CoM_j Position of the #multipole in #cell j.
+ * @param multi_j The #multipole in #cell j.
+ * @param periodic Is the calculation using periodic BCs ?
+ * @param dim The size of the simulation volume.
+ *
+ * @param e The #engine (for debugging checks only).
+ * @param gparts_i The #gpart in cell i (for debugging checks only).
+ * @param gcount_i The number of particles in the cell i (for debugging checks
+ * only).
+ * @param cj The #cell j (for debugging checks only).
+ */
+static INLINE void runner_dopair_grav_pm_full(
+    struct gravity_cache *ci_cache, const int gcount_padded_i,
+    const float CoM_j[3], const struct multipole *restrict multi_j,
+    const int periodic, const float dim[3], const struct engine *restrict e,
+    struct gpart *restrict gparts_i, const int gcount_i,
+    const struct cell *restrict cj) {
+
+  /* Make the compiler understand we are in happy vectorization land */
+  swift_declare_aligned_ptr(float, x, ci_cache->x, SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(float, y, ci_cache->y, SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(float, z, ci_cache->z, SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(float, epsilon, ci_cache->epsilon,
+                            SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(float, a_x, ci_cache->a_x, SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(float, a_y, ci_cache->a_y, SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(float, a_z, ci_cache->a_z, SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(float, pot, ci_cache->pot, SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(int, active, ci_cache->active,
+                            SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(int, use_mpole, ci_cache->use_mpole,
+                            SWIFT_CACHE_ALIGNMENT);
+  swift_assume_size(gcount_padded_i, VEC_SIZE);
+
+  /* Loop over all (padded) entries of the i cache... */
+  for (int pid = 0; pid < gcount_padded_i; pid++) {
+
+    /* Skip inactive particles */
+    if (!active[pid]) continue;
+
+    /* Skip particle that cannot use the multipole */
+    if (!use_mpole[pid]) continue;
+
+#ifdef SWIFT_DEBUG_CHECKS
+    /* Catch padded entries first, before gparts_i[pid] is ever indexed */
+    if (pid >= gcount_i) error("Adding forces to padded particle");
+    if (!gpart_is_active(&gparts_i[pid], e))
+      error("Inactive particle went through the cache");
+
+    /* Check that particles have been drifted to the current time */
+    if (gparts_i[pid].ti_drift != e->ti_current)
+      error("gpi not drifted to current time");
+
+    /* Check that we are not updating an inhibited particle */
+    if (gpart_is_inhibited(&gparts_i[pid], e))
+      error("Updating an inhibited particle!");
+
+    /* Check that the particle was initialised */
+    if (gparts_i[pid].initialised == 0)
+      error("Adding forces to an un-initialised gpart.");
+#endif
+
+    const float x_i = x[pid];
+    const float y_i = y[pid];
+    const float z_i = z[pid];
+
+    /* Some powers of the softening length */
+    const float h_i = epsilon[pid];
+    const float h_inv_i = 1.f / h_i;
+
+    /* Distance to the Multipole */
+    float dx = CoM_j[0] - x_i;
+    float dy = CoM_j[1] - y_i;
+    float dz = CoM_j[2] - z_i;
+
+    /* Apply periodic BCs? */
+    if (periodic) {
+      dx = nearestf(dx, dim[0]);
+      dy = nearestf(dy, dim[1]);
+      dz = nearestf(dz, dim[2]);
+    }
+
+    const float r2 = dx * dx + dy * dy + dz * dz;
+
+#ifdef SWIFT_DEBUG_CHECKS
+    const float r_max_j = cj->grav.multipole->r_max;
+    const float r_max2 = r_max_j * r_max_j;
+    const float theta_crit2 = e->gravity_properties->theta_crit2;
+
+    /* Note: 0.99 and 1.1 to avoid FP rounding false-positives */
+    if (!gravity_M2P_accept(r_max2, theta_crit2 * 1.1, r2, 0.99 * h_i))
+      error(
+          "use_mpole[i] set when M2P accept fails CoM=[%e %e %e] pos=[%e %e "
+          "%e], rmax=%e r=%e epsilon=%e",
+          CoM_j[0], CoM_j[1], CoM_j[2], x_i, y_i, z_i, r_max_j, sqrtf(r2), h_i);
+#endif
+
+    /* Interact! */
+    float f_x, f_y, f_z, pot_ij;
+    runner_iact_grav_pm_full(dx, dy, dz, r2, h_i, h_inv_i, multi_j, &f_x, &f_y,
+                             &f_z, &pot_ij);
+
+    /* Add the multipole contribution to the i cache */
+    a_x[pid] += f_x;
+    a_y[pid] += f_y;
+    a_z[pid] += f_z;
+    pot[pid] += pot_ij;
+
+#ifdef SWIFT_DEBUG_CHECKS
+    /* Update the interaction counter */
+    if (pid < gcount_i)
+      gparts_i[pid].num_interacted += cj->grav.multipole->m_pole.num_gpart;
+#endif
+  }
+}
+
+/**
+ * @brief Compute the gravity interactions between all particles
+ * of a cell and the multipole of the other cell.
+ *
+ * The calculation is performed using the pre-filled
+ * #gravity_cache structure. The loop over the i cache should auto-vectorize.
+ *
+ * This function only makes sense in periodic BCs.
+ *
+ * @param ci_cache #gravity_cache containing the particles to be updated.
+ * @param gcount_padded_i The number of particles in the cell i padded to the
+ * vector length.
+ * @param CoM_j Position of the #multipole in #cell j.
+ * @param multi_j The #multipole in #cell j.
+ * @param dim The size of the simulation volume.
+ * @param r_s_inv The inverse of the gravity-mesh smoothing-scale.
+ *
+ * @param e The #engine (for debugging checks only).
+ * @param gparts_i The #gpart in cell i (for debugging checks only).
+ * @param gcount_i The number of particles in the cell i (for debugging checks
+ * only).
+ * @param cj The #cell j (for debugging checks only).
+ */
+static INLINE void runner_dopair_grav_pm_truncated(
+    struct gravity_cache *ci_cache, const int gcount_padded_i,
+    const float CoM_j[3], const struct multipole *restrict multi_j,
+    const float dim[3], const float r_s_inv, const struct engine *restrict e,
+    struct gpart *restrict gparts_i, const int gcount_i,
+    const struct cell *restrict cj) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (!e->s->periodic)
+    error("Calling truncated PM function in non-periodic setup.");
+#endif
+
+  /* Make the compiler understand we are in happy vectorization land */
+  swift_declare_aligned_ptr(float, x, ci_cache->x, SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(float, y, ci_cache->y, SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(float, z, ci_cache->z, SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(float, epsilon, ci_cache->epsilon,
+                            SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(float, a_x, ci_cache->a_x, SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(float, a_y, ci_cache->a_y, SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(float, a_z, ci_cache->a_z, SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(float, pot, ci_cache->pot, SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(int, active, ci_cache->active,
+                            SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(int, use_mpole, ci_cache->use_mpole,
+                            SWIFT_CACHE_ALIGNMENT);
+  swift_assume_size(gcount_padded_i, VEC_SIZE);
+
+  /* Loop over all (padded) entries of the i cache... */
+  for (int pid = 0; pid < gcount_padded_i; pid++) {
+
+    /* Skip inactive particles */
+    if (!active[pid]) continue;
+
+    /* Skip particle that cannot use the multipole */
+    if (!use_mpole[pid]) continue;
+
+#ifdef SWIFT_DEBUG_CHECKS
+    /* Catch padded entries first, before gparts_i[pid] is ever indexed */
+    if (pid >= gcount_i) error("Adding forces to padded particle");
+    if (!gpart_is_active(&gparts_i[pid], e))
+      error("Inactive particle went through the cache");
+
+    /* Check that particles have been drifted to the current time */
+    if (gparts_i[pid].ti_drift != e->ti_current)
+      error("gpi not drifted to current time");
+
+    /* Check that we are not updating an inhibited particle */
+    if (gpart_is_inhibited(&gparts_i[pid], e))
+      error("Updating an inhibited particle!");
+
+    /* Check that the particle was initialised */
+    if (gparts_i[pid].initialised == 0)
+      error("Adding forces to an un-initialised gpart.");
+#endif
+
+    const float x_i = x[pid];
+    const float y_i = y[pid];
+    const float z_i = z[pid];
+
+    /* Some powers of the softening length */
+    const float h_i = epsilon[pid];
+    const float h_inv_i = 1.f / h_i;
+
+    /* Distance to the Multipole */
+    float dx = CoM_j[0] - x_i;
+    float dy = CoM_j[1] - y_i;
+    float dz = CoM_j[2] - z_i;
+
+    /* Apply periodic BCs (unconditionally in this variant) */
+    dx = nearestf(dx, dim[0]);
+    dy = nearestf(dy, dim[1]);
+    dz = nearestf(dz, dim[2]);
+
+    const float r2 = dx * dx + dy * dy + dz * dz;
+
+#ifdef SWIFT_DEBUG_CHECKS
+    const float r_max_j = cj->grav.multipole->r_max;
+    const float r_max2 = r_max_j * r_max_j;
+    const float theta_crit2 = e->gravity_properties->theta_crit2;
+
+    /* 0.99 and 1.1 to avoid FP rounding false-positives */
+    if (!gravity_M2P_accept(r_max2, theta_crit2 * 1.1, r2, 0.99 * h_i))
+      error(
+          "use_mpole[i] set when M2P accept fails CoM=[%e %e %e] pos=[%e %e "
+          "%e], rmax=%e",
+          CoM_j[0], CoM_j[1], CoM_j[2], x_i, y_i, z_i, r_max_j);
+#endif
+
+    /* Interact! */
+    float f_x, f_y, f_z, pot_ij;
+    runner_iact_grav_pm_truncated(dx, dy, dz, r2, h_i, h_inv_i, r_s_inv,
+                                  multi_j, &f_x, &f_y, &f_z, &pot_ij);
+
+    /* Add the multipole contribution to the i cache */
+    a_x[pid] += f_x;
+    a_y[pid] += f_y;
+    a_z[pid] += f_z;
+    pot[pid] += pot_ij;
+
+#ifdef SWIFT_DEBUG_CHECKS
+    /* Update the interaction counter */
+    if (pid < gcount_i)
+      gparts_i[pid].num_interacted += cj->grav.multipole->m_pole.num_gpart;
+#endif
+  }
+}
+
+/**
+ * @brief Computes the interaction of all the particles in a cell with all the
+ * particles of another cell.
+ *
+ * This function switches between the full potential and the truncated one
+ * depending on needs. It will also use the M2P (multipole) interaction
+ * for the subset of particles in either cell for which the distance criterion
+ * is valid.
+ *
+ * This function starts by constructing the required #gravity_cache for both
+ * cells and then calls the specialised functions doing the actual work on
+ * the caches. It then writes the data back to the particles.
+ *
+ * @param r The #runner.
+ * @param ci The first #cell.
+ * @param cj The other #cell.
+ * @param symmetric Are we updating both cells (1) or just ci (0) ?
+ * @param allow_mpole Are we allowing the use of M2P interactions ?
+ */
+void runner_dopair_grav_pp(struct runner *r, struct cell *ci, struct cell *cj,
+                           const int symmetric, const int allow_mpole) {
+
+  /* Recover some useful constants */
+  const struct engine *e = r->e;
+  const int periodic = e->mesh->periodic;
+  const float dim[3] = {(float)e->mesh->dim[0], (float)e->mesh->dim[1],
+                        (float)e->mesh->dim[2]};
+  const float r_s_inv = e->mesh->r_s_inv;
+  const double min_trunc = e->mesh->r_cut_min;
+
+  TIMER_TIC;
+
+  /* Record activity status */
+  const int ci_active =
+      cell_is_active_gravity(ci, e) && (ci->nodeID == e->nodeID);
+  const int cj_active =
+      cell_is_active_gravity(cj, e) && (cj->nodeID == e->nodeID);
+
+  /* Anything to do here? */
+  if (!ci_active && !cj_active) return;
+  if (!ci_active && !symmetric) return;
+
+  /* Check that we are not doing something stupid */
+  if (ci->split || cj->split) error("Running P-P on splitable cells");
+
+  /* Let's start by checking things are drifted */
+  if (!cell_are_gpart_drifted(ci, e)) error("Un-drifted gparts");
+  if (!cell_are_gpart_drifted(cj, e)) error("Un-drifted gparts");
+  if (cj_active && ci->grav.ti_old_multipole != e->ti_current)
+    error("Un-drifted multipole");
+  if (ci_active && cj->grav.ti_old_multipole != e->ti_current)
+    error("Un-drifted multipole");
+
+  /* Caches to play with */
+  struct gravity_cache *const ci_cache = &r->ci_gravity_cache;
+  struct gravity_cache *const cj_cache = &r->cj_gravity_cache;
+
+  /* Shift to apply to the particles in each cell */
+  const double shift_i[3] = {0., 0., 0.};
+  const double shift_j[3] = {0., 0., 0.};
+
+  /* Recover the multipole info and shift the CoM locations */
+  const float rmax_i = ci->grav.multipole->r_max;
+  const float rmax_j = cj->grav.multipole->r_max;
+  const float rmax2_i = rmax_i * rmax_i;
+  const float rmax2_j = rmax_j * rmax_j;
+  const struct multipole *multi_i = &ci->grav.multipole->m_pole;
+  const struct multipole *multi_j = &cj->grav.multipole->m_pole;
+  const float CoM_i[3] = {(float)(ci->grav.multipole->CoM[0] - shift_i[0]),
+                          (float)(ci->grav.multipole->CoM[1] - shift_i[1]),
+                          (float)(ci->grav.multipole->CoM[2] - shift_i[2])};
+  const float CoM_j[3] = {(float)(cj->grav.multipole->CoM[0] - shift_j[0]),
+                          (float)(cj->grav.multipole->CoM[1] - shift_j[1]),
+                          (float)(cj->grav.multipole->CoM[2] - shift_j[2])};
+
+  /* Start by constructing particle caches */
+
+  /* Computed the padded counts */
+  const int gcount_i = ci->grav.count;
+  const int gcount_j = cj->grav.count;
+  const int gcount_padded_i = gcount_i - (gcount_i % VEC_SIZE) + VEC_SIZE;
+  const int gcount_padded_j = gcount_j - (gcount_j % VEC_SIZE) + VEC_SIZE;
+
+#ifdef SWIFT_DEBUG_CHECKS
+  /* Check that we fit in cache */
+  if (gcount_i > ci_cache->count || gcount_j > cj_cache->count)
+    error("Not enough space in the caches! gcount_i=%d gcount_j=%d", gcount_i,
+          gcount_j);
+#endif
+
+  /* Fill the caches */
+  gravity_cache_populate(e->max_active_bin, allow_mpole, periodic, dim,
+                         ci_cache, ci->grav.parts, gcount_i, gcount_padded_i,
+                         shift_i, CoM_j, rmax2_j, ci, e->gravity_properties);
+  gravity_cache_populate(e->max_active_bin, allow_mpole, periodic, dim,
+                         cj_cache, cj->grav.parts, gcount_j, gcount_padded_j,
+                         shift_j, CoM_i, rmax2_i, cj, e->gravity_properties);
+
+  /* Can we use the Newtonian version or do we need the truncated one ? */
+  if (!periodic) {
+
+    /* Not periodic -> Can always use Newtonian potential */
+
+    /* Let's updated the active cell(s) only */
+    if (ci_active) {
+
+      /* First the P2P */
+      runner_dopair_grav_pp_full(ci_cache, cj_cache, gcount_i, gcount_j,
+                                 gcount_padded_j, periodic, dim, e,
+                                 ci->grav.parts, cj->grav.parts);
+
+      /* Then the M2P */
+      if (allow_mpole)
+        runner_dopair_grav_pm_full(ci_cache, gcount_padded_i, CoM_j, multi_j,
+                                   periodic, dim, e, ci->grav.parts, gcount_i,
+                                   cj);
+    }
+    if (cj_active && symmetric) {
+
+      /* First the P2P */
+      runner_dopair_grav_pp_full(cj_cache, ci_cache, gcount_j, gcount_i,
+                                 gcount_padded_i, periodic, dim, e,
+                                 cj->grav.parts, ci->grav.parts);
+
+      /* Then the M2P */
+      if (allow_mpole)
+        runner_dopair_grav_pm_full(cj_cache, gcount_padded_j, CoM_i, multi_i,
+                                   periodic, dim, e, cj->grav.parts, gcount_j,
+                                   ci);
+    }
+
+  } else { /* Periodic BC */
+
+    /* Get the relative distance between the CoMs */
+    const double dx[3] = {CoM_j[0] - CoM_i[0], CoM_j[1] - CoM_i[1],
+                          CoM_j[2] - CoM_i[2]};
+    const double r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
+
+    /* Get the maximal distance between any two particles */
+    const double max_r = sqrt(r2) + rmax_i + rmax_j;
+
+    /* Do we need to use the truncated interactions ? */
+    if (max_r > min_trunc) {
+
+      /* Periodic but far-away cells must use the truncated potential */
+
+      /* Let's updated the active cell(s) only */
+      if (ci_active) {
+
+        /* First the (truncated) P2P */
+        runner_dopair_grav_pp_truncated(ci_cache, cj_cache, gcount_i, gcount_j,
+                                        gcount_padded_j, dim, r_s_inv, e,
+                                        ci->grav.parts, cj->grav.parts);
+
+        /* Then the M2P */
+        if (allow_mpole)
+          runner_dopair_grav_pm_truncated(ci_cache, gcount_padded_i, CoM_j,
+                                          multi_j, dim, r_s_inv, e,
+                                          ci->grav.parts, gcount_i, cj);
+      }
+      if (cj_active && symmetric) {
+
+        /* First the (truncated) P2P */
+        runner_dopair_grav_pp_truncated(cj_cache, ci_cache, gcount_j, gcount_i,
+                                        gcount_padded_i, dim, r_s_inv, e,
+                                        cj->grav.parts, ci->grav.parts);
+
+        /* Then the M2P */
+        if (allow_mpole)
+          runner_dopair_grav_pm_truncated(cj_cache, gcount_padded_j, CoM_i,
+                                          multi_i, dim, r_s_inv, e,
+                                          cj->grav.parts, gcount_j, ci);
+      }
+
+    } else {
+
+      /* Periodic but close-by cells can use the full Newtonian potential */
+
+      /* Let's updated the active cell(s) only */
+      if (ci_active) {
+
+        /* First the (Newtonian) P2P */
+        runner_dopair_grav_pp_full(ci_cache, cj_cache, gcount_i, gcount_j,
+                                   gcount_padded_j, periodic, dim, e,
+                                   ci->grav.parts, cj->grav.parts);
+
+        /* Then the M2P */
+        if (allow_mpole)
+          runner_dopair_grav_pm_full(ci_cache, gcount_padded_i, CoM_j, multi_j,
+                                     periodic, dim, e, ci->grav.parts, gcount_i,
+                                     cj);
+      }
+      if (cj_active && symmetric) {
+
+        /* First the (Newtonian) P2P */
+        runner_dopair_grav_pp_full(cj_cache, ci_cache, gcount_j, gcount_i,
+                                   gcount_padded_i, periodic, dim, e,
+                                   cj->grav.parts, ci->grav.parts);
+
+        /* Then the M2P */
+        if (allow_mpole)
+          runner_dopair_grav_pm_full(cj_cache, gcount_padded_j, CoM_i, multi_i,
+                                     periodic, dim, e, cj->grav.parts, gcount_j,
+                                     ci);
+      }
+    }
+  }
+
+  /* Write back to the particles */
+  if (ci_active) gravity_cache_write_back(ci_cache, ci->grav.parts, gcount_i);
+  if (cj_active && symmetric)
+    gravity_cache_write_back(cj_cache, cj->grav.parts, gcount_j);
+
+  TIMER_TOC(timer_dopair_grav_pp);
+}
+
+/**
+ * @brief Compute the non-truncated gravity interactions between all particles
+ * of a cell and the other particles of the same cell.
+ *
+ * The calculation is performed non-symmetrically using the pre-filled
+ * #gravity_cache structure. The inner (j) loop should auto-vectorize.
+ *
+ * @param ci_cache #gravity_cache containing the particles to be updated.
+ * @param gcount The number of particles in the cell.
+ * @param gcount_padded The number of particles in the cell padded to the
+ * vector length.
+ *
+ * @param e The #engine (for debugging checks only).
+ * @param gparts The #gpart in the cell (for debugging checks only).
+ */
+static INLINE void runner_doself_grav_pp_full(
+    struct gravity_cache *restrict ci_cache, const int gcount,
+    const int gcount_padded, const struct engine *e, struct gpart *gparts) {
+
+  /* Loop over all particles in ci... */
+  for (int pid = 0; pid < gcount; pid++) {
+
+    /* Skip inactive particles */
+    if (!ci_cache->active[pid]) continue;
+
+    const float x_i = ci_cache->x[pid];
+    const float y_i = ci_cache->y[pid];
+    const float z_i = ci_cache->z[pid];
+    const float h_i = ci_cache->epsilon[pid];
+
+    /* Local accumulators for the acceleration and potential */
+    float a_x = 0.f, a_y = 0.f, a_z = 0.f, pot = 0.f;
+
+    /* Make the compiler understand we are in happy vectorization land */
+    swift_align_information(float, ci_cache->x, SWIFT_CACHE_ALIGNMENT);
+    swift_align_information(float, ci_cache->y, SWIFT_CACHE_ALIGNMENT);
+    swift_align_information(float, ci_cache->z, SWIFT_CACHE_ALIGNMENT);
+    swift_align_information(float, ci_cache->m, SWIFT_CACHE_ALIGNMENT);
+    swift_align_information(float, ci_cache->epsilon, SWIFT_CACHE_ALIGNMENT);
+    swift_assume_size(gcount_padded, VEC_SIZE);
+
+    /* Loop over every other particle in the cell (incl. padded entries). */
+    for (int pjd = 0; pjd < gcount_padded; pjd++) {
+
+      /* No self interaction */
+      if (pid == pjd) continue;
+
+      /* Get info about j */
+      const float x_j = ci_cache->x[pjd];
+      const float y_j = ci_cache->y[pjd];
+      const float z_j = ci_cache->z[pjd];
+      const float mass_j = ci_cache->m[pjd];
+      const float h_j = ci_cache->epsilon[pjd];
+
+      /* Compute the pairwise (square) distance. */
+      /* Note: no need for periodic wrapping inside a cell */
+      const float dx = x_j - x_i;
+      const float dy = y_j - y_i;
+      const float dz = z_j - z_i;
+      const float r2 = dx * dx + dy * dy + dz * dz;
+
+      /* Pick the maximal softening length of i and j */
+      const float h = max(h_i, h_j);
+      const float h2 = h * h;
+      const float h_inv = 1.f / h;
+      const float h_inv_3 = h_inv * h_inv * h_inv;
+
+#ifdef SWIFT_DEBUG_CHECKS
+      if (r2 == 0.f && h2 == 0.)
+        error("Interacting particles with 0 distance and 0 softening.");
+
+      /* Check that particles have been drifted to the current time */
+      if (gparts[pid].ti_drift != e->ti_current)
+        error("gpi not drifted to current time");
+      if (pjd < gcount && gparts[pjd].ti_drift != e->ti_current &&
+          !gpart_is_inhibited(&gparts[pjd], e))
+        error("gpj not drifted to current time");
+
+      /* Check that we are not updating an inhibited particle */
+      if (gpart_is_inhibited(&gparts[pid], e))
+        error("Updating an inhibited particle!");
+
+      /* Check that the particle we interact with was not inhibited */
+      if (pjd < gcount && gpart_is_inhibited(&gparts[pjd], e) && mass_j != 0.f)
+        error("Inhibited particle used as gravity source.");
+
+      /* Check that the particle was initialised */
+      if (gparts[pid].initialised == 0)
+        error("Adding forces to an un-initialised gpart.");
+#endif
+
+      /* Interact! */
+      float f_ij, pot_ij;
+      runner_iact_grav_pp_full(r2, h2, h_inv, h_inv_3, mass_j, &f_ij, &pot_ij);
+
+      /* Accumulate the contribution of j locally */
+      a_x += f_ij * dx;
+      a_y += f_ij * dy;
+      a_z += f_ij * dz;
+      pot += pot_ij;
+
+#ifdef SWIFT_DEBUG_CHECKS
+      /* Update the interaction counter if it's not a padded gpart */
+      if (pjd < gcount && !gpart_is_inhibited(&gparts[pjd], e))
+        gparts[pid].num_interacted++;
+#endif
+    }
+
+    /* Store everything back in cache */
+    ci_cache->a_x[pid] += a_x;
+    ci_cache->a_y[pid] += a_y;
+    ci_cache->a_z[pid] += a_z;
+    ci_cache->pot[pid] += pot;
+  }
+}
+
+/**
+ * @brief Compute the truncated gravity interactions between all particles
+ * of a cell and the other particles of the same cell.
+ *
+ * The calculation is performed non-symmetrically using the pre-filled
+ * #gravity_cache structure. The inner (j) loop should auto-vectorize.
+ *
+ * This function only makes sense in periodic BCs.
+ *
+ * @param ci_cache #gravity_cache containing the particles to be updated.
+ * @param gcount The number of particles in the cell.
+ * @param gcount_padded The number of particles in the cell padded to the
+ * vector length.
+ * @param r_s_inv The inverse of the gravity-mesh smoothing-scale.
+ *
+ * @param e The #engine (for debugging checks only).
+ * @param gparts The #gpart in the cell (for debugging checks only).
+ */
+static INLINE void runner_doself_grav_pp_truncated(
+    struct gravity_cache *restrict ci_cache, const int gcount,
+    const int gcount_padded, const float r_s_inv, const struct engine *e,
+    struct gpart *gparts) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (!e->s->periodic)
+    error("Calling truncated PP function in non-periodic setup.");
+#endif
+
+  /* Loop over all particles in ci... */
+  for (int pid = 0; pid < gcount; pid++) {
+
+    /* Skip inactive particles */
+    if (!ci_cache->active[pid]) continue;
+
+    const float x_i = ci_cache->x[pid];
+    const float y_i = ci_cache->y[pid];
+    const float z_i = ci_cache->z[pid];
+    const float h_i = ci_cache->epsilon[pid];
+
+    /* Local accumulators for the acceleration and potential */
+    float a_x = 0.f, a_y = 0.f, a_z = 0.f, pot = 0.f;
+
+    /* Make the compiler understand we are in happy vectorization land */
+    swift_align_information(float, ci_cache->x, SWIFT_CACHE_ALIGNMENT);
+    swift_align_information(float, ci_cache->y, SWIFT_CACHE_ALIGNMENT);
+    swift_align_information(float, ci_cache->z, SWIFT_CACHE_ALIGNMENT);
+    swift_align_information(float, ci_cache->m, SWIFT_CACHE_ALIGNMENT);
+    swift_align_information(float, ci_cache->epsilon, SWIFT_CACHE_ALIGNMENT);
+    swift_assume_size(gcount_padded, VEC_SIZE);
+
+    /* Loop over every other particle in the cell (incl. padded entries). */
+    for (int pjd = 0; pjd < gcount_padded; pjd++) {
+
+      /* No self interaction */
+      if (pid == pjd) continue;
+
+      /* Get info about j */
+      const float x_j = ci_cache->x[pjd];
+      const float y_j = ci_cache->y[pjd];
+      const float z_j = ci_cache->z[pjd];
+      const float mass_j = ci_cache->m[pjd];
+      const float h_j = ci_cache->epsilon[pjd];
+
+      /* Compute the pairwise (square) distance. */
+      /* Note: no need for periodic wrapping inside a cell */
+      const float dx = x_j - x_i;
+      const float dy = y_j - y_i;
+      const float dz = z_j - z_i;
+
+      const float r2 = dx * dx + dy * dy + dz * dz;
+
+      /* Pick the maximal softening length of i and j */
+      const float h = max(h_i, h_j);
+      const float h2 = h * h;
+      const float h_inv = 1.f / h;
+      const float h_inv_3 = h_inv * h_inv * h_inv;
+
+#ifdef SWIFT_DEBUG_CHECKS
+      if (r2 == 0.f && h2 == 0.)
+        error("Interacting particles with 0 distance and 0 softening.");
+
+      /* Check that particles have been drifted to the current time */
+      if (gparts[pid].ti_drift != e->ti_current)
+        error("gpi not drifted to current time");
+      if (pjd < gcount && gparts[pjd].ti_drift != e->ti_current &&
+          !gpart_is_inhibited(&gparts[pjd], e))
+        error("gpj not drifted to current time");
+
+      /* Check that we are not updating an inhibited particle */
+      if (gpart_is_inhibited(&gparts[pid], e))
+        error("Updating an inhibited particle!");
+
+      /* Check that the particle we interact with was not inhibited */
+      if (pjd < gcount && gpart_is_inhibited(&gparts[pjd], e) && mass_j != 0.f)
+        error("Inhibited particle used as gravity source.");
+
+      /* Check that the particle was initialised */
+      if (gparts[pid].initialised == 0)
+        error("Adding forces to an un-initialised gpart.");
+#endif
+
+      /* Interact! */
+      float f_ij, pot_ij;
+      runner_iact_grav_pp_truncated(r2, h2, h_inv, h_inv_3, mass_j, r_s_inv,
+                                    &f_ij, &pot_ij);
+
+      /* Accumulate the contribution of j locally */
+      a_x += f_ij * dx;
+      a_y += f_ij * dy;
+      a_z += f_ij * dz;
+      pot += pot_ij;
+
+#ifdef SWIFT_DEBUG_CHECKS
+      /* Update the interaction counter if it's not a padded gpart */
+      if (pjd < gcount && !gpart_is_inhibited(&gparts[pjd], e))
+        gparts[pid].num_interacted++;
+#endif
+    }
+
+    /* Store everything back in cache */
+    ci_cache->a_x[pid] += a_x;
+    ci_cache->a_y[pid] += a_y;
+    ci_cache->a_z[pid] += a_z;
+    ci_cache->pot[pid] += pot;
+  }
+}
+
+/**
+ * @brief Computes the interaction of all the particles in a cell with all the
+ * other ones.
+ *
+ * This function switches between the full potential and the truncated one
+ * depending on needs.
+ *
+ * This function starts by constructing the required #gravity_cache for the
+ * cell and then calls the specialised functions doing the actual work on
+ * the cache. It then writes the data back to the particles.
+ *
+ * @param r The #runner.
+ * @param c The #cell.
+ */
+void runner_doself_grav_pp(struct runner *r, struct cell *c) {
+
+  /* Recover some useful constants */
+  const struct engine *e = r->e;
+  const int periodic = e->mesh->periodic;
+  const float r_s_inv = e->mesh->r_s_inv;
+  const double min_trunc = e->mesh->r_cut_min;
+
+  TIMER_TIC;
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (c->grav.count == 0) error("Doing self gravity on an empty cell !");
+#endif
+
+  /* Anything to do here? */
+  if (!cell_is_active_gravity(c, e)) return;
+
+  /* Check that we are not doing something stupid */
+  if (c->split) error("Running P-P on a splitable cell");
+
+  /* The gparts must already have been drifted; abort otherwise */
+  if (!cell_are_gpart_drifted(c, e)) error("Un-drifted gparts");
+
+  /* Start by constructing a cache for the particles */
+  struct gravity_cache *const ci_cache = &r->ci_gravity_cache;
+
+  /* Shift to apply to the particles in the cell: loc + width / 2 */
+  const double loc[3] = {c->loc[0] + 0.5 * c->width[0],
+                         c->loc[1] + 0.5 * c->width[1],
+                         c->loc[2] + 0.5 * c->width[2]};
+
+  /* Compute the padded count (always strictly larger than the count) */
+  const int gcount = c->grav.count;
+  const int gcount_padded = gcount - (gcount % VEC_SIZE) + VEC_SIZE;
+
+#ifdef SWIFT_DEBUG_CHECKS
+  /* Check that we fit in cache */
+  if (gcount > ci_cache->count)
+    error("Not enough space in the cache! gcount=%d", gcount);
+#endif
+
+  /* Fill the cache */
+  gravity_cache_populate_no_mpole(e->max_active_bin, ci_cache, c->grav.parts,
+                                  gcount, gcount_padded, loc, c,
+                                  e->gravity_properties);
+
+  /* Can we use the Newtonian version or do we need the truncated one ? */
+  if (!periodic) {
+
+    /* Not periodic -> Can always use Newtonian potential */
+    runner_doself_grav_pp_full(ci_cache, gcount, gcount_padded, e,
+                               c->grav.parts);
+
+  } else {
+
+    /* Upper bound on the distance between any two particles */
+    const double max_r = 2. * c->grav.multipole->r_max;
+
+    /* Do we need to use the truncated interactions ? */
+    if (max_r > min_trunc) {
+
+      /* Periodic and 2 * r_max > r_cut_min -> truncated potential */
+      runner_doself_grav_pp_truncated(ci_cache, gcount, gcount_padded, r_s_inv,
+                                      e, c->grav.parts);
+
+    } else {
+
+      /* Periodic but 2 * r_max <= r_cut_min -> full Newtonian potential */
+      runner_doself_grav_pp_full(ci_cache, gcount, gcount_padded, e,
+                                 c->grav.parts);
+    }
+  }
+
+  /* Write back to the particles */
+  gravity_cache_write_back(ci_cache, c->grav.parts, gcount);
+
+  TIMER_TOC(timer_doself_grav_pp);
+}
+
+/**
+ * @brief Computes the interaction of the field tensor and multipole
+ * of two cells symmetrically.
+ *
+ * @param r The #runner.
+ * @param ci The first #cell.
+ * @param cj The second #cell.
+ */
+static INLINE void runner_dopair_grav_mm_symmetric(struct runner *r,
+                                                   struct cell *restrict ci,
+                                                   struct cell *restrict cj) {
+
+  /* Some constants */
+  const struct engine *e = r->e;
+  const struct gravity_props *props = e->gravity_properties;
+  const int periodic = e->mesh->periodic;
+  const double dim[3] = {e->mesh->dim[0], e->mesh->dim[1], e->mesh->dim[2]};
+  const float r_s_inv = e->mesh->r_s_inv;
+
+  TIMER_TIC;
+
+  /* Both cells must be active and local to this rank; abort otherwise */
+  if ((!cell_is_active_gravity_mm(ci, e) || ci->nodeID != engine_rank) ||
+      (!cell_is_active_gravity_mm(cj, e) || cj->nodeID != engine_rank))
+    error("Invalid state in symmetric M-M calculation!");
+
+  /* Short-cut to the multipole */
+  const struct multipole *multi_i = &ci->grav.multipole->m_pole;
+  const struct multipole *multi_j = &cj->grav.multipole->m_pole;
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (ci == cj) error("Interacting a cell with itself using M2L");
+
+  if (multi_i->num_gpart == 0)
+    error("Multipole i does not seem to have been set.");
+
+  if (multi_j->num_gpart == 0)
+    error("Multipole j does not seem to have been set.");
+
+  if (ci->grav.multipole->pot.ti_init != e->ti_current)
+    error("ci->grav tensor not initialised.");
+
+  if (cj->grav.multipole->pot.ti_init != e->ti_current)
+    error("cj->grav tensor not initialised.");
+
+  if (ci->grav.ti_old_multipole != e->ti_current)
+    error(
+        "Undrifted multipole ci->grav.ti_old_multipole=%lld ci->nodeID=%d "
+        "cj->nodeID=%d e->ti_current=%lld",
+        ci->grav.ti_old_multipole, ci->nodeID, cj->nodeID, e->ti_current);
+
+  if (cj->grav.ti_old_multipole != e->ti_current)
+    error(
+        "Undrifted multipole cj->grav.ti_old_multipole=%lld cj->nodeID=%d "
+        "ci->nodeID=%d e->ti_current=%lld",
+        cj->grav.ti_old_multipole, cj->nodeID, ci->nodeID, e->ti_current);
+#endif
+
+  /* Let's interact at this level (both field tensors are updated) */
+  gravity_M2L_symmetric(&ci->grav.multipole->pot, &cj->grav.multipole->pot,
+                        multi_i, multi_j, ci->grav.multipole->CoM,
+                        cj->grav.multipole->CoM, props, periodic, dim, r_s_inv);
+
+  TIMER_TOC(timer_dopair_grav_mm);
+}
+
+/**
+ * @brief Computes the interaction of the field tensor in a cell with the
+ * multipole of another cell.
+ *
+ * @param r The #runner.
+ * @param ci The #cell with field tensor to interact.
+ * @param cj The #cell with the multipole.
+ */
+static INLINE void runner_dopair_grav_mm_nonsym(
+    struct runner *r, struct cell *restrict ci,
+    const struct cell *restrict cj) {
+
+  /* Some constants */
+  const struct engine *e = r->e;
+  const struct gravity_props *props = e->gravity_properties;
+  const int periodic = e->mesh->periodic;
+  const double dim[3] = {e->mesh->dim[0], e->mesh->dim[1], e->mesh->dim[2]};
+  const float r_s_inv = e->mesh->r_s_inv;
+
+  TIMER_TIC;
+
+  /* Nothing to do if ci is inactive or does not belong to this rank */
+  if (!cell_is_active_gravity_mm(ci, e) || ci->nodeID != engine_rank) return;
+
+  /* Short-cut to the multipole */
+  const struct multipole *multi_j = &cj->grav.multipole->m_pole;
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (ci == cj) error("Interacting a cell with itself using M2L");
+
+  if (multi_j->num_gpart == 0)
+    error("Multipole does not seem to have been set.");
+
+  if (ci->grav.multipole->pot.ti_init != e->ti_current)
+    error("ci->grav tensor not initialised.");
+
+  if (cj->grav.ti_old_multipole != e->ti_current)
+    error(
+        "Undrifted multipole cj->grav.ti_old_multipole=%lld cj->nodeID=%d "
+        "ci->nodeID=%d e->ti_current=%lld",
+        cj->grav.ti_old_multipole, cj->nodeID, ci->nodeID, e->ti_current);
+#endif
+
+  /* Let's interact at this level (only ci's field tensor is updated) */
+  gravity_M2L_nonsym(&ci->grav.multipole->pot, multi_j, ci->grav.multipole->CoM,
+                     cj->grav.multipole->CoM, props, periodic, dim, r_s_inv);
+
+  TIMER_TOC(timer_dopair_grav_mm);
+}
+
+/**
+ * @brief Call the M-M calculation on two cells if active.
+ *
+ * @param r The #runner object.
+ * @param ci The first #cell.
+ * @param cj The second #cell.
+ */
+static INLINE void runner_dopair_grav_mm(struct runner *r,
+                                         struct cell *restrict ci,
+                                         struct cell *restrict cj) {
+
+  const struct engine *e = r->e;
+
+  /* What do we need to do? (a cell is updated if active and local) */
+  const int do_i =
+      cell_is_active_gravity_mm(ci, e) && (ci->nodeID == e->nodeID);
+  const int do_j =
+      cell_is_active_gravity_mm(cj, e) && (cj->nodeID == e->nodeID);
+
+  /* Drift any multipole that lags behind the current time */
+  if (ci->grav.ti_old_multipole < e->ti_current) cell_drift_multipole(ci, e);
+  if (cj->grav.ti_old_multipole < e->ti_current) cell_drift_multipole(cj, e);
+
+  /* Interact: symmetric if both need updating, one-sided otherwise */
+  if (do_i && do_j)
+    runner_dopair_grav_mm_symmetric(r, ci, cj);
+  else if (do_i)
+    runner_dopair_grav_mm_nonsym(r, ci, cj);
+  else if (do_j)
+    runner_dopair_grav_mm_nonsym(r, cj, ci);
+}
+
+/**
+ * @brief Computes all the M-M interactions between all the well-separated (at
+ * rebuild) pairs of progenies of the two cells.
+ *
+ * @param r The #runner thread.
+ * @param flags The task flag containing the list of well-separated pairs as a
+ * bit-field.
+ * @param ci The first #cell.
+ * @param cj The second #cell.
+ */
+void runner_dopair_grav_mm_progenies(struct runner *r, const long long flags,
+                                     struct cell *restrict ci,
+                                     struct cell *restrict cj) {
+
+  /* Loop over all 8 x 8 pairs of progenies, skipping the NULL ones */
+  for (int i = 0; i < 8; i++) {
+    if (ci->progeny[i] != NULL) {
+      for (int j = 0; j < 8; j++) {
+        if (cj->progeny[j] != NULL) {
+
+          struct cell *cpi = ci->progeny[i];
+          struct cell *cpj = cj->progeny[j];
+
+          const int flag = i * 8 + j; /* Bit index of the (i, j) pair */
+
+          /* Did we agree to use an M-M interaction here at the last rebuild? */
+          if (flags & (1ULL << flag)) runner_dopair_grav_mm(r, cpi, cpj);
+        }
+      }
+    }
+  }
+}
+
+static INLINE void runner_dopair_recursive_grav_pm(struct runner *r,
+                                                   struct cell *ci,
+                                                   const struct cell *cj) {
+  /* Recursive M2P: ci's particles feel cj's multipole. Some constants */
+  const struct engine *e = r->e;
+  const int periodic = e->mesh->periodic;
+  const float dim[3] = {(float)e->mesh->dim[0], (float)e->mesh->dim[1],
+                        (float)e->mesh->dim[2]};
+  const float r_s_inv = e->mesh->r_s_inv;
+
+  /* Nothing to do unless ci is active and belongs to this node */
+  if (!(cell_is_active_gravity(ci, e) && ci->nodeID == e->nodeID)) return;
+
+#ifdef SWIFT_DEBUG_CHECKS
+  /* Early abort? */
+  if (ci->grav.count == 0 || cj->grav.count == 0)
+    error("Doing pair gravity on an empty cell !");
+
+  /* Sanity check */
+  if (ci == cj) error("Pair interaction between a cell and itself.");
+
+  if (cj->grav.ti_old_multipole != e->ti_current)
+    error("cj->grav.multipole not drifted.");
+#endif
+
+  /* Can we recurse further? Only ci is ever split; cj is kept as is */
+  if (ci->split) {
+
+    /* Loop over ci's children */
+    for (int k = 0; k < 8; k++) {
+      if (ci->progeny[k] != NULL)
+        runner_dopair_recursive_grav_pm(r, ci->progeny[k], cj);
+    }
+
+    /* Ok, let's do the interaction here */
+  } else {
+
+    /* Start by constructing particle caches */
+
+    /* Cache to play with */
+    struct gravity_cache *const ci_cache = &r->ci_gravity_cache;
+
+    /* Compute the padded count (always strictly larger than the count) */
+    const int gcount_i = ci->grav.count;
+    const int gcount_padded_i = gcount_i - (gcount_i % VEC_SIZE) + VEC_SIZE;
+
+#ifdef SWIFT_DEBUG_CHECKS
+    /* Check that we fit in cache */
+    if (gcount_i > ci_cache->count)
+      error("Not enough space in the cache! gcount_i=%d", gcount_i);
+#endif
+
+    /* Recover the multipole info and the CoM locations */
+    const struct multipole *multi_j = &cj->grav.multipole->m_pole;
+    const float r_max = cj->grav.multipole->r_max;
+    const float CoM_j[3] = {(float)(cj->grav.multipole->CoM[0]),
+                            (float)(cj->grav.multipole->CoM[1]),
+                            (float)(cj->grav.multipole->CoM[2])};
+
+    /* Fill the cache */
+    gravity_cache_populate_all_mpole(
+        e->max_active_bin, periodic, dim, ci_cache, ci->grav.parts, gcount_i,
+        gcount_padded_i, ci, CoM_j, r_max * r_max, e->gravity_properties);
+
+    /* Can we use the Newtonian version or do we need the truncated one ? */
+    if (!periodic) {
+
+      runner_dopair_grav_pm_full(ci_cache, gcount_padded_i, CoM_j, multi_j,
+                                 periodic, dim, e, ci->grav.parts, gcount_i,
+                                 cj);
+
+    } else {
+
+      runner_dopair_grav_pm_truncated(ci_cache, gcount_padded_i, CoM_j, multi_j,
+                                      dim, r_s_inv, e, ci->grav.parts, gcount_i,
+                                      cj);
+    }
+
+    /* Write back to the particles */
+    gravity_cache_write_back(ci_cache, ci->grav.parts, gcount_i);
+  }
+}
+
+/**
+ * @brief Computes the interaction of all the particles in a cell with all the
+ * particles of another cell.
+ *
+ * This function will try to recurse as far down the tree as possible and only
+ * default to direct summation if there is no better option.
+ *
+ * If using periodic BCs, we will abort the recursion if the distance between
+ * the cells is larger than the set threshold.
+ *
+ * @param r The #runner.
+ * @param ci The first #cell.
+ * @param cj The other #cell.
+ * @param gettimer Are we timing this ?
+ */
+void runner_dopair_recursive_grav(struct runner *r, struct cell *ci,
+                                  struct cell *cj, int gettimer) {
+
+  /* Engine, mesh and gravity-property constants */
+  const struct engine *e = r->e;
+  const int nodeID = e->nodeID;
+  const int periodic = e->mesh->periodic;
+  const double dim[3] = {e->mesh->dim[0], e->mesh->dim[1], e->mesh->dim[2]};
+  const double theta_crit2 = e->gravity_properties->theta_crit2;
+  const double max_distance = e->mesh->r_cut_max;
+
+  /* Nothing to do unless one cell is both active and owned by this node */
+  if (!((cell_is_active_gravity(ci, e) && ci->nodeID == nodeID) ||
+        (cell_is_active_gravity(cj, e) && cj->nodeID == nodeID)))
+    return;
+
+#ifdef SWIFT_DEBUG_CHECKS
+
+  const int gcount_i = ci->grav.count;
+  const int gcount_j = cj->grav.count;
+
+  /* Empty cells should never reach this function */
+  if (gcount_i == 0 || gcount_j == 0)
+    error("Doing pair gravity on an empty cell !");
+
+  /* A pair must be made of two distinct cells */
+  if (ci == cj) error("Pair interaction between a cell and itself.");
+
+  if (cell_is_active_gravity(ci, e) &&
+      ci->grav.ti_old_multipole != e->ti_current)
+    error("ci->grav.multipole not drifted.");
+  if (cell_is_active_gravity(cj, e) &&
+      cj->grav.ti_old_multipole != e->ti_current)
+    error("cj->grav.multipole not drifted.");
+#endif
+
+  TIMER_TIC;
+
+  /* Recover the multipole information */
+  struct gravity_tensors *const multi_i = ci->grav.multipole;
+  struct gravity_tensors *const multi_j = cj->grav.multipole;
+
+  /* Get the distance between the CoMs */
+  double dx = multi_i->CoM[0] - multi_j->CoM[0];
+  double dy = multi_i->CoM[1] - multi_j->CoM[1];
+  double dz = multi_i->CoM[2] - multi_j->CoM[2];
+
+  /* Apply periodic boundary conditions */
+  if (periodic) {
+    dx = nearest(dx, dim[0]);
+    dy = nearest(dy, dim[1]);
+    dz = nearest(dz, dim[2]);
+  }
+  const double r2 = dx * dx + dy * dy + dz * dz;
+
+  /* Minimal distance between any 2 particles in the two cells */
+  const double r_lr_check = sqrt(r2) - (multi_i->r_max + multi_j->r_max);
+
+  /* Are we beyond the distance where the truncated forces are 0? */
+  if (periodic && r_lr_check > max_distance) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+    /* Need to account for the interactions we missed */
+    if (cell_is_active_gravity(ci, e))
+      multi_i->pot.num_interacted += multi_j->m_pole.num_gpart;
+    if (cell_is_active_gravity(cj, e))
+      multi_j->pot.num_interacted += multi_i->m_pole.num_gpart;
+#endif
+    return;
+  }
+
+  /* OK, we actually need to compute this pair. Let's find the cheapest
+   * option... */
+
+  /* Can we use M-M interactions ? */
+  if (gravity_M2L_accept(multi_i->r_max, multi_j->r_max, theta_crit2, r2,
+                         multi_i->m_pole.max_softening,
+                         multi_j->m_pole.max_softening)) {
+
+    /* Go M-M */
+    runner_dopair_grav_mm(r, ci, cj);
+
+  } else if (!ci->split && !cj->split) {
+
+    /* We have two leaves. Go P-P. */
+    runner_dopair_grav_pp(r, ci, cj, /*symmetric*/ 1, /*allow_mpoles*/ 1);
+
+  } else {
+
+    /* Alright, we'll have to split and recurse. */
+    /* We know at least one of ci and cj is splittable */
+
+    const double ri_max = multi_i->r_max;
+    const double rj_max = multi_j->r_max;
+
+    /* Split the larger of the two cells and start over again */
+    if (ri_max > rj_max) {
+
+      /* ci is the larger cell: can it actually be split ? */
+      if (ci->split) {
+
+        /* Loop over ci's children */
+        for (int k = 0; k < 8; k++) {
+          if (ci->progeny[k] != NULL)
+            runner_dopair_recursive_grav(r, ci->progeny[k], cj, 0);
+        }
+
+      } else {
+        /* ci is a leaf, so cj must be the split one */
+
+        /* MATTHIEU: This could maybe be replaced by P-M interactions ?  */
+
+        /* Loop over cj's children */
+        for (int k = 0; k < 8; k++) {
+          if (cj->progeny[k] != NULL)
+            runner_dopair_recursive_grav(r, ci, cj->progeny[k], 0);
+        }
+      }
+    } else {
+
+      /* cj is at least as large as ci: can it actually be split ? */
+      if (cj->split) {
+
+        /* Loop over cj's children */
+        for (int k = 0; k < 8; k++) {
+          if (cj->progeny[k] != NULL)
+            runner_dopair_recursive_grav(r, ci, cj->progeny[k], 0);
+        }
+
+      } else {
+        /* cj is a leaf, so ci must be the split one */
+
+        /* MATTHIEU: This could maybe be replaced by P-M interactions ?  */
+
+        /* Loop over ci's children */
+        for (int k = 0; k < 8; k++) {
+          if (ci->progeny[k] != NULL)
+            runner_dopair_recursive_grav(r, ci->progeny[k], cj, 0);
+        }
+      }
+    }
+  }
+
+  if (gettimer) TIMER_TOC(timer_dosub_pair_grav);
+}
+
+/**
+ * @brief Computes the gravity interactions among the particles of a cell.
+ *
+ * This function will try to recurse as far down the tree as possible and only
+ * default to direct summation if there is no better option.
+ *
+ * @param r The #runner.
+ * @param c The #cell.
+ * @param gettimer Are we timing this ?
+ */
+void runner_doself_recursive_grav(struct runner *r, struct cell *c,
+                                  int gettimer) {
+
+  /* Handle on the engine */
+  const struct engine *e = r->e;
+
+#ifdef SWIFT_DEBUG_CHECKS
+  /* Empty cells should never reach this function */
+  if (c->grav.count == 0) error("Doing self gravity on an empty cell !");
+#endif
+
+  TIMER_TIC;
+
+  /* Inactive cells are skipped entirely */
+  if (!cell_is_active_gravity(c, e)) return;
+
+  /* If the cell is split, interact each progeny with itself, and with
+     each of its siblings. */
+  if (c->split) {
+
+    for (int j = 0; j < 8; j++) {
+      if (c->progeny[j] != NULL) {
+
+        runner_doself_recursive_grav(r, c->progeny[j], 0);
+
+        for (int k = j + 1; k < 8; k++) {
+          if (c->progeny[k] != NULL) {
+
+            runner_dopair_recursive_grav(r, c->progeny[j], c->progeny[k], 0);
+          }
+        }
+      }
+    }
+  }
+
+  /* If the cell is not split, then just go for it... */
+  else {
+
+    runner_doself_grav_pp(r, c);
+  }
+
+  if (gettimer) TIMER_TOC(timer_dosub_self_grav);
+}
+
+/**
+ * @brief Performs all M-M interactions between a given top-level cell and all
+ * the other top-levels that are far enough.
+ *
+ * @param r The thread #runner.
+ * @param ci The #cell of interest.
+ * @param timer Are we timing this ?
+ */
+void runner_do_grav_long_range(struct runner *r, struct cell *ci, int timer) {
+
+  /* Engine, mesh and gravity-property constants */
+  const struct engine *e = r->e;
+  const int periodic = e->mesh->periodic;
+  const double dim[3] = {e->mesh->dim[0], e->mesh->dim[1], e->mesh->dim[2]};
+  const double theta_crit2 = e->gravity_properties->theta_crit2;
+  const double max_distance2 = e->mesh->r_cut_max * e->mesh->r_cut_max;
+
+  TIMER_TIC;
+
+  /* Recover the list of top-level cells */
+  struct cell *cells = e->s->cells_top;
+  int *cells_with_particles = e->s->cells_with_particles_top;
+  const int nr_cells_with_particles = e->s->nr_cells_with_particles;
+
+  /* Anything to do here? */
+  if (!cell_is_active_gravity(ci, e)) return;
+
+  if (ci->nodeID != engine_rank)
+    error("Non-local cell in long-range gravity task!");
+
+  /* Drift the multipole to the current time if it is lagging behind */
+  if (ci->grav.ti_old_multipole < e->ti_current) cell_drift_multipole(ci, e);
+
+  /* Get this cell's multipole information */
+  struct gravity_tensors *const multi_i = ci->grav.multipole;
+
+  /* Find this cell's top-level (great-)parent */
+  struct cell *top = ci;
+  while (top->parent != NULL) top = top->parent;
+
+  /* Recover the top-level multipole (for distance checks) */
+  struct gravity_tensors *const multi_top = top->grav.multipole;
+  const double CoM_rebuild_top[3] = {multi_top->CoM_rebuild[0],
+                                     multi_top->CoM_rebuild[1],
+                                     multi_top->CoM_rebuild[2]};
+
+  /* Loop over the top-level cells with particles and go for a M-M
+   * interaction if well-separated */
+  for (int n = 0; n < nr_cells_with_particles; ++n) {
+
+    /* Handle on the top-level cell and its gravity business */
+    const struct cell *cj = &cells[cells_with_particles[n]];
+    const struct gravity_tensors *const multi_j = cj->grav.multipole;
+
+    /* Avoid self contributions */
+    if (top == cj) continue;
+
+    /* Skip empty cells */
+    if (multi_j->m_pole.M_000 == 0.f) continue;
+
+    /* Can we escape early in the periodic BC case? */
+    if (periodic) {
+
+      /* Minimal distance squared between any pair of particles */
+      const double min_radius2 =
+          cell_min_dist2_same_size(top, cj, periodic, dim);
+
+      /* Are we beyond the distance where the truncated forces are 0 ? */
+      if (min_radius2 > max_distance2) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+        /* Need to account for the interactions we missed */
+        multi_i->pot.num_interacted += multi_j->m_pole.num_gpart;
+#endif
+
+        /* Record that this multipole received a contribution */
+        multi_i->pot.interacted = 1;
+
+        /* We are done here. */
+        continue;
+      }
+    }
+
+    /* Get the distance between the CoMs at the last rebuild */
+    double dx_r = CoM_rebuild_top[0] - multi_j->CoM_rebuild[0];
+    double dy_r = CoM_rebuild_top[1] - multi_j->CoM_rebuild[1];
+    double dz_r = CoM_rebuild_top[2] - multi_j->CoM_rebuild[2];
+
+    /* Apply periodic boundary conditions */
+    if (periodic) {
+      dx_r = nearest(dx_r, dim[0]);
+      dy_r = nearest(dy_r, dim[1]);
+      dz_r = nearest(dz_r, dim[2]);
+    }
+    const double r2_rebuild = dx_r * dx_r + dy_r * dy_r + dz_r * dz_r;
+
+    /* Are we in charge of this cell pair? */
+    if (gravity_M2L_accept(multi_top->r_max_rebuild, multi_j->r_max_rebuild,
+                           theta_crit2, r2_rebuild,
+                           multi_top->m_pole.max_softening,
+                           multi_j->m_pole.max_softening)) {
+
+      /* Call the non-symmetric M-M interaction function for ci and cj */
+      runner_dopair_grav_mm_nonsym(r, ci, cj);
+      // runner_dopair_recursive_grav_pm(r, ci, cj);
+
+      /* Record that this multipole received a contribution */
+      multi_i->pot.interacted = 1;
+
+    } /* We are in charge of this pair */
+  }   /* Loop over top-level cells */
+
+  if (timer) TIMER_TOC(timer_dograv_long_range);
+}
diff --git a/src/runner_doiact_grav.h b/src/runner_doiact_grav.h
index b4ee8225a7aada8cf595ae7bca251d61b5226f64..34f3e9ec147574357620cc8f485889b87880f06e 100644
--- a/src/runner_doiact_grav.h
+++ b/src/runner_doiact_grav.h
@@ -20,1810 +20,30 @@
 #ifndef SWIFT_RUNNER_DOIACT_GRAV_H
 #define SWIFT_RUNNER_DOIACT_GRAV_H
 
-/* Includes. */
-#include "active.h"
-#include "cell.h"
-#include "gravity.h"
-#include "gravity_cache.h"
-#include "gravity_iact.h"
-#include "inline.h"
-#include "part.h"
-#include "space_getsid.h"
-#include "timers.h"
+#include "../config.h"
 
-/**
- * @brief Recursively propagate the multipoles down the tree by applying the
- * L2L and L2P kernels.
- *
- * @param r The #runner.
- * @param c The #cell we are working on.
- * @param timer Are we timing this ?
- */
-static INLINE void runner_do_grav_down(struct runner *r, struct cell *c,
-                                       int timer) {
-
-  /* Some constants */
-  const struct engine *e = r->e;
-
-  TIMER_TIC;
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (c->grav.ti_old_multipole != e->ti_current)
-    error("c->multipole not drifted.");
-  if (c->grav.multipole->pot.ti_init != e->ti_current)
-    error("c->field tensor not initialised");
-#endif
-
-  if (c->split) {
-
-    /* Node case */
-
-    /* Add the field-tensor to all the 8 progenitors */
-    for (int k = 0; k < 8; ++k) {
-      struct cell *cp = c->progeny[k];
-
-      /* Do we have a progenitor with any active g-particles ? */
-      if (cp != NULL && cell_is_active_gravity(cp, e)) {
-
-#ifdef SWIFT_DEBUG_CHECKS
-        if (cp->grav.ti_old_multipole != e->ti_current)
-          error("cp->multipole not drifted.");
-        if (cp->grav.multipole->pot.ti_init != e->ti_current)
-          error("cp->field tensor not initialised");
-#endif
-        /* If the tensor received any contribution, push it down */
-        if (c->grav.multipole->pot.interacted) {
-
-          struct grav_tensor shifted_tensor;
-
-          /* Shift the field tensor */
-          gravity_L2L(&shifted_tensor, &c->grav.multipole->pot,
-                      cp->grav.multipole->CoM, c->grav.multipole->CoM);
-
-          /* Add it to this level's tensor */
-          gravity_field_tensors_add(&cp->grav.multipole->pot, &shifted_tensor);
-        }
-
-        /* Recurse */
-        runner_do_grav_down(r, cp, 0);
-      }
-    }
-
-  } else {
-
-    /* Leaf case */
-
-    /* We can abort early if no interactions via multipole happened */
-    if (!c->grav.multipole->pot.interacted) return;
-
-    if (!cell_are_gpart_drifted(c, e)) error("Un-drifted gparts");
-
-    /* Cell properties */
-    struct gpart *gparts = c->grav.parts;
-    const int gcount = c->grav.count;
-    const struct grav_tensor *pot = &c->grav.multipole->pot;
-    const double CoM[3] = {c->grav.multipole->CoM[0], c->grav.multipole->CoM[1],
-                           c->grav.multipole->CoM[2]};
-
-    /* Apply accelerations to the particles */
-    for (int i = 0; i < gcount; ++i) {
-
-      /* Get a handle on the gpart */
-      struct gpart *gp = &gparts[i];
-
-      /* Update if active */
-      if (gpart_is_active(gp, e)) {
-
-#ifdef SWIFT_DEBUG_CHECKS
-        /* Check that particles have been drifted to the current time */
-        if (gp->ti_drift != e->ti_current)
-          error("gpart not drifted to current time");
-        if (c->grav.multipole->pot.ti_init != e->ti_current)
-          error("c->field tensor not initialised");
-
-        /* Check that we are not updated an inhibited particle */
-        if (gpart_is_inhibited(gp, e)) error("Updating an inhibited particle!");
-
-        /* Check that the particle was initialised */
-        if (gp->initialised == 0)
-          error("Adding forces to an un-initialised gpart.");
-#endif
-        /* Apply the kernel */
-        gravity_L2P(pot, CoM, gp);
-      }
-    }
-  }
-
-  if (timer) TIMER_TOC(timer_dograv_down);
-}
-
-/**
- * @brief Compute the non-truncated gravity interactions between all particles
- * of a cell and the particles of the other cell.
- *
- * The calculation is performed non-symmetrically using the pre-filled
- * #gravity_cache structures. The loop over the j cache should auto-vectorize.
- *
- * @param ci_cache #gravity_cache contaning the particles to be updated.
- * @param cj_cache #gravity_cache contaning the source particles.
- * @param gcount_i The number of particles in the cell i.
- * @param gcount_padded_j The number of particles in the cell j padded to the
- * vector length.
- * @param periodic Is the calculation using periodic BCs ?
- * @param dim The size of the simulation volume.
- *
- * @param e The #engine (for debugging checks only).
- * @param gparts_i The #gpart in cell i (for debugging checks only).
- * @param gparts_j The #gpart in cell j (for debugging checks only).
- * @param gcount_j The number of particles in the cell j (for debugging checks
- * only).
- */
-static INLINE void runner_dopair_grav_pp_full(
-    struct gravity_cache *restrict ci_cache,
-    struct gravity_cache *restrict cj_cache, const int gcount_i,
-    const int gcount_j, const int gcount_padded_j, const int periodic,
-    const float dim[3], const struct engine *restrict e,
-    struct gpart *restrict gparts_i, const struct gpart *restrict gparts_j) {
-
-  /* Loop over all particles in ci... */
-  for (int pid = 0; pid < gcount_i; pid++) {
-
-    /* Skip inactive particles */
-    if (!ci_cache->active[pid]) continue;
-
-    /* Skip particle that can use the multipole */
-    if (ci_cache->use_mpole[pid]) continue;
-
-#ifdef SWIFT_DEBUG_CHECKS
-    if (!gpart_is_active(&gparts_i[pid], e))
-      error("Inactive particle went through the cache");
-#endif
-
-    const float x_i = ci_cache->x[pid];
-    const float y_i = ci_cache->y[pid];
-    const float z_i = ci_cache->z[pid];
-    const float h_i = ci_cache->epsilon[pid];
-
-    /* Local accumulators for the acceleration and potential */
-    float a_x = 0.f, a_y = 0.f, a_z = 0.f, pot = 0.f;
-
-    /* Make the compiler understand we are in happy vectorization land */
-    swift_align_information(float, cj_cache->x, SWIFT_CACHE_ALIGNMENT);
-    swift_align_information(float, cj_cache->y, SWIFT_CACHE_ALIGNMENT);
-    swift_align_information(float, cj_cache->z, SWIFT_CACHE_ALIGNMENT);
-    swift_align_information(float, cj_cache->m, SWIFT_CACHE_ALIGNMENT);
-    swift_align_information(float, cj_cache->epsilon, SWIFT_CACHE_ALIGNMENT);
-    swift_assume_size(gcount_padded_j, VEC_SIZE);
-
-    /* Loop over every particle in the other cell. */
-    for (int pjd = 0; pjd < gcount_padded_j; pjd++) {
-
-      /* Get info about j */
-      const float x_j = cj_cache->x[pjd];
-      const float y_j = cj_cache->y[pjd];
-      const float z_j = cj_cache->z[pjd];
-      const float mass_j = cj_cache->m[pjd];
-      const float h_j = cj_cache->epsilon[pjd];
-
-      /* Compute the pairwise distance. */
-      float dx = x_j - x_i;
-      float dy = y_j - y_i;
-      float dz = z_j - z_i;
-
-      /* Correct for periodic BCs */
-      if (periodic) {
-        dx = nearestf(dx, dim[0]);
-        dy = nearestf(dy, dim[1]);
-        dz = nearestf(dz, dim[2]);
-      }
-
-      const float r2 = dx * dx + dy * dy + dz * dz;
-
-      /* Pick the maximal softening length of i and j */
-      const float h = max(h_i, h_j);
-      const float h2 = h * h;
-      const float h_inv = 1.f / h;
-      const float h_inv_3 = h_inv * h_inv * h_inv;
-
-#ifdef SWIFT_DEBUG_CHECKS
-      if (r2 == 0.f && h2 == 0.)
-        error("Interacting particles with 0 distance and 0 softening.");
-
-      /* Check that particles have been drifted to the current time */
-      if (gparts_i[pid].ti_drift != e->ti_current)
-        error("gpi not drifted to current time");
-      if (pjd < gcount_j && gparts_j[pjd].ti_drift != e->ti_current &&
-          !gpart_is_inhibited(&gparts_j[pjd], e))
-        error("gpj not drifted to current time");
-
-      /* Check that we are not updated an inhibited particle */
-      if (gpart_is_inhibited(&gparts_i[pid], e))
-        error("Updating an inhibited particle!");
-
-      /* Check that the particle we interact with was not inhibited */
-      if (pjd < gcount_j && gpart_is_inhibited(&gparts_j[pjd], e) &&
-          mass_j != 0.f)
-        error("Inhibited particle used as gravity source.");
-
-      /* Check that the particle was initialised */
-      if (gparts_i[pid].initialised == 0)
-        error("Adding forces to an un-initialised gpart.");
-#endif
-
-      /* Interact! */
-      float f_ij, pot_ij;
-      runner_iact_grav_pp_full(r2, h2, h_inv, h_inv_3, mass_j, &f_ij, &pot_ij);
-
-      /* Store it back */
-      a_x += f_ij * dx;
-      a_y += f_ij * dy;
-      a_z += f_ij * dz;
-      pot += pot_ij;
-
-#ifdef SWIFT_DEBUG_CHECKS
-      /* Update the interaction counter if it's not a padded gpart */
-      if (pjd < gcount_j && !gpart_is_inhibited(&gparts_j[pjd], e))
-        gparts_i[pid].num_interacted++;
-#endif
-    }
-
-    /* Store everything back in cache */
-    ci_cache->a_x[pid] += a_x;
-    ci_cache->a_y[pid] += a_y;
-    ci_cache->a_z[pid] += a_z;
-    ci_cache->pot[pid] += pot;
-  }
-}
-
-/**
- * @brief Compute the truncated gravity interactions between all particles
- * of a cell and the particles of the other cell.
- *
- * The calculation is performed non-symmetrically using the pre-filled
- * #gravity_cache structures. The loop over the j cache should auto-vectorize.
- *
- * This function only makes sense in periodic BCs.
- *
- * @param ci_cache #gravity_cache contaning the particles to be updated.
- * @param cj_cache #gravity_cache contaning the source particles.
- * @param gcount_i The number of particles in the cell i.
- * @param gcount_padded_j The number of particles in the cell j padded to the
- * vector length.
- * @param dim The size of the simulation volume.
- * @param r_s_inv The inverse of the gravity-mesh smoothing-scale.
- *
- * @param e The #engine (for debugging checks only).
- * @param gparts_i The #gpart in cell i (for debugging checks only).
- * @param gparts_j The #gpart in cell j (for debugging checks only).
- * @param gcount_j The number of particles in the cell j (for debugging checks
- * only).
- */
-static INLINE void runner_dopair_grav_pp_truncated(
-    struct gravity_cache *restrict ci_cache,
-    struct gravity_cache *restrict cj_cache, const int gcount_i,
-    const int gcount_j, const int gcount_padded_j, const float dim[3],
-    const float r_s_inv, const struct engine *restrict e,
-    struct gpart *restrict gparts_i, const struct gpart *restrict gparts_j) {
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (!e->s->periodic)
-    error("Calling truncated PP function in non-periodic setup.");
-#endif
-
-  /* Loop over all particles in ci... */
-  for (int pid = 0; pid < gcount_i; pid++) {
-
-    /* Skip inactive particles */
-    if (!ci_cache->active[pid]) continue;
-
-    /* Skip particle that can use the multipole */
-    if (ci_cache->use_mpole[pid]) continue;
-
-#ifdef SWIFT_DEBUG_CHECKS
-    if (!gpart_is_active(&gparts_i[pid], e))
-      error("Inactive particle went through the cache");
-#endif
-
-    const float x_i = ci_cache->x[pid];
-    const float y_i = ci_cache->y[pid];
-    const float z_i = ci_cache->z[pid];
-    const float h_i = ci_cache->epsilon[pid];
-
-    /* Local accumulators for the acceleration and potential */
-    float a_x = 0.f, a_y = 0.f, a_z = 0.f, pot = 0.f;
-
-    /* Make the compiler understand we are in happy vectorization land */
-    swift_align_information(float, cj_cache->x, SWIFT_CACHE_ALIGNMENT);
-    swift_align_information(float, cj_cache->y, SWIFT_CACHE_ALIGNMENT);
-    swift_align_information(float, cj_cache->z, SWIFT_CACHE_ALIGNMENT);
-    swift_align_information(float, cj_cache->m, SWIFT_CACHE_ALIGNMENT);
-    swift_align_information(float, cj_cache->epsilon, SWIFT_CACHE_ALIGNMENT);
-    swift_assume_size(gcount_padded_j, VEC_SIZE);
-
-    /* Loop over every particle in the other cell. */
-    for (int pjd = 0; pjd < gcount_padded_j; pjd++) {
-
-      /* Get info about j */
-      const float x_j = cj_cache->x[pjd];
-      const float y_j = cj_cache->y[pjd];
-      const float z_j = cj_cache->z[pjd];
-      const float mass_j = cj_cache->m[pjd];
-      const float h_j = cj_cache->epsilon[pjd];
-
-      /* Compute the pairwise distance. */
-      float dx = x_j - x_i;
-      float dy = y_j - y_i;
-      float dz = z_j - z_i;
-
-      /* Correct for periodic BCs */
-      dx = nearestf(dx, dim[0]);
-      dy = nearestf(dy, dim[1]);
-      dz = nearestf(dz, dim[2]);
-
-      const float r2 = dx * dx + dy * dy + dz * dz;
-
-      /* Pick the maximal softening length of i and j */
-      const float h = max(h_i, h_j);
-      const float h2 = h * h;
-      const float h_inv = 1.f / h;
-      const float h_inv_3 = h_inv * h_inv * h_inv;
-
-#ifdef SWIFT_DEBUG_CHECKS
-      if (r2 == 0.f && h2 == 0.)
-        error("Interacting particles with 0 distance and 0 softening.");
-
-      /* Check that particles have been drifted to the current time */
-      if (gparts_i[pid].ti_drift != e->ti_current)
-        error("gpi not drifted to current time");
-      if (pjd < gcount_j && gparts_j[pjd].ti_drift != e->ti_current &&
-          !gpart_is_inhibited(&gparts_j[pjd], e))
-        error("gpj not drifted to current time");
-
-      /* Check that we are not updated an inhibited particle */
-      if (gpart_is_inhibited(&gparts_i[pid], e))
-        error("Updating an inhibited particle!");
-
-      /* Check that the particle we interact with was not inhibited */
-      if (pjd < gcount_j && gpart_is_inhibited(&gparts_j[pjd], e) &&
-          mass_j != 0.f)
-        error("Inhibited particle used as gravity source.");
-
-      /* Check that the particle was initialised */
-      if (gparts_i[pid].initialised == 0)
-        error("Adding forces to an un-initialised gpart.");
-#endif
-
-      /* Interact! */
-      float f_ij, pot_ij;
-      runner_iact_grav_pp_truncated(r2, h2, h_inv, h_inv_3, mass_j, r_s_inv,
-                                    &f_ij, &pot_ij);
-
-      /* Store it back */
-      a_x += f_ij * dx;
-      a_y += f_ij * dy;
-      a_z += f_ij * dz;
-      pot += pot_ij;
-
-#ifdef SWIFT_DEBUG_CHECKS
-      /* Update the interaction counter if it's not a padded gpart */
-      if (pjd < gcount_j && !gpart_is_inhibited(&gparts_j[pjd], e))
-        gparts_i[pid].num_interacted++;
-#endif
-    }
-
-    /* Store everything back in cache */
-    ci_cache->a_x[pid] += a_x;
-    ci_cache->a_y[pid] += a_y;
-    ci_cache->a_z[pid] += a_z;
-    ci_cache->pot[pid] += pot;
-  }
-}
-
-/**
- * @brief Compute the gravity interactions between all particles
- * of a cell and the multipole of the other cell.
- *
- * The calculation is performedusing the pre-filled
- * #gravity_cache structure. The loop over the i cache should auto-vectorize.
- *
- * @param ci_cache #gravity_cache contaning the particles to be updated.
- * @param gcount_padded_i The number of particles in the cell i padded to the
- * vector length.
- * @param CoM_j Position of the #multipole in #cell j.
- * @param multi_j The #multipole in #cell j.
- * @param periodic Is the calculation using periodic BCs ?
- * @param dim The size of the simulation volume.
- *
- * @param e The #engine (for debugging checks only).
- * @param gparts_i The #gpart in cell i (for debugging checks only).
- * @param gcount_i The number of particles in the cell i (for debugging checks
- * only).
- * @param cj The #cell j (for debugging checks only).
- */
-static INLINE void runner_dopair_grav_pm_full(
-    struct gravity_cache *ci_cache, const int gcount_padded_i,
-    const float CoM_j[3], const struct multipole *restrict multi_j,
-    const int periodic, const float dim[3], const struct engine *restrict e,
-    struct gpart *restrict gparts_i, const int gcount_i,
-    const struct cell *restrict cj) {
-
-  /* Make the compiler understand we are in happy vectorization land */
-  swift_declare_aligned_ptr(float, x, ci_cache->x, SWIFT_CACHE_ALIGNMENT);
-  swift_declare_aligned_ptr(float, y, ci_cache->y, SWIFT_CACHE_ALIGNMENT);
-  swift_declare_aligned_ptr(float, z, ci_cache->z, SWIFT_CACHE_ALIGNMENT);
-  swift_declare_aligned_ptr(float, epsilon, ci_cache->epsilon,
-                            SWIFT_CACHE_ALIGNMENT);
-  swift_declare_aligned_ptr(float, a_x, ci_cache->a_x, SWIFT_CACHE_ALIGNMENT);
-  swift_declare_aligned_ptr(float, a_y, ci_cache->a_y, SWIFT_CACHE_ALIGNMENT);
-  swift_declare_aligned_ptr(float, a_z, ci_cache->a_z, SWIFT_CACHE_ALIGNMENT);
-  swift_declare_aligned_ptr(float, pot, ci_cache->pot, SWIFT_CACHE_ALIGNMENT);
-  swift_declare_aligned_ptr(int, active, ci_cache->active,
-                            SWIFT_CACHE_ALIGNMENT);
-  swift_declare_aligned_ptr(int, use_mpole, ci_cache->use_mpole,
-                            SWIFT_CACHE_ALIGNMENT);
-  swift_assume_size(gcount_padded_i, VEC_SIZE);
-
-  /* Loop over all particles in ci... */
-  for (int pid = 0; pid < gcount_padded_i; pid++) {
-
-    /* Skip inactive particles */
-    if (!active[pid]) continue;
-
-    /* Skip particle that cannot use the multipole */
-    if (!use_mpole[pid]) continue;
-
-#ifdef SWIFT_DEBUG_CHECKS
-    if (pid < gcount_i && !gpart_is_active(&gparts_i[pid], e))
-      error("Active particle went through the cache");
-
-    /* Check that particles have been drifted to the current time */
-    if (gparts_i[pid].ti_drift != e->ti_current)
-      error("gpi not drifted to current time");
-
-    /* Check that we are not updated an inhibited particle */
-    if (gpart_is_inhibited(&gparts_i[pid], e))
-      error("Updating an inhibited particle!");
-
-    /* Check that the particle was initialised */
-    if (gparts_i[pid].initialised == 0)
-      error("Adding forces to an un-initialised gpart.");
-
-    if (pid >= gcount_i) error("Adding forces to padded particle");
-#endif
-
-    const float x_i = x[pid];
-    const float y_i = y[pid];
-    const float z_i = z[pid];
-
-    /* Some powers of the softening length */
-    const float h_i = epsilon[pid];
-    const float h_inv_i = 1.f / h_i;
-
-    /* Distance to the Multipole */
-    float dx = CoM_j[0] - x_i;
-    float dy = CoM_j[1] - y_i;
-    float dz = CoM_j[2] - z_i;
-
-    /* Apply periodic BCs? */
-    if (periodic) {
-      dx = nearestf(dx, dim[0]);
-      dy = nearestf(dy, dim[1]);
-      dz = nearestf(dz, dim[2]);
-    }
-
-    const float r2 = dx * dx + dy * dy + dz * dz;
-
-#ifdef SWIFT_DEBUG_CHECKS
-    const float r_max_j = cj->grav.multipole->r_max;
-    const float r_max2 = r_max_j * r_max_j;
-    const float theta_crit2 = e->gravity_properties->theta_crit2;
-
-    /* Note: 0.99 and 1.1 to avoid FP rounding false-positives */
-    if (!gravity_M2P_accept(r_max2, theta_crit2 * 1.1, r2, 0.99 * h_i))
-      error(
-          "use_mpole[i] set when M2P accept fails CoM=[%e %e %e] pos=[%e %e "
-          "%e], rmax=%e r=%e epsilon=%e",
-          CoM_j[0], CoM_j[1], CoM_j[2], x_i, y_i, z_i, r_max_j, sqrtf(r2), h_i);
-#endif
-
-    /* Interact! */
-    float f_x, f_y, f_z, pot_ij;
-    runner_iact_grav_pm_full(dx, dy, dz, r2, h_i, h_inv_i, multi_j, &f_x, &f_y,
-                             &f_z, &pot_ij);
-
-    /* Store it back */
-    a_x[pid] += f_x;
-    a_y[pid] += f_y;
-    a_z[pid] += f_z;
-    pot[pid] += pot_ij;
-
-#ifdef SWIFT_DEBUG_CHECKS
-    /* Update the interaction counter */
-    if (pid < gcount_i)
-      gparts_i[pid].num_interacted += cj->grav.multipole->m_pole.num_gpart;
-#endif
-  }
-}
-
-/**
- * @brief Compute the gravity interactions between all particles
- * of a cell and the multipole of the other cell.
- *
- * The calculation is performedusing the pre-filled
- * #gravity_cache structure. The loop over the i cache should auto-vectorize.
- *
- * This function only makes sense in periodic BCs.
- *
- * @param ci_cache #gravity_cache contaning the particles to be updated.
- * @param gcount_padded_i The number of particles in the cell i padded to the
- * vector length.
- * @param CoM_j Position of the #multipole in #cell j.
- * @param multi_j The #multipole in #cell j.
- * @param dim The size of the simulation volume.
- * @param r_s_inv The inverse of the gravity-mesh smoothing-scale.
- *
- * @param e The #engine (for debugging checks only).
- * @param gparts_i The #gpart in cell i (for debugging checks only).
- * @param gcount_i The number of particles in the cell i (for debugging checks
- * only).
- * @param cj The #cell j (for debugging checks only).
- */
-static INLINE void runner_dopair_grav_pm_truncated(
-    struct gravity_cache *ci_cache, const int gcount_padded_i,
-    const float CoM_j[3], const struct multipole *restrict multi_j,
-    const float dim[3], const float r_s_inv, const struct engine *restrict e,
-    struct gpart *restrict gparts_i, const int gcount_i,
-    const struct cell *restrict cj) {
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (!e->s->periodic)
-    error("Calling truncated PP function in non-periodic setup.");
-#endif
-
-  /* Make the compiler understand we are in happy vectorization land */
-  swift_declare_aligned_ptr(float, x, ci_cache->x, SWIFT_CACHE_ALIGNMENT);
-  swift_declare_aligned_ptr(float, y, ci_cache->y, SWIFT_CACHE_ALIGNMENT);
-  swift_declare_aligned_ptr(float, z, ci_cache->z, SWIFT_CACHE_ALIGNMENT);
-  swift_declare_aligned_ptr(float, epsilon, ci_cache->epsilon,
-                            SWIFT_CACHE_ALIGNMENT);
-  swift_declare_aligned_ptr(float, a_x, ci_cache->a_x, SWIFT_CACHE_ALIGNMENT);
-  swift_declare_aligned_ptr(float, a_y, ci_cache->a_y, SWIFT_CACHE_ALIGNMENT);
-  swift_declare_aligned_ptr(float, a_z, ci_cache->a_z, SWIFT_CACHE_ALIGNMENT);
-  swift_declare_aligned_ptr(float, pot, ci_cache->pot, SWIFT_CACHE_ALIGNMENT);
-  swift_declare_aligned_ptr(int, active, ci_cache->active,
-                            SWIFT_CACHE_ALIGNMENT);
-  swift_declare_aligned_ptr(int, use_mpole, ci_cache->use_mpole,
-                            SWIFT_CACHE_ALIGNMENT);
-  swift_assume_size(gcount_padded_i, VEC_SIZE);
-
-  /* Loop over all particles in ci... */
-  for (int pid = 0; pid < gcount_padded_i; pid++) {
-
-    /* Skip inactive particles */
-    if (!active[pid]) continue;
-
-    /* Skip particle that cannot use the multipole */
-    if (!use_mpole[pid]) continue;
-
-#ifdef SWIFT_DEBUG_CHECKS
-    if (pid < gcount_i && !gpart_is_active(&gparts_i[pid], e))
-      error("Active particle went through the cache");
-
-    /* Check that particles have been drifted to the current time */
-    if (gparts_i[pid].ti_drift != e->ti_current)
-      error("gpi not drifted to current time");
-
-    /* Check that we are not updated an inhibited particle */
-    if (gpart_is_inhibited(&gparts_i[pid], e))
-      error("Updating an inhibited particle!");
-
-    /* Check that the particle was initialised */
-    if (gparts_i[pid].initialised == 0)
-      error("Adding forces to an un-initialised gpart.");
-
-    if (pid >= gcount_i) error("Adding forces to padded particle");
-#endif
-
-    const float x_i = x[pid];
-    const float y_i = y[pid];
-    const float z_i = z[pid];
-
-    /* Some powers of the softening length */
-    const float h_i = epsilon[pid];
-    const float h_inv_i = 1.f / h_i;
-
-    /* Distance to the Multipole */
-    float dx = CoM_j[0] - x_i;
-    float dy = CoM_j[1] - y_i;
-    float dz = CoM_j[2] - z_i;
-
-    /* Apply periodic BCs */
-    dx = nearestf(dx, dim[0]);
-    dy = nearestf(dy, dim[1]);
-    dz = nearestf(dz, dim[2]);
-
-    const float r2 = dx * dx + dy * dy + dz * dz;
-
-#ifdef SWIFT_DEBUG_CHECKS
-    const float r_max_j = cj->grav.multipole->r_max;
-    const float r_max2 = r_max_j * r_max_j;
-    const float theta_crit2 = e->gravity_properties->theta_crit2;
-
-    /* 0.99 and 1.1 to avoid FP rounding false-positives */
-    if (!gravity_M2P_accept(r_max2, theta_crit2 * 1.1, r2, 0.99 * h_i))
-      error(
-          "use_mpole[i] set when M2P accept fails CoM=[%e %e %e] pos=[%e %e "
-          "%e], rmax=%e",
-          CoM_j[0], CoM_j[1], CoM_j[2], x_i, y_i, z_i, r_max_j);
-#endif
-
-    /* Interact! */
-    float f_x, f_y, f_z, pot_ij;
-    runner_iact_grav_pm_truncated(dx, dy, dz, r2, h_i, h_inv_i, r_s_inv,
-                                  multi_j, &f_x, &f_y, &f_z, &pot_ij);
-
-    /* Store it back */
-    a_x[pid] += f_x;
-    a_y[pid] += f_y;
-    a_z[pid] += f_z;
-    pot[pid] += pot_ij;
-
-#ifdef SWIFT_DEBUG_CHECKS
-    /* Update the interaction counter */
-    if (pid < gcount_i)
-      gparts_i[pid].num_interacted += cj->grav.multipole->m_pole.num_gpart;
-#endif
-  }
-}
-
-/**
- * @brief Computes the interaction of all the particles in a cell with all the
- * particles of another cell.
- *
- * This function switches between the full potential and the truncated one
- * depending on needs. It will also use the M2P (multipole) interaction
- * for the subset of particles in either cell for which the distance criterion
- * is valid.
- *
- * This function starts by constructing the require #gravity_cache for both
- * cells and then call the specialised functions doing the actual work on
- * the caches. It then write the data back to the particles.
- *
- * @param r The #runner.
- * @param ci The first #cell.
- * @param cj The other #cell.
- * @param symmetric Are we updating both cells (1) or just ci (0) ?
- * @param allow_mpole Are we allowing the use of P2M interactions ?
- */
-static INLINE void runner_dopair_grav_pp(struct runner *r, struct cell *ci,
-                                         struct cell *cj, const int symmetric,
-                                         const int allow_mpole) {
-
-  /* Recover some useful constants */
-  const struct engine *e = r->e;
-  const int periodic = e->mesh->periodic;
-  const float dim[3] = {(float)e->mesh->dim[0], (float)e->mesh->dim[1],
-                        (float)e->mesh->dim[2]};
-  const float r_s_inv = e->mesh->r_s_inv;
-  const double min_trunc = e->mesh->r_cut_min;
-
-  TIMER_TIC;
-
-  /* Record activity status */
-  const int ci_active =
-      cell_is_active_gravity(ci, e) && (ci->nodeID == e->nodeID);
-  const int cj_active =
-      cell_is_active_gravity(cj, e) && (cj->nodeID == e->nodeID);
-
-  /* Anything to do here? */
-  if (!ci_active && !cj_active) return;
-  if (!ci_active && !symmetric) return;
-
-  /* Check that we are not doing something stupid */
-  if (ci->split || cj->split) error("Running P-P on splitable cells");
-
-  /* Let's start by checking things are drifted */
-  if (!cell_are_gpart_drifted(ci, e)) error("Un-drifted gparts");
-  if (!cell_are_gpart_drifted(cj, e)) error("Un-drifted gparts");
-  if (cj_active && ci->grav.ti_old_multipole != e->ti_current)
-    error("Un-drifted multipole");
-  if (ci_active && cj->grav.ti_old_multipole != e->ti_current)
-    error("Un-drifted multipole");
-
-  /* Caches to play with */
-  struct gravity_cache *const ci_cache = &r->ci_gravity_cache;
-  struct gravity_cache *const cj_cache = &r->cj_gravity_cache;
-
-  /* Shift to apply to the particles in each cell */
-  const double shift_i[3] = {0., 0., 0.};
-  const double shift_j[3] = {0., 0., 0.};
-
-  /* Recover the multipole info and shift the CoM locations */
-  const float rmax_i = ci->grav.multipole->r_max;
-  const float rmax_j = cj->grav.multipole->r_max;
-  const float rmax2_i = rmax_i * rmax_i;
-  const float rmax2_j = rmax_j * rmax_j;
-  const struct multipole *multi_i = &ci->grav.multipole->m_pole;
-  const struct multipole *multi_j = &cj->grav.multipole->m_pole;
-  const float CoM_i[3] = {(float)(ci->grav.multipole->CoM[0] - shift_i[0]),
-                          (float)(ci->grav.multipole->CoM[1] - shift_i[1]),
-                          (float)(ci->grav.multipole->CoM[2] - shift_i[2])};
-  const float CoM_j[3] = {(float)(cj->grav.multipole->CoM[0] - shift_j[0]),
-                          (float)(cj->grav.multipole->CoM[1] - shift_j[1]),
-                          (float)(cj->grav.multipole->CoM[2] - shift_j[2])};
-
-  /* Start by constructing particle caches */
-
-  /* Computed the padded counts */
-  const int gcount_i = ci->grav.count;
-  const int gcount_j = cj->grav.count;
-  const int gcount_padded_i = gcount_i - (gcount_i % VEC_SIZE) + VEC_SIZE;
-  const int gcount_padded_j = gcount_j - (gcount_j % VEC_SIZE) + VEC_SIZE;
-
-#ifdef SWIFT_DEBUG_CHECKS
-  /* Check that we fit in cache */
-  if (gcount_i > ci_cache->count || gcount_j > cj_cache->count)
-    error("Not enough space in the caches! gcount_i=%d gcount_j=%d", gcount_i,
-          gcount_j);
-#endif
-
-  /* Fill the caches */
-  gravity_cache_populate(e->max_active_bin, allow_mpole, periodic, dim,
-                         ci_cache, ci->grav.parts, gcount_i, gcount_padded_i,
-                         shift_i, CoM_j, rmax2_j, ci, e->gravity_properties);
-  gravity_cache_populate(e->max_active_bin, allow_mpole, periodic, dim,
-                         cj_cache, cj->grav.parts, gcount_j, gcount_padded_j,
-                         shift_j, CoM_i, rmax2_i, cj, e->gravity_properties);
-
-  /* Can we use the Newtonian version or do we need the truncated one ? */
-  if (!periodic) {
-
-    /* Not periodic -> Can always use Newtonian potential */
-
-    /* Let's updated the active cell(s) only */
-    if (ci_active) {
-
-      /* First the P2P */
-      runner_dopair_grav_pp_full(ci_cache, cj_cache, gcount_i, gcount_j,
-                                 gcount_padded_j, periodic, dim, e,
-                                 ci->grav.parts, cj->grav.parts);
-
-      /* Then the M2P */
-      if (allow_mpole)
-        runner_dopair_grav_pm_full(ci_cache, gcount_padded_i, CoM_j, multi_j,
-                                   periodic, dim, e, ci->grav.parts, gcount_i,
-                                   cj);
-    }
-    if (cj_active && symmetric) {
-
-      /* First the P2P */
-      runner_dopair_grav_pp_full(cj_cache, ci_cache, gcount_j, gcount_i,
-                                 gcount_padded_i, periodic, dim, e,
-                                 cj->grav.parts, ci->grav.parts);
-
-      /* Then the M2P */
-      if (allow_mpole)
-        runner_dopair_grav_pm_full(cj_cache, gcount_padded_j, CoM_i, multi_i,
-                                   periodic, dim, e, cj->grav.parts, gcount_j,
-                                   ci);
-    }
-
-  } else { /* Periodic BC */
-
-    /* Get the relative distance between the CoMs */
-    const double dx[3] = {CoM_j[0] - CoM_i[0], CoM_j[1] - CoM_i[1],
-                          CoM_j[2] - CoM_i[2]};
-    const double r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
-
-    /* Get the maximal distance between any two particles */
-    const double max_r = sqrt(r2) + rmax_i + rmax_j;
-
-    /* Do we need to use the truncated interactions ? */
-    if (max_r > min_trunc) {
-
-      /* Periodic but far-away cells must use the truncated potential */
-
-      /* Let's updated the active cell(s) only */
-      if (ci_active) {
-
-        /* First the (truncated) P2P */
-        runner_dopair_grav_pp_truncated(ci_cache, cj_cache, gcount_i, gcount_j,
-                                        gcount_padded_j, dim, r_s_inv, e,
-                                        ci->grav.parts, cj->grav.parts);
-
-        /* Then the M2P */
-        if (allow_mpole)
-          runner_dopair_grav_pm_truncated(ci_cache, gcount_padded_i, CoM_j,
-                                          multi_j, dim, r_s_inv, e,
-                                          ci->grav.parts, gcount_i, cj);
-      }
-      if (cj_active && symmetric) {
-
-        /* First the (truncated) P2P */
-        runner_dopair_grav_pp_truncated(cj_cache, ci_cache, gcount_j, gcount_i,
-                                        gcount_padded_i, dim, r_s_inv, e,
-                                        cj->grav.parts, ci->grav.parts);
-
-        /* Then the M2P */
-        if (allow_mpole)
-          runner_dopair_grav_pm_truncated(cj_cache, gcount_padded_j, CoM_i,
-                                          multi_i, dim, r_s_inv, e,
-                                          cj->grav.parts, gcount_j, ci);
-      }
-
-    } else {
-
-      /* Periodic but close-by cells can use the full Newtonian potential */
-
-      /* Let's updated the active cell(s) only */
-      if (ci_active) {
-
-        /* First the (Newtonian) P2P */
-        runner_dopair_grav_pp_full(ci_cache, cj_cache, gcount_i, gcount_j,
-                                   gcount_padded_j, periodic, dim, e,
-                                   ci->grav.parts, cj->grav.parts);
-
-        /* Then the M2P */
-        if (allow_mpole)
-          runner_dopair_grav_pm_full(ci_cache, gcount_padded_i, CoM_j, multi_j,
-                                     periodic, dim, e, ci->grav.parts, gcount_i,
-                                     cj);
-      }
-      if (cj_active && symmetric) {
-
-        /* First the (Newtonian) P2P */
-        runner_dopair_grav_pp_full(cj_cache, ci_cache, gcount_j, gcount_i,
-                                   gcount_padded_i, periodic, dim, e,
-                                   cj->grav.parts, ci->grav.parts);
-
-        /* Then the M2P */
-        if (allow_mpole)
-          runner_dopair_grav_pm_full(cj_cache, gcount_padded_j, CoM_i, multi_i,
-                                     periodic, dim, e, cj->grav.parts, gcount_j,
-                                     ci);
-      }
-    }
-  }
-
-  /* Write back to the particles */
-  if (ci_active) gravity_cache_write_back(ci_cache, ci->grav.parts, gcount_i);
-  if (cj_active && symmetric)
-    gravity_cache_write_back(cj_cache, cj->grav.parts, gcount_j);
-
-  TIMER_TOC(timer_dopair_grav_pp);
-}
-
-/**
- * @brief Compute the non-truncated gravity interactions between all particles
- * of a cell and the particles of the other cell.
- *
- * The calculation is performed non-symmetrically using the pre-filled
- * #gravity_cache structures. The loop over the j cache should auto-vectorize.
- *
- * @param ci_cache #gravity_cache contaning the particles to be updated.
- * @param gcount The number of particles in the cell.
- * @param gcount_padded The number of particles in the cell padded to the
- * vector length.
- *
- * @param e The #engine (for debugging checks only).
- * @param gparts The #gpart in the cell (for debugging checks only).
- */
-static INLINE void runner_doself_grav_pp_full(
-    struct gravity_cache *restrict ci_cache, const int gcount,
-    const int gcount_padded, const struct engine *e, struct gpart *gparts) {
-
-  /* Loop over all particles in ci... */
-  for (int pid = 0; pid < gcount; pid++) {
-
-    /* Skip inactive particles */
-    if (!ci_cache->active[pid]) continue;
-
-    const float x_i = ci_cache->x[pid];
-    const float y_i = ci_cache->y[pid];
-    const float z_i = ci_cache->z[pid];
-    const float h_i = ci_cache->epsilon[pid];
-
-    /* Local accumulators for the acceleration */
-    float a_x = 0.f, a_y = 0.f, a_z = 0.f, pot = 0.f;
-
-    /* Make the compiler understand we are in happy vectorization land */
-    swift_align_information(float, ci_cache->x, SWIFT_CACHE_ALIGNMENT);
-    swift_align_information(float, ci_cache->y, SWIFT_CACHE_ALIGNMENT);
-    swift_align_information(float, ci_cache->z, SWIFT_CACHE_ALIGNMENT);
-    swift_align_information(float, ci_cache->m, SWIFT_CACHE_ALIGNMENT);
-    swift_align_information(float, ci_cache->epsilon, SWIFT_CACHE_ALIGNMENT);
-    swift_assume_size(gcount_padded, VEC_SIZE);
-
-    /* Loop over every other particle in the cell. */
-    for (int pjd = 0; pjd < gcount_padded; pjd++) {
-
-      /* No self interaction */
-      if (pid == pjd) continue;
-
-      /* Get info about j */
-      const float x_j = ci_cache->x[pjd];
-      const float y_j = ci_cache->y[pjd];
-      const float z_j = ci_cache->z[pjd];
-      const float mass_j = ci_cache->m[pjd];
-      const float h_j = ci_cache->epsilon[pjd];
-
-      /* Compute the pairwise (square) distance. */
-      /* Note: no need for periodic wrapping inside a cell */
-      const float dx = x_j - x_i;
-      const float dy = y_j - y_i;
-      const float dz = z_j - z_i;
-      const float r2 = dx * dx + dy * dy + dz * dz;
-
-      /* Pick the maximal softening length of i and j */
-      const float h = max(h_i, h_j);
-      const float h2 = h * h;
-      const float h_inv = 1.f / h;
-      const float h_inv_3 = h_inv * h_inv * h_inv;
-
-#ifdef SWIFT_DEBUG_CHECKS
-      if (r2 == 0.f && h2 == 0.)
-        error("Interacting particles with 0 distance and 0 softening.");
-
-      /* Check that particles have been drifted to the current time */
-      if (gparts[pid].ti_drift != e->ti_current)
-        error("gpi not drifted to current time");
-      if (pjd < gcount && gparts[pjd].ti_drift != e->ti_current &&
-          !gpart_is_inhibited(&gparts[pjd], e))
-        error("gpj not drifted to current time");
-
-      /* Check that we are not updated an inhibited particle */
-      if (gpart_is_inhibited(&gparts[pid], e))
-        error("Updating an inhibited particle!");
-
-      /* Check that the particle we interact with was not inhibited */
-      if (pjd < gcount && gpart_is_inhibited(&gparts[pjd], e) && mass_j != 0.f)
-        error("Inhibited particle used as gravity source.");
-
-      /* Check that the particle was initialised */
-      if (gparts[pid].initialised == 0)
-        error("Adding forces to an un-initialised gpart.");
-#endif
-
-      /* Interact! */
-      float f_ij, pot_ij;
-      runner_iact_grav_pp_full(r2, h2, h_inv, h_inv_3, mass_j, &f_ij, &pot_ij);
-
-      /* Store it back */
-      a_x += f_ij * dx;
-      a_y += f_ij * dy;
-      a_z += f_ij * dz;
-      pot += pot_ij;
-
-#ifdef SWIFT_DEBUG_CHECKS
-      /* Update the interaction counter if it's not a padded gpart */
-      if (pjd < gcount && !gpart_is_inhibited(&gparts[pjd], e))
-        gparts[pid].num_interacted++;
-#endif
-    }
-
-    /* Store everything back in cache */
-    ci_cache->a_x[pid] += a_x;
-    ci_cache->a_y[pid] += a_y;
-    ci_cache->a_z[pid] += a_z;
-    ci_cache->pot[pid] += pot;
-  }
-}
-
-/**
- * @brief Compute the truncated gravity interactions between all particles
- * of a cell and the particles of the other cell.
- *
- * The calculation is performed non-symmetrically using the pre-filled
- * #gravity_cache structures. The loop over the j cache should auto-vectorize.
- *
- * This function only makes sense in periodic BCs.
- *
- * @param ci_cache #gravity_cache contaning the particles to be updated.
- * @param gcount The number of particles in the cell.
- * @param gcount_padded The number of particles in the cell padded to the
- * vector length.
- * @param r_s_inv The inverse of the gravity-mesh smoothing-scale.
- *
- * @param e The #engine (for debugging checks only).
- * @param gparts The #gpart in the cell (for debugging checks only).
- */
-static INLINE void runner_doself_grav_pp_truncated(
-    struct gravity_cache *restrict ci_cache, const int gcount,
-    const int gcount_padded, const float r_s_inv, const struct engine *e,
-    struct gpart *gparts) {
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (!e->s->periodic)
-    error("Calling truncated PP function in non-periodic setup.");
-#endif
-
-  /* Loop over all particles in ci... */
-  for (int pid = 0; pid < gcount; pid++) {
-
-    /* Skip inactive particles */
-    if (!ci_cache->active[pid]) continue;
-
-    const float x_i = ci_cache->x[pid];
-    const float y_i = ci_cache->y[pid];
-    const float z_i = ci_cache->z[pid];
-    const float h_i = ci_cache->epsilon[pid];
-
-    /* Local accumulators for the acceleration and potential */
-    float a_x = 0.f, a_y = 0.f, a_z = 0.f, pot = 0.f;
-
-    /* Make the compiler understand we are in happy vectorization land */
-    swift_align_information(float, ci_cache->x, SWIFT_CACHE_ALIGNMENT);
-    swift_align_information(float, ci_cache->y, SWIFT_CACHE_ALIGNMENT);
-    swift_align_information(float, ci_cache->z, SWIFT_CACHE_ALIGNMENT);
-    swift_align_information(float, ci_cache->m, SWIFT_CACHE_ALIGNMENT);
-    swift_align_information(float, ci_cache->epsilon, SWIFT_CACHE_ALIGNMENT);
-    swift_assume_size(gcount_padded, VEC_SIZE);
-
-    /* Loop over every other particle in the cell. */
-    for (int pjd = 0; pjd < gcount_padded; pjd++) {
-
-      /* No self interaction */
-      if (pid == pjd) continue;
-
-      /* Get info about j */
-      const float x_j = ci_cache->x[pjd];
-      const float y_j = ci_cache->y[pjd];
-      const float z_j = ci_cache->z[pjd];
-      const float mass_j = ci_cache->m[pjd];
-      const float h_j = ci_cache->epsilon[pjd];
-
-      /* Compute the pairwise (square) distance. */
-      /* Note: no need for periodic wrapping inside a cell */
-      const float dx = x_j - x_i;
-      const float dy = y_j - y_i;
-      const float dz = z_j - z_i;
-
-      const float r2 = dx * dx + dy * dy + dz * dz;
-
-      /* Pick the maximal softening length of i and j */
-      const float h = max(h_i, h_j);
-      const float h2 = h * h;
-      const float h_inv = 1.f / h;
-      const float h_inv_3 = h_inv * h_inv * h_inv;
-
-#ifdef SWIFT_DEBUG_CHECKS
-      if (r2 == 0.f && h2 == 0.)
-        error("Interacting particles with 0 distance and 0 softening.");
-
-      /* Check that particles have been drifted to the current time */
-      if (gparts[pid].ti_drift != e->ti_current)
-        error("gpi not drifted to current time");
-      if (pjd < gcount && gparts[pjd].ti_drift != e->ti_current &&
-          !gpart_is_inhibited(&gparts[pjd], e))
-        error("gpj not drifted to current time");
-
-      /* Check that we are not updated an inhibited particle */
-      if (gpart_is_inhibited(&gparts[pid], e))
-        error("Updating an inhibited particle!");
-
-      /* Check that the particle we interact with was not inhibited */
-      if (pjd < gcount && gpart_is_inhibited(&gparts[pjd], e) && mass_j != 0.f)
-        error("Inhibited particle used as gravity source.");
-
-      /* Check that the particle was initialised */
-      if (gparts[pid].initialised == 0)
-        error("Adding forces to an un-initialised gpart.");
-#endif
-
-      /* Interact! */
-      float f_ij, pot_ij;
-      runner_iact_grav_pp_truncated(r2, h2, h_inv, h_inv_3, mass_j, r_s_inv,
-                                    &f_ij, &pot_ij);
-
-      /* Store it back */
-      a_x += f_ij * dx;
-      a_y += f_ij * dy;
-      a_z += f_ij * dz;
-      pot += pot_ij;
-
-#ifdef SWIFT_DEBUG_CHECKS
-      /* Update the interaction counter if it's not a padded gpart */
-      if (pjd < gcount && !gpart_is_inhibited(&gparts[pjd], e))
-        gparts[pid].num_interacted++;
-#endif
-    }
-
-    /* Store everything back in cache */
-    ci_cache->a_x[pid] += a_x;
-    ci_cache->a_y[pid] += a_y;
-    ci_cache->a_z[pid] += a_z;
-    ci_cache->pot[pid] += pot;
-  }
-}
-
-/**
- * @brief Computes the interaction of all the particles in a cell with all the
- * other ones.
- *
- * This function switches between the full potential and the truncated one
- * depending on needs.
- *
- * This function starts by constructing the require #gravity_cache for the
- * cell and then call the specialised functions doing the actual work on
- * the cache. It then write the data back to the particles.
- *
- * @param r The #runner.
- * @param c The #cell.
- */
-static INLINE void runner_doself_grav_pp(struct runner *r, struct cell *c) {
-
-  /* Recover some useful constants */
-  const struct engine *e = r->e;
-  const int periodic = e->mesh->periodic;
-  const float r_s_inv = e->mesh->r_s_inv;
-  const double min_trunc = e->mesh->r_cut_min;
-
-  TIMER_TIC;
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (c->grav.count == 0) error("Doing self gravity on an empty cell !");
-#endif
-
-  /* Anything to do here? */
-  if (!cell_is_active_gravity(c, e)) return;
-
-  /* Check that we are not doing something stupid */
-  if (c->split) error("Running P-P on a splitable cell");
-
-  /* Do we need to start by drifting things ? */
-  if (!cell_are_gpart_drifted(c, e)) error("Un-drifted gparts");
-
-  /* Start by constructing a cache for the particles */
-  struct gravity_cache *const ci_cache = &r->ci_gravity_cache;
-
-  /* Shift to apply to the particles in the cell */
-  const double loc[3] = {c->loc[0] + 0.5 * c->width[0],
-                         c->loc[1] + 0.5 * c->width[1],
-                         c->loc[2] + 0.5 * c->width[2]};
-
-  /* Computed the padded counts */
-  const int gcount = c->grav.count;
-  const int gcount_padded = gcount - (gcount % VEC_SIZE) + VEC_SIZE;
-
-#ifdef SWIFT_DEBUG_CHECKS
-  /* Check that we fit in cache */
-  if (gcount > ci_cache->count)
-    error("Not enough space in the cache! gcount=%d", gcount);
-#endif
-
-  /* Fill the cache */
-  gravity_cache_populate_no_mpole(e->max_active_bin, ci_cache, c->grav.parts,
-                                  gcount, gcount_padded, loc, c,
-                                  e->gravity_properties);
-
-  /* Can we use the Newtonian version or do we need the truncated one ? */
-  if (!periodic) {
-
-    /* Not periodic -> Can always use Newtonian potential */
-    runner_doself_grav_pp_full(ci_cache, gcount, gcount_padded, e,
-                               c->grav.parts);
-
-  } else {
-
-    /* Get the maximal distance between any two particles */
-    const double max_r = 2. * c->grav.multipole->r_max;
-
-    /* Do we need to use the truncated interactions ? */
-    if (max_r > min_trunc) {
-
-      /* Periodic but far-away cells must use the truncated potential */
-      runner_doself_grav_pp_truncated(ci_cache, gcount, gcount_padded, r_s_inv,
-                                      e, c->grav.parts);
-
-    } else {
-
-      /* Periodic but close-by cells can use the full Newtonian potential */
-      runner_doself_grav_pp_full(ci_cache, gcount, gcount_padded, e,
-                                 c->grav.parts);
-    }
-  }
-
-  /* Write back to the particles */
-  gravity_cache_write_back(ci_cache, c->grav.parts, gcount);
-
-  TIMER_TOC(timer_doself_grav_pp);
-}
-
-/**
- * @brief Computes the interaction of the field tensor and multipole
- * of two cells symmetrically.
- *
- * @param r The #runner.
- * @param ci The first #cell.
- * @param cj The second #cell.
- */
-static INLINE void runner_dopair_grav_mm_symmetric(struct runner *r,
-                                                   struct cell *restrict ci,
-                                                   struct cell *restrict cj) {
-
-  /* Some constants */
-  const struct engine *e = r->e;
-  const struct gravity_props *props = e->gravity_properties;
-  const int periodic = e->mesh->periodic;
-  const double dim[3] = {e->mesh->dim[0], e->mesh->dim[1], e->mesh->dim[2]};
-  const float r_s_inv = e->mesh->r_s_inv;
-
-  TIMER_TIC;
-
-  /* Anything to do here? */
-  if ((!cell_is_active_gravity_mm(ci, e) || ci->nodeID != engine_rank) ||
-      (!cell_is_active_gravity_mm(cj, e) || cj->nodeID != engine_rank))
-    error("Invalid state in symmetric M-M calculation!");
-
-  /* Short-cut to the multipole */
-  const struct multipole *multi_i = &ci->grav.multipole->m_pole;
-  const struct multipole *multi_j = &cj->grav.multipole->m_pole;
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (ci == cj) error("Interacting a cell with itself using M2L");
-
-  if (multi_i->num_gpart == 0)
-    error("Multipole i does not seem to have been set.");
-
-  if (multi_j->num_gpart == 0)
-    error("Multipole j does not seem to have been set.");
-
-  if (ci->grav.multipole->pot.ti_init != e->ti_current)
-    error("ci->grav tensor not initialised.");
-
-  if (ci->grav.multipole->pot.ti_init != e->ti_current)
-    error("cj->grav tensor not initialised.");
-
-  if (ci->grav.ti_old_multipole != e->ti_current)
-    error(
-        "Undrifted multipole ci->grav.ti_old_multipole=%lld ci->nodeID=%d "
-        "cj->nodeID=%d e->ti_current=%lld",
-        ci->grav.ti_old_multipole, ci->nodeID, cj->nodeID, e->ti_current);
-
-  if (cj->grav.ti_old_multipole != e->ti_current)
-    error(
-        "Undrifted multipole cj->grav.ti_old_multipole=%lld cj->nodeID=%d "
-        "ci->nodeID=%d e->ti_current=%lld",
-        cj->grav.ti_old_multipole, cj->nodeID, ci->nodeID, e->ti_current);
-#endif
-
-  /* Let's interact at this level */
-  gravity_M2L_symmetric(&ci->grav.multipole->pot, &cj->grav.multipole->pot,
-                        multi_i, multi_j, ci->grav.multipole->CoM,
-                        cj->grav.multipole->CoM, props, periodic, dim, r_s_inv);
-
-  TIMER_TOC(timer_dopair_grav_mm);
-}
-
-/**
- * @brief Computes the interaction of the field tensor in a cell with the
- * multipole of another cell.
- *
- * @param r The #runner.
- * @param ci The #cell with field tensor to interact.
- * @param cj The #cell with the multipole.
- */
-static INLINE void runner_dopair_grav_mm_nonsym(
-    struct runner *r, struct cell *restrict ci,
-    const struct cell *restrict cj) {
-
-  /* Some constants */
-  const struct engine *e = r->e;
-  const struct gravity_props *props = e->gravity_properties;
-  const int periodic = e->mesh->periodic;
-  const double dim[3] = {e->mesh->dim[0], e->mesh->dim[1], e->mesh->dim[2]};
-  const float r_s_inv = e->mesh->r_s_inv;
-
-  TIMER_TIC;
-
-  /* Anything to do here? */
-  if (!cell_is_active_gravity_mm(ci, e) || ci->nodeID != engine_rank) return;
-
-  /* Short-cut to the multipole */
-  const struct multipole *multi_j = &cj->grav.multipole->m_pole;
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (ci == cj) error("Interacting a cell with itself using M2L");
-
-  if (multi_j->num_gpart == 0)
-    error("Multipole does not seem to have been set.");
-
-  if (ci->grav.multipole->pot.ti_init != e->ti_current)
-    error("ci->grav tensor not initialised.");
-
-  if (cj->grav.ti_old_multipole != e->ti_current)
-    error(
-        "Undrifted multipole cj->grav.ti_old_multipole=%lld cj->nodeID=%d "
-        "ci->nodeID=%d e->ti_current=%lld",
-        cj->grav.ti_old_multipole, cj->nodeID, ci->nodeID, e->ti_current);
-#endif
-
-  /* Let's interact at this level */
-  gravity_M2L_nonsym(&ci->grav.multipole->pot, multi_j, ci->grav.multipole->CoM,
-                     cj->grav.multipole->CoM, props, periodic, dim, r_s_inv);
-
-  TIMER_TOC(timer_dopair_grav_mm);
-}
-
-/**
- * @brief Call the M-M calculation on two cells if active.
- *
- * @param r The #runner object.
- * @param ci The first #cell.
- * @param cj The second #cell.
- */
-static INLINE void runner_dopair_grav_mm(struct runner *r,
-                                         struct cell *restrict ci,
-                                         struct cell *restrict cj) {
-
-  const struct engine *e = r->e;
-
-  /* What do we need to do? */
-  const int do_i =
-      cell_is_active_gravity_mm(ci, e) && (ci->nodeID == e->nodeID);
-  const int do_j =
-      cell_is_active_gravity_mm(cj, e) && (cj->nodeID == e->nodeID);
-
-  /* Do we need drifting first? */
-  if (ci->grav.ti_old_multipole < e->ti_current) cell_drift_multipole(ci, e);
-  if (cj->grav.ti_old_multipole < e->ti_current) cell_drift_multipole(cj, e);
-
-  /* Interact! */
-  if (do_i && do_j)
-    runner_dopair_grav_mm_symmetric(r, ci, cj);
-  else if (do_i)
-    runner_dopair_grav_mm_nonsym(r, ci, cj);
-  else if (do_j)
-    runner_dopair_grav_mm_nonsym(r, cj, ci);
-}
-
-/**
- * @brief Computes all the M-M interactions between all the well-separated (at
- * rebuild) pairs of progenies of the two cells.
- *
- * @param r The #runner thread.
- * @param flags The task flag containing the list of well-separated pairs as a
- * bit-field.
- * @param ci The first #cell.
- * @param cj The second #cell.
- */
-static INLINE void runner_dopair_grav_mm_progenies(struct runner *r,
-                                                   const long long flags,
-                                                   struct cell *restrict ci,
-                                                   struct cell *restrict cj) {
-
-  /* Loop over all pairs of progenies */
-  for (int i = 0; i < 8; i++) {
-    if (ci->progeny[i] != NULL) {
-      for (int j = 0; j < 8; j++) {
-        if (cj->progeny[j] != NULL) {
-
-          struct cell *cpi = ci->progeny[i];
-          struct cell *cpj = cj->progeny[j];
-
-          const int flag = i * 8 + j;
-
-          /* Did we agree to use an M-M interaction here at the last rebuild? */
-          if (flags & (1ULL << flag)) runner_dopair_grav_mm(r, cpi, cpj);
-        }
-      }
-    }
-  }
-}
-
-static INLINE void runner_dopair_recursive_grav_pm(struct runner *r,
-                                                   struct cell *ci,
-                                                   const struct cell *cj) {
-  /* Some constants */
-  const struct engine *e = r->e;
-  const int periodic = e->mesh->periodic;
-  const float dim[3] = {(float)e->mesh->dim[0], (float)e->mesh->dim[1],
-                        (float)e->mesh->dim[2]};
-  const float r_s_inv = e->mesh->r_s_inv;
-
-  /* Anything to do here? */
-  if (!(cell_is_active_gravity(ci, e) && ci->nodeID == e->nodeID)) return;
-
-#ifdef SWIFT_DEBUG_CHECKS
-  /* Early abort? */
-  if (ci->grav.count == 0 || cj->grav.count == 0)
-    error("Doing pair gravity on an empty cell !");
-
-  /* Sanity check */
-  if (ci == cj) error("Pair interaction between a cell and itself.");
-
-  if (cj->grav.ti_old_multipole != e->ti_current)
-    error("cj->grav.multipole not drifted.");
-#endif
-
-  /* Can we recurse further? */
-  if (ci->split) {
-
-    /* Loop over ci's children */
-    for (int k = 0; k < 8; k++) {
-      if (ci->progeny[k] != NULL)
-        runner_dopair_recursive_grav_pm(r, ci->progeny[k], cj);
-    }
-
-    /* Ok, let's do the interaction here */
-  } else {
-
-    /* Start by constructing particle caches */
-
-    /* Cache to play with */
-    struct gravity_cache *const ci_cache = &r->ci_gravity_cache;
-
-    /* Computed the padded counts */
-    const int gcount_i = ci->grav.count;
-    const int gcount_padded_i = gcount_i - (gcount_i % VEC_SIZE) + VEC_SIZE;
-
-#ifdef SWIFT_DEBUG_CHECKS
-    /* Check that we fit in cache */
-    if (gcount_i > ci_cache->count)
-      error("Not enough space in the cache! gcount_i=%d", gcount_i);
-#endif
-
-    /* Recover the multipole info and the CoM locations */
-    const struct multipole *multi_j = &cj->grav.multipole->m_pole;
-    const float r_max = cj->grav.multipole->r_max;
-    const float CoM_j[3] = {(float)(cj->grav.multipole->CoM[0]),
-                            (float)(cj->grav.multipole->CoM[1]),
-                            (float)(cj->grav.multipole->CoM[2])};
-
-    /* Fill the cache */
-    gravity_cache_populate_all_mpole(
-        e->max_active_bin, periodic, dim, ci_cache, ci->grav.parts, gcount_i,
-        gcount_padded_i, ci, CoM_j, r_max * r_max, e->gravity_properties);
-
-    /* Can we use the Newtonian version or do we need the truncated one ? */
-    if (!periodic) {
-
-      runner_dopair_grav_pm_full(ci_cache, gcount_padded_i, CoM_j, multi_j,
-                                 periodic, dim, e, ci->grav.parts, gcount_i,
-                                 cj);
-
-    } else {
-
-      runner_dopair_grav_pm_truncated(ci_cache, gcount_padded_i, CoM_j, multi_j,
-                                      dim, r_s_inv, e, ci->grav.parts, gcount_i,
-                                      cj);
-    }
-
-    /* Write back to the particles */
-    gravity_cache_write_back(ci_cache, ci->grav.parts, gcount_i);
-  }
-}
-
-/**
- * @brief Computes the interaction of all the particles in a cell with all the
- * particles of another cell.
- *
- * This function will try to recurse as far down the tree as possible and only
- * default to direct summation if there is no better option.
- *
- * If using periodic BCs, we will abort the recursion if th distance between the
- * cells is larger than the set threshold.
- *
- * @param r The #runner.
- * @param ci The first #cell.
- * @param cj The other #cell.
- * @param gettimer Are we timing this ?
- */
-static INLINE void runner_dopair_recursive_grav(struct runner *r,
-                                                struct cell *ci,
-                                                struct cell *cj, int gettimer) {
-
-  /* Some constants */
-  const struct engine *e = r->e;
-  const int nodeID = e->nodeID;
-  const int periodic = e->mesh->periodic;
-  const double dim[3] = {e->mesh->dim[0], e->mesh->dim[1], e->mesh->dim[2]};
-  const double theta_crit2 = e->gravity_properties->theta_crit2;
-  const double max_distance = e->mesh->r_cut_max;
-
-  /* Anything to do here? */
-  if (!((cell_is_active_gravity(ci, e) && ci->nodeID == nodeID) ||
-        (cell_is_active_gravity(cj, e) && cj->nodeID == nodeID)))
-    return;
-
-#ifdef SWIFT_DEBUG_CHECKS
-
-  const int gcount_i = ci->grav.count;
-  const int gcount_j = cj->grav.count;
-
-  /* Early abort? */
-  if (gcount_i == 0 || gcount_j == 0)
-    error("Doing pair gravity on an empty cell !");
-
-  /* Sanity check */
-  if (ci == cj) error("Pair interaction between a cell and itself.");
-
-  if (cell_is_active_gravity(ci, e) &&
-      ci->grav.ti_old_multipole != e->ti_current)
-    error("ci->grav.multipole not drifted.");
-  if (cell_is_active_gravity(cj, e) &&
-      cj->grav.ti_old_multipole != e->ti_current)
-    error("cj->grav.multipole not drifted.");
-#endif
-
-  TIMER_TIC;
-
-  /* Recover the multipole information */
-  struct gravity_tensors *const multi_i = ci->grav.multipole;
-  struct gravity_tensors *const multi_j = cj->grav.multipole;
-
-  /* Get the distance between the CoMs */
-  double dx = multi_i->CoM[0] - multi_j->CoM[0];
-  double dy = multi_i->CoM[1] - multi_j->CoM[1];
-  double dz = multi_i->CoM[2] - multi_j->CoM[2];
-
-  /* Apply BC */
-  if (periodic) {
-    dx = nearest(dx, dim[0]);
-    dy = nearest(dy, dim[1]);
-    dz = nearest(dz, dim[2]);
-  }
-  const double r2 = dx * dx + dy * dy + dz * dz;
-
-  /* Minimal distance between any 2 particles in the two cells */
-  const double r_lr_check = sqrt(r2) - (multi_i->r_max + multi_j->r_max);
-
-  /* Are we beyond the distance where the truncated forces are 0? */
-  if (periodic && r_lr_check > max_distance) {
-
-#ifdef SWIFT_DEBUG_CHECKS
-    /* Need to account for the interactions we missed */
-    if (cell_is_active_gravity(ci, e))
-      multi_i->pot.num_interacted += multi_j->m_pole.num_gpart;
-    if (cell_is_active_gravity(cj, e))
-      multi_j->pot.num_interacted += multi_i->m_pole.num_gpart;
-#endif
-    return;
-  }
-
-  /* OK, we actually need to compute this pair. Let's find the cheapest
-   * option... */
-
-  /* Can we use M-M interactions ? */
-  if (gravity_M2L_accept(multi_i->r_max, multi_j->r_max, theta_crit2, r2,
-                         multi_i->m_pole.max_softening,
-                         multi_j->m_pole.max_softening)) {
-
-    /* Go M-M */
-    runner_dopair_grav_mm(r, ci, cj);
-
-  } else if (!ci->split && !cj->split) {
-
-    /* We have two leaves. Go P-P. */
-    runner_dopair_grav_pp(r, ci, cj, /*symmetric*/ 1, /*allow_mpoles*/ 1);
-
-  } else {
-
-    /* Alright, we'll have to split and recurse. */
-    /* We know at least one of ci and cj is splittable */
-
-    const double ri_max = multi_i->r_max;
-    const double rj_max = multi_j->r_max;
-
-    /* Split the larger of the two cells and start over again */
-    if (ri_max > rj_max) {
-
-      /* Can we actually split that interaction ? */
-      if (ci->split) {
-
-        /* Loop over ci's children */
-        for (int k = 0; k < 8; k++) {
-          if (ci->progeny[k] != NULL)
-            runner_dopair_recursive_grav(r, ci->progeny[k], cj, 0);
-        }
-
-      } else {
-        /* cj is split */
-
-        /* MATTHIEU: This could maybe be replaced by P-M interactions ?  */
-
-        /* Loop over cj's children */
-        for (int k = 0; k < 8; k++) {
-          if (cj->progeny[k] != NULL)
-            runner_dopair_recursive_grav(r, ci, cj->progeny[k], 0);
-        }
-      }
-    } else {
-
-      /* Can we actually split that interaction ? */
-      if (cj->split) {
-
-        /* Loop over cj's children */
-        for (int k = 0; k < 8; k++) {
-          if (cj->progeny[k] != NULL)
-            runner_dopair_recursive_grav(r, ci, cj->progeny[k], 0);
-        }
-
-      } else {
-        /* ci is split */
-
-        /* MATTHIEU: This could maybe be replaced by P-M interactions ?  */
-
-        /* Loop over ci's children */
-        for (int k = 0; k < 8; k++) {
-          if (ci->progeny[k] != NULL)
-            runner_dopair_recursive_grav(r, ci->progeny[k], cj, 0);
-        }
-      }
-    }
-  }
-
-  if (gettimer) TIMER_TOC(timer_dosub_pair_grav);
-}
-
-/**
- * @brief Computes the interaction of all the particles in a cell.
- *
- * This function will try to recurse as far down the tree as possible and only
- * default to direct summation if there is no better option.
- *
- * @param r The #runner.
- * @param c The first #cell.
- * @param gettimer Are we timing this ?
- */
-static INLINE void runner_doself_recursive_grav(struct runner *r,
-                                                struct cell *c, int gettimer) {
-
-  /* Some constants */
-  const struct engine *e = r->e;
-
-#ifdef SWIFT_DEBUG_CHECKS
-  /* Early abort? */
-  if (c->grav.count == 0) error("Doing self gravity on an empty cell !");
-#endif
-
-  TIMER_TIC;
-
-  /* Anything to do here? */
-  if (!cell_is_active_gravity(c, e)) return;
-
-  /* If the cell is split, interact each progeny with itself, and with
-     each of its siblings. */
-  if (c->split) {
-
-    for (int j = 0; j < 8; j++) {
-      if (c->progeny[j] != NULL) {
-
-        runner_doself_recursive_grav(r, c->progeny[j], 0);
-
-        for (int k = j + 1; k < 8; k++) {
-          if (c->progeny[k] != NULL) {
-
-            runner_dopair_recursive_grav(r, c->progeny[j], c->progeny[k], 0);
-          }
-        }
-      }
-    }
-  }
-
-  /* If the cell is not split, then just go for it... */
-  else {
-
-    runner_doself_grav_pp(r, c);
-  }
-
-  if (gettimer) TIMER_TOC(timer_dosub_self_grav);
-}
-
-/**
- * @brief Performs all M-M interactions between a given top-level cell and all
- * the other top-levels that are far enough.
- *
- * @param r The thread #runner.
- * @param ci The #cell of interest.
- * @param timer Are we timing this ?
- */
-static INLINE void runner_do_grav_long_range(struct runner *r, struct cell *ci,
-                                             int timer) {
-
-  /* Some constants */
-  const struct engine *e = r->e;
-  const int periodic = e->mesh->periodic;
-  const double dim[3] = {e->mesh->dim[0], e->mesh->dim[1], e->mesh->dim[2]};
-  const double theta_crit2 = e->gravity_properties->theta_crit2;
-  const double max_distance2 = e->mesh->r_cut_max * e->mesh->r_cut_max;
-
-  TIMER_TIC;
-
-  /* Recover the list of top-level cells */
-  struct cell *cells = e->s->cells_top;
-  int *cells_with_particles = e->s->cells_with_particles_top;
-  const int nr_cells_with_particles = e->s->nr_cells_with_particles;
-
-  /* Anything to do here? */
-  if (!cell_is_active_gravity(ci, e)) return;
-
-  if (ci->nodeID != engine_rank)
-    error("Non-local cell in long-range gravity task!");
-
-  /* Check multipole has been drifted */
-  if (ci->grav.ti_old_multipole < e->ti_current) cell_drift_multipole(ci, e);
-
-  /* Get this cell's multipole information */
-  struct gravity_tensors *const multi_i = ci->grav.multipole;
-
-  /* Find this cell's top-level (great-)parent */
-  struct cell *top = ci;
-  while (top->parent != NULL) top = top->parent;
-
-  /* Recover the top-level multipole (for distance checks) */
-  struct gravity_tensors *const multi_top = top->grav.multipole;
-  const double CoM_rebuild_top[3] = {multi_top->CoM_rebuild[0],
-                                     multi_top->CoM_rebuild[1],
-                                     multi_top->CoM_rebuild[2]};
-
-  /* Loop over all the top-level cells and go for a M-M interaction if
-   * well-separated */
-  for (int n = 0; n < nr_cells_with_particles; ++n) {
-
-    /* Handle on the top-level cell and it's gravity business*/
-    const struct cell *cj = &cells[cells_with_particles[n]];
-    const struct gravity_tensors *const multi_j = cj->grav.multipole;
-
-    /* Avoid self contributions */
-    if (top == cj) continue;
-
-    /* Skip empty cells */
-    if (multi_j->m_pole.M_000 == 0.f) continue;
-
-    /* Can we escape early in the periodic BC case? */
-    if (periodic) {
-
-      /* Minimal distance between any pair of particles */
-      const double min_radius2 =
-          cell_min_dist2_same_size(top, cj, periodic, dim);
-
-      /* Are we beyond the distance where the truncated forces are 0 ?*/
-      if (min_radius2 > max_distance2) {
-
-#ifdef SWIFT_DEBUG_CHECKS
-        /* Need to account for the interactions we missed */
-        multi_i->pot.num_interacted += multi_j->m_pole.num_gpart;
-#endif
-
-        /* Record that this multipole received a contribution */
-        multi_i->pot.interacted = 1;
+struct runner;
+struct cell;
 
-        /* We are done here. */
-        continue;
-      }
-    }
+void runner_do_grav_down(struct runner *r, struct cell *c, int timer);
 
-    /* Get the distance between the CoMs at the last rebuild*/
-    double dx_r = CoM_rebuild_top[0] - multi_j->CoM_rebuild[0];
-    double dy_r = CoM_rebuild_top[1] - multi_j->CoM_rebuild[1];
-    double dz_r = CoM_rebuild_top[2] - multi_j->CoM_rebuild[2];
+void runner_doself_recursive_grav(struct runner *r, struct cell *c,
+                                  int gettimer);
 
-    /* Apply BC */
-    if (periodic) {
-      dx_r = nearest(dx_r, dim[0]);
-      dy_r = nearest(dy_r, dim[1]);
-      dz_r = nearest(dz_r, dim[2]);
-    }
-    const double r2_rebuild = dx_r * dx_r + dy_r * dy_r + dz_r * dz_r;
+void runner_dopair_recursive_grav(struct runner *r, struct cell *ci,
+                                  struct cell *cj, int gettimer);
 
-    /* Are we in charge of this cell pair? */
-    if (gravity_M2L_accept(multi_top->r_max_rebuild, multi_j->r_max_rebuild,
-                           theta_crit2, r2_rebuild,
-                           multi_top->m_pole.max_softening,
-                           multi_j->m_pole.max_softening)) {
+void runner_dopair_grav_mm_progenies(struct runner *r, const long long flags,
+                                     struct cell *restrict ci,
+                                     struct cell *restrict cj);
 
-      /* Call the PM interaction fucntion on the active sub-cells of ci */
-      runner_dopair_grav_mm_nonsym(r, ci, cj);
-      // runner_dopair_recursive_grav_pm(r, ci, cj);
+void runner_do_grav_long_range(struct runner *r, struct cell *ci, int timer);
 
-      /* Record that this multipole received a contribution */
-      multi_i->pot.interacted = 1;
+/* Internal functions (for unit tests and debugging) */
 
-    } /* We are in charge of this pair */
-  }   /* Loop over top-level cells */
+void runner_doself_grav_pp(struct runner *r, struct cell *c);
 
-  if (timer) TIMER_TOC(timer_dograv_long_range);
-}
+void runner_dopair_grav_pp(struct runner *r, struct cell *ci, struct cell *cj,
+                           const int symmetric, const int allow_mpole);
 
 #endif /* SWIFT_RUNNER_DOIACT_GRAV_H */
diff --git a/src/runner_doiact_hydro.c b/src/runner_doiact_hydro.c
new file mode 100644
index 0000000000000000000000000000000000000000..480ea59f0a536aa340b7e4d8f838bef3a0cca072
--- /dev/null
+++ b/src/runner_doiact_hydro.c
@@ -0,0 +1,63 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
+ *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *               2015 Peter W. Draper (p.w.draper@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+/* Config parameters. */
+#include "../config.h"
+
+/* Local headers. */
+#include "active.h"
+#include "cell.h"
+#include "engine.h"
+#include "pressure_floor_iact.h"
+#include "runner.h"
+#include "runner_doiact_hydro_vec.h"
+#include "space_getsid.h"
+#include "timers.h"
+
+/* Import the density loop functions. */
+#define FUNCTION density
+#define FUNCTION_TASK_LOOP TASK_LOOP_DENSITY
+#include "runner_doiact_functions_hydro.h"
+#undef FUNCTION
+#undef FUNCTION_TASK_LOOP
+
+/* Import the gradient loop functions (if required). */
+#ifdef EXTRA_HYDRO_LOOP
+#define FUNCTION gradient
+#define FUNCTION_TASK_LOOP TASK_LOOP_GRADIENT
+#include "runner_doiact_functions_hydro.h"
+#undef FUNCTION
+#undef FUNCTION_TASK_LOOP
+#endif
+
+/* Import the force loop functions. */
+#define FUNCTION force
+#define FUNCTION_TASK_LOOP TASK_LOOP_FORCE
+#include "runner_doiact_functions_hydro.h"
+#undef FUNCTION
+#undef FUNCTION_TASK_LOOP
+
+/* Import the limiter loop functions. */
+#define FUNCTION limiter
+#define FUNCTION_TASK_LOOP TASK_LOOP_LIMITER
+#include "runner_doiact_functions_hydro.h"
+#undef FUNCTION
+#undef FUNCTION_TASK_LOOP
diff --git a/src/runner_doiact_hydro.h b/src/runner_doiact_hydro.h
new file mode 100644
index 0000000000000000000000000000000000000000..1fd54c1037e2d0b9c7a671311cfee4720ebe8d84
--- /dev/null
+++ b/src/runner_doiact_hydro.h
@@ -0,0 +1,151 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
+ *               2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+/* Before including this file, define FUNCTION, which is the name of the
+   interaction loop (e.g. density or force). The macros below then expand to
+   names such as runner_dopair1_FUNCTION, runner_dopair1_naive_FUNCTION,
+   runner_doself1_FUNCTION and runner_dosub_pair1_FUNCTION, along with the
+   pairwise interaction function runner_iact_FUNCTION. */
+
+#define PASTE(x, y) x##_##y
+
+#define _DOPAIR1_BRANCH(f) PASTE(runner_dopair1_branch, f)
+#define DOPAIR1_BRANCH _DOPAIR1_BRANCH(FUNCTION)
+
+#define _DOPAIR1(f) PASTE(runner_dopair1, f)
+#define DOPAIR1 _DOPAIR1(FUNCTION)
+
+#define _DOPAIR2_BRANCH(f) PASTE(runner_dopair2_branch, f)
+#define DOPAIR2_BRANCH _DOPAIR2_BRANCH(FUNCTION)
+
+#define _DOPAIR2(f) PASTE(runner_dopair2, f)
+#define DOPAIR2 _DOPAIR2(FUNCTION)
+
+#define _DOPAIR_SUBSET(f) PASTE(runner_dopair_subset, f)
+#define DOPAIR_SUBSET _DOPAIR_SUBSET(FUNCTION)
+
+#define _DOPAIR_SUBSET_BRANCH(f) PASTE(runner_dopair_subset_branch, f)
+#define DOPAIR_SUBSET_BRANCH _DOPAIR_SUBSET_BRANCH(FUNCTION)
+
+#define _DOPAIR_SUBSET_NOSORT(f) PASTE(runner_dopair_subset_nosort, f)
+#define DOPAIR_SUBSET_NOSORT _DOPAIR_SUBSET_NOSORT(FUNCTION)
+
+#define _DOPAIR_SUBSET_NAIVE(f) PASTE(runner_dopair_subset_naive, f)
+#define DOPAIR_SUBSET_NAIVE _DOPAIR_SUBSET_NAIVE(FUNCTION)
+
+#define _DOPAIR1_NAIVE(f) PASTE(runner_dopair1_naive, f)
+#define DOPAIR1_NAIVE _DOPAIR1_NAIVE(FUNCTION)
+
+#define _DOPAIR2_NAIVE(f) PASTE(runner_dopair2_naive, f)
+#define DOPAIR2_NAIVE _DOPAIR2_NAIVE(FUNCTION)
+
+#define _DOSELF1_NAIVE(f) PASTE(runner_doself1_naive, f)
+#define DOSELF1_NAIVE _DOSELF1_NAIVE(FUNCTION)
+
+#define _DOSELF2_NAIVE(f) PASTE(runner_doself2_naive, f)
+#define DOSELF2_NAIVE _DOSELF2_NAIVE(FUNCTION)
+
+#define _DOSELF1_BRANCH(f) PASTE(runner_doself1_branch, f)
+#define DOSELF1_BRANCH _DOSELF1_BRANCH(FUNCTION)
+
+#define _DOSELF1(f) PASTE(runner_doself1, f)
+#define DOSELF1 _DOSELF1(FUNCTION)
+
+#define _DOSELF2_BRANCH(f) PASTE(runner_doself2_branch, f)
+#define DOSELF2_BRANCH _DOSELF2_BRANCH(FUNCTION)
+
+#define _DOSELF2(f) PASTE(runner_doself2, f)
+#define DOSELF2 _DOSELF2(FUNCTION)
+
+#define _DOSELF_SUBSET(f) PASTE(runner_doself_subset, f)
+#define DOSELF_SUBSET _DOSELF_SUBSET(FUNCTION)
+
+#define _DOSELF_SUBSET_BRANCH(f) PASTE(runner_doself_subset_branch, f)
+#define DOSELF_SUBSET_BRANCH _DOSELF_SUBSET_BRANCH(FUNCTION)
+
+#define _DOSUB_SELF1(f) PASTE(runner_dosub_self1, f)
+#define DOSUB_SELF1 _DOSUB_SELF1(FUNCTION)
+
+#define _DOSUB_PAIR1(f) PASTE(runner_dosub_pair1, f)
+#define DOSUB_PAIR1 _DOSUB_PAIR1(FUNCTION)
+
+#define _DOSUB_SELF2(f) PASTE(runner_dosub_self2, f)
+#define DOSUB_SELF2 _DOSUB_SELF2(FUNCTION)
+
+#define _DOSUB_PAIR2(f) PASTE(runner_dosub_pair2, f)
+#define DOSUB_PAIR2 _DOSUB_PAIR2(FUNCTION)
+
+#define _DOSUB_SUBSET(f) PASTE(runner_dosub_subset, f)
+#define DOSUB_SUBSET _DOSUB_SUBSET(FUNCTION)
+
+#define _IACT_NONSYM(f) PASTE(runner_iact_nonsym, f)
+#define IACT_NONSYM _IACT_NONSYM(FUNCTION)
+
+#define _IACT(f) PASTE(runner_iact, f)
+#define IACT _IACT(FUNCTION)
+
+#define _IACT_NONSYM_VEC(f) PASTE(runner_iact_nonsym_vec, f)
+#define IACT_NONSYM_VEC _IACT_NONSYM_VEC(FUNCTION)
+
+#define _IACT_VEC(f) PASTE(runner_iact_vec, f)
+#define IACT_VEC _IACT_VEC(FUNCTION)
+
+#define _TIMER_DOSELF(f) PASTE(timer_doself, f)
+#define TIMER_DOSELF _TIMER_DOSELF(FUNCTION)
+
+#define _TIMER_DOPAIR(f) PASTE(timer_dopair, f)
+#define TIMER_DOPAIR _TIMER_DOPAIR(FUNCTION)
+
+#define _TIMER_DOSUB_SELF(f) PASTE(timer_dosub_self, f)
+#define TIMER_DOSUB_SELF _TIMER_DOSUB_SELF(FUNCTION)
+
+#define _TIMER_DOSUB_PAIR(f) PASTE(timer_dosub_pair, f)
+#define TIMER_DOSUB_PAIR _TIMER_DOSUB_PAIR(FUNCTION)
+
+#define _TIMER_DOSELF_SUBSET(f) PASTE(timer_doself_subset, f)
+#define TIMER_DOSELF_SUBSET _TIMER_DOSELF_SUBSET(FUNCTION)
+
+#define _TIMER_DOPAIR_SUBSET(f) PASTE(timer_dopair_subset, f)
+#define TIMER_DOPAIR_SUBSET _TIMER_DOPAIR_SUBSET(FUNCTION)
+
+void DOSELF1_BRANCH(struct runner *r, struct cell *c);
+void DOSELF2_BRANCH(struct runner *r, struct cell *c);
+
+void DOPAIR1_BRANCH(struct runner *r, struct cell *ci, struct cell *cj);
+void DOPAIR2_BRANCH(struct runner *r, struct cell *ci, struct cell *cj);
+
+void DOSUB_SELF1(struct runner *r, struct cell *ci, int gettimer);
+void DOSUB_SELF2(struct runner *r, struct cell *ci, int gettimer);
+
+void DOSUB_PAIR1(struct runner *r, struct cell *ci, struct cell *cj,
+                 int gettimer);
+void DOSUB_PAIR2(struct runner *r, struct cell *ci, struct cell *cj,
+                 int gettimer);
+
+void DOSELF_SUBSET_BRANCH(struct runner *r, struct cell *restrict ci,
+                          struct part *restrict parts, int *restrict ind,
+                          int count);
+
+void DOPAIR_SUBSET_BRANCH(struct runner *r, struct cell *restrict ci,
+                          struct part *restrict parts_i, int *restrict ind,
+                          int count, struct cell *restrict cj);
+
+void DOSUB_SUBSET(struct runner *r, struct cell *ci, struct part *parts,
+                  int *ind, int count, struct cell *cj, int gettimer);
diff --git a/src/runner_doiact_vec.c b/src/runner_doiact_hydro_vec.c
similarity index 99%
rename from src/runner_doiact_vec.c
rename to src/runner_doiact_hydro_vec.c
index 68f34b0d3b8fc9c79097522f8a1618f86957612e..59401e4050dcb4481d1c56aa8857106558a06880 100644
--- a/src/runner_doiact_vec.c
+++ b/src/runner_doiact_hydro_vec.c
@@ -21,7 +21,7 @@
 #include "../config.h"
 
 /* This object's header. */
-#include "runner_doiact_vec.h"
+#include "runner_doiact_hydro_vec.h"
 
 #if defined(WITH_VECTORIZATION) && defined(GADGET2_SPH)
 
diff --git a/src/runner_doiact_vec.h b/src/runner_doiact_hydro_vec.h
similarity index 100%
rename from src/runner_doiact_vec.h
rename to src/runner_doiact_hydro_vec.h
diff --git a/src/runner_doiact_stars.c b/src/runner_doiact_stars.c
new file mode 100644
index 0000000000000000000000000000000000000000..1e1267df5195f727a19252b6ee654629e23149b6
--- /dev/null
+++ b/src/runner_doiact_stars.c
@@ -0,0 +1,47 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
+ *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *               2015 Peter W. Draper (p.w.draper@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+/* Config parameters. */
+#include "../config.h"
+
+/* Local headers. */
+#include "active.h"
+#include "cell.h"
+#include "engine.h"
+#include "feedback.h"
+#include "runner.h"
+#include "space_getsid.h"
+#include "stars.h"
+#include "timers.h"
+
+/* Import the stars density loop functions. */
+#define FUNCTION density
+#define FUNCTION_TASK_LOOP TASK_LOOP_DENSITY
+#include "runner_doiact_functions_stars.h"
+#undef FUNCTION_TASK_LOOP
+#undef FUNCTION
+
+/* Import the stars feedback loop functions. */
+#define FUNCTION feedback
+#define FUNCTION_TASK_LOOP TASK_LOOP_FEEDBACK
+#include "runner_doiact_functions_stars.h"
+#undef FUNCTION_TASK_LOOP
+#undef FUNCTION
diff --git a/src/runner_doiact_stars.h b/src/runner_doiact_stars.h
index 7e9780def83bbdbab83a431a757a52f3ba51d2e4..2d41d5a0bd1b1003039e1795eec205889b46baf6 100644
--- a/src/runner_doiact_stars.h
+++ b/src/runner_doiact_stars.h
@@ -86,1307 +86,21 @@
 #define _IACT_STARS(f) PASTE(runner_iact_nonsym_stars, f)
 #define IACT_STARS _IACT_STARS(FUNCTION)
 
-/**
- * @brief Calculate the number density of #part around the #spart
- *
- * @param r runner task
- * @param c cell
- * @param timer 1 if the time is to be recorded.
- */
-void DOSELF1_STARS(struct runner *r, struct cell *c, int timer) {
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (c->nodeID != engine_rank) error("Should be run on a different node");
-#endif
-
-  TIMER_TIC;
-
-  const struct engine *e = r->e;
-  const int with_cosmology = e->policy & engine_policy_cosmology;
-  const integertime_t ti_current = e->ti_current;
-  const struct cosmology *cosmo = e->cosmology;
-
-  /* Anything to do here? */
-  if (c->hydro.count == 0 || c->stars.count == 0) return;
-  if (!cell_is_active_stars(c, e)) return;
-
-  /* Cosmological terms */
-  const float a = cosmo->a;
-  const float H = cosmo->H;
-
-  const int scount = c->stars.count;
-  const int count = c->hydro.count;
-  struct spart *restrict sparts = c->stars.parts;
-  struct part *restrict parts = c->hydro.parts;
-  struct xpart *restrict xparts = c->hydro.xparts;
-
-  /* Loop over the sparts in ci. */
-  for (int sid = 0; sid < scount; sid++) {
-
-    /* Get a hold of the ith spart in ci. */
-    struct spart *restrict si = &sparts[sid];
-
-    /* Skip inactive particles */
-    if (!spart_is_active(si, e)) continue;
-
-    /* Skip inactive particles */
-    if (!feedback_is_active(si, e->time, cosmo, with_cosmology)) continue;
-
-    const float hi = si->h;
-    const float hig2 = hi * hi * kernel_gamma2;
-    const float six[3] = {(float)(si->x[0] - c->loc[0]),
-                          (float)(si->x[1] - c->loc[1]),
-                          (float)(si->x[2] - c->loc[2])};
-
-    /* Loop over the parts in cj. */
-    for (int pjd = 0; pjd < count; pjd++) {
-
-      /* Get a pointer to the jth particle. */
-      struct part *restrict pj = &parts[pjd];
-      struct xpart *restrict xpj = &xparts[pjd];
-      const float hj = pj->h;
-
-      /* Early abort? */
-      if (part_is_inhibited(pj, e)) continue;
-
-      /* Compute the pairwise distance. */
-      const float pjx[3] = {(float)(pj->x[0] - c->loc[0]),
-                            (float)(pj->x[1] - c->loc[1]),
-                            (float)(pj->x[2] - c->loc[2])};
-      float dx[3] = {six[0] - pjx[0], six[1] - pjx[1], six[2] - pjx[2]};
-      const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
-
-#ifdef SWIFT_DEBUG_CHECKS
-      /* Check that particles have been drifted to the current time */
-      if (pj->ti_drift != e->ti_current)
-        error("Particle pj not drifted to current time");
-#endif
-
-      if (r2 < hig2) {
-        IACT_STARS(r2, dx, hi, hj, si, pj, a, H);
-#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
-        runner_iact_nonsym_feedback_density(r2, dx, hi, hj, si, pj, xpj, cosmo,
-                                            ti_current);
-#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK)
-        runner_iact_nonsym_feedback_apply(r2, dx, hi, hj, si, pj, xpj, cosmo,
-                                          ti_current);
-#endif
-      }
-    } /* loop over the parts in ci. */
-  }   /* loop over the sparts in ci. */
-
-  TIMER_TOC(TIMER_DOSELF_STARS);
-}
-
-/**
- * @brief Calculate the number density of cj #part around the ci #spart
- *
- * @param r runner task
- * @param ci The first #cell
- * @param cj The second #cell
- */
-void DO_NONSYM_PAIR1_STARS_NAIVE(struct runner *r, struct cell *restrict ci,
-                                 struct cell *restrict cj) {
-
-#ifdef SWIFT_DEBUG_CHECKS
-#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
-  if (ci->nodeID != engine_rank) error("Should be run on a different node");
-#else
-  if (cj->nodeID != engine_rank) error("Should be run on a different node");
-#endif
-#endif
-
-  const struct engine *e = r->e;
-  const int with_cosmology = e->policy & engine_policy_cosmology;
-  const integertime_t ti_current = e->ti_current;
-  const struct cosmology *cosmo = e->cosmology;
-
-  /* Anything to do here? */
-  if (cj->hydro.count == 0 || ci->stars.count == 0) return;
-  if (!cell_is_active_stars(ci, e)) return;
-
-  /* Cosmological terms */
-  const float a = cosmo->a;
-  const float H = cosmo->H;
-
-  const int scount_i = ci->stars.count;
-  const int count_j = cj->hydro.count;
-  struct spart *restrict sparts_i = ci->stars.parts;
-  struct part *restrict parts_j = cj->hydro.parts;
-  struct xpart *restrict xparts_j = cj->hydro.xparts;
-
-  /* Get the relative distance between the pairs, wrapping. */
-  double shift[3] = {0.0, 0.0, 0.0};
-  for (int k = 0; k < 3; k++) {
-    if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2)
-      shift[k] = e->s->dim[k];
-    else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2)
-      shift[k] = -e->s->dim[k];
-  }
-
-  /* Loop over the sparts in ci. */
-  for (int sid = 0; sid < scount_i; sid++) {
-
-    /* Get a hold of the ith spart in ci. */
-    struct spart *restrict si = &sparts_i[sid];
-
-    /* Skip inactive particles */
-    if (!spart_is_active(si, e)) continue;
-
-    /* Skip inactive particles */
-    if (!feedback_is_active(si, e->time, cosmo, with_cosmology)) continue;
-
-    const float hi = si->h;
-    const float hig2 = hi * hi * kernel_gamma2;
-    const float six[3] = {(float)(si->x[0] - (cj->loc[0] + shift[0])),
-                          (float)(si->x[1] - (cj->loc[1] + shift[1])),
-                          (float)(si->x[2] - (cj->loc[2] + shift[2]))};
-
-    /* Loop over the parts in cj. */
-    for (int pjd = 0; pjd < count_j; pjd++) {
-
-      /* Get a pointer to the jth particle. */
-      struct part *restrict pj = &parts_j[pjd];
-      struct xpart *restrict xpj = &xparts_j[pjd];
-      const float hj = pj->h;
-
-      /* Skip inhibited particles. */
-      if (part_is_inhibited(pj, e)) continue;
-
-      /* Compute the pairwise distance. */
-      const float pjx[3] = {(float)(pj->x[0] - cj->loc[0]),
-                            (float)(pj->x[1] - cj->loc[1]),
-                            (float)(pj->x[2] - cj->loc[2])};
-      float dx[3] = {six[0] - pjx[0], six[1] - pjx[1], six[2] - pjx[2]};
-      const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
-
-#ifdef SWIFT_DEBUG_CHECKS
-      /* Check that particles have been drifted to the current time */
-      if (pj->ti_drift != e->ti_current)
-        error("Particle pj not drifted to current time");
-#endif
-
-      if (r2 < hig2) {
-        IACT_STARS(r2, dx, hi, hj, si, pj, a, H);
-
-#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
-        runner_iact_nonsym_feedback_density(r2, dx, hi, hj, si, pj, xpj, cosmo,
-                                            ti_current);
-#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK)
-        runner_iact_nonsym_feedback_apply(r2, dx, hi, hj, si, pj, xpj, cosmo,
-                                          ti_current);
-#endif
-      }
-    } /* loop over the parts in cj. */
-  }   /* loop over the parts in ci. */
-}
-
-/**
- * @brief Compute the interactions between a cell pair.
- *
- * @param r The #runner.
- * @param ci The first #cell.
- * @param cj The second #cell.
- * @param sid The direction of the pair.
- * @param shift The shift vector to apply to the particles in ci.
- */
-void DO_SYM_PAIR1_STARS(struct runner *r, struct cell *ci, struct cell *cj,
-                        const int sid, const double *shift) {
-
-  TIMER_TIC;
-
-  const struct engine *e = r->e;
-  const int with_cosmology = e->policy & engine_policy_cosmology;
-  const integertime_t ti_current = e->ti_current;
-  const struct cosmology *cosmo = e->cosmology;
-
-  /* Cosmological terms */
-  const float a = cosmo->a;
-  const float H = cosmo->H;
-
-  /* Get the cutoff shift. */
-  double rshift = 0.0;
-  for (int k = 0; k < 3; k++) rshift += shift[k] * runner_shift[sid][k];
-
-#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
-  const int do_ci_stars = (ci->nodeID == e->nodeID) && (ci->stars.count != 0) &&
-                          (cj->hydro.count != 0) && cell_is_active_stars(ci, e);
-  const int do_cj_stars = (cj->nodeID == e->nodeID) && (cj->stars.count != 0) &&
-                          (ci->hydro.count != 0) && cell_is_active_stars(cj, e);
-#else
-  /* here we are updating the hydro -> switch ci, cj for local */
-  const int do_ci_stars = (cj->nodeID == e->nodeID) && (ci->stars.count != 0) &&
-                          (cj->hydro.count != 0) && cell_is_active_stars(ci, e);
-  const int do_cj_stars = (ci->nodeID == e->nodeID) && (cj->stars.count != 0) &&
-                          (ci->hydro.count != 0) && cell_is_active_stars(cj, e);
-#endif
-
-  if (do_ci_stars) {
-
-    /* Pick-out the sorted lists. */
-    const struct sort_entry *restrict sort_j = cj->hydro.sort[sid];
-    const struct sort_entry *restrict sort_i = ci->stars.sort[sid];
-
-#ifdef SWIFT_DEBUG_CHECKS
-    /* Some constants used to checks that the parts are in the right frame */
-    const float shift_threshold_x =
-        2. * ci->width[0] +
-        2. * max(ci->stars.dx_max_part, cj->hydro.dx_max_part);
-    const float shift_threshold_y =
-        2. * ci->width[1] +
-        2. * max(ci->stars.dx_max_part, cj->hydro.dx_max_part);
-    const float shift_threshold_z =
-        2. * ci->width[2] +
-        2. * max(ci->stars.dx_max_part, cj->hydro.dx_max_part);
-#endif /* SWIFT_DEBUG_CHECKS */
-
-    /* Get some other useful values. */
-    const double hi_max = ci->stars.h_max * kernel_gamma - rshift;
-    const int count_i = ci->stars.count;
-    const int count_j = cj->hydro.count;
-    struct spart *restrict sparts_i = ci->stars.parts;
-    struct part *restrict parts_j = cj->hydro.parts;
-    struct xpart *restrict xparts_j = cj->hydro.xparts;
-    const double dj_min = sort_j[0].d;
-    const float dx_max_rshift =
-        (ci->stars.dx_max_sort + cj->hydro.dx_max_sort) - rshift;
-    const float dx_max = (ci->stars.dx_max_sort + cj->hydro.dx_max_sort);
-
-    /* Loop over the sparts in ci. */
-    for (int pid = count_i - 1;
-         pid >= 0 && sort_i[pid].d + hi_max + dx_max > dj_min; pid--) {
-
-      /* Get a hold of the ith part in ci. */
-      struct spart *restrict spi = &sparts_i[sort_i[pid].i];
-      const float hi = spi->h;
-
-      /* Skip inactive particles */
-      if (!spart_is_active(spi, e)) continue;
-
-      /* Skip inactive particles */
-      if (!feedback_is_active(spi, e->time, cosmo, with_cosmology)) continue;
-
-      /* Compute distance from the other cell. */
-      const double px[3] = {spi->x[0], spi->x[1], spi->x[2]};
-      float dist = px[0] * runner_shift[sid][0] + px[1] * runner_shift[sid][1] +
-                   px[2] * runner_shift[sid][2];
-
-      /* Is there anything we need to interact with ? */
-      const double di = dist + hi * kernel_gamma + dx_max_rshift;
-      if (di < dj_min) continue;
-
-      /* Get some additional information about pi */
-      const float hig2 = hi * hi * kernel_gamma2;
-      const float pix = spi->x[0] - (cj->loc[0] + shift[0]);
-      const float piy = spi->x[1] - (cj->loc[1] + shift[1]);
-      const float piz = spi->x[2] - (cj->loc[2] + shift[2]);
-
-      /* Loop over the parts in cj. */
-      for (int pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) {
-
-        /* Recover pj */
-        struct part *pj = &parts_j[sort_j[pjd].i];
-        struct xpart *xpj = &xparts_j[sort_j[pjd].i];
-
-        /* Skip inhibited particles. */
-        if (part_is_inhibited(pj, e)) continue;
-
-        const float hj = pj->h;
-        const float pjx = pj->x[0] - cj->loc[0];
-        const float pjy = pj->x[1] - cj->loc[1];
-        const float pjz = pj->x[2] - cj->loc[2];
-
-        /* Compute the pairwise distance. */
-        float dx[3] = {pix - pjx, piy - pjy, piz - pjz};
-        const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
-
-#ifdef SWIFT_DEBUG_CHECKS
-        /* Check that particles are in the correct frame after the shifts */
-        if (pix > shift_threshold_x || pix < -shift_threshold_x)
-          error(
-              "Invalid particle position in X for pi (pix=%e ci->width[0]=%e)",
-              pix, ci->width[0]);
-        if (piy > shift_threshold_y || piy < -shift_threshold_y)
-          error(
-              "Invalid particle position in Y for pi (piy=%e ci->width[1]=%e)",
-              piy, ci->width[1]);
-        if (piz > shift_threshold_z || piz < -shift_threshold_z)
-          error(
-              "Invalid particle position in Z for pi (piz=%e ci->width[2]=%e)",
-              piz, ci->width[2]);
-        if (pjx > shift_threshold_x || pjx < -shift_threshold_x)
-          error(
-              "Invalid particle position in X for pj (pjx=%e ci->width[0]=%e)",
-              pjx, ci->width[0]);
-        if (pjy > shift_threshold_y || pjy < -shift_threshold_y)
-          error(
-              "Invalid particle position in Y for pj (pjy=%e ci->width[1]=%e)",
-              pjy, ci->width[1]);
-        if (pjz > shift_threshold_z || pjz < -shift_threshold_z)
-          error(
-              "Invalid particle position in Z for pj (pjz=%e ci->width[2]=%e)",
-              pjz, ci->width[2]);
-
-        /* Check that particles have been drifted to the current time */
-        if (spi->ti_drift != e->ti_current)
-          error("Particle spi not drifted to current time");
-        if (pj->ti_drift != e->ti_current)
-          error("Particle pj not drifted to current time");
-#endif
-
-        /* Hit or miss? */
-        if (r2 < hig2) {
-          IACT_STARS(r2, dx, hi, hj, spi, pj, a, H);
-
-#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
-          runner_iact_nonsym_feedback_density(r2, dx, hi, hj, spi, pj, xpj,
-                                              cosmo, ti_current);
-#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK)
-          runner_iact_nonsym_feedback_apply(r2, dx, hi, hj, spi, pj, xpj, cosmo,
-                                            ti_current);
-#endif
-        }
-      } /* loop over the parts in cj. */
-    }   /* loop over the parts in ci. */
-  }     /* do_ci_stars */
-
-  if (do_cj_stars) {
-    /* Pick-out the sorted lists. */
-    const struct sort_entry *restrict sort_i = ci->hydro.sort[sid];
-    const struct sort_entry *restrict sort_j = cj->stars.sort[sid];
-
-#ifdef SWIFT_DEBUG_CHECKS
-    /* Some constants used to checks that the parts are in the right frame */
-    const float shift_threshold_x =
-        2. * ci->width[0] +
-        2. * max(ci->hydro.dx_max_part, cj->stars.dx_max_part);
-    const float shift_threshold_y =
-        2. * ci->width[1] +
-        2. * max(ci->hydro.dx_max_part, cj->stars.dx_max_part);
-    const float shift_threshold_z =
-        2. * ci->width[2] +
-        2. * max(ci->hydro.dx_max_part, cj->stars.dx_max_part);
-#endif /* SWIFT_DEBUG_CHECKS */
-
-    /* Get some other useful values. */
-    const double hj_max = cj->hydro.h_max * kernel_gamma;
-    const int count_i = ci->hydro.count;
-    const int count_j = cj->stars.count;
-    struct part *restrict parts_i = ci->hydro.parts;
-    struct xpart *restrict xparts_i = ci->hydro.xparts;
-    struct spart *restrict sparts_j = cj->stars.parts;
-    const double di_max = sort_i[count_i - 1].d - rshift;
-    const float dx_max_rshift =
-        (ci->hydro.dx_max_sort + cj->stars.dx_max_sort) + rshift;
-    const float dx_max = (ci->hydro.dx_max_sort + cj->stars.dx_max_sort);
-
-    /* Loop over the parts in cj. */
-    for (int pjd = 0; pjd < count_j && sort_j[pjd].d - hj_max - dx_max < di_max;
-         pjd++) {
-
-      /* Get a hold of the jth part in cj. */
-      struct spart *spj = &sparts_j[sort_j[pjd].i];
-      const float hj = spj->h;
-
-      /* Skip inactive particles */
-      if (!spart_is_active(spj, e)) continue;
-
-      /* Skip inactive particles */
-      if (!feedback_is_active(spj, e->time, cosmo, with_cosmology)) continue;
-
-      /* Compute distance from the other cell. */
-      const double px[3] = {spj->x[0], spj->x[1], spj->x[2]};
-      float dist = px[0] * runner_shift[sid][0] + px[1] * runner_shift[sid][1] +
-                   px[2] * runner_shift[sid][2];
-
-      /* Is there anything we need to interact with ? */
-      const double dj = dist - hj * kernel_gamma - dx_max_rshift;
-      if (dj - rshift > di_max) continue;
-
-      /* Get some additional information about pj */
-      const float hjg2 = hj * hj * kernel_gamma2;
-      const float pjx = spj->x[0] - cj->loc[0];
-      const float pjy = spj->x[1] - cj->loc[1];
-      const float pjz = spj->x[2] - cj->loc[2];
-
-      /* Loop over the parts in ci. */
-      for (int pid = count_i - 1; pid >= 0 && sort_i[pid].d > dj; pid--) {
-
-        /* Recover pi */
-        struct part *pi = &parts_i[sort_i[pid].i];
-        struct xpart *xpi = &xparts_i[sort_i[pid].i];
-
-        /* Skip inhibited particles. */
-        if (part_is_inhibited(pi, e)) continue;
-
-        const float hi = pi->h;
-        const float pix = pi->x[0] - (cj->loc[0] + shift[0]);
-        const float piy = pi->x[1] - (cj->loc[1] + shift[1]);
-        const float piz = pi->x[2] - (cj->loc[2] + shift[2]);
-
-        /* Compute the pairwise distance. */
-        float dx[3] = {pjx - pix, pjy - piy, pjz - piz};
-        const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
-
-#ifdef SWIFT_DEBUG_CHECKS
-        /* Check that particles are in the correct frame after the shifts */
-        if (pix > shift_threshold_x || pix < -shift_threshold_x)
-          error(
-              "Invalid particle position in X for pi (pix=%e ci->width[0]=%e)",
-              pix, ci->width[0]);
-        if (piy > shift_threshold_y || piy < -shift_threshold_y)
-          error(
-              "Invalid particle position in Y for pi (piy=%e ci->width[1]=%e)",
-              piy, ci->width[1]);
-        if (piz > shift_threshold_z || piz < -shift_threshold_z)
-          error(
-              "Invalid particle position in Z for pi (piz=%e ci->width[2]=%e)",
-              piz, ci->width[2]);
-        if (pjx > shift_threshold_x || pjx < -shift_threshold_x)
-          error(
-              "Invalid particle position in X for pj (pjx=%e ci->width[0]=%e)",
-              pjx, ci->width[0]);
-        if (pjy > shift_threshold_y || pjy < -shift_threshold_y)
-          error(
-              "Invalid particle position in Y for pj (pjy=%e ci->width[1]=%e)",
-              pjy, ci->width[1]);
-        if (pjz > shift_threshold_z || pjz < -shift_threshold_z)
-          error(
-              "Invalid particle position in Z for pj (pjz=%e ci->width[2]=%e)",
-              pjz, ci->width[2]);
-
-        /* Check that particles have been drifted to the current time */
-        if (pi->ti_drift != e->ti_current)
-          error("Particle pi not drifted to current time");
-        if (spj->ti_drift != e->ti_current)
-          error("Particle spj not drifted to current time");
-#endif
-
-        /* Hit or miss? */
-        if (r2 < hjg2) {
-
-          IACT_STARS(r2, dx, hj, hi, spj, pi, a, H);
-
-#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
-          runner_iact_nonsym_feedback_density(r2, dx, hj, hi, spj, pi, xpi,
-                                              cosmo, ti_current);
-#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK)
-          runner_iact_nonsym_feedback_apply(r2, dx, hj, hi, spj, pi, xpi, cosmo,
-                                            ti_current);
-#endif
-        }
-      } /* loop over the parts in ci. */
-    }   /* loop over the parts in cj. */
-  }     /* Cell cj is active */
-
-  TIMER_TOC(TIMER_DOPAIR_STARS);
-}
-
-void DOPAIR1_STARS_NAIVE(struct runner *r, struct cell *restrict ci,
-                         struct cell *restrict cj, int timer) {
-
-  TIMER_TIC;
-
-#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
-  const int do_ci_stars = ci->nodeID == r->e->nodeID;
-  const int do_cj_stars = cj->nodeID == r->e->nodeID;
-#else
-  /* here we are updating the hydro -> switch ci, cj */
-  const int do_ci_stars = cj->nodeID == r->e->nodeID;
-  const int do_cj_stars = ci->nodeID == r->e->nodeID;
-#endif
-  if (do_ci_stars && ci->stars.count != 0 && cj->hydro.count != 0)
-    DO_NONSYM_PAIR1_STARS_NAIVE(r, ci, cj);
-  if (do_cj_stars && cj->stars.count != 0 && ci->hydro.count != 0)
-    DO_NONSYM_PAIR1_STARS_NAIVE(r, cj, ci);
-
-  TIMER_TOC(TIMER_DOPAIR_STARS);
-}
-
-/**
- * @brief Compute the interactions between a cell pair, but only for the
- *      given indices in ci.
- *
- * Version using a brute-force algorithm.
- *
- * @param r The #runner.
- * @param ci The first #cell.
- * @param sparts_i The #part to interact with @c cj.
- * @param ind The list of indices of particles in @c ci to interact with.
- * @param scount The number of particles in @c ind.
- * @param cj The second #cell.
- * @param sid The direction of the pair.
- * @param flipped Flag to check whether the cells have been flipped or not.
- * @param shift The shift vector to apply to the particles in ci.
- */
-void DOPAIR1_SUBSET_STARS(struct runner *r, struct cell *restrict ci,
-                          struct spart *restrict sparts_i, int *restrict ind,
-                          int scount, struct cell *restrict cj, const int sid,
-                          const int flipped, const double *shift) {
-
-  const struct engine *e = r->e;
-  const integertime_t ti_current = e->ti_current;
-  const struct cosmology *cosmo = e->cosmology;
-
-  /* Cosmological terms */
-  const float a = cosmo->a;
-  const float H = cosmo->H;
-
-  const int count_j = cj->hydro.count;
-  struct part *restrict parts_j = cj->hydro.parts;
-  struct xpart *restrict xparts_j = cj->hydro.xparts;
-
-  /* Early abort? */
-  if (count_j == 0) return;
-
-  /* Pick-out the sorted lists. */
-  const struct sort_entry *restrict sort_j = cj->hydro.sort[sid];
-  const float dxj = cj->hydro.dx_max_sort;
-
-  /* Sparts are on the left? */
-  if (!flipped) {
-
-    /* Loop over the sparts_i. */
-    for (int pid = 0; pid < scount; pid++) {
-
-      /* Get a hold of the ith spart in ci. */
-      struct spart *restrict spi = &sparts_i[ind[pid]];
-      const double pix = spi->x[0] - (shift[0]);
-      const double piy = spi->x[1] - (shift[1]);
-      const double piz = spi->x[2] - (shift[2]);
-      const float hi = spi->h;
-      const float hig2 = hi * hi * kernel_gamma2;
-      const double di = hi * kernel_gamma + dxj + pix * runner_shift[sid][0] +
-                        piy * runner_shift[sid][1] + piz * runner_shift[sid][2];
-
-      /* Loop over the parts in cj. */
-      for (int pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) {
-
-        /* Get a pointer to the jth particle. */
-        struct part *restrict pj = &parts_j[sort_j[pjd].i];
-        struct xpart *restrict xpj = &xparts_j[sort_j[pjd].i];
-
-        /* Skip inhibited particles. */
-        if (part_is_inhibited(pj, e)) continue;
-
-        const double pjx = pj->x[0];
-        const double pjy = pj->x[1];
-        const double pjz = pj->x[2];
-        const float hj = pj->h;
-
-        /* Compute the pairwise distance. */
-        float dx[3] = {(float)(pix - pjx), (float)(piy - pjy),
-                       (float)(piz - pjz)};
-        const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
-
-#ifdef SWIFT_DEBUG_CHECKS
-        /* Check that particles have been drifted to the current time */
-        if (spi->ti_drift != e->ti_current)
-          error("Particle pi not drifted to current time");
-        if (pj->ti_drift != e->ti_current)
-          error("Particle pj not drifted to current time");
-#endif
-
-        /* Hit or miss? */
-        if (r2 < hig2) {
-          IACT_STARS(r2, dx, hi, hj, spi, pj, a, H);
-
-#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
-          runner_iact_nonsym_feedback_density(r2, dx, hi, hj, spi, pj, xpj,
-                                              cosmo, ti_current);
-#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK)
-          runner_iact_nonsym_feedback_apply(r2, dx, hi, hj, spi, pj, xpj, cosmo,
-                                            ti_current);
-#endif
-        }
-      } /* loop over the parts in cj. */
-    }   /* loop over the sparts in ci. */
-  }
-
-  /* Sparts are on the right. */
-  else {
-
-    /* Loop over the sparts_i. */
-    for (int pid = 0; pid < scount; pid++) {
-
-      /* Get a hold of the ith spart in ci. */
-      struct spart *restrict spi = &sparts_i[ind[pid]];
-      const double pix = spi->x[0] - (shift[0]);
-      const double piy = spi->x[1] - (shift[1]);
-      const double piz = spi->x[2] - (shift[2]);
-      const float hi = spi->h;
-      const float hig2 = hi * hi * kernel_gamma2;
-      const double di = -hi * kernel_gamma - dxj + pix * runner_shift[sid][0] +
-                        piy * runner_shift[sid][1] + piz * runner_shift[sid][2];
-
-      /* Loop over the parts in cj. */
-      for (int pjd = count_j - 1; pjd >= 0 && di < sort_j[pjd].d; pjd--) {
-
-        /* Get a pointer to the jth particle. */
-        struct part *restrict pj = &parts_j[sort_j[pjd].i];
-        struct xpart *restrict xpj = &xparts_j[sort_j[pjd].i];
-
-        /* Skip inhibited particles. */
-        if (part_is_inhibited(pj, e)) continue;
-
-        const double pjx = pj->x[0];
-        const double pjy = pj->x[1];
-        const double pjz = pj->x[2];
-        const float hj = pj->h;
-
-        /* Compute the pairwise distance. */
-        float dx[3] = {(float)(pix - pjx), (float)(piy - pjy),
-                       (float)(piz - pjz)};
-        const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
-
-#ifdef SWIFT_DEBUG_CHECKS
-        /* Check that particles have been drifted to the current time */
-        if (spi->ti_drift != e->ti_current)
-          error("Particle pi not drifted to current time");
-        if (pj->ti_drift != e->ti_current)
-          error("Particle pj not drifted to current time");
-#endif
-
-        /* Hit or miss? */
-        if (r2 < hig2) {
-          IACT_STARS(r2, dx, hi, hj, spi, pj, a, H);
-
-#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
-          runner_iact_nonsym_feedback_density(r2, dx, hi, hj, spi, pj, xpj,
-                                              cosmo, ti_current);
-#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK)
-          runner_iact_nonsym_feedback_apply(r2, dx, hi, hj, spi, pj, xpj, cosmo,
-                                            ti_current);
-#endif
-        }
-      } /* loop over the parts in cj. */
-    }   /* loop over the sparts in ci. */
-  }
-}
+void DOSELF1_BRANCH_STARS(struct runner *r, struct cell *c);
+void DOPAIR1_BRANCH_STARS(struct runner *r, struct cell *ci, struct cell *cj);
 
-/**
- * @brief Compute the interactions between a cell pair, but only for the
- *      given indices in ci.
- *
- * Version using a brute-force algorithm.
- *
- * @param r The #runner.
- * @param ci The first #cell.
- * @param sparts_i The #part to interact with @c cj.
- * @param ind The list of indices of particles in @c ci to interact with.
- * @param scount The number of particles in @c ind.
- * @param cj The second #cell.
- * @param shift The shift vector to apply to the particles in ci.
- */
-void DOPAIR1_SUBSET_STARS_NAIVE(struct runner *r, struct cell *restrict ci,
-                                struct spart *restrict sparts_i,
-                                int *restrict ind, int scount,
-                                struct cell *restrict cj, const double *shift) {
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (ci->nodeID != engine_rank) error("Should be run on a different node");
-#endif
-
-  const struct engine *e = r->e;
-  const integertime_t ti_current = e->ti_current;
-  const struct cosmology *cosmo = e->cosmology;
-
-  /* Cosmological terms */
-  const float a = cosmo->a;
-  const float H = cosmo->H;
-
-  const int count_j = cj->hydro.count;
-  struct part *restrict parts_j = cj->hydro.parts;
-  struct xpart *restrict xparts_j = cj->hydro.xparts;
-
-  /* Early abort? */
-  if (count_j == 0) return;
-
-  /* Loop over the parts_i. */
-  for (int pid = 0; pid < scount; pid++) {
-
-    /* Get a hold of the ith part in ci. */
-    struct spart *restrict spi = &sparts_i[ind[pid]];
-
-    const double pix = spi->x[0] - (shift[0]);
-    const double piy = spi->x[1] - (shift[1]);
-    const double piz = spi->x[2] - (shift[2]);
-    const float hi = spi->h;
-    const float hig2 = hi * hi * kernel_gamma2;
-
-#ifdef SWIFT_DEBUG_CHECKS
-    if (!spart_is_active(spi, e))
-      error("Trying to correct smoothing length of inactive particle !");
-#endif
-
-    /* Loop over the parts in cj. */
-    for (int pjd = 0; pjd < count_j; pjd++) {
-
-      /* Get a pointer to the jth particle. */
-      struct part *restrict pj = &parts_j[pjd];
-      struct xpart *restrict xpj = &xparts_j[pjd];
-
-      /* Skip inhibited particles */
-      if (part_is_inhibited(pj, e)) continue;
-
-      const double pjx = pj->x[0];
-      const double pjy = pj->x[1];
-      const double pjz = pj->x[2];
-      const float hj = pj->h;
-
-      /* Compute the pairwise distance. */
-      float dx[3] = {(float)(pix - pjx), (float)(piy - pjy),
-                     (float)(piz - pjz)};
-      const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
-
-#ifdef SWIFT_DEBUG_CHECKS
-      /* Check that particles have been drifted to the current time */
-      if (pj->ti_drift != e->ti_current)
-        error("Particle pj not drifted to current time");
-#endif
-      /* Hit or miss? */
-      if (r2 < hig2) {
-        IACT_STARS(r2, dx, hi, hj, spi, pj, a, H);
-
-#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
-        runner_iact_nonsym_feedback_density(r2, dx, hi, hj, spi, pj, xpj, cosmo,
-                                            ti_current);
-#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK)
-        runner_iact_nonsym_feedback_apply(r2, dx, hi, hj, spi, pj, xpj, cosmo,
-                                          ti_current);
-#endif
-      }
-    } /* loop over the parts in cj. */
-  }   /* loop over the parts in ci. */
-}
-
-/**
- * @brief Compute the interactions between a cell pair, but only for the
- *      given indices in ci.
- *
- * @param r The #runner.
- * @param ci The first #cell.
- * @param sparts The #spart to interact.
- * @param ind The list of indices of particles in @c ci to interact with.
- * @param scount The number of particles in @c ind.
- */
-void DOSELF1_SUBSET_STARS(struct runner *r, struct cell *restrict ci,
-                          struct spart *restrict sparts, int *restrict ind,
-                          int scount) {
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (ci->nodeID != engine_rank) error("Should be run on a different node");
-#endif
-
-  const struct engine *e = r->e;
-  const integertime_t ti_current = e->ti_current;
-  const struct cosmology *cosmo = e->cosmology;
-
-  /* Cosmological terms */
-  const float a = cosmo->a;
-  const float H = cosmo->H;
-
-  const int count_i = ci->hydro.count;
-  struct part *restrict parts_j = ci->hydro.parts;
-  struct xpart *restrict xparts_j = ci->hydro.xparts;
-
-  /* Early abort? */
-  if (count_i == 0) return;
-
-  /* Loop over the parts in ci. */
-  for (int spid = 0; spid < scount; spid++) {
-
-    /* Get a hold of the ith part in ci. */
-    struct spart *spi = &sparts[ind[spid]];
-    const float spix[3] = {(float)(spi->x[0] - ci->loc[0]),
-                           (float)(spi->x[1] - ci->loc[1]),
-                           (float)(spi->x[2] - ci->loc[2])};
-    const float hi = spi->h;
-    const float hig2 = hi * hi * kernel_gamma2;
-
-#ifdef SWIFT_DEBUG_CHECKS
-    if (!spart_is_active(spi, e))
-      error("Inactive particle in subset function!");
-#endif
-
-    /* Loop over the parts in cj. */
-    for (int pjd = 0; pjd < count_i; pjd++) {
-
-      /* Get a pointer to the jth particle. */
-      struct part *restrict pj = &parts_j[pjd];
-      struct xpart *restrict xpj = &xparts_j[pjd];
-
-      /* Early abort? */
-      if (part_is_inhibited(pj, e)) continue;
-
-      /* Compute the pairwise distance. */
-      const float pjx[3] = {(float)(pj->x[0] - ci->loc[0]),
-                            (float)(pj->x[1] - ci->loc[1]),
-                            (float)(pj->x[2] - ci->loc[2])};
-      float dx[3] = {spix[0] - pjx[0], spix[1] - pjx[1], spix[2] - pjx[2]};
-      const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
-
-#ifdef SWIFT_DEBUG_CHECKS
-      /* Check that particles have been drifted to the current time */
-      if (pj->ti_drift != e->ti_current)
-        error("Particle pj not drifted to current time");
-#endif
-
-      /* Hit or miss? */
-      if (r2 < hig2) {
-        IACT_STARS(r2, dx, hi, pj->h, spi, pj, a, H);
-#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
-        runner_iact_nonsym_feedback_density(r2, dx, hi, pj->h, spi, pj, xpj,
-                                            cosmo, ti_current);
-#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK)
-        runner_iact_nonsym_feedback_apply(r2, dx, hi, pj->h, spi, pj, xpj,
-                                          cosmo, ti_current);
-#endif
-      }
-    } /* loop over the parts in cj. */
-  }   /* loop over the parts in ci. */
-}
+void DOSUB_SELF1_STARS(struct runner *r, struct cell *ci, int gettimer);
+void DOSUB_PAIR1_STARS(struct runner *r, struct cell *ci, struct cell *cj,
+                       int gettimer);
 
-/**
- * @brief Determine which version of DOSELF1_SUBSET_STARS needs to be called
- * depending on the optimisation level.
- *
- * @param r The #runner.
- * @param ci The first #cell.
- * @param sparts The #spart to interact.
- * @param ind The list of indices of particles in @c ci to interact with.
- * @param scount The number of particles in @c ind.
- */
 void DOSELF1_SUBSET_BRANCH_STARS(struct runner *r, struct cell *restrict ci,
                                  struct spart *restrict sparts,
-                                 int *restrict ind, int scount) {
+                                 int *restrict ind, int scount);
 
-  DOSELF1_SUBSET_STARS(r, ci, sparts, ind, scount);
-}
-
-/**
- * @brief Determine which version of DOPAIR1_SUBSET_STARS needs to be called
- * depending on the orientation of the cells or whether DOPAIR1_SUBSET_STARS
- * needs to be called at all.
- *
- * @param r The #runner.
- * @param ci The first #cell.
- * @param sparts_i The #spart to interact with @c cj.
- * @param ind The list of indices of particles in @c ci to interact with.
- * @param scount The number of particles in @c ind.
- * @param cj The second #cell.
- */
 void DOPAIR1_SUBSET_BRANCH_STARS(struct runner *r, struct cell *restrict ci,
                                  struct spart *restrict sparts_i,
                                  int *restrict ind, int scount,
-                                 struct cell *restrict cj) {
-
-  const struct engine *e = r->e;
-
-  /* Anything to do here? */
-  if (cj->hydro.count == 0) return;
-
-  /* Get the relative distance between the pairs, wrapping. */
-  double shift[3] = {0.0, 0.0, 0.0};
-  for (int k = 0; k < 3; k++) {
-    if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2)
-      shift[k] = e->s->dim[k];
-    else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2)
-      shift[k] = -e->s->dim[k];
-  }
-
-#ifdef SWIFT_USE_NAIVE_INTERACTIONS_STARS
-  DOPAIR1_SUBSET_STARS_NAIVE(r, ci, sparts_i, ind, scount, cj, shift);
-#else
-  /* Get the sorting index. */
-  int sid = 0;
-  for (int k = 0; k < 3; k++)
-    sid = 3 * sid + ((cj->loc[k] - ci->loc[k] + shift[k] < 0)
-                         ? 0
-                         : (cj->loc[k] - ci->loc[k] + shift[k] > 0) ? 2 : 1);
-
-  /* Switch the cells around? */
-  const int flipped = runner_flip[sid];
-  sid = sortlistID[sid];
-
-  /* Has the cell cj been sorted? */
-  if (!(cj->hydro.sorted & (1 << sid)) ||
-      cj->hydro.dx_max_sort_old > space_maxreldx * cj->dmin)
-    error("Interacting unsorted cells.");
-
-  DOPAIR1_SUBSET_STARS(r, ci, sparts_i, ind, scount, cj, sid, flipped, shift);
-#endif
-}
+                                 struct cell *restrict cj);
 
 void DOSUB_SUBSET_STARS(struct runner *r, struct cell *ci, struct spart *sparts,
-                        int *ind, int scount, struct cell *cj, int gettimer) {
-
-  const struct engine *e = r->e;
-  struct space *s = e->s;
-
-  /* Should we even bother? */
-  if (!cell_is_active_stars(ci, e) &&
-      (cj == NULL || !cell_is_active_stars(cj, e)))
-    return;
-
-  /* Find out in which sub-cell of ci the parts are. */
-  struct cell *sub = NULL;
-  if (ci->split) {
-    for (int k = 0; k < 8; k++) {
-      if (ci->progeny[k] != NULL) {
-        if (&sparts[ind[0]] >= &ci->progeny[k]->stars.parts[0] &&
-            &sparts[ind[0]] <
-                &ci->progeny[k]->stars.parts[ci->progeny[k]->stars.count]) {
-          sub = ci->progeny[k];
-          break;
-        }
-      }
-    }
-  }
-
-  /* Is this a single cell? */
-  if (cj == NULL) {
-
-    /* Recurse? */
-    if (cell_can_recurse_in_self_stars_task(ci)) {
-
-      /* Loop over all progeny. */
-      DOSUB_SUBSET_STARS(r, sub, sparts, ind, scount, NULL, 0);
-      for (int j = 0; j < 8; j++)
-        if (ci->progeny[j] != sub && ci->progeny[j] != NULL)
-          DOSUB_SUBSET_STARS(r, sub, sparts, ind, scount, ci->progeny[j], 0);
-
-    }
-
-    /* Otherwise, compute self-interaction. */
-    else
-      DOSELF1_SUBSET_BRANCH_STARS(r, ci, sparts, ind, scount);
-  } /* self-interaction. */
-
-  /* Otherwise, it's a pair interaction. */
-  else {
-
-    /* Recurse? */
-    if (cell_can_recurse_in_pair_stars_task(ci, cj) &&
-        cell_can_recurse_in_pair_stars_task(cj, ci)) {
-
-      /* Get the type of pair and flip ci/cj if needed. */
-      double shift[3] = {0.0, 0.0, 0.0};
-      const int sid = space_getsid(s, &ci, &cj, shift);
-
-      struct cell_split_pair *csp = &cell_split_pairs[sid];
-      for (int k = 0; k < csp->count; k++) {
-        const int pid = csp->pairs[k].pid;
-        const int pjd = csp->pairs[k].pjd;
-        if (ci->progeny[pid] == sub && cj->progeny[pjd] != NULL)
-          DOSUB_SUBSET_STARS(r, ci->progeny[pid], sparts, ind, scount,
-                             cj->progeny[pjd], 0);
-        if (ci->progeny[pid] != NULL && cj->progeny[pjd] == sub)
-          DOSUB_SUBSET_STARS(r, cj->progeny[pjd], sparts, ind, scount,
-                             ci->progeny[pid], 0);
-      }
-    }
-
-    /* Otherwise, compute the pair directly. */
-    else if (cell_is_active_stars(ci, e) && cj->hydro.count > 0) {
-
-      /* Do any of the cells need to be drifted first? */
-      if (cell_is_active_stars(ci, e)) {
-        if (!cell_are_spart_drifted(ci, e)) error("Cell should be drifted!");
-        if (!cell_are_part_drifted(cj, e)) error("Cell should be drifted!");
-      }
-
-      DOPAIR1_SUBSET_BRANCH_STARS(r, ci, sparts, ind, scount, cj);
-    }
-
-  } /* otherwise, pair interaction. */
-}
-
-/**
- * @brief Determine which version of DOSELF1_STARS needs to be called depending
- * on the optimisation level.
- *
- * @param r #runner
- * @param c #cell c
- *
- */
-void DOSELF1_BRANCH_STARS(struct runner *r, struct cell *c) {
-
-  const struct engine *restrict e = r->e;
-
-  /* Anything to do here? */
-  if (c->stars.count == 0) return;
-
-  /* Anything to do here? */
-  if (!cell_is_active_stars(c, e)) return;
-
-  /* Did we mess up the recursion? */
-  if (c->stars.h_max_old * kernel_gamma > c->dmin)
-    error("Cell smaller than smoothing length");
-
-  DOSELF1_STARS(r, c, 1);
-}
-
-#define RUNNER_CHECK_SORT(TYPE, PART, cj, ci, sid)                          \
-  ({                                                                        \
-    const struct sort_entry *restrict sort_j = cj->TYPE.sort[sid];          \
-                                                                            \
-    for (int pjd = 0; pjd < cj->TYPE.count; pjd++) {                        \
-      const struct PART *p = &cj->TYPE.parts[sort_j[pjd].i];                \
-      if (PART##_is_inhibited(p, e)) continue;                              \
-                                                                            \
-      const float d = p->x[0] * runner_shift[sid][0] +                      \
-                      p->x[1] * runner_shift[sid][1] +                      \
-                      p->x[2] * runner_shift[sid][2];                       \
-      if ((fabsf(d - sort_j[pjd].d) - cj->TYPE.dx_max_sort) >               \
-              1.0e-4 * max(fabsf(d), cj->TYPE.dx_max_sort_old) &&           \
-          (fabsf(d - sort_j[pjd].d) - cj->TYPE.dx_max_sort) >               \
-              cj->width[0] * 1.0e-10)                                       \
-        error(                                                              \
-            "particle shift diff exceeds dx_max_sort in cell cj. "          \
-            "cj->nodeID=%d "                                                \
-            "ci->nodeID=%d d=%e sort_j[pjd].d=%e cj->" #TYPE                \
-            ".dx_max_sort=%e "                                              \
-            "cj->" #TYPE                                                    \
-            ".dx_max_sort_old=%e, cellID=%i super->cellID=%i"               \
-            "cj->depth=%d cj->maxdepth=%d",                                 \
-            cj->nodeID, ci->nodeID, d, sort_j[pjd].d, cj->TYPE.dx_max_sort, \
-            cj->TYPE.dx_max_sort_old, cj->cellID, cj->hydro.super->cellID,  \
-            cj->depth, cj->maxdepth);                                       \
-    }                                                                       \
-  })
-
-/**
- * @brief Determine which version of DOPAIR1_STARS needs to be called depending
- * on the orientation of the cells or whether DOPAIR1_STARS needs to be called
- * at all.
- *
- * @param r #runner
- * @param ci #cell ci
- * @param cj #cell cj
- *
- */
-void DOPAIR1_BRANCH_STARS(struct runner *r, struct cell *ci, struct cell *cj) {
-
-  const struct engine *restrict e = r->e;
-
-  /* Get the sort ID. */
-  double shift[3] = {0.0, 0.0, 0.0};
-  const int sid = space_getsid(e->s, &ci, &cj, shift);
-
-  const int ci_active = cell_is_active_stars(ci, e);
-  const int cj_active = cell_is_active_stars(cj, e);
-#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
-  const int do_ci_stars = ci->nodeID == e->nodeID;
-  const int do_cj_stars = cj->nodeID == e->nodeID;
-#else
-  /* here we are updating the hydro -> switch ci, cj */
-  const int do_ci_stars = cj->nodeID == e->nodeID;
-  const int do_cj_stars = ci->nodeID == e->nodeID;
-#endif
-  const int do_ci = (ci->stars.count != 0 && cj->hydro.count != 0 &&
-                     ci_active && do_ci_stars);
-  const int do_cj = (cj->stars.count != 0 && ci->hydro.count != 0 &&
-                     cj_active && do_cj_stars);
-
-  /* Anything to do here? */
-  if (!do_ci && !do_cj) return;
-
-  /* Check that cells are drifted. */
-  if (do_ci &&
-      (!cell_are_spart_drifted(ci, e) || !cell_are_part_drifted(cj, e)))
-    error("Interacting undrifted cells.");
-
-  /* Have the cells been sorted? */
-  if (do_ci && (!(ci->stars.sorted & (1 << sid)) ||
-                ci->stars.dx_max_sort_old > space_maxreldx * ci->dmin))
-    error("Interacting unsorted cells.");
-
-  if (do_ci && (!(cj->hydro.sorted & (1 << sid)) ||
-                cj->hydro.dx_max_sort_old > space_maxreldx * cj->dmin))
-    error("Interacting unsorted cells.");
-
-  if (do_cj &&
-      (!cell_are_part_drifted(ci, e) || !cell_are_spart_drifted(cj, e)))
-    error("Interacting undrifted cells.");
-
-  /* Have the cells been sorted? */
-  if (do_cj && (!(ci->hydro.sorted & (1 << sid)) ||
-                ci->hydro.dx_max_sort_old > space_maxreldx * ci->dmin))
-    error("Interacting unsorted cells.");
-
-  if (do_cj && (!(cj->stars.sorted & (1 << sid)) ||
-                cj->stars.dx_max_sort_old > space_maxreldx * cj->dmin))
-    error("Interacting unsorted cells.");
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (do_ci) {
-    // MATTHIEU: This test is faulty. To be fixed...
-    // RUNNER_CHECK_SORT(hydro, part, cj, ci, sid);
-    RUNNER_CHECK_SORT(stars, spart, ci, cj, sid);
-  }
-
-  if (do_cj) {
-    // MATTHIEU: This test is faulty. To be fixed...
-    // RUNNER_CHECK_SORT(hydro, part, ci, cj, sid);
-    RUNNER_CHECK_SORT(stars, spart, cj, ci, sid);
-  }
-#endif /* SWIFT_DEBUG_CHECKS */
-
-#ifdef SWIFT_USE_NAIVE_INTERACTIONS_STARS
-  DOPAIR1_STARS_NAIVE(r, ci, cj, 1);
-#else
-  DO_SYM_PAIR1_STARS(r, ci, cj, sid, shift);
-#endif
-}
-
-/**
- * @brief Compute grouped sub-cell interactions for pairs
- *
- * @param r The #runner.
- * @param ci The first #cell.
- * @param cj The second #cell.
- * @param gettimer Do we have a timer ?
- *
- * @todo Hard-code the sid on the recursive calls to avoid the
- * redundant computations to find the sid on-the-fly.
- */
-void DOSUB_PAIR1_STARS(struct runner *r, struct cell *ci, struct cell *cj,
-                       int gettimer) {
-
-  TIMER_TIC;
-
-  struct space *s = r->e->s;
-  const struct engine *e = r->e;
-
-  /* Should we even bother? */
-  const int should_do_ci = ci->stars.count != 0 && cj->hydro.count != 0 &&
-                           cell_is_active_stars(ci, e);
-  const int should_do_cj = cj->stars.count != 0 && ci->hydro.count != 0 &&
-                           cell_is_active_stars(cj, e);
-  if (!should_do_ci && !should_do_cj) return;
-
-  /* Get the type of pair and flip ci/cj if needed. */
-  double shift[3];
-  const int sid = space_getsid(s, &ci, &cj, shift);
-
-  /* Recurse? */
-  if (cell_can_recurse_in_pair_stars_task(ci, cj) &&
-      cell_can_recurse_in_pair_stars_task(cj, ci)) {
-    struct cell_split_pair *csp = &cell_split_pairs[sid];
-    for (int k = 0; k < csp->count; k++) {
-      const int pid = csp->pairs[k].pid;
-      const int pjd = csp->pairs[k].pjd;
-      if (ci->progeny[pid] != NULL && cj->progeny[pjd] != NULL)
-        DOSUB_PAIR1_STARS(r, ci->progeny[pid], cj->progeny[pjd], 0);
-    }
-  }
-
-  /* Otherwise, compute the pair directly. */
-  else {
-
-#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
-    const int do_ci_stars = ci->nodeID == e->nodeID;
-    const int do_cj_stars = cj->nodeID == e->nodeID;
-#else
-    /* here we are updating the hydro -> switch ci, cj */
-    const int do_ci_stars = cj->nodeID == e->nodeID;
-    const int do_cj_stars = ci->nodeID == e->nodeID;
-#endif
-    const int do_ci = ci->stars.count != 0 && cj->hydro.count != 0 &&
-                      cell_is_active_stars(ci, e) && do_ci_stars;
-    const int do_cj = cj->stars.count != 0 && ci->hydro.count != 0 &&
-                      cell_is_active_stars(cj, e) && do_cj_stars;
-
-    if (do_ci) {
-
-      /* Make sure both cells are drifted to the current timestep. */
-      if (!cell_are_spart_drifted(ci, e))
-        error("Interacting undrifted cells (sparts).");
-
-      if (!cell_are_part_drifted(cj, e))
-        error("Interacting undrifted cells (parts).");
-
-      /* Do any of the cells need to be sorted first? */
-      if (!(ci->stars.sorted & (1 << sid)) ||
-          ci->stars.dx_max_sort_old > ci->dmin * space_maxreldx) {
-        error("Interacting unsorted cell (sparts).");
-      }
-
-      if (!(cj->hydro.sorted & (1 << sid)) ||
-          cj->hydro.dx_max_sort_old > cj->dmin * space_maxreldx)
-        error("Interacting unsorted cell (parts). %i", cj->nodeID);
-    }
-
-    if (do_cj) {
-
-      /* Make sure both cells are drifted to the current timestep. */
-      if (!cell_are_part_drifted(ci, e))
-        error("Interacting undrifted cells (parts).");
-
-      if (!cell_are_spart_drifted(cj, e))
-        error("Interacting undrifted cells (sparts).");
-
-      /* Do any of the cells need to be sorted first? */
-      if (!(ci->hydro.sorted & (1 << sid)) ||
-          ci->hydro.dx_max_sort_old > ci->dmin * space_maxreldx) {
-        error("Interacting unsorted cell (parts).");
-      }
-
-      if (!(cj->stars.sorted & (1 << sid)) ||
-          cj->stars.dx_max_sort_old > cj->dmin * space_maxreldx) {
-        error("Interacting unsorted cell (sparts).");
-      }
-    }
-
-    if (do_ci || do_cj) DOPAIR1_BRANCH_STARS(r, ci, cj);
-  }
-
-  TIMER_TOC(TIMER_DOSUB_PAIR_STARS);
-}
-
-/**
- * @brief Compute grouped sub-cell interactions for self tasks
- *
- * @param r The #runner.
- * @param ci The first #cell.
- * @param gettimer Do we have a timer ?
- */
-void DOSUB_SELF1_STARS(struct runner *r, struct cell *ci, int gettimer) {
-
-  TIMER_TIC;
-
-#ifdef SWIFT_DEBUG_CHECKS
-  if (ci->nodeID != engine_rank)
-    error("This function should not be called on foreign cells");
-#endif
-
-  /* Should we even bother? */
-  if (ci->hydro.count == 0 || ci->stars.count == 0 ||
-      !cell_is_active_stars(ci, r->e))
-    return;
-
-  /* Recurse? */
-  if (cell_can_recurse_in_self_stars_task(ci)) {
-
-    /* Loop over all progeny. */
-    for (int k = 0; k < 8; k++)
-      if (ci->progeny[k] != NULL) {
-        DOSUB_SELF1_STARS(r, ci->progeny[k], 0);
-        for (int j = k + 1; j < 8; j++)
-          if (ci->progeny[j] != NULL)
-            DOSUB_PAIR1_STARS(r, ci->progeny[k], ci->progeny[j], 0);
-      }
-  }
-
-  /* Otherwise, compute self-interaction. */
-  else {
-
-    /* Drift the cell to the current timestep if needed. */
-    if (!cell_are_spart_drifted(ci, r->e)) error("Interacting undrifted cell.");
-
-    DOSELF1_BRANCH_STARS(r, ci);
-  }
-
-  TIMER_TOC(TIMER_DOSUB_SELF_STARS);
-}
+                        int *ind, int scount, struct cell *cj, int gettimer);
diff --git a/src/runner_drift.c b/src/runner_drift.c
new file mode 100644
index 0000000000000000000000000000000000000000..8c4376743cd50ffea4709cb471959864cedcc4b7
--- /dev/null
+++ b/src/runner_drift.c
@@ -0,0 +1,96 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
+ *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *               2015 Peter W. Draper (p.w.draper@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+/* Config parameters. */
+#include "../config.h"
+
+/* This object's header. */
+#include "runner.h"
+
+/* Local headers. */
+#include "active.h"
+#include "cell.h"
+#include "engine.h"
+#include "timers.h"
+
+/**
+ * @brief Drift all the #part in a cell by calling cell_drift_part().
+ *
+ * @param r The #runner thread; its engine (r->e) is handed to the drift.
+ * @param c The #cell whose #part are drifted.
+ * @param timer If non-zero, TIMER_TOC(timer_drift_part) is invoked on exit.
+ */
+void runner_do_drift_part(struct runner *r, struct cell *c, int timer) {
+
+  TIMER_TIC;
+
+  cell_drift_part(c, r->e, 0);
+
+  if (timer) TIMER_TOC(timer_drift_part);
+}
+
+/**
+ * @brief Drift all the #gpart in a cell by calling cell_drift_gpart().
+ *
+ * @param r The #runner thread; its engine (r->e) is handed to the drift.
+ * @param c The #cell whose #gpart are drifted.
+ * @param timer If non-zero, TIMER_TOC(timer_drift_gpart) is invoked on exit.
+ */
+void runner_do_drift_gpart(struct runner *r, struct cell *c, int timer) {
+
+  TIMER_TIC;
+
+  cell_drift_gpart(c, r->e, 0);
+
+  if (timer) TIMER_TOC(timer_drift_gpart);
+}
+
+/**
+ * @brief Drift all the #spart in a cell by calling cell_drift_spart().
+ *
+ * @param r The #runner thread; its engine (r->e) is handed to the drift.
+ * @param c The #cell whose #spart are drifted.
+ * @param timer If non-zero, TIMER_TOC(timer_drift_spart) is invoked on exit.
+ */
+void runner_do_drift_spart(struct runner *r, struct cell *c, int timer) {
+
+  TIMER_TIC;
+
+  cell_drift_spart(c, r->e, 0);
+
+  if (timer) TIMER_TOC(timer_drift_spart);
+}
+
+/**
+ * @brief Drift all the #bpart in a cell by calling cell_drift_bpart().
+ *
+ * @param r The #runner thread; its engine (r->e) is handed to the drift.
+ * @param c The #cell whose #bpart are drifted.
+ * @param timer If non-zero, TIMER_TOC(timer_drift_bpart) is invoked on exit.
+ */
+void runner_do_drift_bpart(struct runner *r, struct cell *c, int timer) {
+
+  TIMER_TIC;
+
+  cell_drift_bpart(c, r->e, 0);
+
+  if (timer) TIMER_TOC(timer_drift_bpart);
+}
diff --git a/src/runner_ghost.c b/src/runner_ghost.c
new file mode 100644
index 0000000000000000000000000000000000000000..2c1e8cd7190858014f7914e293b5ffdadbdc2707
--- /dev/null
+++ b/src/runner_ghost.c
@@ -0,0 +1,1355 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
+ *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *               2015 Peter W. Draper (p.w.draper@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+/* Config parameters. */
+#include "../config.h"
+
+/* This object's header. */
+#include "runner.h"
+
+/* Local headers. */
+#include "active.h"
+#include "black_holes.h"
+#include "cell.h"
+#include "engine.h"
+#include "feedback.h"
+#include "pressure_floor.h"
+#include "pressure_floor_iact.h"
+#include "space_getsid.h"
+#include "stars.h"
+#include "timers.h"
+#include "tracers.h"
+
+/* Import the density loop functions. */
+#define FUNCTION density
+#define FUNCTION_TASK_LOOP TASK_LOOP_DENSITY
+#include "runner_doiact_hydro.h"
+#undef FUNCTION
+#undef FUNCTION_TASK_LOOP
+
+/* Import the stars density loop functions. */
+#define FUNCTION density
+#define FUNCTION_TASK_LOOP TASK_LOOP_DENSITY
+#include "runner_doiact_stars.h"
+#undef FUNCTION_TASK_LOOP
+#undef FUNCTION
+
+/* Import the black hole density loop functions. */
+#define FUNCTION density
+#define FUNCTION_TASK_LOOP TASK_LOOP_DENSITY
+#include "runner_doiact_black_holes.h"
+#undef FUNCTION_TASK_LOOP
+#undef FUNCTION
+
+/**
+ * @brief Intermediate task after the stars density loop: iterates the
+ * smoothing length of the active #spart, then evolves or resets feedback.
+ *
+ * @param r The #runner thread.
+ * @param c The #cell (its progeny are recursed into if it is split).
+ * @param timer If non-zero, TIMER_TOC(timer_do_stars_ghost) is called on exit.
+ */
+void runner_do_stars_ghost(struct runner *r, struct cell *c, int timer) {
+
+  struct spart *restrict sparts = c->stars.parts;
+  const struct engine *e = r->e;
+  const struct unit_system *us = e->internal_units;
+  const int with_cosmology = (e->policy & engine_policy_cosmology);
+  const struct cosmology *cosmo = e->cosmology;
+  const struct feedback_props *feedback_props = e->feedback_props;
+  const float stars_h_max = e->hydro_properties->h_max;
+  const float stars_h_min = e->hydro_properties->h_min;
+  const float eps = e->stars_properties->h_tolerance;
+  const float stars_eta_dim =
+      pow_dimension(e->stars_properties->eta_neighbours);
+  const int max_smoothing_iter = e->stars_properties->max_smoothing_iterations;
+  int redo = 0, scount = 0;
+
+  /* Running value of the maximal smoothing length */
+  double h_max = c->stars.h_max;
+
+  TIMER_TIC;
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (c->nodeID != e->nodeID)
+    error("Running the star ghost on a foreign node!");
+#endif
+
+  /* Anything to do here? */
+  if (c->stars.count == 0) return;
+  if (!cell_is_active_stars(c, e)) return;
+
+  /* Recurse? */
+  if (c->split) {
+    for (int k = 0; k < 8; k++) {
+      if (c->progeny[k] != NULL) {
+        runner_do_stars_ghost(r, c->progeny[k], 0);
+
+        /* Update h_max */
+        h_max = max(h_max, c->progeny[k]->stars.h_max);
+      }
+    }
+  } else {
+
+    /* Init the list of active particles that have to be updated. */
+    int *sid = NULL;
+    float *h_0 = NULL;
+    float *left = NULL;
+    float *right = NULL;
+    if ((sid = (int *)malloc(sizeof(int) * c->stars.count)) == NULL)
+      error("Can't allocate memory for sid.");
+    if ((h_0 = (float *)malloc(sizeof(float) * c->stars.count)) == NULL)
+      error("Can't allocate memory for h_0.");
+    if ((left = (float *)malloc(sizeof(float) * c->stars.count)) == NULL)
+      error("Can't allocate memory for left.");
+    if ((right = (float *)malloc(sizeof(float) * c->stars.count)) == NULL)
+      error("Can't allocate memory for right.");
+    for (int k = 0; k < c->stars.count; k++)
+      if (spart_is_active(&sparts[k], e) &&
+          feedback_is_active(&sparts[k], e->time, cosmo, with_cosmology)) {
+        sid[scount] = k;
+        h_0[scount] = sparts[k].h;
+        left[scount] = 0.f;
+        right[scount] = stars_h_max;
+        ++scount;
+      }
+
+    /* While there are particles that need to be updated... */
+    for (int num_reruns = 0; scount > 0 && num_reruns < max_smoothing_iter;
+         num_reruns++) {
+
+      /* Reset the redo-count. */
+      redo = 0;
+
+      /* Loop over the remaining active parts in this cell. */
+      for (int i = 0; i < scount; i++) {
+
+        /* Get a direct pointer on the part. */
+        struct spart *sp = &sparts[sid[i]];
+
+#ifdef SWIFT_DEBUG_CHECKS
+        /* Is this part within the timestep? */
+        if (!spart_is_active(sp, e))
+          error("Ghost applied to inactive particle");
+#endif
+
+        /* Get some useful values */
+        const float h_init = h_0[i];
+        const float h_old = sp->h;
+        const float h_old_dim = pow_dimension(h_old);
+        const float h_old_dim_minus_one = pow_dimension_minus_one(h_old);
+
+        float h_new;
+        int has_no_neighbours = 0;
+
+        if (sp->density.wcount == 0.f) { /* No neighbours case */
+
+          /* Flag that there were no neighbours */
+          has_no_neighbours = 1;
+
+          /* Double h and try again */
+          h_new = 2.f * h_old;
+
+        } else {
+
+          /* Finish the density calculation */
+          stars_end_density(sp, cosmo);
+
+          /* Compute one step of the Newton-Raphson scheme */
+          const float n_sum = sp->density.wcount * h_old_dim;
+          const float n_target = stars_eta_dim;
+          const float f = n_sum - n_target;
+          const float f_prime =
+              sp->density.wcount_dh * h_old_dim +
+              hydro_dimension * sp->density.wcount * h_old_dim_minus_one;
+
+          /* Improve the bisection bounds */
+          if (n_sum < n_target)
+            left[i] = max(left[i], h_old);
+          else if (n_sum > n_target)
+            right[i] = min(right[i], h_old);
+
+#ifdef SWIFT_DEBUG_CHECKS
+          /* Check the validity of the left and right bounds */
+          if (left[i] > right[i])
+            error("Invalid left (%e) and right (%e)", left[i], right[i]);
+#endif
+
+          /* Skip if h is already h_max and we don't have enough neighbours
+           */
+          /* Same if we are below h_min */
+          if (((sp->h >= stars_h_max) && (f < 0.f)) ||
+              ((sp->h <= stars_h_min) && (f > 0.f))) {
+
+            stars_reset_feedback(sp);
+
+            /* Only do feedback if stars have a reasonable birth time */
+            if (feedback_do_feedback(sp)) {
+
+              const integertime_t ti_step = get_integer_timestep(sp->time_bin);
+              const integertime_t ti_begin =
+                  get_integer_time_begin(e->ti_current - 1, sp->time_bin);
+
+              /* Get particle time-step */
+              double dt;
+              if (with_cosmology) {
+                dt = cosmology_get_delta_time(e->cosmology, ti_begin,
+                                              ti_begin + ti_step);
+              } else {
+                dt = get_timestep(sp->time_bin, e->time_base);
+              }
+
+              /* Calculate age of the star at current time */
+              double star_age_end_of_step;
+              if (with_cosmology) {
+                star_age_end_of_step =
+                    cosmology_get_delta_time_from_scale_factors(
+                        cosmo, (double)sp->birth_scale_factor, cosmo->a);
+              } else {
+                star_age_end_of_step = (float)e->time - sp->birth_time;
+              }
+
+              /* Has this star been around for a while ? */
+              if (star_age_end_of_step > 0.) {
+
+                /* Age of the star at the start of the step */
+                const double star_age_beg_of_step =
+                    max(star_age_end_of_step - dt, 0.);
+
+                /* Compute the stellar evolution  */
+                feedback_evolve_spart(sp, feedback_props, cosmo, us,
+                                      star_age_beg_of_step, dt);
+              } else {
+
+                /* Reset the feedback fields of the star particle */
+                feedback_reset_feedback(sp, feedback_props);
+              }
+            } else {
+
+              feedback_reset_feedback(sp, feedback_props);
+            }
+
+            /* Ok, we are done with this particle */
+            continue;
+          }
+
+          /* Normal case: Use Newton-Raphson to get a better value of h */
+
+          /* Avoid floating point exception from f_prime = 0 */
+          h_new = h_old - f / (f_prime + FLT_MIN);
+
+          /* Be verbose about the particles that struggle to converge */
+          if (num_reruns > max_smoothing_iter - 10) {
+
+            message(
+                "Smoothing length convergence problem: iter=%d p->id=%lld "
+                "h_init=%12.8e h_old=%12.8e h_new=%12.8e f=%f f_prime=%f "
+                "n_sum=%12.8e n_target=%12.8e left=%12.8e right=%12.8e",
+                num_reruns, sp->id, h_init, h_old, h_new, f, f_prime, n_sum,
+                n_target, left[i], right[i]);
+          }
+
+          /* Safety check: truncate to the range [ h_old/2 , 2h_old ]. */
+          h_new = min(h_new, 2.f * h_old);
+          h_new = max(h_new, 0.5f * h_old);
+
+          /* Verify that we are actually progressing towards the answer */
+          h_new = max(h_new, left[i]);
+          h_new = min(h_new, right[i]);
+        }
+
+        /* Check whether the particle has an inappropriate smoothing length
+         */
+        if (fabsf(h_new - h_old) > eps * h_old) {
+
+          /* Ok, correct then */
+
+          /* Case where we have been oscillating around the solution */
+          if ((h_new == left[i] && h_old == right[i]) ||
+              (h_old == left[i] && h_new == right[i])) {
+
+            /* Bisect the remaining interval */
+            sp->h = pow_inv_dimension(
+                0.5f * (pow_dimension(left[i]) + pow_dimension(right[i])));
+
+          } else {
+
+            /* Normal case */
+            sp->h = h_new;
+          }
+
+          /* If below the absolute maximum, try again */
+          if (sp->h < stars_h_max && sp->h > stars_h_min) {
+
+            /* Flag for another round of fun */
+            sid[redo] = sid[i];
+            h_0[redo] = h_0[i];
+            left[redo] = left[i];
+            right[redo] = right[i];
+            redo += 1;
+
+            /* Re-initialise everything */
+            stars_init_spart(sp);
+            feedback_init_spart(sp);
+
+            /* Off we go ! */
+            continue;
+
+          } else if (sp->h <= stars_h_min) {
+
+            /* Ok, this particle is a lost cause... */
+            sp->h = stars_h_min;
+
+          } else if (sp->h >= stars_h_max) {
+
+            /* Ok, this particle is a lost cause... */
+            sp->h = stars_h_max;
+
+            /* Do some damage control if no neighbours at all were found */
+            if (has_no_neighbours) {
+              stars_spart_has_no_neighbours(sp, cosmo);
+            }
+
+          } else {
+            error(
+                "Fundamental problem with the smoothing length iteration "
+                "logic.");
+          }
+        }
+
+        /* We now have a particle whose smoothing length has converged */
+
+        /* Check if h_max has increased */
+        h_max = max(h_max, sp->h);
+
+        stars_reset_feedback(sp);
+
+        /* Only do feedback if stars have a reasonable birth time */
+        if (feedback_do_feedback(sp)) {
+
+          const integertime_t ti_step = get_integer_timestep(sp->time_bin);
+          const integertime_t ti_begin =
+              get_integer_time_begin(e->ti_current - 1, sp->time_bin);
+
+          /* Get particle time-step */
+          double dt;
+          if (with_cosmology) {
+            dt = cosmology_get_delta_time(e->cosmology, ti_begin,
+                                          ti_begin + ti_step);
+          } else {
+            dt = get_timestep(sp->time_bin, e->time_base);
+          }
+
+          /* Age now. NOTE(review): casts differ from the h-limit branch */
+          double star_age_end_of_step;
+          if (with_cosmology) {
+            star_age_end_of_step = cosmology_get_delta_time_from_scale_factors(
+                cosmo, sp->birth_scale_factor, (float)cosmo->a);
+          } else {
+            star_age_end_of_step = (float)e->time - sp->birth_time;
+          }
+
+          /* Has this star been around for a while ? */
+          if (star_age_end_of_step > 0.) {
+
+            /* Age of the star at the start of the step */
+            const double star_age_beg_of_step =
+                max(star_age_end_of_step - dt, 0.);
+
+            /* Compute the stellar evolution  */
+            feedback_evolve_spart(sp, feedback_props, cosmo, us,
+                                  star_age_beg_of_step, dt);
+          } else {
+
+            /* Reset the feedback fields of the star particle */
+            feedback_reset_feedback(sp, feedback_props);
+          }
+        } else {
+
+          /* Reset the feedback fields of the star particle */
+          feedback_reset_feedback(sp, feedback_props);
+        }
+      }
+
+      /* We now need to treat the particles whose smoothing length had not
+       * converged again */
+
+      /* Re-set the counter for the next loop (potentially). */
+      scount = redo;
+      if (scount > 0) {
+
+        /* Climb up the cell hierarchy. */
+        for (struct cell *finger = c; finger != NULL; finger = finger->parent) {
+
+          /* Run through this cell's density interactions. */
+          for (struct link *l = finger->stars.density; l != NULL; l = l->next) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+            if (l->t->ti_run < r->e->ti_current)
+              error("Density task should have been run.");
+#endif
+
+            /* Self-interaction? */
+            if (l->t->type == task_type_self)
+              runner_doself_subset_branch_stars_density(r, finger, sparts, sid,
+                                                        scount);
+
+            /* Otherwise, pair interaction? */
+            else if (l->t->type == task_type_pair) {
+
+              /* Left or right? */
+              if (l->t->ci == finger)
+                runner_dopair_subset_branch_stars_density(
+                    r, finger, sparts, sid, scount, l->t->cj);
+              else
+                runner_dopair_subset_branch_stars_density(
+                    r, finger, sparts, sid, scount, l->t->ci);
+            }
+
+            /* Otherwise, sub-self interaction? */
+            else if (l->t->type == task_type_sub_self)
+              runner_dosub_subset_stars_density(r, finger, sparts, sid, scount,
+                                                NULL, 1);
+
+            /* Otherwise, sub-pair interaction? */
+            else if (l->t->type == task_type_sub_pair) {
+
+              /* Left or right? */
+              if (l->t->ci == finger)
+                runner_dosub_subset_stars_density(r, finger, sparts, sid,
+                                                  scount, l->t->cj, 1);
+              else
+                runner_dosub_subset_stars_density(r, finger, sparts, sid,
+                                                  scount, l->t->ci, 1);
+            }
+          }
+        }
+      }
+    }
+
+    if (scount) {
+      error("Smoothing length failed to converge on %i particles.", scount);
+    }
+
+    /* Be clean */
+    free(left);
+    free(right);
+    free(sid);
+    free(h_0);
+  }
+
+  /* Update h_max */
+  c->stars.h_max = h_max;
+
+  /* The ghost may not always be at the top level.
+   * Therefore we need to update h_max between the super- and top-levels */
+  if (c->stars.ghost) {
+    for (struct cell *tmp = c->parent; tmp != NULL; tmp = tmp->parent) {
+      atomic_max_d(&tmp->stars.h_max, h_max);
+    }
+  }
+
+  if (timer) TIMER_TOC(timer_do_stars_ghost);
+}
+
+/**
+ * @brief Intermediate task after the density to check that the smoothing
+ * lengths are correct.
+ *
+ * @param r The runner thread.
+ * @param c The cell.
+ * @param timer Are we timing this ?
+ */
+void runner_do_black_holes_density_ghost(struct runner *r, struct cell *c,
+                                         int timer) {
+
+  struct bpart *restrict bparts = c->black_holes.parts;
+  const struct engine *e = r->e;
+  const struct cosmology *cosmo = e->cosmology;
+  const float black_holes_h_max = e->hydro_properties->h_max;
+  const float black_holes_h_min = e->hydro_properties->h_min;
+  const float eps = e->black_holes_properties->h_tolerance;
+  const float black_holes_eta_dim =
+      pow_dimension(e->black_holes_properties->eta_neighbours);
+  const int max_smoothing_iter = e->hydro_properties->max_smoothing_iterations;
+  int redo = 0, bcount = 0;
+
+  /* Running value of the maximal smoothing length */
+  double h_max = c->black_holes.h_max;
+
+  TIMER_TIC;
+
+  /* Anything to do here? */
+  if (c->black_holes.count == 0) return;
+  if (!cell_is_active_black_holes(c, e)) return;
+
+  /* Recurse? */
+  if (c->split) {
+    for (int k = 0; k < 8; k++) {
+      if (c->progeny[k] != NULL) {
+        runner_do_black_holes_density_ghost(r, c->progeny[k], 0);
+
+        /* Update h_max */
+        h_max = max(h_max, c->progeny[k]->black_holes.h_max);
+      }
+    }
+  } else {
+
+    /* Init the list of active particles that have to be updated. */
+    int *sid = NULL;
+    float *h_0 = NULL;
+    float *left = NULL;
+    float *right = NULL;
+    if ((sid = (int *)malloc(sizeof(int) * c->black_holes.count)) == NULL)
+      error("Can't allocate memory for sid.");
+    if ((h_0 = (float *)malloc(sizeof(float) * c->black_holes.count)) == NULL)
+      error("Can't allocate memory for h_0.");
+    if ((left = (float *)malloc(sizeof(float) * c->black_holes.count)) == NULL)
+      error("Can't allocate memory for left.");
+    if ((right = (float *)malloc(sizeof(float) * c->black_holes.count)) == NULL)
+      error("Can't allocate memory for right.");
+    for (int k = 0; k < c->black_holes.count; k++)
+      if (bpart_is_active(&bparts[k], e)) {
+        sid[bcount] = k;
+        h_0[bcount] = bparts[k].h;
+        left[bcount] = 0.f;
+        right[bcount] = black_holes_h_max;
+        ++bcount;
+      }
+
+    /* While there are particles that need to be updated... */
+    for (int num_reruns = 0; bcount > 0 && num_reruns < max_smoothing_iter;
+         num_reruns++) {
+
+      /* Reset the redo-count. */
+      redo = 0;
+
+      /* Loop over the remaining active parts in this cell. */
+      for (int i = 0; i < bcount; i++) {
+
+        /* Get a direct pointer on the part. */
+        struct bpart *bp = &bparts[sid[i]];
+
+#ifdef SWIFT_DEBUG_CHECKS
+        /* Is this part within the timestep? */
+        if (!bpart_is_active(bp, e))
+          error("Ghost applied to inactive particle");
+#endif
+
+        /* Get some useful values */
+        const float h_init = h_0[i];
+        const float h_old = bp->h;
+        const float h_old_dim = pow_dimension(h_old);
+        const float h_old_dim_minus_one = pow_dimension_minus_one(h_old);
+
+        float h_new;
+        int has_no_neighbours = 0;
+
+        if (bp->density.wcount == 0.f) { /* No neighbours case */
+
+          /* Flag that there were no neighbours */
+          has_no_neighbours = 1;
+
+          /* Double h and try again */
+          h_new = 2.f * h_old;
+
+        } else {
+
+          /* Finish the density calculation */
+          black_holes_end_density(bp, cosmo);
+
+          /* Compute one step of the Newton-Raphson scheme */
+          const float n_sum = bp->density.wcount * h_old_dim;
+          const float n_target = black_holes_eta_dim;
+          const float f = n_sum - n_target;
+          const float f_prime =
+              bp->density.wcount_dh * h_old_dim +
+              hydro_dimension * bp->density.wcount * h_old_dim_minus_one;
+
+          /* Improve the bisection bounds */
+          if (n_sum < n_target)
+            left[i] = max(left[i], h_old);
+          else if (n_sum > n_target)
+            right[i] = min(right[i], h_old);
+
+#ifdef SWIFT_DEBUG_CHECKS
+          /* Check the validity of the left and right bounds */
+          if (left[i] > right[i])
+            error("Invalid left (%e) and right (%e)", left[i], right[i]);
+#endif
+
+          /* Skip if h is already h_max and we don't have enough neighbours
+           */
+          /* Same if we are below h_min */
+          if (((bp->h >= black_holes_h_max) && (f < 0.f)) ||
+              ((bp->h <= black_holes_h_min) && (f > 0.f))) {
+
+            black_holes_reset_feedback(bp);
+
+            /* Ok, we are done with this particle */
+            continue;
+          }
+
+          /* Normal case: Use Newton-Raphson to get a better value of h */
+
+          /* Avoid floating point exception from f_prime = 0 */
+          h_new = h_old - f / (f_prime + FLT_MIN);
+
+          /* Be verbose about the particles that struggle to converge */
+          if (num_reruns > max_smoothing_iter - 10) {
+
+            message(
+                "Smoothing length convergence problem: iter=%d p->id=%lld "
+                "h_init=%12.8e h_old=%12.8e h_new=%12.8e f=%f f_prime=%f "
+                "n_sum=%12.8e n_target=%12.8e left=%12.8e right=%12.8e",
+                num_reruns, bp->id, h_init, h_old, h_new, f, f_prime, n_sum,
+                n_target, left[i], right[i]);
+          }
+
+          /* Safety check: truncate to the range [ h_old/2 , 2h_old ]. */
+          h_new = min(h_new, 2.f * h_old);
+          h_new = max(h_new, 0.5f * h_old);
+
+          /* Verify that we are actually progressing towards the answer */
+          h_new = max(h_new, left[i]);
+          h_new = min(h_new, right[i]);
+        }
+
+        /* Check whether the particle has an inappropriate smoothing length
+         */
+        if (fabsf(h_new - h_old) > eps * h_old) {
+
+          /* Ok, correct then */
+
+          /* Case where we have been oscillating around the solution */
+          if ((h_new == left[i] && h_old == right[i]) ||
+              (h_old == left[i] && h_new == right[i])) {
+
+            /* Bisect the remaining interval */
+            bp->h = pow_inv_dimension(
+                0.5f * (pow_dimension(left[i]) + pow_dimension(right[i])));
+
+          } else {
+
+            /* Normal case */
+            bp->h = h_new;
+          }
+
+          /* If below the absolute maximum, try again */
+          if (bp->h < black_holes_h_max && bp->h > black_holes_h_min) {
+
+            /* Flag for another round of fun */
+            sid[redo] = sid[i];
+            h_0[redo] = h_0[i];
+            left[redo] = left[i];
+            right[redo] = right[i];
+            redo += 1;
+
+            /* Re-initialise everything */
+            black_holes_init_bpart(bp);
+
+            /* Off we go ! */
+            continue;
+
+          } else if (bp->h <= black_holes_h_min) {
+
+            /* Ok, this particle is a lost cause... */
+            bp->h = black_holes_h_min;
+
+          } else if (bp->h >= black_holes_h_max) {
+
+            /* Ok, this particle is a lost cause... */
+            bp->h = black_holes_h_max;
+
+            /* Do some damage control if no neighbours at all were found */
+            if (has_no_neighbours) {
+              black_holes_bpart_has_no_neighbours(bp, cosmo);
+            }
+
+          } else {
+            error(
+                "Fundamental problem with the smoothing length iteration "
+                "logic.");
+          }
+        }
+
+        /* We now have a particle whose smoothing length has converged */
+
+        black_holes_reset_feedback(bp);
+
+        /* Check if h_max has increased */
+        h_max = max(h_max, bp->h);
+      }
+
+      /* We now need to treat the particles whose smoothing length had not
+       * converged again */
+
+      /* Re-set the counter for the next loop (potentially). */
+      bcount = redo;
+      if (bcount > 0) {
+
+        /* Climb up the cell hierarchy. */
+        for (struct cell *finger = c; finger != NULL; finger = finger->parent) {
+
+          /* Run through this cell's density interactions. */
+          for (struct link *l = finger->black_holes.density; l != NULL;
+               l = l->next) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+            if (l->t->ti_run < r->e->ti_current)
+              error("Density task should have been run.");
+#endif
+
+            /* Self-interaction? */
+            if (l->t->type == task_type_self)
+              runner_doself_subset_branch_bh_density(r, finger, bparts, sid,
+                                                     bcount);
+
+            /* Otherwise, pair interaction? */
+            else if (l->t->type == task_type_pair) {
+
+              /* Left or right? */
+              if (l->t->ci == finger)
+                runner_dopair_subset_branch_bh_density(r, finger, bparts, sid,
+                                                       bcount, l->t->cj);
+              else
+                runner_dopair_subset_branch_bh_density(r, finger, bparts, sid,
+                                                       bcount, l->t->ci);
+            }
+
+            /* Otherwise, sub-self interaction? */
+            else if (l->t->type == task_type_sub_self)
+              runner_dosub_subset_bh_density(r, finger, bparts, sid, bcount,
+                                             NULL, 1);
+
+            /* Otherwise, sub-pair interaction? */
+            else if (l->t->type == task_type_sub_pair) {
+
+              /* Left or right? */
+              if (l->t->ci == finger)
+                runner_dosub_subset_bh_density(r, finger, bparts, sid, bcount,
+                                               l->t->cj, 1);
+              else
+                runner_dosub_subset_bh_density(r, finger, bparts, sid, bcount,
+                                               l->t->ci, 1);
+            }
+          }
+        }
+      }
+    }
+
+    if (bcount) {
+      error("Smoothing length failed to converge on %i particles.", bcount);
+    }
+
+    /* Be clean */
+    free(left);
+    free(right);
+    free(sid);
+    free(h_0);
+  }
+
+  /* Update h_max */
+  c->black_holes.h_max = h_max;
+
+  /* The ghost may not always be at the top level.
+   * Therefore we need to update h_max between the super- and top-levels */
+  if (c->black_holes.density_ghost) {
+    for (struct cell *tmp = c->parent; tmp != NULL; tmp = tmp->parent) {
+      atomic_max_d(&tmp->black_holes.h_max, h_max);
+    }
+  }
+
+  if (timer) TIMER_TOC(timer_do_black_holes_ghost);
+}
+
+/**
+ * @brief Ghost task run after the black holes have done their swallowing.
+ * Wraps up the repositioning of active BHs and readies them for feedback.
+ *
+ * @param r The runner thread.
+ * @param c The cell.
+ * @param timer Are we timing this ?
+ */
+void runner_do_black_holes_swallow_ghost(struct runner *r, struct cell *c,
+                                         int timer) {
+
+  const struct engine *e = r->e;
+  const int with_cosmology = e->policy & engine_policy_cosmology;
+  const int count = c->black_holes.count;
+  struct bpart *restrict bparts = c->black_holes.parts;
+
+  TIMER_TIC;
+
+  /* Nothing to be done for an inactive cell. */
+  if (!cell_is_active_hydro(c, e)) return;
+
+  if (c->split) {
+    /* Hand the work down to the existing progeny. */
+    for (int k = 0; k < 8; k++) {
+      struct cell *cp = c->progeny[k];
+      if (cp != NULL) runner_do_black_holes_swallow_ghost(r, cp, 0);
+    }
+  } else {
+
+    /* Unsplit cell: visit every black hole it holds. */
+    for (int i = 0; i < count; i++) {
+
+      struct bpart *bp = &bparts[i];
+
+      /* Inactive black holes are left untouched. */
+      if (!bpart_is_active(bp, e)) continue;
+
+      /* Compute the final operations for repositioning of this BH */
+      black_holes_end_reposition(bp, e->black_holes_properties,
+                                 e->physical_constants, e->cosmology);
+
+      /* Get particle time-step */
+      double dt;
+      if (!with_cosmology) {
+        dt = get_timestep(bp->time_bin, e->time_base);
+      } else {
+        const integertime_t ti_begin =
+            get_integer_time_begin(e->ti_current - 1, bp->time_bin);
+        const integertime_t ti_end =
+            ti_begin + get_integer_timestep(bp->time_bin);
+
+        dt = cosmology_get_delta_time(e->cosmology, ti_begin, ti_end);
+      }
+
+      /* Compute variables required for the feedback loop */
+      black_holes_prepare_feedback(bp, e->black_holes_properties,
+                                   e->physical_constants, e->cosmology, dt);
+    }
+  }
+
+  if (timer) TIMER_TOC(timer_do_black_holes_ghost);
+}
+
+/**
+ * @brief Ghost task following the gradient loop. Performs the final
+ * operations on the gradient quantities (optionally slope-limiting them).
+ *
+ * @param r The runner thread.
+ * @param c The cell.
+ * @param timer Are we timing this ?
+ */
+void runner_do_extra_ghost(struct runner *r, struct cell *c, int timer) {
+
+#ifdef EXTRA_HYDRO_LOOP
+
+  const struct engine *e = r->e;
+  const struct cosmology *cosmo = e->cosmology;
+  const struct hydro_props *hydro_props = e->hydro_properties;
+  const integertime_t ti_current = e->ti_current;
+  const double time_base = e->time_base;
+  const int with_cosmology = (e->policy & engine_policy_cosmology);
+  const int count = c->hydro.count;
+  struct part *restrict parts = c->hydro.parts;
+  struct xpart *restrict xparts = c->hydro.xparts;
+
+  TIMER_TIC;
+
+  /* Nothing to be done for an inactive cell. */
+  if (!cell_is_active_hydro(c, e)) return;
+
+  if (c->split) {
+    /* Hand the work down to the existing progeny. */
+    for (int k = 0; k < 8; k++) {
+      struct cell *cp = c->progeny[k];
+      if (cp != NULL) runner_do_extra_ghost(r, cp, 0);
+    }
+  } else {
+
+    /* Unsplit cell: visit every particle it holds. */
+    for (int i = 0; i < count; i++) {
+
+      struct part *restrict p = &parts[i];
+      struct xpart *restrict xp = &xparts[i];
+
+      /* Inactive particles are left untouched. */
+      if (!part_is_active(p, e)) continue;
+
+      /* Finish the gradient calculation */
+      hydro_end_gradient(p);
+
+      /* As of here, particle force variables will be set. */
+
+      /* Calculate the time-step for passing to hydro_prepare_force.
+       * This is the physical time between the start and end of the time-step
+       * without any scale-factor powers. */
+      double dt_alpha;
+      if (!with_cosmology) {
+        dt_alpha = get_timestep(p->time_bin, time_base);
+      } else {
+        const integertime_t ti_begin =
+            get_integer_time_begin(ti_current - 1, p->time_bin);
+        const integertime_t ti_end =
+            ti_begin + get_integer_timestep(p->time_bin);
+
+        dt_alpha = cosmology_get_delta_time(cosmo, ti_begin, ti_end);
+      }
+
+      /* Compute variables required for the force loop */
+      hydro_prepare_force(p, xp, cosmo, hydro_props, dt_alpha);
+
+      /* The particle force values are now set.  Do _NOT_
+         try to read any particle density variables! */
+
+      /* Prepare the particle for the force loop over neighbours */
+      hydro_reset_acceleration(p);
+    }
+  }
+
+  if (timer) TIMER_TOC(timer_do_extra_ghost);
+
+#else
+  error("SWIFT was not compiled with the extra hydro loop activated.");
+#endif
+}
+
+/**
+ * @brief Intermediate task after the density loop that iterates the smoothing
+ * lengths of the active particles of a cell until they have converged.
+ *
+ * @param r The runner thread.
+ * @param c The cell.
+ * @param timer Are we timing this ?
+ */
+void runner_do_ghost(struct runner *r, struct cell *c, int timer) {
+
+  struct part *restrict parts = c->hydro.parts;
+  struct xpart *restrict xparts = c->hydro.xparts;
+  const struct engine *e = r->e;
+  const struct space *s = e->s;
+  const struct hydro_space *hs = &s->hs;
+  const struct cosmology *cosmo = e->cosmology;
+  const struct chemistry_global_data *chemistry = e->chemistry;
+
+  const int with_cosmology = (e->policy & engine_policy_cosmology);
+
+  const float hydro_h_max = e->hydro_properties->h_max;
+  const float hydro_h_min = e->hydro_properties->h_min;
+  const float eps = e->hydro_properties->h_tolerance;
+  const float hydro_eta_dim =
+      pow_dimension(e->hydro_properties->eta_neighbours);
+  const int max_smoothing_iter = e->hydro_properties->max_smoothing_iterations;
+  int redo = 0, count = 0;
+
+  /* Running value of the maximal smoothing length */
+  double h_max = c->hydro.h_max;
+
+  TIMER_TIC;
+
+  /* Anything to do here? */
+  if (c->hydro.count == 0) return;
+  if (!cell_is_active_hydro(c, e)) return;
+
+  /* Recurse? */
+  if (c->split) {
+    for (int k = 0; k < 8; k++) {
+      if (c->progeny[k] != NULL) {
+        runner_do_ghost(r, c->progeny[k], 0);
+
+        /* Update h_max */
+        h_max = max(h_max, c->progeny[k]->hydro.h_max);
+      }
+    }
+  } else {
+
+    /* Init the list of active particles that have to be updated, their
+     * current smoothing lengths and their bisection bounds [left, right]. */
+    int *pid = NULL;
+    float *h_0 = NULL;
+    float *left = NULL;
+    float *right = NULL;
+    if ((pid = (int *)malloc(sizeof(int) * c->hydro.count)) == NULL)
+      error("Can't allocate memory for pid.");
+    if ((h_0 = (float *)malloc(sizeof(float) * c->hydro.count)) == NULL)
+      error("Can't allocate memory for h_0.");
+    if ((left = (float *)malloc(sizeof(float) * c->hydro.count)) == NULL)
+      error("Can't allocate memory for left.");
+    if ((right = (float *)malloc(sizeof(float) * c->hydro.count)) == NULL)
+      error("Can't allocate memory for right.");
+    for (int k = 0; k < c->hydro.count; k++)
+      if (part_is_active(&parts[k], e)) {
+        pid[count] = k;
+        h_0[count] = parts[k].h;
+        left[count] = 0.f;
+        right[count] = hydro_h_max;
+        ++count;
+      }
+
+    /* While there are particles that need to be updated... */
+    for (int num_reruns = 0; count > 0 && num_reruns < max_smoothing_iter;
+         num_reruns++) {
+
+      /* Reset the redo-count. */
+      redo = 0;
+
+      /* Loop over the remaining active parts in this cell. */
+      for (int i = 0; i < count; i++) {
+
+        /* Get a direct pointer on the part. */
+        struct part *p = &parts[pid[i]];
+        struct xpart *xp = &xparts[pid[i]];
+
+#ifdef SWIFT_DEBUG_CHECKS
+        /* Is this part within the timestep? */
+        if (!part_is_active(p, e)) error("Ghost applied to inactive particle");
+#endif
+
+        /* Get some useful values */
+        const float h_init = h_0[i];
+        const float h_old = p->h;
+        const float h_old_dim = pow_dimension(h_old);
+        const float h_old_dim_minus_one = pow_dimension_minus_one(h_old);
+
+        float h_new;
+        int has_no_neighbours = 0;
+
+        if (p->density.wcount == 0.f) { /* No neighbours case */
+
+          /* Flag that there were no neighbours */
+          has_no_neighbours = 1;
+
+          /* Double h and try again */
+          h_new = 2.f * h_old;
+
+        } else {
+
+          /* Finish the density calculation */
+          hydro_end_density(p, cosmo);
+          chemistry_end_density(p, chemistry, cosmo);
+          pressure_floor_end_density(p, cosmo);
+
+          /* Compute one step of the Newton-Raphson scheme */
+          const float n_sum = p->density.wcount * h_old_dim;
+          const float n_target = hydro_eta_dim;
+          const float f = n_sum - n_target;
+          const float f_prime =
+              p->density.wcount_dh * h_old_dim +
+              hydro_dimension * p->density.wcount * h_old_dim_minus_one;
+
+          /* Improve the bisection bounds */
+          if (n_sum < n_target)
+            left[i] = max(left[i], h_old);
+          else if (n_sum > n_target)
+            right[i] = min(right[i], h_old);
+
+#ifdef SWIFT_DEBUG_CHECKS
+          /* Check the validity of the left and right bounds */
+          if (left[i] > right[i])
+            error("Invalid left (%e) and right (%e)", left[i], right[i]);
+#endif
+
+          /* Skip if h is already h_max and we don't have enough neighbours */
+          /* Same if we are below h_min */
+          if (((p->h >= hydro_h_max) && (f < 0.f)) ||
+              ((p->h <= hydro_h_min) && (f > 0.f))) {
+
+          /* We have a particle whose smoothing length is already set (wants
+           * to be larger but has already hit the maximum OR wants to be
+           * smaller but has already reached the minimum). So, just tidy up
+           * as if the smoothing length had converged correctly  */
+
+#ifdef EXTRA_HYDRO_LOOP
+
+            /* As of here, particle gradient variables will be set. */
+            /* The force variables are set in the extra ghost. */
+
+            /* Compute variables required for the gradient loop */
+            hydro_prepare_gradient(p, xp, cosmo);
+
+            /* The particle gradient values are now set.  Do _NOT_
+               try to read any particle density variables! */
+
+            /* Prepare the particle for the gradient loop over neighbours
+             */
+            hydro_reset_gradient(p);
+
+#else
+            const struct hydro_props *hydro_props = e->hydro_properties;
+
+            /* Calculate the time-step for passing to hydro_prepare_force, used
+             * for the evolution of alpha factors (i.e. those involved in the
+             * artificial viscosity and thermal conduction terms) */
+            const double time_base = e->time_base;
+            const integertime_t ti_current = e->ti_current;
+            double dt_alpha;
+
+            if (with_cosmology) {
+              const integertime_t ti_step = get_integer_timestep(p->time_bin);
+              const integertime_t ti_begin =
+                  get_integer_time_begin(ti_current - 1, p->time_bin);
+
+              dt_alpha =
+                  cosmology_get_delta_time(cosmo, ti_begin, ti_begin + ti_step);
+            } else {
+              dt_alpha = get_timestep(p->time_bin, time_base);
+            }
+
+            /* As of here, particle force variables will be set. */
+
+            /* Compute variables required for the force loop */
+            hydro_prepare_force(p, xp, cosmo, hydro_props, dt_alpha);
+
+            /* The particle force values are now set.  Do _NOT_
+               try to read any particle density variables! */
+
+            /* Prepare the particle for the force loop over neighbours */
+            hydro_reset_acceleration(p);
+
+#endif /* EXTRA_HYDRO_LOOP */
+
+            /* Ok, we are done with this particle */
+            continue;
+          }
+
+          /* Normal case: Use Newton-Raphson to get a better value of h */
+
+          /* Avoid floating point exception from f_prime = 0 */
+          h_new = h_old - f / (f_prime + FLT_MIN);
+
+          /* Be verbose about the particles that struggle to converge */
+          if (num_reruns > max_smoothing_iter - 10) {
+
+            message(
+                "Smoothing length convergence problem: iter=%d p->id=%lld "
+                "h_init=%12.8e h_old=%12.8e h_new=%12.8e f=%f f_prime=%f "
+                "n_sum=%12.8e n_target=%12.8e left=%12.8e right=%12.8e",
+                num_reruns, p->id, h_init, h_old, h_new, f, f_prime, n_sum,
+                n_target, left[i], right[i]);
+          }
+
+#ifdef SWIFT_DEBUG_CHECKS
+          if ((f > 0.f && h_new > h_old) || (f < 0.f && h_new < h_old))
+            error(
+                "Smoothing length correction not going in the right direction");
+#endif
+
+          /* Safety check: truncate to the range [ h_old/2 , 2h_old ]. */
+          h_new = min(h_new, 2.f * h_old);
+          h_new = max(h_new, 0.5f * h_old);
+
+          /* Verify that we are actually progressing towards the answer */
+          h_new = max(h_new, left[i]);
+          h_new = min(h_new, right[i]);
+        }
+
+        /* Check whether the particle has an inappropriate smoothing length
+         */
+        if (fabsf(h_new - h_old) > eps * h_old) {
+
+          /* Ok, correct then */
+
+          /* Case where we have been oscillating around the solution */
+          if ((h_new == left[i] && h_old == right[i]) ||
+              (h_old == left[i] && h_new == right[i])) {
+
+            /* Bisect the remaining interval */
+            p->h = pow_inv_dimension(
+                0.5f * (pow_dimension(left[i]) + pow_dimension(right[i])));
+
+          } else {
+
+            /* Normal case */
+            p->h = h_new;
+          }
+
+          /* If within the allowed range, try again */
+          if (p->h < hydro_h_max && p->h > hydro_h_min) {
+
+            /* Flag for another round of fun */
+            pid[redo] = pid[i];
+            h_0[redo] = h_0[i];
+            left[redo] = left[i];
+            right[redo] = right[i];
+            redo += 1;
+
+            /* Re-initialise everything */
+            hydro_init_part(p, hs);
+            chemistry_init_part(p, chemistry);
+            pressure_floor_init_part(p, xp);
+            tracers_after_init(p, xp, e->internal_units, e->physical_constants,
+                               with_cosmology, e->cosmology,
+                               e->hydro_properties, e->cooling_func, e->time);
+
+            /* Off we go ! */
+            continue;
+
+          } else if (p->h <= hydro_h_min) {
+
+            /* Ok, this particle is a lost cause... */
+            p->h = hydro_h_min;
+
+          } else if (p->h >= hydro_h_max) {
+
+            /* Ok, this particle is a lost cause... */
+            p->h = hydro_h_max;
+
+            /* Do some damage control if no neighbours at all were found */
+            if (has_no_neighbours) {
+              hydro_part_has_no_neighbours(p, xp, cosmo);
+              chemistry_part_has_no_neighbours(p, xp, chemistry, cosmo);
+              pressure_floor_part_has_no_neighbours(p, xp, cosmo);
+            }
+
+          } else {
+            error(
+                "Fundamental problem with the smoothing length iteration "
+                "logic.");
+          }
+        }
+
+        /* This particle's smoothing length has converged or was clamped */
+
+        /* Check if h_max is increased */
+        h_max = max(h_max, p->h);
+
+#ifdef EXTRA_HYDRO_LOOP
+
+        /* As of here, particle gradient variables will be set. */
+        /* The force variables are set in the extra ghost. */
+
+        /* Compute variables required for the gradient loop */
+        hydro_prepare_gradient(p, xp, cosmo);
+
+        /* The particle gradient values are now set.  Do _NOT_
+           try to read any particle density variables! */
+
+        /* Prepare the particle for the gradient loop over neighbours */
+        hydro_reset_gradient(p);
+
+#else
+        const struct hydro_props *hydro_props = e->hydro_properties;
+
+        /* Calculate the time-step for passing to hydro_prepare_force, used
+         * for the evolution of alpha factors (i.e. those involved in the
+         * artificial viscosity and thermal conduction terms) */
+        const double time_base = e->time_base;
+        const integertime_t ti_current = e->ti_current;
+        double dt_alpha;
+
+        if (with_cosmology) {
+          const integertime_t ti_step = get_integer_timestep(p->time_bin);
+          const integertime_t ti_begin =
+              get_integer_time_begin(ti_current - 1, p->time_bin);
+
+          dt_alpha =
+              cosmology_get_delta_time(cosmo, ti_begin, ti_begin + ti_step);
+        } else {
+          dt_alpha = get_timestep(p->time_bin, time_base);
+        }
+
+        /* As of here, particle force variables will be set. */
+
+        /* Compute variables required for the force loop */
+        hydro_prepare_force(p, xp, cosmo, hydro_props, dt_alpha);
+
+        /* The particle force values are now set.  Do _NOT_
+           try to read any particle density variables! */
+
+        /* Prepare the particle for the force loop over neighbours */
+        hydro_reset_acceleration(p);
+
+#endif /* EXTRA_HYDRO_LOOP */
+      }
+
+      /* We now need to treat the particles whose smoothing length had not
+       * converged again */
+
+      /* Re-set the counter for the next loop (potentially). */
+      count = redo;
+      if (count > 0) {
+
+        /* Climb up the cell hierarchy. */
+        for (struct cell *finger = c; finger != NULL; finger = finger->parent) {
+
+          /* Run through this cell's density interactions. */
+          for (struct link *l = finger->hydro.density; l != NULL; l = l->next) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+            if (l->t->ti_run < r->e->ti_current)
+              error("Density task should have been run.");
+#endif
+
+            /* Self-interaction? */
+            if (l->t->type == task_type_self)
+              runner_doself_subset_branch_density(r, finger, parts, pid, count);
+
+            /* Otherwise, pair interaction? */
+            else if (l->t->type == task_type_pair) {
+
+              /* Left or right? */
+              if (l->t->ci == finger)
+                runner_dopair_subset_branch_density(r, finger, parts, pid,
+                                                    count, l->t->cj);
+              else
+                runner_dopair_subset_branch_density(r, finger, parts, pid,
+                                                    count, l->t->ci);
+            }
+
+            /* Otherwise, sub-self interaction? */
+            else if (l->t->type == task_type_sub_self)
+              runner_dosub_subset_density(r, finger, parts, pid, count, NULL,
+                                          1);
+
+            /* Otherwise, sub-pair interaction? */
+            else if (l->t->type == task_type_sub_pair) {
+
+              /* Left or right? */
+              if (l->t->ci == finger)
+                runner_dosub_subset_density(r, finger, parts, pid, count,
+                                            l->t->cj, 1);
+              else
+                runner_dosub_subset_density(r, finger, parts, pid, count,
+                                            l->t->ci, 1);
+            }
+          }
+        }
+      }
+    }
+
+    if (count) {
+      error("Smoothing length failed to converge on %i particles.", count);
+    }
+
+    /* Be clean */
+    free(left);
+    free(right);
+    free(pid);
+    free(h_0);
+  }
+
+  /* Update h_max */
+  c->hydro.h_max = h_max;
+
+  /* The ghost may not always be at the top level.
+   * Therefore we need to update h_max between the super- and top-levels */
+  if (c->hydro.ghost) {
+    for (struct cell *tmp = c->parent; tmp != NULL; tmp = tmp->parent) {
+      atomic_max_d(&tmp->hydro.h_max, h_max);
+    }
+  }
+
+  if (timer) TIMER_TOC(timer_do_ghost);
+}
diff --git a/src/runner_main.c b/src/runner_main.c
new file mode 100644
index 0000000000000000000000000000000000000000..a674b64ae671bf33df0b5ba9eaa951097d738ba9
--- /dev/null
+++ b/src/runner_main.c
@@ -0,0 +1,495 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
+ *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *               2015 Peter W. Draper (p.w.draper@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+/* Config parameters. */
+#include "../config.h"
+
+/* MPI headers. */
+#ifdef WITH_MPI
+#include <mpi.h>
+#endif
+
+/* This object's header. */
+#include "runner.h"
+
+/* Local headers. */
+#include "engine.h"
+#include "scheduler.h"
+#include "space_getsid.h"
+#include "timers.h"
+
+/* Import the gravity loop functions. */
+#include "runner_doiact_grav.h"
+
+/* Import the density loop functions. */
+#define FUNCTION density
+#define FUNCTION_TASK_LOOP TASK_LOOP_DENSITY
+#include "runner_doiact_hydro.h"
+#undef FUNCTION
+#undef FUNCTION_TASK_LOOP
+
+/* Import the gradient loop functions (if required). */
+#ifdef EXTRA_HYDRO_LOOP
+#define FUNCTION gradient
+#define FUNCTION_TASK_LOOP TASK_LOOP_GRADIENT
+#include "runner_doiact_hydro.h"
+#undef FUNCTION
+#undef FUNCTION_TASK_LOOP
+#endif
+
+/* Import the force loop functions. */
+#define FUNCTION force
+#define FUNCTION_TASK_LOOP TASK_LOOP_FORCE
+#include "runner_doiact_hydro.h"
+#undef FUNCTION
+#undef FUNCTION_TASK_LOOP
+
+/* Import the limiter loop functions. */
+#define FUNCTION limiter
+#define FUNCTION_TASK_LOOP TASK_LOOP_LIMITER
+#include "runner_doiact_hydro.h"
+#undef FUNCTION
+#undef FUNCTION_TASK_LOOP
+
+/* Import the stars density loop functions. */
+#define FUNCTION density
+#define FUNCTION_TASK_LOOP TASK_LOOP_DENSITY
+#include "runner_doiact_stars.h"
+#undef FUNCTION_TASK_LOOP
+#undef FUNCTION
+
+/* Import the stars feedback loop functions. */
+#define FUNCTION feedback
+#define FUNCTION_TASK_LOOP TASK_LOOP_FEEDBACK
+#include "runner_doiact_stars.h"
+#undef FUNCTION_TASK_LOOP
+#undef FUNCTION
+
+/* Import the black hole density loop functions. */
+#define FUNCTION density
+#define FUNCTION_TASK_LOOP TASK_LOOP_DENSITY
+#include "runner_doiact_black_holes.h"
+#undef FUNCTION_TASK_LOOP
+#undef FUNCTION
+
+/* Import the black hole swallow loop functions. */
+#define FUNCTION swallow
+#define FUNCTION_TASK_LOOP TASK_LOOP_SWALLOW
+#include "runner_doiact_black_holes.h"
+#undef FUNCTION_TASK_LOOP
+#undef FUNCTION
+
+/* Import the black hole feedback loop functions. */
+#define FUNCTION feedback
+#define FUNCTION_TASK_LOOP TASK_LOOP_FEEDBACK
+#include "runner_doiact_black_holes.h"
+#undef FUNCTION_TASK_LOOP
+#undef FUNCTION
+
+/**
+ * @brief The #runner main thread routine: barrier, then run tasks, repeat.
+ * @param data Pointer to this thread's #runner struct (passed as void *).
+ * @return Always NULL, once the engine flags engine_step_prop_done.
+ */
+void *runner_main(void *data) {
+
+  struct runner *r = (struct runner *)data;
+  struct engine *e = r->e;
+  struct scheduler *sched = &e->sched;
+  unsigned int seed = r->id;
+  pthread_setspecific(sched->local_seed_pointer, &seed);
+  /* Main loop: one iteration per pass through the engine barrier. */
+  while (1) {
+
+    /* Wait at the barrier. */
+    engine_barrier(e);
+
+    /* Can we go home yet? */
+    if (e->step_props & engine_step_prop_done) break;
+
+    /* After the barrier there is neither a current nor a previous task. */
+    struct task *t = NULL;
+    struct task *prev = NULL;
+
+    /* Loop while there are tasks... */
+    while (1) {
+
+      /* No task handed over by scheduler_done()? Ask for a new one. */
+      if (t == NULL) {
+
+        /* Get the task. */
+        TIMER_TIC
+        t = scheduler_gettask(sched, r->qid, prev);
+        TIMER_TOC(timer_gettask);
+
+        /* Did I get anything? */
+        if (t == NULL) break;
+      }
+
+      /* Get the cells. */
+      struct cell *ci = t->ci;
+      struct cell *cj = t->cj;
+
+#ifdef SWIFT_DEBUG_TASKS
+      /* Record the CPU this runner is on (r->cpuid) in the task */
+      t->rid = r->cpuid;
+
+      /* And recover the pair direction (sid); non-pair tasks get -1 */
+      if (t->type == task_type_pair || t->type == task_type_sub_pair) {
+        struct cell *ci_temp = ci;
+        struct cell *cj_temp = cj;
+        double shift[3];
+        t->sid = space_getsid(e->s, &ci_temp, &cj_temp, shift);
+      } else {
+        t->sid = -1;
+      }
+#endif
+
+#ifdef SWIFT_DEBUG_CHECKS
+      /* Record the integer time at which this task is run */
+      t->ti_run = e->ti_current;
+      /* Store the task that will be running (for debugging only) */
+      r->t = t;
+#endif
+
+      /* Dispatch on the task type, then on the subtype where relevant. */
+      switch (t->type) {
+        case task_type_self:
+          if (t->subtype == task_subtype_density)
+            runner_doself1_branch_density(r, ci);
+#ifdef EXTRA_HYDRO_LOOP
+          else if (t->subtype == task_subtype_gradient)
+            runner_doself1_branch_gradient(r, ci);
+#endif
+          else if (t->subtype == task_subtype_force)
+            runner_doself2_branch_force(r, ci);
+          else if (t->subtype == task_subtype_limiter)
+            runner_doself2_branch_limiter(r, ci);
+          else if (t->subtype == task_subtype_grav)
+            runner_doself_recursive_grav(r, ci, 1);
+          else if (t->subtype == task_subtype_external_grav)
+            runner_do_grav_external(r, ci, 1);
+          else if (t->subtype == task_subtype_stars_density)
+            runner_doself_branch_stars_density(r, ci);
+          else if (t->subtype == task_subtype_stars_feedback)
+            runner_doself_branch_stars_feedback(r, ci);
+          else if (t->subtype == task_subtype_bh_density)
+            runner_doself_branch_bh_density(r, ci);
+          else if (t->subtype == task_subtype_bh_swallow)
+            runner_doself_branch_bh_swallow(r, ci);
+          else if (t->subtype == task_subtype_do_gas_swallow)
+            runner_do_gas_swallow_self(r, ci, 1);
+          else if (t->subtype == task_subtype_do_bh_swallow)
+            runner_do_bh_swallow_self(r, ci, 1);
+          else if (t->subtype == task_subtype_bh_feedback)
+            runner_doself_branch_bh_feedback(r, ci);
+          else
+            error("Unknown/invalid task subtype (%s).",
+                  subtaskID_names[t->subtype]);
+          break;
+
+        case task_type_pair:
+          if (t->subtype == task_subtype_density)
+            runner_dopair1_branch_density(r, ci, cj);
+#ifdef EXTRA_HYDRO_LOOP
+          else if (t->subtype == task_subtype_gradient)
+            runner_dopair1_branch_gradient(r, ci, cj);
+#endif
+          else if (t->subtype == task_subtype_force)
+            runner_dopair2_branch_force(r, ci, cj);
+          else if (t->subtype == task_subtype_limiter)
+            runner_dopair2_branch_limiter(r, ci, cj);
+          else if (t->subtype == task_subtype_grav)
+            runner_dopair_recursive_grav(r, ci, cj, 1);
+          else if (t->subtype == task_subtype_stars_density)
+            runner_dopair_branch_stars_density(r, ci, cj);
+          else if (t->subtype == task_subtype_stars_feedback)
+            runner_dopair_branch_stars_feedback(r, ci, cj);
+          else if (t->subtype == task_subtype_bh_density)
+            runner_dopair_branch_bh_density(r, ci, cj);
+          else if (t->subtype == task_subtype_bh_swallow)
+            runner_dopair_branch_bh_swallow(r, ci, cj);
+          else if (t->subtype == task_subtype_do_gas_swallow)
+            runner_do_gas_swallow_pair(r, ci, cj, 1);
+          else if (t->subtype == task_subtype_do_bh_swallow)
+            runner_do_bh_swallow_pair(r, ci, cj, 1);
+          else if (t->subtype == task_subtype_bh_feedback)
+            runner_dopair_branch_bh_feedback(r, ci, cj);
+          else
+            error("Unknown/invalid task subtype (%s/%s).",
+                  taskID_names[t->type], subtaskID_names[t->subtype]);
+          break;
+
+        case task_type_sub_self:
+          if (t->subtype == task_subtype_density)
+            runner_dosub_self1_density(r, ci, 1);
+#ifdef EXTRA_HYDRO_LOOP
+          else if (t->subtype == task_subtype_gradient)
+            runner_dosub_self1_gradient(r, ci, 1);
+#endif
+          else if (t->subtype == task_subtype_force)
+            runner_dosub_self2_force(r, ci, 1);
+          else if (t->subtype == task_subtype_limiter)
+            runner_dosub_self2_limiter(r, ci, 1);
+          else if (t->subtype == task_subtype_stars_density)
+            runner_dosub_self_stars_density(r, ci, 1);
+          else if (t->subtype == task_subtype_stars_feedback)
+            runner_dosub_self_stars_feedback(r, ci, 1);
+          else if (t->subtype == task_subtype_bh_density)
+            runner_dosub_self_bh_density(r, ci, 1);
+          else if (t->subtype == task_subtype_bh_swallow)
+            runner_dosub_self_bh_swallow(r, ci, 1);
+          else if (t->subtype == task_subtype_do_gas_swallow)
+            runner_do_gas_swallow_self(r, ci, 1);
+          else if (t->subtype == task_subtype_do_bh_swallow)
+            runner_do_bh_swallow_self(r, ci, 1);
+          else if (t->subtype == task_subtype_bh_feedback)
+            runner_dosub_self_bh_feedback(r, ci, 1);
+          else
+            error("Unknown/invalid task subtype (%s/%s).",
+                  taskID_names[t->type], subtaskID_names[t->subtype]);
+          break;
+
+        case task_type_sub_pair:
+          if (t->subtype == task_subtype_density)
+            runner_dosub_pair1_density(r, ci, cj, 1);
+#ifdef EXTRA_HYDRO_LOOP
+          else if (t->subtype == task_subtype_gradient)
+            runner_dosub_pair1_gradient(r, ci, cj, 1);
+#endif
+          else if (t->subtype == task_subtype_force)
+            runner_dosub_pair2_force(r, ci, cj, 1);
+          else if (t->subtype == task_subtype_limiter)
+            runner_dosub_pair2_limiter(r, ci, cj, 1);
+          else if (t->subtype == task_subtype_stars_density)
+            runner_dosub_pair_stars_density(r, ci, cj, 1);
+          else if (t->subtype == task_subtype_stars_feedback)
+            runner_dosub_pair_stars_feedback(r, ci, cj, 1);
+          else if (t->subtype == task_subtype_bh_density)
+            runner_dosub_pair_bh_density(r, ci, cj, 1);
+          else if (t->subtype == task_subtype_bh_swallow)
+            runner_dosub_pair_bh_swallow(r, ci, cj, 1);
+          else if (t->subtype == task_subtype_do_gas_swallow)
+            runner_do_gas_swallow_pair(r, ci, cj, 1);
+          else if (t->subtype == task_subtype_do_bh_swallow)
+            runner_do_bh_swallow_pair(r, ci, cj, 1);
+          else if (t->subtype == task_subtype_bh_feedback)
+            runner_dosub_pair_bh_feedback(r, ci, cj, 1);
+          else
+            error("Unknown/invalid task subtype (%s/%s).",
+                  taskID_names[t->type], subtaskID_names[t->subtype]);
+          break;
+
+        case task_type_sort:
+          /* Cleanup only if dx_max_sort_old exceeds space_maxreldx * dmin. */
+          runner_do_hydro_sort(
+              r, ci, t->flags,
+              ci->hydro.dx_max_sort_old > space_maxreldx * ci->dmin, 1);
+          /* Reset the sort flags as our work here is done. */
+          t->flags = 0;
+          break;
+        case task_type_stars_sort:
+          /* Cleanup only if dx_max_sort_old exceeds space_maxreldx * dmin. */
+          runner_do_stars_sort(
+              r, ci, t->flags,
+              ci->stars.dx_max_sort_old > space_maxreldx * ci->dmin, 1);
+          /* Reset the sort flags as our work here is done. */
+          t->flags = 0;
+          break;
+        case task_type_init_grav:
+          runner_do_init_grav(r, ci, 1);
+          break;
+        case task_type_ghost:
+          runner_do_ghost(r, ci, 1);
+          break;
+#ifdef EXTRA_HYDRO_LOOP
+        case task_type_extra_ghost:
+          runner_do_extra_ghost(r, ci, 1);
+          break;
+#endif
+        case task_type_stars_ghost:
+          runner_do_stars_ghost(r, ci, 1);
+          break;
+        case task_type_bh_density_ghost:
+          runner_do_black_holes_density_ghost(r, ci, 1);
+          break;
+        case task_type_bh_swallow_ghost3:
+          runner_do_black_holes_swallow_ghost(r, ci, 1);
+          break;
+        case task_type_drift_part:
+          runner_do_drift_part(r, ci, 1);
+          break;
+        case task_type_drift_spart:
+          runner_do_drift_spart(r, ci, 1);
+          break;
+        case task_type_drift_bpart:
+          runner_do_drift_bpart(r, ci, 1);
+          break;
+        case task_type_drift_gpart:
+          runner_do_drift_gpart(r, ci, 1);
+          break;
+        case task_type_kick1:
+          runner_do_kick1(r, ci, 1);
+          break;
+        case task_type_kick2:
+          runner_do_kick2(r, ci, 1);
+          break;
+        case task_type_end_hydro_force:
+          runner_do_end_hydro_force(r, ci, 1);
+          break;
+        case task_type_end_grav_force:
+          runner_do_end_grav_force(r, ci, 1);
+          break;
+        case task_type_logger:
+          runner_do_logger(r, ci, 1);
+          break;
+        case task_type_timestep:
+          runner_do_timestep(r, ci, 1);
+          break;
+        case task_type_timestep_limiter:
+          runner_do_limiter(r, ci, 0, 1);
+          break;
+#ifdef WITH_MPI
+        case task_type_send:
+          if (t->subtype == task_subtype_tend_part) {
+            free(t->buff);
+          } else if (t->subtype == task_subtype_tend_gpart) {
+            free(t->buff);
+          } else if (t->subtype == task_subtype_tend_spart) {
+            free(t->buff);
+          } else if (t->subtype == task_subtype_tend_bpart) {
+            free(t->buff);
+          } else if (t->subtype == task_subtype_sf_counts) {
+            free(t->buff);
+          } else if (t->subtype == task_subtype_part_swallow) {
+            free(t->buff);
+          } else if (t->subtype == task_subtype_bpart_merger) {
+            free(t->buff);
+          }
+          break;
+        case task_type_recv:
+          if (t->subtype == task_subtype_tend_part) {
+            cell_unpack_end_step_hydro(ci, (struct pcell_step_hydro *)t->buff);
+            free(t->buff);
+          } else if (t->subtype == task_subtype_tend_gpart) {
+            cell_unpack_end_step_grav(ci, (struct pcell_step_grav *)t->buff);
+            free(t->buff);
+          } else if (t->subtype == task_subtype_tend_spart) {
+            cell_unpack_end_step_stars(ci, (struct pcell_step_stars *)t->buff);
+            free(t->buff);
+          } else if (t->subtype == task_subtype_tend_bpart) {
+            cell_unpack_end_step_black_holes(
+                ci, (struct pcell_step_black_holes *)t->buff);
+            free(t->buff);
+          } else if (t->subtype == task_subtype_sf_counts) {
+            cell_unpack_sf_counts(ci, (struct pcell_sf *)t->buff);
+            cell_clear_stars_sort_flags(ci, /*clear_unused_flags=*/0);
+            free(t->buff);
+          } else if (t->subtype == task_subtype_xv) {
+            runner_do_recv_part(r, ci, 1, 1);
+          } else if (t->subtype == task_subtype_rho) {
+            runner_do_recv_part(r, ci, 0, 1);
+          } else if (t->subtype == task_subtype_gradient) {
+            runner_do_recv_part(r, ci, 0, 1);
+          } else if (t->subtype == task_subtype_part_swallow) {
+            cell_unpack_part_swallow(ci,
+                                     (struct black_holes_part_data *)t->buff);
+            free(t->buff);
+          } else if (t->subtype == task_subtype_bpart_merger) {
+            cell_unpack_bpart_swallow(ci,
+                                      (struct black_holes_bpart_data *)t->buff);
+            free(t->buff);
+          } else if (t->subtype == task_subtype_limiter) {
+            runner_do_recv_part(r, ci, 0, 1);
+          } else if (t->subtype == task_subtype_gpart) {
+            runner_do_recv_gpart(r, ci, 1);
+          } else if (t->subtype == task_subtype_spart) {
+            runner_do_recv_spart(r, ci, 1, 1);
+          } else if (t->subtype == task_subtype_bpart_rho) {
+            runner_do_recv_bpart(r, ci, 1, 1);
+          } else if (t->subtype == task_subtype_bpart_swallow) {
+            runner_do_recv_bpart(r, ci, 0, 1);
+          } else if (t->subtype == task_subtype_bpart_feedback) {
+            runner_do_recv_bpart(r, ci, 0, 1);
+          } else if (t->subtype == task_subtype_multipole) {
+            cell_unpack_multipoles(ci, (struct gravity_tensors *)t->buff);
+            free(t->buff);
+          } else {
+            error("Unknown/invalid task subtype (%d).", t->subtype);
+          }
+          break;
+#endif
+        case task_type_grav_down:
+          runner_do_grav_down(r, t->ci, 1);
+          break;
+        case task_type_grav_mesh:
+          runner_do_grav_mesh(r, t->ci, 1);
+          break;
+        case task_type_grav_long_range:
+          runner_do_grav_long_range(r, t->ci, 1);
+          break;
+        case task_type_grav_mm:
+          runner_dopair_grav_mm_progenies(r, t->flags, t->ci, t->cj);
+          break;
+        case task_type_cooling:
+          runner_do_cooling(r, t->ci, 1);
+          break;
+        case task_type_star_formation:
+          runner_do_star_formation(r, t->ci, 1);
+          break;
+        case task_type_stars_resort:
+          runner_do_stars_resort(r, t->ci, 1);
+          break;
+        case task_type_fof_self:
+          runner_do_fof_self(r, t->ci, 1);
+          break;
+        case task_type_fof_pair:
+          runner_do_fof_pair(r, t->ci, t->cj, 1);
+          break;
+        default:
+          error("Unknown/invalid task type (%d).", t->type);
+      }
+
+/* Debugging only: count this task type/subtype on each cell involved */
+#ifdef SWIFT_DEBUG_CHECKS
+      if (ci != NULL) {
+        ci->tasks_executed[t->type]++;
+        ci->subtasks_executed[t->subtype]++;
+      }
+      if (cj != NULL) {
+        cj->tasks_executed[t->type]++;
+        cj->subtasks_executed[t->subtype]++;
+      }
+
+      /* This runner is not doing a task anymore */
+      r->t = NULL;
+#endif
+
+      /* Done with this task; scheduler_done() may hand us the next one. */
+      prev = t;
+      t = scheduler_done(sched, t);
+
+    } /* Loop over tasks. */
+  }   /* Main loop. */
+
+  /* Be kind, rewind. */
+  return NULL;
+}
diff --git a/src/runner_others.c b/src/runner_others.c
new file mode 100644
index 0000000000000000000000000000000000000000..5ffaf7aa321f658b6e0e7e10a9cb8ad2f4a5a541
--- /dev/null
+++ b/src/runner_others.c
@@ -0,0 +1,660 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
+ *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *               2015 Peter W. Draper (p.w.draper@durham.ac.uk)
+ *               2016 John A. Regan (john.a.regan@durham.ac.uk)
+ *                    Tom Theuns (tom.theuns@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+/* Config parameters. */
+#include "../config.h"
+
+/* Some standard headers. */
+#include <float.h>
+#include <limits.h>
+#include <stdlib.h>
+
+/* MPI headers. */
+#ifdef WITH_MPI
+#include <mpi.h>
+#endif
+
+/* This object's header. */
+#include "runner.h"
+
+/* Local headers. */
+#include "active.h"
+#include "cell.h"
+#include "chemistry.h"
+#include "cooling.h"
+#include "engine.h"
+#include "error.h"
+#include "gravity.h"
+#include "hydro.h"
+#include "logger.h"
+#include "pressure_floor.h"
+#include "space.h"
+#include "star_formation.h"
+#include "star_formation_logger.h"
+#include "stars.h"
+#include "timers.h"
+#include "tracers.h"
+
+/**
+ * @brief Apply the external potential's acceleration to the active gparts
+ * of a cell, recursing into the progeny if the cell is split.
+ * @param r The #runner thread.
+ * @param c The #cell.
+ * @param timer 1 if the time is to be recorded.
+ */
+void runner_do_grav_external(struct runner *r, struct cell *c, int timer) {
+
+  struct gpart *restrict gparts = c->grav.parts;
+  const int gcount = c->grav.count;
+  const struct engine *e = r->e;
+  const struct external_potential *potential = e->external_potential;
+  const struct phys_const *constants = e->physical_constants;
+  const double time = r->e->time;
+
+  TIMER_TIC;
+
+  /* Anything to do here? (This early exit skips the TIMER_TOC below.) */
+  if (!cell_is_active_gravity(c, e)) return;
+
+  /* Recurse into the existing progeny, without timing them? */
+  if (c->split) {
+    for (int k = 0; k < 8; k++)
+      if (c->progeny[k] != NULL) runner_do_grav_external(r, c->progeny[k], 0);
+  } else {
+
+    /* Loop over the gparts in this cell. */
+    for (int i = 0; i < gcount; i++) {
+
+      /* Get a direct pointer to the gpart. */
+      struct gpart *restrict gp = &gparts[i];
+
+      /* Only active gparts receive the external acceleration. */
+      if (gpart_is_active(gp, e)) {
+        external_gravity_acceleration(time, potential, constants, gp);
+      }
+    }
+  }
+
+  if (timer) TIMER_TOC(timer_dograv_external);
+}
+
+/**
+ * @brief Calculate gravity accelerations from the periodic mesh
+ * by calling pm_mesh_interpolate_forces() on the gparts of un-split cells.
+ * @param r The #runner thread.
+ * @param c The #cell (its progeny are visited recursively if it is split).
+ * @param timer 1 if the time is to be recorded.
+ */
+void runner_do_grav_mesh(struct runner *r, struct cell *c, int timer) {
+
+  struct gpart *restrict gparts = c->grav.parts;
+  const int gcount = c->grav.count;
+  const struct engine *e = r->e;
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (!e->s->periodic) error("Calling mesh forces in non-periodic mode.");
+#endif
+
+  TIMER_TIC;
+
+  /* Anything to do here? (This early exit skips the TIMER_TOC below.) */
+  if (!cell_is_active_gravity(c, e)) return;
+
+  /* Recurse into the existing progeny, without timing them? */
+  if (c->split) {
+    for (int k = 0; k < 8; k++)
+      if (c->progeny[k] != NULL) runner_do_grav_mesh(r, c->progeny[k], 0);
+  } else {
+
+    /* Get the forces from the gravity mesh for all gparts of this cell */
+    pm_mesh_interpolate_forces(e->mesh, e, gparts, gcount);
+  }
+
+  if (timer) TIMER_TOC(timer_dograv_mesh);
+}
+
+/**
+ * @brief Calculate change in thermal state of particles induced
+ * by radiative cooling and heating.
+ * Calls cooling_cool_part() on every active part of the un-split cells.
+ * @param r The #runner thread.
+ * @param c The #cell (its progeny are visited recursively if it is split).
+ * @param timer 1 if the time is to be recorded.
+ */
+void runner_do_cooling(struct runner *r, struct cell *c, int timer) {
+
+  const struct engine *e = r->e;
+  const struct cosmology *cosmo = e->cosmology;
+  const int with_cosmology = (e->policy & engine_policy_cosmology);
+  const struct cooling_function_data *cooling_func = e->cooling_func;
+  const struct phys_const *constants = e->physical_constants;
+  const struct unit_system *us = e->internal_units;
+  const struct hydro_props *hydro_props = e->hydro_properties;
+  const struct entropy_floor_properties *entropy_floor_props = e->entropy_floor;
+  const double time_base = e->time_base;
+  const integertime_t ti_current = e->ti_current;
+  struct part *restrict parts = c->hydro.parts;
+  struct xpart *restrict xparts = c->hydro.xparts;
+  const int count = c->hydro.count;
+
+  TIMER_TIC;
+
+  /* Anything to do here? (This early exit skips the TIMER_TOC below.) */
+  if (!cell_is_active_hydro(c, e)) return;
+
+  /* Recurse into the existing progeny, without timing them? */
+  if (c->split) {
+    for (int k = 0; k < 8; k++)
+      if (c->progeny[k] != NULL) runner_do_cooling(r, c->progeny[k], 0);
+  } else {
+
+    /* Loop over the parts in this cell. */
+    for (int i = 0; i < count; i++) {
+
+      /* Get direct pointers to the part and its matching xpart. */
+      struct part *restrict p = &parts[i];
+      struct xpart *restrict xp = &xparts[i];
+
+      if (part_is_active(p, e)) {
+
+        double dt_cool, dt_therm;
+        if (with_cosmology) {
+          const integertime_t ti_step = get_integer_timestep(p->time_bin);
+          const integertime_t ti_begin =
+              get_integer_time_begin(ti_current - 1, p->time_bin);
+
+          dt_cool =
+              cosmology_get_delta_time(cosmo, ti_begin, ti_begin + ti_step);
+          dt_therm = cosmology_get_therm_kick_factor(e->cosmology, ti_begin,
+                                                     ti_begin + ti_step);
+
+        } else {
+          dt_cool = get_timestep(p->time_bin, time_base);
+          dt_therm = get_timestep(p->time_bin, time_base);
+        }
+
+        /* Let's cool! */
+        cooling_cool_part(constants, us, cosmo, hydro_props,
+                          entropy_floor_props, cooling_func, p, xp, dt_cool,
+                          dt_therm);
+      }
+    }
+  }
+
+  if (timer) TIMER_TOC(timer_do_cooling);
+}
+
+/**
+ * @brief Star formation in a #cell: log SFRs, maybe turn active parts to sparts
+ */
+void runner_do_star_formation(struct runner *r, struct cell *c, int timer) {
+
+  struct engine *e = r->e;
+  const struct cosmology *cosmo = e->cosmology;
+  const struct star_formation *sf_props = e->star_formation;
+  const struct phys_const *phys_const = e->physical_constants;
+  const int count = c->hydro.count;
+  struct part *restrict parts = c->hydro.parts;
+  struct xpart *restrict xparts = c->hydro.xparts;
+  const int with_cosmology = (e->policy & engine_policy_cosmology);
+  const int with_feedback = (e->policy & engine_policy_feedback);
+  const struct hydro_props *restrict hydro_props = e->hydro_properties;
+  const struct unit_system *restrict us = e->internal_units;
+  struct cooling_function_data *restrict cooling = e->cooling_func;
+  const struct entropy_floor_properties *entropy_floor = e->entropy_floor;
+  const double time_base = e->time_base;
+  const integertime_t ti_current = e->ti_current;
+  const int current_stars_count = c->stars.count;
+
+  TIMER_TIC;
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (c->nodeID != e->nodeID)
+    error("Running star formation task on a foreign node!");
+#endif
+
+  /* Nothing to do? Log the cell as inactive and leave (no TIMER_TOC). */
+  if (c->hydro.count == 0 || !cell_is_active_hydro(c, e)) {
+    star_formation_logger_log_inactive_cell(&c->stars.sfh);
+    return;
+  }
+
+  /* Reset this cell's star formation history (SFH) logger */
+  star_formation_logger_init(&c->stars.sfh);
+
+  /* Recurse? */
+  if (c->split) {
+    for (int k = 0; k < 8; k++)
+      if (c->progeny[k] != NULL) {
+        /* Load the child cell */
+        struct cell *restrict cp = c->progeny[k];
+
+        /* Do the recursion (children are not timed) */
+        runner_do_star_formation(r, cp, 0);
+
+        /* Accumulate the child's SFH into the current cell's SFH */
+        star_formation_logger_add(&c->stars.sfh, &cp->stars.sfh);
+      }
+  } else {
+
+    /* Loop over the gas particles in this cell. */
+    for (int k = 0; k < count; k++) {
+
+      /* Get a handle on the part and its matching xpart. */
+      struct part *restrict p = &parts[k];
+      struct xpart *restrict xp = &xparts[k];
+
+      /* Only work on active particles */
+      if (part_is_active(p, e)) {
+
+        /* Is this particle star forming? */
+        if (star_formation_is_star_forming(p, xp, sf_props, phys_const, cosmo,
+                                           hydro_props, us, cooling,
+                                           entropy_floor)) {
+
+          /* Time-step size for this particle */
+          double dt_star;
+          if (with_cosmology) {
+            const integertime_t ti_step = get_integer_timestep(p->time_bin);
+            const integertime_t ti_begin =
+                get_integer_time_begin(ti_current - 1, p->time_bin);
+
+            dt_star =
+                cosmology_get_delta_time(cosmo, ti_begin, ti_begin + ti_step);
+
+          } else {
+            dt_star = get_timestep(p->time_bin, time_base);
+          }
+
+          /* Compute the SF rate of the particle */
+          star_formation_compute_SFR(p, xp, sf_props, phys_const, cosmo,
+                                     dt_star);
+
+          /* Add the SFR and SFR*dt to the SFH struct of this cell */
+          star_formation_logger_log_active_part(p, xp, &c->stars.sfh, dt_star);
+
+          /* Are we forming a star particle from this SF rate? */
+          if (star_formation_should_convert_to_star(p, xp, sf_props, e,
+                                                    dt_star)) {
+
+            /* Convert the gas particle to a star particle */
+            struct spart *sp = cell_convert_part_to_spart(e, c, p, xp);
+
+            /* Did we get a star? (Or did we run out of spare ones?) */
+            if (sp != NULL) {
+
+              /* message("We formed a star id=%lld cellID=%d", sp->id,
+               * c->cellID); */
+
+              /* Copy the properties of the gas particle to the star particle */
+              star_formation_copy_properties(p, xp, sp, e, sf_props, cosmo,
+                                             with_cosmology, phys_const,
+                                             hydro_props, us, cooling);
+
+              /* Update the Star formation history */
+              star_formation_logger_log_new_spart(sp, &c->stars.sfh);
+            }
+          }
+
+        } else { /* Are we not star-forming? */
+
+          /* Update the particle to flag it as not star-forming */
+          star_formation_update_part_not_SFR(p, xp, e, sf_props,
+                                             with_cosmology);
+
+        } /* Not Star-forming? */
+
+      } else { /* is active? */
+
+        /* Inactive parts are logged too, unless they are inhibited */
+        if (!part_is_inhibited(p, e)) {
+          star_formation_logger_log_inactive_part(p, xp, &c->stars.sfh);
+        }
+      }
+    } /* Loop over particles */
+  }
+
+  /* With feedback on, if the top-level cell's star count changed the star
+   * sorts are now invalid: flag the cell so that they get re-computed. */
+  if (with_feedback && (c == c->top) &&
+      (current_stars_count != c->stars.count)) {
+    cell_set_star_resort_flag(c);
+  }
+
+  if (timer) TIMER_TOC(timer_do_star_formation);
+}
+
+/**
+ * @brief End the hydro force calculation of all active particles in a cell
+ * by calling hydro_end_force() and chemistry_end_force() on each of them.
+ *
+ * @param r The #runner thread.
+ * @param c The #cell (its progeny are visited recursively if it is split).
+ * @param timer Are we timing this ?
+ */
+void runner_do_end_hydro_force(struct runner *r, struct cell *c, int timer) {
+
+  const struct engine *e = r->e;
+
+  TIMER_TIC;
+
+  /* Anything to do here? (This early exit skips the TIMER_TOC below.) */
+  if (!cell_is_active_hydro(c, e)) return;
+
+  /* Recurse into the existing progeny, without timing them? */
+  if (c->split) {
+    for (int k = 0; k < 8; k++)
+      if (c->progeny[k] != NULL) runner_do_end_hydro_force(r, c->progeny[k], 0);
+  } else {
+
+    const struct cosmology *cosmo = e->cosmology;
+    const int count = c->hydro.count;
+    struct part *restrict parts = c->hydro.parts;
+
+    /* Loop over the gas particles in this cell. */
+    for (int k = 0; k < count; k++) {
+
+      /* Get a handle on the part. */
+      struct part *restrict p = &parts[k];
+
+      if (part_is_active(p, e)) {
+
+        /* Finish the force loop */
+        hydro_end_force(p, cosmo);
+        chemistry_end_force(p, cosmo);
+
+#ifdef SWIFT_BOUNDARY_PARTICLES
+
+        /* Get the ID of the part */
+        const long long id = p->id;
+
+        /* Cancel hydro forces of parts with an ID below the threshold */
+        if (id < SWIFT_BOUNDARY_PARTICLES) {
+
+          /* Don't move ! */
+          hydro_reset_acceleration(p);
+
+#if defined(GIZMO_MFV_SPH) || defined(GIZMO_MFM_SPH)
+
+          /* Some values need to be reset in the Gizmo case. */
+          hydro_prepare_force(p, &c->hydro.xparts[k], cosmo,
+                              e->hydro_properties, 0);
+#endif
+        }
+#endif
+      }
+    }
+  }
+
+  if (timer) TIMER_TOC(timer_end_hydro_force);
+}
+
+/**
+ * @brief Finish the gravity force calculation of all the active g-particles
+ * in a cell by calling gravity_end_force() on each of them.
+ *
+ * @param r The #runner thread.
+ * @param c The #cell.
+ * @param timer Are we timing this ?
+ */
+void runner_do_end_grav_force(struct runner *r, struct cell *c, int timer) {
+
+  const struct engine *e = r->e;
+
+  TIMER_TIC;
+
+  /* Nothing to do for cells that are not gravity-active. */
+  if (!cell_is_active_gravity(c, e)) return;
+
+  /* Split cells only recurse into their progeny (without timing them). */
+  if (c->split) {
+    for (int k = 0; k < 8; k++)
+      if (c->progeny[k] != NULL) runner_do_end_grav_force(r, c->progeny[k], 0);
+  } else {
+
+    const struct space *s = e->s;
+    const int periodic = s->periodic;
+    const float G_newton = e->physical_constants->const_newton_G;
+
+    /* Potential normalisation in the case of periodic self-gravity */
+    float potential_normalisation = 0.;
+    if (periodic && (e->policy & engine_policy_self_gravity)) {
+      const double volume = s->dim[0] * s->dim[1] * s->dim[2];
+      const double r_s = e->mesh->r_s;
+      potential_normalisation = 4. * M_PI * e->total_mass * r_s * r_s / volume;
+    }
+
+    const int gcount = c->grav.count;
+    struct gpart *restrict gparts = c->grav.parts;
+
+    /* Loop over the g-particles in this cell. */
+    for (int k = 0; k < gcount; k++) {
+
+      /* Get a handle on the gpart. */
+      struct gpart *restrict gp = &gparts[k];
+
+      if (gpart_is_active(gp, e)) {
+
+        /* Finish the force calculation */
+        gravity_end_force(gp, G_newton, potential_normalisation, periodic);
+
+#ifdef SWIFT_MAKE_GRAVITY_GLASS
+
+        /* Negate the gravity forces */
+        gp->a_grav[0] *= -1.f;
+        gp->a_grav[1] *= -1.f;
+        gp->a_grav[2] *= -1.f;
+#endif
+
+#ifdef SWIFT_NO_GRAVITY_BELOW_ID
+
+        /* Get the ID of the gpart (gas and stars store a negated offset) */
+        long long id = 0;
+        if (gp->type == swift_type_gas)
+          id = e->s->parts[-gp->id_or_neg_offset].id;
+        else if (gp->type == swift_type_stars)
+          id = e->s->sparts[-gp->id_or_neg_offset].id;
+        else if (gp->type == swift_type_black_hole)
+          error("Unexisting type");
+        else
+          id = gp->id_or_neg_offset;
+
+        /* Cancel the gravity forces of particles with an ID below the limit */
+        if (id < SWIFT_NO_GRAVITY_BELOW_ID) {
+
+          /* Don't move ! */
+          gp->a_grav[0] = 0.f;
+          gp->a_grav[1] = 0.f;
+          gp->a_grav[2] = 0.f;
+        }
+#endif
+
+#ifdef SWIFT_DEBUG_CHECKS
+        if ((e->policy & engine_policy_self_gravity) &&
+            !(e->policy & engine_policy_black_holes)) {
+
+          /* Let's add a self interaction to simplify the count */
+          gp->num_interacted++;
+
+          /* Check that this gpart has interacted with all the other
+           * non-inhibited particles (via direct or multipoles) in the box */
+          if (gp->num_interacted !=
+              e->total_nr_gparts - e->count_inhibited_gparts) {
+
+            /* Get the ID of the gpart (same lookup as above) for the message */
+            long long my_id = 0;
+            if (gp->type == swift_type_gas)
+              my_id = e->s->parts[-gp->id_or_neg_offset].id;
+            else if (gp->type == swift_type_stars)
+              my_id = e->s->sparts[-gp->id_or_neg_offset].id;
+            else if (gp->type == swift_type_black_hole)
+              error("Unexisting type");
+            else
+              my_id = gp->id_or_neg_offset;
+
+            error(
+                "g-particle (id=%lld, type=%s) did not interact "
+                "gravitationally with all other gparts "
+                "gp->num_interacted=%lld, total_gparts=%lld (local "
+                "num_gparts=%zd inhibited_gparts=%lld)",
+                my_id, part_type_names[gp->type], gp->num_interacted,
+                e->total_nr_gparts, e->s->nr_gparts, e->count_inhibited_gparts);
+          }
+        }
+#endif
+      }
+    }
+  }
+  if (timer) TIMER_TOC(timer_end_grav_force);
+}
+
+/**
+ * @brief Write the required particles through the logger.
+ * Only #part are logged; active cells with gparts or sparts raise an error.
+ * @param r The runner thread.
+ * @param c The cell.
+ * @param timer Are we timing this ?
+ */
+void runner_do_logger(struct runner *r, struct cell *c, int timer) {
+
+#ifdef WITH_LOGGER
+  TIMER_TIC;
+
+  const struct engine *e = r->e;
+  struct part *restrict parts = c->hydro.parts;
+  struct xpart *restrict xparts = c->hydro.xparts;
+  const int count = c->hydro.count;
+
+  /* Nothing to do if the cell is neither hydro- nor gravity-active. */
+  if (!cell_is_active_hydro(c, e) && !cell_is_active_gravity(c, e)) return;
+
+  /* Recurse? Avoid spending too much time in useless cells. */
+  if (c->split) {
+    for (int k = 0; k < 8; k++)
+      if (c->progeny[k] != NULL) runner_do_logger(r, c->progeny[k], 0);
+  } else {
+
+    /* Loop over the parts in this cell. */
+    for (int k = 0; k < count; k++) {
+
+      /* Get a handle on the part. */
+      struct part *restrict p = &parts[k];
+      struct xpart *restrict xp = &xparts[k];
+
+      /* Only the active particles are candidates for logging. Whether such
+       * a particle is written at this step is then decided by
+       * logger_should_write(). */
+      if (part_is_active(p, e)) {
+
+        if (logger_should_write(&xp->logger_data, e->logger)) {
+          /* Write the particle. For now every field is requested in the
+           * mask; this could be adapted over time. */
+          logger_log_part(e->logger, p,
+                          logger_mask_data[logger_x].mask |
+                              logger_mask_data[logger_v].mask |
+                              logger_mask_data[logger_a].mask |
+                              logger_mask_data[logger_u].mask |
+                              logger_mask_data[logger_h].mask |
+                              logger_mask_data[logger_rho].mask |
+                              logger_mask_data[logger_consts].mask,
+                          &xp->logger_data.last_offset);
+
+          /* Set counter back to zero */
+          xp->logger_data.steps_since_last_output = 0;
+        } else
+          /* Not written this time: one more step since the last output */
+          xp->logger_data.steps_since_last_output += 1;
+      }
+    }
+  }
+
+  if (c->grav.count > 0) error("gparts not implemented");
+
+  if (c->stars.count > 0) error("sparts not implemented");
+
+  if (timer) TIMER_TOC(timer_logger);
+
+#else
+  error("Logger disabled, please enable it during configuration");
+#endif
+}
+
+/**
+ * @brief Recursively search for FOF groups in a single cell.
+ * Raises an error if SWIFT was compiled without FOF support (WITH_FOF).
+ * @param r The #runner thread.
+ * @param c The #cell to search.
+ * @param timer 1 if the time is to be recorded.
+ */
+void runner_do_fof_self(struct runner *r, struct cell *c, int timer) {
+
+#ifdef WITH_FOF
+
+  TIMER_TIC;
+
+  const struct engine *e = r->e;
+  struct space *s = e->s;
+  const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]};
+  const int periodic = s->periodic;
+  const struct gpart *const gparts = s->gparts;
+  const double search_r2 = e->fof_properties->l_x2;
+  /* The search itself is delegated to the recursive FOF routine. */
+  rec_fof_search_self(e->fof_properties, dim, search_r2, periodic, gparts, c);
+
+  if (timer) TIMER_TOC(timer_fof_self);
+
+#else
+  error("SWIFT was not compiled with FOF enabled!");
+#endif
+}
+
+/**
+ * @brief Recursively search for FOF groups between a pair of cells.
+ * Raises an error if SWIFT was compiled without FOF support (WITH_FOF).
+ * @param r The #runner thread.
+ * @param ci The first #cell of the pair.
+ * @param cj The second #cell of the pair.
+ * @param timer 1 if the time is to be recorded.
+ */
+void runner_do_fof_pair(struct runner *r, struct cell *ci, struct cell *cj,
+                        int timer) {
+
+#ifdef WITH_FOF
+
+  TIMER_TIC;
+
+  const struct engine *e = r->e;
+  struct space *s = e->s;
+  const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]};
+  const int periodic = s->periodic;
+  const struct gpart *const gparts = s->gparts;
+  const double search_r2 = e->fof_properties->l_x2;
+  /* The search itself is delegated to the recursive FOF routine. */
+  rec_fof_search_pair(e->fof_properties, dim, search_r2, periodic, gparts, ci,
+                      cj);
+
+  if (timer) TIMER_TOC(timer_fof_pair);
+#else
+  error("SWIFT was not compiled with FOF enabled!");
+#endif
+}
diff --git a/src/runner_recv.c b/src/runner_recv.c
new file mode 100644
index 0000000000000000000000000000000000000000..803e68c2106933684109e798e24952a0dbdfea6e
--- /dev/null
+++ b/src/runner_recv.c
@@ -0,0 +1,368 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
+ *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *               2015 Peter W. Draper (p.w.draper@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+/* Config parameters. */
+#include "../config.h"
+
+/* MPI headers. */
+#ifdef WITH_MPI
+#include <mpi.h>
+#endif
+
+/* This object's header. */
+#include "runner.h"
+
+/* Local headers. */
+#include "engine.h"
+#include "timers.h"
+
+/**
+ * @brief Construct the cell properties from the received #part.
+ * Recomputes hydro.h_max and sets hydro.ti_old_part to the current time.
+ * @param r The runner thread.
+ * @param c The cell.
+ * @param clear_sorts Should we clear the sort flag and hence trigger a sort ?
+ * @param timer Are we timing this ?
+ */
+void runner_do_recv_part(struct runner *r, struct cell *c, int clear_sorts,
+                         int timer) {
+#ifdef WITH_MPI
+
+  const struct part *restrict parts = c->hydro.parts;
+  const size_t nr_parts = c->hydro.count;
+  const integertime_t ti_current = r->e->ti_current;
+
+  TIMER_TIC;
+
+  integertime_t ti_hydro_end_min = max_nr_timesteps;
+  integertime_t ti_hydro_end_max = 0;
+  timebin_t time_bin_min = num_time_bins;
+  timebin_t time_bin_max = 0;
+  float h_max = 0.f;
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (c->nodeID == engine_rank) error("Updating a local cell!");
+#endif
+
+  /* Clear this cell's sorted mask. */
+  if (clear_sorts) c->hydro.sorted = 0;
+
+  /* If this cell is a leaf, collect the particle data. */
+  if (!c->split) {
+
+    /* Collect the time-bin range and h_max of the non-inhibited parts */
+    for (size_t k = 0; k < nr_parts; k++) {
+      if (parts[k].time_bin == time_bin_inhibited) continue;
+      time_bin_min = min(time_bin_min, parts[k].time_bin);
+      time_bin_max = max(time_bin_max, parts[k].time_bin);
+      h_max = max(h_max, parts[k].h);
+    }
+
+    /* Convert the time-bins into end-of-step times */
+    ti_hydro_end_min = get_integer_time_end(ti_current, time_bin_min);
+    ti_hydro_end_max = get_integer_time_end(ti_current, time_bin_max);
+  }
+
+  /* Otherwise, recurse into the non-empty progeny and collect. */
+  else {
+    for (int k = 0; k < 8; k++) {
+      if (c->progeny[k] != NULL && c->progeny[k]->hydro.count > 0) {
+        runner_do_recv_part(r, c->progeny[k], clear_sorts, 0);
+        ti_hydro_end_min =
+            min(ti_hydro_end_min, c->progeny[k]->hydro.ti_end_min);
+        ti_hydro_end_max =
+            max(ti_hydro_end_max, c->progeny[k]->hydro.ti_end_max);
+        h_max = max(h_max, c->progeny[k]->hydro.h_max);
+      }
+    }
+  }
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (ti_hydro_end_min < ti_current)
+    error(
+        "Received a cell at an incorrect time c->ti_end_min=%lld, "
+        "e->ti_current=%lld.",
+        ti_hydro_end_min, ti_current);
+#endif
+
+  /* ... and store (the two end-time stores below are currently disabled). */
+  // c->hydro.ti_end_min = ti_hydro_end_min;
+  // c->hydro.ti_end_max = ti_hydro_end_max;
+  c->hydro.ti_old_part = ti_current;
+  c->hydro.h_max = h_max;
+
+  if (timer) TIMER_TOC(timer_dorecv_part);
+
+#else
+  error("SWIFT was not compiled with MPI support.");
+#endif
+}
+
+/**
+ * @brief Construct the cell properties from the received #gpart.
+ * Sets grav.ti_old_part of the cell (and its progeny) to the current time.
+ * @param r The runner thread.
+ * @param c The cell.
+ * @param timer Are we timing this ?
+ */
+void runner_do_recv_gpart(struct runner *r, struct cell *c, int timer) {
+
+#ifdef WITH_MPI
+
+  const struct gpart *restrict gparts = c->grav.parts;
+  const size_t nr_gparts = c->grav.count;
+  const integertime_t ti_current = r->e->ti_current;
+
+  TIMER_TIC;
+
+  integertime_t ti_gravity_end_min = max_nr_timesteps;
+  integertime_t ti_gravity_end_max = 0;
+  timebin_t time_bin_min = num_time_bins;
+  timebin_t time_bin_max = 0;
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (c->nodeID == engine_rank) error("Updating a local cell!");
+#endif
+
+  /* If this cell is a leaf, collect the particle data. */
+  if (!c->split) {
+
+    /* Collect the time-bin range of the non-inhibited gparts */
+    for (size_t k = 0; k < nr_gparts; k++) {
+      if (gparts[k].time_bin == time_bin_inhibited) continue;
+      time_bin_min = min(time_bin_min, gparts[k].time_bin);
+      time_bin_max = max(time_bin_max, gparts[k].time_bin);
+    }
+
+    /* Convert the time-bins into end-of-step times */
+    ti_gravity_end_min = get_integer_time_end(ti_current, time_bin_min);
+    ti_gravity_end_max = get_integer_time_end(ti_current, time_bin_max);
+  }
+
+  /* Otherwise, recurse into the non-empty progeny and collect. */
+  else {
+    for (int k = 0; k < 8; k++) {
+      if (c->progeny[k] != NULL && c->progeny[k]->grav.count > 0) {
+        runner_do_recv_gpart(r, c->progeny[k], 0);
+        ti_gravity_end_min =
+            min(ti_gravity_end_min, c->progeny[k]->grav.ti_end_min);
+        ti_gravity_end_max =
+            max(ti_gravity_end_max, c->progeny[k]->grav.ti_end_max);
+      }
+    }
+  }
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (ti_gravity_end_min < ti_current)
+    error(
+        "Received a cell at an incorrect time c->ti_end_min=%lld, "
+        "e->ti_current=%lld.",
+        ti_gravity_end_min, ti_current);
+#endif
+
+  /* ... and store (the two end-time stores below are currently disabled). */
+  // c->grav.ti_end_min = ti_gravity_end_min;
+  // c->grav.ti_end_max = ti_gravity_end_max;
+  c->grav.ti_old_part = ti_current;
+
+  if (timer) TIMER_TOC(timer_dorecv_gpart);
+
+#else
+  error("SWIFT was not compiled with MPI support.");
+#endif
+}
+
+/**
+ * @brief Construct the cell properties from the received #spart.
+ * Recomputes stars.h_max and sets stars.ti_old_part to the current time.
+ * @param r The runner thread.
+ * @param c The cell.
+ * @param clear_sorts Should we clear the sort flag and hence trigger a sort ?
+ * @param timer Are we timing this ?
+ */
+void runner_do_recv_spart(struct runner *r, struct cell *c, int clear_sorts,
+                          int timer) {
+
+#ifdef WITH_MPI
+
+  struct spart *restrict sparts = c->stars.parts;
+  const size_t nr_sparts = c->stars.count;
+  const integertime_t ti_current = r->e->ti_current;
+
+  TIMER_TIC;
+
+  integertime_t ti_stars_end_min = max_nr_timesteps;
+  integertime_t ti_stars_end_max = 0;
+  timebin_t time_bin_min = num_time_bins;
+  timebin_t time_bin_max = 0;
+  float h_max = 0.f;
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (c->nodeID == engine_rank) error("Updating a local cell!");
+#endif
+
+  /* Clear this cell's sorted mask. */
+  if (clear_sorts) c->stars.sorted = 0;
+
+  /* If this cell is a leaf, collect the particle data. */
+  if (!c->split) {
+
+    /* Collect the time-bin range and h_max of the non-inhibited sparts */
+    for (size_t k = 0; k < nr_sparts; k++) {
+#ifdef DEBUG_INTERACTIONS_STARS
+      sparts[k].num_ngb_force = 0;
+#endif
+      if (sparts[k].time_bin == time_bin_inhibited) continue;
+      time_bin_min = min(time_bin_min, sparts[k].time_bin);
+      time_bin_max = max(time_bin_max, sparts[k].time_bin);
+      h_max = max(h_max, sparts[k].h);
+    }
+
+    /* Convert the time-bins into end-of-step times */
+    ti_stars_end_min = get_integer_time_end(ti_current, time_bin_min);
+    ti_stars_end_max = get_integer_time_end(ti_current, time_bin_max);
+  }
+
+  /* Otherwise, recurse into the non-empty progeny and collect. */
+  else {
+    for (int k = 0; k < 8; k++) {
+      if (c->progeny[k] != NULL && c->progeny[k]->stars.count > 0) {
+        runner_do_recv_spart(r, c->progeny[k], clear_sorts, 0);
+        ti_stars_end_min =
+            min(ti_stars_end_min, c->progeny[k]->stars.ti_end_min);
+        ti_stars_end_max =
+            max(ti_stars_end_max, c->progeny[k]->stars.ti_end_max);
+        h_max = max(h_max, c->progeny[k]->stars.h_max);
+      }
+    }
+  }
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (ti_stars_end_min < ti_current &&
+      !(r->e->policy & engine_policy_star_formation))
+    error(
+        "Received a cell at an incorrect time c->ti_end_min=%lld, "
+        "e->ti_current=%lld.",
+        ti_stars_end_min, ti_current);
+#endif
+
+  /* ... and store (the two end-time stores below are currently disabled). */
+  // c->stars.ti_end_min = ti_stars_end_min;
+  // c->stars.ti_end_max = ti_stars_end_max;
+  c->stars.ti_old_part = ti_current;
+  c->stars.h_max = h_max;
+
+  if (timer) TIMER_TOC(timer_dorecv_spart);
+
+#else
+  error("SWIFT was not compiled with MPI support.");
+#endif
+}
+
+/**
+ * @brief Construct the cell properties from the received #bpart.
+ *
+ * Note that we do not need to clear the sorts since we do not sort
+ * the black holes.
+ *
+ * @param r The runner thread.
+ * @param c The cell.
+ * @param clear_sorts Only forwarded to the recursive calls; nothing is cleared.
+ * @param timer Are we timing this ?
+ */
+void runner_do_recv_bpart(struct runner *r, struct cell *c, int clear_sorts,
+                          int timer) {
+
+#ifdef WITH_MPI
+
+  struct bpart *restrict bparts = c->black_holes.parts;
+  const size_t nr_bparts = c->black_holes.count;
+  const integertime_t ti_current = r->e->ti_current;
+
+  TIMER_TIC;
+
+  integertime_t ti_black_holes_end_min = max_nr_timesteps;
+  integertime_t ti_black_holes_end_max = 0;
+  timebin_t time_bin_min = num_time_bins;
+  timebin_t time_bin_max = 0;
+  float h_max = 0.f;
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (c->nodeID == engine_rank) error("Updating a local cell!");
+#endif
+
+  /* If this cell is a leaf, collect the particle data. */
+  if (!c->split) {
+
+    /* Collect the time-bin range and h_max of the bparts */
+    for (size_t k = 0; k < nr_bparts; k++) {
+#ifdef DEBUG_INTERACTIONS_BLACK_HOLES
+      bparts[k].num_ngb_force = 0;
+#endif
+
+      /* Inhibited particles contribute neither to the time-bin range nor
+       * to h_max. */
+
+      if (bparts[k].time_bin == time_bin_inhibited) continue;
+      time_bin_min = min(time_bin_min, bparts[k].time_bin);
+      time_bin_max = max(time_bin_max, bparts[k].time_bin);
+      h_max = max(h_max, bparts[k].h);
+    }
+
+    /* Convert the time-bins into end-of-step times */
+    ti_black_holes_end_min = get_integer_time_end(ti_current, time_bin_min);
+    ti_black_holes_end_max = get_integer_time_end(ti_current, time_bin_max);
+  }
+
+  /* Otherwise, recurse into the non-empty progeny and collect. */
+  else {
+    for (int k = 0; k < 8; k++) {
+      if (c->progeny[k] != NULL && c->progeny[k]->black_holes.count > 0) {
+        runner_do_recv_bpart(r, c->progeny[k], clear_sorts, 0);
+        ti_black_holes_end_min =
+            min(ti_black_holes_end_min, c->progeny[k]->black_holes.ti_end_min);
+        ti_black_holes_end_max =
+            max(ti_black_holes_end_max, c->progeny[k]->black_holes.ti_end_max);
+        h_max = max(h_max, c->progeny[k]->black_holes.h_max);
+      }
+    }
+  }
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (ti_black_holes_end_min < ti_current)
+    error(
+        "Received a cell at an incorrect time c->ti_end_min=%lld, "
+        "e->ti_current=%lld.",
+        ti_black_holes_end_min, ti_current);
+#endif
+
+  /* ... and store (the two end-time stores below are currently disabled). */
+  // c->black_holes.ti_end_min = ti_black_holes_end_min;
+  // c->black_holes.ti_end_max = ti_black_holes_end_max;
+  c->black_holes.ti_old_part = ti_current;
+  c->black_holes.h_max = h_max;
+
+  if (timer) TIMER_TOC(timer_dorecv_bpart);
+
+#else
+  error("SWIFT was not compiled with MPI support.");
+#endif
+}
diff --git a/src/runner_sort.c b/src/runner_sort.c
new file mode 100644
index 0000000000000000000000000000000000000000..914b64f93b970000885b1b578d762d3f15455332
--- /dev/null
+++ b/src/runner_sort.c
@@ -0,0 +1,708 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
+ *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *               2015 Peter W. Draper (p.w.draper@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+/* Config parameters. */
+#include "../config.h"
+
+/* This object's header. */
+#include "runner.h"
+
+/* Local headers. */
+#include "active.h"
+#include "cell.h"
+#include "engine.h"
+#include "timers.h"
+
+/**
+ * @brief Sorts again all the stars in a given cell hierarchy.
+ *
+ * This is intended to be used after the star formation task has been run
+ * to get the cells back into a state where self/pair star tasks can be run.
+ * The re-sort only happens if cell_flag_do_stars_resort is set on the cell.
+ * @param r The thread #runner.
+ * @param c The top-level cell to run on.
+ * @param timer Are we timing this?
+ */
+void runner_do_stars_resort(struct runner *r, struct cell *c, const int timer) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (c->nodeID != r->e->nodeID) error("Task must be run locally!");
+#endif
+
+  TIMER_TIC;
+
+  /* Was a recalculation of the stars' sorts requested? If so, do it once. */
+  if (cell_get_flag(c, cell_flag_do_stars_resort)) {
+    runner_do_all_stars_sort(r, c);
+    cell_clear_flag(c, cell_flag_do_stars_resort);
+  }
+
+  if (timer) TIMER_TOC(timer_do_stars_resort);
+}
+
+/**
+ * @brief Sort the entries in place, in ascending order of d, using quicksort.
+ * Ranges of at most 15 entries are finished with a selection sort.
+ * @param sort The entries
+ * @param N The number of entries.
+ */
+void runner_do_sort_ascending(struct sort_entry *sort, int N) {
+
+  struct {
+    int lo, hi;
+  } qstack[64];
+  int qpos, i, j, lo, hi, imin;
+  struct sort_entry temp;
+  float pivot;
+  /* Iterative quicksort (ascending). The larger sub-range is always pushed
+   * first, so the smaller is handled next and the stack stays ~log2(N) deep */
+  qstack[0].lo = 0;
+  qstack[0].hi = N - 1;
+  qpos = 0;
+  while (qpos >= 0) {
+    lo = qstack[qpos].lo;
+    hi = qstack[qpos].hi;
+    qpos -= 1;
+    if (hi - lo < 15) {
+      for (i = lo; i < hi; i++) {
+        imin = i;
+        for (j = i + 1; j <= hi; j++)
+          if (sort[j].d < sort[imin].d) imin = j;
+        if (imin != i) {
+          temp = sort[imin];
+          sort[imin] = sort[i];
+          sort[i] = temp;
+        }
+      }
+    } else {
+      pivot = sort[(lo + hi) / 2].d;
+      i = lo;
+      j = hi;
+      while (i <= j) {
+        while (sort[i].d < pivot) i++;
+        while (sort[j].d > pivot) j--;
+        if (i <= j) {
+          if (i < j) {
+            temp = sort[i];
+            sort[i] = sort[j];
+            sort[j] = temp;
+          }
+          i += 1;
+          j -= 1;
+        }
+      }
+      if (j > (lo + hi) / 2) {
+        if (lo < j) {
+          qpos += 1;
+          qstack[qpos].lo = lo;
+          qstack[qpos].hi = j;
+        }
+        if (i < hi) {
+          qpos += 1;
+          qstack[qpos].lo = i;
+          qstack[qpos].hi = hi;
+        }
+      } else {
+        if (i < hi) {
+          qpos += 1;
+          qstack[qpos].lo = i;
+          qstack[qpos].hi = hi;
+        }
+        if (lo < j) {
+          qpos += 1;
+          qstack[qpos].lo = lo;
+          qstack[qpos].hi = j;
+        }
+      }
+    }
+  }
+}
+
+#ifdef SWIFT_DEBUG_CHECKS
+/**
+ * @brief Recursively checks that the flags are consistent in a cell hierarchy.
+ * Errors if a bit of flags is unset in c->TYPE.sorted, then checks progeny.
+ * Debugging function. Exists in two flavours: hydro & stars.
+ */
+#define RUNNER_CHECK_SORTS(TYPE)                                               \
+  void runner_check_sorts_##TYPE(struct cell *c, int flags) {                  \
+                                                                               \
+    if (flags & ~c->TYPE.sorted) error("Inconsistent sort flags (downward)!"); \
+    if (c->split)                                                              \
+      for (int k = 0; k < 8; k++)                                              \
+        if (c->progeny[k] != NULL && c->progeny[k]->TYPE.count > 0)            \
+          runner_check_sorts_##TYPE(c->progeny[k], c->TYPE.sorted);            \
+  }
+#else
+#define RUNNER_CHECK_SORTS(TYPE)                                       \
+  void runner_check_sorts_##TYPE(struct cell *c, int flags) {          \
+    error("Calling debugging code without debugging flag activated."); \
+  }
+#endif
+/* Instantiate the hydro and stars flavours of the check. */
+RUNNER_CHECK_SORTS(hydro)
+RUNNER_CHECK_SORTS(stars)
+
+/**
+ * @brief Sort the particles in the given cell along all cardinal directions.
+ *
+ * @param r The #runner.
+ * @param c The #cell.
+ * @param flags Bit-mask of the sort directions (bits 0 to 12) to build.
+ * @param cleanup If true, re-build the sorts for the selected flags instead
+ *        of just adding them.
+ * @param clock Flag indicating whether to record the timing or not, needed
+ *      for recursive calls.
+ */
+void runner_do_hydro_sort(struct runner *r, struct cell *c, int flags,
+                          int cleanup, int clock) {
+
+  struct sort_entry *fingers[8];
+  const int count = c->hydro.count;
+  const struct part *parts = c->hydro.parts;
+  struct xpart *xparts = c->hydro.xparts;
+  float buff[8];
+
+  TIMER_TIC;
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (c->hydro.super == NULL) error("Task called above the super level!!!");
+#endif
+
+  /* We need to do the local sorts plus whatever was requested further up. */
+  flags |= c->hydro.do_sort;
+  if (cleanup) {
+    c->hydro.sorted = 0;
+  } else {
+    flags &= ~c->hydro.sorted;
+  }
+  if (flags == 0 && !cell_get_flag(c, cell_flag_do_hydro_sub_sort)) return;
+
+  /* Check that the particles have been moved to the current time */
+  if (flags && !cell_are_part_drifted(c, r->e))
+    error("Sorting un-drifted cell c->nodeID=%d", c->nodeID);
+
+#ifdef SWIFT_DEBUG_CHECKS
+  /* Make sure the sort flags are consistent (downward). */
+  runner_check_sorts_hydro(c, c->hydro.sorted);
+
+  /* Make sure the sort flags are consistent (upward). */
+  for (struct cell *finger = c->parent; finger != NULL;
+       finger = finger->parent) {
+    if (finger->hydro.sorted & ~c->hydro.sorted)
+      error("Inconsistent sort flags (upward).");
+  }
+
+  /* Update the sort timer which represents the last time the sorts
+     were re-set. */
+  if (c->hydro.sorted == 0) c->hydro.ti_sort = r->e->ti_current;
+#endif
+
+  /* Allocate memory for sorting. */
+  cell_malloc_hydro_sorts(c, flags);
+
+  /* Does this cell have any progeny? */
+  if (c->split) {
+
+    /* Fill in the gaps within the progeny. */
+    float dx_max_sort = 0.0f;
+    float dx_max_sort_old = 0.0f;
+    for (int k = 0; k < 8; k++) {
+      if (c->progeny[k] != NULL) {
+
+        if (c->progeny[k]->hydro.count > 0) {
+
+          /* Only propagate cleanup if the progeny is stale. */
+          runner_do_hydro_sort(
+              r, c->progeny[k], flags,
+              cleanup && (c->progeny[k]->hydro.dx_max_sort_old >
+                          space_maxreldx * c->progeny[k]->dmin),
+              0);
+          dx_max_sort = max(dx_max_sort, c->progeny[k]->hydro.dx_max_sort);
+          dx_max_sort_old =
+              max(dx_max_sort_old, c->progeny[k]->hydro.dx_max_sort_old);
+        } else {
+
+          /* Empty progeny are not sorted: just clear their sort flags,
+             including the unused ones. */
+          cell_clear_hydro_sort_flags(c->progeny[k], /*clear_unused_flags=*/1);
+        }
+      }
+    }
+    c->hydro.dx_max_sort = dx_max_sort;
+    c->hydro.dx_max_sort_old = dx_max_sort_old;
+
+    /* Loop over the 13 different sort arrays. */
+    for (int j = 0; j < 13; j++) {
+
+      /* Has this sort array been flagged? */
+      if (!(flags & (1 << j))) continue;
+
+      /* Init the particle index offsets (prefix sum of the progeny counts). */
+      int off[8];
+      off[0] = 0;
+      for (int k = 1; k < 8; k++)
+        if (c->progeny[k - 1] != NULL)
+          off[k] = off[k - 1] + c->progeny[k - 1]->hydro.count;
+        else
+          off[k] = off[k - 1];
+
+      /* Init the entries and indices. */
+      int inds[8];
+      for (int k = 0; k < 8; k++) {
+        inds[k] = k;
+        if (c->progeny[k] != NULL && c->progeny[k]->hydro.count > 0) {
+          fingers[k] = c->progeny[k]->hydro.sort[j];
+          buff[k] = fingers[k]->d;
+          /* off[k] already holds this progeny's offset (set above). */
+        } else
+          buff[k] = FLT_MAX;
+      }
+
+      /* Sort the buffer. */
+      for (int i = 0; i < 7; i++)
+        for (int k = i + 1; k < 8; k++)
+          if (buff[inds[k]] < buff[inds[i]]) {
+            int temp_i = inds[i];
+            inds[i] = inds[k];
+            inds[k] = temp_i;
+          }
+
+      /* For each entry in the new sort list. */
+      struct sort_entry *finger = c->hydro.sort[j];
+      for (int ind = 0; ind < count; ind++) {
+
+        /* Copy the minimum into the new sort array. */
+        finger[ind].d = buff[inds[0]];
+        finger[ind].i = fingers[inds[0]]->i + off[inds[0]];
+
+        /* Update the buffer. */
+        fingers[inds[0]] += 1;
+        buff[inds[0]] = fingers[inds[0]]->d;
+
+        /* Move the updated head down until inds is ordered again. */
+        for (int k = 1; k < 8 && buff[inds[k]] < buff[inds[k - 1]]; k++) {
+          int temp_i = inds[k - 1];
+          inds[k - 1] = inds[k];
+          inds[k] = temp_i;
+        }
+
+      } /* Merge. */
+
+      /* Add a sentinel. */
+      c->hydro.sort[j][count].d = FLT_MAX;
+      c->hydro.sort[j][count].i = 0;
+
+      /* Mark as sorted. */
+      atomic_or(&c->hydro.sorted, 1 << j);
+
+    } /* loop over sort arrays. */
+
+  } /* progeny? */
+
+  /* Otherwise, just sort. */
+  else {
+
+    /* Reset the sort distance */
+    if (c->hydro.sorted == 0) {
+#ifdef SWIFT_DEBUG_CHECKS
+      if (xparts != NULL && c->nodeID != engine_rank)
+        error("Have non-NULL xparts in foreign cell");
+#endif
+
+      /* And the individual sort distances if we are a local cell */
+      if (xparts != NULL) {
+        for (int k = 0; k < count; k++) {
+          xparts[k].x_diff_sort[0] = 0.0f;
+          xparts[k].x_diff_sort[1] = 0.0f;
+          xparts[k].x_diff_sort[2] = 0.0f;
+        }
+      }
+      c->hydro.dx_max_sort_old = 0.f;
+      c->hydro.dx_max_sort = 0.f;
+    }
+
+    /* Fill the sort array with the positions projected on each direction. */
+    for (int k = 0; k < count; k++) {
+      const double px[3] = {parts[k].x[0], parts[k].x[1], parts[k].x[2]};
+      for (int j = 0; j < 13; j++)
+        if (flags & (1 << j)) {
+          c->hydro.sort[j][k].i = k;
+          c->hydro.sort[j][k].d = px[0] * runner_shift[j][0] +
+                                  px[1] * runner_shift[j][1] +
+                                  px[2] * runner_shift[j][2];
+        }
+    }
+
+    /* Add the sentinel and sort. */
+    for (int j = 0; j < 13; j++)
+      if (flags & (1 << j)) {
+        c->hydro.sort[j][count].d = FLT_MAX;
+        c->hydro.sort[j][count].i = 0;
+        runner_do_sort_ascending(c->hydro.sort[j], count);
+        atomic_or(&c->hydro.sorted, 1 << j);
+      }
+  }
+
+#ifdef SWIFT_DEBUG_CHECKS
+  /* Verify the sorting. */
+  for (int j = 0; j < 13; j++) {
+    if (!(flags & (1 << j))) continue;
+    struct sort_entry *finger = c->hydro.sort[j];
+    for (int k = 1; k < count; k++) {
+      if (finger[k].d < finger[k - 1].d)
+        error("Sorting failed, ascending array.");
+      if (finger[k].i >= count) error("Sorting failed, indices borked.");
+    }
+  }
+
+  /* Make sure the sort flags are consistent (downward). */
+  runner_check_sorts_hydro(c, flags);
+
+  /* Make sure the sort flags are consistent (upward). */
+  for (struct cell *finger = c->parent; finger != NULL;
+       finger = finger->parent) {
+    if (finger->hydro.sorted & ~c->hydro.sorted)
+      error("Inconsistent sort flags.");
+  }
+#endif
+
+  /* Clear the cell's sort flags. */
+  c->hydro.do_sort = 0;
+  cell_clear_flag(c, cell_flag_do_hydro_sub_sort);
+  c->hydro.requires_sorts = 0;
+
+  if (clock) TIMER_TOC(timer_dosort);
+}
+
+/**
+ * @brief Sort the stars particles in the given cell along the requested
+ * directions.
+ *
+ * @param r The #runner.
+ * @param c The #cell.
+ * @param flags Bitmask of the 13 sort directions to build (bit j = array j).
+ * @param cleanup If true, re-build the sorts for the selected flags instead
+ *        of just adding them.
+ * @param clock Flag indicating whether to record the timing or not, needed
+ *      for recursive calls.
+ */
+void runner_do_stars_sort(struct runner *r, struct cell *c, int flags,
+                          int cleanup, int clock) {
+
+  struct sort_entry *fingers[8];
+  const int count = c->stars.count;
+  struct spart *sparts = c->stars.parts;
+  float buff[8];
+
+  TIMER_TIC;
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (c->hydro.super == NULL) error("Task called above the super level!!!");
+#endif
+
+  /* We need to do the local sorts plus whatever was requested further up. */
+  flags |= c->stars.do_sort;
+  if (cleanup) {
+    c->stars.sorted = 0;
+  } else {
+    flags &= ~c->stars.sorted;
+  }
+  if (flags == 0 && !cell_get_flag(c, cell_flag_do_stars_sub_sort)) return;
+
+  /* Check that the particles have been moved to the current time */
+  if (flags && !cell_are_spart_drifted(c, r->e)) {
+    error("Sorting un-drifted cell c->nodeID=%d", c->nodeID);
+  }
+
+#ifdef SWIFT_DEBUG_CHECKS
+  /* Make sure the sort flags are consistent (downward). */
+  runner_check_sorts_stars(c, c->stars.sorted);
+
+  /* Make sure the sort flags are consistent (upward). */
+  for (struct cell *finger = c->parent; finger != NULL;
+       finger = finger->parent) {
+    if (finger->stars.sorted & ~c->stars.sorted)
+      error("Inconsistent sort flags (upward).");
+  }
+
+  /* Update the sort timer which represents the last time the sorts
+     were re-set. */
+  if (c->stars.sorted == 0) c->stars.ti_sort = r->e->ti_current;
+#endif
+
+  /* Start by allocating the entry arrays in the requested dimensions. */
+  cell_malloc_stars_sorts(c, flags);
+
+  /* Does this cell have any progeny? */
+  if (c->split) {
+
+    /* Fill in the gaps within the progeny. */
+    float dx_max_sort = 0.0f;
+    float dx_max_sort_old = 0.0f;
+    for (int k = 0; k < 8; k++) {
+      if (c->progeny[k] != NULL) {
+
+        if (c->progeny[k]->stars.count > 0) {
+
+          /* Only propagate cleanup if the progeny is stale. */
+          const int cleanup_prog =
+              cleanup && (c->progeny[k]->stars.dx_max_sort_old >
+                          space_maxreldx * c->progeny[k]->dmin);
+          runner_do_stars_sort(r, c->progeny[k], flags, cleanup_prog, 0);
+          dx_max_sort = max(dx_max_sort, c->progeny[k]->stars.dx_max_sort);
+          dx_max_sort_old =
+              max(dx_max_sort_old, c->progeny[k]->stars.dx_max_sort_old);
+        } else {
+
+          /* Progeny without stars: clear the sort flags that were set in
+             case the number of particles in the cell would change */
+          cell_clear_stars_sort_flags(c->progeny[k], /*clear_unused_flags=*/1);
+        }
+      }
+    }
+    c->stars.dx_max_sort = dx_max_sort;
+    c->stars.dx_max_sort_old = dx_max_sort_old;
+
+    /* Loop over the 13 different sort arrays. */
+    for (int j = 0; j < 13; j++) {
+
+      /* Has this sort array been flagged? */
+      if (!(flags & (1 << j))) continue;
+
+      /* Init the particle index offsets (running sum of progeny counts). */
+      int off[8];
+      off[0] = 0;
+      for (int k = 1; k < 8; k++)
+        if (c->progeny[k - 1] != NULL)
+          off[k] = off[k - 1] + c->progeny[k - 1]->stars.count;
+        else
+          off[k] = off[k - 1];
+
+      /* Init the entries and indices. */
+      int inds[8];
+      for (int k = 0; k < 8; k++) {
+        inds[k] = k;
+        if (c->progeny[k] != NULL && c->progeny[k]->stars.count > 0) {
+          fingers[k] = c->progeny[k]->stars.sort[j];
+          buff[k] = fingers[k]->d;
+          /* off[k] is already this progeny's offset (computed above). */
+        } else
+          buff[k] = FLT_MAX;
+      }
+
+      /* Sort the progeny indices by the head entry of each progeny. */
+      for (int i = 0; i < 7; i++)
+        for (int k = i + 1; k < 8; k++)
+          if (buff[inds[k]] < buff[inds[i]]) {
+            int temp_i = inds[i];
+            inds[i] = inds[k];
+            inds[k] = temp_i;
+          }
+
+      /* For each entry in the new sort list. */
+      struct sort_entry *finger = c->stars.sort[j];
+      for (int ind = 0; ind < count; ind++) {
+
+        /* Copy the minimum into the new sort array. */
+        finger[ind].d = buff[inds[0]];
+        finger[ind].i = fingers[inds[0]]->i + off[inds[0]];
+
+        /* Update the buffer. */
+        fingers[inds[0]] += 1;
+        buff[inds[0]] = fingers[inds[0]]->d;
+
+        /* Move the refreshed head back into place to keep inds ordered. */
+        for (int k = 1; k < 8 && buff[inds[k]] < buff[inds[k - 1]]; k++) {
+          int temp_i = inds[k - 1];
+          inds[k - 1] = inds[k];
+          inds[k] = temp_i;
+        }
+
+      } /* Merge. */
+
+      /* Add a sentinel. */
+      c->stars.sort[j][count].d = FLT_MAX;
+      c->stars.sort[j][count].i = 0;
+
+      /* Mark as sorted. */
+      atomic_or(&c->stars.sorted, 1 << j);
+
+    } /* loop over sort arrays. */
+
+  } /* progeny? */
+
+  /* Otherwise, just sort. */
+  else {
+
+    /* Reset the sort distance */
+    if (c->stars.sorted == 0) {
+
+      /* And the individual sort distances of all the stars particles */
+      for (int k = 0; k < count; k++) {
+        sparts[k].x_diff_sort[0] = 0.0f;
+        sparts[k].x_diff_sort[1] = 0.0f;
+        sparts[k].x_diff_sort[2] = 0.0f;
+      }
+      c->stars.dx_max_sort_old = 0.f;
+      c->stars.dx_max_sort = 0.f;
+    }
+
+    /* Fill the sort array. */
+    for (int k = 0; k < count; k++) {
+      const double px[3] = {sparts[k].x[0], sparts[k].x[1], sparts[k].x[2]};
+      for (int j = 0; j < 13; j++)
+        if (flags & (1 << j)) {
+          c->stars.sort[j][k].i = k;
+          c->stars.sort[j][k].d = px[0] * runner_shift[j][0] +
+                                  px[1] * runner_shift[j][1] +
+                                  px[2] * runner_shift[j][2];
+        }
+    }
+
+    /* Add the sentinel and sort. */
+    for (int j = 0; j < 13; j++)
+      if (flags & (1 << j)) {
+        c->stars.sort[j][count].d = FLT_MAX;
+        c->stars.sort[j][count].i = 0;
+        runner_do_sort_ascending(c->stars.sort[j], count);
+        atomic_or(&c->stars.sorted, 1 << j);
+      }
+  }
+
+#ifdef SWIFT_DEBUG_CHECKS
+  /* Verify the sorting. */
+  for (int j = 0; j < 13; j++) {
+    if (!(flags & (1 << j))) continue;
+    struct sort_entry *finger = c->stars.sort[j];
+    for (int k = 1; k < count; k++) {
+      if (finger[k].d < finger[k - 1].d)
+        error("Sorting failed, ascending array.");
+      if (finger[k].i >= count) error("Sorting failed, indices borked.");
+    }
+  }
+
+  /* Make sure the sort flags are consistent (downward). */
+  runner_check_sorts_stars(c, flags);
+
+  /* Make sure the sort flags are consistent (upward). */
+  for (struct cell *finger = c->parent; finger != NULL;
+       finger = finger->parent) {
+    if (finger->stars.sorted & ~c->stars.sorted)
+      error("Inconsistent sort flags.");
+  }
+#endif
+
+  /* Clear the cell's sort flags. */
+  c->stars.do_sort = 0;
+  cell_clear_flag(c, cell_flag_do_stars_sub_sort);
+  c->stars.requires_sorts = 0;
+
+  if (clock) TIMER_TOC(timer_do_stars_sort);
+}
+
+/**
+ * @brief Walk down the cell tree to the hydro super level and run the
+ * hydro sort there.
+ *
+ * Must be invoked on a cell at or above its hydro super-cell!
+ *
+ * All 13 sort directions are requested (mask 0x1FFF).
+ *
+ * @param r the #runner.
+ * @param c the #cell.
+ */
+void runner_do_all_hydro_sort(struct runner *r, struct cell *c) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (c->nodeID != engine_rank) error("Function called on a foreign cell!");
+#endif
+
+  if (!cell_is_active_hydro(c, r->e)) return;
+
+  /* Super level reached: sort everything and stop descending. */
+  if (c->hydro.super == c) {
+    runner_do_hydro_sort(r, c, 0x1FFF, /*cleanup=*/0, /*timer=*/0);
+    return;
+  }
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (c->hydro.super != NULL) error("Function called below the super level!");
+#endif
+
+  /* A leaf at this level has nothing below it to visit. */
+  if (!c->split) {
+#ifdef SWIFT_DEBUG_CHECKS
+    error("Reached a leaf without encountering a hydro super cell!");
+#endif
+    return;
+  }
+
+  /* Ok, then, let's try each existing progeny. */
+  for (int pid = 0; pid < 8; ++pid) {
+    struct cell *child = c->progeny[pid];
+    if (child != NULL) runner_do_all_hydro_sort(r, child);
+  }
+}
+
+/**
+ * @brief Walk down the cell tree to the hydro super level and run the
+ * stars sort there.
+ *
+ * Must be invoked on a cell at or above its hydro super-cell!
+ *
+ * All 13 sort directions are requested (mask 0x1FFF).
+ *
+ * @param r the #runner.
+ * @param c the #cell.
+ */
+void runner_do_all_stars_sort(struct runner *r, struct cell *c) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (c->nodeID != engine_rank) error("Function called on a foreign cell!");
+#endif
+
+  if (!cell_is_active_stars(c, r->e) && !cell_is_active_hydro(c, r->e)) return;
+
+  /* Super level reached: sort everything and stop descending. */
+  if (c->hydro.super == c) {
+    runner_do_stars_sort(r, c, 0x1FFF, /*cleanup=*/0, /*timer=*/0);
+    return;
+  }
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (c->hydro.super != NULL) error("Function called below the super level!");
+#endif
+
+  /* A leaf at this level has nothing below it to visit. */
+  if (!c->split) {
+#ifdef SWIFT_DEBUG_CHECKS
+    error("Reached a leaf without encountering a hydro super cell!");
+#endif
+    return;
+  }
+
+  /* Ok, then, let's try each existing progeny. */
+  for (int pid = 0; pid < 8; ++pid) {
+    struct cell *child = c->progeny[pid];
+    if (child != NULL) runner_do_all_stars_sort(r, child);
+  }
+}
diff --git a/src/runner_time_integration.c b/src/runner_time_integration.c
new file mode 100644
index 0000000000000000000000000000000000000000..e1f5de709da804330953b47a647d0f0ce13de7bb
--- /dev/null
+++ b/src/runner_time_integration.c
@@ -0,0 +1,987 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
+ *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *               2015 Peter W. Draper (p.w.draper@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+/* Config parameters. */
+#include "../config.h"
+
+/* This object's header. */
+#include "runner.h"
+
+/* Local headers. */
+#include "active.h"
+#include "black_holes.h"
+#include "cell.h"
+#include "engine.h"
+#include "kick.h"
+#include "timers.h"
+#include "timestep.h"
+#include "timestep_limiter.h"
+#include "tracers.h"
+
+/**
+ * @brief Reset the multipole field tensors ahead of the gravity calculation.
+ *
+ * @param r The runner thread.
+ * @param c The cell.
+ * @param timer Non-zero if the elapsed time is to be recorded.
+ */
+void runner_do_init_grav(struct runner *r, struct cell *c, int timer) {
+
+  const struct engine *e = r->e;
+
+  TIMER_TIC;
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if ((e->policy & engine_policy_self_gravity) == 0)
+    error("Grav-init task called outside of self-gravity calculation");
+#endif
+
+  /* Gravity-inactive cells are left untouched (and untimed). */
+  if (!cell_is_active_gravity(c, e)) return;
+
+  /* Re-initialise the field tensors of this cell's multipole */
+  gravity_field_tensors_init(&c->grav.multipole->pot, e->ti_current);
+
+  /* Visit every existing progeny of a split cell, without timing them */
+  const int nr_progeny = c->split ? 8 : 0;
+  for (int pid = 0; pid < nr_progeny; pid++) {
+    struct cell *child = c->progeny[pid];
+    if (child != NULL) runner_do_init_grav(r, child, /*timer=*/0);
+  }
+
+  if (timer) TIMER_TOC(timer_init_grav);
+}
+
+/**
+ * @brief Perform the first half-kick on all the starting particles in a cell.
+ *
+ * @param r The runner thread.
+ * @param c The cell.
+ * @param timer Are we timing this ?
+ */
+void runner_do_kick1(struct runner *r, struct cell *c, int timer) {
+
+  const struct engine *e = r->e;
+  const struct cosmology *cosmo = e->cosmology;
+  const struct hydro_props *hydro_props = e->hydro_properties;
+  const struct entropy_floor_properties *entropy_floor = e->entropy_floor;
+  const int with_cosmology = (e->policy & engine_policy_cosmology);
+  struct part *restrict parts = c->hydro.parts;
+  struct xpart *restrict xparts = c->hydro.xparts;
+  struct gpart *restrict gparts = c->grav.parts;
+  struct spart *restrict sparts = c->stars.parts;
+  const int count = c->hydro.count;
+  const int gcount = c->grav.count;
+  const int scount = c->stars.count;
+  const integertime_t ti_current = e->ti_current;
+  const double time_base = e->time_base;
+
+  TIMER_TIC;
+
+  /* Anything to do here? (NB: black holes are tested but not kicked below) */
+  if (!cell_is_starting_hydro(c, e) && !cell_is_starting_gravity(c, e) &&
+      !cell_is_starting_stars(c, e) && !cell_is_starting_black_holes(c, e))
+    return;
+
+  /* Recurse? */
+  if (c->split) {
+    for (int k = 0; k < 8; k++)
+      if (c->progeny[k] != NULL) runner_do_kick1(r, c->progeny[k], 0);
+  } else {
+
+    /* Loop over the parts in this cell. */
+    for (int k = 0; k < count; k++) {
+
+      /* Get a handle on the part and its xpart. */
+      struct part *restrict p = &parts[k];
+      struct xpart *restrict xp = &xparts[k];
+
+      /* If particle needs to be kicked */
+      if (part_is_starting(p, e)) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+        if (p->wakeup == time_bin_awake)
+          error("Woken-up particle that has not been processed in kick1");
+#endif
+
+        /* Skip particles that have been woken up and treated by the limiter. */
+        if (p->wakeup != time_bin_not_awake) continue;
+
+        const integertime_t ti_step = get_integer_timestep(p->time_bin);
+        const integertime_t ti_begin =
+            get_integer_time_begin(ti_current + 1, p->time_bin);
+
+#ifdef SWIFT_DEBUG_CHECKS
+        const integertime_t ti_end = ti_begin + ti_step;
+
+        if (ti_begin != ti_current)
+          error(
+              "Particle in wrong time-bin, ti_end=%lld, ti_begin=%lld, "
+              "ti_step=%lld time_bin=%d wakeup=%d ti_current=%lld",
+              ti_end, ti_begin, ti_step, p->time_bin, p->wakeup, ti_current);
+#endif
+
+        /* Time intervals for this half-kick (first half of the step) */
+        double dt_kick_grav, dt_kick_hydro, dt_kick_therm, dt_kick_corr;
+        if (with_cosmology) {
+          dt_kick_hydro = cosmology_get_hydro_kick_factor(
+              cosmo, ti_begin, ti_begin + ti_step / 2);
+          dt_kick_grav = cosmology_get_grav_kick_factor(cosmo, ti_begin,
+                                                        ti_begin + ti_step / 2);
+          dt_kick_therm = cosmology_get_therm_kick_factor(
+              cosmo, ti_begin, ti_begin + ti_step / 2);
+          dt_kick_corr = cosmology_get_corr_kick_factor(cosmo, ti_begin,
+                                                        ti_begin + ti_step / 2);
+        } else {
+          dt_kick_hydro = (ti_step / 2) * time_base;
+          dt_kick_grav = (ti_step / 2) * time_base;
+          dt_kick_therm = (ti_step / 2) * time_base;
+          dt_kick_corr = (ti_step / 2) * time_base;
+        }
+
+        /* do the kick */
+        kick_part(p, xp, dt_kick_hydro, dt_kick_grav, dt_kick_therm,
+                  dt_kick_corr, cosmo, hydro_props, entropy_floor, ti_begin,
+                  ti_begin + ti_step / 2);
+
+        /* Update the accelerations to be used in the drift for hydro */
+        if (p->gpart != NULL) {
+
+          xp->a_grav[0] = p->gpart->a_grav[0];
+          xp->a_grav[1] = p->gpart->a_grav[1];
+          xp->a_grav[2] = p->gpart->a_grav[2];
+        }
+      }
+    }
+
+    /* Loop over the gparts in this cell. */
+    for (int k = 0; k < gcount; k++) {
+
+      /* Get a handle on the g-part. */
+      struct gpart *restrict gp = &gparts[k];
+
+      /* If the g-particle has no counterpart and needs to be kicked */
+      if ((gp->type == swift_type_dark_matter ||
+           gp->type == swift_type_dark_matter_background) &&
+          gpart_is_starting(gp, e)) {
+
+        const integertime_t ti_step = get_integer_timestep(gp->time_bin);
+        const integertime_t ti_begin =
+            get_integer_time_begin(ti_current + 1, gp->time_bin);
+
+#ifdef SWIFT_DEBUG_CHECKS
+        const integertime_t ti_end =
+            get_integer_time_end(ti_current + 1, gp->time_bin);
+
+        if (ti_begin != ti_current)
+          error(
+              "Particle in wrong time-bin, ti_end=%lld, ti_begin=%lld, "
+              "ti_step=%lld time_bin=%d ti_current=%lld",
+              ti_end, ti_begin, ti_step, gp->time_bin, ti_current);
+#endif
+
+        /* Time interval for this half-kick (gravity only) */
+        double dt_kick_grav;
+        if (with_cosmology) {
+          dt_kick_grav = cosmology_get_grav_kick_factor(cosmo, ti_begin,
+                                                        ti_begin + ti_step / 2);
+        } else {
+          dt_kick_grav = (ti_step / 2) * time_base;
+        }
+
+        /* do the kick */
+        kick_gpart(gp, dt_kick_grav, ti_begin, ti_begin + ti_step / 2);
+      }
+    }
+
+    /* Loop over the stars particles in this cell. */
+    for (int k = 0; k < scount; k++) {
+
+      /* Get a handle on the s-part. */
+      struct spart *restrict sp = &sparts[k];
+
+      /* If particle needs to be kicked */
+      if (spart_is_starting(sp, e)) {
+
+        const integertime_t ti_step = get_integer_timestep(sp->time_bin);
+        const integertime_t ti_begin =
+            get_integer_time_begin(ti_current + 1, sp->time_bin);
+
+#ifdef SWIFT_DEBUG_CHECKS
+        const integertime_t ti_end =
+            get_integer_time_end(ti_current + 1, sp->time_bin);
+
+        if (ti_begin != ti_current)
+          error(
+              "Particle in wrong time-bin, ti_end=%lld, ti_begin=%lld, "
+              "ti_step=%lld time_bin=%d ti_current=%lld",
+              ti_end, ti_begin, ti_step, sp->time_bin, ti_current);
+#endif
+
+        /* Time interval for this half-kick (gravity only) */
+        double dt_kick_grav;
+        if (with_cosmology) {
+          dt_kick_grav = cosmology_get_grav_kick_factor(cosmo, ti_begin,
+                                                        ti_begin + ti_step / 2);
+        } else {
+          dt_kick_grav = (ti_step / 2) * time_base;
+        }
+
+        /* do the kick */
+        kick_spart(sp, dt_kick_grav, ti_begin, ti_begin + ti_step / 2);
+      }
+    }
+  }
+
+  if (timer) TIMER_TOC(timer_kick1);
+}
+
+/**
+ * @brief Perform the second half-kick on all the active particles in a cell.
+ *
+ * Also prepares particles to be drifted.
+ *
+ * @param r The runner thread.
+ * @param c The cell.
+ * @param timer Are we timing this ?
+ */
+void runner_do_kick2(struct runner *r, struct cell *c, int timer) {
+
+  const struct engine *e = r->e;
+  const struct cosmology *cosmo = e->cosmology;
+  const struct hydro_props *hydro_props = e->hydro_properties;
+  const struct entropy_floor_properties *entropy_floor = e->entropy_floor;
+  const int with_cosmology = (e->policy & engine_policy_cosmology);
+  const int count = c->hydro.count;
+  const int gcount = c->grav.count;
+  const int scount = c->stars.count;
+  struct part *restrict parts = c->hydro.parts;
+  struct xpart *restrict xparts = c->hydro.xparts;
+  struct gpart *restrict gparts = c->grav.parts;
+  struct spart *restrict sparts = c->stars.parts;
+  const integertime_t ti_current = e->ti_current;
+  const double time_base = e->time_base;
+
+  TIMER_TIC;
+
+  /* Anything to do here? (NB: black holes are tested but not kicked below) */
+  if (!cell_is_active_hydro(c, e) && !cell_is_active_gravity(c, e) &&
+      !cell_is_active_stars(c, e) && !cell_is_active_black_holes(c, e))
+    return;
+
+  /* Recurse? */
+  if (c->split) {
+    for (int k = 0; k < 8; k++)
+      if (c->progeny[k] != NULL) runner_do_kick2(r, c->progeny[k], 0);
+  } else {
+
+    /* Loop over the particles in this cell. */
+    for (int k = 0; k < count; k++) {
+
+      /* Get a handle on the part and its xpart. */
+      struct part *restrict p = &parts[k];
+      struct xpart *restrict xp = &xparts[k];
+
+      /* If particle needs to be kicked */
+      if (part_is_active(p, e)) {
+
+        integertime_t ti_begin, ti_end, ti_step;
+
+#ifdef SWIFT_DEBUG_CHECKS
+        if (p->wakeup == time_bin_awake)
+          error("Woken-up particle that has not been processed in kick1");
+#endif
+
+        if (p->wakeup == time_bin_not_awake) {
+
+          /* Time-step from a regular kick */
+          ti_step = get_integer_timestep(p->time_bin);
+          ti_begin = get_integer_time_begin(ti_current, p->time_bin);
+          ti_end = ti_begin + ti_step;
+
+        } else {
+
+          /* Time-step that follows a wake-up call */
+          ti_begin = get_integer_time_begin(ti_current, p->wakeup);
+          ti_end = get_integer_time_end(ti_current, p->time_bin);
+          ti_step = ti_end - ti_begin;
+
+          /* Reset the flag. NOTE(review): is time_bin_not_awake meant here? */
+          p->wakeup = time_bin_awake;
+        }
+
+#ifdef SWIFT_DEBUG_CHECKS
+        if (ti_begin + ti_step != ti_current)
+          error(
+              "Particle in wrong time-bin, ti_begin=%lld, ti_step=%lld "
+              "time_bin=%d wakeup=%d ti_current=%lld",
+              ti_begin, ti_step, p->time_bin, p->wakeup, ti_current);
+#endif
+        /* Time intervals for this half-kick (mid-point to end of the step) */
+        double dt_kick_grav, dt_kick_hydro, dt_kick_therm, dt_kick_corr;
+        if (with_cosmology) {
+          dt_kick_hydro = cosmology_get_hydro_kick_factor(
+              cosmo, ti_begin + ti_step / 2, ti_end);
+          dt_kick_grav = cosmology_get_grav_kick_factor(
+              cosmo, ti_begin + ti_step / 2, ti_end);
+          dt_kick_therm = cosmology_get_therm_kick_factor(
+              cosmo, ti_begin + ti_step / 2, ti_end);
+          dt_kick_corr = cosmology_get_corr_kick_factor(
+              cosmo, ti_begin + ti_step / 2, ti_end);
+        } else {
+          dt_kick_hydro = (ti_end - (ti_begin + ti_step / 2)) * time_base;
+          dt_kick_grav = (ti_end - (ti_begin + ti_step / 2)) * time_base;
+          dt_kick_therm = (ti_end - (ti_begin + ti_step / 2)) * time_base;
+          dt_kick_corr = (ti_end - (ti_begin + ti_step / 2)) * time_base;
+        }
+
+        /* Finish the time-step with a second half-kick */
+        kick_part(p, xp, dt_kick_hydro, dt_kick_grav, dt_kick_therm,
+                  dt_kick_corr, cosmo, hydro_props, entropy_floor,
+                  ti_begin + ti_step / 2, ti_end);
+
+#ifdef SWIFT_DEBUG_CHECKS
+        /* Check that kick and the drift are synchronized */
+        if (p->ti_drift != p->ti_kick) error("Error integrating part in time.");
+#endif
+
+        /* Prepare the values to be drifted */
+        hydro_reset_predicted_values(p, xp, cosmo);
+      }
+    }
+
+    /* Loop over the g-particles in this cell. */
+    for (int k = 0; k < gcount; k++) {
+
+      /* Get a handle on the g-part. */
+      struct gpart *restrict gp = &gparts[k];
+
+      /* If the g-particle has no counterpart and needs to be kicked */
+      if ((gp->type == swift_type_dark_matter ||
+           gp->type == swift_type_dark_matter_background) &&
+          gpart_is_active(gp, e)) {
+
+        const integertime_t ti_step = get_integer_timestep(gp->time_bin);
+        const integertime_t ti_begin =
+            get_integer_time_begin(ti_current, gp->time_bin);
+
+#ifdef SWIFT_DEBUG_CHECKS
+        if (ti_begin + ti_step != ti_current)
+          error("Particle in wrong time-bin");
+#endif
+
+        /* Time interval for this half-kick (gravity only) */
+        double dt_kick_grav;
+        if (with_cosmology) {
+          dt_kick_grav = cosmology_get_grav_kick_factor(
+              cosmo, ti_begin + ti_step / 2, ti_begin + ti_step);
+        } else {
+          dt_kick_grav = (ti_step / 2) * time_base;
+        }
+
+        /* Finish the time-step with a second half-kick */
+        kick_gpart(gp, dt_kick_grav, ti_begin + ti_step / 2,
+                   ti_begin + ti_step);
+
+#ifdef SWIFT_DEBUG_CHECKS
+        /* Check that kick and the drift are synchronized */
+        if (gp->ti_drift != gp->ti_kick)
+          error("Error integrating g-part in time.");
+#endif
+
+        /* Prepare the values to be drifted */
+        gravity_reset_predicted_values(gp);
+      }
+    }
+
+    /* Loop over the stars particles in this cell. */
+    for (int k = 0; k < scount; k++) {
+
+      /* Get a handle on the s-part. */
+      struct spart *restrict sp = &sparts[k];
+
+      /* If particle needs to be kicked */
+      if (spart_is_active(sp, e)) {
+
+        const integertime_t ti_step = get_integer_timestep(sp->time_bin);
+        const integertime_t ti_begin =
+            get_integer_time_begin(ti_current, sp->time_bin);
+
+#ifdef SWIFT_DEBUG_CHECKS
+        if (ti_begin + ti_step != ti_current)
+          error("Particle in wrong time-bin");
+#endif
+
+        /* Time interval for this half-kick (gravity only) */
+        double dt_kick_grav;
+        if (with_cosmology) {
+          dt_kick_grav = cosmology_get_grav_kick_factor(
+              cosmo, ti_begin + ti_step / 2, ti_begin + ti_step);
+        } else {
+          dt_kick_grav = (ti_step / 2) * time_base;
+        }
+
+        /* Finish the time-step with a second half-kick */
+        kick_spart(sp, dt_kick_grav, ti_begin + ti_step / 2,
+                   ti_begin + ti_step);
+
+#ifdef SWIFT_DEBUG_CHECKS
+        /* Check that kick and the drift are synchronized */
+        if (sp->ti_drift != sp->ti_kick)
+          error("Error integrating s-part in time.");
+#endif
+
+        /* Prepare the values to be drifted */
+        stars_reset_predicted_values(sp);
+      }
+    }
+  }
+  if (timer) TIMER_TOC(timer_kick2);
+}
+
+/**
+ * @brief Computes the next time-step of all active particles in this cell
+ * and update the cell's statistics.
+ *
+ * @param r The runner thread.
+ * @param c The cell.
+ * @param timer Are we timing this ?
+ */
+void runner_do_timestep(struct runner *r, struct cell *c, int timer) {
+
+  const struct engine *e = r->e;
+  const integertime_t ti_current = e->ti_current;
+  const int with_cosmology = (e->policy & engine_policy_cosmology);
+  const int count = c->hydro.count;
+  const int gcount = c->grav.count;
+  const int scount = c->stars.count;
+  const int bcount = c->black_holes.count;
+  struct part *restrict parts = c->hydro.parts;
+  struct xpart *restrict xparts = c->hydro.xparts;
+  struct gpart *restrict gparts = c->grav.parts;
+  struct spart *restrict sparts = c->stars.parts;
+  struct bpart *restrict bparts = c->black_holes.parts;
+
+  TIMER_TIC;
+
+  /* Anything to do here? */
+  if (!cell_is_active_hydro(c, e) && !cell_is_active_gravity(c, e) &&
+      !cell_is_active_stars(c, e) && !cell_is_active_black_holes(c, e)) {
+    c->hydro.updated = 0;
+    c->grav.updated = 0;
+    c->stars.updated = 0;
+    c->black_holes.updated = 0;
+    return;
+  }
+
+  int updated = 0, g_updated = 0, s_updated = 0, b_updated = 0;
+  integertime_t ti_hydro_end_min = max_nr_timesteps, ti_hydro_end_max = 0,
+                ti_hydro_beg_max = 0;
+  integertime_t ti_gravity_end_min = max_nr_timesteps, ti_gravity_end_max = 0,
+                ti_gravity_beg_max = 0;
+  integertime_t ti_stars_end_min = max_nr_timesteps, ti_stars_end_max = 0,
+                ti_stars_beg_max = 0;
+  integertime_t ti_black_holes_end_min = max_nr_timesteps,
+                ti_black_holes_end_max = 0, ti_black_holes_beg_max = 0;
+
+  /* No children? */
+  if (!c->split) {
+
+    /* Loop over the particles in this cell. */
+    for (int k = 0; k < count; k++) {
+
+      /* Get a handle on the part. */
+      struct part *restrict p = &parts[k];
+      struct xpart *restrict xp = &xparts[k];
+
+      /* If particle needs updating */
+      if (part_is_active(p, e)) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+        /* Current end of time-step */
+        const integertime_t ti_end =
+            get_integer_time_end(ti_current, p->time_bin);
+
+        if (ti_end != ti_current)
+          error("Computing time-step of rogue particle.");
+#endif
+
+        /* Get new time-step */
+        const integertime_t ti_new_step = get_part_timestep(p, xp, e);
+
+        /* Update particle */
+        p->time_bin = get_time_bin(ti_new_step);
+        if (p->gpart != NULL) p->gpart->time_bin = p->time_bin;
+
+        /* Update the tracers properties */
+        tracers_after_timestep(p, xp, e->internal_units, e->physical_constants,
+                               with_cosmology, e->cosmology,
+                               e->hydro_properties, e->cooling_func, e->time);
+
+        /* Number of updated particles */
+        updated++;
+        if (p->gpart != NULL) g_updated++;
+
+        /* What is the next sync-point ? */
+        ti_hydro_end_min = min(ti_current + ti_new_step, ti_hydro_end_min);
+        ti_hydro_end_max = max(ti_current + ti_new_step, ti_hydro_end_max);
+
+        /* What is the next starting point for this cell ? */
+        ti_hydro_beg_max = max(ti_current, ti_hydro_beg_max);
+
+        if (p->gpart != NULL) {
+
+          /* What is the next sync-point ? */
+          ti_gravity_end_min =
+              min(ti_current + ti_new_step, ti_gravity_end_min);
+          ti_gravity_end_max =
+              max(ti_current + ti_new_step, ti_gravity_end_max);
+
+          /* What is the next starting point for this cell ? */
+          ti_gravity_beg_max = max(ti_current, ti_gravity_beg_max);
+        }
+      }
+
+      else { /* part is inactive */
+
+        if (!part_is_inhibited(p, e)) {
+
+          const integertime_t ti_end =
+              get_integer_time_end(ti_current, p->time_bin);
+
+          const integertime_t ti_beg =
+              get_integer_time_begin(ti_current + 1, p->time_bin);
+
+          /* What is the next sync-point ? */
+          ti_hydro_end_min = min(ti_end, ti_hydro_end_min);
+          ti_hydro_end_max = max(ti_end, ti_hydro_end_max);
+
+          /* What is the next starting point for this cell ? */
+          ti_hydro_beg_max = max(ti_beg, ti_hydro_beg_max);
+
+          if (p->gpart != NULL) {
+
+            /* What is the next sync-point ? */
+            ti_gravity_end_min = min(ti_end, ti_gravity_end_min);
+            ti_gravity_end_max = max(ti_end, ti_gravity_end_max);
+
+            /* What is the next starting point for this cell ? */
+            ti_gravity_beg_max = max(ti_beg, ti_gravity_beg_max);
+          }
+        }
+      }
+    }
+
+    /* Loop over the g-particles in this cell. */
+    for (int k = 0; k < gcount; k++) {
+
+      /* Get a handle on the part. */
+      struct gpart *restrict gp = &gparts[k];
+
+      /* If the g-particle has no counterpart */
+      if (gp->type == swift_type_dark_matter ||
+          gp->type == swift_type_dark_matter_background) {
+
+        /* need to be updated ? */
+        if (gpart_is_active(gp, e)) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+          /* Current end of time-step */
+          const integertime_t ti_end =
+              get_integer_time_end(ti_current, gp->time_bin);
+
+          if (ti_end != ti_current)
+            error("Computing time-step of rogue particle.");
+#endif
+
+          /* Get new time-step */
+          const integertime_t ti_new_step = get_gpart_timestep(gp, e);
+
+          /* Update particle */
+          gp->time_bin = get_time_bin(ti_new_step);
+
+          /* Number of updated g-particles */
+          g_updated++;
+
+          /* What is the next sync-point ? */
+          ti_gravity_end_min =
+              min(ti_current + ti_new_step, ti_gravity_end_min);
+          ti_gravity_end_max =
+              max(ti_current + ti_new_step, ti_gravity_end_max);
+
+          /* What is the next starting point for this cell ? */
+          ti_gravity_beg_max = max(ti_current, ti_gravity_beg_max);
+
+        } else { /* gpart is inactive */
+
+          if (!gpart_is_inhibited(gp, e)) {
+
+            const integertime_t ti_end =
+                get_integer_time_end(ti_current, gp->time_bin);
+
+            /* What is the next sync-point ? */
+            ti_gravity_end_min = min(ti_end, ti_gravity_end_min);
+            ti_gravity_end_max = max(ti_end, ti_gravity_end_max);
+
+            const integertime_t ti_beg =
+                get_integer_time_begin(ti_current + 1, gp->time_bin);
+
+            /* What is the next starting point for this cell ? */
+            ti_gravity_beg_max = max(ti_beg, ti_gravity_beg_max);
+          }
+        }
+      }
+    }
+
+    /* Loop over the star particles in this cell. */
+    for (int k = 0; k < scount; k++) {
+
+      /* Get a handle on the part. */
+      struct spart *restrict sp = &sparts[k];
+
+      /* need to be updated ? */
+      if (spart_is_active(sp, e)) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+        /* Current end of time-step */
+        const integertime_t ti_end =
+            get_integer_time_end(ti_current, sp->time_bin);
+
+        if (ti_end != ti_current)
+          error("Computing time-step of rogue particle.");
+#endif
+        /* Get new time-step */
+        const integertime_t ti_new_step = get_spart_timestep(sp, e);
+
+        /* Update particle */
+        sp->time_bin = get_time_bin(ti_new_step);
+        sp->gpart->time_bin = get_time_bin(ti_new_step);
+
+        /* Number of updated s-particles */
+        s_updated++;
+        g_updated++;
+
+        ti_stars_end_min = min(ti_current + ti_new_step, ti_stars_end_min);
+        ti_stars_end_max = max(ti_current + ti_new_step, ti_stars_end_max);
+        ti_gravity_end_min = min(ti_current + ti_new_step, ti_gravity_end_min);
+        ti_gravity_end_max = max(ti_current + ti_new_step, ti_gravity_end_max);
+
+        /* What is the next starting point for this cell ? */
+        ti_stars_beg_max = max(ti_current, ti_stars_beg_max);
+        ti_gravity_beg_max = max(ti_current, ti_gravity_beg_max);
+
+        /* star particle is inactive but not inhibited */
+      } else {
+
+        if (!spart_is_inhibited(sp, e)) {
+
+          const integertime_t ti_end =
+              get_integer_time_end(ti_current, sp->time_bin);
+
+          const integertime_t ti_beg =
+              get_integer_time_begin(ti_current + 1, sp->time_bin);
+
+          ti_stars_end_min = min(ti_end, ti_stars_end_min);
+          ti_stars_end_max = max(ti_end, ti_stars_end_max);
+          ti_gravity_end_min = min(ti_end, ti_gravity_end_min);
+          ti_gravity_end_max = max(ti_end, ti_gravity_end_max);
+
+          /* What is the next starting point for this cell ? */
+          ti_stars_beg_max = max(ti_beg, ti_stars_beg_max);
+          ti_gravity_beg_max = max(ti_beg, ti_gravity_beg_max);
+        }
+      }
+    }
+
+    /* Loop over the black hole particles in this cell. */
+    for (int k = 0; k < bcount; k++) {
+
+      /* Get a handle on the part. */
+      struct bpart *restrict bp = &bparts[k];
+
+      /* need to be updated ? */
+      if (bpart_is_active(bp, e)) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+        /* Current end of time-step */
+        const integertime_t ti_end =
+            get_integer_time_end(ti_current, bp->time_bin);
+
+        if (ti_end != ti_current)
+          error("Computing time-step of rogue particle.");
+#endif
+        /* Get new time-step */
+        const integertime_t ti_new_step = get_bpart_timestep(bp, e);
+
+        /* Update particle */
+        bp->time_bin = get_time_bin(ti_new_step);
+        bp->gpart->time_bin = get_time_bin(ti_new_step);
+
+        /* Number of updated b-particles */
+        b_updated++;
+        g_updated++;
+
+        ti_black_holes_end_min =
+            min(ti_current + ti_new_step, ti_black_holes_end_min);
+        ti_black_holes_end_max =
+            max(ti_current + ti_new_step, ti_black_holes_end_max);
+        ti_gravity_end_min = min(ti_current + ti_new_step, ti_gravity_end_min);
+        ti_gravity_end_max = max(ti_current + ti_new_step, ti_gravity_end_max);
+
+        /* What is the next starting point for this cell ? */
+        ti_black_holes_beg_max = max(ti_current, ti_black_holes_beg_max);
+        ti_gravity_beg_max = max(ti_current, ti_gravity_beg_max);
+
+        /* black hole particle is inactive but not inhibited */
+      } else {
+
+        if (!bpart_is_inhibited(bp, e)) {
+
+          const integertime_t ti_end =
+              get_integer_time_end(ti_current, bp->time_bin);
+
+          const integertime_t ti_beg =
+              get_integer_time_begin(ti_current + 1, bp->time_bin);
+
+          ti_black_holes_end_min = min(ti_end, ti_black_holes_end_min);
+          ti_black_holes_end_max = max(ti_end, ti_black_holes_end_max);
+          ti_gravity_end_min = min(ti_end, ti_gravity_end_min);
+          ti_gravity_end_max = max(ti_end, ti_gravity_end_max);
+
+          /* What is the next starting point for this cell ? */
+          ti_black_holes_beg_max = max(ti_beg, ti_black_holes_beg_max);
+          ti_gravity_beg_max = max(ti_beg, ti_gravity_beg_max);
+        }
+      }
+    }
+
+  } else {
+
+    /* Loop over the progeny. */
+    for (int k = 0; k < 8; k++) {
+      if (c->progeny[k] != NULL) {
+        struct cell *restrict cp = c->progeny[k];
+
+        /* Recurse */
+        runner_do_timestep(r, cp, 0);
+
+        /* And aggregate */
+        updated += cp->hydro.updated;
+        g_updated += cp->grav.updated;
+        s_updated += cp->stars.updated;
+        b_updated += cp->black_holes.updated;
+
+        ti_hydro_end_min = min(cp->hydro.ti_end_min, ti_hydro_end_min);
+        ti_hydro_end_max = max(cp->hydro.ti_end_max, ti_hydro_end_max);
+        ti_hydro_beg_max = max(cp->hydro.ti_beg_max, ti_hydro_beg_max);
+
+        ti_gravity_end_min = min(cp->grav.ti_end_min, ti_gravity_end_min);
+        ti_gravity_end_max = max(cp->grav.ti_end_max, ti_gravity_end_max);
+        ti_gravity_beg_max = max(cp->grav.ti_beg_max, ti_gravity_beg_max);
+
+        ti_stars_end_min = min(cp->stars.ti_end_min, ti_stars_end_min);
+        ti_stars_end_max = max(cp->grav.ti_end_max, ti_stars_end_max);
+        ti_stars_beg_max = max(cp->grav.ti_beg_max, ti_stars_beg_max);
+
+        ti_black_holes_end_min =
+            min(cp->black_holes.ti_end_min, ti_black_holes_end_min);
+        ti_black_holes_end_max =
+            max(cp->grav.ti_end_max, ti_black_holes_end_max);
+        ti_black_holes_beg_max =
+            max(cp->grav.ti_beg_max, ti_black_holes_beg_max);
+      }
+    }
+  }
+
+  /* Store the values. */
+  c->hydro.updated = updated;
+  c->grav.updated = g_updated;
+  c->stars.updated = s_updated;
+  c->black_holes.updated = b_updated;
+
+  c->hydro.ti_end_min = ti_hydro_end_min;
+  c->hydro.ti_end_max = ti_hydro_end_max;
+  c->hydro.ti_beg_max = ti_hydro_beg_max;
+  c->grav.ti_end_min = ti_gravity_end_min;
+  c->grav.ti_end_max = ti_gravity_end_max;
+  c->grav.ti_beg_max = ti_gravity_beg_max;
+  c->stars.ti_end_min = ti_stars_end_min;
+  c->stars.ti_end_max = ti_stars_end_max;
+  c->stars.ti_beg_max = ti_stars_beg_max;
+  c->black_holes.ti_end_min = ti_black_holes_end_min;
+  c->black_holes.ti_end_max = ti_black_holes_end_max;
+  c->black_holes.ti_beg_max = ti_black_holes_beg_max;
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (c->hydro.ti_end_min == e->ti_current &&
+      c->hydro.ti_end_min < max_nr_timesteps)
+    error("End of next hydro step is current time!");
+  if (c->grav.ti_end_min == e->ti_current &&
+      c->grav.ti_end_min < max_nr_timesteps)
+    error("End of next gravity step is current time!");
+  if (c->stars.ti_end_min == e->ti_current &&
+      c->stars.ti_end_min < max_nr_timesteps)
+    error("End of next stars step is current time!");
+  if (c->black_holes.ti_end_min == e->ti_current &&
+      c->black_holes.ti_end_min < max_nr_timesteps)
+    error("End of next black holes step is current time!");
+#endif
+
+  if (timer) TIMER_TOC(timer_timestep);
+}
+
+/**
+ * @brief Apply the time-step limiter to all awakened particles in a cell
+ * hierarchy and widen the cells' hydro/gravity time-step bounds accordingly.
+ *
+ * @param r The task #runner.
+ * @param c The #cell.
+ * @param force If non-zero, limit irrespective of the #cell limiter flags.
+ * @param timer If non-zero, TIMER_TOC(timer_do_limiter) is called on exit.
+ */
+void runner_do_limiter(struct runner *r, struct cell *c, int force, int timer) {
+
+  const struct engine *e = r->e;
+  const integertime_t ti_current = e->ti_current;
+  const int count = c->hydro.count;
+  struct part *restrict parts = c->hydro.parts;
+  struct xpart *restrict xparts = c->hydro.xparts;
+
+  TIMER_TIC;
+
+#ifdef SWIFT_DEBUG_CHECKS
+  /* Check that we only limit local cells. */
+  if (c->nodeID != engine_rank) error("Limiting dt of a foreign cell is nope.");
+#endif
+
+  integertime_t ti_hydro_end_min = max_nr_timesteps, ti_hydro_end_max = 0,
+                ti_hydro_beg_max = 0;
+  integertime_t ti_gravity_end_min = max_nr_timesteps, ti_gravity_end_max = 0,
+                ti_gravity_beg_max = 0;
+
+  /* Force if requested by the caller or by the cell's limiter flag. */
+  force = (force || cell_get_flag(c, cell_flag_do_hydro_limiter));
+
+  /* Early abort? (no gas particles; note this skips the TIMER_TOC below) */
+  if (c->hydro.count == 0) {
+
+    /* Clear the limiter flags. */
+    cell_clear_flag(
+        c, cell_flag_do_hydro_limiter | cell_flag_do_hydro_sub_limiter);
+    return;
+  }
+
+  /* Split cell: recurse if forced or if the sub-limiter flag is set. */
+  if (c->split && (force || cell_get_flag(c, cell_flag_do_hydro_sub_limiter))) {
+    for (int k = 0; k < 8; k++) {
+      if (c->progeny[k] != NULL) {
+        struct cell *restrict cp = c->progeny[k];
+
+        /* Recurse (un-timed), passing the force flag down */
+        runner_do_limiter(r, cp, force, 0);
+
+        /* And aggregate the progeny's time-step bounds */
+        ti_hydro_end_min = min(cp->hydro.ti_end_min, ti_hydro_end_min);
+        ti_hydro_end_max = max(cp->hydro.ti_end_max, ti_hydro_end_max);
+        ti_hydro_beg_max = max(cp->hydro.ti_beg_max, ti_hydro_beg_max);
+        ti_gravity_end_min = min(cp->grav.ti_end_min, ti_gravity_end_min);
+        ti_gravity_end_max = max(cp->grav.ti_end_max, ti_gravity_end_max);
+        ti_gravity_beg_max = max(cp->grav.ti_beg_max, ti_gravity_beg_max);
+      }
+    }
+
+    /* Store the updated values (the stored bounds can only widen) */
+    c->hydro.ti_end_min = min(c->hydro.ti_end_min, ti_hydro_end_min);
+    c->hydro.ti_end_max = max(c->hydro.ti_end_max, ti_hydro_end_max);
+    c->hydro.ti_beg_max = max(c->hydro.ti_beg_max, ti_hydro_beg_max);
+    c->grav.ti_end_min = min(c->grav.ti_end_min, ti_gravity_end_min);
+    c->grav.ti_end_max = max(c->grav.ti_end_max, ti_gravity_end_max);
+    c->grav.ti_beg_max = max(c->grav.ti_beg_max, ti_gravity_beg_max);
+
+  } else if (!c->split && force) {
+
+    ti_hydro_end_min = c->hydro.ti_end_min;
+    ti_hydro_end_max = c->hydro.ti_end_max;
+    ti_hydro_beg_max = c->hydro.ti_beg_max;
+    ti_gravity_end_min = c->grav.ti_end_min;
+    ti_gravity_end_max = c->grav.ti_end_max;
+    ti_gravity_beg_max = c->grav.ti_beg_max;
+
+    /* Loop over the gas particles in this cell. */
+    for (int k = 0; k < count; k++) {
+
+      /* Get a handle on the part and its xpart. */
+      struct part *restrict p = &parts[k];
+      struct xpart *restrict xp = &xparts[k];
+
+      /* Avoid inhibited particles */
+      if (part_is_inhibited(p, e)) continue;
+
+      /* Active particles need no wake-up: cancel any pending request */
+      if (part_is_active(p, e) && p->wakeup != time_bin_not_awake)
+        p->wakeup = time_bin_not_awake;
+
+      /* Wake-up requested for this particle (wakeup <= time_bin_awake)? */
+      if (p->wakeup <= time_bin_awake) {
+
+        /* Apply the limiter and get the new time-step size */
+        const integertime_t ti_new_step = timestep_limit_part(p, xp, e);
+
+        /* What is the next sync-point ? */
+        ti_hydro_end_min = min(ti_current + ti_new_step, ti_hydro_end_min);
+        ti_hydro_end_max = max(ti_current + ti_new_step, ti_hydro_end_max);
+
+        /* What is the next starting point for this cell ? */
+        ti_hydro_beg_max = max(ti_current, ti_hydro_beg_max);
+
+        /* Also limit the gpart counter-part */
+        if (p->gpart != NULL) {
+
+          /* Copy the part's time-bin to its gpart */
+          p->gpart->time_bin = p->time_bin;
+
+          /* What is the next sync-point ? */
+          ti_gravity_end_min =
+              min(ti_current + ti_new_step, ti_gravity_end_min);
+          ti_gravity_end_max =
+              max(ti_current + ti_new_step, ti_gravity_end_max);
+
+          /* What is the next starting point for this cell ? */
+          ti_gravity_beg_max = max(ti_current, ti_gravity_beg_max);
+        }
+      }
+    }
+
+    /* Store the updated values (the stored bounds can only widen) */
+    c->hydro.ti_end_min = min(c->hydro.ti_end_min, ti_hydro_end_min);
+    c->hydro.ti_end_max = max(c->hydro.ti_end_max, ti_hydro_end_max);
+    c->hydro.ti_beg_max = max(c->hydro.ti_beg_max, ti_hydro_beg_max);
+    c->grav.ti_end_min = min(c->grav.ti_end_min, ti_gravity_end_min);
+    c->grav.ti_end_max = max(c->grav.ti_end_max, ti_gravity_end_max);
+    c->grav.ti_beg_max = max(c->grav.ti_beg_max, ti_gravity_beg_max);
+  }
+
+  /* Clear the limiter flags now that this cell has been processed. */
+  cell_clear_flag(c,
+                  cell_flag_do_hydro_limiter | cell_flag_do_hydro_sub_limiter);
+
+  if (timer) TIMER_TOC(timer_do_limiter);
+}
diff --git a/src/scheduler.c b/src/scheduler.c
index 85c3727a1ebe9297943bf74a5b407ec5b5e46322..1fad63fd7141db2aad486aaaa7e4dc877a8aa3b8 100644
--- a/src/scheduler.c
+++ b/src/scheduler.c
@@ -601,7 +601,10 @@ static void scheduler_splittask_hydro(struct task *t, struct scheduler *s) {
           /* Add the self tasks. */
           int first_child = 0;
           while (ci->progeny[first_child] == NULL) first_child++;
+
           t->ci = ci->progeny[first_child];
+          cell_set_flag(t->ci, cell_flag_has_tasks);
+
           for (int k = first_child + 1; k < 8; k++) {
             /* Do we have a non-empty progenitor? */
             if (ci->progeny[k] != NULL &&
@@ -711,8 +714,12 @@ static void scheduler_splittask_hydro(struct task *t, struct scheduler *s) {
           /* Loop over the sub-cell pairs for the current sid and add new tasks
            * for them. */
           struct cell_split_pair *csp = &cell_split_pairs[sid];
+
           t->ci = ci->progeny[csp->pairs[0].pid];
           t->cj = cj->progeny[csp->pairs[0].pjd];
+          cell_set_flag(t->ci, cell_flag_has_tasks);
+          cell_set_flag(t->cj, cell_flag_has_tasks);
+
           t->flags = csp->pairs[0].sid;
           for (int k = 1; k < csp->count; k++) {
             scheduler_splittask_hydro(
@@ -796,7 +803,9 @@ static void scheduler_splittask_gravity(struct task *t, struct scheduler *s) {
           /* Add the self tasks. */
           int first_child = 0;
           while (ci->progeny[first_child] == NULL) first_child++;
+
           t->ci = ci->progeny[first_child];
+          cell_set_flag(t->ci, cell_flag_has_tasks);
 
           for (int k = first_child + 1; k < 8; k++)
             if (ci->progeny[k] != NULL)
@@ -1100,6 +1109,9 @@ struct task *scheduler_addtask(struct scheduler *s, enum task_types type,
   t->tic = 0;
   t->toc = 0;
 
+  if (ci != NULL) cell_set_flag(ci, cell_flag_has_tasks);
+  if (cj != NULL) cell_set_flag(cj, cell_flag_has_tasks);
+
   /* Add an index for it. */
   // lock_lock( &s->lock );
   s->tasks_ind[atomic_inc(&s->nr_tasks)] = ind;
@@ -1589,14 +1601,6 @@ void scheduler_enqueue_mapper(void *map_data, int num_elements,
  * @param s The #scheduler.
  */
 void scheduler_start(struct scheduler *s) {
-  /* Reset all task timers. */
-  for (int i = 0; i < s->nr_tasks; ++i) {
-    s->tasks[i].tic = 0;
-    s->tasks[i].toc = 0;
-#ifdef SWIFT_DEBUG_TASKS
-    s->tasks[i].rid = -1;
-#endif
-  }
 
   /* Re-wait the tasks. */
   if (s->active_count > 1000) {
diff --git a/src/space.c b/src/space.c
index a417117f6e3fa92e1a491efbc11f70c7c9e9ef97..eb498035d7c912f331870cfb0bb8bf84ad1559c4 100644
--- a/src/space.c
+++ b/src/space.c
@@ -5670,14 +5670,15 @@ void space_write_cell(const struct space *s, FILE *f, const struct cell *c) {
  * @brief Write a csv file containing the cell hierarchy
  *
  * @param s The #space.
+ * @param j The file number.
  */
-void space_write_cell_hierarchy(const struct space *s) {
+void space_write_cell_hierarchy(const struct space *s, int j) {
 
 #ifdef SWIFT_CELL_GRAPH
 
   /* Open file */
   char filename[200];
-  sprintf(filename, "cell_hierarchy_%04i.csv", engine_rank);
+  sprintf(filename, "cell_hierarchy_%04i_%04i.csv", j, engine_rank);
   FILE *f = fopen(filename, "w");
   if (f == NULL) error("Error opening task level file.");
 
diff --git a/src/space.h b/src/space.h
index 0b332716645e733636b7ab0da57a0a31b28e3d31..ad20641e4dc11559d33f512794fddf1b7453317a 100644
--- a/src/space.h
+++ b/src/space.h
@@ -374,6 +374,6 @@ void space_free_foreign_parts(struct space *s);
 
 void space_struct_dump(struct space *s, FILE *stream);
 void space_struct_restore(struct space *s, FILE *stream);
-void space_write_cell_hierarchy(const struct space *s);
+void space_write_cell_hierarchy(const struct space *s, int j);
 
 #endif /* SWIFT_SPACE_H */
diff --git a/src/stars/EAGLE/stars.h b/src/stars/EAGLE/stars.h
index f102ddb22c0075d33b02c726d0447b64fa9d3df7..82ad2e25f474e6c66e6cf5e09f39188faa09b084 100644
--- a/src/stars/EAGLE/stars.h
+++ b/src/stars/EAGLE/stars.h
@@ -65,7 +65,7 @@ __attribute__((always_inline)) INLINE static void stars_first_init_spart(
   sp->time_bin = 0;
   sp->birth_density = 0.f;
   sp->f_E = -1.f;
-  if (stars_properties->spart_first_init_birth_time != -1.f)
+  if (stars_properties->overwrite_birth_time)
     sp->birth_time = stars_properties->spart_first_init_birth_time;
 
   stars_init_spart(sp);
diff --git a/src/stars/EAGLE/stars_io.h b/src/stars/EAGLE/stars_io.h
index b91b5cf94595a05acd280cfd4f51755f91cce04d..0baafd380addfa1d6f8d60491be3da4c30b2a3aa 100644
--- a/src/stars/EAGLE/stars_io.h
+++ b/src/stars/EAGLE/stars_io.h
@@ -217,10 +217,15 @@ INLINE static void stars_props_init(struct stars_props *sp,
   else
     sp->log_max_h_change = logf(powf(max_volume_change, hydro_dimension_inv));
 
-  /* Read birth time to set all stars in ICs to (defaults to -1 to indicate star
-   * present in ICs) */
-  sp->spart_first_init_birth_time =
-      parser_get_opt_param_float(params, "Stars:birth_time", -1.f);
+  /* Do we want to overwrite the stars' birth time? */
+  sp->overwrite_birth_time =
+      parser_get_opt_param_int(params, "Stars:overwrite_birth_time", 0);
+
+  /* Read birth time to set all stars in ICs */
+  if (sp->overwrite_birth_time) {
+    sp->spart_first_init_birth_time =
+        parser_get_param_float(params, "Stars:birth_time");
+  }
 }
 
 /**
@@ -244,6 +249,10 @@ INLINE static void stars_props_print(const struct stars_props *sp) {
 
   message("Maximal iterations in ghost task set to %d",
           sp->max_smoothing_iterations);
+
+  if (sp->overwrite_birth_time)
+    message("Stars' birth time read from the ICs will be overwritten to %f",
+            sp->spart_first_init_birth_time);
 }
 
 #if defined(HAVE_HDF5)
diff --git a/src/stars/EAGLE/stars_part.h b/src/stars/EAGLE/stars_part.h
index 4502b10edb7b646e9ba845e1ffffbb9255cdc01c..9114cb9107e1259698c365be82e5318d60d37ac7 100644
--- a/src/stars/EAGLE/stars_part.h
+++ b/src/stars/EAGLE/stars_part.h
@@ -144,7 +144,7 @@ struct stars_props {
   /*! Smoothing length tolerance */
   float h_tolerance;
 
-  /*! Tolerance on neighbour number  (for info only)*/
+  /*! Tolerance on neighbour number  (for info only) */
   float delta_neighbours;
 
   /*! Maximal number of iterations to converge h */
@@ -153,7 +153,10 @@ struct stars_props {
   /*! Maximal change of h over one time-step */
   float log_max_h_change;
 
-  /*! Value to set birth time of stars read from ICs if not set to -1 */
+  /*! Are we overwriting the stars' birth time read from the ICs? */
+  int overwrite_birth_time;
+
+  /*! Value to set birth time of stars read from ICs */
   float spart_first_init_birth_time;
 };
 
diff --git a/src/task.c b/src/task.c
index 643f084b1fa4fc530125128e694a7012de3f302f..4d6cfa2482491b1a08f6b28f7188fb94448afb2e 100644
--- a/src/task.c
+++ b/src/task.c
@@ -893,7 +893,7 @@ void task_dump_all(struct engine *e, int step) {
 #ifdef SWIFT_DEBUG_TASKS
 
   /* Need this to convert ticks to seconds. */
-  unsigned long long cpufreq = clocks_get_cpufreq();
+  const unsigned long long cpufreq = clocks_get_cpufreq();
 
 #ifdef WITH_MPI
   /* Make sure output file is empty, only on one rank. */
@@ -926,7 +926,8 @@ void task_dump_all(struct engine *e, int step) {
               e->s_updates, cpufreq);
       int count = 0;
       for (int l = 0; l < e->sched.nr_tasks; l++) {
-        if (!e->sched.tasks[l].implicit && e->sched.tasks[l].toc != 0) {
+        if (!e->sched.tasks[l].implicit &&
+            e->sched.tasks[l].tic > e->tic_step) {
           fprintf(
               file_thread, " %03i %i %i %i %i %lli %lli %i %i %i %i %lli %i\n",
               engine_rank, e->sched.tasks[l].rid, e->sched.tasks[l].type,
@@ -966,7 +967,7 @@ void task_dump_all(struct engine *e, int step) {
           (unsigned long long)e->toc_step, e->updates, e->g_updates,
           e->s_updates, 0, cpufreq);
   for (int l = 0; l < e->sched.nr_tasks; l++) {
-    if (!e->sched.tasks[l].implicit && e->sched.tasks[l].toc != 0) {
+    if (!e->sched.tasks[l].implicit && e->sched.tasks[l].tic > e->tic_step) {
       fprintf(
           file_thread, " %i %i %i %i %lli %lli %i %i %i %i %i\n",
           e->sched.tasks[l].rid, e->sched.tasks[l].type,
@@ -1037,8 +1038,8 @@ void task_dump_stats(const char *dumpfile, struct engine *e, int header,
   for (int l = 0; l < e->sched.nr_tasks; l++) {
     int type = e->sched.tasks[l].type;
 
-    /* Skip implicit tasks, tasks that didn't run. */
-    if (!e->sched.tasks[l].implicit && e->sched.tasks[l].toc != 0) {
+    /* Skip implicit tasks, tasks that didn't run this step. */
+    if (!e->sched.tasks[l].implicit && e->sched.tasks[l].tic > e->tic_step) {
       int subtype = e->sched.tasks[l].subtype;
 
       double dt = e->sched.tasks[l].toc - e->sched.tasks[l].tic;
diff --git a/src/timestep_limiter.h b/src/timestep_limiter.h
index d8555a352c8e1a799ac13d268932c9d37f30fe33..01b72daea5599b662c38fdc4b3ada8b2ac5b3d11 100644
--- a/src/timestep_limiter.h
+++ b/src/timestep_limiter.h
@@ -22,6 +22,9 @@
 /* Config parameters. */
 #include "../config.h"
 
+/* Local headers. */
+#include "kick.h"
+
 /**
  * @brief Wakes up a particle by rewinding it's kick1 back in time and applying
  * a new one such that the particle becomes active again in the next time-step.
diff --git a/tests/testLogger.c b/tests/testLogger.c
index c5be0d7cc18742bdc2fa6167462579c45fd43e92..d2c64e7fa3330ebd20cf8abc01a76e1dff08c8fc 100644
--- a/tests/testLogger.c
+++ b/tests/testLogger.c
@@ -32,8 +32,8 @@
 /* Local headers. */
 #include "swift.h"
 
-void test_log_parts(struct logger *log) {
-  struct dump *d = log->dump;
+void test_log_parts(struct logger_writer *log) {
+  struct dump *d = &log->dump;
 
   /* Write several copies of a part to the dump. */
   struct part p;
@@ -45,22 +45,27 @@ void test_log_parts(struct logger *log) {
   size_t offset = d->count;
 
   /* Write the full part. */
-  logger_log_part(log, &p,
-                  logger_mask_x | logger_mask_v | logger_mask_a |
-                      logger_mask_u | logger_mask_h | logger_mask_rho |
-                      logger_mask_consts,
-                  &offset);
+  logger_log_part(
+      log, &p,
+      logger_mask_data[logger_x].mask | logger_mask_data[logger_v].mask |
+          logger_mask_data[logger_a].mask | logger_mask_data[logger_u].mask |
+          logger_mask_data[logger_h].mask | logger_mask_data[logger_rho].mask |
+          logger_mask_data[logger_consts].mask,
+      &offset);
   printf("Wrote part at offset %#016zx.\n", offset);
 
   /* Write only the position. */
   p.x[0] = 2.0;
-  logger_log_part(log, &p, logger_mask_x, &offset);
+  logger_log_part(log, &p, logger_mask_data[logger_x].mask, &offset);
   printf("Wrote part at offset %#016zx.\n", offset);
 
   /* Write the position and velocity. */
   p.x[0] = 3.0;
   p.v[0] = 0.3;
-  logger_log_part(log, &p, logger_mask_x | logger_mask_v, &offset);
+  logger_log_part(
+      log, &p,
+      logger_mask_data[logger_x].mask | logger_mask_data[logger_v].mask,
+      &offset);
   printf("Wrote part at offset %#016zx.\n", offset);
 
   /* Recover the last part from the dump. */
@@ -103,8 +108,8 @@ void test_log_parts(struct logger *log) {
   }
 }
 
-void test_log_gparts(struct logger *log) {
-  struct dump *d = log->dump;
+void test_log_gparts(struct logger_writer *log) {
+  struct dump *d = &log->dump;
 
   /* Write several copies of a part to the dump. */
   struct gpart p;
@@ -116,21 +121,26 @@ void test_log_gparts(struct logger *log) {
   size_t offset = d->count;
 
   /* Write the full part. */
-  logger_log_gpart(log, &p,
-                   logger_mask_x | logger_mask_v | logger_mask_a |
-                       logger_mask_h | logger_mask_consts,
-                   &offset);
+  logger_log_gpart(
+      log, &p,
+      logger_mask_data[logger_x].mask | logger_mask_data[logger_v].mask |
+          logger_mask_data[logger_a].mask | logger_mask_data[logger_h].mask |
+          logger_mask_data[logger_consts].mask,
+      &offset);
   printf("Wrote gpart at offset %#016zx.\n", offset);
 
   /* Write only the position. */
   p.x[0] = 2.0;
-  logger_log_gpart(log, &p, logger_mask_x, &offset);
+  logger_log_gpart(log, &p, logger_mask_data[logger_x].mask, &offset);
   printf("Wrote gpart at offset %#016zx.\n", offset);
 
   /* Write the position and velocity. */
   p.x[0] = 3.0;
   p.v_full[0] = 0.3;
-  logger_log_gpart(log, &p, logger_mask_x | logger_mask_v, &offset);
+  logger_log_gpart(
+      log, &p,
+      logger_mask_data[logger_x].mask | logger_mask_data[logger_v].mask,
+      &offset);
   printf("Wrote gpart at offset %#016zx.\n", offset);
 
   /* Recover the last part from the dump. */
@@ -173,8 +183,8 @@ void test_log_gparts(struct logger *log) {
   }
 }
 
-void test_log_timestamps(struct logger *log) {
-  struct dump *d = log->dump;
+void test_log_timestamps(struct logger_writer *log) {
+  struct dump *d = &log->dump;
 
   /* The timestamp to log. */
   unsigned long long int t = 10;
@@ -245,7 +255,7 @@ void test_log_timestamps(struct logger *log) {
 int main(int argc, char *argv[]) {
 
   /* Prepare a logger. */
-  struct logger log;
+  struct logger_writer log;
   struct swift_params params;
   parser_read_file("logger.yml", &params);
   logger_init(&log, &params);
@@ -265,7 +275,7 @@ int main(int argc, char *argv[]) {
   remove(filename);
 
   /* Clean the logger. */
-  logger_clean(&log);
+  logger_free(&log);
 
   /* Return a happy number. */
   return 0;
diff --git a/tests/tolerance_125_perturbed.dat b/tests/tolerance_125_perturbed.dat
index 95f5f78246a82b7c326c87f9b4edbac4f51c65e9..d6b21204ae9cec00f0d84a20e3c58bc34a4b4be1 100644
--- a/tests/tolerance_125_perturbed.dat
+++ b/tests/tolerance_125_perturbed.dat
@@ -1,4 +1,4 @@
 #   ID    pos_x    pos_y    pos_z      v_x      v_y      v_z        h      rho    div_v        S        u        P        c      a_x      a_y      a_z     h_dt    v_sig    dS/dt    du/dt
     0	  1e-4	   1e-4	    1e-4       1e-4	1e-4	 1e-4	    1e-4   1e-4	  1e-4	       1e-4	1e-4	 1e-4	  1e-4	 1e-4	  1e-4	   1e-4	   1e-4	   1e-4	    1e-4     1e-4
     0	  1e-4	   1e-4	    1e-4       1e-4	1e-4	 1e-4	    1e-4   1e-4	  1e-4	       1e-4	1e-4	 1e-4	  1e-4	 3.6e-3	  2e-3	   2e-3	   1e-4	   1e-4	    1e-4     1e-4
-    0	  1e-6	   1e-6	    1e-6       1e-6	1e-6	 1e-6	    1e-6   1e-6	  1e-6	       1e-6	1e-6	 1e-6	  1e-6	 2e-4	  2e-4	   2e-4	   1e-6	   1e-6	    1e-6     1e-6
+    0	  1e-6	   1e-6	    1e-6       1e-6	1e-6	 1e-6	    1e-6   1e-6	  1e-6	       1e-6	1e-6	 1e-6	  1e-6	 5e-4	  5e-4	   5e-4	   1e-6	   1e-6	    1e-6     1e-6
diff --git a/tools/make_cell_hierarchy.sh b/tools/make_cell_hierarchy.sh
old mode 100644
new mode 100755
index 87fbe4c97f4aadcbb9be5867a62e8acb56415820..9d1d3caf7c4e2f0514c3d6ad5b2db48efa8958d5
--- a/tools/make_cell_hierarchy.sh
+++ b/tools/make_cell_hierarchy.sh
@@ -9,7 +9,7 @@ then
    rm $csv_output
 fi
 
-for filename in ./cell_hierarchy_*.csv;
+for filename in $@;
 do
     cat $filename >> cell_hierarchy.csv
 done