diff --git a/.gitignore b/.gitignore
index 5615f036ef0f3a51db7c156afe69b8511e015d4c..5284b8c7208812c41b9044cd482e1047d3b13fd8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -29,7 +29,7 @@ examples/*/*.h5
 examples/*/*.png
 examples/*/*.mp4
 examples/*/*.txt
-examples/*/dependency_graph_*.csv
+examples/*/dependency_graph.csv
 examples/*/restart/*
 examples/*/used_parameters.yml
 examples/*/unused_parameters.yml
@@ -37,7 +37,6 @@ examples/*/*/*.xmf
 examples/*/*/*.png
 examples/*/*/*.mp4
 examples/*/*/*.txt
-examples/*/*/*.dot
 examples/*/*/*.rst
 examples/*/*/*.hdf5
 examples/*/snapshots*
diff --git a/README b/README
index 272188b3f7926b562ba993da3d24ae547c6a0397..b51abc121f7cc7c1b4baa851c02045b5f4614bbb 100644
--- a/README
+++ b/README
@@ -36,7 +36,8 @@ Parameters:
     -s, --hydro                       Run with hydrodynamics.
     -S, --stars                       Run with stars.
     -x, --velociraptor                Run with structure finding.
-
+    --limiter                         Run with time-step limiter.
+    
   Control options:
   
     -a, --pin                         Pin runners using processor affinity.
diff --git a/README.md b/README.md
index 7a3c1287c79922a751595840295063a8ca347ef7..29415f27ee62f154b01dcd6a65414d7288a0a63f 100644
--- a/README.md
+++ b/README.md
@@ -84,6 +84,7 @@ Parameters:
     -s, --hydro                       Run with hydrodynamics.
     -S, --stars                       Run with stars.
     -x, --velociraptor                Run with structure finding.
+    --limiter                         Run with time-step limiter.
 
   Control options:
   
diff --git a/configure.ac b/configure.ac
index 04e55b047dd742a00d0629f6b3cce3f1e7fe8371..53ae0a717b60fc0e79b02e684de3e5eaf6b504fd 100644
--- a/configure.ac
+++ b/configure.ac
@@ -957,7 +957,7 @@ if test "x$with_velociraptor" != "xno"; then
    AC_PROG_FC
    AC_FC_LIBRARY_LDFLAGS
    if test "x$with_velociraptor" != "xyes" -a "x$with_velociraptor" != "x"; then
-      VELOCIRAPTOR_LIBS="-L$with_velociraptor -lvelociraptor -lstdc++ -lhdf5_cpp"
+      VELOCIRAPTOR_LIBS="-L$with_velociraptor -lvelociraptor -lmpi -lstdc++ -lhdf5_cpp"
       CFLAGS="$CFLAGS -fopenmp"
    else
       VELOCIRAPTOR_LIBS=""
diff --git a/doc/RTD/source/CommandLineOptions/index.rst b/doc/RTD/source/CommandLineOptions/index.rst
index bd58f031e622272d0245599621fc635891588a8f..e2603532b4ed4e64c86887f2a4f7c35f80cb08bf 100644
--- a/doc/RTD/source/CommandLineOptions/index.rst
+++ b/doc/RTD/source/CommandLineOptions/index.rst
@@ -31,6 +31,7 @@ can be found by typing ``./swift -h``::
     -s, --hydro                       Run with hydrodynamics.
     -S, --stars                       Run with stars.
     -x, --velociraptor                Run with structure finding.
+    --limiter                         Run with time-step limiter.
 
   Control options:
 
diff --git a/doc/RTD/source/GettingStarted/compiling_code.rst b/doc/RTD/source/GettingStarted/compiling_code.rst
index a0ce1c08eaf6b08a298ac4b720017273d4fa6559..696d5a232b53205f9dbd6e03647d9da86e2b1ceb 100644
--- a/doc/RTD/source/GettingStarted/compiling_code.rst
+++ b/doc/RTD/source/GettingStarted/compiling_code.rst
@@ -24,6 +24,15 @@ MPI
 A recent implementation of MPI, such as Open MPI (v2.x or higher), is required,
 or any library that implements at least the MPI 3 standard.
 
+Running SWIFT on OmniPath architectures with Open MPI
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+When running SWIFT on an OmniPath system we suggest that Open MPI v3.1.3 or higher
+is used. A bug in the ``psm2`` library causes communications to be lost. It is
+possible to run SWIFT with older versions (tested with v2.1.x) of Open MPI so
+long as ``psm`` is used instead of ``psm2``, i.e. that you invoke ``mpirun``
+with ``--mca btl vader,self --mca mtl psm``.
+
 Libtool
 ~~~~~~~
 The build system depends on libtool.
diff --git a/doc/RTD/source/NewOption/index.rst b/doc/RTD/source/NewOption/index.rst
index 441cd860ed79dabad2005b39ae4549d1496ab98d..08f1ff04efa9508145c1f7e04d72d2f40fe22f0d 100644
--- a/doc/RTD/source/NewOption/index.rst
+++ b/doc/RTD/source/NewOption/index.rst
@@ -7,8 +7,8 @@ General information for adding new schemes
 ==========================================
 
 The following steps are required for any new options (such as new
-:ref:`hydro`, :ref:`chemistry`, :ref:`cooling`,
-:ref:`equation_of_state`, :ref:`stars` or :ref:`gravity`)
+:ref:`hydro`, chemistry, cooling,
+:ref:`equation_of_state`, stars, or gravity).
    
 In order to add a new scheme, you will need to:
 
diff --git a/doc/RTD/source/ParameterFiles/index.rst b/doc/RTD/source/ParameterFiles/index.rst
index 93cf9b6e86895f9f20c8d644d8d24ccab5df93d6..488e8d37d7fa530f6dcd536f6bb39debeaab9f25 100644
--- a/doc/RTD/source/ParameterFiles/index.rst
+++ b/doc/RTD/source/ParameterFiles/index.rst
@@ -1,496 +1,18 @@
 .. Parameter Files
-   Matthieu Schaller, 21st October 2018
+   Josh Borrow 22nd January 2019
 
 .. _Parameter_File_label:
 
 Parameter Files
 ===============
 
-File format and basic information
----------------------------------
-
-The parameter file uses a format similar to the `YAML format
-<https://en.wikipedia.org/wiki/YAML>`_ but reduced to only the
-elements required for the SWIFT parameters. Options are given by a
-name followed by a column and the value of the parameter:
-
-.. code:: YAML
-
-   ICs:        santa_barbara.hdf5	  
-   dt_max:     1.5
-   shift:      [2., 4., 5.]
-
-Comments can be inserted anywhere and start with a hash:
-
-.. code:: YAML
-
-   # Description of the physics
-   viscosity_alpha:     2.0
-   dt_max:              1.5     # seconds
-
-A typical SWIFT parameter file is split into multiple sections that
-may or may not be present depending on the different configuration
-options. The sections start with a label and can contain any number of
-parameters:
-
-.. code:: YAML
-
-   Cosmology:    # Planck13
-     Omega_m:        0.307
-     Omega_lambda:   0.693
-     Omega_b:        0.0455
-     h:              0.6777
-     a_begin:        0.0078125     # z = 127
-
-The options can be integer values, floating point numbers, characters
-or strings. If SWIFT expects a number and string is given, an error
-will be raised. The code can also read an array of values:
-
-.. code:: YAML
-
-   shift:  [2., 4., 5.]
-	  
-Some options in the parameter file are optional and
-when not provided, SWIFT will run with the default value. However, if
-a compulsory parameter is missing an error will be raised at
-start-up.
-
-Finally, SWIFT outputs two YAML files at the start of a run. The first
-one ``used_parameters.yml`` contains all the parameters that were used
-for this run, **including all the optional parameters with their
-default values**. This file can be used to start an exact copy of the
-run. The second file, ``unused_parameters.yml`` contains all the
-values that were not read from the parameter file. This can be used to
-simplify the parameter file or check that nothing important was
-ignored (for instance because the code is not configured to use some
-options).
-
-The rest of this page describes all the SWIFT parameters, split by
-section. A list of all the possible parameters is kept in the file
-``examples/parameter_examples.yml``.
-
-Internal Unit System
---------------------
-
-The ``InternalUnitSystem`` section describes the units used internally by the
-code. This is the system of units in which all the equations are solved. All
-physical constants are converted to this system and if the ICs use a different
-system (see :ref:`ICs_units_label`) the particle quantities will be converted
-when read in.
-
-The system of units is described using the value of the 5 basic units
-of any system with respect to the CGS system. Instead of using a unit
-of time we use a unit of velocity as this is more intuitive. Users
-hence need to provide:
-
-* a unit of length: ``UnitLength_in_cgs``,
-* a unit of mass: ``UnitMass_in_cgs``,
-* a unit of velocity ``UnitVelocity_in_cgs``,
-* a unit of electric current ``UnitCurrent_in_cgs``,
-* a unit of temperature ``UnitTemp_in_cgs``.
-
-All these need to be expressed with respect to their cgs counter-part
-(i.e. :math:`cm`, :math:`g`, :math:`cm/s`, :math:`A` and :math:`K`). Recall
-that there are no h-factors in any of SWIFT's quantities; we, for instance,
-use :math:`cm` and not :math:`cm/h`.
-
-For instance to use the commonly adopted system of 10^10 Msun as a
-unit for mass, mega-parsec as a unit of length and km/s as a unit of
-speed, we would use:
-
-.. code:: YAML
-
-   # Common unit system for cosmo sims
-   InternalUnitSystem:
-     UnitMass_in_cgs:     1.98848e43    # 10^10 M_sun in grams
-     UnitLength_in_cgs:   3.08567758e24 # 1 Mpc in centimeters
-     UnitVelocity_in_cgs: 1e5           # 1 km/s in centimeters per second
-     UnitCurrent_in_cgs:  1             # 1 Ampere
-     UnitTemp_in_cgs:     1             # 1 Kelvin   
-	  
-Note that there are currently no variables in any of the SWIFT physics
-schemes that make use of the unit of electric current. There is also
-no incentive to use anything else than Kelvin but that makes the whole
-system consistent with any possible unit system.
-
-If one is interested in using the more humorous `FFF unit
-system <https://en.wikipedia.org/wiki/FFF_system>`_ one would use
-
-.. code:: YAML
-
-   # FFF unit system
-   InternalUnitSystem:
-     UnitMass_in_cgs:     40823.3133  # 1 Firkin (fir) in grams
-     UnitLength_in_cgs:   20116.8     # 1 Furlong (fur) in cm
-     UnitVelocity_in_cgs: 0.01663095  # 1 Furlong (fur) per Fortnight (ftn) in cm/s
-     UnitCurrent_in_cgs:  1           # 1 Ampere
-     UnitTemp_in_cgs:     1           # 1 Kelvin   
-
-The value of the physical constants in this system is left as an
-exercise for the reader [#f1]_.
-
-Cosmology
----------
-
-When running a cosmological simulation, the section ``Cosmology`` sets the values of the
-cosmological model. The expanded :math:`\Lambda\rm{CDM}` parameters governing the
-background evolution of the Universe need to be specified here. These are:
-
-* The reduced Hubble constant: :math:`h`: ``h``,
-* The matter density parameter :math:`\Omega_m`: ``Omega_m``,
-* The cosmological constant density parameter :math:`\Omega_\Lambda`: ``Omega_lambda``,
-* The baryon density parameter :math:`\Omega_b`: ``Omega_b``,
-* The radiation density parameter :math:`\Omega_r`: ``Omega_r``.
-
-The last parameter can be omitted and will default to :math:`\Omega_r = 0`. Note
-that SWIFT will verify on start-up that the matter content of the initial conditions
-matches the cosmology specified in this section.
-
-This section also specifies the start and end of the simulation expressed in
-terms of scale-factors. The two parameters are:
-
-* Initial scale-factor: ``a_begin``,
-* Final scale-factor: ``a_end``.
-
-Two additional optional parameters can be used to change the equation of
-state of dark energy :math:`w(a)`. We use the evolution law :math:`w(a) =
-w_0 + w_a (1 - a)`. The two parameters in the YAML file are:
-
-* The :math:`z=0` dark energy equation of state parameter :math:`w_0`: ``w_0``
-* The dark energy equation of state evolution parameter :math:`w_a`: ``w_a``
-
-If unspecified these parameters default to the default
-:math:`\Lambda\rm{CDM}` values of :math:`w_0 = -1` and :math:`w_a = 0`.
-
-For a Planck+13 cosmological model (ignoring radiation density as is
-commonly done) and running from :math:`z=127` to :math:`z=0`, one would hence
-use the following parameters:
-
-.. code:: YAML
-
-   Cosmology:
-     a_begin:        0.0078125     # z = 127
-     a_end:          1.0           # z = 0
-     h:              0.6777        
-     Omega_m:        0.307         
-     Omega_lambda:   0.693         
-     Omega_b:        0.0455        
-     Omega_r:        0.            # (Optional)
-     w_0:            -1.0          # (Optional)
-     w_a:            0.            # (Optional)
-
-When running a non-cosmological simulation (i.e. without the ``-c`` run-time
-flag) this section of the YAML file is entirely ignored.
-     
-Gravity
--------
-
-The behaviour of the self-gravity solver can be modified by the parameters
-provided in the ``Gravity`` section. The theory document puts these parameters into the
-context of the equations being solved. We give a brief overview here.
-
-* The Plummer-equivalent co-moving softening length used for all particles :math:`\epsilon_{com}`: ``comoving_softening``,
-* The Plummer-equivalent maximal physical softening length used for all particles :math:`\epsilon_{max}`: ``comoving_softening``, 
-
-At any redshift :math:`z`, the Plummer-equivalent softening length used by the
-code will be :math:`\epsilon=\min(\epsilon_{max},
-\frac{\epsilon_{com}}{z+1})`. This is expressed in internal units.
-
-* The opening angle (multipole acceptance criterion) used in the FMM :math:`\theta`: ``theta``,
-* The time-step size pre-factor :math:`\eta`: ``eta``,
-  
-The time-step of a given particle is given by :math:`\Delta t =
-\eta\sqrt{\frac{\epsilon}{|\overrightarrow{a}|}}`, where
-:math:`\overrightarrow{a}` is the particle's acceleration. Power et al. (2003) recommend using :math:`\eta=0.025`.
-The last tree-related parameter is
-
-* The tree rebuild frequency: ``rebuild_frequency``.
-
-The tree rebuild frequency is an optional parameter defaulting to
-:math:`0.01`. It is used to trigger the re-construction of the tree every time a
-fraction of the particles have been integrated (kicked) forward in time.
-
-Simulations using periodic boundary conditions use additional parameters for the
-Particle-Mesh part of the calculation. The last three are optional:
-
-* The number cells along each axis of the mesh :math:`N`: ``mesh_side_length``,
-* The mesh smoothing scale in units of the mesh cell-size :math:`a_{\rm
-  smooth}`: ``a_smooth`` (default: ``1.25``),
-* The scale above which the short-range forces are assumed to be 0 (in units of
-  the mesh cell-size multiplied by :math:`a_{\rm smooth}`) :math:`r_{\rm
-  cut,max}`: ``r_cut_max`` (default: ``4.5``),
-* The scale below which the short-range forces are assumed to be exactly Newtonian (in units of
-  the mesh cell-size multiplied by :math:`a_{\rm smooth}`) :math:`r_{\rm
-  cut,min}`: ``r_cut_min`` (default: ``0.1``),
-  
-For most runs, the default values can be used. Only the number of cells along
-each axis needs to be specified. The remaining three values are best described
-in the context of the full set of equations in the theory documents.
-  
-As a summary, here are the values used for the EAGLE :math:`100^3~{\rm Mpc}^3`
-simulation:
-
-.. code:: YAML
-	  
-   # Parameters for the self-gravity scheme for the EAGLE-100 box
-   Gravity:
-     eta:          0.025              
-     theta:        0.7                
-     comoving_softening:     0.0026994  # 0.7 proper kpc at z=2.8.
-     max_physical_softening: 0.0007     # 0.7 proper kpc
-     rebuild_frequency:      0.01       # Default optional value
-     mesh_side_length:       512       
-     a_smooth:     1.25                 # Default optional value
-     r_cut_max:    4.5                  # Default optional value
-     r_cut_min:    0.1                  # Default optional value
-
-      
-SPH
----
-
-Time Integration
-----------------
-
-The ``TimeIntegration`` section is used to set some general parameters related to time
-integration. In all cases, users have to provide a minimal and maximal time-step
-size:
-
-* Maximal time-step size: ``dt_max``
-* Minimal time-step size: ``dt_min``
-
-These quantities are expressed in internal units. All particles will have their
-time-step limited by the maximal value on top of all the other criteria that may
-apply to them (gravity acceleration, Courant condition, etc.). If a particle
-demands a time-step size smaller than the minimum, SWIFT will abort with an
-error message. This is a safe-guard against simulations that would never
-complete due to the number of steps to run being too large.
-
-When running a non-cosmological simulation, the user also has to provide the
-time of the start and the time of the end of the simulation:
-
-* Start time: ``time_begin``
-* End time: ``time_end``
-
-Both are expressed in internal units. The start time is typically set to ``0``
-but SWIFT can handle any value here. For cosmological runs, these values are
-ignored and the start- and end-points of the runs are specified by the start and
-end scale-factors in the cosmology section of the parameter file.
-
-Additionally, when running a cosmological volume, advanced users can specify the
-value of the dimensionless pre-factor entering the time-step condition linked
-with the motion of particles with respect to the background expansion and mesh
-size. See the theory document for the exact equations.
-
-* Dimensionless pre-factor of the maximal allowed displacement:
-  ``max_dt_RMS_factor`` (default: ``0.25``)
-
-This value rarely needs altering.
-
-A full time-step section for a non-cosmological run would be:
-
-.. code:: YAML
-
-  TimeIntegration:
-    time_begin:   0    # Start time in internal units.
-    time_end:     10.  # End time in internal units.
-    dt_max:       1e-2
-    dt_min:       1e-6
-
-Whilst for a cosmological run, one would need:
-
-.. code:: YAML
-
-  TimeIntegration:
-    dt_max:            1e-4
-    dt_min:            1e-10
-    max_dt_RMS_factor: 0.25     # Default optional value
-
-Initial Conditions
-------------------
-
-This ``InitialConditions`` section of the parameter file contains all the options related to
-the initial conditions. The main two parameters are
-
-* The name of the initial conditions file: ``file_name``,
-* Whether the problem uses periodic boundary conditions or not: ``periodic``.
-
-The file path is relative to where the code is being executed. These
-parameters can be complemented by some optional values to drive some
-specific behaviour of the code.
-
-* Whether to generate gas particles from the DM particles: ``generate_gas_in_ics`` (default: ``0``),
-* Whether to activate an additional clean-up of the SPH smoothing lengths: ``cleanup_smoothing_lengths`` (default: ``0``)
-
-The procedure used to generate gas particles from the DM ones is
-outlined in the theory documents and is too long for a full
-description here.  The cleaning of the smoothing lengths is an
-expensive operation but can be necessary in the cases where the
-initial conditions are of poor quality and the values of the smoothing
-lengths are far from the values they should have.
-
-When starting from initial conditions created for Gadget, some
-additional flags can be used to convert the values from h-full to
-h-free and remove the additional :math:`\sqrt{a}` in the velocities:
-
-* Whether to re-scale all the fields to remove powers of h from the quantities: ``cleanup_h_factors`` (default: ``0``),
-* Whether to re-scale the velocities to remove the :math:`\sqrt{a}` assumed by Gadget : ``cleanup_velocity_factors`` (default: ``0``).
-
-The h-factors are self-consistently removed according to their units
-and this is applied to all the quantities irrespective of particle
-types. The correct power of ``h`` is always calculated for each
-quantity.
-
-Finally, SWIFT also offers these options:
-
-* A factor to re-scale all the smoothing-lengths by a fixed amount: ``smoothing_length_scaling`` (default: ``1.``),
-* A shift to apply to all the particles: ``shift`` (default: ``[0.0,0.0,0.0]``),
-* Whether to replicate the box along each axis: ``replicate`` (default: ``1``).
-
-The shift is expressed in internal units. The option to replicate the
-box is especially useful for weak-scaling tests. When set to an
-integer >1, the box size is multiplied by this integer along each axis
-and the particles are duplicated and shifted such as to create exact
-copies of the simulation volume.
-
-The full section to start a DM+hydro run from Gadget DM-only ICs would
-be:
-
-.. code:: YAML
-
-   InitialConditions:
-     file_name:  my_ics.hdf5
-     periodic:                    1
-     cleanup_h_factors:           1     
-     cleanup_velocity_factors:    1     
-     generate_gas_in_ics:         1     
-     cleanup_smoothing_lengths:   1  
-
-  
-Physical Constants
-------------------
-
-For some idealised test it can be useful to overwrite the value of
-some physical constants; in particular the value of the gravitational
-constant. SWIFT offers an optional parameter to overwrite the value of
-:math:`G_N`. 
-
-.. code:: YAML
-
-   PhysicalConstants:
-     G:   1
-
-Note that this set :math:`G` to the specified value in the internal system
-of units. Setting a value of `1` when using the system of units (10^10 Msun,
-Mpc, km/s) will mean that :math:`G_N=1` in these units [#f2]_ instead of the
-normal value :math:`G_N=43.00927`.
-
-This option is only used for specific tests and debugging. This entire
-section of the YAML file can typically be left out. More constants may
-be handled in the same way in future versions.
-
-Snapshots
----------
-
-Some additional specific options for the snapshot outputs are described in the
-following pages:
+This section describes the options that are available in the
+parameter files.
 
 .. toctree::
-   :maxdepth: 1
+   :maxdepth: 2
+   :caption: Contents:
 
+   parameter_description
    output_selection
 
-Statistics
-----------
-
-Restarts
---------
-
-SWIFT can write check-pointing files and restart from them. The behaviour of
-this mechanism is driven by the options in the ``Restarts`` section of the YAML
-parameter file. All the parameters are optional but default to values that
-ensure a reasonable behaviour. 
-
-* Whether or not to enable the dump of restart files: ``enable`` (default:
-  ``1``).
-
-This parameter acts a master-switch for the check-pointing capabilities. All the
-other options require the ``enable`` parameter to be set to ``1``.
-  
-* Whether or not to save a copy of the previous set of check-pointing files:
-  ``save`` (default: ``1``),
-* Whether or not to dump a set of restart file on regular exit: ``onexit``
-  (default: ``0``),
-* The wall-clock time in hours between two sets of restart files:
-  ``delta_hours`` (default: ``6.0``).
-  
-Note that there is no buffer time added to the ``delta_hours`` value. If the
-system's batch queue run time limit is set to 6 hours, the user must specify a
-smaller value to allow for enough time to safely dump the check-point files.
-
-* The sub-directory in which to store the restart files: ``subdir`` (default:
-  ``restart``),
-* The basename of the restart files: ``basename`` (default: ``swift``)
-
-If the directory does not exist, SWIFT will create it.  When resuming a run,
-SWIFT, will look for files with the name provided in the sub-directory specified
-here. The files themselves are named ``basename_000001.rst`` where the basename
-is replaced by the user-specified name and the 6-digits number corresponds to
-the MPI-rank. SWIFT writes one file per MPI rank. If the ``save`` option has
-been activated, the previous set of restart files will be named
-``basename_000000.rst.prev``.
-
-SWIFT can also be stopped by creating an empty file called ``stop`` in the
-directory where the code runs. This will make SWIFT dump a fresh set of restart
-file (irrespective of the specified ``delta_time`` between dumps) and exit
-cleanly. One parameter governs this behaviour:
-
-* Number of steps between two checks for the presence of a ``stop`` file:
-  ``stop_steps`` (default: ``100``).
-
-The default value is chosen such that SWIFT does not need to poll the
-file-system to often, which can take a significant amount of time on distributed
-systems. For runs where the small time-steps take a much larger amount of time,
-a smaller value is recommended to allow for a finer control over when the code
-can be stopped.
-
-Finally, SWIFT can automatically stop after a specified amount of wall-clock
-time. The code can also run a command when exiting in this fashion, which can be
-used, for instance, to interact with the batch queue system:
-
-* Maximal wall-clock run time in hours: ``max_run_time`` (default: ``24.0``),
-* Whether or not to run a command on exit: ``resubmit_on_exit`` (default:
-  ``0``),
-* The command to run on exit: ``resubmit_command`` (default: ``./resub.sh``).
-
-Note that no check is performed on the validity of the command to run. SWIFT
-simply calls ``system()`` with the user-specified command.
-
-To run SWIFT, dumping check-pointing files every 6 hours and running for 24
-hours after which a shell command will be run, one would use:
-
-.. code:: YAML
-	  
-  Restarts:
-    enable:             1          
-    save:               1          # Keep copies
-    onexit:             0          
-    subdir:             restart    # Sub-directory of the directory where SWIFT is run
-    basename:           swift      
-    delta_hours:        6.0        
-    stop_steps:         100        
-    max_run_time:       24.0       # In hours 
-    resubmit_on_exit:   1          
-    resubmit_command:   ./resub.sh 
-
-
-
-Scheduler
----------
-
-Domain Decomposition
---------------------
-
-.. [#f1] The thorough reader (or overly keen SWIFT tester) would find  that the speed of light is :math:`c=1.8026\times10^{12}\,\rm{fur}\,\rm{ftn}^{-1}`, Newton's constant becomes :math:`G_N=4.896735\times10^{-4}~\rm{fur}^3\,\rm{fir}^{-1}\,\rm{ftn}^{-2}` and Planck's constant turns into :math:`h=4.851453\times 10^{-34}~\rm{fur}^2\,\rm{fir}\,\rm{ftn}^{-1}`.
-
-
-.. [#f2] which would translate into a constant :math:`G_N=1.5517771\times10^{-9}~cm^{3}\,g^{-1}\,s^{-2}` if expressed in the CGS system.
diff --git a/doc/RTD/source/ParameterFiles/output_selection.rst b/doc/RTD/source/ParameterFiles/output_selection.rst
index 90ab0f9a7c738c28832bc36de83c4034141d4b21..b84a776c7dcac2136dedd2324cfef43d7a5455ea 100644
--- a/doc/RTD/source/ParameterFiles/output_selection.rst
+++ b/doc/RTD/source/ParameterFiles/output_selection.rst
@@ -36,6 +36,10 @@ Example of file with redshift::
   10
   5
 
+If an output list is specified, the basic values for the first
+snapshot (``time_first``, ``scale_factor_first``) and the difference
+between outputs (``delta_time``) are ignored.
+  
 .. _Output_selection_label:
 
 Output Selection
diff --git a/doc/RTD/source/ParameterFiles/parameter_description.rst b/doc/RTD/source/ParameterFiles/parameter_description.rst
new file mode 100644
index 0000000000000000000000000000000000000000..6304b60c5eb6df77d79e2ff50b9ba895d31a7889
--- /dev/null
+++ b/doc/RTD/source/ParameterFiles/parameter_description.rst
@@ -0,0 +1,634 @@
+.. Parameter Description
+   Matthieu Schaller, 21st October 2018
+
+.. _Parameters_basics:
+
+File format and basic information
+---------------------------------
+
+The parameter file uses a format similar to the `YAML format
+<https://en.wikipedia.org/wiki/YAML>`_ but reduced to only the
+elements required for the SWIFT parameters. Options are given by a
+name followed by a colon and the value of the parameter:
+
+.. code:: YAML
+
+   ICs:        santa_barbara.hdf5	  
+   dt_max:     1.5
+   shift:      [2., 4., 5.]
+
+Comments can be inserted anywhere and start with a hash:
+
+.. code:: YAML
+
+   # Description of the physics
+   viscosity_alpha:     2.0
+   dt_max:              1.5     # seconds
+
+A typical SWIFT parameter file is split into multiple sections that
+may or may not be present depending on the different configuration
+options. The sections start with a label and can contain any number of
+parameters:
+
+.. code:: YAML
+
+   Cosmology:    # Planck13
+     Omega_m:        0.307
+     Omega_lambda:   0.693
+     Omega_b:        0.0455
+     h:              0.6777
+     a_begin:        0.0078125     # z = 127
+
+The options can be integer values, floating point numbers, characters
+or strings. If SWIFT expects a number and a string is given, an error
+will be raised. The code can also read an array of values:
+
+.. code:: YAML
+
+   shift:  [2., 4., 5.]
+	  
+Some options in the parameter file are optional and
+when not provided, SWIFT will run with the default value. However, if
+a compulsory parameter is missing an error will be raised at
+start-up.
+
+Finally, SWIFT outputs two YAML files at the start of a run. The first one
+``used_parameters.yml`` contains all the parameters that were used for this run,
+**including all the optional parameters left unspecified with their default
+values**. This file can be used to start an exact copy of the run. The second
+file, ``unused_parameters.yml`` contains all the values that were not read from
+the parameter file. This can be used to simplify the parameter file or check
+that nothing important was ignored (for instance because the code is not
+configured to use some options).
+
+The rest of this page describes all the SWIFT parameters, split by
+section. A list of all the possible parameters is kept in the file
+``examples/parameter_examples.yml``.
+
+.. _Parameters_units:
+
+Internal Unit System
+--------------------
+
+The ``InternalUnitSystem`` section describes the units used internally by the
+code. This is the system of units in which all the equations are solved. All
+physical constants are converted to this system and if the ICs use a different
+system (see the snapshots' :ref:`ICs_units_label` section of the documentation)
+the particle quantities will be converted when read in.
+
+The system of units is described using the value of the 5 basic units
+of any system with respect to the CGS system. Instead of using a unit
+of time we use a unit of velocity as this is more intuitive. Users
+hence need to provide:
+
+* a unit of length: ``UnitLength_in_cgs``,
+* a unit of mass: ``UnitMass_in_cgs``,
+* a unit of velocity: ``UnitVelocity_in_cgs``,
+* a unit of electric current: ``UnitCurrent_in_cgs``,
+* a unit of temperature: ``UnitTemp_in_cgs``.
+
+All these need to be expressed with respect to their cgs counter-part
+(i.e. :math:`cm`, :math:`g`, :math:`cm/s`, :math:`A` and :math:`K`). Recall
+that there are no h-factors in any of SWIFT's quantities; we, for instance,
+use :math:`cm` and not :math:`cm/h`.
+
+For instance to use the commonly adopted system of 10^10 Msun as a
+unit for mass, mega-parsec as a unit of length and km/s as a unit of
+speed, we would use:
+
+.. code:: YAML
+
+   # Common unit system for cosmo sims
+   InternalUnitSystem:
+     UnitMass_in_cgs:     1.98848e43    # 10^10 M_sun in grams
+     UnitLength_in_cgs:   3.08567758e24 # 1 Mpc in centimeters
+     UnitVelocity_in_cgs: 1e5           # 1 km/s in centimeters per second
+     UnitCurrent_in_cgs:  1             # 1 Ampere
+     UnitTemp_in_cgs:     1             # 1 Kelvin   
+	  
+Note that there are currently no variables in any of the SWIFT physics
+schemes that make use of the unit of electric current. There is also
+no incentive to use anything other than Kelvin but that makes the whole
+system consistent with any possible unit system.
+
+If one is interested in using the more humorous `FFF unit
+system <https://en.wikipedia.org/wiki/FFF_system>`_ one would use
+
+.. code:: YAML
+
+   # FFF unit system
+   InternalUnitSystem:
+     UnitMass_in_cgs:     40823.3133  # 1 Firkin (fir) in grams
+     UnitLength_in_cgs:   20116.8     # 1 Furlong (fur) in cm
+     UnitVelocity_in_cgs: 0.01663095  # 1 Furlong (fur) per Fortnight (ftn) in cm/s
+     UnitCurrent_in_cgs:  1           # 1 Ampere
+     UnitTemp_in_cgs:     1           # 1 Kelvin   
+
+The value of the physical constants in this system is left as an
+exercise for the reader [#f1]_.
+
+.. _Parameters_cosmology:
+
+Cosmology
+---------
+
+When running a cosmological simulation, the section ``Cosmology`` sets the values of the
+cosmological model. The expanded :math:`\Lambda\rm{CDM}` parameters governing the
+background evolution of the Universe need to be specified here. These are:
+
+* The reduced Hubble constant :math:`h`: ``h``,
+* The matter density parameter :math:`\Omega_m`: ``Omega_m``,
+* The cosmological constant density parameter :math:`\Omega_\Lambda`: ``Omega_lambda``,
+* The baryon density parameter :math:`\Omega_b`: ``Omega_b``,
+* The radiation density parameter :math:`\Omega_r`: ``Omega_r``.
+
+The last parameter can be omitted and will default to :math:`\Omega_r = 0`. Note
+that SWIFT will verify on start-up that the matter content of the initial conditions
+matches the cosmology specified in this section.
+
+This section also specifies the start and end of the simulation expressed in
+terms of scale-factors. The two parameters are:
+
+* Initial scale-factor: ``a_begin``,
+* Final scale-factor: ``a_end``.
+
+Two additional optional parameters can be used to change the equation of
+state of dark energy :math:`w(a)`. We use the evolution law :math:`w(a) =
+w_0 + w_a (1 - a)`. The two parameters in the YAML file are:
+
+* The :math:`z=0` dark energy equation of state parameter :math:`w_0`: ``w_0``
+* The dark energy equation of state evolution parameter :math:`w_a`: ``w_a``
+
+If unspecified these parameters default to the standard
+:math:`\Lambda\rm{CDM}` values of :math:`w_0 = -1` and :math:`w_a = 0`.
+
+For a Planck+13 cosmological model (ignoring radiation density as is
+commonly done) and running from :math:`z=127` to :math:`z=0`, one would hence
+use the following parameters:
+
+.. code:: YAML
+
+   Cosmology:
+     a_begin:        0.0078125     # z = 127
+     a_end:          1.0           # z = 0
+     h:              0.6777        
+     Omega_m:        0.307         
+     Omega_lambda:   0.693         
+     Omega_b:        0.0455        
+     Omega_r:        0.            # (Optional)
+     w_0:            -1.0          # (Optional)
+     w_a:            0.            # (Optional)
+
+When running a non-cosmological simulation (i.e. without the ``-c`` run-time
+flag) this section of the YAML file is entirely ignored.
+
+.. _Parameters_gravity:
+
+Gravity
+-------
+
+The behaviour of the self-gravity solver can be modified by the parameters
+provided in the ``Gravity`` section. The theory document puts these parameters into the
+context of the equations being solved. We give a brief overview here.
+
+* The Plummer-equivalent co-moving softening length used for all particles :math:`\epsilon_{com}`: ``comoving_softening``,
+* The Plummer-equivalent maximal physical softening length used for all particles :math:`\epsilon_{max}`: ``max_physical_softening``,
+
+At any redshift :math:`z`, the Plummer-equivalent softening length used by the
+code will be :math:`\epsilon=\min(\epsilon_{max},
+\frac{\epsilon_{com}}{z+1})`. This is expressed in internal units.
+
+* The opening angle (multipole acceptance criterion) used in the FMM :math:`\theta`: ``theta``,
+* The time-step size pre-factor :math:`\eta`: ``eta``,
+  
+The time-step of a given particle is given by :math:`\Delta t =
+\eta\sqrt{\frac{\epsilon}{|\overrightarrow{a}|}}`, where
+:math:`\overrightarrow{a}` is the particle's acceleration. Power et al. (2003) recommend using :math:`\eta=0.025`.
+The last tree-related parameter is
+
+* The tree rebuild frequency: ``rebuild_frequency``.
+
+The tree rebuild frequency is an optional parameter defaulting to
+:math:`0.01`. It is used to trigger the re-construction of the tree every time a
+fraction of the particles have been integrated (kicked) forward in time.
+
+Simulations using periodic boundary conditions use additional parameters for the
+Particle-Mesh part of the calculation. The last three are optional:
+
+* The number of cells along each axis of the mesh :math:`N`: ``mesh_side_length``,
+* The mesh smoothing scale in units of the mesh cell-size :math:`a_{\rm
+  smooth}`: ``a_smooth`` (default: ``1.25``),
+* The scale above which the short-range forces are assumed to be 0 (in units of
+  the mesh cell-size multiplied by :math:`a_{\rm smooth}`) :math:`r_{\rm
+  cut,max}`: ``r_cut_max`` (default: ``4.5``),
+* The scale below which the short-range forces are assumed to be exactly Newtonian (in units of
+  the mesh cell-size multiplied by :math:`a_{\rm smooth}`) :math:`r_{\rm
+  cut,min}`: ``r_cut_min`` (default: ``0.1``),
+  
+For most runs, the default values can be used. Only the number of cells along
+each axis needs to be specified. The remaining three values are best described
+in the context of the full set of equations in the theory documents.
+  
+As a summary, here are the values used for the EAGLE :math:`100^3~{\rm Mpc}^3`
+simulation:
+
+.. code:: YAML
+	  
+   # Parameters for the self-gravity scheme for the EAGLE-100 box
+   Gravity:
+     eta:          0.025              
+     theta:        0.7                
+     comoving_softening:     0.0026994  # 0.7 proper kpc at z=2.8.
+     max_physical_softening: 0.0007     # 0.7 proper kpc
+     rebuild_frequency:      0.01       # Default optional value
+     mesh_side_length:       512       
+     a_smooth:     1.25                 # Default optional value
+     r_cut_max:    4.5                  # Default optional value
+     r_cut_min:    0.1                  # Default optional value
+
+
+.. _Parameters_SPH:
+     
+SPH
+---
+
+.. _Parameters_time_integration:
+
+Time Integration
+----------------
+
+The ``TimeIntegration`` section is used to set some general parameters related to time
+integration. In all cases, users have to provide a minimal and maximal time-step
+size:
+
+* Maximal time-step size: ``dt_max``
+* Minimal time-step size: ``dt_min``
+
+These quantities are expressed in internal units. All particles will have their
+time-step limited by the maximal value on top of all the other criteria that may
+apply to them (gravity acceleration, Courant condition, etc.). If a particle
+demands a time-step size smaller than the minimum, SWIFT will abort with an
+error message. This is a safe-guard against simulations that would never
+complete due to the number of steps to run being too large.
+
+When running a non-cosmological simulation, the user also has to provide the
+time of the start and the time of the end of the simulation:
+
+* Start time: ``time_begin``
+* End time: ``time_end``
+
+Both are expressed in internal units. The start time is typically set to ``0``
+but SWIFT can handle any value here. For cosmological runs, these values are
+ignored and the start- and end-points of the runs are specified by the start and
+end scale-factors in the cosmology section of the parameter file.
+
+Additionally, when running a cosmological volume, advanced users can specify the
+value of the dimensionless pre-factor entering the time-step condition linked
+with the motion of particles with respect to the background expansion and mesh
+size. See the theory document for the exact equations.
+
+* Dimensionless pre-factor of the maximal allowed displacement:
+  ``max_dt_RMS_factor`` (default: ``0.25``)
+
+This value rarely needs altering.
+
+A full time-step section for a non-cosmological run would be:
+
+.. code:: YAML
+
+  TimeIntegration:
+    time_begin:   0    # Start time in internal units.
+    time_end:     10.  # End time in internal units.
+    dt_max:       1e-2
+    dt_min:       1e-6
+
+Whilst for a cosmological run, one would need:
+
+.. code:: YAML
+
+  TimeIntegration:
+    dt_max:            1e-4
+    dt_min:            1e-10
+    max_dt_RMS_factor: 0.25     # Default optional value
+
+.. _Parameters_ICs:
+    
+Initial Conditions
+------------------
+
+The ``InitialConditions`` section of the parameter file contains all the options related to
+the initial conditions. The main two parameters are
+
+* The name of the initial conditions file: ``file_name``,
+* Whether the problem uses periodic boundary conditions or not: ``periodic``.
+
+The file path is relative to where the code is being executed. These
+parameters can be complemented by some optional values to drive some
+specific behaviour of the code.
+
+* Whether to generate gas particles from the DM particles: ``generate_gas_in_ics`` (default: ``0``),
+* Whether to activate an additional clean-up of the SPH smoothing lengths: ``cleanup_smoothing_lengths`` (default: ``0``)
+
+The procedure used to generate gas particles from the DM ones is
+outlined in the theory documents and is too long for a full
+description here.  The cleaning of the smoothing lengths is an
+expensive operation but can be necessary in the cases where the
+initial conditions are of poor quality and the values of the smoothing
+lengths are far from the values they should have.
+
+When starting from initial conditions created for Gadget, some
+additional flags can be used to convert the values from h-full to
+h-free and remove the additional :math:`\sqrt{a}` in the velocities:
+
+* Whether to re-scale all the fields to remove powers of h from the quantities: ``cleanup_h_factors`` (default: ``0``),
+* Whether to re-scale the velocities to remove the :math:`\sqrt{a}` assumed by Gadget : ``cleanup_velocity_factors`` (default: ``0``).
+
+The h-factors are self-consistently removed according to their units
+and this is applied to all the quantities irrespective of particle
+types. The correct power of ``h`` is always calculated for each
+quantity.
+
+Finally, SWIFT also offers these options:
+
+* A factor to re-scale all the smoothing-lengths by a fixed amount: ``smoothing_length_scaling`` (default: ``1.``),
+* A shift to apply to all the particles: ``shift`` (default: ``[0.0,0.0,0.0]``),
+* Whether to replicate the box along each axis: ``replicate`` (default: ``1``).
+
+The shift is expressed in internal units. The option to replicate the
+box is especially useful for weak-scaling tests. When set to an
+integer >1, the box size is multiplied by this integer along each axis
+and the particles are duplicated and shifted such as to create exact
+copies of the simulation volume.
+
+The full section to start a DM+hydro run from Gadget DM-only ICs would
+be:
+
+.. code:: YAML
+
+   InitialConditions:
+     file_name:  my_ics.hdf5
+     periodic:                    1
+     cleanup_h_factors:           1     
+     cleanup_velocity_factors:    1     
+     generate_gas_in_ics:         1     
+     cleanup_smoothing_lengths:   1  
+
+
+.. _Parameters_constants:
+     
+Physical Constants
+------------------
+
+For some idealised tests it can be useful to overwrite the value of
+some physical constants; in particular the value of the gravitational
+constant. SWIFT offers an optional parameter to overwrite the value of
+:math:`G_N`. 
+
+.. code:: YAML
+
+   PhysicalConstants:
+     G:   1
+
+Note that this sets :math:`G` to the specified value in the internal system
+of units. Setting a value of ``1`` when using the system of units (10^10 Msun,
+Mpc, km/s) will mean that :math:`G_N=1` in these units [#f2]_ instead of the
+normal value :math:`G_N=43.00927`.
+
+This option is only used for specific tests and debugging. This entire
+section of the YAML file can typically be left out. More constants may
+be handled in the same way in future versions.
+
+.. _Parameters_snapshots:
+
+Snapshots
+---------
+
+The ``Snapshots`` section of the parameter file contains all the options related to
+the dump of simulation outputs in the form of HDF5 :ref:`snapshots`. The main
+parameter is the base name that will be used for all the outputs in the run:
+
+* The base name of the HDF5 snapshots: ``basename``.
+
+An under-score and 4 digits followed by ``.hdf5`` are then appended to this
+name (e.g. ``base_name_1234.hdf5``). The 4 digits are used to label the
+different outputs, starting at ``0000``. In the default setup the digits simply
+increase by one for each snapshot. However, if the optional parameter
+``int_time_label_on`` is switched on, then we use 6 digits and these will be the
+physical time of the simulation rounded to the nearest integer
+(e.g. ``base_name_001234.hdf5``) [#f3]_.
+
+The time of the first snapshot is controlled by the two following options:
+
+* Time of the first snapshot (non-cosmological runs): ``time_first``,
+* Scale-factor of the first snapshot (cosmological runs): ``scale_factor_first``.
+
+One of those two parameters has to be provided depending on the type of run. In
+the case of non-cosmological runs, the time of the first snapshot is expressed
+in the internal units of time. Users also have to provide the difference in time
+(or scale-factor) between consecutive outputs:
+
+* Time difference between consecutive outputs: ``delta_time``.
+
+In non-cosmological runs this is also expressed in internal units. For
+cosmological runs, this value is *multiplied* to obtain the
+scale-factor of the next snapshot. This implies that the outputs are
+equally spaced in :math:`\log(a)` (See :ref:`Output_list_label` to have
+snapshots not regularly spaced in time).
+
+When running the code with structure finding activated, it is often
+useful to have a structure catalog written at the same simulation time
+as the snapshots. To activate this, the following parameter can be
+switched on:
+
+* Run VELOCIraptor every time a snapshot is dumped: ``invoke_stf``
+  (default: ``0``).
+
+This produces catalogs using the options specified for the stand-alone
+VELOCIraptor outputs (see the section :ref:`Parameters_structure_finding`) but
+with a base name and output number that matches the snapshot name
+(e.g. ``stf_base_name_1234.hdf5``) irrespective of the name specified in the
+section dedicated to VELOCIraptor. Note that the invocation of VELOCIraptor at
+every dump is done additionally to the stand-alone dumps that can be specified
+in the corresponding section of the YAML parameter file.
+
+Users can optionally specify the level of compression used by the HDF5 library
+using the parameter:
+
+* GZIP compression level of the HDF5 arrays: ``compression`` (default: ``0``).
+
+The default level of ``0`` implies no compression and values have to be in the
+range :math:`[0-9]`. This integer is passed to the i/o library and used for the
+lossless GZIP compression algorithm. Higher values imply higher compression but
+also more time spent deflating and inflating the data. Note that up until HDF5
+1.10.x this option is not available when using the MPI-parallel version of the
+i/o routines.
+
+Finally, it is possible to specify a different system of units for the snapshots
+than the one that was used internally by SWIFT. The format is identical to the
+one described above (See the :ref:`Parameters_units` section) and read:
+
+* a unit of length: ``UnitLength_in_cgs`` (default: ``InternalUnitSystem:UnitLength_in_cgs``),
+* a unit of mass: ``UnitMass_in_cgs`` (default: ``InternalUnitSystem:UnitMass_in_cgs``),
+* a unit of velocity ``UnitVelocity_in_cgs`` (default: ``InternalUnitSystem:UnitVelocity_in_cgs``),
+* a unit of electric current ``UnitCurrent_in_cgs`` (default: ``InternalUnitSystem:UnitCurrent_in_cgs``),
+* a unit of temperature ``UnitTemp_in_cgs`` (default: ``InternalUnitSystem:UnitTemp_in_cgs``).
+
+When un-specified, these all take the same value as assumed by the internal
+system of units. These are rarely used but can offer a practical alternative to
+converting data in the post-processing of the simulations. 
+
+For a standard cosmological run with structure finding activated, the
+full section would be:
+
+.. code:: YAML
+
+   Snapshots:
+     basename:            output
+     scale_factor_first:  0.02    # z = 49
+     delta_time:          1.02
+     invoke_stf:          1
+	    
+Showing all the parameters for a basic hydro test-case, one would have:
+
+.. code:: YAML
+
+   Snapshots:
+     basename:            sedov
+     time_first:          0.01
+     delta_time:          0.005
+     invoke_stf:          0
+     int_time_label_on:   0
+     compression:         3
+     UnitLength_in_cgs:   1.  # Use cm in outputs
+     UnitMass_in_cgs:     1.  # Use grams in outputs
+     UnitVelocity_in_cgs: 1.  # Use cm/s in outputs
+     UnitCurrent_in_cgs:  1.  # Use Ampere in outputs
+     UnitTemp_in_cgs:     1.  # Use Kelvin in outputs
+
+Some additional specific options for the snapshot outputs are described in the
+following pages:
+
+* :ref:`Output_list_label` (to have snapshots not evenly spaced in time),
+* :ref:`Output_selection_label` (to select what particle fields to write).
+
+
+.. _Parameters_statistics:
+  
+Statistics
+----------
+
+Some additional specific options for the statistics outputs are described in the
+following page:
+
+* :ref:`Output_list_label` (to have statistics outputs not evenly spaced in time).
+
+.. _Parameters_restarts:
+  
+Restarts
+--------
+
+SWIFT can write check-pointing files and restart from them. The behaviour of
+this mechanism is driven by the options in the ``Restarts`` section of the YAML
+parameter file. All the parameters are optional but default to values that
+ensure a reasonable behaviour. 
+
+* Whether or not to enable the dump of restart files: ``enable`` (default:
+  ``1``).
+
+This parameter acts as a master-switch for the check-pointing capabilities. All the
+other options require the ``enable`` parameter to be set to ``1``.
+  
+* Whether or not to save a copy of the previous set of check-pointing files:
+  ``save`` (default: ``1``),
+* Whether or not to dump a set of restart files on regular exit: ``onexit``
+  (default: ``0``),
+* The wall-clock time in hours between two sets of restart files:
+  ``delta_hours`` (default: ``6.0``).
+  
+Note that there is no buffer time added to the ``delta_hours`` value. If the
+system's batch queue run time limit is set to 6 hours, the user must specify a
+smaller value to allow for enough time to safely dump the check-point files.
+
+* The sub-directory in which to store the restart files: ``subdir`` (default:
+  ``restart``),
+* The basename of the restart files: ``basename`` (default: ``swift``)
+
+If the directory does not exist, SWIFT will create it.  When resuming a run,
+SWIFT will look for files with the name provided in the sub-directory specified
+here. The files themselves are named ``basename_000001.rst`` where the basename
+is replaced by the user-specified name and the 6-digits number corresponds to
+the MPI-rank. SWIFT writes one file per MPI rank. If the ``save`` option has
+been activated, the previous set of restart files will be named
+``basename_000000.rst.prev``.
+
+SWIFT can also be stopped by creating an empty file called ``stop`` in the
+directory where the code runs. This will make SWIFT dump a fresh set of restart
+files (irrespective of the specified ``delta_hours`` between dumps) and exit
+cleanly. One parameter governs this behaviour:
+
+* Number of steps between two checks for the presence of a ``stop`` file:
+  ``stop_steps`` (default: ``100``).
+
+The default value is chosen such that SWIFT does not need to poll the
+file-system too often, which can take a significant amount of time on distributed
+systems. For runs where the small time-steps take a much larger amount of time,
+a smaller value is recommended to allow for a finer control over when the code
+can be stopped.
+
+Finally, SWIFT can automatically stop after a specified amount of wall-clock
+time. The code can also run a command when exiting in this fashion, which can be
+used, for instance, to interact with the batch queue system:
+
+* Maximal wall-clock run time in hours: ``max_run_time`` (default: ``24.0``),
+* Whether or not to run a command on exit: ``resubmit_on_exit`` (default:
+  ``0``),
+* The command to run on exit: ``resubmit_command`` (default: ``./resub.sh``).
+
+Note that no check is performed on the validity of the command to run. SWIFT
+simply calls ``system()`` with the user-specified command.
+
+To run SWIFT, dumping check-pointing files every 6 hours and running for 24
+hours after which a shell command will be run, one would use:
+
+.. code:: YAML
+	  
+  Restarts:
+    enable:             1          
+    save:               1          # Keep copies
+    onexit:             0          
+    subdir:             restart    # Sub-directory of the directory where SWIFT is run
+    basename:           swift      
+    delta_hours:        6.0        
+    stop_steps:         100        
+    max_run_time:       24.0       # In hours 
+    resubmit_on_exit:   1          
+    resubmit_command:   ./resub.sh 
+
+.. _Parameters_scheduler:
+
+Scheduler
+---------
+
+.. _Parameters_domain_decomposition:
+
+Domain Decomposition
+--------------------
+
+.. _Parameters_structure_finding:
+
+Structure finding (VELOCIraptor)
+--------------------------------
+
+
+.. [#f1] The thorough reader (or overly keen SWIFT tester) would find  that the speed of light is :math:`c=1.8026\times10^{12}\,\rm{fur}\,\rm{ftn}^{-1}`, Newton's constant becomes :math:`G_N=4.896735\times10^{-4}~\rm{fur}^3\,\rm{fir}^{-1}\,\rm{ftn}^{-2}` and Planck's constant turns into :math:`h=4.851453\times 10^{-34}~\rm{fur}^2\,\rm{fir}\,\rm{ftn}^{-1}`.
+
+
+.. [#f2] which would translate into a constant :math:`G_N=1.5517771\times10^{-9}~cm^{3}\,g^{-1}\,s^{-2}` if expressed in the CGS system.
+
+.. [#f3] This feature only makes sense for non-cosmological runs for which the
+         internal time unit is such that when rounded to the nearest integer a
+	 sensible number is obtained. A use-case for this feature would be to
+	 compare runs over the same physical time but with different numbers of
+	 snapshots. Snapshots at a given time would always have the same set of
+	 digits irrespective of the number of snapshots produced before.
+	       
diff --git a/doc/RTD/source/Snapshots/index.rst b/doc/RTD/source/Snapshots/index.rst
new file mode 100644
index 0000000000000000000000000000000000000000..30cdc0e1281ae0420b44d88001992ccbbe588136
--- /dev/null
+++ b/doc/RTD/source/Snapshots/index.rst
@@ -0,0 +1,199 @@
+.. Snapshots
+   Matthieu Schaller, 5th January 2019
+
+.. _snapshots:
+
+Snapshots
+=========
+
+The snapshots are stored using the HDF5 format and are almost compatible with
+Gadget-2 (fully compatible outside of cosmological runs). They do, however,
+contain a large set of extensions including units, meta-data about the code and
+runs as well as facilities to quickly access the particles in a specific region
+of the simulation volume.
+
+Header
+------
+
+Meta-data about the code and run
+--------------------------------
+
+Several groups at the root of the files only contain attributes and are used to
+store some meta-data about the simulation and the code itself.
+
+Code
+~~~~
+
+The group ``/Code`` contains basic information about the version of the code
+that was used to run the simulation that dumped this snapshot. Versions of the
+libraries used to compile the code as well as information about the compiler and
+the flags used are stored. The most important elements here are the git SHA and
+configuration parameters of the code. Alongside the compiler flags, policies and
+used parameters, these allow an older run to be reproduced exactly.
+
+Cosmology
+~~~~~~~~~
+
+The group ``/Cosmology`` contains information about the cosmological model used
+for this simulation. The first important field is the attribute ``Cosmological
+run`` which is set to ``1`` for cosmological runs and to ``0`` otherwise. This
+allows users to quickly distinguish between these two main modes. Most values in
+this section only make sense for cosmological runs.
+
+All quantities are expressed in the internal system of units (note that this may
+differ from the units used in the particle arrays). Values like the look-back
+time are given for the redshift (or scale-factor) of this snapshot.
+
+Policy
+~~~~~~
+
+The group ``/Policy`` lists the engine policies (defined in ``src/engine.h``)
+that were activated in the run that dumped this snapshot. The policies roughly
+translate to the main run-time parameters of SWIFT.
+
+GravityScheme
+~~~~~~~~~~~~~
+
+HydroScheme
+~~~~~~~~~~~
+
+StarsScheme
+~~~~~~~~~~~
+
+SubgridScheme
+~~~~~~~~~~~~~
+
+Unit systems
+------------
+
+The snapshots contain *two* groups at the root containing information about the
+unit systems used in the snapshots.
+
+The main one ``Units`` contains the units used in the snapshot. In a similar
+fashion to what is done for the parameter files (see :ref:`Parameters_units`),
+SWIFT specifies only the basic units. These are the units of mass (``U_M``),
+length (``U_L``), time (``U_t``), electric current (``U_I``) and temperature
+(``U_T``). These are specified in units of their CGS equivalents (gram,
+centimeter, second, Ampere, Kelvin). All the quantities present in the particle
+arrays are expressed in this system of units. For each quantity, SWIFT gives the
+conversion factor in terms of these units. For instance, the internal energy per
+unit mass would be expressed as ``U_L^2 U_t^-2``, which in the CGS unit system
+translates to :math:`cm^2/s^2 = erg/g`.
+
+The second group ``InternalCodeUnits`` contains the unit system that was used
+internally by the code when running the simulation. This is in most cases the
+same system as given in ``Units`` but since users can specify a different
+system for the snapshots, there might be cases where they differ. As this system
+only relates to what was used inside the code and not in the snapshots
+themselves, this group is mostly here to report on the code's run-time behaviour
+and is used to express all the quantities in the meta-data (e.g. in the
+cosmology group or the softening lengths in the gravity group).
+
+Used and unused run-time parameters
+-----------------------------------
+
+The groups ``/Parameters`` and ``UnusedParameters`` located at the root of the file
+contain the list of all the run-time parameters used by the run with their
+values and the list of parameters that were in the YAML but were not read. The
+content of these two groups is identical to the ``used_parameters.yml`` and
+``unused_parameters.yml`` files produced by SWIFT when starting a run (See
+the :ref:`Parameters_basics` section of the documentation).
+
+Structure of the particle arrays
+--------------------------------
+
+There are several groups that contain 'auxiliary' information, such as
+``Header``.  Particle data is placed in separate groups depending on the type of
+the particles. The types use the naming convention of Gadget-2 (with
+the OWLS and EAGLE extensions).
+
++---------------------+------------------------+----------------------------+
+| HDF5 Group Name     | Physical Particle Type | In code ``enum part_type`` |
++=====================+========================+============================+
+| ``/PartType0/``     | Gas                    | ``swift_type_gas``         |
++---------------------+------------------------+----------------------------+
+| ``/PartType1/``     | Dark Matter            | ``swift_type_dark_matter`` |
++---------------------+------------------------+----------------------------+
+| ``/PartType4/``     | Stars                  | ``swift_type_star``        |
++---------------------+------------------------+----------------------------+
+| ``/PartType5/``     | Black Holes            | ``swift_type_black_hole``  |
++---------------------+------------------------+----------------------------+
+
+The last column in the table gives the ``enum`` value from ``part_type.h``
+corresponding to a given entry in the files.
+
+Quick access to particles via hash-tables
+-----------------------------------------
+
+The particles are not sorted in a specific order when they are written to the
+snapshots. However, the particles are sorted into the top-level cell structure
+used internally by the code every time a tree rebuild is triggered. The
+top-level cells are a coarse-grained mesh but knowing which particle belongs to
+which cell can nevertheless be useful to rapidly access particles in a given
+region only.
+
+One important caveat is that particles are free to drift out of their cells
+between rebuilds of the tree (but not by more than one cell-length). If one
+wants to have all the particles in a given cell, one has to read all the
+neighbouring cells as well. We note that for image making purposes, for instance
+to generate a slice, this is typically not necessary and reading just the cells
+of interest is sufficient.
+
+At the root of the HDF5 file, the ``Cells`` group contains all the relevant
+information. The dimension of the top-level grid (a triplet of integers) is
+given by the attribute ``Cells/Meta-data/dimension`` and the size of each cell (a
+triplet of floating-point numbers) is given by the attribute
+``Cells/Meta-data/size``. All the cells have the same size but for non-cubic
+simulation volumes the cells themselves can have different sizes along each
+axis.
+
+The ``/Cells/Centres`` array gives the centre of each of the top-level cells in the
+simulation volume. Both the cell sizes and positions of the centres are
+expressed in the unit system used for the snapshots (see above) and are hence
+consistent with the particle positions themselves.
+
+Once the cell(s) containing the region of interest has been located, users can
+use the ``/Cells/Counts/PartTypeN`` and
+``/Cells/Offsets/PartTypeN`` arrays to retrieve the location of the particles
+of type ``N`` in the ``/PartTypeN`` arrays. For instance, if one is interested
+in retrieving all the densities of the gas particles in the cell around the
+position `[1, 1, 1]` one could use a piece of code similar to:
+
+.. code-block:: python
+   :linenos:
+
+   import numpy as np
+   import h5py
+
+   snapshot_file = h5py.File("snapshot.hdf5", "r")
+
+   my_pos = [1, 1, 1]
+
+   # Read in the cell centres and size
+   nr_cells = snapshot_file["/Cells/Meta-data"].attrs["nr_cells"]
+   centres = snapshot_file["/Cells/Centres"][:,:]
+   size = snapshot_file["/Cells/Meta-data"].attrs["size"]
+   half_size = size / 2.
+
+   # Look for the cell containing the position of interest
+   my_cell = -1
+   for i in range(nr_cells):
+      if (my_pos[0] > centres[i, 0] - half_size[0] and my_pos[0] < centres[i, 0] + half_size[0] and
+          my_pos[1] > centres[i, 1] - half_size[1] and my_pos[1] < centres[i, 1] + half_size[1] and
+          my_pos[2] > centres[i, 2] - half_size[2] and my_pos[2] < centres[i, 2] + half_size[2]):
+         my_cell = i
+         break
+   
+   # Print the position of the centre of the cell of interest
+   centre = snapshot_file["/Cells/Centres"][my_cell, :]
+   print("Centre of the cell:", centre)
+
+   # Retrieve the offset and counts
+   my_offset = snapshot_file["/Cells/Offsets/PartType0"][my_cell]
+   my_count = snapshot_file["/Cells/Counts/PartType0"][my_cell]
+
+   # Get the densities of the particles in this cell
+   rho = snapshot_file["/PartType0/Density"][my_offset:my_offset + my_count]
+
+For large simulations, this vastly reduces the amount of data that needs to be read
+from the disk.
diff --git a/doc/RTD/source/SubgridModels/EAGLE/index.rst b/doc/RTD/source/SubgridModels/EAGLE/index.rst
index 6388f7d3d42859d8659d2bb13f9dfe5181927807..639d98cd1a994f6f30dfc2430c90294d7486fce0 100644
--- a/doc/RTD/source/SubgridModels/EAGLE/index.rst
+++ b/doc/RTD/source/SubgridModels/EAGLE/index.rst
@@ -9,19 +9,21 @@ This section of the documentation gives a brief description of the
 different components of the EAGLE sub-grid model. We mostly focus on
 the parameters and values output in the snapshots.
 
+.. _EAGLE_chemical_tracers:
+
 Chemical tracers
 ~~~~~~~~~~~~~~~~
 
-The gas particles in the EAGLE model carry metal abundance information
-in the form of metal mass fractions. We follow the following 9
-elements: `H`, `He`, `C`, `N`, `O`, `Ne`, `Mg`, `Si` and `Fe`. We
-additionally follow the total metal mass fraction (i.e. absolute
-metallicity) `Z`. This is typically larger than the sum of the 7
+The gas particles in the EAGLE model carry metal abundance information in the
+form of metal mass fractions. We follow explicitly 9 of the 11 elements that
+`Wiersma et al. (2009)b <http://adsabs.harvard.edu/abs/2009MNRAS.399..574W>`_
+traced in their chemical enrichment model. These are: `H`, `He`, `C`, `N`, `O`,
+`Ne`, `Mg`, `Si` and `Fe` [#f1]_. We additionally follow the total metal mass fraction
+(i.e. absolute metallicity) `Z`. This is typically larger than the sum of the 7
 metals that are individually traced since this will also contain the
-contribution of all the elements that are not individually followed.
-We note that all of definitions are independent of any definition of
-solar the solar metallicity :math:`Z_\odot` or of any solar abundance
-pattern.
+contribution of all the elements that are not individually followed.  We note
+that all of these definitions are independent of any definition of the solar
+metallicity :math:`Z_\odot` or of any solar abundance pattern.
 
 As part of the diagnostics, we additionally trace the elements coming
 from the different stellar evolution channels. We store for each
@@ -38,12 +40,12 @@ We finally also compute the smoothed versions of the individual
 element mass fractions, of the total metal mass fractions, and of the
 iron gas fraction from SNIa.
 
-The chemistry module in ``src/chemistry/EAGLE`` includes all the arrays
+The chemistry module in ``src/chemistry/EAGLE/`` includes all the arrays
 that are added to the particles and the functions used to compute the
 smoothed elements.
 
-When a star is formed (see below), it inherits all the chemical
-tracers of its parent gas particle.
+When a star is formed (see the section :ref:`EAGLE_star_formation` below), it
+inherits all the chemical tracers of its parent gas particle.
 
 In the snapshots, we output for each gas and star particle:
 
@@ -100,7 +102,8 @@ In the snapshots, we output for each gas and star particle:
 
 The stars will lose mass over their lifetime (up to ~45%). The fractions will
 remain unchanged but if one is interested in computing an absolute metal mass
-(say) for a star, the ``InitialMass`` (see below) of the star must be used.
+(say) for a star, the ``InitialMass`` (see the section
+:ref:`EAGLE_star_formation` below) of the star must be used.
 
 The chemistry model only requires a small number of parameters to be specified
 in the `EAGLEChemistry` section of the YAML file. These are the initial values
@@ -141,12 +144,13 @@ Whilst one would use the following values for solar abundances
      init_abundance_Iron:         1.1032152e-3 # Mass fraction in Iron
 
 
+.. _EAGLE_cooling:
      
 Gas cooling: Wiersma+2009a
 ~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 The gas cooling is based on the redshift-dependent tables of `Wiersma et
-al. (2009) <http://adsabs.harvard.edu/abs/2009MNRAS.393...99W>`_ that include
+al. (2009)a <http://adsabs.harvard.edu/abs/2009MNRAS.393...99W>`_ that include
 element-by-element cooling rates for the 11 elements (`H`, `He`, `C`, `N`, `O`,
 `Ne`, `Mg`, `Si`, `S`, `Ca` and `Fe`) that dominate the total rates. The tables
 assume that the gas is in ionization equilibrium with the cosmic microwave
@@ -157,7 +161,8 @@ ignores *local* sources of ionization, self-shielding and non-equilibrium
 cooling/heating. The tables can be obtained from this `link
 <http://virgodb.cosma.dur.ac.uk/swift-webstorage/CoolingTables/EAGLE/coolingtables.tar.gz>`_
 which is a re-packaged version of the `original tables
-<http://www.strw.leidenuniv.nl/WSS08/>`_
+<http://www.strw.leidenuniv.nl/WSS08/>`_. The code reading and interpolating the
+table is located in the directory ``src/cooling/EAGLE/``.
 
 The Wiersma tables containing the cooling rates as a function of redshift,
 Hydrogen number density, Helium fraction (:math:`X_{He} / (X_{He} + X_{H})`) and
@@ -197,6 +202,27 @@ We note that the EAGLE cooling model does not impose any restriction on the
 particles' individual time-steps. The cooling takes place over the time span
 given by the other conditions (e.g the Courant condition).
 
+Finally, the cooling module also provides a function to compute the temperature
+of a given gas particle based on its density, internal energy, abundances and
+the current redshift. This temperature is the one used to compute the cooling
+rate from the tables and similarly to the cooling rates, they assume that the
+gas is in collisional equilibrium with the background radiation. The
+temperatures are, in particular, computed every time a snapshot is written and
+they are listed for every gas particle:
+
++---------------------+-------------------------------------+-----------+-------------------------------------+
+| Name                | Description                         | Units     | Comments                            |
++=====================+=====================================+===========+=====================================+
+| ``Temperature``     | | Temperature of the gas as         | [U_T]     | | The calculation is performed      |
+|                     | | computed from the tables.         |           | | using quantities at the last      |
+|                     |                                     |           | | time-step the particle was active |
++---------------------+-------------------------------------+-----------+-------------------------------------+
+
+Note that if one is running without cooling switched on at runtime, the
+temperatures can be computed by passing the ``--temperature`` runtime flag (see
+:ref:`cmdline-options`). Note that the tables then have to be available as in
+the case with cooling switched on.
+
 The cooling model is driven by a small number of parameter files in the
 `EAGLECooling` section of the YAML file. These are the re-ionization parameters,
 the path to the tables and optionally the modified abundances of `Ca` and `S` as
@@ -221,25 +247,67 @@ And the optional parameters are:
      S_over_Si_in_solar:        1.0 # (Optional) Value of the Sulphur mass abundance ratio to solar in units of the Silicon ratio to solar. Default value: 1.
      newton_integration:        0   # (Optional) Set to 1 to use the Newton-Raphson scheme for the explicit cooling problem.
 
-
-
+.. _EAGLE_tracers:
+     
 Particle tracers
 ~~~~~~~~~~~~~~~~
 
+Over the course of the simulation, the gas particles record some information
+about their evolution. These are updated for a given particle every time it is
+active. The EAGLE tracers module is located in the directory
+``src/tracers/EAGLE/``. 
+
+In the EAGLE model, we trace the maximal temperature a particle has reached and
+the time at which this happened. When a star is formed (see the section
+:ref:`EAGLE_star_formation` below), it inherits all the tracer values of its parent
+gas particle.  There are no parameters to the model but two values are added to
+the snapshots for each gas and star particle:
+
++----------------------------------------+---------------------------------------+-----------+-----------------------------+
+| Name                                   | Description                           | Units     | Comments                    |
++========================================+=======================================+===========+=============================+
+| | ``Maximal Temperature``              | | Maximal temperature reached by      | | [U_T]   |                             |
+|                                        | | this particle.                      |           |                             |
++----------------------------------------+---------------------------------------+-----------+-----------------------------+
+| | ``Maximal Temperature scale-factor`` | | Scale-factor (cosmological runs)    | | [-]     |                             |
+| | OR                                   | | or time (non-cosmological runs) at  | | OR      |                             |
+| | ``Maximal Temperature time``         | | which the maximum value was reached.| | [U_t]   |                             |
++----------------------------------------+---------------------------------------+-----------+-----------------------------+
+
+
+.. _EAGLE_star_formation:
+
 Star formation: Schaye+2008
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
+.. _EAGLE_enrichment:
+
 Stellar enrichment: Wiersma+2009b
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
+.. _EAGLE_feedback:
+
 Supernova feedback: Dalla Vecchia+2012
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
+.. _EAGLE_black_hole_seeding:
+
 Black-hole creation
 ~~~~~~~~~~~~~~~~~~~
 
+.. _EAGLE_black_hole_accretion:
+
 Black-hole accretion
 ~~~~~~~~~~~~~~~~~~~~
 
+.. _EAGLE_black_hole_feedback:
+
 AGN feedback
 ~~~~~~~~~~~~
+
+.. [#f1] `Wiersma et al. (2009)b
+	 <http://adsabs.harvard.edu/abs/2009MNRAS.399..574W>`_ originally also
+	 followed explicitly `Ca` and `S`. They are omitted in the EAGLE
+	 model but, when needed, their abundance with respect to solar is
+	 assumed to be the same as the abundance of `Si` with respect to solar
+	 (See the section :ref:`EAGLE_cooling`)
diff --git a/doc/RTD/source/VELOCIraptorInterface/stfwithswift.rst b/doc/RTD/source/VELOCIraptorInterface/stfwithswift.rst
index a663c37f93a6cede8c4528583c44183059414432..ed261b76abbcefaf5643a69069bb4b8ea1a0894c 100644
--- a/doc/RTD/source/VELOCIraptorInterface/stfwithswift.rst
+++ b/doc/RTD/source/VELOCIraptorInterface/stfwithswift.rst
@@ -50,8 +50,10 @@ HDF5 library, not a parallel build.
 Compiling SWIFT
 ---------------
 The next part is compiling SWIFT with VELOCIraptor and assumes you already
-downloaded SWIFT from the GitLab_, this can be done by running::
+downloaded SWIFT from the GitLab_, this can be done by running
 
+.. code:: bash
+  
   ./autogen.sh 
   ./configure --with-velociraptor=/path/to/VELOCIraptor-STF/src 
   make 
@@ -60,16 +62,16 @@ In which ``./autogen.sh`` only needs to be run once after the code is cloned
 from the GitLab_, and ``/path/to/`` is the path to the ``VELOCIraptor-STF``
 directory on your machine. In general ``./configure`` can be run with other
 options as desired. After this we can run SWIFT with VELOCIraptor, but for this
-we first need to add several lines to the yaml file of our simulation::
+we first need to add several lines to the yaml file of our simulation
 
     
-  #structure finding options
-  StructureFinding:
-  config_file_name:     stf_input_6dfof_dmonly_sub.cfg
-  basename:             ./stf
-  output_time_format:   1
-  scale_factor_first:   0.02
-  delta_time:           1.02
+.. code:: YAML
+
+   StructureFinding:      
+     config_file_name:     stf_input_6dfof_dmonly_sub.cfg
+     basename:             ./stf
+     scale_factor_first:   0.02
+     delta_time:           1.02
 
 In which we specify the ``.cfg`` file that is used by VELOCIraptor and the 
 other parameters which SWIFT needs to use. In the case of 
diff --git a/doc/RTD/source/conf.py b/doc/RTD/source/conf.py
index 46cff147efff3e7f23ff3f618898a17da3f85459..2249faa2851846c28e743400b2c826bfa6780c0a 100644
--- a/doc/RTD/source/conf.py
+++ b/doc/RTD/source/conf.py
@@ -87,7 +87,7 @@ html_theme = 'sphinx_rtd_theme'
 # Add any paths that contain custom static files (such as style sheets) here,
 # relative to this directory. They are copied after the builtin static files,
 # so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ['.static']
+# html_static_path = ['.static']
 
 # Custom sidebar templates, must be a dictionary that maps document names
 # to template names.
diff --git a/doc/RTD/source/index.rst b/doc/RTD/source/index.rst
index b9370c3f24b2ffb3c5174f2fe99fb9ec610e18f6..e04efe8c889fb8a005c88f691f1e01a387f19ebb 100644
--- a/doc/RTD/source/index.rst
+++ b/doc/RTD/source/index.rst
@@ -18,6 +18,7 @@ difference is the parameter file that will need to be adapted for SWIFT.
    CommandLineOptions/index
    ParameterFiles/index
    InitialConditions/index
+   Snapshots/index
    HydroSchemes/index
    SubgridModels/index
    EquationOfState/index
diff --git a/examples/DwarfGalaxy/dwarf_galaxy.yml b/examples/DwarfGalaxy/dwarf_galaxy.yml
index 0d815a99c42249bcbbdaf21dbaa34a55f61731aa..4c5e2a82b017725929138de011b1f3ed1fe9f1ef 100644
--- a/examples/DwarfGalaxy/dwarf_galaxy.yml
+++ b/examples/DwarfGalaxy/dwarf_galaxy.yml
@@ -10,10 +10,8 @@ InternalUnitSystem:
 StructureFinding:
   config_file_name:     stf_input.cfg # Name of the STF config file.
   basename:             ./stf         # Common part of the name of output files.
-  output_time_format:   0             # Specifies the frequency format of structure finding. 0 for simulation steps (delta_step) and 1 for simulation time intervals (delta_time).
   scale_factor_first:   0.92          # Scale-factor of the first snaphot (cosmological run)
   time_first:           0.01        # Time of the first structure finding output (in internal units).
-  delta_step:           1000          # Time difference between consecutive structure finding outputs (in internal units) in simulation steps.
   delta_time:           1.10          # Time difference between consecutive structure finding outputs (in internal units) in simulation time intervals.
 
 # Cosmological parameters
diff --git a/examples/EAGLE_25/eagle_25.yml b/examples/EAGLE_25/eagle_25.yml
index 0aec970db486a164696b23fdc1e281fbe4853486..cab0dbcd5efc0528ddc65a6dde1e5c2d7cb6b9a9 100644
--- a/examples/EAGLE_25/eagle_25.yml
+++ b/examples/EAGLE_25/eagle_25.yml
@@ -10,10 +10,8 @@ InternalUnitSystem:
 StructureFinding:
   config_file_name:     stf_input.cfg    # Name of the STF config file.
   basename:             ./stf         # Common part of the name of output files.
-  output_time_format:   0             # Specifies the frequency format of structure finding. 0 for simulation steps (delta_step) and 1 for simulation time intervals (delta_time).
   scale_factor_first:   0.92          # Scale-factor of the first snaphot (cosmological run)
   time_first:           0.01        # Time of the first structure finding output (in internal units).
-  delta_step:           1000          # Time difference between consecutive structure finding outputs (in internal units) in simulation steps.
   delta_time:           1.10          # Time difference between consecutive structure finding outputs (in internal units) in simulation time intervals.
 
 # Cosmological parameters
diff --git a/examples/EAGLE_25/run.sh b/examples/EAGLE_25/run.sh
index af1218f70729663d8efe337c312f6ef2fe8d6620..5961cf01a3a011ee26f0b411e619dd7207d5db47 100755
--- a/examples/EAGLE_25/run.sh
+++ b/examples/EAGLE_25/run.sh
@@ -7,5 +7,5 @@ then
     ./getIC.sh
 fi
 
-../swift --cosmology --hydro --self-gravity --stars--threads=16 eagle_25.yml 2>&1 | tee output.log
+../swift --cosmology --hydro --self-gravity --stars --threads=16 eagle_25.yml 2>&1 | tee output.log
 
diff --git a/examples/EAGLE_6/eagle_6.yml b/examples/EAGLE_6/eagle_6.yml
index 7c64c1cdedb6c8e9714471f4bad9611f548d05fa..e80fac8167a832c17cd10e1d2ae7cd854f314d17 100644
--- a/examples/EAGLE_6/eagle_6.yml
+++ b/examples/EAGLE_6/eagle_6.yml
@@ -10,10 +10,8 @@ InternalUnitSystem:
 StructureFinding:
   config_file_name:     stf_input.cfg # Name of the STF config file.
   basename:             ./stf         # Common part of the name of output files.
-  output_time_format:   0             # Specifies the frequency format of structure finding. 0 for simulation steps (delta_step) and 1 for simulation time intervals (delta_time).
   scale_factor_first:   0.92          # Scale-factor of the first snaphot (cosmological run)
   time_first:           0.01        # Time of the first structure finding output (in internal units).
-  delta_step:           1000          # Time difference between consecutive structure finding outputs (in internal units) in simulation steps.
   delta_time:           1.10          # Time difference between consecutive structure finding outputs (in internal units) in simulation time intervals.
 
 # Cosmological parameters
diff --git a/examples/SedovBlast_1D/run.sh b/examples/SedovBlast_1D/run.sh
index ba479214961c5957a2b19d6aa118e0f0e7ee0f63..e5674dc15e8fac1b36f43da07b829720c0ecd5f1 100755
--- a/examples/SedovBlast_1D/run.sh
+++ b/examples/SedovBlast_1D/run.sh
@@ -8,7 +8,7 @@ then
 fi
 
 # Run SWIFT
-../swift --hydro --threads=1 sedov.yml 2>&1 | tee output.log
+../swift --hydro --limiter --threads=1 sedov.yml 2>&1 | tee output.log
 
 # Plot the solution
 python plotSolution.py 5
diff --git a/examples/SedovBlast_1D/sedov.yml b/examples/SedovBlast_1D/sedov.yml
index b4912a95e797440dc6eb0c9f48806a5954adbc41..b4252581d6eb3b2932a074e7545b2d308be51865 100644
--- a/examples/SedovBlast_1D/sedov.yml
+++ b/examples/SedovBlast_1D/sedov.yml
@@ -11,7 +11,7 @@ TimeIntegration:
   time_begin: 0.    # The starting time of the simulation (in internal units).
   time_end:   5e-2  # The end time of the simulation (in internal units).
   dt_min:     1e-7  # The minimal time-step size of the simulation (in internal units).
-  dt_max:     1e-5  # The maximal time-step size of the simulation (in internal units).
+  dt_max:     1e-2  # The maximal time-step size of the simulation (in internal units).
 
 # Parameters governing the snapshots
 Snapshots:
@@ -21,7 +21,7 @@ Snapshots:
 
 # Parameters governing the conserved quantities statistics
 Statistics:
-  delta_time:          1e-5 # Time between statistics output
+  delta_time:          1e-3 # Time between statistics output
 
 # Parameters for the hydrodynamics scheme
 SPH:
diff --git a/examples/SedovBlast_2D/run.sh b/examples/SedovBlast_2D/run.sh
index b481d4555241c17015452a2139c04c541ccf1cdc..e2136f8f5e6ee9bde61d5189ed7955d53a3a9a6e 100755
--- a/examples/SedovBlast_2D/run.sh
+++ b/examples/SedovBlast_2D/run.sh
@@ -13,7 +13,7 @@ then
 fi
 
 # Run SWIFT
-../swift --hydro --threads=1 sedov.yml 2>&1 | tee output.log
+../swift --hydro --limiter --threads=1 sedov.yml 2>&1 | tee output.log
 
 # Plot the solution
 python plotSolution.py 5
diff --git a/examples/SedovBlast_2D/sedov.yml b/examples/SedovBlast_2D/sedov.yml
index 84177ece31ef98ec55c41513276c9c0158e69bcf..b4252581d6eb3b2932a074e7545b2d308be51865 100644
--- a/examples/SedovBlast_2D/sedov.yml
+++ b/examples/SedovBlast_2D/sedov.yml
@@ -11,7 +11,7 @@ TimeIntegration:
   time_begin: 0.    # The starting time of the simulation (in internal units).
   time_end:   5e-2  # The end time of the simulation (in internal units).
   dt_min:     1e-7  # The minimal time-step size of the simulation (in internal units).
-  dt_max:     1e-4  # The maximal time-step size of the simulation (in internal units).
+  dt_max:     1e-2  # The maximal time-step size of the simulation (in internal units).
 
 # Parameters governing the snapshots
 Snapshots:
diff --git a/examples/SedovBlast_3D/run.sh b/examples/SedovBlast_3D/run.sh
index 88aec36a7b96b5fd2a7fde41f0e0c9dc7185f1e8..7f0788cc822f1a6427fb6dbee4a921f79c942808 100755
--- a/examples/SedovBlast_3D/run.sh
+++ b/examples/SedovBlast_3D/run.sh
@@ -13,7 +13,7 @@ then
 fi
 
 # Run SWIFT
-../swift --hydro --threads=4 sedov.yml 2>&1 | tee output.log
+../swift --hydro --limiter --threads=4 sedov.yml 2>&1 | tee output.log
 
 # Plot the solution
 python plotSolution.py 5
diff --git a/examples/SedovBlast_3D/sedov.yml b/examples/SedovBlast_3D/sedov.yml
index 6cf5b02427b8004787b646e6bcdd4bacaa25bc06..19e8c72538a748304ca4da076458c9ae27dc8f46 100644
--- a/examples/SedovBlast_3D/sedov.yml
+++ b/examples/SedovBlast_3D/sedov.yml
@@ -11,7 +11,7 @@ TimeIntegration:
   time_begin: 0.    # The starting time of the simulation (in internal units).
   time_end:   5e-2  # The end time of the simulation (in internal units).
   dt_min:     1e-7  # The minimal time-step size of the simulation (in internal units).
-  dt_max:     1e-4  # The maximal time-step size of the simulation (in internal units).
+  dt_max:     1e-2  # The maximal time-step size of the simulation (in internal units).
 
 # Parameters governing the snapshots
 Snapshots:
diff --git a/examples/SmallCosmoVolume_DM/small_cosmo_volume_dm.yml b/examples/SmallCosmoVolume_DM/small_cosmo_volume_dm.yml
index 910137edc442c994a9f31a8c62e16818ca4ae97d..ebe3a78ee0d03eb53752b1dfa8fa749931a754a9 100644
--- a/examples/SmallCosmoVolume_DM/small_cosmo_volume_dm.yml
+++ b/examples/SmallCosmoVolume_DM/small_cosmo_volume_dm.yml
@@ -10,7 +10,6 @@ InternalUnitSystem:
 StructureFinding:
   config_file_name:     stf_input_6dfof_dmonly_sub.cfg
   basename:             ./stf
-  output_time_format:   1
   scale_factor_first:   0.02
   delta_time:           1.02
 
diff --git a/examples/SmallCosmoVolume_VELOCIraptor/small_cosmo_volume.yml b/examples/SmallCosmoVolume_VELOCIraptor/small_cosmo_volume.yml
index c8157a7a0e0065b1f58667fb8437b9e3883eda75..d6b9a78fe3c2a891492affbdea9787d62916d3ed 100644
--- a/examples/SmallCosmoVolume_VELOCIraptor/small_cosmo_volume.yml
+++ b/examples/SmallCosmoVolume_VELOCIraptor/small_cosmo_volume.yml
@@ -37,8 +37,9 @@ SPH:
 # Parameters governing the snapshots
 Snapshots:
   basename:            snap
-  delta_time:          1.02
+  delta_time:          1.05
   scale_factor_first:  0.02
+  invoke_stf:          1
   
 # Parameters governing the conserved quantities statistics
 Statistics:
@@ -52,16 +53,16 @@ Scheduler:
 # Parameters related to the initial conditions
 InitialConditions:
   file_name:  small_cosmo_volume.hdf5
+  periodic:                    1
   cleanup_h_factors:           1    
   cleanup_velocity_factors:    1  
-  generate_gas_in_ics: 1            # Generate gas particles from the DM-only ICs
-  cleanup_smoothing_lengths: 1      # Since we generate gas, make use of the (expensive) cleaning-up procedure.
+  generate_gas_in_ics:         1     # Generate gas particles from the DM-only ICs
+  cleanup_smoothing_lengths:   1     # Since we generate gas, make use of the (expensive) cleaning-up procedure.
 
 # Structure finding options (requires velociraptor)
 StructureFinding:
   config_file_name:     stfconfig_input.cfg
   basename:             ./stf
-  output_time_format:   1
   scale_factor_first:   0.02
   delta_time:           1.02
  
diff --git a/examples/main.c b/examples/main.c
index eaa94cfc699000234bd0010c32181cf9bace5651..3f558fe240b5efec5f6797837a415b8bc5b762ef 100644
--- a/examples/main.c
+++ b/examples/main.c
@@ -155,6 +155,7 @@ int main(int argc, char *argv[]) {
   int with_stars = 0;
   int with_star_formation = 0;
   int with_feedback = 0;
+  int with_limiter = 0;
   int with_fp_exceptions = 0;
   int with_drift_all = 0;
   int with_mpole_reconstruction = 0;
@@ -204,6 +205,8 @@ int main(int argc, char *argv[]) {
       OPT_BOOLEAN('S', "stars", &with_stars, "Run with stars.", NULL, 0, 0),
       OPT_BOOLEAN('x', "velociraptor", &with_structure_finding,
                   "Run with structure finding.", NULL, 0, 0),
+      OPT_BOOLEAN(0, "limiter", &with_limiter, "Run with time-step limiter.",
+                  NULL, 0, 0),
 
       OPT_GROUP("  Control options:\n"),
       OPT_BOOLEAN('a', "pin", &with_aff,
@@ -458,11 +461,7 @@ int main(int argc, char *argv[]) {
   if (with_feedback) error("Can't run with feedback over MPI (yet).");
   if (with_star_formation)
     error("Can't run with star formation over MPI (yet)");
-#endif
-
-#if defined(WITH_MPI) && defined(HAVE_VELOCIRAPTOR)
-  if (with_structure_finding && nr_nodes > 1)
-    error("VEOCIraptor not yet enabled over MPI.");
+  if (with_limiter) error("Can't run with time-step limiter over MPI (yet)");
 #endif
 
     /* Temporary early aborts for modes not supported with hand-vec. */
@@ -910,6 +909,7 @@ int main(int argc, char *argv[]) {
       engine_policies |= engine_policy_external_gravity;
     if (with_cosmology) engine_policies |= engine_policy_cosmology;
     if (with_temperature) engine_policies |= engine_policy_temperature;
+    if (with_limiter) engine_policies |= engine_policy_limiter;
     if (with_cooling) engine_policies |= engine_policy_cooling;
     if (with_stars) engine_policies |= engine_policy_stars;
     if (with_star_formation) engine_policies |= engine_policy_star_formation;
@@ -934,6 +934,10 @@ int main(int argc, char *argv[]) {
       fflush(stdout);
     }
 
+#ifdef HAVE_VELOCIRAPTOR
+    if (with_structure_finding) velociraptor_init(&e);
+#endif
+
     /* Get some info to the user. */
     if (myrank == 0) {
       long long N_DM = N_total[1] - N_total[2] - N_total[0];
@@ -1216,14 +1220,6 @@ int main(int argc, char *argv[]) {
 #endif
     // write a final snapshot with logger, in order to facilitate a restart
     engine_dump_snapshot(&e);
-
-#ifdef HAVE_VELOCIRAPTOR
-    /* Call VELOCIraptor at the end of the run to find groups. */
-    if (e.policy & engine_policy_structure_finding) {
-      velociraptor_init(&e);
-      velociraptor_invoke(&e);
-    }
-#endif
   }
 
 #ifdef WITH_MPI
diff --git a/examples/parameter_example.yml b/examples/parameter_example.yml
index c1a4c867c77b5d770dc1349c4218e5b7c2e10a9b..67b5d051e0c837764a13d7bc45a7ab25f528a96b 100644
--- a/examples/parameter_example.yml
+++ b/examples/parameter_example.yml
@@ -27,8 +27,11 @@ SPH:
   resolution_eta:        1.2348   # Target smoothing length in units of the mean inter-particle separation (1.2348 == 48Ngbs with the cubic spline kernel).
   CFL_condition:         0.1      # Courant-Friedrich-Levy condition for time integration.
   h_tolerance:           1e-4     # (Optional) Relative accuracy of the Netwon-Raphson scheme for the smoothing lengths.
+  h_max:                 10.      # (Optional) Maximal allowed smoothing length in internal units. Defaults to FLT_MAX if unspecified.
   max_volume_change:     1.4      # (Optional) Maximal allowed change of kernel volume over one time-step.
   max_ghost_iterations:  30       # (Optional) Maximal number of iterations allowed to converge towards the smoothing length.
+  initial_temperature:   0        # (Optional) Initial temperature (in internal units) to set the gas particles at start-up. Value is ignored if set to 0.
+  minimal_temperature:   0        # (Optional) Minimal temperature (in internal units) allowed for the gas particles. Value is ignored if set to 0.
   H_mass_fraction:       0.755    # (Optional) Hydrogen mass fraction used for initial conversion from temp to internal energy. Default value is derived from the physical constants.
   H_ionization_temperature: 1e4   # (Optional) Temperature of the transition from neutral to ionized Hydrogen for primoridal gas.
   viscosity_alpha:       0.8      # (Optional) Override for the initial value of the artificial viscosity. In schemes that have a fixed AV, this remains as alpha throughout the run.
@@ -65,6 +68,7 @@ Scheduler:
   cell_extra_sparts:         400       # (Optional) Number of spare sparts per top-level allocated at rebuild time for on-the-fly creation.
   max_top_level_cells:       12        # (Optional) Maximal number of top-level cells in any dimension. The number of top-level cells will be the cube of this (this is the default value).
   tasks_per_cell:            0         # (Optional) The average number of tasks per cell. If not large enough the simulation will fail (means guess...).
+  links_per_tasks:           10        # (Optional) The average number of links per task (before adding the communication tasks). If not large enough the simulation will fail (means guess...). Defaults to 10.
   mpi_message_limit:         4096      # (Optional) Maximum MPI task message size to send non-buffered, KB.
 
 # Parameters governing the time integration (Set dt_min and dt_max to the same value for a fixed time-step run.)
@@ -81,6 +85,7 @@ Snapshots:
   scale_factor_first: 0.1 # (Optional) Scale-factor of the first snapshot if cosmological time-integration.
   time_first: 0.          # (Optional) Time of the first output if non-cosmological time-integration (in internal units)
   delta_time: 0.01        # Time difference between consecutive outputs (in internal units)
+  invoke_stf: 0           # (Optional) Call VELOCIraptor every time a snapshot is written irrespective of the VELOCIraptor output strategy.
   compression: 0          # (Optional) Set the level of compression of the HDF5 datasets [0-9]. 0 does no compression.
   int_time_label_on:   0  # (Optional) Enable to label the snapshots using the time rounded to an integer (in internal units)
   UnitMass_in_cgs:     1  # (Optional) Unit system for the outputs (Grams)
@@ -153,6 +158,16 @@ DomainDecomposition:
   itr:              100     # When adaptive defines the ratio of inter node communication time to data redistribution time, in the range 0.00001 to 10000000.0.
                             # Lower values give less data movement during redistributions, at the cost of global balance which may require more communication.
 
+# Structure finding options (requires velociraptor)
+StructureFinding:
+  config_file_name:     stf_input.cfg # Name of the STF config file.
+  basename:             ./stf         # Common part of the name of output files.
+  scale_factor_first:   0.92          # (Optional) Scale-factor of the first snapshot (cosmological run)
+  time_first:           0.01          # (Optional) Time of the first structure finding output (in internal units).
+  delta_time:           1.10          # (Optional) Time difference between consecutive structure finding outputs (in internal units) in simulation time intervals.
+  output_list_on:       0   	      # (Optional) Enable the output list
+  output_list:          stflist.txt   # (Optional) File containing the output times (see documentation in "Parameter File" section)
+
 # Parameters related to the equation of state ------------------------------------------
 
 EoS:
@@ -296,6 +311,8 @@ EAGLEChemistry:
   init_abundance_Silicon:   0.000        # Inital fraction of particle mass in Silicon
   init_abundance_Iron:      0.000        # Inital fraction of particle mass in Iron
 
+# Parameters related to star formation models  -----------------------------------------------
+
 # Schaye and Dalla Vecchia 2008 star formation
 SchayeSF:
   thresh_MinOverDens:               57.7      # The critical density contrast to form stars
@@ -315,14 +332,3 @@ SchayeSF:
   EOS_Jeans_TemperatureNorm_K:      1e3     # No idea how this works
   EOS_JEANS_DensityNorm_HpCM3:      0.1      # No idea what the value is.
 
-# Structure finding options (requires velociraptor)
-StructureFinding:
-  config_file_name:     stf_input.cfg # Name of the STF config file.
-  basename:             ./stf         # Common part of the name of output files.
-  output_time_format:   0             # Specifies the frequency format of structure finding. 0 for simulation steps (delta_step) and 1 for simulation time intervals (delta_time).
-  scale_factor_first:   0.92          # Scale-factor of the first snaphot (cosmological run)
-  time_first:           0.01          # Time of the first structure finding output (in internal units).
-  delta_step:           1000          # Time difference between consecutive structure finding outputs (in internal units) in simulation steps.
-  delta_time:           1.10          # Time difference between consecutive structure finding outputs (in internal units) in simulation time intervals.
-  output_list_on:      0   	      # (Optional) Enable the output list
-  output_list:         stflist.txt    # (Optional) File containing the output times (see documentation in "Parameter File" section)
diff --git a/src/Makefile.am b/src/Makefile.am
index 276345b9e20bb29c23234a6e9a2aed0a5c320b88..1a975903082d47963125b175dd4967f76384bae0 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -50,7 +50,7 @@ include_HEADERS = space.h runner.h queue.h task.h lock.h cell.h part.h const.h \
     chemistry.h chemistry_io.h chemistry_struct.h cosmology.h restart.h space_getsid.h utilities.h \
     mesh_gravity.h cbrt.h exp10.h velociraptor_interface.h swift_velociraptor_part.h outputlist.h \
     logger_io.h tracers_io.h tracers.h tracers_struct.h sftracers_io.h sftracers.h \
-    sftracers_struct.h
+    sftracers_struct.h velociraptor_struct.h velociraptor_io.h
 
 # source files for EAGLE cooling
 EAGLE_COOLING_SOURCES =
@@ -76,7 +76,7 @@ nobase_noinst_HEADERS = align.h approx_math.h atomic.h barrier.h cycle.h error.h
 		 gravity_iact.h kernel_long_gravity.h vector.h cache.h runner_doiact.h runner_doiact_vec.h runner_doiact_grav.h  \
                  runner_doiact_nosort.h runner_doiact_stars.h units.h intrinsics.h minmax.h kick.h timestep.h drift.h \
 		 adiabatic_index.h io_properties.h dimension.h part_type.h periodic.h memswap.h dump.h logger.h sign.h \
-		 logger_io.h \
+		 logger_io.h timestep_limiter.h \
 		 gravity.h gravity_io.h gravity_cache.h \
 		 gravity/Default/gravity.h gravity/Default/gravity_iact.h gravity/Default/gravity_io.h \
 		 gravity/Default/gravity_debug.h gravity/Default/gravity_part.h  \
diff --git a/src/cache.h b/src/cache.h
index 5dd8164b1dc80795a8593cc2af42c2c9e7e68885..92bf908a400eb3c2a5425fb7a31753e0c1f719fa 100644
--- a/src/cache.h
+++ b/src/cache.h
@@ -179,8 +179,9 @@ __attribute__((always_inline)) INLINE void cache_init(struct cache *c,
  *
  * @param ci The #cell.
  * @param ci_cache The cache.
+ * @return The hydro count padded to a multiple of NUM_VEC_PROC * VEC_SIZE.
  */
-__attribute__((always_inline)) INLINE void cache_read_particles(
+__attribute__((always_inline)) INLINE int cache_read_particles(
     const struct cell *restrict const ci,
     struct cache *restrict const ci_cache) {
 
@@ -197,12 +198,29 @@ __attribute__((always_inline)) INLINE void cache_read_particles(
   swift_declare_aligned_ptr(float, vy, ci_cache->vy, SWIFT_CACHE_ALIGNMENT);
   swift_declare_aligned_ptr(float, vz, ci_cache->vz, SWIFT_CACHE_ALIGNMENT);
 
+  const int count = ci->hydro.count;
   const struct part *restrict parts = ci->hydro.parts;
   const double loc[3] = {ci->loc[0], ci->loc[1], ci->loc[2]};
+  const double max_dx = ci->hydro.dx_max_part;
+  const float pos_padded[3] = {-(2. * ci->width[0] + max_dx),
+                               -(2. * ci->width[1] + max_dx),
+                               -(2. * ci->width[2] + max_dx)};
+  const float h_padded = ci->hydro.h_max / 4.;
 
   /* Shift the particles positions to a local frame so single precision can be
    * used instead of double precision. */
-  for (int i = 0; i < ci->hydro.count; i++) {
+  for (int i = 0; i < count; i++) {
+
+    /* Pad inhibited particles. */
+    if (parts[i].time_bin >= time_bin_inhibited) {
+      x[i] = pos_padded[0];
+      y[i] = pos_padded[1];
+      z[i] = pos_padded[2];
+      h[i] = h_padded;
+
+      continue;
+    }
+
     x[i] = (float)(parts[i].x[0] - loc[0]);
     y[i] = (float)(parts[i].x[1] - loc[1]);
     z[i] = (float)(parts[i].x[2] - loc[2]);
@@ -213,6 +231,26 @@ __attribute__((always_inline)) INLINE void cache_read_particles(
     vz[i] = parts[i].v[2];
   }
 
+  /* Pad cache if the no. of particles is not a multiple of double the vector
+   * length. */
+  int count_align = count;
+  const int rem = count % (NUM_VEC_PROC * VEC_SIZE);
+  if (rem != 0) {
+    count_align += (NUM_VEC_PROC * VEC_SIZE) - rem;
+
+    /* Set positions to something outside of the range of any particle */
+    for (int i = count; i < count_align; i++) {
+      x[i] = pos_padded[0];
+      y[i] = pos_padded[1];
+      z[i] = pos_padded[2];
+    }
+  }
+
+  return count_align;
+
+#else
+  error("Can't call the cache reading function with this flavour of SPH!");
+  return 0;
 #endif
 }
 
@@ -261,10 +299,32 @@ __attribute__((always_inline)) INLINE void cache_read_particles_subset(
       if (*last_pi + pad < ci->hydro.count) *last_pi += pad;
     }
 
+    const double max_dx = ci->hydro.dx_max_part;
+    const float pos_padded[3] = {-(2. * ci->width[0] + max_dx),
+                                 -(2. * ci->width[1] + max_dx),
+                                 -(2. * ci->width[2] + max_dx)};
+    const float h_padded = ci->hydro.h_max / 4.;
+
     /* Shift the particles positions to a local frame so single precision can be
      * used instead of double precision. */
     for (int i = 0; i < *last_pi; i++) {
       const int idx = sort_i[i].i;
+
+      /* Put inhibited particles out of range. */
+      if (parts[idx].time_bin >= time_bin_inhibited) {
+        x[i] = pos_padded[0];
+        y[i] = pos_padded[1];
+        z[i] = pos_padded[2];
+        h[i] = h_padded;
+
+        m[i] = 1.f;
+        vx[i] = 1.f;
+        vy[i] = 1.f;
+        vz[i] = 1.f;
+
+        continue;
+      }
+
       x[i] = (float)(parts[idx].x[0] - loc[0]);
       y[i] = (float)(parts[idx].x[1] - loc[1]);
       z[i] = (float)(parts[idx].x[2] - loc[2]);
@@ -278,12 +338,6 @@ __attribute__((always_inline)) INLINE void cache_read_particles_subset(
     /* Pad cache with fake particles that exist outside the cell so will not
      * interact. We use values of the same magnitude (but negative!) as the real
      * particles to avoid overflow problems. */
-    const double max_dx = ci->hydro.dx_max_part;
-    const float pos_padded[3] = {-(2. * ci->width[0] + max_dx),
-                                 -(2. * ci->width[1] + max_dx),
-                                 -(2. * ci->width[2] + max_dx)};
-    const float h_padded = ci->hydro.parts[0].h;
-
     for (int i = *last_pi; i < *last_pi + VEC_SIZE; i++) {
       x[i] = pos_padded[0];
       y[i] = pos_padded[1];
@@ -308,11 +362,32 @@ __attribute__((always_inline)) INLINE void cache_read_particles_subset(
     }
 
     const int ci_cache_count = ci->hydro.count - *first_pi;
+    const double max_dx = ci->hydro.dx_max_part;
+    const float pos_padded[3] = {-(2. * ci->width[0] + max_dx),
+                                 -(2. * ci->width[1] + max_dx),
+                                 -(2. * ci->width[2] + max_dx)};
+    const float h_padded = ci->hydro.h_max / 4.;
 
     /* Shift the particles positions to a local frame so single precision can be
      * used instead of double precision. */
     for (int i = 0; i < ci_cache_count; i++) {
       const int idx = sort_i[i + *first_pi].i;
+
+      /* Put inhibited particles out of range. */
+      if (parts[idx].time_bin >= time_bin_inhibited) {
+        x[i] = pos_padded[0];
+        y[i] = pos_padded[1];
+        z[i] = pos_padded[2];
+        h[i] = h_padded;
+
+        m[i] = 1.f;
+        vx[i] = 1.f;
+        vy[i] = 1.f;
+        vz[i] = 1.f;
+
+        continue;
+      }
+
       x[i] = (float)(parts[idx].x[0] - loc[0]);
       y[i] = (float)(parts[idx].x[1] - loc[1]);
       z[i] = (float)(parts[idx].x[2] - loc[2]);
@@ -326,12 +401,6 @@ __attribute__((always_inline)) INLINE void cache_read_particles_subset(
     /* Pad cache with fake particles that exist outside the cell so will not
      * interact. We use values of the same magnitude (but negative!) as the real
      * particles to avoid overflow problems. */
-    const double max_dx = ci->hydro.dx_max_part;
-    const float pos_padded[3] = {-(2. * ci->width[0] + max_dx),
-                                 -(2. * ci->width[1] + max_dx),
-                                 -(2. * ci->width[2] + max_dx)};
-    const float h_padded = ci->hydro.parts[0].h;
-
     for (int i = ci->hydro.count - *first_pi;
          i < ci->hydro.count - *first_pi + VEC_SIZE; i++) {
       x[i] = pos_padded[0];
@@ -355,8 +424,9 @@ __attribute__((always_inline)) INLINE void cache_read_particles_subset(
  *
  * @param ci The #cell.
  * @param ci_cache The cache.
+ * @return The hydro count padded up to a multiple of VEC_SIZE.
  */
-__attribute__((always_inline)) INLINE void cache_read_force_particles(
+__attribute__((always_inline)) INLINE int cache_read_force_particles(
     const struct cell *restrict const ci,
     struct cache *restrict const ci_cache) {
 
@@ -382,12 +452,34 @@ __attribute__((always_inline)) INLINE void cache_read_force_particles(
   swift_declare_aligned_ptr(float, soundspeed, ci_cache->soundspeed,
                             SWIFT_CACHE_ALIGNMENT);
 
+  const int count = ci->hydro.count;
   const struct part *restrict parts = ci->hydro.parts;
   const double loc[3] = {ci->loc[0], ci->loc[1], ci->loc[2]};
+  const double max_dx = ci->hydro.dx_max_part;
+  const float pos_padded[3] = {-(2. * ci->width[0] + max_dx),
+                               -(2. * ci->width[1] + max_dx),
+                               -(2. * ci->width[2] + max_dx)};
+  const float h_padded = ci->hydro.h_max / 4.;
 
   /* Shift the particles positions to a local frame so single precision can be
    * used instead of double precision. */
-  for (int i = 0; i < ci->hydro.count; i++) {
+  for (int i = 0; i < count; i++) {
+
+    /* Skip inhibited particles. */
+    if (parts[i].time_bin >= time_bin_inhibited) {
+      x[i] = pos_padded[0];
+      y[i] = pos_padded[1];
+      z[i] = pos_padded[2];
+      h[i] = h_padded;
+      rho[i] = 1.f;
+      grad_h[i] = 1.f;
+      pOrho2[i] = 1.f;
+      balsara[i] = 1.f;
+      soundspeed[i] = 1.f;
+
+      continue;
+    }
+
     x[i] = (float)(parts[i].x[0] - loc[0]);
     y[i] = (float)(parts[i].x[1] - loc[1]);
     z[i] = (float)(parts[i].x[2] - loc[2]);
@@ -403,6 +495,32 @@ __attribute__((always_inline)) INLINE void cache_read_force_particles(
     soundspeed[i] = parts[i].force.soundspeed;
   }
 
+  /* Pad cache if there is a serial remainder. */
+  int count_align = count;
+  const int rem = count % VEC_SIZE;
+  if (rem != 0) {
+    count_align += VEC_SIZE - rem;
+
+    /* Set positions to something outside of the range of any particle and
+     * fill the remaining fields with placeholder values. */
+    for (int i = count; i < count_align; i++) {
+      x[i] = pos_padded[0];
+      y[i] = pos_padded[1];
+      z[i] = pos_padded[2];
+      h[i] = h_padded;
+      rho[i] = 1.f;
+      grad_h[i] = 1.f;
+      pOrho2[i] = 1.f;
+      balsara[i] = 1.f;
+      soundspeed[i] = 1.f;
+    }
+  }
+
+  return count_align;
+
+#else
+  error("Can't call the cache reading function with this flavour of SPH!");
+  return 0;
 #endif
 }
 
@@ -472,11 +590,32 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
   swift_declare_aligned_ptr(float, vz, ci_cache->vz, SWIFT_CACHE_ALIGNMENT);
 
   int ci_cache_count = ci->hydro.count - first_pi_align;
+  const double max_dx = max(ci->hydro.dx_max_part, cj->hydro.dx_max_part);
+  const float pos_padded_i[3] = {-(2. * ci->width[0] + max_dx),
+                                 -(2. * ci->width[1] + max_dx),
+                                 -(2. * ci->width[2] + max_dx)};
+  const float h_padded_i = ci->hydro.h_max / 4.;
 
   /* Shift the particles positions to a local frame (ci frame) so single
    * precision can be used instead of double precision.  */
   for (int i = 0; i < ci_cache_count; i++) {
     const int idx = sort_i[i + first_pi_align].i;
+
+    /* Put inhibited particles out of range. */
+    if (parts_i[idx].time_bin >= time_bin_inhibited) {
+      x[i] = pos_padded_i[0];
+      y[i] = pos_padded_i[1];
+      z[i] = pos_padded_i[2];
+      h[i] = h_padded_i;
+
+      m[i] = 1.f;
+      vx[i] = 1.f;
+      vy[i] = 1.f;
+      vz[i] = 1.f;
+
+      continue;
+    }
+
     x[i] = (float)(parts_i[idx].x[0] - total_ci_shift[0]);
     y[i] = (float)(parts_i[idx].x[1] - total_ci_shift[1]);
     z[i] = (float)(parts_i[idx].x[2] - total_ci_shift[2]);
@@ -532,18 +671,12 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
   /* Pad cache with fake particles that exist outside the cell so will not
    * interact. We use values of the same magnitude (but negative!) as the real
    * particles to avoid overflow problems. */
-  const double max_dx = max(ci->hydro.dx_max_part, cj->hydro.dx_max_part);
-  const float pos_padded[3] = {-(2. * ci->width[0] + max_dx),
-                               -(2. * ci->width[1] + max_dx),
-                               -(2. * ci->width[2] + max_dx)};
-  const float h_padded = ci->hydro.parts[0].h;
-
   for (int i = ci->hydro.count - first_pi_align;
        i < ci->hydro.count - first_pi_align + VEC_SIZE; i++) {
-    x[i] = pos_padded[0];
-    y[i] = pos_padded[1];
-    z[i] = pos_padded[2];
-    h[i] = h_padded;
+    x[i] = pos_padded_i[0];
+    y[i] = pos_padded_i[1];
+    z[i] = pos_padded_i[2];
+    h[i] = h_padded_i;
 
     m[i] = 1.f;
     vx[i] = 1.f;
@@ -562,8 +695,29 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
   swift_declare_aligned_ptr(float, vyj, cj_cache->vy, SWIFT_CACHE_ALIGNMENT);
   swift_declare_aligned_ptr(float, vzj, cj_cache->vz, SWIFT_CACHE_ALIGNMENT);
 
+  const float pos_padded_j[3] = {-(2. * cj->width[0] + max_dx),
+                                 -(2. * cj->width[1] + max_dx),
+                                 -(2. * cj->width[2] + max_dx)};
+  const float h_padded_j = cj->hydro.h_max / 4.;
+
   for (int i = 0; i <= last_pj_align; i++) {
     const int idx = sort_j[i].i;
+
+    /* Put inhibited particles out of range. */
+    if (parts_j[idx].time_bin >= time_bin_inhibited) {
+      xj[i] = pos_padded_j[0];
+      yj[i] = pos_padded_j[1];
+      zj[i] = pos_padded_j[2];
+      hj[i] = h_padded_j;
+
+      mj[i] = 1.f;
+      vxj[i] = 1.f;
+      vyj[i] = 1.f;
+      vzj[i] = 1.f;
+
+      continue;
+    }
+
     xj[i] = (float)(parts_j[idx].x[0] - total_cj_shift[0]);
     yj[i] = (float)(parts_j[idx].x[1] - total_cj_shift[1]);
     zj[i] = (float)(parts_j[idx].x[2] - total_cj_shift[2]);
@@ -609,11 +763,6 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
   /* Pad cache with fake particles that exist outside the cell so will not
    * interact. We use values of the same magnitude (but negative!) as the real
    * particles to avoid overflow problems. */
-  const float pos_padded_j[3] = {-(2. * cj->width[0] + max_dx),
-                                 -(2. * cj->width[1] + max_dx),
-                                 -(2. * cj->width[2] + max_dx)};
-  const float h_padded_j = cj->hydro.parts[0].h;
-
   for (int i = last_pj_align + 1; i < last_pj_align + 1 + VEC_SIZE; i++) {
     xj[i] = pos_padded_j[0];
     yj[i] = pos_padded_j[1];
@@ -701,11 +850,37 @@ cache_read_two_partial_cells_sorted_force(
                             SWIFT_CACHE_ALIGNMENT);
 
   int ci_cache_count = ci->hydro.count - first_pi_align;
+  const double max_dx = max(ci->hydro.dx_max_part, cj->hydro.dx_max_part);
+  const float pos_padded_i[3] = {-(2. * ci->width[0] + max_dx),
+                                 -(2. * ci->width[1] + max_dx),
+                                 -(2. * ci->width[2] + max_dx)};
+  const float h_padded_i = ci->hydro.h_max / 4.;
+
   /* Shift the particles positions to a local frame (ci frame) so single
    * precision can be  used instead of double precision.  */
   for (int i = 0; i < ci_cache_count; i++) {
 
     const int idx = sort_i[i + first_pi_align].i;
+
+    /* Put inhibited particles out of range. */
+    if (parts_i[idx].time_bin >= time_bin_inhibited) {
+      x[i] = pos_padded_i[0];
+      y[i] = pos_padded_i[1];
+      z[i] = pos_padded_i[2];
+      h[i] = h_padded_i;
+      m[i] = 1.f;
+      vx[i] = 1.f;
+      vy[i] = 1.f;
+      vz[i] = 1.f;
+      rho[i] = 1.f;
+      grad_h[i] = 1.f;
+      pOrho2[i] = 1.f;
+      balsara[i] = 1.f;
+      soundspeed[i] = 1.f;
+
+      continue;
+    }
+
     x[i] = (float)(parts_i[idx].x[0] - total_ci_shift[0]);
     y[i] = (float)(parts_i[idx].x[1] - total_ci_shift[1]);
     z[i] = (float)(parts_i[idx].x[2] - total_ci_shift[2]);
@@ -726,18 +901,12 @@ cache_read_two_partial_cells_sorted_force(
   /* Pad cache with fake particles that exist outside the cell so will not
    * interact. We use values of the same magnitude (but negative!) as the real
    * particles to avoid overflow problems. */
-  const double max_dx = max(ci->hydro.dx_max_part, cj->hydro.dx_max_part);
-  const float pos_padded[3] = {-(2. * ci->width[0] + max_dx),
-                               -(2. * ci->width[1] + max_dx),
-                               -(2. * ci->width[2] + max_dx)};
-  const float h_padded = ci->hydro.parts[0].h;
-
   for (int i = ci->hydro.count - first_pi_align;
        i < ci->hydro.count - first_pi_align + VEC_SIZE; i++) {
-    x[i] = pos_padded[0];
-    y[i] = pos_padded[1];
-    z[i] = pos_padded[2];
-    h[i] = h_padded;
+    x[i] = pos_padded_i[0];
+    y[i] = pos_padded_i[1];
+    z[i] = pos_padded_i[2];
+    h[i] = h_padded_i;
     m[i] = 1.f;
     vx[i] = 1.f;
     vy[i] = 1.f;
@@ -769,8 +938,33 @@ cache_read_two_partial_cells_sorted_force(
   swift_declare_aligned_ptr(float, soundspeedj, cj_cache->soundspeed,
                             SWIFT_CACHE_ALIGNMENT);
 
+  const float pos_padded_j[3] = {-(2. * cj->width[0] + max_dx),
+                                 -(2. * cj->width[1] + max_dx),
+                                 -(2. * cj->width[2] + max_dx)};
+  const float h_padded_j = cj->hydro.h_max / 4.;
+
   for (int i = 0; i <= last_pj_align; i++) {
     const int idx = sort_j[i].i;
+
+    /* Put inhibited particles out of range (same >= test as for ci). */
+    if (parts_j[idx].time_bin >= time_bin_inhibited) {
+      xj[i] = pos_padded_j[0];
+      yj[i] = pos_padded_j[1];
+      zj[i] = pos_padded_j[2];
+      hj[i] = h_padded_j;
+      mj[i] = 1.f;
+      vxj[i] = 1.f;
+      vyj[i] = 1.f;
+      vzj[i] = 1.f;
+      rhoj[i] = 1.f;
+      grad_hj[i] = 1.f;
+      pOrho2j[i] = 1.f;
+      balsaraj[i] = 1.f;
+      soundspeedj[i] = 1.f;
+
+      continue;
+    }
+
     xj[i] = (float)(parts_j[idx].x[0] - total_cj_shift[0]);
     yj[i] = (float)(parts_j[idx].x[1] - total_cj_shift[1]);
     zj[i] = (float)(parts_j[idx].x[2] - total_cj_shift[2]);
@@ -791,11 +985,6 @@ cache_read_two_partial_cells_sorted_force(
   /* Pad cache with fake particles that exist outside the cell so will not
    * interact. We use values of the same magnitude (but negative!) as the real
    * particles to avoid overflow problems. */
-  const float pos_padded_j[3] = {-(2. * cj->width[0] + max_dx),
-                                 -(2. * cj->width[1] + max_dx),
-                                 -(2. * cj->width[2] + max_dx)};
-  const float h_padded_j = cj->hydro.parts[0].h;
-
   for (int i = last_pj_align + 1; i < last_pj_align + 1 + VEC_SIZE; i++) {
     xj[i] = pos_padded_j[0];
     yj[i] = pos_padded_j[1];
@@ -813,7 +1002,8 @@ cache_read_two_partial_cells_sorted_force(
   }
 }
 
-/* @brief Clean the memory allocated by a #cache object.
+/**
+ * @brief Clean the memory allocated by a #cache object.
  *
  * @param c The #cache to clean.
  */
diff --git a/src/cell.c b/src/cell.c
index a7914a8a5a20d596a5516d61959e5c826c737b15..bd1022f1fa23b5911c4056b602008601fa36ce68 100644
--- a/src/cell.c
+++ b/src/cell.c
@@ -98,6 +98,14 @@ int cell_getsize(struct cell *c) {
  */
 int cell_link_parts(struct cell *c, struct part *parts) {
 
+#ifdef SWIFT_DEBUG_CHECKS
+  if (c->nodeID == engine_rank)
+    error("Linking foreign particles in a local cell!");
+
+  if (c->hydro.parts != NULL)
+    error("Linking parts into a cell that was already linked");
+#endif
+
   c->hydro.parts = parts;
 
   /* Fill the progeny recursively, depth-first. */
@@ -123,6 +131,14 @@ int cell_link_parts(struct cell *c, struct part *parts) {
  */
 int cell_link_gparts(struct cell *c, struct gpart *gparts) {
 
+#ifdef SWIFT_DEBUG_CHECKS
+  if (c->nodeID == engine_rank)
+    error("Linking foreign particles in a local cell!");
+
+  if (c->grav.parts != NULL)
+    error("Linking gparts into a cell that was already linked");
+#endif
+
   c->grav.parts = gparts;
 
   /* Fill the progeny recursively, depth-first. */
@@ -148,6 +164,14 @@ int cell_link_gparts(struct cell *c, struct gpart *gparts) {
  */
 int cell_link_sparts(struct cell *c, struct spart *sparts) {
 
+#ifdef SWIFT_DEBUG_CHECKS
+  if (c->nodeID == engine_rank)
+    error("Linking foreign particles in a local cell!");
+
+  if (c->stars.parts != NULL)
+    error("Linking sparts into a cell that was already linked");
+#endif
+
   c->stars.parts = sparts;
 
   /* Fill the progeny recursively, depth-first. */
@@ -163,6 +187,182 @@ int cell_link_sparts(struct cell *c, struct spart *sparts) {
   return c->stars.count;
 }
 
+/**
+ * @brief Recurse down foreign cells until reaching one with a hydro
+ * receive task (mpi.hydro.recv_xv); then link the #part array from that
+ * level. Progeny are given consecutive slices of the array.
+ *
+ * @param c The #cell.
+ * @param parts The #part array, starting at this cell's first particle.
+ *
+ * @return The number of particles linked (0 in a task-less leaf).
+ */
+int cell_link_foreign_parts(struct cell *c, struct part *parts) {
+
+#ifdef WITH_MPI
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (c->nodeID == engine_rank)
+    error("Linking foreign particles in a local cell!");
+#endif
+
+  /* Do we have a hydro task at this level? */
+  if (c->mpi.hydro.recv_xv != NULL) {
+
+    /* Recursively attach the parts */
+    const int counts = cell_link_parts(c, parts);
+#ifdef SWIFT_DEBUG_CHECKS
+    if (counts != c->hydro.count)
+      error("Something is wrong with the foreign counts");
+#endif
+    return counts;
+  }
+
+  /* A leaf cell without a receive task has nothing to link */
+  if (!c->split) return 0;
+
+  /* Go deeper to find the level where the tasks are */
+  int count = 0;
+  for (int k = 0; k < 8; k++) {
+    if (c->progeny[k] != NULL) {
+      count += cell_link_foreign_parts(c->progeny[k], &parts[count]);
+    }
+  }
+  return count;
+
+#else
+  error("Calling linking of foregin particles in non-MPI mode.");
+  return 0;
+#endif
+}
+
+/**
+ * @brief Recurse down foreign cells until reaching one with a gravity
+ * receive task (mpi.grav.recv); then link the #gpart array from that
+ * level. Progeny are given consecutive slices of the array.
+ *
+ * @param c The #cell.
+ * @param gparts The #gpart array, starting at this cell's first particle.
+ *
+ * @return The number of particles linked (0 in a task-less leaf).
+ */
+int cell_link_foreign_gparts(struct cell *c, struct gpart *gparts) {
+
+#ifdef WITH_MPI
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (c->nodeID == engine_rank)
+    error("Linking foreign particles in a local cell!");
+#endif
+
+  /* Do we have a gravity task at this level? */
+  if (c->mpi.grav.recv != NULL) {
+
+    /* Recursively attach the gparts */
+    const int counts = cell_link_gparts(c, gparts);
+#ifdef SWIFT_DEBUG_CHECKS
+    if (counts != c->grav.count)
+      error("Something is wrong with the foreign counts");
+#endif
+    return counts;
+  }
+
+  /* A leaf cell without a receive task has nothing to link */
+  if (!c->split) return 0;
+
+  /* Go deeper to find the level where the tasks are */
+  int count = 0;
+  for (int k = 0; k < 8; k++) {
+    if (c->progeny[k] != NULL) {
+      count += cell_link_foreign_gparts(c->progeny[k], &gparts[count]);
+    }
+  }
+  return count;
+
+#else
+  error("Calling linking of foregin particles in non-MPI mode.");
+  return 0;
+#endif
+}
+
+/**
+ * @brief Recursively count the number of #part in foreign cells that
+ * are in cells with hydro-related tasks.
+ *
+ * @param c The #cell.
+ *
+ * @return The number of particles counted.
+ */
+int cell_count_parts_for_tasks(const struct cell *c) {
+
+#ifdef WITH_MPI
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (c->nodeID == engine_rank)
+    error("Counting foreign particles in a local cell!");
+#endif
+
+  /* Do we have a hydro task at this level? */
+  if (c->mpi.hydro.recv_xv != NULL) return c->hydro.count;
+
+  /* A leaf cell without a receive task contributes nothing */
+  if (!c->split) return 0;
+
+  /* Otherwise, accumulate the counts of the progeny that have tasks at
+   * a deeper level */
+  int count = 0;
+  for (int k = 0; k < 8; ++k) {
+    if (c->progeny[k] != NULL) {
+      count += cell_count_parts_for_tasks(c->progeny[k]);
+    }
+  }
+  return count;
+
+#else
+  error("Calling counting of foreign particles in non-MPI mode.");
+  return 0;
+#endif
+}
+
+/**
+ * @brief Recursively count the number of #gpart in foreign cells that
+ * are in cells with gravity-related tasks.
+ *
+ * @param c The #cell.
+ *
+ * @return The number of particles counted.
+ */
+int cell_count_gparts_for_tasks(const struct cell *c) {
+
+#ifdef WITH_MPI
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (c->nodeID == engine_rank)
+    error("Counting foreign particles in a local cell!");
+#endif
+
+  /* Do we have a gravity task at this level? */
+  if (c->mpi.grav.recv != NULL) return c->grav.count;
+
+  /* A leaf cell without a receive task contributes nothing */
+  if (!c->split) return 0;
+
+  /* Otherwise, accumulate the counts of the progeny that have tasks at
+   * a deeper level */
+  int count = 0;
+  for (int k = 0; k < 8; ++k) {
+    if (c->progeny[k] != NULL) {
+      count += cell_count_gparts_for_tasks(c->progeny[k]);
+    }
+  }
+  return count;
+
+#else
+  error("Calling counting of foreign particles in non-MPI mode.");
+  return 0;
+#endif
+}
+
 /**
  * @brief Pack the data of the given cell and all it's sub-cells.
  *
@@ -1232,8 +1432,11 @@ void cell_clean_links(struct cell *c, void *data) {
   c->hydro.density = NULL;
   c->hydro.gradient = NULL;
   c->hydro.force = NULL;
+  c->hydro.limiter = NULL;
   c->grav.grav = NULL;
   c->grav.mm = NULL;
+  c->stars.density = NULL;
+  c->stars.feedback = NULL;
 }
 
 /**
@@ -1599,6 +1802,14 @@ void cell_clear_drift_flags(struct cell *c, void *data) {
   c->grav.do_sub_drift = 0;
 }
 
+/**
+ * @brief Reset the do_limiter and do_sub_limiter flags of the given cell.
+ */
+void cell_clear_limiter_flags(struct cell *c, void *data) {
+  c->hydro.do_limiter = 0;
+  c->hydro.do_sub_limiter = 0;
+}
+
 /**
  * @brief Activate the #part drifts on the given cell.
  */
@@ -1622,7 +1833,10 @@ void cell_activate_drift_part(struct cell *c, struct scheduler *s) {
     for (struct cell *parent = c->parent;
          parent != NULL && !parent->hydro.do_sub_drift;
          parent = parent->parent) {
+
+      /* Mark this cell for drifting */
       parent->hydro.do_sub_drift = 1;
+
       if (parent == c->hydro.super) {
 #ifdef SWIFT_DEBUG_CHECKS
         if (parent->hydro.drift == NULL)
@@ -1686,6 +1900,45 @@ void cell_activate_drift_spart(struct cell *c, struct scheduler *s) {
   cell_activate_drift_gpart(c, s);
 }
 
+/**
+ * @brief Activate the time-step limiter task for the given cell.
+ */
+void cell_activate_limiter(struct cell *c, struct scheduler *s) {
+
+  /* If this cell is already marked for limiting, quit early. */
+  if (c->hydro.do_limiter) return;
+
+  /* Mark this cell for limiting. */
+  c->hydro.do_limiter = 1;
+
+  /* Set the do_sub_limiter all the way up and activate the super limiter
+     if this has not yet been done. */
+  if (c == c->super) {
+#ifdef SWIFT_DEBUG_CHECKS
+    if (c->timestep_limiter == NULL)
+      error("Trying to activate un-existing c->timestep_limiter");
+#endif
+    scheduler_activate(s, c->timestep_limiter);
+  } else {
+    for (struct cell *parent = c->parent;
+         parent != NULL && !parent->hydro.do_sub_limiter;
+         parent = parent->parent) {
+
+      /* Flag the ancestor as having a sub-cell to limit */
+      parent->hydro.do_sub_limiter = 1;
+
+      if (parent == c->super) {
+#ifdef SWIFT_DEBUG_CHECKS
+        if (parent->timestep_limiter == NULL)
+          error("Trying to activate un-existing parent->timestep_limiter");
+#endif
+        scheduler_activate(s, parent->timestep_limiter);
+        break;
+      }
+    }
+  }
+}
+
 /**
  * @brief Activate the sorts up a cell hierarchy.
  */
@@ -1816,6 +2069,7 @@ void cell_activate_stars_sorts(struct cell *c, int sid, struct scheduler *s) {
 void cell_activate_subcell_hydro_tasks(struct cell *ci, struct cell *cj,
                                        struct scheduler *s) {
   const struct engine *e = s->space->e;
+  const int with_limiter = (e->policy & engine_policy_limiter);
 
   /* Store the current dx_max and h_max values. */
   ci->hydro.dx_max_part_old = ci->hydro.dx_max_part;
@@ -1849,6 +2103,7 @@ void cell_activate_subcell_hydro_tasks(struct cell *ci, struct cell *cj,
 
       /* We have reached the bottom of the tree: activate drift */
       cell_activate_drift_part(ci, s);
+      if (with_limiter) cell_activate_limiter(ci, s);
     }
   }
 
@@ -2154,6 +2409,12 @@ void cell_activate_subcell_hydro_tasks(struct cell *ci, struct cell *cj,
       if (ci->nodeID == engine_rank) cell_activate_drift_part(ci, s);
       if (cj->nodeID == engine_rank) cell_activate_drift_part(cj, s);
 
+      /* Also activate the time-step limiter */
+      if (ci->nodeID == engine_rank && with_limiter)
+        cell_activate_limiter(ci, s);
+      if (cj->nodeID == engine_rank && with_limiter)
+        cell_activate_limiter(cj, s);
+
       /* Do we need to sort the cells? */
       cell_activate_hydro_sorts(ci, sid, s);
       cell_activate_hydro_sorts(cj, sid, s);
@@ -2718,6 +2979,7 @@ int cell_unskip_hydro_tasks(struct cell *c, struct scheduler *s) {
 
   struct engine *e = s->space->e;
   const int nodeID = e->nodeID;
+  const int with_limiter = (e->policy & engine_policy_limiter);
   int rebuild = 0;
 
   /* Un-skip the density tasks involved with this cell. */
@@ -2743,6 +3005,7 @@ int cell_unskip_hydro_tasks(struct cell *c, struct scheduler *s) {
       /* Activate hydro drift */
       if (t->type == task_type_self) {
         if (ci_nodeID == nodeID) cell_activate_drift_part(ci, s);
+        if (ci_nodeID == nodeID && with_limiter) cell_activate_limiter(ci, s);
       }
 
       /* Set the correct sorting flags and activate hydro drifts */
@@ -2757,6 +3020,10 @@ int cell_unskip_hydro_tasks(struct cell *c, struct scheduler *s) {
         if (ci_nodeID == nodeID) cell_activate_drift_part(ci, s);
         if (cj_nodeID == nodeID) cell_activate_drift_part(cj, s);
 
+        /* Activate the limiter tasks. */
+        if (ci_nodeID == nodeID && with_limiter) cell_activate_limiter(ci, s);
+        if (cj_nodeID == nodeID && with_limiter) cell_activate_limiter(cj, s);
+
         /* Check the sorts and activate them if needed. */
         cell_activate_hydro_sorts(ci, t->flags, s);
         cell_activate_hydro_sorts(cj, t->flags, s);
@@ -2791,7 +3058,11 @@ int cell_unskip_hydro_tasks(struct cell *c, struct scheduler *s) {
         }
 
         /* If the foreign cell is active, we want its ti_end values. */
-        if (ci_active) scheduler_activate(s, ci->mpi.recv_ti);
+        if (ci_active || with_limiter) scheduler_activate(s, ci->mpi.recv_ti);
+
+        if (with_limiter) scheduler_activate(s, ci->mpi.limiter.recv);
+        if (with_limiter)
+          scheduler_activate_send(s, cj->mpi.limiter.send, ci->nodeID);
 
         /* Is the foreign cell active and will need stuff from us? */
         if (ci_active) {
@@ -2801,6 +3072,7 @@ int cell_unskip_hydro_tasks(struct cell *c, struct scheduler *s) {
           /* Drift the cell which will be sent; note that not all sent
              particles will be drifted, only those that are needed. */
           cell_activate_drift_part(cj, s);
+          if (with_limiter) cell_activate_limiter(cj, s);
 
           /* If the local cell is also active, more stuff will be needed. */
           if (cj_active) {
@@ -2813,7 +3085,8 @@ int cell_unskip_hydro_tasks(struct cell *c, struct scheduler *s) {
         }
 
         /* If the local cell is active, send its ti_end values. */
-        if (cj_active) scheduler_activate_send(s, cj->mpi.send_ti, ci_nodeID);
+        if (cj_active || with_limiter)
+          scheduler_activate_send(s, cj->mpi.send_ti, ci_nodeID);
 
       } else if (cj_nodeID != nodeID) {
 
@@ -2830,7 +3103,11 @@ int cell_unskip_hydro_tasks(struct cell *c, struct scheduler *s) {
         }
 
         /* If the foreign cell is active, we want its ti_end values. */
-        if (cj_active) scheduler_activate(s, cj->mpi.recv_ti);
+        if (cj_active || with_limiter) scheduler_activate(s, cj->mpi.recv_ti);
+
+        if (with_limiter) scheduler_activate(s, cj->mpi.limiter.recv);
+        if (with_limiter)
+          scheduler_activate_send(s, ci->mpi.limiter.send, cj->nodeID);
 
         /* Is the foreign cell active and will need stuff from us? */
         if (cj_active) {
@@ -2840,6 +3117,7 @@ int cell_unskip_hydro_tasks(struct cell *c, struct scheduler *s) {
           /* Drift the cell which will be sent; note that not all sent
              particles will be drifted, only those that are needed. */
           cell_activate_drift_part(ci, s);
+          if (with_limiter) cell_activate_limiter(ci, s);
 
           /* If the local cell is also active, more stuff will be needed. */
           if (ci_active) {
@@ -2853,7 +3131,8 @@ int cell_unskip_hydro_tasks(struct cell *c, struct scheduler *s) {
         }
 
         /* If the local cell is active, send its ti_end values. */
-        if (ci_active) scheduler_activate_send(s, ci->mpi.send_ti, cj_nodeID);
+        if (ci_active || with_limiter)
+          scheduler_activate_send(s, ci->mpi.send_ti, cj_nodeID);
       }
 #endif
     }
@@ -2866,6 +3145,8 @@ int cell_unskip_hydro_tasks(struct cell *c, struct scheduler *s) {
       scheduler_activate(s, l->t);
     for (struct link *l = c->hydro.force; l != NULL; l = l->next)
       scheduler_activate(s, l->t);
+    for (struct link *l = c->hydro.limiter; l != NULL; l = l->next)
+      scheduler_activate(s, l->t);
 
     if (c->hydro.extra_ghost != NULL)
       scheduler_activate(s, c->hydro.extra_ghost);
@@ -2879,7 +3160,6 @@ int cell_unskip_hydro_tasks(struct cell *c, struct scheduler *s) {
     if (c->hydro.cooling != NULL) scheduler_activate(s, c->hydro.cooling);
     if (c->hydro.star_formation != NULL)
       scheduler_activate(s, c->hydro.star_formation);
-    if (c->sourceterms != NULL) scheduler_activate(s, c->sourceterms);
     if (c->logger != NULL) scheduler_activate(s, c->logger);
   }
 
diff --git a/src/cell.h b/src/cell.h
index 9452accbab6312f235764e689522ac3edbaafe61..c5fbc9b8c02b0e008d337189fdbf582faf4fa600 100644
--- a/src/cell.h
+++ b/src/cell.h
@@ -263,6 +263,9 @@ struct cell {
     /*! Linked list of the tasks computing this cell's hydro forces. */
     struct link *force;
 
+    /*! Linked list of the tasks computing this cell's limiter. */
+    struct link *limiter;
+
     /*! Dependency implicit task for the ghost  (in->ghost->out)*/
     struct task *ghost_in;
 
@@ -348,6 +351,12 @@ struct cell {
     /*! Do any of this cell's sub-cells need to be sorted? */
     char do_sub_sort;
 
+    /*! Does this cell need to be limited? */
+    char do_limiter;
+
+    /*! Do any of this cell's sub-cells need to be limited? */
+    char do_sub_limiter;
+
 #ifdef SWIFT_DEBUG_CHECKS
 
     /*! Last (integer) time the cell's sort arrays were updated. */
@@ -570,6 +579,15 @@ struct cell {
       struct link *send;
     } grav;
 
+    struct {
+
+      /* Task receiving time-step limiter data. */
+      struct task *recv;
+
+      /* Linked list for sending time-step limiter data. */
+      struct link *send;
+    } limiter;
+
     /* Task receiving data (time-step). */
     struct task *recv_ti;
 
@@ -603,8 +621,8 @@ struct cell {
   /*! The task to compute time-steps */
   struct task *timestep;
 
-  /*! Task for source terms */
-  struct task *sourceterms;
+  /*! The task to limit the time-step of inactive particles */
+  struct task *timestep_limiter;
 
   /*! The logger task */
   struct task *logger;
@@ -673,6 +691,10 @@ int cell_getsize(struct cell *c);
 int cell_link_parts(struct cell *c, struct part *parts);
 int cell_link_gparts(struct cell *c, struct gpart *gparts);
 int cell_link_sparts(struct cell *c, struct spart *sparts);
+int cell_link_foreign_parts(struct cell *c, struct part *parts);
+int cell_link_foreign_gparts(struct cell *c, struct gpart *gparts);
+int cell_count_parts_for_tasks(const struct cell *c);
+int cell_count_gparts_for_tasks(const struct cell *c);
 void cell_clean_links(struct cell *c, void *data);
 void cell_make_multipoles(struct cell *c, integertime_t ti_current);
 void cell_check_multipole(struct cell *c);
@@ -705,7 +727,9 @@ void cell_activate_drift_gpart(struct cell *c, struct scheduler *s);
 void cell_activate_drift_spart(struct cell *c, struct scheduler *s);
 void cell_activate_hydro_sorts(struct cell *c, int sid, struct scheduler *s);
 void cell_activate_stars_sorts(struct cell *c, int sid, struct scheduler *s);
+void cell_activate_limiter(struct cell *c, struct scheduler *s);
 void cell_clear_drift_flags(struct cell *c, void *data);
+void cell_clear_limiter_flags(struct cell *c, void *data);
 void cell_set_super_mapper(void *map_data, int num_elements, void *extra_data);
 void cell_check_spart_pos(const struct cell *c,
                           const struct spart *global_sparts);
diff --git a/src/common_io.c b/src/common_io.c
index 24e74014fd52936023b5c7a41378faf3268bfdb3..733cf1dacac5f0c73ea401a584e2aa40eadd4a23 100644
--- a/src/common_io.c
+++ b/src/common_io.c
@@ -142,7 +142,7 @@ void io_read_attribute(hid_t grp, const char* name, enum IO_DATA_TYPE type,
  * Calls #error() if an error occurs.
  */
 void io_write_attribute(hid_t grp, const char* name, enum IO_DATA_TYPE type,
-                        void* data, int num) {
+                        const void* data, int num) {
 
   const hid_t h_space = H5Screate(H5S_SIMPLE);
   if (h_space < 0)
@@ -387,6 +387,332 @@ void io_write_engine_policy(hid_t h_file, const struct engine* e) {
   H5Gclose(h_grp);
 }
 
+void io_write_cell_offsets(hid_t h_grp, const int cdim[3],
+                           const struct cell* cells_top, const int nr_cells,
+                           const double width[3], const int nodeID,
+                           const long long global_counts[swift_type_count],
+                           const long long global_offsets[swift_type_count],
+                           const struct unit_system* internal_units,
+                           const struct unit_system* snapshot_units) {
+
+  double cell_width[3] = {width[0], width[1], width[2]};
+
+  /* Temporary memory for the cell-by-cell information */
+  double* centres = NULL;
+  centres = (double*)malloc(3 * nr_cells * sizeof(double));
+
+  /* Count of particles in each cell */
+  long long *count_part = NULL, *count_gpart = NULL, *count_spart = NULL;
+  count_part = (long long*)malloc(nr_cells * sizeof(long long));
+  count_gpart = (long long*)malloc(nr_cells * sizeof(long long));
+  count_spart = (long long*)malloc(nr_cells * sizeof(long long));
+
+  /* Global offsets of particles in each cell */
+  long long *offset_part = NULL, *offset_gpart = NULL, *offset_spart = NULL;
+  offset_part = (long long*)malloc(nr_cells * sizeof(long long));
+  offset_gpart = (long long*)malloc(nr_cells * sizeof(long long));
+  offset_spart = (long long*)malloc(nr_cells * sizeof(long long));
+
+  /* Offsets of the 0^th element */
+  offset_part[0] = 0;
+  offset_gpart[0] = 0;
+  offset_spart[0] = 0;
+
+  /* Collect the cell information of *local* cells */
+  long long local_offset_part = 0;
+  long long local_offset_gpart = 0;
+  long long local_offset_spart = 0;
+  for (int i = 0; i < nr_cells; ++i) {
+
+    if (cells_top[i].nodeID == nodeID) {
+
+      /* Centre of each cell */
+      centres[i * 3 + 0] = cells_top[i].loc[0] + cell_width[0] * 0.5;
+      centres[i * 3 + 1] = cells_top[i].loc[1] + cell_width[1] * 0.5;
+      centres[i * 3 + 2] = cells_top[i].loc[2] + cell_width[2] * 0.5;
+
+      /* Count real particles that will be written */
+      count_part[i] = cells_top[i].hydro.count - cells_top[i].hydro.inhibited;
+      count_gpart[i] = cells_top[i].grav.count - cells_top[i].grav.inhibited;
+      count_spart[i] = cells_top[i].stars.count - cells_top[i].stars.inhibited;
+
+      /* Only count DM gpart (gpart without friends) */
+      count_gpart[i] -= count_part[i];
+      count_gpart[i] -= count_spart[i];
+
+      /* Offsets including the global offset of all particles on this MPI rank
+       */
+      offset_part[i] = local_offset_part + global_offsets[swift_type_gas];
+      offset_gpart[i] =
+          local_offset_gpart + global_offsets[swift_type_dark_matter];
+      offset_spart[i] = local_offset_spart + global_offsets[swift_type_stars];
+
+      local_offset_part += count_part[i];
+      local_offset_gpart += count_gpart[i];
+      local_offset_spart += count_spart[i];
+
+    } else {
+
+      /* Just zero everything for the foreign cells */
+
+      centres[i * 3 + 0] = 0.;
+      centres[i * 3 + 1] = 0.;
+      centres[i * 3 + 2] = 0.;
+
+      count_part[i] = 0;
+      count_gpart[i] = 0;
+      count_spart[i] = 0;
+
+      offset_part[i] = 0;
+      offset_gpart[i] = 0;
+      offset_spart[i] = 0;
+    }
+  }
+
+#ifdef WITH_MPI
+  /* Now, reduce all the arrays. Note that we use a bit-wise OR here. This
+     is safe as we made sure only local cells have non-zero values. */
+  if (nodeID == 0) {
+    MPI_Reduce(MPI_IN_PLACE, count_part, nr_cells, MPI_LONG_LONG_INT, MPI_BOR,
+               0, MPI_COMM_WORLD);
+  } else {
+    MPI_Reduce(count_part, NULL, nr_cells, MPI_LONG_LONG_INT, MPI_BOR, 0,
+               MPI_COMM_WORLD);
+  }
+  if (nodeID == 0) {
+    MPI_Reduce(MPI_IN_PLACE, count_gpart, nr_cells, MPI_LONG_LONG_INT, MPI_BOR,
+               0, MPI_COMM_WORLD);
+  } else {
+    MPI_Reduce(count_gpart, NULL, nr_cells, MPI_LONG_LONG_INT, MPI_BOR, 0,
+               MPI_COMM_WORLD);
+  }
+  if (nodeID == 0) {
+    MPI_Reduce(MPI_IN_PLACE, count_spart, nr_cells, MPI_LONG_LONG_INT, MPI_BOR,
+               0, MPI_COMM_WORLD);
+  } else {
+    MPI_Reduce(count_spart, NULL, nr_cells, MPI_LONG_LONG_INT, MPI_BOR, 0,
+               MPI_COMM_WORLD);
+  }
+  if (nodeID == 0) {
+    MPI_Reduce(MPI_IN_PLACE, offset_part, nr_cells, MPI_LONG_LONG_INT, MPI_BOR,
+               0, MPI_COMM_WORLD);
+  } else {
+    MPI_Reduce(offset_part, NULL, nr_cells, MPI_LONG_LONG_INT, MPI_BOR, 0,
+               MPI_COMM_WORLD);
+  }
+  if (nodeID == 0) {
+    MPI_Reduce(MPI_IN_PLACE, offset_gpart, nr_cells, MPI_LONG_LONG_INT, MPI_BOR,
+               0, MPI_COMM_WORLD);
+  } else {
+    MPI_Reduce(offset_gpart, NULL, nr_cells, MPI_LONG_LONG_INT, MPI_BOR, 0,
+               MPI_COMM_WORLD);
+  }
+  if (nodeID == 0) {
+    MPI_Reduce(MPI_IN_PLACE, offset_spart, nr_cells, MPI_LONG_LONG_INT, MPI_BOR,
+               0, MPI_COMM_WORLD);
+  } else {
+    MPI_Reduce(offset_spart, NULL, nr_cells, MPI_LONG_LONG_INT, MPI_BOR, 0,
+               MPI_COMM_WORLD);
+  }
+
+  /* For the centres we use a sum as MPI does not like bit-wise operations
+     on floating point numbers */
+  if (nodeID == 0) {
+    MPI_Reduce(MPI_IN_PLACE, centres, 3 * nr_cells, MPI_DOUBLE, MPI_SUM, 0,
+               MPI_COMM_WORLD);
+  } else {
+    MPI_Reduce(centres, NULL, 3 * nr_cells, MPI_DOUBLE, MPI_SUM, 0,
+               MPI_COMM_WORLD);
+  }
+#endif
+
+  /* Only rank 0 actually writes */
+  if (nodeID == 0) {
+
+    /* Unit conversion if necessary */
+    const double factor = units_conversion_factor(
+        internal_units, snapshot_units, UNIT_CONV_LENGTH);
+    if (factor != 1.) {
+
+      /* Convert the cell centres */
+      for (int i = 0; i < nr_cells; ++i) {
+        centres[i * 3 + 0] *= factor;
+        centres[i * 3 + 1] *= factor;
+        centres[i * 3 + 2] *= factor;
+      }
+
+      /* Convert the cell widths */
+      cell_width[0] *= factor;
+      cell_width[1] *= factor;
+      cell_width[2] *= factor;
+    }
+
+    /* Write some meta-information first */
+    hid_t h_subgrp =
+        H5Gcreate(h_grp, "Meta-data", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+    if (h_subgrp < 0) error("Error while creating meta-data sub-group");
+    io_write_attribute(h_subgrp, "nr_cells", INT, &nr_cells, 1);
+    io_write_attribute(h_subgrp, "size", DOUBLE, cell_width, 3);
+    io_write_attribute(h_subgrp, "dimension", INT, cdim, 3);
+    H5Gclose(h_subgrp);
+
+    /* Write the centres to the group */
+    hsize_t shape[2] = {nr_cells, 3};
+    hid_t h_space = H5Screate(H5S_SIMPLE);
+    if (h_space < 0) error("Error while creating data space for cell centres");
+    hid_t h_err = H5Sset_extent_simple(h_space, 2, shape, shape);
+    if (h_err < 0)
+      error("Error while changing shape of gas offsets data space.");
+    hid_t h_data = H5Dcreate(h_grp, "Centres", io_hdf5_type(DOUBLE), h_space,
+                             H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+    if (h_data < 0) error("Error while creating dataspace for gas offsets.");
+    h_err = H5Dwrite(h_data, io_hdf5_type(DOUBLE), h_space, H5S_ALL,
+                     H5P_DEFAULT, centres);
+    if (h_err < 0) error("Error while writing centres.");
+    H5Dclose(h_data);
+    H5Sclose(h_space);
+
+    /* Group containing the offsets for each particle type */
+    h_subgrp =
+        H5Gcreate(h_grp, "Offsets", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+    if (h_subgrp < 0) error("Error while creating offsets sub-group");
+
+    if (global_counts[swift_type_gas] > 0) {
+
+      shape[0] = nr_cells;
+      shape[1] = 1;
+      h_space = H5Screate(H5S_SIMPLE);
+      if (h_space < 0) error("Error while creating data space for gas offsets");
+      h_err = H5Sset_extent_simple(h_space, 1, shape, shape);
+      if (h_err < 0)
+        error("Error while changing shape of gas offsets data space.");
+      h_data = H5Dcreate(h_subgrp, "PartType0", io_hdf5_type(LONGLONG), h_space,
+                         H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+      if (h_data < 0) error("Error while creating dataspace for gas offsets.");
+      h_err = H5Dwrite(h_data, io_hdf5_type(LONGLONG), h_space, H5S_ALL,
+                       H5P_DEFAULT, offset_part);
+      if (h_err < 0) error("Error while writing gas offsets.");
+      H5Dclose(h_data);
+      H5Sclose(h_space);
+    }
+
+    if (global_counts[swift_type_dark_matter] > 0) {
+
+      shape[0] = nr_cells;
+      shape[1] = 1;
+      h_space = H5Screate(H5S_SIMPLE);
+      if (h_space < 0) error("Error while creating data space for DM offsets");
+      h_err = H5Sset_extent_simple(h_space, 1, shape, shape);
+      if (h_err < 0)
+        error("Error while changing shape of DM offsets data space.");
+      h_data = H5Dcreate(h_subgrp, "PartType1", io_hdf5_type(LONGLONG), h_space,
+                         H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+      if (h_data < 0) error("Error while creating dataspace for DM offsets.");
+      h_err = H5Dwrite(h_data, io_hdf5_type(LONGLONG), h_space, H5S_ALL,
+                       H5P_DEFAULT, offset_gpart);
+      if (h_err < 0) error("Error while writing DM offsets.");
+      H5Dclose(h_data);
+      H5Sclose(h_space);
+    }
+
+    if (global_counts[swift_type_stars] > 0) {
+
+      shape[0] = nr_cells;
+      shape[1] = 1;
+      h_space = H5Screate(H5S_SIMPLE);
+      if (h_space < 0)
+        error("Error while creating data space for stars offsets");
+      h_err = H5Sset_extent_simple(h_space, 1, shape, shape);
+      if (h_err < 0)
+        error("Error while changing shape of stars offsets data space.");
+      h_data = H5Dcreate(h_subgrp, "PartType4", io_hdf5_type(LONGLONG), h_space,
+                         H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+      if (h_data < 0) error("Error while creating dataspace for star offsets.");
+      h_err = H5Dwrite(h_data, io_hdf5_type(LONGLONG), h_space, H5S_ALL,
+                       H5P_DEFAULT, offset_spart);
+      if (h_err < 0) error("Error while writing star offsets.");
+      H5Dclose(h_data);
+      H5Sclose(h_space);
+    }
+
+    H5Gclose(h_subgrp);
+
+    /* Group containing the counts for each particle type */
+    h_subgrp =
+        H5Gcreate(h_grp, "Counts", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+    if (h_subgrp < 0) error("Error while creating counts sub-group");
+
+    if (global_counts[swift_type_gas] > 0) {
+
+      shape[0] = nr_cells;
+      shape[1] = 1;
+      h_space = H5Screate(H5S_SIMPLE);
+      if (h_space < 0) error("Error while creating data space for gas counts");
+      h_err = H5Sset_extent_simple(h_space, 1, shape, shape);
+      if (h_err < 0)
+        error("Error while changing shape of gas counts data space.");
+      h_data = H5Dcreate(h_subgrp, "PartType0", io_hdf5_type(LONGLONG), h_space,
+                         H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+      if (h_data < 0) error("Error while creating dataspace for gas counts.");
+      h_err = H5Dwrite(h_data, io_hdf5_type(LONGLONG), h_space, H5S_ALL,
+                       H5P_DEFAULT, count_part);
+      if (h_err < 0) error("Error while writing gas counts.");
+      H5Dclose(h_data);
+      H5Sclose(h_space);
+    }
+
+    if (global_counts[swift_type_dark_matter] > 0) {
+
+      shape[0] = nr_cells;
+      shape[1] = 1;
+      h_space = H5Screate(H5S_SIMPLE);
+      if (h_space < 0) error("Error while creating data space for DM counts");
+      h_err = H5Sset_extent_simple(h_space, 1, shape, shape);
+      if (h_err < 0)
+        error("Error while changing shape of DM counts data space.");
+      h_data = H5Dcreate(h_subgrp, "PartType1", io_hdf5_type(LONGLONG), h_space,
+                         H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+      if (h_data < 0) error("Error while creating dataspace for DM counts.");
+      h_err = H5Dwrite(h_data, io_hdf5_type(LONGLONG), h_space, H5S_ALL,
+                       H5P_DEFAULT, count_gpart);
+      if (h_err < 0) error("Error while writing DM counts.");
+      H5Dclose(h_data);
+      H5Sclose(h_space);
+    }
+
+    if (global_counts[swift_type_stars] > 0) {
+
+      shape[0] = nr_cells;
+      shape[1] = 1;
+      h_space = H5Screate(H5S_SIMPLE);
+      if (h_space < 0)
+        error("Error while creating data space for stars counts");
+      h_err = H5Sset_extent_simple(h_space, 1, shape, shape);
+      if (h_err < 0)
+        error("Error while changing shape of stars counts data space.");
+      h_data = H5Dcreate(h_subgrp, "PartType4", io_hdf5_type(LONGLONG), h_space,
+                         H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+      if (h_data < 0) error("Error while creating dataspace for star counts.");
+      h_err = H5Dwrite(h_data, io_hdf5_type(LONGLONG), h_space, H5S_ALL,
+                       H5P_DEFAULT, count_spart);
+      if (h_err < 0) error("Error while writing star counts.");
+      H5Dclose(h_data);
+      H5Sclose(h_space);
+    }
+
+    H5Gclose(h_subgrp);
+  }
+
+  /* Free everything we allocated */
+  free(centres);
+  free(count_part);
+  free(count_gpart);
+  free(count_spart);
+  free(offset_part);
+  free(offset_gpart);
+  free(offset_spart);
+}
+
 #endif /* HAVE_HDF5 */
 
 /**
@@ -482,6 +808,28 @@ void io_convert_part_d_mapper(void* restrict temp, int N,
                          &temp_d[i * dim]);
 }
 
+/**
+ * @brief Mapper function to copy #part into a buffer of long longs using a
+ * conversion function.
+ */
+void io_convert_part_l_mapper(void* restrict temp, int N,
+                              void* restrict extra_data) {
+
+  const struct io_props props = *((const struct io_props*)extra_data);
+  const struct part* restrict parts = props.parts;
+  const struct xpart* restrict xparts = props.xparts;
+  const struct engine* e = props.e;
+  const size_t dim = props.dimension;
+
+  /* Particle offset of this chunk relative to the start of the buffer */
+  long long* restrict temp_l = (long long*)temp;
+  const ptrdiff_t delta = (temp_l - props.start_temp_l) / dim;
+
+  for (int i = 0; i < N; i++)
+    props.convert_part_l(e, parts + delta + i, xparts + delta + i,
+                         &temp_l[i * dim]);
+}
+
 /**
  * @brief Mapper function to copy #gpart into a buffer of floats using a
  * conversion function.
@@ -522,6 +870,26 @@ void io_convert_gpart_d_mapper(void* restrict temp, int N,
     props.convert_gpart_d(e, gparts + delta + i, &temp_d[i * dim]);
 }
 
+/**
+ * @brief Mapper function to copy #gpart into a buffer of long longs using a
+ * conversion function.
+ */
+void io_convert_gpart_l_mapper(void* restrict temp, int N,
+                               void* restrict extra_data) {
+
+  const struct io_props props = *((const struct io_props*)extra_data);
+  const struct gpart* restrict gparts = props.gparts;
+  const struct engine* e = props.e;
+  const size_t dim = props.dimension;
+
+  /* Particle offset of this chunk relative to the start of the buffer */
+  long long* restrict temp_l = (long long*)temp;
+  const ptrdiff_t delta = (temp_l - props.start_temp_l) / dim;
+
+  for (int i = 0; i < N; i++)
+    props.convert_gpart_l(e, gparts + delta + i, &temp_l[i * dim]);
+}
+
 /**
  * @brief Mapper function to copy #spart into a buffer of floats using a
  * conversion function.
@@ -562,6 +930,26 @@ void io_convert_spart_d_mapper(void* restrict temp, int N,
     props.convert_spart_d(e, sparts + delta + i, &temp_d[i * dim]);
 }
 
+/**
+ * @brief Mapper function to copy #spart into a buffer of long longs using a
+ * conversion function.
+ */
+void io_convert_spart_l_mapper(void* restrict temp, int N,
+                               void* restrict extra_data) {
+
+  const struct io_props props = *((const struct io_props*)extra_data);
+  const struct spart* restrict sparts = props.sparts;
+  const struct engine* e = props.e;
+  const size_t dim = props.dimension;
+
+  /* Particle offset of this chunk relative to the start of the buffer */
+  long long* restrict temp_l = (long long*)temp;
+  const ptrdiff_t delta = (temp_l - props.start_temp_l) / dim;
+
+  for (int i = 0; i < N; i++)
+    props.convert_spart_l(e, sparts + delta + i, &temp_l[i * dim]);
+}
+
 /**
  * @brief Copy the particle data into a temporary buffer ready for i/o.
  *
@@ -619,6 +1007,18 @@ void io_copy_temp_buffer(void* temp, const struct engine* e,
                      io_convert_part_d_mapper, temp_d, N, copySize, 0,
                      (void*)&props);
 
+    } else if (props.convert_part_l != NULL) {
+
+      /* Prepare some parameters */
+      long long* temp_l = (long long*)temp;
+      props.start_temp_l = (long long*)temp;
+      props.e = e;
+
+      /* Copy the whole thing into a buffer */
+      threadpool_map((struct threadpool*)&e->threadpool,
+                     io_convert_part_l_mapper, temp_l, N, copySize, 0,
+                     (void*)&props);
+
     } else if (props.convert_gpart_f != NULL) {
 
       /* Prepare some parameters */
@@ -643,6 +1043,18 @@ void io_copy_temp_buffer(void* temp, const struct engine* e,
                      io_convert_gpart_d_mapper, temp_d, N, copySize, 0,
                      (void*)&props);
 
+    } else if (props.convert_gpart_l != NULL) {
+
+      /* Prepare some parameters */
+      long long* temp_l = (long long*)temp;
+      props.start_temp_l = (long long*)temp;
+      props.e = e;
+
+      /* Copy the whole thing into a buffer */
+      threadpool_map((struct threadpool*)&e->threadpool,
+                     io_convert_gpart_l_mapper, temp_l, N, copySize, 0,
+                     (void*)&props);
+
     } else if (props.convert_spart_f != NULL) {
 
       /* Prepare some parameters */
@@ -667,6 +1079,18 @@ void io_copy_temp_buffer(void* temp, const struct engine* e,
                      io_convert_spart_d_mapper, temp_d, N, copySize, 0,
                      (void*)&props);
 
+    } else if (props.convert_spart_l != NULL) {
+
+      /* Prepare some parameters */
+      long long* temp_l = (long long*)temp;
+      props.start_temp_l = (long long*)temp;
+      props.e = e;
+
+      /* Copy the whole thing into a buffer */
+      threadpool_map((struct threadpool*)&e->threadpool,
+                     io_convert_spart_l_mapper, temp_l, N, copySize, 0,
+                     (void*)&props);
+
     } else {
       error("Missing conversion function");
     }
@@ -928,15 +1352,21 @@ void io_collect_sparts_to_write(const struct spart* restrict sparts,
  * @brief Copy every non-inhibited DM #gpart into the gparts_written array.
  *
  * @param gparts The array of #gpart containing all particles.
+ * @param vr_data The array of gpart-related VELOCIraptor output.
  * @param gparts_written The array of #gpart to fill with particles we want to
  * write.
+ * @param vr_data_written The array of gpart-related VELOCIraptor output to
+ * fill with the particles we want to write.
  * @param Ngparts The total number of #part.
  * @param Ngparts_written The total number of #part to write.
+ * @param with_stf Are we running with STF? i.e. do we want to collect vr data?
  */
-void io_collect_gparts_to_write(const struct gpart* restrict gparts,
-                                struct gpart* restrict gparts_written,
-                                const size_t Ngparts,
-                                const size_t Ngparts_written) {
+void io_collect_gparts_to_write(
+    const struct gpart* restrict gparts,
+    const struct velociraptor_gpart_data* restrict vr_data,
+    struct gpart* restrict gparts_written,
+    struct velociraptor_gpart_data* restrict vr_data_written,
+    const size_t Ngparts, const size_t Ngparts_written, const int with_stf) {
 
   size_t count = 0;
 
@@ -948,6 +1378,8 @@ void io_collect_gparts_to_write(const struct gpart* restrict gparts,
         (gparts[i].time_bin != time_bin_not_created) &&
         (gparts[i].type == swift_type_dark_matter)) {
 
+      if (with_stf) vr_data_written[count] = vr_data[i];
+
       gparts_written[count] = gparts[i];
       count++;
     }
@@ -955,7 +1387,7 @@ void io_collect_gparts_to_write(const struct gpart* restrict gparts,
 
   /* Check that everything is fine */
   if (count != Ngparts_written)
-    error("Collected the wrong number of s-particles (%zu vs. %zu expected)",
+    error("Collected the wrong number of g-particles (%zu vs. %zu expected)",
           count, Ngparts_written);
 }
 
diff --git a/src/common_io.h b/src/common_io.h
index 016c5138e18ae8636834c35d659e07d8fcd46e36..eb1ee0a804f324d897842fb2a0ca33fc07e769d6 100644
--- a/src/common_io.h
+++ b/src/common_io.h
@@ -24,6 +24,7 @@
 #include "../config.h"
 
 /* Local includes. */
+#include "part_type.h"
 #include "units.h"
 
 #define FIELD_BUFFER_SIZE 200
@@ -32,8 +33,10 @@
 #define IO_BUFFER_ALIGNMENT 1024
 
 /* Avoid cyclic inclusion problems */
+struct cell;
 struct part;
 struct gpart;
+struct velociraptor_gpart_data;
 struct spart;
 struct xpart;
 struct io_props;
@@ -65,7 +68,7 @@ void io_read_attribute(hid_t grp, const char* name, enum IO_DATA_TYPE type,
                        void* data);
 
 void io_write_attribute(hid_t grp, const char* name, enum IO_DATA_TYPE type,
-                        void* data, int num);
+                        const void* data, int num);
 
 void io_write_attribute_d(hid_t grp, const char* name, double data);
 void io_write_attribute_f(hid_t grp, const char* name, float data);
@@ -76,6 +79,14 @@ void io_write_attribute_s(hid_t grp, const char* name, const char* str);
 void io_write_code_description(hid_t h_file);
 void io_write_engine_policy(hid_t h_file, const struct engine* e);
 
+void io_write_cell_offsets(hid_t h_grp, const int cdim[3],
+                           const struct cell* cells_top, const int nr_cells,
+                           const double width[3], const int nodeID,
+                           const long long global_counts[swift_type_count],
+                           const long long global_offsets[swift_type_count],
+                           const struct unit_system* internal_units,
+                           const struct unit_system* snapshot_units);
+
 void io_read_unit_system(hid_t h_file, struct unit_system* ic_units,
                          const struct unit_system* internal_units,
                          int mpi_rank);
@@ -103,9 +114,11 @@ void io_collect_sparts_to_write(const struct spart* restrict sparts,
                                 const size_t Nsparts,
                                 const size_t Nsparts_written);
 void io_collect_gparts_to_write(const struct gpart* restrict gparts,
+                                const struct velociraptor_gpart_data* vr_data,
                                 struct gpart* restrict gparts_written,
+                                struct velociraptor_gpart_data* vr_data_written,
                                 const size_t Ngparts,
-                                const size_t Ngparts_written);
+                                const size_t Ngparts_written, int with_stf);
 void io_prepare_dm_gparts(struct threadpool* tp, struct gpart* const gparts,
                           size_t Ndm);
 void io_duplicate_hydro_gparts(struct threadpool* tp, struct part* const parts,
diff --git a/src/const.h b/src/const.h
index e417b8ca3827ef87396706c56df36bb9bd3aed75..613a48920e6f26c209faf6e354b82c2ed5be0bf1 100644
--- a/src/const.h
+++ b/src/const.h
@@ -33,6 +33,9 @@
 /* Time integration constants. */
 #define const_max_u_change 0.1f
 
+/* Time-step limiter maximal difference in signal velocity */
+#define const_limiter_max_v_sig_ratio 4.1f
+
 /* Type of gradients to use (GIZMO_SPH only) */
 /* If no option is chosen, no gradients are used (first order scheme) */
 //#define GRADIENTS_SPH
diff --git a/src/cooling/EAGLE/cooling.c b/src/cooling/EAGLE/cooling.c
index ed38917ad458214f07e7fa52391ab52546a5f4e0..60eba23a5e8927d271c157b1dc0a598bcdcfb1ff 100644
--- a/src/cooling/EAGLE/cooling.c
+++ b/src/cooling/EAGLE/cooling.c
@@ -499,14 +499,13 @@ void cooling_cool_part(const struct phys_const *restrict phys_const,
   float abundance_ratio[chemistry_element_count + 2];
   abundance_ratio_to_solar(p, cooling, abundance_ratio);
 
-  /* Get the Hydrogen mass fraction */
+  /* Get the Hydrogen and Helium mass fractions */
   const float XH = p->chemistry_data.metal_mass_fraction[chemistry_element_H];
+  const float XHe = p->chemistry_data.metal_mass_fraction[chemistry_element_He];
 
   /* Get the Helium mass fraction. Note that this is He / (H + He), i.e. a
    * metal-free Helium mass fraction as per the Wiersma+08 definition */
-  const float HeFrac =
-      p->chemistry_data.metal_mass_fraction[chemistry_element_He] /
-      (XH + p->chemistry_data.metal_mass_fraction[chemistry_element_He]);
+  const float HeFrac = XHe / (XH + XHe);
 
   /* convert Hydrogen mass fraction into Hydrogen number density */
   const double n_H =
@@ -707,14 +706,13 @@ float cooling_get_temperature(
   const float u = hydro_get_physical_internal_energy(p, xp, cosmo);
   const double u_cgs = u * cooling->internal_energy_to_cgs;
 
-  /* Get the Hydrogen mass fraction */
+  /* Get the Hydrogen and Helium mass fractions */
   const float XH = p->chemistry_data.metal_mass_fraction[chemistry_element_H];
+  const float XHe = p->chemistry_data.metal_mass_fraction[chemistry_element_He];
 
   /* Get the Helium mass fraction. Note that this is He / (H + He), i.e. a
    * metal-free Helium mass fraction as per the Wiersma+08 definition */
-  const float HeFrac =
-      p->chemistry_data.metal_mass_fraction[chemistry_element_He] /
-      (XH + p->chemistry_data.metal_mass_fraction[chemistry_element_He]);
+  const float HeFrac = XHe / (XH + XHe);
 
   /* Convert Hydrogen mass fraction into Hydrogen number density */
   const float rho = hydro_get_physical_density(p, cosmo);
diff --git a/src/cosmology.c b/src/cosmology.c
index 4718ed5b316e514476e3ec38dd8771136f3a2f69..be23343d0d62584cd3a811e547b327120db744ef 100644
--- a/src/cosmology.c
+++ b/src/cosmology.c
@@ -576,6 +576,8 @@ void cosmology_init_no_cosmo(struct cosmology *c) {
   c->a_dot = 0.;
   c->time = 0.;
   c->universe_age_at_present_day = 0.;
+  c->Hubble_time = 0.;
+  c->lookback_time = 0.;
 
   /* Initialise the interpolation tables */
   c->drift_fac_interp_table = NULL;
diff --git a/src/dimension.h b/src/dimension.h
index 0b2093d718a61c6ce850db1970412af3e2e462b9..7084d70f5794853557539862091809071af2e790 100644
--- a/src/dimension.h
+++ b/src/dimension.h
@@ -89,6 +89,34 @@ __attribute__((always_inline)) INLINE static float pow_dimension(float x) {
 #endif
 }
 
+/**
+ * @brief Takes the d-th root of the argument, where d is the number of
+ * hydro dimensions selected at compile time.
+ *
+ * Evaluates \f$x^{1/d}\f$: cube root in 3D, square root in 2D and the
+ * identity in 1D.
+ *
+ * @param x The value whose d-th root is requested.
+ * @return The d-th root of x.
+ */
+__attribute__((always_inline)) INLINE static float pow_inv_dimension(float x) {
+
+  float root;
+
+#if defined(HYDRO_DIMENSION_3D)
+  root = cbrtf(x);
+#elif defined(HYDRO_DIMENSION_2D)
+  root = sqrtf(x);
+#elif defined(HYDRO_DIMENSION_1D)
+  root = x;
+#else
+  error("The dimension is not defined !");
+  root = 0.f;
+#endif
+
+  return root;
+}
+
 /**
  * @brief Returns the argument to the power given by the dimension plus one
  *
diff --git a/src/engine.c b/src/engine.c
index eea21bc58f2baea1620ed8bf8fb4dfb66f75bf10..b88c1f1c5a1cbad456405b2c5f97e501ee7dc745 100644
--- a/src/engine.c
+++ b/src/engine.c
@@ -115,7 +115,8 @@ const char *engine_policy_names[] = {"none",
                                      "stars",
                                      "structure finding",
                                      "star formation",
-                                     "feedback"};
+                                     "feedback",
+                                     "time-step limiter"};
 
 /** The rank of the engine as a global variable (for messages). */
 int engine_rank;
@@ -147,7 +148,9 @@ void engine_addlink(struct engine *e, struct link **l, struct task *t) {
   /* Get the next free link. */
   const size_t ind = atomic_inc(&e->nr_links);
   if (ind >= e->size_links) {
-    error("Link table overflow.");
+    error(
+        "Link table overflow. Increase the value of "
+        "`Scheduler:links_per_tasks`.");
   }
   struct link *res = &e->links[ind];
 
@@ -1131,84 +1134,12 @@ void engine_exchange_cells(struct engine *e) {
 
 #ifdef WITH_MPI
 
-  struct space *s = e->s;
-  const int nr_proxies = e->nr_proxies;
   const int with_gravity = e->policy & engine_policy_self_gravity;
   const ticks tic = getticks();
 
   /* Exchange the cell structure with neighbouring ranks. */
   proxy_cells_exchange(e->proxies, e->nr_proxies, e->s, with_gravity);
 
-  ticks tic2 = getticks();
-
-  /* Count the number of particles we need to import and re-allocate
-     the buffer if needed. */
-  size_t count_parts_in = 0, count_gparts_in = 0, count_sparts_in = 0;
-  for (int k = 0; k < nr_proxies; k++)
-    for (int j = 0; j < e->proxies[k].nr_cells_in; j++) {
-      if (e->proxies[k].cells_in_type[j] & proxy_cell_type_hydro)
-        count_parts_in += e->proxies[k].cells_in[j]->hydro.count;
-      if (e->proxies[k].cells_in_type[j] & proxy_cell_type_gravity)
-        count_gparts_in += e->proxies[k].cells_in[j]->grav.count;
-      count_sparts_in += e->proxies[k].cells_in[j]->stars.count;
-    }
-  if (count_parts_in > s->size_parts_foreign) {
-    if (s->parts_foreign != NULL) free(s->parts_foreign);
-    s->size_parts_foreign = 1.1 * count_parts_in;
-    if (posix_memalign((void **)&s->parts_foreign, part_align,
-                       sizeof(struct part) * s->size_parts_foreign) != 0)
-      error("Failed to allocate foreign part data.");
-  }
-  if (count_gparts_in > s->size_gparts_foreign) {
-    if (s->gparts_foreign != NULL) free(s->gparts_foreign);
-    s->size_gparts_foreign = 1.1 * count_gparts_in;
-    if (posix_memalign((void **)&s->gparts_foreign, gpart_align,
-                       sizeof(struct gpart) * s->size_gparts_foreign) != 0)
-      error("Failed to allocate foreign gpart data.");
-  }
-  if (count_sparts_in > s->size_sparts_foreign) {
-    if (s->sparts_foreign != NULL) free(s->sparts_foreign);
-    s->size_sparts_foreign = 1.1 * count_sparts_in;
-    if (posix_memalign((void **)&s->sparts_foreign, spart_align,
-                       sizeof(struct spart) * s->size_sparts_foreign) != 0)
-      error("Failed to allocate foreign spart data.");
-  }
-
-  if (e->verbose)
-    message("Counting and allocating arrays took %.3f %s.",
-            clocks_from_ticks(getticks() - tic2), clocks_getunit());
-
-  tic2 = getticks();
-
-  /* Unpack the cells and link to the particle data. */
-  struct part *parts = s->parts_foreign;
-  struct gpart *gparts = s->gparts_foreign;
-  struct spart *sparts = s->sparts_foreign;
-  for (int k = 0; k < nr_proxies; k++) {
-    for (int j = 0; j < e->proxies[k].nr_cells_in; j++) {
-
-      if (e->proxies[k].cells_in_type[j] & proxy_cell_type_hydro) {
-        cell_link_parts(e->proxies[k].cells_in[j], parts);
-        parts = &parts[e->proxies[k].cells_in[j]->hydro.count];
-      }
-
-      if (e->proxies[k].cells_in_type[j] & proxy_cell_type_gravity) {
-        cell_link_gparts(e->proxies[k].cells_in[j], gparts);
-        gparts = &gparts[e->proxies[k].cells_in[j]->grav.count];
-      }
-
-      cell_link_sparts(e->proxies[k].cells_in[j], sparts);
-      sparts = &sparts[e->proxies[k].cells_in[j]->stars.count];
-    }
-  }
-  s->nr_parts_foreign = parts - s->parts_foreign;
-  s->nr_gparts_foreign = gparts - s->gparts_foreign;
-  s->nr_sparts_foreign = sparts - s->sparts_foreign;
-
-  if (e->verbose)
-    message("Recursively linking arrays took %.3f %s.",
-            clocks_from_ticks(getticks() - tic2), clocks_getunit());
-
   if (e->verbose)
     message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
             clocks_getunit());
@@ -1823,15 +1754,133 @@ void engine_exchange_proxy_multipoles(struct engine *e) {
 #endif
 }
 
+/**
+ * @brief Allocate memory for the foreign particles.
+ *
+ * We look into the proxies for cells that have tasks and count
+ * the number of particles in these cells. We then allocate
+ * memory and link all the cells that have tasks and all cells
+ * deeper in the tree. Buffers are only re-allocated when too small.
+ * For stars, the counts of the top-level cells are used instead.
+ *
+ * Calls error() if SWIFT was compiled without MPI support.
+ *
+ * @param e The #engine.
+ */
+void engine_allocate_foreign_particles(struct engine *e) {
+
+#ifdef WITH_MPI
+
+  const int nr_proxies = e->nr_proxies;
+  struct space *s = e->s;
+  ticks tic = getticks();
+
+  /* Count the number of particles we need to import. The hydro and gravity
+     counts come from the per-cell "for tasks" counting helpers. */
+  size_t count_parts_in = 0, count_gparts_in = 0, count_sparts_in = 0;
+  for (int k = 0; k < nr_proxies; k++) {
+    for (int j = 0; j < e->proxies[k].nr_cells_in; j++) {
+      if (e->proxies[k].cells_in_type[j] & proxy_cell_type_hydro) {
+        count_parts_in += cell_count_parts_for_tasks(e->proxies[k].cells_in[j]);
+      }
+
+      if (e->proxies[k].cells_in_type[j] & proxy_cell_type_gravity) {
+        count_gparts_in +=
+            cell_count_gparts_for_tasks(e->proxies[k].cells_in[j]);
+      }
+
+      /* For stars, we just use the numbers in the top-level cells */
+      count_sparts_in += e->proxies[k].cells_in[j]->stars.count;
+    }
+  }
+
+  if (e->verbose)
+    message("Counting number of foreign particles took %.3f %s.",
+            clocks_from_ticks(getticks() - tic), clocks_getunit());
+
+  tic = getticks();
+
+  /* Grow the foreign part buffer if it cannot hold what we will receive */
+  if (count_parts_in > s->size_parts_foreign) {
+    if (s->parts_foreign != NULL) free(s->parts_foreign);
+    s->size_parts_foreign = engine_foreign_alloc_margin * count_parts_in;
+    if (posix_memalign((void **)&s->parts_foreign, part_align,
+                       sizeof(struct part) * s->size_parts_foreign) != 0)
+      error("Failed to allocate foreign part data.");
+  }
+  /* Grow the foreign gpart buffer if it cannot hold what we will receive */
+  if (count_gparts_in > s->size_gparts_foreign) {
+    if (s->gparts_foreign != NULL) free(s->gparts_foreign);
+    s->size_gparts_foreign = engine_foreign_alloc_margin * count_gparts_in;
+    if (posix_memalign((void **)&s->gparts_foreign, gpart_align,
+                       sizeof(struct gpart) * s->size_gparts_foreign) != 0)
+      error("Failed to allocate foreign gpart data.");
+  }
+  /* Grow the foreign spart buffer if it cannot hold what we will receive */
+  if (count_sparts_in > s->size_sparts_foreign) {
+    if (s->sparts_foreign != NULL) free(s->sparts_foreign);
+    s->size_sparts_foreign = engine_foreign_alloc_margin * count_sparts_in;
+    if (posix_memalign((void **)&s->sparts_foreign, spart_align,
+                       sizeof(struct spart) * s->size_sparts_foreign) != 0)
+      error("Failed to allocate foreign spart data.");
+  }
+
+  if (e->verbose)
+    message("Allocating %zu/%zu/%zu foreign part/gpart/spart (%zu/%zu/%zu MB)",
+            s->size_parts_foreign, s->size_gparts_foreign,
+            s->size_sparts_foreign,
+            s->size_parts_foreign * sizeof(struct part) / (1024 * 1024),
+            s->size_gparts_foreign * sizeof(struct gpart) / (1024 * 1024),
+            s->size_sparts_foreign * sizeof(struct spart) / (1024 * 1024));
+
+  /* Unpack the cells and link to the particle data. */
+  struct part *parts = s->parts_foreign;
+  struct gpart *gparts = s->gparts_foreign;
+  struct spart *sparts = s->sparts_foreign;
+  for (int k = 0; k < nr_proxies; k++) {
+    for (int j = 0; j < e->proxies[k].nr_cells_in; j++) {
+
+      if (e->proxies[k].cells_in_type[j] & proxy_cell_type_hydro) {
+        const size_t count_parts =
+            cell_link_foreign_parts(e->proxies[k].cells_in[j], parts);
+        parts = &parts[count_parts];
+      }
+
+      if (e->proxies[k].cells_in_type[j] & proxy_cell_type_gravity) {
+        const size_t count_gparts =
+            cell_link_foreign_gparts(e->proxies[k].cells_in[j], gparts);
+        gparts = &gparts[count_gparts];
+      }
+
+      /* For stars, we just use the numbers in the top-level cells */
+      cell_link_sparts(e->proxies[k].cells_in[j], sparts);
+      sparts = &sparts[e->proxies[k].cells_in[j]->stars.count];
+    }
+  }
+
+  /* Update the counters */
+  s->nr_parts_foreign = parts - s->parts_foreign;
+  s->nr_gparts_foreign = gparts - s->gparts_foreign;
+  s->nr_sparts_foreign = sparts - s->sparts_foreign;
+
+  if (e->verbose)
+    message("Recursively linking foreign arrays took %.3f %s.",
+            clocks_from_ticks(getticks() - tic), clocks_getunit());
+
+#else
+  error("SWIFT was not compiled with MPI support.");
+#endif
+}
+
 /**
  * @brief Prints the number of tasks in the engine
  *
  * @param e The #engine.
  */
-void engine_print_task_counts(struct engine *e) {
+void engine_print_task_counts(const struct engine *e) {
 
   const ticks tic = getticks();
-  struct scheduler *const sched = &e->sched;
+  const struct scheduler *sched = &e->sched;
   const int nr_tasks = sched->nr_tasks;
   const struct task *const tasks = sched->tasks;
 
@@ -1878,7 +1927,7 @@ void engine_print_task_counts(struct engine *e) {
  *
  * @return the estimated total number of tasks
  */
-int engine_estimate_nr_tasks(struct engine *e) {
+int engine_estimate_nr_tasks(const struct engine *e) {
 
   int tasks_per_cell = e->tasks_per_cell;
   if (tasks_per_cell > 0) return e->s->tot_cells * tasks_per_cell;
@@ -1887,8 +1936,7 @@ int engine_estimate_nr_tasks(struct engine *e) {
    * basically use a formula <n1>*ntopcells + <n2>*(totcells - ntopcells).
    * Where <n1> is the expected maximum tasks per top-level/super cell, and
    * <n2> the expected maximum tasks for all other cells. These should give
-   * a safe upper limit.
-   */
+   * a safe upper limit. */
   int n1 = 0;
   int n2 = 0;
   if (e->policy & engine_policy_hydro) {
@@ -1909,6 +1957,10 @@ int engine_estimate_nr_tasks(struct engine *e) {
 #endif
 #endif
   }
+  if (e->policy & engine_policy_limiter) {
+    n1 += 18;
+    n2 += 1;
+  }
   if (e->policy & engine_policy_self_gravity) {
     n1 += 125;
     n2 += 8;
@@ -2587,8 +2639,11 @@ void engine_skip_force_and_kick(struct engine *e) {
     /* Skip everything that updates the particles */
     if (t->type == task_type_drift_part || t->type == task_type_drift_gpart ||
         t->type == task_type_kick1 || t->type == task_type_kick2 ||
-        t->type == task_type_timestep || t->subtype == task_subtype_force ||
-        t->subtype == task_subtype_grav || t->type == task_type_end_force ||
+        t->type == task_type_timestep ||
+        t->type == task_type_timestep_limiter ||
+        t->subtype == task_subtype_force ||
+        t->subtype == task_subtype_limiter || t->subtype == task_subtype_grav ||
+        t->type == task_type_end_force ||
         t->type == task_type_grav_long_range || t->type == task_type_grav_mm ||
         t->type == task_type_grav_down || t->type == task_type_cooling ||
         t->type == task_type_star_formation)
@@ -2597,6 +2652,7 @@ void engine_skip_force_and_kick(struct engine *e) {
 
   /* Run through the cells and clear some flags. */
   space_map_cells_pre(e->s, 1, cell_clear_drift_flags, NULL);
+  space_map_cells_pre(e->s, 1, cell_clear_limiter_flags, NULL);
 }
 
 /**
@@ -2806,6 +2862,11 @@ void engine_init_particles(struct engine *e, int flag_entropy_ICs,
     gravity_exact_force_check(e->s, e, 1e-1);
 #endif
 
+#ifdef SWIFT_DEBUG_CHECKS
+  /* Make sure all woken-up particles have been processed */
+  space_check_limiter(e->s);
+#endif
+
   /* Recover the (integer) end of the next time-step */
   engine_collect_end_of_step(e, 1);
 
@@ -3063,6 +3124,11 @@ void engine_step(struct engine *e) {
     gravity_exact_force_check(e->s, e, 1e-1);
 #endif
 
+#ifdef SWIFT_DEBUG_CHECKS
+  /* Make sure all woken-up particles have been processed */
+  space_check_limiter(e->s);
+#endif
+
   /* Collect information about the next time-step */
   engine_collect_end_of_step(e, 1);
   e->forcerebuild = e->collect_group1.forcerebuild;
@@ -3106,94 +3172,87 @@ void engine_step(struct engine *e) {
  */
 void engine_check_for_dumps(struct engine *e) {
 
+  const int with_cosmology = (e->policy & engine_policy_cosmology);
   const int with_stf = (e->policy & engine_policy_structure_finding);
-  const int stf_time_output = (e->stf_output_freq_format == io_stf_time);
+
+  /* What kind of output are we getting? */
+  enum output_type {
+    output_none,
+    output_snapshot,
+    output_statistics,
+    output_stf
+  };
+
+  /* What kind of output do we want? And at which time ?
+   * Find the earliest output (amongst all kinds) that takes place
+   * before the next time-step */
+  enum output_type type = output_none;
+  integertime_t ti_output = max_nr_timesteps;
 
   /* Save some statistics ? */
-  int save_stats = 0;
-  if (e->ti_end_min > e->ti_next_stats && e->ti_next_stats > 0) save_stats = 1;
+  if (e->ti_end_min > e->ti_next_stats && e->ti_next_stats > 0) {
+    if (e->ti_next_stats < ti_output) {
+      ti_output = e->ti_next_stats;
+      type = output_statistics;
+    }
+  }
 
   /* Do we want a snapshot? */
-  int dump_snapshot = 0;
-  if (e->ti_end_min > e->ti_next_snapshot && e->ti_next_snapshot > 0)
-    dump_snapshot = 1;
+  if (e->ti_end_min > e->ti_next_snapshot && e->ti_next_snapshot > 0) {
+    if (e->ti_next_snapshot < ti_output) {
+      ti_output = e->ti_next_snapshot;
+      type = output_snapshot;
+    }
+  }
 
   /* Do we want to perform structure finding? */
-  int run_stf = 0;
-  if (with_stf && stf_time_output) {
-    if (e->ti_end_min > e->ti_next_stf && e->ti_next_stf > 0) run_stf = 1;
-  }
-  if (with_stf && !stf_time_output) {
-    if (e->step % e->delta_step_stf == 0) run_stf = 1;
+  if (with_stf) {
+    if (e->ti_end_min > e->ti_next_stf && e->ti_next_stf > 0) {
+      if (e->ti_next_stf < ti_output) {
+        ti_output = e->ti_next_stf;
+        type = output_stf;
+      }
+    }
   }
 
   /* Store information before attempting extra dump-related drifts */
-  integertime_t ti_current = e->ti_current;
-  timebin_t max_active_bin = e->max_active_bin;
-  double time = e->time;
+  const integertime_t ti_current = e->ti_current;
+  const timebin_t max_active_bin = e->max_active_bin;
+  const double time = e->time;
+
+  while (type != output_none) {
+
+    /* Let's fake that we are at the dump time */
+    e->ti_current = ti_output;
+    e->max_active_bin = 0;
+    if (with_cosmology) {
+      cosmology_update(e->cosmology, e->physical_constants, e->ti_current);
+      e->time = e->cosmology->time;
+    } else {
+      e->time = ti_output * e->time_base + e->time_begin;
+    }
 
-  while (save_stats || dump_snapshot || run_stf) {
+    /* Drift everyone */
+    engine_drift_all(e, /*drift_mpole=*/0);
 
     /* Write some form of output */
-    if (dump_snapshot && save_stats) {
-
-      /* If both, need to figure out which one occurs first */
-      if (e->ti_next_stats == e->ti_next_snapshot) {
-
-        /* Let's fake that we are at the common dump time */
-        e->ti_current = e->ti_next_snapshot;
-        e->max_active_bin = 0;
-        if ((e->policy & engine_policy_cosmology)) {
-          cosmology_update(e->cosmology, e->physical_constants, e->ti_current);
-          e->time = e->cosmology->time;
-        } else {
-          e->time = e->ti_next_stats * e->time_base + e->time_begin;
-        }
+    switch (type) {
+      case output_snapshot:
 
-        /* Drift everyone */
-        engine_drift_all(e, /*drift_mpole=*/0);
+        /* Do we want a corresponding VELOCIraptor output? */
+        if (with_stf && e->snapshot_invoke_stf) {
 
-        /* Dump everything */
-        engine_print_stats(e);
-#ifdef WITH_LOGGER
-        /* Write a file containing the offsets in the particle logger. */
-        engine_dump_index(e);
+#ifdef HAVE_VELOCIRAPTOR
+          velociraptor_invoke(e, /*linked_with_snap=*/1);
+          e->step_props |= engine_step_prop_stf;
 #else
-        engine_dump_snapshot(e);
+          error(
+              "Asking for a VELOCIraptor output but SWIFT was compiled without "
+              "the interface!");
 #endif
-
-      } else if (e->ti_next_stats < e->ti_next_snapshot) {
-
-        /* Let's fake that we are at the stats dump time */
-        e->ti_current = e->ti_next_stats;
-        e->max_active_bin = 0;
-        if ((e->policy & engine_policy_cosmology)) {
-          cosmology_update(e->cosmology, e->physical_constants, e->ti_current);
-          e->time = e->cosmology->time;
-        } else {
-          e->time = e->ti_next_stats * e->time_base + e->time_begin;
-        }
-
-        /* Drift everyone */
-        engine_drift_all(e, /*drift_mpole=*/0);
-
-        /* Dump stats */
-        engine_print_stats(e);
-
-        /* Let's fake that we are at the snapshot dump time */
-        e->ti_current = e->ti_next_snapshot;
-        e->max_active_bin = 0;
-        if ((e->policy & engine_policy_cosmology)) {
-          cosmology_update(e->cosmology, e->physical_constants, e->ti_current);
-          e->time = e->cosmology->time;
-        } else {
-          e->time = e->ti_next_snapshot * e->time_base + e->time_begin;
         }
 
-        /* Drift everyone */
-        engine_drift_all(e, /*drift_mpole=*/0);
-
-        /* Dump snapshot */
+          /* Dump... */
 #ifdef WITH_LOGGER
         /* Write a file containing the offsets in the particle logger. */
         engine_dump_index(e);
@@ -3201,118 +3260,60 @@ void engine_check_for_dumps(struct engine *e) {
         engine_dump_snapshot(e);
 #endif
 
-      } else if (e->ti_next_stats > e->ti_next_snapshot) {
-
-        /* Let's fake that we are at the snapshot dump time */
-        e->ti_current = e->ti_next_snapshot;
-        e->max_active_bin = 0;
-        if ((e->policy & engine_policy_cosmology)) {
-          cosmology_update(e->cosmology, e->physical_constants, e->ti_current);
-          e->time = e->cosmology->time;
-        } else {
-          e->time = e->ti_next_stats * e->time_base + e->time_begin;
-        }
-
-        /* Drift everyone */
-        engine_drift_all(e, /*drift_mpole=*/0);
-
-        /* Dump snapshot */
-#ifdef WITH_LOGGER
-        /* Write a file containing the offsets in the particle logger. */
-        engine_dump_index(e);
-#else
-        engine_dump_snapshot(e);
+        /* Free the memory allocated for VELOCIraptor i/o. */
+        if (with_stf && e->snapshot_invoke_stf) {
+#ifdef HAVE_VELOCIRAPTOR
+          free(e->s->gpart_group_data);
+          e->s->gpart_group_data = NULL;
 #endif
-
-        /* Let's fake that we are at the stats dump time */
-        e->ti_current = e->ti_next_stats;
-        e->max_active_bin = 0;
-        if ((e->policy & engine_policy_cosmology)) {
-          cosmology_update(e->cosmology, e->physical_constants, e->ti_current);
-          e->time = e->cosmology->time;
-        } else {
-          e->time = e->ti_next_stats * e->time_base + e->time_begin;
         }
 
-        /* Drift everyone */
-        engine_drift_all(e, /*drift_mpole=*/0);
+        /* ... and find the next output time */
+        engine_compute_next_snapshot_time(e);
+        break;
+
+      case output_statistics:
 
-        /* Dump stats */
+        /* Dump */
         engine_print_stats(e);
       }
 
-      /* Let's compute the time of the next outputs */
-      engine_compute_next_snapshot_time(e);
-      engine_compute_next_statistics_time(e);
+        /* and move on */
+        engine_compute_next_statistics_time(e);
 
-    } else if (dump_snapshot) {
+        break;
 
-      /* Let's fake that we are at the snapshot dump time */
-      e->ti_current = e->ti_next_snapshot;
-      e->max_active_bin = 0;
-      if ((e->policy & engine_policy_cosmology)) {
-        cosmology_update(e->cosmology, e->physical_constants, e->ti_current);
-        e->time = e->cosmology->time;
-      } else {
-        e->time = e->ti_next_snapshot * e->time_base + e->time_begin;
-      }
+      case output_stf:
 
-      /* Drift everyone */
-      engine_drift_all(e, /*drift_mpole=*/0);
+#ifdef HAVE_VELOCIRAPTOR
+        /* Unleash the raptor! */
+        velociraptor_invoke(e, /*linked_with_snap=*/0);
+        e->step_props |= engine_step_prop_stf;
 
-      /* Dump... */
-#ifdef WITH_LOGGER
-      /* Write a file containing the offsets in the particle logger. */
-      engine_dump_index(e);
+        /* ... and find the next output time */
+        engine_compute_next_stf_time(e);
 #else
-      engine_dump_snapshot(e);
+        error(
+            "Asking for a VELOCIraptor output but SWIFT was compiled without "
+            "the interface!");
 #endif
+        break;
 
-      /* ... and find the next output time */
-      engine_compute_next_snapshot_time(e);
-
-    } else if (save_stats) {
-
-      /* Let's fake that we are at the stats dump time */
-      e->ti_current = e->ti_next_stats;
-      e->max_active_bin = 0;
-      if ((e->policy & engine_policy_cosmology)) {
-        cosmology_update(e->cosmology, e->physical_constants, e->ti_current);
-        e->time = e->cosmology->time;
-      } else {
-        e->time = e->ti_next_stats * e->time_base + e->time_begin;
-      }
-
-      /* Drift everyone */
-      engine_drift_all(e, /*drift_mpole=*/0);
-
-      /* Dump */
-      engine_print_stats(e);
-
-      /* and move on */
-      engine_compute_next_statistics_time(e);
+      default:
+        error("Invalid dump type");
     }
 
-    /* Perform structure finding? */
-    if (run_stf) {
-
-#ifdef HAVE_VELOCIRAPTOR
-
-      // MATTHIEU: Check the order with the other i/o options.
-      if (!dump_snapshot && !save_stats) {
+    /* We need to see whether we are in the pathological case
+     * where there can be another dump before the next step. */
 
-        /* Let's fake that we are at the stats dump time */
-        e->ti_current = e->ti_next_stf;
-        e->max_active_bin = 0;
-        if ((e->policy & engine_policy_cosmology)) {
-          cosmology_update(e->cosmology, e->physical_constants, e->ti_current);
-          e->time = e->cosmology->time;
-        } else {
-          e->time = e->ti_next_stats * e->time_base + e->time_begin;
-        }
+    type = output_none;
+    ti_output = max_nr_timesteps;
 
-        /* Drift everyone */
-        engine_drift_all(e, /*drift_mpole=*/0);
+    /* Save some statistics ? */
+    if (e->ti_end_min > e->ti_next_stats && e->ti_next_stats > 0) {
+      if (e->ti_next_stats < ti_output) {
+        ti_output = e->ti_next_stats;
+        type = output_statistics;
       }
 
       velociraptor_init(e);
@@ -3333,16 +3334,24 @@ void engine_check_for_dumps(struct engine *e) {
       save_stats = 1;
 
     /* Do we want a snapshot? */
-    dump_snapshot = 0;
-    if (e->ti_end_min > e->ti_next_snapshot && e->ti_next_snapshot > 0)
-      dump_snapshot = 1;
+    if (e->ti_end_min > e->ti_next_snapshot && e->ti_next_snapshot > 0) {
+      if (e->ti_next_snapshot < ti_output) {
+        ti_output = e->ti_next_snapshot;
+        type = output_snapshot;
+      }
+    }
 
     /* Do we want to perform structure finding? */
-    run_stf = 0;
-    if (with_stf && stf_time_output) {
-      if (e->ti_end_min > e->ti_next_stf && e->ti_next_stf > 0) run_stf = 1;
+    if (with_stf) {
+      if (e->ti_end_min > e->ti_next_stf && e->ti_next_stf > 0) {
+        if (e->ti_next_stf < ti_output) {
+          ti_output = e->ti_next_stf;
+          type = output_stf;
+        }
+      }
     }
-  }
+
+  } /* While loop over output types */
 
   /* Restore the information we stored */
   e->ti_current = ti_current;
@@ -4104,9 +4113,12 @@ void engine_init(struct engine *e, struct space *s, struct swift_params *params,
       parser_get_opt_param_int(params, "Snapshots:compression", 0);
   e->snapshot_int_time_label_on =
       parser_get_opt_param_int(params, "Snapshots:int_time_label_on", 0);
+  e->snapshot_invoke_stf =
+      parser_get_opt_param_int(params, "Snapshots:invoke_stf", 0);
   e->snapshot_units = (struct unit_system *)malloc(sizeof(struct unit_system));
   units_init_default(e->snapshot_units, params, "Snapshots", internal_units);
   e->snapshot_output_count = 0;
+  e->stf_output_count = 0;
   e->dt_min = parser_get_param_double(params, "TimeIntegration:dt_min");
   e->dt_max = parser_get_param_double(params, "TimeIntegration:dt_max");
   e->dt_max_RMS_displacement = FLT_MAX;
@@ -4133,7 +4145,6 @@ void engine_init(struct engine *e, struct space *s, struct swift_params *params,
   e->star_formation = starform;
   e->chemistry = chemistry;
   e->parameter_file = params;
-  e->cell_loc = NULL;
 #ifdef WITH_MPI
   e->cputime_last_step = 0;
   e->last_repartition = 0;
@@ -4174,28 +4185,16 @@ void engine_init(struct engine *e, struct space *s, struct swift_params *params,
   /* Initialise VELOCIraptor output. */
   if (e->policy & engine_policy_structure_finding) {
     parser_get_param_string(params, "StructureFinding:basename",
-                            e->stfBaseName);
+                            e->stf_base_name);
+    parser_get_param_string(params, "StructureFinding:config_file_name",
+                            e->stf_config_file_name);
+
     e->time_first_stf_output =
         parser_get_opt_param_double(params, "StructureFinding:time_first", 0.);
     e->a_first_stf_output = parser_get_opt_param_double(
         params, "StructureFinding:scale_factor_first", 0.1);
-    e->stf_output_freq_format = (enum io_stf_output_format)parser_get_param_int(
-        params, "StructureFinding:output_time_format");
-
-    if (e->stf_output_freq_format == io_stf_steps) {
-      e->delta_step_stf =
-          parser_get_param_int(params, "StructureFinding:delta_step");
-    } else if (e->stf_output_freq_format == io_stf_time) {
-      e->delta_time_stf =
-          parser_get_param_double(params, "StructureFinding:delta_time");
-    } else {
-      error(
-          "Invalid flag (%d) set for output time format of structure finding.",
-          e->stf_output_freq_format);
-    }
-
-    /* overwrite input if outputlist */
-    if (e->output_list_stf) e->stf_output_freq_format = io_stf_time;
+    e->delta_time_stf =
+        parser_get_opt_param_double(params, "StructureFinding:delta_time", -1.);
   }
 
   engine_init_output_lists(e, params);
@@ -4441,10 +4440,11 @@ void engine_config(int restart, struct engine *e, struct swift_params *params,
 
       fprintf(e->file_timesteps,
               "# Step Properties: Rebuild=%d, Redistribute=%d, Repartition=%d, "
-              "Statistics=%d, Snapshot=%d, Restarts=%d\n",
+              "Statistics=%d, Snapshot=%d, Restarts=%d STF=%d, logger=%d\n",
               engine_step_prop_rebuild, engine_step_prop_redistribute,
               engine_step_prop_repartition, engine_step_prop_statistics,
-              engine_step_prop_snapshot, engine_step_prop_restarts);
+              engine_step_prop_snapshot, engine_step_prop_restarts,
+              engine_step_prop_stf, engine_step_prop_logger_index);
 
       fprintf(e->file_timesteps,
               "# %6s %14s %12s %12s %14s %9s %12s %12s %12s %16s [%s] %6s\n",
@@ -4537,17 +4537,18 @@ void engine_config(int restart, struct engine *e, struct swift_params *params,
           "simulation start a=%e.",
           e->a_first_statistics, e->cosmology->a_begin);
 
-    if ((e->policy & engine_policy_structure_finding) &&
-        (e->stf_output_freq_format == io_stf_time)) {
+    if (e->policy & engine_policy_structure_finding) {
+
+      if (e->delta_time_stf == -1. && !e->snapshot_invoke_stf)
+        error("A value for `StructureFinding:delta_time` must be specified");
 
-      if (e->delta_time_stf <= 1.)
+      if (e->delta_time_stf <= 1. && e->delta_time_stf != -1.)
         error("Time between STF (%e) must be > 1.", e->delta_time_stf);
 
       if (e->a_first_stf_output < e->cosmology->a_begin)
         error(
             "Scale-factor of first stf output (%e) must be after the "
-            "simulation "
-            "start a=%e.",
+            "simulation start a=%e.",
             e->a_first_stf_output, e->cosmology->a_begin);
     }
   } else {
@@ -4573,10 +4574,12 @@ void engine_config(int restart, struct engine *e, struct swift_params *params,
           "t=%e.",
           e->time_first_statistics, e->time_begin);
 
-    if ((e->policy & engine_policy_structure_finding) &&
-        (e->stf_output_freq_format == io_stf_time)) {
+    if (e->policy & engine_policy_structure_finding) {
 
-      if (e->delta_time_stf <= 0.)
+      if (e->delta_time_stf == -1. && !e->snapshot_invoke_stf)
+        error("A value for `StructureFinding:delta_time` must be specified");
+
+      if (e->delta_time_stf <= 0. && e->delta_time_stf != -1.)
         error("Time between STF (%e) must be positive.", e->delta_time_stf);
 
       if (e->time_first_stf_output < e->time_begin)
@@ -4585,12 +4588,6 @@ void engine_config(int restart, struct engine *e, struct swift_params *params,
     }
   }
 
-  if (e->policy & engine_policy_structure_finding) {
-    /* Find the time of the first stf output */
-    if (e->stf_output_freq_format == io_stf_time)
-      engine_compute_next_stf_time(e);
-  }
-
   /* Get the total mass */
   e->total_mass = 0.;
   for (size_t i = 0; i < e->s->nr_gparts; ++i)
@@ -4615,6 +4612,19 @@ void engine_config(int restart, struct engine *e, struct swift_params *params,
   /* Find the time of the first statistics output */
   engine_compute_next_statistics_time(e);
 
+  /* Find the time of the first stf output */
+  if (e->policy & engine_policy_structure_finding) {
+    engine_compute_next_stf_time(e);
+  }
+
+  /* Check that we are invoking VELOCIraptor only if we have it */
+  if (e->snapshot_invoke_stf &&
+      !(e->policy & engine_policy_structure_finding)) {
+    error(
+        "Invoking VELOCIraptor after snapshots but structure finding wasn't "
+        "activated at runtime (Use --velociraptor).");
+  }
+
   /* Whether restarts are enabled. Yes by default. Can be changed on restart. */
   e->restart_dump = parser_get_opt_param_int(params, "Restarts:enable", 1);
 
@@ -4678,6 +4688,10 @@ void engine_config(int restart, struct engine *e, struct swift_params *params,
   else
     maxtasks = engine_estimate_nr_tasks(e);
 
+  /* Estimated number of links per task */
+  e->links_per_tasks =
+      parser_get_opt_param_int(params, "Scheduler:links_per_tasks", 10);
+
   /* Init the scheduler. */
   scheduler_init(&e->sched, e->s, maxtasks, nr_queues,
                  (e->policy & scheduler_flag_steal), e->nodeID, &e->threadpool);
@@ -5200,7 +5214,6 @@ void engine_clean(struct engine *e) {
   output_list_clean(&e->output_list_stf);
 
   free(e->links);
-  free(e->cell_loc);
 #if defined(WITH_LOGGER)
   logger_clean(e->logger);
   free(e->logger);
diff --git a/src/engine.h b/src/engine.h
index f585b8fd74960048c41c60abfe687973ff1eaedb..b5f0799f037b07995cfa96dc6510d5a5957ab0f4 100644
--- a/src/engine.h
+++ b/src/engine.h
@@ -74,9 +74,10 @@ enum engine_policy {
   engine_policy_stars = (1 << 15),
   engine_policy_structure_finding = (1 << 16),
   engine_policy_star_formation = (1 << 17),
-  engine_policy_feedback = (1 << 18)
+  engine_policy_feedback = (1 << 18),
+  engine_policy_limiter = (1 << 19)
 };
-#define engine_maxpolicy 19
+#define engine_maxpolicy 20
 extern const char *engine_policy_names[engine_maxpolicy + 1];
 
 /**
@@ -90,7 +91,8 @@ enum engine_step_properties {
   engine_step_prop_statistics = (1 << 3),
   engine_step_prop_snapshot = (1 << 4),
   engine_step_prop_restarts = (1 << 5),
-  engine_step_prop_logger_index = (1 << 6)
+  engine_step_prop_stf = (1 << 6),
+  engine_step_prop_logger_index = (1 << 7)
 };
 
 /* Some constants */
@@ -99,6 +101,8 @@ enum engine_step_properties {
 #define engine_parts_size_grow 1.05
 #define engine_max_proxy_centre_frac 0.2
 #define engine_redistribute_alloc_margin 1.2
+#define engine_rebuild_link_alloc_margin 1.2
+#define engine_foreign_alloc_margin 1.05
 #define engine_default_energy_file_name "energy"
 #define engine_default_timesteps_file_name "timesteps"
 #define engine_max_parts_per_ghost 1000
@@ -222,9 +226,6 @@ struct engine {
   /* The internal system of units */
   const struct unit_system *internal_units;
 
-  /* Top-level cell locations for VELOCIraptor. */
-  struct cell_loc *cell_loc;
-
   /* Snapshot information */
   double a_first_snapshot;
   double time_first_snapshot;
@@ -239,12 +240,11 @@ struct engine {
   char snapshot_base_name[PARSER_MAX_LINE_SIZE];
   int snapshot_compression;
   int snapshot_int_time_label_on;
+  int snapshot_invoke_stf;
   struct unit_system *snapshot_units;
   int snapshot_output_count;
 
   /* Structure finding information */
-  enum io_stf_output_format stf_output_freq_format;
-  int delta_step_stf;
   double a_first_stf_output;
   double time_first_stf_output;
   double delta_time_stf;
@@ -255,7 +255,9 @@ struct engine {
   /* Integer time of the next stf output */
   integertime_t ti_next_stf;
 
-  char stfBaseName[PARSER_MAX_LINE_SIZE];
+  char stf_config_file_name[PARSER_MAX_LINE_SIZE];
+  char stf_base_name[PARSER_MAX_LINE_SIZE];
+  int stf_output_count;
 
   /* Statistics information */
   double a_first_statistics;
@@ -329,6 +331,10 @@ struct engine {
    * of the various task arrays. */
   size_t tasks_per_cell;
 
+  /* Average number of links per task. This number is used before the
+     creation of communication tasks, so it needs to be large enough. */
+  size_t links_per_tasks;
+
   /* Are we talkative ? */
   int verbose;
 
@@ -405,6 +411,7 @@ void engine_unskip(struct engine *e);
 void engine_drift_all(struct engine *e, const int drift_mpoles);
 void engine_drift_top_multipoles(struct engine *e);
 void engine_reconstruct_multipoles(struct engine *e);
+void engine_allocate_foreign_particles(struct engine *e);
 void engine_print_stats(struct engine *e);
 void engine_check_for_dumps(struct engine *e);
 void engine_dump_snapshot(struct engine *e);
@@ -447,7 +454,7 @@ int engine_is_done(struct engine *e);
 void engine_pin(void);
 void engine_unpin(void);
 void engine_clean(struct engine *e);
-int engine_estimate_nr_tasks(struct engine *e);
+int engine_estimate_nr_tasks(const struct engine *e);
 
 /* Function prototypes, engine_maketasks.c. */
 void engine_maketasks(struct engine *e);
diff --git a/src/engine_maketasks.c b/src/engine_maketasks.c
index 8e23b4932d8b519b4292c76b4ab3eca5b321b5b8..2175595cd149d9d5da5a3aa5f5341ff181fd58d6 100644
--- a/src/engine_maketasks.c
+++ b/src/engine_maketasks.c
@@ -210,9 +210,13 @@ void engine_addtasks_send_hydro(struct engine *e, struct cell *ci,
  * @param ci The sending #cell.
  * @param cj Dummy cell containing the nodeID of the receiving node.
  * @param t_ti The send_ti #task, if it has already been created.
+ * @param t_limiter The send_limiter #task, if already created.
+ * @param with_limiter Are we running with the time-step limiter?
  */
 void engine_addtasks_send_timestep(struct engine *e, struct cell *ci,
-                                   struct cell *cj, struct task *t_ti) {
+                                   struct cell *cj, struct task *t_ti,
+                                   struct task *t_limiter,
+                                   const int with_limiter) {
 
 #ifdef WITH_MPI
   struct link *l = NULL;
@@ -244,19 +248,31 @@ void engine_addtasks_send_timestep(struct engine *e, struct cell *ci,
       t_ti = scheduler_addtask(s, task_type_send, task_subtype_tend,
                                ci->mpi.tag, 0, ci, cj);
 
+      if (with_limiter)
+        t_limiter = scheduler_addtask(s, task_type_send, task_subtype_limiter,
+                                      ci->mpi.tag, 0, ci, cj);
+
       /* The super-cell's timestep task should unlock the send_ti task. */
       scheduler_addunlock(s, ci->super->timestep, t_ti);
+      if (with_limiter) scheduler_addunlock(s, t_limiter, ci->super->timestep);
+      if (with_limiter)
+        scheduler_addunlock(s, t_limiter, ci->super->timestep_limiter);
+      if (with_limiter) scheduler_addunlock(s, ci->super->kick2, t_limiter);
+      if (with_limiter)
+        scheduler_addunlock(s, ci->super->timestep_limiter, t_ti);
     }
 
     /* Add them to the local cell. */
     engine_addlink(e, &ci->mpi.send_ti, t_ti);
+    if (with_limiter) engine_addlink(e, &ci->mpi.limiter.send, t_limiter);
   }
 
   /* Recurse? */
   if (ci->split)
     for (int k = 0; k < 8; k++)
       if (ci->progeny[k] != NULL)
-        engine_addtasks_send_timestep(e, ci->progeny[k], cj, t_ti);
+        engine_addtasks_send_timestep(e, ci->progeny[k], cj, t_ti, t_limiter,
+                                      with_limiter);
 
 #else
   error("SWIFT was not compiled with MPI support.");
@@ -380,9 +396,12 @@ void engine_addtasks_recv_gravity(struct engine *e, struct cell *c,
  * @param e The #engine.
  * @param c The foreign #cell.
  * @param t_ti The recv_ti #task, if already been created.
+ * @param t_limiter The recv_limiter #task, if already created.
+ * @param with_limiter Are we running with the time-step limiter?
  */
 void engine_addtasks_recv_timestep(struct engine *e, struct cell *c,
-                                   struct task *t_ti) {
+                                   struct task *t_ti, struct task *t_limiter,
+                                   const int with_limiter) {
 
 #ifdef WITH_MPI
   struct scheduler *s = &e->sched;
@@ -397,21 +416,42 @@ void engine_addtasks_recv_timestep(struct engine *e, struct cell *c,
 
     t_ti = scheduler_addtask(s, task_type_recv, task_subtype_tend, c->mpi.tag,
                              0, c, NULL);
+
+    if (with_limiter)
+      t_limiter = scheduler_addtask(s, task_type_recv, task_subtype_limiter,
+                                    c->mpi.tag, 0, c, NULL);
   }
 
   c->mpi.recv_ti = t_ti;
 
-  for (struct link *l = c->grav.grav; l != NULL; l = l->next)
+  for (struct link *l = c->grav.grav; l != NULL; l = l->next) {
     scheduler_addunlock(s, l->t, t_ti);
+  }
 
-  for (struct link *l = c->hydro.force; l != NULL; l = l->next)
-    scheduler_addunlock(s, l->t, t_ti);
+  if (with_limiter) {
+
+    for (struct link *l = c->hydro.force; l != NULL; l = l->next) {
+      scheduler_addunlock(s, l->t, t_limiter);
+    }
+
+    for (struct link *l = c->hydro.limiter; l != NULL; l = l->next) {
+      scheduler_addunlock(s, t_limiter, l->t);
+      scheduler_addunlock(s, l->t, t_ti);
+    }
+
+  } else {
+
+    for (struct link *l = c->hydro.force; l != NULL; l = l->next) {
+      scheduler_addunlock(s, l->t, t_ti);
+    }
+  }
 
   /* Recurse? */
   if (c->split)
     for (int k = 0; k < 8; k++)
       if (c->progeny[k] != NULL)
-        engine_addtasks_recv_timestep(e, c->progeny[k], t_ti);
+        engine_addtasks_recv_timestep(e, c->progeny[k], t_ti, t_limiter,
+                                      with_limiter);
 
 #else
   error("SWIFT was not compiled with MPI support.");
@@ -435,6 +475,7 @@ void engine_make_hierarchical_tasks_common(struct engine *e, struct cell *c) {
   struct scheduler *s = &e->sched;
   const int is_with_cooling = (e->policy & engine_policy_cooling);
   const int is_with_star_formation = (e->policy & engine_policy_star_formation);
+  const int with_limiter = (e->policy & engine_policy_limiter);
 
   /* Are we in a super-cell ? */
   if (c->super == c) {
@@ -489,6 +530,16 @@ void engine_make_hierarchical_tasks_common(struct engine *e, struct cell *c) {
 
       scheduler_addunlock(s, c->timestep, c->kick1);
 
+      /* Time-step limiting */
+      if (with_limiter) {
+        c->timestep_limiter = scheduler_addtask(
+            s, task_type_timestep_limiter, task_subtype_none, 0, 0, c, NULL);
+
+        /* Make sure it is not run before kick2 */
+        scheduler_addunlock(s, c->timestep, c->timestep_limiter);
+        scheduler_addunlock(s, c->timestep_limiter, c->kick1);
+      }
+
 #if defined(WITH_LOGGER)
       scheduler_addunlock(s, c->kick1, c->logger);
 #endif
@@ -1281,7 +1332,8 @@ void engine_link_gravity_tasks(struct engine *e) {
  */
 static inline void engine_make_hydro_loops_dependencies(
     struct scheduler *sched, struct task *density, struct task *gradient,
-    struct task *force, struct cell *c, int with_cooling) {
+    struct task *force, struct task *limiter, struct cell *c, int with_cooling,
+    int with_limiter) {
 
   /* density loop --> ghost --> gradient loop --> extra_ghost */
   /* extra_ghost --> force loop  */
@@ -1299,14 +1351,15 @@ static inline void engine_make_hydro_loops_dependencies(
  * @param sched The #scheduler.
  * @param density The density task to link.
  * @param force The force task to link.
+ * @param limiter The limiter task to link.
  * @param c The cell.
- * @param with_cooling Are we running with cooling switched on ?
+ * @param with_cooling Are we running with cooling switched on?
+ * @param with_limiter Are we running with limiter switched on?
  */
-static inline void engine_make_hydro_loops_dependencies(struct scheduler *sched,
-                                                        struct task *density,
-                                                        struct task *force,
-                                                        struct cell *c,
-                                                        int with_cooling) {
+static inline void engine_make_hydro_loops_dependencies(
+    struct scheduler *sched, struct task *density, struct task *force,
+    struct task *limiter, struct cell *c, int with_cooling, int with_limiter) {
+
   /* density loop --> ghost --> force loop */
   scheduler_addunlock(sched, density, c->hydro.super->hydro.ghost_in);
   scheduler_addunlock(sched, c->hydro.super->hydro.ghost_out, force);
@@ -1347,6 +1400,12 @@ void engine_make_extra_hydroloop_tasks_mapper(void *map_data, int num_elements,
   struct scheduler *sched = &e->sched;
   const int nodeID = e->nodeID;
   const int with_cooling = (e->policy & engine_policy_cooling);
+  const int with_limiter = (e->policy & engine_policy_limiter);
+#ifdef EXTRA_HYDRO_LOOP
+  struct task *t_gradient = NULL;
+#endif
+  struct task *t_force = NULL;
+  struct task *t_limiter = NULL;
 
   for (int ind = 0; ind < num_elements; ind++) {
     struct task *t = &((struct task *)map_data)[ind];
@@ -1364,31 +1423,53 @@ void engine_make_extra_hydroloop_tasks_mapper(void *map_data, int num_elements,
 
 #ifdef EXTRA_HYDRO_LOOP
       /* Start by constructing the task for the second  and third hydro loop. */
-      struct task *t2 = scheduler_addtask(
-          sched, task_type_self, task_subtype_gradient, 0, 0, t->ci, NULL);
-      struct task *t3 = scheduler_addtask(
-          sched, task_type_self, task_subtype_force, 0, 0, t->ci, NULL);
+      t_gradient = scheduler_addtask(sched, task_type_self,
+                                     task_subtype_gradient, 0, 0, t->ci, NULL);
+      t_force = scheduler_addtask(sched, task_type_self, task_subtype_force, 0,
+                                  0, t->ci, NULL);
+
+      /* and the task for the time-step limiter */
+      if (with_limiter)
+        t_limiter = scheduler_addtask(sched, task_type_self,
+                                      task_subtype_limiter, 0, 0, t->ci, NULL);
 
       /* Add the link between the new loops and the cell */
-      engine_addlink(e, &t->ci->hydro.gradient, t2);
-      engine_addlink(e, &t->ci->hydro.force, t3);
+      engine_addlink(e, &t->ci->hydro.gradient, t_gradient);
+      engine_addlink(e, &t->ci->hydro.force, t_force);
+      if (with_limiter) engine_addlink(e, &t->ci->hydro.limiter, t_limiter);
 
       /* Now, build all the dependencies for the hydro */
-      engine_make_hydro_loops_dependencies(sched, t, t2, t3, t->ci,
-                                           with_cooling);
-      scheduler_addunlock(sched, t3, t->ci->super->end_force);
+      engine_make_hydro_loops_dependencies(sched, t, t_gradient, t_force,
+                                           t_limiter, t->ci, with_cooling,
+                                           with_limiter);
+      scheduler_addunlock(sched, t_force, t->ci->super->end_force);
+      if (with_limiter)
+        scheduler_addunlock(sched, t->ci->super->kick2, t_limiter);
+      if (with_limiter)
+        scheduler_addunlock(sched, t_limiter, t->ci->super->timestep);
 #else
 
       /* Start by constructing the task for the second hydro loop */
-      struct task *t2 = scheduler_addtask(
-          sched, task_type_self, task_subtype_force, 0, 0, t->ci, NULL);
+      t_force = scheduler_addtask(sched, task_type_self, task_subtype_force, 0,
+                                  0, t->ci, NULL);
+
+      /* and the task for the time-step limiter */
+      if (with_limiter)
+        t_limiter = scheduler_addtask(sched, task_type_self,
+                                      task_subtype_limiter, 0, 0, t->ci, NULL);
 
       /* Add the link between the new loop and the cell */
-      engine_addlink(e, &t->ci->hydro.force, t2);
+      engine_addlink(e, &t->ci->hydro.force, t_force);
+      if (with_limiter) engine_addlink(e, &t->ci->hydro.limiter, t_limiter);
 
       /* Now, build all the dependencies for the hydro */
-      engine_make_hydro_loops_dependencies(sched, t, t2, t->ci, with_cooling);
-      scheduler_addunlock(sched, t2, t->ci->super->end_force);
+      engine_make_hydro_loops_dependencies(sched, t, t_force, t_limiter, t->ci,
+                                           with_cooling, with_limiter);
+      scheduler_addunlock(sched, t_force, t->ci->super->end_force);
+      if (with_limiter)
+        scheduler_addunlock(sched, t->ci->super->kick2, t_limiter);
+      if (with_limiter)
+        scheduler_addunlock(sched, t_limiter, t->ci->super->timestep);
 #endif
     }
 
@@ -1407,54 +1488,103 @@ void engine_make_extra_hydroloop_tasks_mapper(void *map_data, int num_elements,
 
 #ifdef EXTRA_HYDRO_LOOP
       /* Start by constructing the task for the second and third hydro loop */
-      struct task *t2 = scheduler_addtask(
-          sched, task_type_pair, task_subtype_gradient, 0, 0, t->ci, t->cj);
-      struct task *t3 = scheduler_addtask(
-          sched, task_type_pair, task_subtype_force, 0, 0, t->ci, t->cj);
+      t_gradient = scheduler_addtask(sched, task_type_pair,
+                                     task_subtype_gradient, 0, 0, t->ci, t->cj);
+      t_force = scheduler_addtask(sched, task_type_pair, task_subtype_force, 0,
+                                  0, t->ci, t->cj);
+
+      /* and the task for the time-step limiter */
+      if (with_limiter)
+        t_limiter = scheduler_addtask(sched, task_type_pair,
+                                      task_subtype_limiter, 0, 0, t->ci, t->cj);
 
       /* Add the link between the new loop and both cells */
-      engine_addlink(e, &t->ci->hydro.gradient, t2);
-      engine_addlink(e, &t->cj->hydro.gradient, t2);
-      engine_addlink(e, &t->ci->hydro.force, t3);
-      engine_addlink(e, &t->cj->hydro.force, t3);
+      engine_addlink(e, &t->ci->hydro.gradient, t_gradient);
+      engine_addlink(e, &t->cj->hydro.gradient, t_gradient);
+      engine_addlink(e, &t->ci->hydro.force, t_force);
+      engine_addlink(e, &t->cj->hydro.force, t_force);
+      if (with_limiter) engine_addlink(e, &t->ci->hydro.limiter, t_limiter);
+      if (with_limiter) engine_addlink(e, &t->cj->hydro.limiter, t_limiter);
 
       /* Now, build all the dependencies for the hydro for the cells */
       /* that are local and are not descendant of the same super_hydro-cells */
       if (t->ci->nodeID == nodeID) {
-        engine_make_hydro_loops_dependencies(sched, t, t2, t3, t->ci,
-                                             with_cooling);
-        scheduler_addunlock(sched, t3, t->ci->super->end_force);
+        engine_make_hydro_loops_dependencies(sched, t, t_gradient, t_force,
+                                             t_limiter, t->ci, with_cooling,
+                                             with_limiter);
+        scheduler_addunlock(sched, t_force, t->ci->super->end_force);
+        if (with_limiter)
+          scheduler_addunlock(sched, t->ci->super->kick2, t_limiter);
+        if (with_limiter)
+          scheduler_addunlock(sched, t_limiter, t->ci->super->timestep);
+        if (with_limiter)
+          scheduler_addunlock(sched, t_limiter, t->ci->super->timestep_limiter);
       }
       if (t->cj->nodeID == nodeID) {
-        if (t->ci->hydro.super != t->cj->hydro.super)
-          engine_make_hydro_loops_dependencies(sched, t, t2, t3, t->cj,
-                                               with_cooling);
-        if (t->ci->super != t->cj->super)
-          scheduler_addunlock(sched, t3, t->cj->super->end_force);
+        if (t->ci->hydro.super != t->cj->hydro.super) {
+          engine_make_hydro_loops_dependencies(sched, t, t_gradient, t_force,
+                                               t_limiter, t->cj, with_cooling,
+                                               with_limiter);
+        }
+
+        if (t->ci->super != t->cj->super) {
+          scheduler_addunlock(sched, t_force, t->cj->super->end_force);
+          if (with_limiter)
+            scheduler_addunlock(sched, t->cj->super->kick2, t_limiter);
+          if (with_limiter)
+            scheduler_addunlock(sched, t_limiter, t->cj->super->timestep);
+          if (with_limiter)
+            scheduler_addunlock(sched, t_limiter,
+                                t->cj->super->timestep_limiter);
+        }
       }
 
 #else
 
       /* Start by constructing the task for the second hydro loop */
-      struct task *t2 = scheduler_addtask(
-          sched, task_type_pair, task_subtype_force, 0, 0, t->ci, t->cj);
+      t_force = scheduler_addtask(sched, task_type_pair, task_subtype_force, 0,
+                                  0, t->ci, t->cj);
+
+      /* and the task for the time-step limiter */
+      if (with_limiter)
+        t_limiter = scheduler_addtask(sched, task_type_pair,
+                                      task_subtype_limiter, 0, 0, t->ci, t->cj);
 
       /* Add the link between the new loop and both cells */
-      engine_addlink(e, &t->ci->hydro.force, t2);
-      engine_addlink(e, &t->cj->hydro.force, t2);
+      engine_addlink(e, &t->ci->hydro.force, t_force);
+      engine_addlink(e, &t->cj->hydro.force, t_force);
+      if (with_limiter) engine_addlink(e, &t->ci->hydro.limiter, t_limiter);
+      if (with_limiter) engine_addlink(e, &t->cj->hydro.limiter, t_limiter);
 
       /* Now, build all the dependencies for the hydro for the cells */
       /* that are local and are not descendant of the same super_hydro-cells */
       if (t->ci->nodeID == nodeID) {
-        engine_make_hydro_loops_dependencies(sched, t, t2, t->ci, with_cooling);
-        scheduler_addunlock(sched, t2, t->ci->super->end_force);
+        engine_make_hydro_loops_dependencies(sched, t, t_force, t_limiter,
+                                             t->ci, with_cooling, with_limiter);
+        scheduler_addunlock(sched, t_force, t->ci->super->end_force);
+        if (with_limiter)
+          scheduler_addunlock(sched, t->ci->super->kick2, t_limiter);
+        if (with_limiter)
+          scheduler_addunlock(sched, t_limiter, t->ci->super->timestep);
+        if (with_limiter)
+          scheduler_addunlock(sched, t_limiter, t->ci->super->timestep_limiter);
       }
       if (t->cj->nodeID == nodeID) {
-        if (t->ci->hydro.super != t->cj->hydro.super)
-          engine_make_hydro_loops_dependencies(sched, t, t2, t->cj,
-                                               with_cooling);
-        if (t->ci->super != t->cj->super)
-          scheduler_addunlock(sched, t2, t->cj->super->end_force);
+        if (t->ci->hydro.super != t->cj->hydro.super) {
+          engine_make_hydro_loops_dependencies(
+              sched, t, t_force, t_limiter, t->cj, with_cooling, with_limiter);
+        }
+
+        if (t->ci->super != t->cj->super) {
+          scheduler_addunlock(sched, t_force, t->cj->super->end_force);
+          if (with_limiter)
+            scheduler_addunlock(sched, t->cj->super->kick2, t_limiter);
+          if (with_limiter)
+            scheduler_addunlock(sched, t_limiter, t->cj->super->timestep);
+          if (with_limiter)
+            scheduler_addunlock(sched, t_limiter,
+                                t->cj->super->timestep_limiter);
+        }
       }
 
 #endif
@@ -1472,39 +1602,65 @@ void engine_make_extra_hydroloop_tasks_mapper(void *map_data, int num_elements,
 #ifdef EXTRA_HYDRO_LOOP
 
       /* Start by constructing the task for the second and third hydro loop */
-      struct task *t2 =
+      t_gradient =
           scheduler_addtask(sched, task_type_sub_self, task_subtype_gradient,
-                            t->flags, 0, t->ci, t->cj);
-      struct task *t3 =
-          scheduler_addtask(sched, task_type_sub_self, task_subtype_force,
-                            t->flags, 0, t->ci, t->cj);
+                            t->flags, 0, t->ci, NULL);
+      t_force = scheduler_addtask(sched, task_type_sub_self, task_subtype_force,
+                                  t->flags, 0, t->ci, NULL);
+
+      /* and the task for the time-step limiter */
+      if (with_limiter)
+        t_limiter =
+            scheduler_addtask(sched, task_type_sub_self, task_subtype_limiter,
+                              t->flags, 0, t->ci, NULL);
 
       /* Add the link between the new loop and the cell */
-      engine_addlink(e, &t->ci->hydro.gradient, t2);
-      engine_addlink(e, &t->ci->hydro.force, t3);
+      engine_addlink(e, &t->ci->hydro.gradient, t_gradient);
+      engine_addlink(e, &t->ci->hydro.force, t_force);
+      if (with_limiter) engine_addlink(e, &t->ci->hydro.limiter, t_limiter);
 
       /* Now, build all the dependencies for the hydro for the cells */
       /* that are local and are not descendant of the same super_hydro-cells */
       if (t->ci->nodeID == nodeID) {
-        engine_make_hydro_loops_dependencies(sched, t, t2, t3, t->ci,
-                                             with_cooling);
-        scheduler_addunlock(sched, t3, t->ci->super->end_force);
+        engine_make_hydro_loops_dependencies(sched, t, t_gradient, t_force,
+                                             t_limiter, t->ci, with_cooling,
+                                             with_limiter);
+        scheduler_addunlock(sched, t_force, t->ci->super->end_force);
+        if (with_limiter)
+          scheduler_addunlock(sched, t->ci->super->kick2, t_limiter);
+        if (with_limiter)
+          scheduler_addunlock(sched, t_limiter, t->ci->super->timestep);
+        if (with_limiter)
+          scheduler_addunlock(sched, t_limiter, t->ci->super->timestep_limiter);
       }
 
 #else
       /* Start by constructing the task for the second hydro loop */
-      struct task *t2 =
-          scheduler_addtask(sched, task_type_sub_self, task_subtype_force,
-                            t->flags, 0, t->ci, t->cj);
+      t_force = scheduler_addtask(sched, task_type_sub_self, task_subtype_force,
+                                  t->flags, 0, t->ci, NULL);
+
+      /* and the task for the time-step limiter */
+      if (with_limiter)
+        t_limiter =
+            scheduler_addtask(sched, task_type_sub_self, task_subtype_limiter,
+                              t->flags, 0, t->ci, NULL);
 
       /* Add the link between the new loop and the cell */
-      engine_addlink(e, &t->ci->hydro.force, t2);
+      engine_addlink(e, &t->ci->hydro.force, t_force);
+      if (with_limiter) engine_addlink(e, &t->ci->hydro.limiter, t_limiter);
 
       /* Now, build all the dependencies for the hydro for the cells */
       /* that are local and are not descendant of the same super_hydro-cells */
       if (t->ci->nodeID == nodeID) {
-        engine_make_hydro_loops_dependencies(sched, t, t2, t->ci, with_cooling);
-        scheduler_addunlock(sched, t2, t->ci->super->end_force);
+        engine_make_hydro_loops_dependencies(sched, t, t_force, t_limiter,
+                                             t->ci, with_cooling, with_limiter);
+        scheduler_addunlock(sched, t_force, t->ci->super->end_force);
+        if (with_limiter)
+          scheduler_addunlock(sched, t->ci->super->kick2, t_limiter);
+        if (with_limiter)
+          scheduler_addunlock(sched, t_limiter, t->ci->super->timestep);
+        if (with_limiter)
+          scheduler_addunlock(sched, t_limiter, t->ci->super->timestep_limiter);
       }
 #endif
     }
@@ -1526,56 +1682,106 @@ void engine_make_extra_hydroloop_tasks_mapper(void *map_data, int num_elements,
 #ifdef EXTRA_HYDRO_LOOP
 
       /* Start by constructing the task for the second and third hydro loop */
-      struct task *t2 =
+      t_gradient =
           scheduler_addtask(sched, task_type_sub_pair, task_subtype_gradient,
                             t->flags, 0, t->ci, t->cj);
-      struct task *t3 =
-          scheduler_addtask(sched, task_type_sub_pair, task_subtype_force,
-                            t->flags, 0, t->ci, t->cj);
+      t_force = scheduler_addtask(sched, task_type_sub_pair, task_subtype_force,
+                                  t->flags, 0, t->ci, t->cj);
+
+      /* and the task for the time-step limiter */
+      if (with_limiter)
+        t_limiter =
+            scheduler_addtask(sched, task_type_sub_pair, task_subtype_limiter,
+                              t->flags, 0, t->ci, t->cj);
 
       /* Add the link between the new loop and both cells */
-      engine_addlink(e, &t->ci->hydro.gradient, t2);
-      engine_addlink(e, &t->cj->hydro.gradient, t2);
-      engine_addlink(e, &t->ci->hydro.force, t3);
-      engine_addlink(e, &t->cj->hydro.force, t3);
+      engine_addlink(e, &t->ci->hydro.gradient, t_gradient);
+      engine_addlink(e, &t->cj->hydro.gradient, t_gradient);
+      engine_addlink(e, &t->ci->hydro.force, t_force);
+      engine_addlink(e, &t->cj->hydro.force, t_force);
+      if (with_limiter) engine_addlink(e, &t->ci->hydro.limiter, t_limiter);
+      if (with_limiter) engine_addlink(e, &t->cj->hydro.limiter, t_limiter);
 
       /* Now, build all the dependencies for the hydro for the cells */
       /* that are local and are not descendant of the same super_hydro-cells */
       if (t->ci->nodeID == nodeID) {
-        engine_make_hydro_loops_dependencies(sched, t, t2, t3, t->ci,
-                                             with_cooling);
-        scheduler_addunlock(sched, t3, t->ci->super->end_force);
+        engine_make_hydro_loops_dependencies(sched, t, t_gradient, t_force,
+                                             t_limiter, t->ci, with_cooling,
+                                             with_limiter);
+        scheduler_addunlock(sched, t_force, t->ci->super->end_force);
+        if (with_limiter)
+          scheduler_addunlock(sched, t->ci->super->kick2, t_limiter);
+        if (with_limiter)
+          scheduler_addunlock(sched, t_limiter, t->ci->super->timestep);
+        if (with_limiter)
+          scheduler_addunlock(sched, t_limiter, t->ci->super->timestep_limiter);
       }
       if (t->cj->nodeID == nodeID) {
-        if (t->ci->hydro.super != t->cj->hydro.super)
-          engine_make_hydro_loops_dependencies(sched, t, t2, t3, t->cj,
-                                               with_cooling);
-        if (t->ci->super != t->cj->super)
-          scheduler_addunlock(sched, t3, t->cj->super->end_force);
+        if (t->ci->hydro.super != t->cj->hydro.super) {
+          engine_make_hydro_loops_dependencies(sched, t, t_gradient, t_force,
+                                               t_limiter, t->cj, with_cooling,
+                                               with_limiter);
+        }
+
+        if (t->ci->super != t->cj->super) {
+          scheduler_addunlock(sched, t_force, t->cj->super->end_force);
+          if (with_limiter)
+            scheduler_addunlock(sched, t->cj->super->kick2, t_limiter);
+          if (with_limiter)
+            scheduler_addunlock(sched, t_limiter, t->cj->super->timestep);
+          if (with_limiter)
+            scheduler_addunlock(sched, t_limiter,
+                                t->cj->super->timestep_limiter);
+        }
       }
 
 #else
       /* Start by constructing the task for the second hydro loop */
-      struct task *t2 =
-          scheduler_addtask(sched, task_type_sub_pair, task_subtype_force,
-                            t->flags, 0, t->ci, t->cj);
+      t_force = scheduler_addtask(sched, task_type_sub_pair, task_subtype_force,
+                                  t->flags, 0, t->ci, t->cj);
+
+      /* and the task for the time-step limiter */
+      if (with_limiter)
+        t_limiter =
+            scheduler_addtask(sched, task_type_sub_pair, task_subtype_limiter,
+                              t->flags, 0, t->ci, t->cj);
 
       /* Add the link between the new loop and both cells */
-      engine_addlink(e, &t->ci->hydro.force, t2);
-      engine_addlink(e, &t->cj->hydro.force, t2);
+      engine_addlink(e, &t->ci->hydro.force, t_force);
+      engine_addlink(e, &t->cj->hydro.force, t_force);
+      if (with_limiter) engine_addlink(e, &t->ci->hydro.limiter, t_limiter);
+      if (with_limiter) engine_addlink(e, &t->cj->hydro.limiter, t_limiter);
 
       /* Now, build all the dependencies for the hydro for the cells */
       /* that are local and are not descendant of the same super_hydro-cells */
       if (t->ci->nodeID == nodeID) {
-        engine_make_hydro_loops_dependencies(sched, t, t2, t->ci, with_cooling);
-        scheduler_addunlock(sched, t2, t->ci->super->end_force);
+        engine_make_hydro_loops_dependencies(sched, t, t_force, t_limiter,
+                                             t->ci, with_cooling, with_limiter);
+
+        scheduler_addunlock(sched, t_force, t->ci->super->end_force);
+        if (with_limiter)
+          scheduler_addunlock(sched, t->ci->super->kick2, t_limiter);
+        if (with_limiter)
+          scheduler_addunlock(sched, t_limiter, t->ci->super->timestep);
+        if (with_limiter)
+          scheduler_addunlock(sched, t_limiter, t->ci->super->timestep_limiter);
       }
       if (t->cj->nodeID == nodeID) {
-        if (t->ci->hydro.super != t->cj->hydro.super)
-          engine_make_hydro_loops_dependencies(sched, t, t2, t->cj,
-                                               with_cooling);
-        if (t->ci->super != t->cj->super)
-          scheduler_addunlock(sched, t2, t->cj->super->end_force);
+        if (t->ci->hydro.super != t->cj->hydro.super) {
+          engine_make_hydro_loops_dependencies(
+              sched, t, t_force, t_limiter, t->cj, with_cooling, with_limiter);
+        }
+
+        if (t->ci->super != t->cj->super) {
+          scheduler_addunlock(sched, t_force, t->cj->super->end_force);
+          if (with_limiter)
+            scheduler_addunlock(sched, t->cj->super->kick2, t_limiter);
+          if (with_limiter)
+            scheduler_addunlock(sched, t_limiter, t->cj->super->timestep);
+          if (with_limiter)
+            scheduler_addunlock(sched, t_limiter,
+                                t->cj->super->timestep_limiter);
+        }
       }
 #endif
     }
@@ -1961,6 +2167,7 @@ struct cell_type_pair {
 void engine_addtasks_send_mapper(void *map_data, int num_elements,
                                  void *extra_data) {
   struct engine *e = (struct engine *)extra_data;
+  const int with_limiter = (e->policy & engine_policy_limiter);
   struct cell_type_pair *cell_type_pairs = (struct cell_type_pair *)map_data;
 
   for (int k = 0; k < num_elements; k++) {
@@ -1969,7 +2176,7 @@ void engine_addtasks_send_mapper(void *map_data, int num_elements,
     const int type = cell_type_pairs[k].type;
 
     /* Add the send task for the particle timesteps. */
-    engine_addtasks_send_timestep(e, ci, cj, NULL);
+    engine_addtasks_send_timestep(e, ci, cj, NULL, NULL, with_limiter);
 
     /* Add the send tasks for the cells in the proxy that have a hydro
      * connection. */
@@ -1988,6 +2195,7 @@ void engine_addtasks_send_mapper(void *map_data, int num_elements,
 void engine_addtasks_recv_mapper(void *map_data, int num_elements,
                                  void *extra_data) {
   struct engine *e = (struct engine *)extra_data;
+  const int with_limiter = (e->policy & engine_policy_limiter);
   struct cell_type_pair *cell_type_pairs = (struct cell_type_pair *)map_data;
 
   for (int k = 0; k < num_elements; k++) {
@@ -1995,7 +2203,7 @@ void engine_addtasks_recv_mapper(void *map_data, int num_elements,
     const int type = cell_type_pairs[k].type;
 
     /* Add the recv task for the particle timesteps. */
-    engine_addtasks_recv_timestep(e, ci, NULL);
+    engine_addtasks_recv_timestep(e, ci, NULL, NULL, with_limiter);
 
     /* Add the recv tasks for the cells in the proxy that have a hydro
      * connection. */
@@ -2068,39 +2276,6 @@ void engine_maketasks(struct engine *e) {
   if (e->sched.nr_tasks == 0 && (s->nr_gparts > 0 || s->nr_parts > 0))
     error("We have particles but no hydro or gravity tasks were created.");
 
-  /* Free the old list of cell-task links. */
-  if (e->links != NULL) free(e->links);
-  e->size_links = 0;
-
-/* The maximum number of links is the
- * number of cells (s->tot_cells) times the number of neighbours (26) times
- * the number of interaction types, so 26 * 2 (density, force) pairs
- * and 2 (density, force) self.
- */
-#ifdef EXTRA_HYDRO_LOOP
-  const size_t hydro_tasks_per_cell = 27 * 3;
-#else
-  const size_t hydro_tasks_per_cell = 27 * 2;
-#endif
-  const size_t self_grav_tasks_per_cell = 125;
-  const size_t ext_grav_tasks_per_cell = 1;
-  const size_t stars_tasks_per_cell = 27;
-
-  if (e->policy & engine_policy_hydro)
-    e->size_links += s->tot_cells * hydro_tasks_per_cell;
-  if (e->policy & engine_policy_external_gravity)
-    e->size_links += s->tot_cells * ext_grav_tasks_per_cell;
-  if (e->policy & engine_policy_self_gravity)
-    e->size_links += s->tot_cells * self_grav_tasks_per_cell;
-  if (e->policy & engine_policy_stars)
-    e->size_links += s->tot_cells * stars_tasks_per_cell;
-
-  /* Allocate the new link list */
-  if ((e->links = (struct link *)malloc(sizeof(struct link) * e->size_links)) ==
-      NULL)
-    error("Failed to allocate cell-task links.");
-  e->nr_links = 0;
-
   tic2 = getticks();
 
   /* Split the tasks. */
@@ -2118,6 +2293,20 @@ void engine_maketasks(struct engine *e) {
   }
 #endif
 
+  /* Free the old list of cell-task links. */
+  if (e->links != NULL) free(e->links);
+  e->size_links = e->sched.nr_tasks * e->links_per_tasks;
+
+  /* Make sure that we have space for more links than last time. */
+  if (e->size_links < e->nr_links * engine_rebuild_link_alloc_margin)
+    e->size_links = e->nr_links * engine_rebuild_link_alloc_margin;
+
+  /* Allocate the new link list */
+  if ((e->links = (struct link *)malloc(sizeof(struct link) * e->size_links)) ==
+      NULL)
+    error("Failed to allocate cell-task links.");
+  e->nr_links = 0;
+
   tic2 = getticks();
 
   /* Count the number of tasks associated with each cell and
@@ -2272,8 +2461,27 @@ void engine_maketasks(struct engine *e) {
       message("Creating recv tasks took %.3f %s.",
               clocks_from_ticks(getticks() - tic2), clocks_getunit());
   }
+
+  /* Allocate memory for foreign particles */
+  engine_allocate_foreign_particles(e);
+
 #endif
 
+  /* Report the number of tasks we actually used */
+  if (e->verbose)
+    message(
+        "Nr. of tasks: %d allocated tasks: %d ratio: %f memory use: %zd MB.",
+        e->sched.nr_tasks, e->sched.size,
+        (float)e->sched.nr_tasks / (float)e->sched.size,
+        e->sched.size * sizeof(struct task) / (1024 * 1024));
+
+  /* Report the number of links we actually used */
+  if (e->verbose)
+    message(
+        "Nr. of links: %zd allocated links: %zd ratio: %f memory use: %zd MB.",
+        e->nr_links, e->size_links, (float)e->nr_links / (float)e->size_links,
+        e->size_links * sizeof(struct link) / (1024 * 1024));
+
   tic2 = getticks();
 
   /* Set the unlocks per task. */
diff --git a/src/engine_marktasks.c b/src/engine_marktasks.c
index 9c7a783c2547899816842cf9a05163e75d329aa8..3a26dbb2f47f9503aa0b93fa28d679f5eebaeede 100644
--- a/src/engine_marktasks.c
+++ b/src/engine_marktasks.c
@@ -69,6 +69,7 @@ void engine_marktasks_mapper(void *map_data, int num_elements,
   struct scheduler *s = (struct scheduler *)(((size_t *)extra_data)[2]);
   struct engine *e = (struct engine *)((size_t *)extra_data)[0];
   const int nodeID = e->nodeID;
+  const int with_limiter = e->policy & engine_policy_limiter;
 
   for (int ind = 0; ind < num_elements; ind++) {
 
@@ -90,6 +91,7 @@ void engine_marktasks_mapper(void *map_data, int num_elements,
         if (cell_is_active_hydro(ci, e)) {
           scheduler_activate(s, t);
           cell_activate_drift_part(ci, s);
+          if (with_limiter) cell_activate_limiter(ci, s);
         }
       }
 
@@ -99,6 +101,7 @@ void engine_marktasks_mapper(void *map_data, int num_elements,
         if (cell_is_active_hydro(ci, e)) {
           scheduler_activate(s, t);
           cell_activate_subcell_hydro_tasks(ci, NULL, s);
+          if (with_limiter) cell_activate_limiter(ci, s);
         }
       }
 
@@ -111,6 +114,16 @@ void engine_marktasks_mapper(void *map_data, int num_elements,
         if (cell_is_active_hydro(ci, e)) scheduler_activate(s, t);
       }
 
+      else if (t->type == task_type_self &&
+               t->subtype == task_subtype_limiter) {
+        if (cell_is_active_hydro(ci, e)) scheduler_activate(s, t);
+      }
+
+      else if (t->type == task_type_sub_self &&
+               t->subtype == task_subtype_limiter) {
+        if (cell_is_active_hydro(ci, e)) scheduler_activate(s, t);
+      }
+
 #ifdef EXTRA_HYDRO_LOOP
       else if (t_type == task_type_self && t_subtype == task_subtype_gradient) {
         if (cell_is_active_hydro(ci, e)) scheduler_activate(s, t);
@@ -207,6 +220,7 @@ void engine_marktasks_mapper(void *map_data, int num_elements,
       /* Only activate tasks that involve a local active cell. */
       if ((t_subtype == task_subtype_density ||
            t_subtype == task_subtype_gradient ||
+           t_subtype == task_subtype_limiter ||
            t_subtype == task_subtype_force) &&
           ((ci_active_hydro && ci_nodeID == nodeID) ||
            (cj_active_hydro && cj_nodeID == nodeID))) {
@@ -226,6 +240,10 @@ void engine_marktasks_mapper(void *map_data, int num_elements,
           if (ci_nodeID == nodeID) cell_activate_drift_part(ci, s);
           if (cj_nodeID == nodeID) cell_activate_drift_part(cj, s);
 
+          /* And the limiter */
+          if (ci_nodeID == nodeID && with_limiter) cell_activate_limiter(ci, s);
+          if (cj_nodeID == nodeID && with_limiter) cell_activate_limiter(cj, s);
+
           /* Check the sorts and activate them if needed. */
           cell_activate_hydro_sorts(ci, t->flags, s);
           cell_activate_hydro_sorts(cj, t->flags, s);
diff --git a/src/gravity_properties.c b/src/gravity_properties.c
index fffbf22ec187f179f0e80b7121beaa3a96de0260..e548e3010f3b46065a2510723b5bde97121b4c02 100644
--- a/src/gravity_properties.c
+++ b/src/gravity_properties.c
@@ -170,20 +170,22 @@ void gravity_props_print_snapshot(hid_t h_grpgrav,
   io_write_attribute_s(h_grpgrav, "Softening style",
                        kernel_gravity_softening_name);
   io_write_attribute_f(
-      h_grpgrav, "Comoving softening length",
+      h_grpgrav, "Comoving softening length [internal units]",
       p->epsilon_comoving * kernel_gravity_softening_plummer_equivalent);
-  io_write_attribute_f(h_grpgrav,
-                       "Comoving Softening length (Plummer equivalent)",
-                       p->epsilon_comoving);
   io_write_attribute_f(
-      h_grpgrav, "Maximal physical softening length",
+      h_grpgrav,
+      "Comoving Softening length (Plummer equivalent)  [internal units]",
+      p->epsilon_comoving);
+  io_write_attribute_f(
+      h_grpgrav, "Maximal physical softening length  [internal units]",
       p->epsilon_max_physical * kernel_gravity_softening_plummer_equivalent);
   io_write_attribute_f(h_grpgrav,
-                       "Maximal physical softening length (Plummer equivalent)",
+                       "Maximal physical softening length (Plummer equivalent) "
+                       " [internal units]",
                        p->epsilon_max_physical);
   io_write_attribute_f(h_grpgrav, "Opening angle", p->theta_crit);
   io_write_attribute_s(h_grpgrav, "Scheme", GRAVITY_IMPLEMENTATION);
-  io_write_attribute_d(h_grpgrav, "MM order", SELF_GRAVITY_MULTIPOLE_ORDER);
+  io_write_attribute_i(h_grpgrav, "MM order", SELF_GRAVITY_MULTIPOLE_ORDER);
   io_write_attribute_f(h_grpgrav, "Mesh a_smooth", p->a_smooth);
   io_write_attribute_f(h_grpgrav, "Mesh r_cut_max ratio", p->r_cut_max_ratio);
   io_write_attribute_f(h_grpgrav, "Mesh r_cut_min ratio", p->r_cut_min_ratio);
diff --git a/src/hydro/Default/hydro.h b/src/hydro/Default/hydro.h
index b4dc25495ab5be3c2e9c5ba0153e748a344f050f..2b1d19bc916889a5cfdc40b1357f1e3dfe9388af 100644
--- a/src/hydro/Default/hydro.h
+++ b/src/hydro/Default/hydro.h
@@ -645,6 +645,7 @@ __attribute__((always_inline)) INLINE static void hydro_first_init_part(
     struct part *restrict p, struct xpart *restrict xp) {
 
   p->time_bin = 0;
+  p->wakeup = time_bin_not_awake;
   xp->v_full[0] = p->v[0];
   xp->v_full[1] = p->v[1];
   xp->v_full[2] = p->v[2];
diff --git a/src/hydro/Default/hydro_debug.h b/src/hydro/Default/hydro_debug.h
index 3be9c9e1760591423edbd218d19b46ddf9aad01e..68367beaee97c285057cb055c1fbdbba5c370085 100644
--- a/src/hydro/Default/hydro_debug.h
+++ b/src/hydro/Default/hydro_debug.h
@@ -25,10 +25,11 @@ __attribute__((always_inline)) INLINE static void hydro_debug_particle(
       "x=[%.3e,%.3e,%.3e], "
       "v=[%.3e,%.3e,%.3e],v_full=[%.3e,%.3e,%.3e] \n a=[%.3e,%.3e,%.3e],\n "
       "h=%.3e, "
-      "wcount=%d, m=%.3e, dh_drho=%.3e, rho=%.3e, time_bin=%d\n",
+      "wcount=%d, m=%.3e, dh_drho=%.3e, rho=%.3e, time_bin=%d wakeup=%d\n",
       p->x[0], p->x[1], p->x[2], p->v[0], p->v[1], p->v[2], xp->v_full[0],
       xp->v_full[1], xp->v_full[2], p->a_hydro[0], p->a_hydro[1], p->a_hydro[2],
-      p->h, (int)p->density.wcount, p->mass, p->rho_dh, p->rho, p->time_bin);
+      p->h, (int)p->density.wcount, p->mass, p->rho_dh, p->rho, p->time_bin,
+      p->wakeup);
 }
 
 #endif /* SWIFT_DEFAULT_HYDRO_DEBUG_H */
diff --git a/src/hydro/Default/hydro_iact.h b/src/hydro/Default/hydro_iact.h
index 72808874c3fc6b58005d0e3ad450eafea8aa4b4d..85c586a4e921e38296453b71a2a2b9637971c28c 100644
--- a/src/hydro/Default/hydro_iact.h
+++ b/src/hydro/Default/hydro_iact.h
@@ -378,4 +378,28 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force(
   pi->force.v_sig = max(pi->force.v_sig, v_sig);
 }
 
+/**
+ * @brief Timestep limiter loop (symmetric version); the body is empty.
+ */
+__attribute__((always_inline)) INLINE static void runner_iact_limiter(
+    float r2, const float *dx, float hi, float hj, struct part *restrict pi,
+    struct part *restrict pj, float a, float H) {
+  /* Intentionally a no-op: none of the arguments is read or written. */
+  /* Nothing to do here if both particles are active */
+}
+
+/**
+ * @brief Timestep limiter loop (non-symmetric version); only pj is updated.
+ */
+__attribute__((always_inline)) INLINE static void runner_iact_nonsym_limiter(
+    float r2, const float *dx, float hi, float hj, struct part *restrict pi,
+    struct part *restrict pj, float a, float H) {
+  /* Compare pi's v_sig against const_limiter_max_v_sig_ratio times pj's. */
+  /* Wake up the neighbour? */
+  if (pi->force.v_sig > const_limiter_max_v_sig_ratio * pj->force.v_sig) {
+    /* Flag pj for wake-up; pi is only read, never modified here. */
+    pj->wakeup = time_bin_awake;
+  }
+}
+
 #endif /* SWIFT_DEFAULT_HYDRO_IACT_H */
diff --git a/src/hydro/Default/hydro_part.h b/src/hydro/Default/hydro_part.h
index 2a18e03cb533ca860f227a31152ef2058e0dd37d..7230826dc3c7c2a3486001ca9060dd07d55d0931 100644
--- a/src/hydro/Default/hydro_part.h
+++ b/src/hydro/Default/hydro_part.h
@@ -21,6 +21,7 @@
 
 #include "chemistry_struct.h"
 #include "cooling_struct.h"
+#include "tracers_struct.h"
 
 /* Extra particle data not needed during the SPH loops over neighbours. */
 struct xpart {
@@ -40,6 +41,9 @@ struct xpart {
   /* Additional data used to record cooling information */
   struct cooling_xpart_data cooling_data;
 
+  /* Additional data used by the tracers */
+  struct tracers_xpart_data tracers_data;
+
   float u_full;
 
   /* Old density. */
@@ -132,6 +136,9 @@ struct part {
   /* Particle time-bin */
   timebin_t time_bin;
 
+  /* Need waking-up ? */
+  char wakeup;
+
 #ifdef SWIFT_DEBUG_CHECKS
 
   /* Time of the last drift */
diff --git a/src/hydro/Gadget2/hydro.h b/src/hydro/Gadget2/hydro.h
index 8cad1c62ad669b9c0dc5bbe333985c1e20b882af..4a1eaf729bd5de96b43ef4b749d40038fcf39406 100644
--- a/src/hydro/Gadget2/hydro.h
+++ b/src/hydro/Gadget2/hydro.h
@@ -765,6 +765,7 @@ __attribute__((always_inline)) INLINE static void hydro_first_init_part(
     struct part *restrict p, struct xpart *restrict xp) {
 
   p->time_bin = 0;
+  p->wakeup = time_bin_not_awake;
   xp->v_full[0] = p->v[0];
   xp->v_full[1] = p->v[1];
   xp->v_full[2] = p->v[2];
diff --git a/src/hydro/Gadget2/hydro_debug.h b/src/hydro/Gadget2/hydro_debug.h
index d0642a03a4c4eecb2da80fdae473948e460c5e31..aeb43ee5d68930debfa867dc856465ac9d22902a 100644
--- a/src/hydro/Gadget2/hydro_debug.h
+++ b/src/hydro/Gadget2/hydro_debug.h
@@ -27,14 +27,14 @@ __attribute__((always_inline)) INLINE static void hydro_debug_particle(
       "h=%.3e, wcount=%.3f, wcount_dh=%.3e, m=%.3e, dh_drho=%.3e, rho=%.3e, "
       "P=%.3e, P_over_rho2=%.3e, S=%.3e, dS/dt=%.3e, c=%.3e\n"
       "divV=%.3e, rotV=[%.3e,%.3e,%.3e], balsara=%.3e \n "
-      "v_sig=%e dh/dt=%.3e time_bin=%d\n",
+      "v_sig=%e dh/dt=%.3e time_bin=%d wakeup=%d\n",
       p->x[0], p->x[1], p->x[2], p->v[0], p->v[1], p->v[2], xp->v_full[0],
       xp->v_full[1], xp->v_full[2], p->a_hydro[0], p->a_hydro[1], p->a_hydro[2],
       p->h, p->density.wcount, p->density.wcount_dh, p->mass, p->density.rho_dh,
       p->rho, hydro_get_comoving_pressure(p), p->force.P_over_rho2, p->entropy,
       p->entropy_dt, p->force.soundspeed, p->density.div_v, p->density.rot_v[0],
       p->density.rot_v[1], p->density.rot_v[2], p->force.balsara,
-      p->force.v_sig, p->force.h_dt, p->time_bin);
+      p->force.v_sig, p->force.h_dt, p->time_bin, p->wakeup);
 }
 
 #endif /* SWIFT_GADGET2_HYDRO_DEBUG_H */
diff --git a/src/hydro/Gadget2/hydro_iact.h b/src/hydro/Gadget2/hydro_iact.h
index a3c5e21dbdf8df60b25b01c0326c33c3a10d1bce..1ded85acfb7486b1286ddfbbfa698da0f4344e7d 100644
--- a/src/hydro/Gadget2/hydro_iact.h
+++ b/src/hydro/Gadget2/hydro_iact.h
@@ -293,7 +293,7 @@ runner_iact_nonsym_2_vec_density(float *R2, float *Dx, float *Dy, float *Dz,
                                  vector *wcountSum, vector *wcount_dhSum,
                                  vector *div_vSum, vector *curlvxSum,
                                  vector *curlvySum, vector *curlvzSum,
-                                 mask_t mask, mask_t mask2, short mask_cond) {
+                                 mask_t mask, mask_t mask2, int mask_cond) {
 
   vector r, ri, ui, wi, wi_dx;
   vector dvx, dvy, dvz;
@@ -1051,4 +1051,34 @@ runner_iact_nonsym_2_vec_force(
 
 #endif
 
+/**
+ * @brief Timestep limiter loop (symmetric version); the body is empty.
+ */
+__attribute__((always_inline)) INLINE static void runner_iact_limiter(
+    float r2, const float *dx, float hi, float hj, struct part *restrict pi,
+    struct part *restrict pj, float a, float H) {
+  /* Intentionally a no-op: none of the arguments is read or written. */
+  /* Nothing to do here if both particles are active */
+}
+
+/**
+ * @brief Timestep limiter loop (non-symmetric version); only pj is updated.
+ */
+__attribute__((always_inline)) INLINE static void runner_iact_nonsym_limiter(
+    float r2, const float *dx, float hi, float hj, struct part *restrict pi,
+    struct part *restrict pj, float a, float H) {
+  /* Compare pi's v_sig against const_limiter_max_v_sig_ratio times pj's. */
+  /* Wake up the neighbour? */
+  if (pi->force.v_sig > const_limiter_max_v_sig_ratio * pj->force.v_sig) {
+    /* Flag pj for wake-up; pi is only read, never modified here. */
+    pj->wakeup = time_bin_awake;
+
+    /* NOTE(review): a disabled alternative used to sit here as dead code: */
+    /*   set wakeup only while it is still time_bin_not_awake, and negate  */
+    /*   an already-positive wakeup value instead of overwriting it.       */
+    /* As written, the flag is overwritten unconditionally. Confirm the    */
+    /* intended behaviour before reviving that variant.                    */
+  }
+}
+
 #endif /* SWIFT_GADGET2_HYDRO_IACT_H */
diff --git a/src/hydro/Gadget2/hydro_part.h b/src/hydro/Gadget2/hydro_part.h
index 369eff881d5464a3ececca60cfedf077411e6d3d..28dbb2d0f7bb5e79e96a1a3f7e06fdb4086c6c5e 100644
--- a/src/hydro/Gadget2/hydro_part.h
+++ b/src/hydro/Gadget2/hydro_part.h
@@ -154,6 +154,9 @@ struct part {
   /* Time-step length */
   timebin_t time_bin;
 
+  /* Need waking-up ? */
+  char wakeup;
+
 #ifdef SWIFT_DEBUG_CHECKS
 
   /* Time of the last drift */
diff --git a/src/hydro/GizmoMFM/hydro.h b/src/hydro/GizmoMFM/hydro.h
index b00a3578d02f492050c328af49a6108d566e9204..1ab1c1404f54450ddff8d95b51fdf3970daf7377 100644
--- a/src/hydro/GizmoMFM/hydro.h
+++ b/src/hydro/GizmoMFM/hydro.h
@@ -137,6 +137,9 @@ __attribute__((always_inline)) INLINE static void hydro_first_init_part(
                                  p->conserved.momentum[2] * p->v[2]);
 #endif
 
+  p->time_bin = 0;
+  p->wakeup = time_bin_not_awake;
+
   /* initialize the particle velocity based on the primitive fluid velocity */
   xp->v_full[0] = p->v[0];
   xp->v_full[1] = p->v[1];
diff --git a/src/hydro/GizmoMFM/hydro_debug.h b/src/hydro/GizmoMFM/hydro_debug.h
index e8b0914bd3cf6a99210399c6fc654e526319009f..e3c9f793aec92c7bfa2527143e6ad771c3897a09 100644
--- a/src/hydro/GizmoMFM/hydro_debug.h
+++ b/src/hydro/GizmoMFM/hydro_debug.h
@@ -27,6 +27,7 @@ __attribute__((always_inline)) INLINE static void hydro_debug_particle(
       "a=[%.3e,%.3e,%.3e], "
       "h=%.3e, "
       "time_bin=%d, "
+      "wakeup=%d, "
       "rho=%.3e, "
       "P=%.3e, "
       "gradients={"
@@ -51,7 +52,7 @@ __attribute__((always_inline)) INLINE static void hydro_debug_particle(
       "wcount_dh=%.3e, "
       "wcount=%.3e}\n",
       p->x[0], p->x[1], p->x[2], p->v[0], p->v[1], p->v[2], p->a_hydro[0],
-      p->a_hydro[1], p->a_hydro[2], p->h, p->time_bin, p->rho, p->P,
+      p->a_hydro[1], p->a_hydro[2], p->h, p->time_bin, p->wakeup, p->rho, p->P,
       p->gradients.rho[0], p->gradients.rho[1], p->gradients.rho[2],
       p->gradients.v[0][0], p->gradients.v[0][1], p->gradients.v[0][2],
       p->gradients.v[1][0], p->gradients.v[1][1], p->gradients.v[1][2],
diff --git a/src/hydro/GizmoMFM/hydro_iact.h b/src/hydro/GizmoMFM/hydro_iact.h
index 38a97cbea39c1ed5c6926c911941e655e52362aa..09d4c7c70ee2bae8a31d10cb4a568c4627c7b3cd 100644
--- a/src/hydro/GizmoMFM/hydro_iact.h
+++ b/src/hydro/GizmoMFM/hydro_iact.h
@@ -486,4 +486,29 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force(
   runner_iact_fluxes_common(r2, dx, hi, hj, pi, pj, 0, a, H);
 }
 
+/**
+ * @brief Timestep limiter loop (symmetric version); the body is empty.
+ */
+__attribute__((always_inline)) INLINE static void runner_iact_limiter(
+    float r2, const float *dx, float hi, float hj, struct part *restrict pi,
+    struct part *restrict pj, float a, float H) {
+  /* Intentionally a no-op: none of the arguments is read or written. */
+  /* Nothing to do here if both particles are active */
+}
+
+/**
+ * @brief Timestep limiter loop (non-symmetric version); only pj is updated.
+ */
+__attribute__((always_inline)) INLINE static void runner_iact_nonsym_limiter(
+    float r2, const float *dx, float hi, float hj, struct part *restrict pi,
+    struct part *restrict pj, float a, float H) {
+  /* Compare pi's timestepvars.vmax against the scaled value of pj's. */
+  /* Wake up the neighbour? */
+  if (pi->timestepvars.vmax >
+      const_limiter_max_v_sig_ratio * pj->timestepvars.vmax) {
+    /* Flag pj for wake-up; pi is only read, never modified here. */
+    pj->wakeup = time_bin_awake;
+  }
+}
+
 #endif /* SWIFT_GIZMO_MFM_HYDRO_IACT_H */
diff --git a/src/hydro/GizmoMFM/hydro_part.h b/src/hydro/GizmoMFM/hydro_part.h
index 0055d7d86a35746a8ba90015b3a6986f8ddb5f9f..a05cae18aaf18feb80f7a4ec383434eadece8a41 100644
--- a/src/hydro/GizmoMFM/hydro_part.h
+++ b/src/hydro/GizmoMFM/hydro_part.h
@@ -21,6 +21,7 @@
 
 #include "chemistry_struct.h"
 #include "cooling_struct.h"
+#include "tracers_struct.h"
 
 /* Extra particle data not needed during the computation. */
 struct xpart {
@@ -40,6 +41,9 @@ struct xpart {
   /* Additional data used to record cooling information */
   struct cooling_xpart_data cooling_data;
 
+  /* Additional data used by the tracers */
+  struct tracers_xpart_data tracers_data;
+
 } SWIFT_STRUCT_ALIGN;
 
 /* Data of a single particle. */
@@ -187,6 +191,9 @@ struct part {
   /* Time-step length */
   timebin_t time_bin;
 
+  /* Need waking-up ? */
+  char wakeup;
+
 #ifdef SWIFT_DEBUG_CHECKS
 
   /* Time of the last drift */
diff --git a/src/hydro/GizmoMFV/hydro.h b/src/hydro/GizmoMFV/hydro.h
index 284b67b3b62cd7c6b75de192b299c0c48d170a05..f4e2b829769a58a4896516907317d02c936f2d65 100644
--- a/src/hydro/GizmoMFV/hydro.h
+++ b/src/hydro/GizmoMFV/hydro.h
@@ -121,6 +121,9 @@ __attribute__((always_inline)) INLINE static void hydro_first_init_part(
 
   const float mass = p->conserved.mass;
 
+  p->time_bin = 0;
+  p->wakeup = time_bin_not_awake;
+
   p->primitives.v[0] = p->v[0];
   p->primitives.v[1] = p->v[1];
   p->primitives.v[2] = p->v[2];
diff --git a/src/hydro/GizmoMFV/hydro_debug.h b/src/hydro/GizmoMFV/hydro_debug.h
index 8af3f824666529efad833c3bd520ace779718449..181bd6f82d547803c7303bd19be11cf66dc3a8a8 100644
--- a/src/hydro/GizmoMFV/hydro_debug.h
+++ b/src/hydro/GizmoMFV/hydro_debug.h
@@ -27,6 +27,7 @@ __attribute__((always_inline)) INLINE static void hydro_debug_particle(
       "a=[%.3e,%.3e,%.3e], "
       "h=%.3e, "
       "time_bin=%d, "
+      "wakeup=%d, "
       "primitives={"
       "v=[%.3e,%.3e,%.3e], "
       "rho=%.3e, "
@@ -53,9 +54,9 @@ __attribute__((always_inline)) INLINE static void hydro_debug_particle(
       "wcount_dh=%.3e, "
       "wcount=%.3e}\n",
       p->x[0], p->x[1], p->x[2], p->v[0], p->v[1], p->v[2], p->a_hydro[0],
-      p->a_hydro[1], p->a_hydro[2], p->h, p->time_bin, p->primitives.v[0],
-      p->primitives.v[1], p->primitives.v[2], p->primitives.rho,
-      p->primitives.P, p->primitives.gradients.rho[0],
+      p->a_hydro[1], p->a_hydro[2], p->h, p->time_bin, p->wakeup,
+      p->primitives.v[0], p->primitives.v[1], p->primitives.v[2],
+      p->primitives.rho, p->primitives.P, p->primitives.gradients.rho[0],
       p->primitives.gradients.rho[1], p->primitives.gradients.rho[2],
       p->primitives.gradients.v[0][0], p->primitives.gradients.v[0][1],
       p->primitives.gradients.v[0][2], p->primitives.gradients.v[1][0],
diff --git a/src/hydro/GizmoMFV/hydro_iact.h b/src/hydro/GizmoMFV/hydro_iact.h
index 2f73e67ea2fdcecc527de8b1af0d15731f967b9b..d882549f8c55018419a2e1730d2ac099bbe1f5ee 100644
--- a/src/hydro/GizmoMFV/hydro_iact.h
+++ b/src/hydro/GizmoMFV/hydro_iact.h
@@ -501,4 +501,29 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force(
   runner_iact_fluxes_common(r2, dx, hi, hj, pi, pj, 0, a, H);
 }
 
+/**
+ * @brief Timestep limiter loop (symmetric version); the body is empty.
+ */
+__attribute__((always_inline)) INLINE static void runner_iact_limiter(
+    float r2, const float *dx, float hi, float hj, struct part *restrict pi,
+    struct part *restrict pj, float a, float H) {
+  /* Intentionally a no-op: none of the arguments is read or written. */
+  /* Nothing to do here if both particles are active */
+}
+
+/**
+ * @brief Timestep limiter loop (non-symmetric version); only pj is updated.
+ */
+__attribute__((always_inline)) INLINE static void runner_iact_nonsym_limiter(
+    float r2, const float *dx, float hi, float hj, struct part *restrict pi,
+    struct part *restrict pj, float a, float H) {
+  /* Compare pi's timestepvars.vmax against the scaled value of pj's. */
+  /* Wake up the neighbour? */
+  if (pi->timestepvars.vmax >
+      const_limiter_max_v_sig_ratio * pj->timestepvars.vmax) {
+    /* Flag pj for wake-up; pi is only read, never modified here. */
+    pj->wakeup = time_bin_awake;
+  }
+}
+
 #endif /* SWIFT_GIZMO_MFV_HYDRO_IACT_H */
diff --git a/src/hydro/GizmoMFV/hydro_part.h b/src/hydro/GizmoMFV/hydro_part.h
index 6248ddb11daf39a65be9a57fe51e40386ecda50b..8794b597712963e962cc23c796e9769efd4ea620 100644
--- a/src/hydro/GizmoMFV/hydro_part.h
+++ b/src/hydro/GizmoMFV/hydro_part.h
@@ -21,6 +21,7 @@
 
 #include "chemistry_struct.h"
 #include "cooling_struct.h"
+#include "tracers_struct.h"
 
 /* Extra particle data not needed during the computation. */
 struct xpart {
@@ -40,6 +41,9 @@ struct xpart {
   /* Additional data used to record cooling information */
   struct cooling_xpart_data cooling_data;
 
+  /* Additional data used by the tracers */
+  struct tracers_xpart_data tracers_data;
+
 } SWIFT_STRUCT_ALIGN;
 
 /* Data of a single particle. */
@@ -198,6 +202,9 @@ struct part {
   /* Time-step length */
   timebin_t time_bin;
 
+  /* Need waking-up ? */
+  char wakeup;
+
 #ifdef SWIFT_DEBUG_CHECKS
 
   /* Time of the last drift */
diff --git a/src/hydro/Minimal/hydro.h b/src/hydro/Minimal/hydro.h
index d638c168f23c95dc3010f838846f4dfc0522bee5..524774435d03a6d808c4535a6c54b68ad16bcb66 100644
--- a/src/hydro/Minimal/hydro.h
+++ b/src/hydro/Minimal/hydro.h
@@ -740,6 +740,7 @@ __attribute__((always_inline)) INLINE static void hydro_first_init_part(
     struct part *restrict p, struct xpart *restrict xp) {
 
   p->time_bin = 0;
+  p->wakeup = time_bin_not_awake;
   xp->v_full[0] = p->v[0];
   xp->v_full[1] = p->v[1];
   xp->v_full[2] = p->v[2];
diff --git a/src/hydro/Minimal/hydro_debug.h b/src/hydro/Minimal/hydro_debug.h
index 73ffc26b8acf687a5445591ddccd72ea8e8fa8ae..3fadd05f9b93e53f1855c5daa7727d272ffe0fa5 100644
--- a/src/hydro/Minimal/hydro_debug.h
+++ b/src/hydro/Minimal/hydro_debug.h
@@ -41,12 +41,12 @@ __attribute__((always_inline)) INLINE static void hydro_debug_particle(
       "v_full=[%.3g, %.3g, %.3g], a=[%.3g, %.3g, %.3g], \n "
       "m=%.3g, u=%.3g, du/dt=%.3g, P=%.3g, c_s=%.3g, \n "
       "v_sig=%.3g, h=%.3g, dh/dt=%.3g, wcount=%.3g, rho=%.3g, \n "
-      "dh_drho=%.3g, time_bin=%d \n",
+      "dh_drho=%.3g, time_bin=%d wakeup=%d \n",
       p->x[0], p->x[1], p->x[2], p->v[0], p->v[1], p->v[2], xp->v_full[0],
       xp->v_full[1], xp->v_full[2], p->a_hydro[0], p->a_hydro[1], p->a_hydro[2],
       p->mass, p->u, p->u_dt, hydro_get_comoving_pressure(p),
       p->force.soundspeed, p->force.v_sig, p->h, p->force.h_dt,
-      p->density.wcount, p->rho, p->density.rho_dh, p->time_bin);
+      p->density.wcount, p->rho, p->density.rho_dh, p->time_bin, p->wakeup);
 }
 
 #endif /* SWIFT_MINIMAL_HYDRO_DEBUG_H */
diff --git a/src/hydro/Minimal/hydro_iact.h b/src/hydro/Minimal/hydro_iact.h
index b29f44588c2e13bb5b7c5c9cd5297205557c3fc9..7fc7a3c67f6c832d70109319ad964e25df30ff4e 100644
--- a/src/hydro/Minimal/hydro_iact.h
+++ b/src/hydro/Minimal/hydro_iact.h
@@ -424,4 +424,28 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force(
   pi->force.v_sig = max(pi->force.v_sig, v_sig);
 }
 
+/**
+ * @brief Timestep limiter loop (symmetric version); intentionally empty
+ */
+__attribute__((always_inline)) INLINE static void runner_iact_limiter(
+    float r2, const float *dx, float hi, float hj, struct part *restrict pi,
+    struct part *restrict pj, float a, float H) {
+
+  /* Nothing to do here if both particles are active: no argument is used */
+}
+
+/**
+ * @brief Timestep limiter loop (non-symmetric version); may only flag pj
+ */
+__attribute__((always_inline)) INLINE static void runner_iact_nonsym_limiter(
+    float r2, const float *dx, float hi, float hj, struct part *restrict pi,
+    struct part *restrict pj, float a, float H) {
+
+  /* Wake up the neighbour if pi's v_sig exceeds the allowed ratio to pj's */
+  if (pi->force.v_sig > const_limiter_max_v_sig_ratio * pj->force.v_sig) {
+
+    pj->wakeup = time_bin_awake;
+  }
+}
+
 #endif /* SWIFT_MINIMAL_HYDRO_IACT_H */
diff --git a/src/hydro/Minimal/hydro_part.h b/src/hydro/Minimal/hydro_part.h
index 1d14a94f2d91bf259df54c875a32bf3072ad33b6..80e472194e6a008859fa7e7fde9c79df6611142b 100644
--- a/src/hydro/Minimal/hydro_part.h
+++ b/src/hydro/Minimal/hydro_part.h
@@ -34,6 +34,7 @@
 
 #include "chemistry_struct.h"
 #include "cooling_struct.h"
+#include "tracers_struct.h"
 
 /**
  * @brief Particle fields not needed during the SPH loops over neighbours.
@@ -62,6 +63,9 @@ struct xpart {
   /*! Additional data used to record cooling information */
   struct cooling_xpart_data cooling_data;
 
+  /*! Additional data used by the tracers */
+  struct tracers_xpart_data tracers_data;
+
 } SWIFT_STRUCT_ALIGN;
 
 /**
@@ -168,6 +172,9 @@ struct part {
   /*! Time-step length */
   timebin_t time_bin;
 
+  /*! Does this particle need to be woken up by the time-step limiter? */
+  char wakeup;
+
 #ifdef SWIFT_DEBUG_CHECKS
 
   /* Time of the last drift */
diff --git a/src/hydro/Planetary/hydro.h b/src/hydro/Planetary/hydro.h
index 957e96dcf391b9027016926a969b28366590664f..ed7aa6b89d2b50ab2e00cedb0b3ef6779689feb1 100644
--- a/src/hydro/Planetary/hydro.h
+++ b/src/hydro/Planetary/hydro.h
@@ -735,6 +735,7 @@ __attribute__((always_inline)) INLINE static void hydro_first_init_part(
     struct part *restrict p, struct xpart *restrict xp) {
 
   p->time_bin = 0;
+  p->wakeup = time_bin_not_awake;
   xp->v_full[0] = p->v[0];
   xp->v_full[1] = p->v[1];
   xp->v_full[2] = p->v[2];
diff --git a/src/hydro/Planetary/hydro_debug.h b/src/hydro/Planetary/hydro_debug.h
index 74261f3b49e2881af1c403013005560efa53a7f1..306f7526404599a051f83dc1b61886ed2aa5b69e 100644
--- a/src/hydro/Planetary/hydro_debug.h
+++ b/src/hydro/Planetary/hydro_debug.h
@@ -42,12 +42,13 @@ __attribute__((always_inline)) INLINE static void hydro_debug_particle(
       "v_full=[%.3g, %.3g, %.3g], a=[%.3g, %.3g, %.3g], \n "
       "m=%.3g, u=%.3g, du/dt=%.3g, P=%.3g, c_s=%.3g, \n "
       "v_sig=%.3g, h=%.3g, dh/dt=%.3g, wcount=%.3g, rho=%.3g, \n "
-      "dh_drho=%.3g, time_bin=%d, mat_id=%d \n",
+      "dh_drho=%.3g, time_bin=%d, wakeup=%d mat_id=%d \n",
       p->x[0], p->x[1], p->x[2], p->v[0], p->v[1], p->v[2], xp->v_full[0],
       xp->v_full[1], xp->v_full[2], p->a_hydro[0], p->a_hydro[1], p->a_hydro[2],
       p->mass, p->u, p->u_dt, hydro_get_comoving_pressure(p),
       p->force.soundspeed, p->force.v_sig, p->h, p->force.h_dt,
-      p->density.wcount, p->rho, p->density.rho_dh, p->time_bin, p->mat_id);
+      p->density.wcount, p->rho, p->density.rho_dh, p->time_bin, p->wakeup,
+      p->mat_id);
 }
 
 #endif /* SWIFT_PLANETARY_HYDRO_DEBUG_H */
diff --git a/src/hydro/Planetary/hydro_iact.h b/src/hydro/Planetary/hydro_iact.h
index 19ee002b85c1b0bc8ed621a029059cd02c5e670f..afebb6a406bd310f38d51dcb32fc25da6b2674b5 100644
--- a/src/hydro/Planetary/hydro_iact.h
+++ b/src/hydro/Planetary/hydro_iact.h
@@ -346,4 +346,28 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force(
   pi->force.v_sig = max(pi->force.v_sig, v_sig);
 }
 
+/**
+ * @brief Timestep limiter loop (symmetric version); intentionally empty
+ */
+__attribute__((always_inline)) INLINE static void runner_iact_limiter(
+    float r2, const float *dx, float hi, float hj, struct part *restrict pi,
+    struct part *restrict pj, float a, float H) {
+
+  /* Nothing to do here if both particles are active: no argument is used */
+}
+
+/**
+ * @brief Timestep limiter loop (non-symmetric version); may only flag pj
+ */
+__attribute__((always_inline)) INLINE static void runner_iact_nonsym_limiter(
+    float r2, const float *dx, float hi, float hj, struct part *restrict pi,
+    struct part *restrict pj, float a, float H) {
+
+  /* Wake up the neighbour if pi's v_sig exceeds the allowed ratio to pj's */
+  if (pi->force.v_sig > const_limiter_max_v_sig_ratio * pj->force.v_sig) {
+
+    pj->wakeup = time_bin_awake;
+  }
+}
+
 #endif /* SWIFT_PLANETARY_HYDRO_IACT_H */
diff --git a/src/hydro/Planetary/hydro_part.h b/src/hydro/Planetary/hydro_part.h
index 4087cef62e873231a556f82869a7f6d848c8d72c..1955366da7265c4c40922d1e7290bc9128641600 100644
--- a/src/hydro/Planetary/hydro_part.h
+++ b/src/hydro/Planetary/hydro_part.h
@@ -36,6 +36,7 @@
 #include "chemistry_struct.h"
 #include "cooling_struct.h"
 #include "equation_of_state.h"  // For enum material_id
+#include "tracers_struct.h"
 
 /**
  * @brief Particle fields not needed during the SPH loops over neighbours.
@@ -64,6 +65,9 @@ struct xpart {
   /*! Additional data used to record cooling information */
   struct cooling_xpart_data cooling_data;
 
+  /*! Additional data used by the tracers */
+  struct tracers_xpart_data tracers_data;
+
 } SWIFT_STRUCT_ALIGN;
 
 /**
@@ -173,6 +177,9 @@ struct part {
   /*! Time-step length */
   timebin_t time_bin;
 
+  /*! Does this particle need to be woken up by the time-step limiter? */
+  char wakeup;
+
 #ifdef SWIFT_DEBUG_CHECKS
 
   /* Time of the last drift */
diff --git a/src/hydro/PressureEnergy/hydro.h b/src/hydro/PressureEnergy/hydro.h
index 8dd43cd72968f89cfc818342d618688f2f39cbd3..400a84915b700464b9b86f74400ba578b4efa446 100644
--- a/src/hydro/PressureEnergy/hydro.h
+++ b/src/hydro/PressureEnergy/hydro.h
@@ -763,6 +763,7 @@ __attribute__((always_inline)) INLINE static void hydro_first_init_part(
     struct part *restrict p, struct xpart *restrict xp) {
 
   p->time_bin = 0;
+  p->wakeup = time_bin_not_awake;
   xp->v_full[0] = p->v[0];
   xp->v_full[1] = p->v[1];
   xp->v_full[2] = p->v[2];
@@ -802,4 +803,4 @@ hydro_set_init_internal_energy(struct part *p, float u_init) {
 __attribute__((always_inline)) INLINE static void hydro_remove_part(
     const struct part *p, const struct xpart *xp) {}
 
-#endif /* SWIFT_MINIMAL_HYDRO_H */
+#endif /* SWIFT_PRESSURE_ENERGY_HYDRO_H */
diff --git a/src/hydro/PressureEnergy/hydro_debug.h b/src/hydro/PressureEnergy/hydro_debug.h
index 6324167f12726e155eeaa3359be9741aca3a1e42..7ffc370ed4d6abd273fc3d8d5b887f5ccf8e001c 100644
--- a/src/hydro/PressureEnergy/hydro_debug.h
+++ b/src/hydro/PressureEnergy/hydro_debug.h
@@ -32,12 +32,12 @@ __attribute__((always_inline)) INLINE static void hydro_debug_particle(
       "u=%.3e, du/dt=%.3e v_sig=%.3e, P=%.3e\n"
       "h=%.3e, dh/dt=%.3e wcount=%d, m=%.3e, dh_drho=%.3e, rho=%.3e, \n"
       "p_dh=%.3e, p_bar=%.3e \n"
-      "time_bin=%d\n",
+      "time_bin=%d wakeup=%d\n",
       p->x[0], p->x[1], p->x[2], p->v[0], p->v[1], p->v[2], xp->v_full[0],
       xp->v_full[1], xp->v_full[2], p->a_hydro[0], p->a_hydro[1], p->a_hydro[2],
       p->u, p->u_dt, p->force.v_sig, hydro_get_comoving_pressure(p), p->h,
       p->force.h_dt, (int)p->density.wcount, p->mass, p->density.rho_dh, p->rho,
-      p->density.pressure_bar_dh, p->pressure_bar, p->time_bin);
+      p->density.pressure_bar_dh, p->pressure_bar, p->time_bin, p->wakeup);
 }
 
 #endif /* SWIFT_MINIMAL_HYDRO_DEBUG_H */
diff --git a/src/hydro/PressureEnergy/hydro_iact.h b/src/hydro/PressureEnergy/hydro_iact.h
index 4146e61a53dd7ece57e263cb90308e2579aa3930..ae154ea549a52cb24ed7c69453533b7d59b39a85 100644
--- a/src/hydro/PressureEnergy/hydro_iact.h
+++ b/src/hydro/PressureEnergy/hydro_iact.h
@@ -17,8 +17,8 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  *
  ******************************************************************************/
-#ifndef SWIFT_MINIMAL_HYDRO_IACT_H
-#define SWIFT_MINIMAL_HYDRO_IACT_H
+#ifndef SWIFT_PRESSURE_ENERGY_HYDRO_IACT_H
+#define SWIFT_PRESSURE_ENERGY_HYDRO_IACT_H
 
 /**
  * @file PressureEnergy/hydro_iact.h
@@ -418,5 +418,28 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force(
   /* Update the signal velocity. */
   pi->force.v_sig = max(pi->force.v_sig, v_sig);
 }
+/**
+ * @brief Timestep limiter loop (symmetric version); intentionally empty
+ */
+__attribute__((always_inline)) INLINE static void runner_iact_limiter(
+    float r2, const float* dx, float hi, float hj, struct part* restrict pi,
+    struct part* restrict pj, float a, float H) {
+
+  /* Nothing to do here if both particles are active: no argument is used */
+}
+
+/**
+ * @brief Timestep limiter loop (non-symmetric version); may only flag pj
+ */
+__attribute__((always_inline)) INLINE static void runner_iact_nonsym_limiter(
+    float r2, const float* dx, float hi, float hj, struct part* restrict pi,
+    struct part* restrict pj, float a, float H) {
+
+  /* Wake up the neighbour if pi's v_sig exceeds the allowed ratio to pj's */
+  if (pi->force.v_sig > const_limiter_max_v_sig_ratio * pj->force.v_sig) {
+
+    pj->wakeup = time_bin_awake;
+  }
+}
 
-#endif /* SWIFT_MINIMAL_HYDRO_IACT_H */
+#endif /* SWIFT_PRESSURE_ENERGY_HYDRO_IACT_H */
diff --git a/src/hydro/PressureEnergy/hydro_io.h b/src/hydro/PressureEnergy/hydro_io.h
index 06762c6124c2c726c4e687980455ab956a5fa79e..701c12283bf77acef4af77598f57705a2b364fa1 100644
--- a/src/hydro/PressureEnergy/hydro_io.h
+++ b/src/hydro/PressureEnergy/hydro_io.h
@@ -17,8 +17,8 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  *
  ******************************************************************************/
-#ifndef SWIFT_PRESSURE_ENERGY_HYDRO_IACT_H
-#define SWIFT_PRESSURE_ENERGY_HYDRO_IACT_H
+#ifndef SWIFT_PRESSURE_ENERGY_HYDRO_IO_H
+#define SWIFT_PRESSURE_ENERGY_HYDRO_IO_H
 /**
  * @file PressureEnergy/hydro_io.h
  * @brief P-U implementation of SPH (i/o routines)
diff --git a/src/hydro/PressureEnergy/hydro_part.h b/src/hydro/PressureEnergy/hydro_part.h
index bc7d14b612556dc722ecca67dd6ce823192e00f0..218fbf5dc17559b07974b68e42f69f4e7a0e8e3b 100644
--- a/src/hydro/PressureEnergy/hydro_part.h
+++ b/src/hydro/PressureEnergy/hydro_part.h
@@ -33,6 +33,7 @@
 
 #include "chemistry_struct.h"
 #include "cooling_struct.h"
+#include "tracers_struct.h"
 
 /**
  * @brief Particle fields not needed during the SPH loops over neighbours.
@@ -61,6 +62,9 @@ struct xpart {
   /*! Additional data used to record cooling information */
   struct cooling_xpart_data cooling_data;
 
+  /*! Additional data used by the tracers */
+  struct tracers_xpart_data tracers_data;
+
 } SWIFT_STRUCT_ALIGN;
 
 /**
@@ -168,6 +172,9 @@ struct part {
   /*! Time-step length */
   timebin_t time_bin;
 
+  /*! Does this particle need to be woken up by the time-step limiter? */
+  char wakeup;
+
 #ifdef SWIFT_DEBUG_CHECKS
 
   /* Time of the last drift */
diff --git a/src/hydro/PressureEnergyMorrisMonaghanAV/hydro.h b/src/hydro/PressureEnergyMorrisMonaghanAV/hydro.h
index deb013579fd33340236d3dd5817021fd100c0fcb..7ef55b86c24972f8f287273441da99f26285c531 100644
--- a/src/hydro/PressureEnergyMorrisMonaghanAV/hydro.h
+++ b/src/hydro/PressureEnergyMorrisMonaghanAV/hydro.h
@@ -50,22 +50,26 @@
 #include <float.h>
 
 /**
- * @brief Returns the comoving internal energy of a particle
+ * @brief Returns the comoving internal energy of a particle at the last
+ * time the particle was kicked.
  *
  * For implementations where the main thermodynamic variable
  * is not internal energy, this function computes the internal
  * energy from the thermodynamic variable.
  *
  * @param p The particle of interest
+ * @param xp The extended data of the particle of interest.
  */
 __attribute__((always_inline)) INLINE static float
-hydro_get_comoving_internal_energy(const struct part *restrict p) {
+hydro_get_comoving_internal_energy(const struct part *restrict p,
+                                   const struct xpart *restrict xp) {
 
-  return p->u;
+  return xp->u_full;
 }
 
 /**
- * @brief Returns the physical internal energy of a particle
+ * @brief Returns the physical internal energy of a particle at the last
+ * time the particle was kicked.
  *
  * For implementations where the main thermodynamic variable
  * is not internal energy, this function computes the internal
@@ -73,13 +77,15 @@ hydro_get_comoving_internal_energy(const struct part *restrict p) {
  * physical coordinates.
  *
  * @param p The particle of interest.
+ * @param xp The extended data of the particle of interest.
  * @param cosmo The cosmological model.
  */
 __attribute__((always_inline)) INLINE static float
 hydro_get_physical_internal_energy(const struct part *restrict p,
+                                   const struct xpart *restrict xp,
                                    const struct cosmology *cosmo) {
 
-  return p->u * cosmo->a_factor_internal_energy;
+  return xp->u_full * cosmo->a_factor_internal_energy;
 }
 
 /**
@@ -734,6 +740,7 @@ __attribute__((always_inline)) INLINE static void hydro_first_init_part(
     struct part *restrict p, struct xpart *restrict xp) {
 
   p->time_bin = 0;
+  p->wakeup = time_bin_not_awake;
   xp->v_full[0] = p->v[0];
   xp->v_full[1] = p->v[1];
   xp->v_full[2] = p->v[2];
diff --git a/src/hydro/PressureEnergyMorrisMonaghanAV/hydro_debug.h b/src/hydro/PressureEnergyMorrisMonaghanAV/hydro_debug.h
index ead5fcc0c842d8018f784a1084941bdb9ebcb6ca..d0cd5367f94cd90f36cc2b738a63c7963adbd445 100644
--- a/src/hydro/PressureEnergyMorrisMonaghanAV/hydro_debug.h
+++ b/src/hydro/PressureEnergyMorrisMonaghanAV/hydro_debug.h
@@ -36,12 +36,13 @@ __attribute__((always_inline)) INLINE static void hydro_debug_particle(
       "u=%.3e, du/dt=%.3e v_sig=%.3e, P=%.3e\n"
       "h=%.3e, dh/dt=%.3e wcount=%d, m=%.3e, dh_drho=%.3e, rho=%.3e, \n"
       "p_dh=%.3e, p_bar=%.3e \n"
-      "time_bin=%d, alpha=%.3e\n",
+      "time_bin=%d, wakeup=%d alpha=%.3e\n",
       p->x[0], p->x[1], p->x[2], p->v[0], p->v[1], p->v[2], xp->v_full[0],
       xp->v_full[1], xp->v_full[2], p->a_hydro[0], p->a_hydro[1], p->a_hydro[2],
       p->u, p->u_dt, p->force.v_sig, hydro_get_comoving_pressure(p), p->h,
       p->force.h_dt, (int)p->density.wcount, p->mass, p->density.rho_dh, p->rho,
-      p->density.pressure_bar_dh, p->pressure_bar, p->time_bin, p->alpha);
+      p->density.pressure_bar_dh, p->pressure_bar, p->time_bin, p->wakeup,
+      p->alpha);
 }
 
 #endif /* SWIFT_PRESSURE_ENERGY_MORRIS_HYDRO_DEBUG_H */
diff --git a/src/hydro/PressureEnergyMorrisMonaghanAV/hydro_iact.h b/src/hydro/PressureEnergyMorrisMonaghanAV/hydro_iact.h
index 747fca714ce20d9c2b018e14ac24a6492c51a75f..69da511c7544a71ef381a0889c8b56c80d5211f1 100644
--- a/src/hydro/PressureEnergyMorrisMonaghanAV/hydro_iact.h
+++ b/src/hydro/PressureEnergyMorrisMonaghanAV/hydro_iact.h
@@ -424,4 +424,28 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force(
   pi->force.v_sig = max(pi->force.v_sig, v_sig);
 }
 
+/**
+ * @brief Timestep limiter loop (symmetric version); intentionally empty
+ */
+__attribute__((always_inline)) INLINE static void runner_iact_limiter(
+    float r2, const float* dx, float hi, float hj, struct part* restrict pi,
+    struct part* restrict pj, float a, float H) {
+
+  /* Nothing to do here if both particles are active: no argument is used */
+}
+
+/**
+ * @brief Timestep limiter loop (non-symmetric version); may only flag pj
+ */
+__attribute__((always_inline)) INLINE static void runner_iact_nonsym_limiter(
+    float r2, const float* dx, float hi, float hj, struct part* restrict pi,
+    struct part* restrict pj, float a, float H) {
+
+  /* Wake up the neighbour if pi's v_sig exceeds the allowed ratio to pj's */
+  if (pi->force.v_sig > const_limiter_max_v_sig_ratio * pj->force.v_sig) {
+
+    pj->wakeup = time_bin_awake;
+  }
+}
+
 #endif /* SWIFT_PRESSURE_ENERGY_MORRIS_HYDRO_IACT_H */
diff --git a/src/hydro/PressureEnergyMorrisMonaghanAV/hydro_io.h b/src/hydro/PressureEnergyMorrisMonaghanAV/hydro_io.h
index 1600679bc2e840d0b3b958531c279f5f29293b48..71662f14c61c92d65bcf493b6f5a43b8172e3697 100644
--- a/src/hydro/PressureEnergyMorrisMonaghanAV/hydro_io.h
+++ b/src/hydro/PressureEnergyMorrisMonaghanAV/hydro_io.h
@@ -69,12 +69,6 @@ INLINE static void hydro_read_particles(struct part* parts,
                                 UNIT_CONV_DENSITY, parts, rho);
 }
 
-INLINE static void convert_u(const struct engine* e, const struct part* p,
-                             const struct xpart* xp, float* ret) {
-
-  ret[0] = hydro_get_comoving_internal_energy(p);
-}
-
 INLINE static void convert_S(const struct engine* e, const struct part* p,
                              const struct xpart* xp, float* ret) {
 
@@ -170,9 +164,8 @@ INLINE static void hydro_write_particles(const struct part* parts,
       io_make_output_field("Masses", FLOAT, 1, UNIT_CONV_MASS, parts, mass);
   list[3] = io_make_output_field("SmoothingLength", FLOAT, 1, UNIT_CONV_LENGTH,
                                  parts, h);
-  list[4] = io_make_output_field_convert_part("InternalEnergy", FLOAT, 1,
-                                              UNIT_CONV_ENERGY_PER_UNIT_MASS,
-                                              parts, xparts, convert_u);
+  list[4] = io_make_output_field("InternalEnergy", FLOAT, 1,
+                                 UNIT_CONV_ENERGY_PER_UNIT_MASS, parts, u);
   list[5] = io_make_output_field("ParticleIDs", ULONGLONG, 1,
                                  UNIT_CONV_NO_UNITS, parts, id);
   list[6] =
diff --git a/src/hydro/PressureEnergyMorrisMonaghanAV/hydro_part.h b/src/hydro/PressureEnergyMorrisMonaghanAV/hydro_part.h
index da6391236811e2a907281c3db05462bb57602fe0..d66249ea179a830cedbd3c3f165ca5012fd18862 100644
--- a/src/hydro/PressureEnergyMorrisMonaghanAV/hydro_part.h
+++ b/src/hydro/PressureEnergyMorrisMonaghanAV/hydro_part.h
@@ -34,6 +34,7 @@
 
 #include "chemistry_struct.h"
 #include "cooling_struct.h"
+#include "tracers_struct.h"
 
 /**
  * @brief Particle fields not needed during the SPH loops over neighbours.
@@ -62,6 +63,9 @@ struct xpart {
   /*! Additional data used to record cooling information */
   struct cooling_xpart_data cooling_data;
 
+  /*! Additional data used by the tracers */
+  struct tracers_xpart_data tracers_data;
+
 } SWIFT_STRUCT_ALIGN;
 
 /**
@@ -172,6 +176,9 @@ struct part {
   /*! Time-step length */
   timebin_t time_bin;
 
+  /*! Does this particle need to be woken up by the time-step limiter? */
+  char wakeup;
+
 #ifdef SWIFT_DEBUG_CHECKS
 
   /* Time of the last drift */
diff --git a/src/hydro/PressureEntropy/hydro.h b/src/hydro/PressureEntropy/hydro.h
index 38e0f66fe7ecc1b6497717c9754bc36cd10a66f7..2e8d2d5db615f239bf5c3567e7beb155eab5cb38 100644
--- a/src/hydro/PressureEntropy/hydro.h
+++ b/src/hydro/PressureEntropy/hydro.h
@@ -730,6 +730,7 @@ __attribute__((always_inline)) INLINE static void hydro_first_init_part(
     struct part *restrict p, struct xpart *restrict xp) {
 
   p->time_bin = 0;
+  p->wakeup = time_bin_not_awake;
   p->rho_bar = 0.f;
   p->entropy_one_over_gamma = pow_one_over_gamma(p->entropy);
   xp->v_full[0] = p->v[0];
diff --git a/src/hydro/PressureEntropy/hydro_debug.h b/src/hydro/PressureEntropy/hydro_debug.h
index 14d69bb650ff1bbd49394c0ca2f6256ad0cb188d..2163b70b94dde4e88f010d962358dccbde7960a3 100644
--- a/src/hydro/PressureEntropy/hydro_debug.h
+++ b/src/hydro/PressureEntropy/hydro_debug.h
@@ -36,14 +36,14 @@ __attribute__((always_inline)) INLINE static void hydro_debug_particle(
       "v=[%.3e,%.3e,%.3e],v_full=[%.3e,%.3e,%.3e] \n a=[%.3e,%.3e,%.3e],\n "
       "h=%.3e, wcount=%.3f, wcount_dh=%.3e, m=%.3e, dh_drho=%.3e, rho=%.3e, "
       "rho_bar=%.3e, P=%.3e, dP_dh=%.3e, P_over_rho2=%.3e, S=%.3e, S^1/g=%.3e, "
-      "dS/dt=%.3e,\nc=%.3e v_sig=%e dh/dt=%.3e time_bin=%d\n",
+      "dS/dt=%.3e,\nc=%.3e v_sig=%e dh/dt=%.3e time_bin=%d wakeup=%d\n",
       p->x[0], p->x[1], p->x[2], p->v[0], p->v[1], p->v[2], xp->v_full[0],
       xp->v_full[1], xp->v_full[2], p->a_hydro[0], p->a_hydro[1], p->a_hydro[2],
       p->h, p->density.wcount, p->density.wcount_dh, p->mass, p->density.rho_dh,
       p->rho, p->rho_bar, hydro_get_comoving_pressure(p),
       p->density.pressure_dh, p->force.P_over_rho2, p->entropy,
       p->entropy_one_over_gamma, p->entropy_dt, p->force.soundspeed,
-      p->force.v_sig, p->force.h_dt, p->time_bin);
+      p->force.v_sig, p->force.h_dt, p->time_bin, p->wakeup);
 }
 
 #endif /* SWIFT_PRESSURE_ENTROPY_HYDRO_DEBUG_H */
diff --git a/src/hydro/PressureEntropy/hydro_iact.h b/src/hydro/PressureEntropy/hydro_iact.h
index a018b39a99be5ed691485d93bd8dfd1735378bda..19279adec1f37117cf985e63a18a681ceee4f973 100644
--- a/src/hydro/PressureEntropy/hydro_iact.h
+++ b/src/hydro/PressureEntropy/hydro_iact.h
@@ -402,4 +402,28 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force(
   pi->entropy_dt += mj * visc_term * r_inv * dvdr;
 }
 
+/**
+ * @brief Timestep limiter loop (symmetric version); intentionally empty
+ */
+__attribute__((always_inline)) INLINE static void runner_iact_limiter(
+    float r2, const float *dx, float hi, float hj, struct part *restrict pi,
+    struct part *restrict pj, float a, float H) {
+
+  /* Nothing to do here if both particles are active: no argument is used */
+}
+
+/**
+ * @brief Timestep limiter loop (non-symmetric version); may only flag pj
+ */
+__attribute__((always_inline)) INLINE static void runner_iact_nonsym_limiter(
+    float r2, const float *dx, float hi, float hj, struct part *restrict pi,
+    struct part *restrict pj, float a, float H) {
+
+  /* Wake up the neighbour if pi's v_sig exceeds the allowed ratio to pj's */
+  if (pi->force.v_sig > const_limiter_max_v_sig_ratio * pj->force.v_sig) {
+
+    pj->wakeup = time_bin_awake;
+  }
+}
+
 #endif /* SWIFT_PRESSURE_ENTROPY_HYDRO_IACT_H */
diff --git a/src/hydro/PressureEntropy/hydro_part.h b/src/hydro/PressureEntropy/hydro_part.h
index fb8424d66196b7013866acef6bec6ec9889a3353..a404a897b06ddc0777a493e2ecfd28b68e15defe 100644
--- a/src/hydro/PressureEntropy/hydro_part.h
+++ b/src/hydro/PressureEntropy/hydro_part.h
@@ -32,6 +32,7 @@
 
 #include "chemistry_struct.h"
 #include "cooling_struct.h"
+#include "tracers_struct.h"
 
 /* Extra particle data not needed during the SPH loops over neighbours. */
 struct xpart {
@@ -54,6 +55,9 @@ struct xpart {
   /*! Additional data used to record cooling information */
   struct cooling_xpart_data cooling_data;
 
+  /*! Additional data used by the tracers */
+  struct tracers_xpart_data tracers_data;
+
 } SWIFT_STRUCT_ALIGN;
 
 /* Data of a single particle. */
@@ -148,6 +152,9 @@ struct part {
   /* Time-step length */
   timebin_t time_bin;
 
+  /* Does this particle need to be woken up by the time-step limiter? */
+  char wakeup;
+
 #ifdef SWIFT_DEBUG_CHECKS
 
   /* Time of the last drift */
diff --git a/src/hydro/Shadowswift/hydro.h b/src/hydro/Shadowswift/hydro.h
index 446219104dffb2939877ae2a7c782e66af153213..b0f3207dfce69ca79899b1134740d035d47251d1 100644
--- a/src/hydro/Shadowswift/hydro.h
+++ b/src/hydro/Shadowswift/hydro.h
@@ -103,6 +103,9 @@ __attribute__((always_inline)) INLINE static void hydro_first_init_part(
 
   const float mass = p->conserved.mass;
 
+  p->time_bin = 0;
+  p->wakeup = time_bin_not_awake;
+
   p->primitives.v[0] = p->v[0];
   p->primitives.v[1] = p->v[1];
   p->primitives.v[2] = p->v[2];
diff --git a/src/hydro/Shadowswift/hydro_debug.h b/src/hydro/Shadowswift/hydro_debug.h
index 7cd7f89c8112ebcf1930c5ca52cb389139191975..8ff85d62fc7d58d53220b1f77a7afb44c00c33b0 100644
--- a/src/hydro/Shadowswift/hydro_debug.h
+++ b/src/hydro/Shadowswift/hydro_debug.h
@@ -23,6 +23,8 @@ __attribute__((always_inline)) INLINE static void hydro_debug_particle(
       "x=[%.16e,%.16e,%.16e], "
       "v=[%.3e,%.3e,%.3e], "
       "a=[%.3e,%.3e,%.3e], "
+      "time_bin=%d, "
+      "wakeup=%d, "
       "h=%.3e, "
       "primitives={"
       "v=[%.3e,%.3e,%.3e], "
@@ -47,9 +49,9 @@ __attribute__((always_inline)) INLINE static void hydro_debug_particle(
       "wcount_dh=%.3e, "
       "wcount=%.3e}",
       p->x[0], p->x[1], p->x[2], p->v[0], p->v[1], p->v[2], p->a_hydro[0],
-      p->a_hydro[1], p->a_hydro[2], p->h, p->primitives.v[0],
-      p->primitives.v[1], p->primitives.v[2], p->primitives.rho,
-      p->primitives.P, p->primitives.gradients.rho[0],
+      p->a_hydro[1], p->a_hydro[2], p->time_bin, p->wakeup, p->h,
+      p->primitives.v[0], p->primitives.v[1], p->primitives.v[2],
+      p->primitives.rho, p->primitives.P, p->primitives.gradients.rho[0],
       p->primitives.gradients.rho[1], p->primitives.gradients.rho[2],
       p->primitives.gradients.v[0][0], p->primitives.gradients.v[0][1],
       p->primitives.gradients.v[0][2], p->primitives.gradients.v[1][0],
diff --git a/src/hydro/Shadowswift/hydro_iact.h b/src/hydro/Shadowswift/hydro_iact.h
index eda8e3759d9e08dac8073ebed9fb36dd0c5b99f6..791e4c7924df9806fa9150d03c08a543771a7049 100644
--- a/src/hydro/Shadowswift/hydro_iact.h
+++ b/src/hydro/Shadowswift/hydro_iact.h
@@ -342,3 +342,28 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force(
 
   runner_iact_fluxes_common(r2, dx, hi, hj, pi, pj, 0, a, H);
 }
+
+/**
+ * @brief Timestep limiter loop (symmetric version); intentionally empty
+ */
+__attribute__((always_inline)) INLINE static void runner_iact_limiter(
+    float r2, const float *dx, float hi, float hj, struct part *restrict pi,
+    struct part *restrict pj, float a, float H) {
+
+  /* Nothing to do here if both particles are active: no argument is used */
+}
+
+/**
+ * @brief Timestep limiter loop (non-symmetric version); may only flag pj
+ */
+__attribute__((always_inline)) INLINE static void runner_iact_nonsym_limiter(
+    float r2, const float *dx, float hi, float hj, struct part *restrict pi,
+    struct part *restrict pj, float a, float H) {
+
+  /* Wake up the neighbour if pi's vmax exceeds the allowed ratio to pj's */
+  if (pi->timestepvars.vmax >
+      const_limiter_max_v_sig_ratio * pj->timestepvars.vmax) {
+
+    pj->wakeup = time_bin_awake;
+  }
+}
diff --git a/src/hydro/Shadowswift/hydro_part.h b/src/hydro/Shadowswift/hydro_part.h
index a7cc9daf0839216f098ac05c2267adc60ea11fb0..91ffaa85e5e6e80e7db577ce09363265f73e7f4c 100644
--- a/src/hydro/Shadowswift/hydro_part.h
+++ b/src/hydro/Shadowswift/hydro_part.h
@@ -21,6 +21,7 @@
 
 #include "chemistry_struct.h"
 #include "cooling_struct.h"
+#include "tracers_struct.h"
 #include "voronoi_cell.h"
 
 /* Extra particle data not needed during the computation. */
@@ -41,6 +42,9 @@ struct xpart {
   /* Additional data used to record cooling information */
   struct cooling_xpart_data cooling_data;
 
+  /* Additional data used by the tracers */
+  struct tracers_xpart_data tracers_data;
+
 } SWIFT_STRUCT_ALIGN;
 
 /* Data of a single particle. */
@@ -179,6 +183,9 @@ struct part {
   /* Time-step length */
   timebin_t time_bin;
 
+  /* Does this particle need to be woken up by the time-step limiter? */
+  char wakeup;
+
 #ifdef SWIFT_DEBUG_CHECKS
 
   /* Time of the last drift */
diff --git a/src/hydro_properties.c b/src/hydro_properties.c
index 85f88d418bd46354f7a1cd3dd89b0e77b556b7d9..167c13a30c22b01c20e355d1b8c60903ca026ad8 100644
--- a/src/hydro_properties.c
+++ b/src/hydro_properties.c
@@ -90,6 +90,9 @@ void hydro_props_init(struct hydro_props *p,
   p->max_smoothing_iterations = parser_get_opt_param_int(
       params, "SPH:max_ghost_iterations", hydro_props_default_max_iterations);
 
+  if (p->max_smoothing_iterations <= 10)
+    error("The number of smoothing length iterations should be > 10");
+
   /* Time integration properties */
   p->CFL_condition = parser_get_param_float(params, "SPH:CFL_condition");
   const float max_volume_change = parser_get_opt_param_float(
@@ -239,7 +242,8 @@ void hydro_props_print_snapshot(hid_t h_grpsph, const struct hydro_props *p) {
   io_write_attribute_f(h_grpsph, "Kernel delta N_ngb", p->delta_neighbours);
   io_write_attribute_f(h_grpsph, "Kernel eta", p->eta_neighbours);
   io_write_attribute_f(h_grpsph, "Smoothing length tolerance", p->h_tolerance);
-  io_write_attribute_f(h_grpsph, "Maximal smoothing length", p->h_max);
+  io_write_attribute_f(h_grpsph, "Maximal smoothing length [internal units]",
+                       p->h_max);
   io_write_attribute_f(h_grpsph, "CFL parameter", p->CFL_condition);
   io_write_attribute_f(h_grpsph, "Volume log(max(delta h))",
                        p->log_max_h_change);
@@ -248,8 +252,12 @@ void hydro_props_print_snapshot(hid_t h_grpsph, const struct hydro_props *p) {
   io_write_attribute_i(h_grpsph, "Max ghost iterations",
                        p->max_smoothing_iterations);
   io_write_attribute_f(h_grpsph, "Minimal temperature", p->minimal_temperature);
+  io_write_attribute_f(h_grpsph,
+                       "Minimal energy per unit mass [internal units]",
+                       p->minimal_internal_energy);
   io_write_attribute_f(h_grpsph, "Initial temperature", p->initial_temperature);
-  io_write_attribute_f(h_grpsph, "Initial energy per unit mass",
+  io_write_attribute_f(h_grpsph,
+                       "Initial energy per unit mass [internal units]",
                        p->initial_internal_energy);
   io_write_attribute_f(h_grpsph, "Hydrogen mass fraction",
                        p->hydrogen_mass_fraction);
@@ -260,8 +268,11 @@ void hydro_props_print_snapshot(hid_t h_grpsph, const struct hydro_props *p) {
                        p->viscosity.alpha_max);
   io_write_attribute_f(h_grpsph, "Alpha viscosity (min)",
                        p->viscosity.alpha_min);
-  io_write_attribute_f(h_grpsph, "Viscosity decay length", p->viscosity.length);
+  io_write_attribute_f(h_grpsph, "Viscosity decay length [internal units]",
+                       p->viscosity.length);
   io_write_attribute_f(h_grpsph, "Beta viscosity", const_viscosity_beta);
+  io_write_attribute_f(h_grpsph, "Max v_sig ratio (limiter)",
+                       const_limiter_max_v_sig_ratio);
 }
 #endif
 
diff --git a/src/io_properties.h b/src/io_properties.h
index 9e948fc3991b0178d06fdd5d83fa900a98f84d2a..c45edb2641e374e2cfaec6c3251aff7d18f361d6 100644
--- a/src/io_properties.h
+++ b/src/io_properties.h
@@ -43,14 +43,23 @@ typedef void (*conversion_func_part_float)(const struct engine*,
 typedef void (*conversion_func_part_double)(const struct engine*,
                                             const struct part*,
                                             const struct xpart*, double*);
+typedef void (*conversion_func_part_long_long)(const struct engine*,
+                                               const struct part*,
+                                               const struct xpart*, long long*);
 typedef void (*conversion_func_gpart_float)(const struct engine*,
                                             const struct gpart*, float*);
 typedef void (*conversion_func_gpart_double)(const struct engine*,
                                              const struct gpart*, double*);
+typedef void (*conversion_func_gpart_long_long)(const struct engine*,
+                                                const struct gpart*,
+                                                long long*);
 typedef void (*conversion_func_spart_float)(const struct engine*,
                                             const struct spart*, float*);
 typedef void (*conversion_func_spart_double)(const struct engine*,
                                              const struct spart*, double*);
+typedef void (*conversion_func_spart_long_long)(const struct engine*,
+                                                const struct spart*,
+                                                long long*);
 
 /**
  * @brief The properties of a given dataset for i/o
@@ -79,6 +88,7 @@ struct io_props {
   char* start_temp_c;
   float* start_temp_f;
   double* start_temp_d;
+  long long* start_temp_l;
 
   /* Pointer to the engine */
   const struct engine* e;
@@ -98,14 +108,17 @@ struct io_props {
   /* Conversion function for part */
   conversion_func_part_float convert_part_f;
   conversion_func_part_double convert_part_d;
+  conversion_func_part_long_long convert_part_l;
 
   /* Conversion function for gpart */
   conversion_func_gpart_float convert_gpart_f;
   conversion_func_gpart_double convert_gpart_d;
+  conversion_func_gpart_long_long convert_gpart_l;
 
   /* Conversion function for spart */
   conversion_func_spart_float convert_spart_f;
   conversion_func_spart_double convert_spart_d;
+  conversion_func_spart_long_long convert_spart_l;
 };
 
 /**
@@ -147,10 +160,13 @@ INLINE static struct io_props io_make_input_field_(
   r.conversion = 0;
   r.convert_part_f = NULL;
   r.convert_part_d = NULL;
+  r.convert_part_l = NULL;
   r.convert_gpart_f = NULL;
   r.convert_gpart_d = NULL;
+  r.convert_gpart_l = NULL;
   r.convert_spart_f = NULL;
   r.convert_spart_d = NULL;
+  r.convert_spart_l = NULL;
 
   return r;
 }
@@ -191,10 +207,13 @@ INLINE static struct io_props io_make_output_field_(
   r.conversion = 0;
   r.convert_part_f = NULL;
   r.convert_part_d = NULL;
+  r.convert_part_l = NULL;
   r.convert_gpart_f = NULL;
   r.convert_gpart_d = NULL;
+  r.convert_gpart_l = NULL;
   r.convert_spart_f = NULL;
   r.convert_spart_d = NULL;
+  r.convert_spart_l = NULL;
 
   return r;
 }
@@ -242,10 +261,13 @@ INLINE static struct io_props io_make_output_field_convert_part_FLOAT(
   r.conversion = 1;
   r.convert_part_f = functionPtr;
   r.convert_part_d = NULL;
+  r.convert_part_l = NULL;
   r.convert_gpart_f = NULL;
   r.convert_gpart_d = NULL;
+  r.convert_gpart_l = NULL;
   r.convert_spart_f = NULL;
   r.convert_spart_d = NULL;
+  r.convert_spart_l = NULL;
 
   return r;
 }
@@ -285,10 +307,59 @@ INLINE static struct io_props io_make_output_field_convert_part_DOUBLE(
   r.conversion = 1;
   r.convert_part_f = NULL;
   r.convert_part_d = functionPtr;
+  r.convert_part_l = NULL;
   r.convert_gpart_f = NULL;
   r.convert_gpart_d = NULL;
+  r.convert_gpart_l = NULL;
   r.convert_spart_f = NULL;
   r.convert_spart_d = NULL;
+  r.convert_spart_l = NULL;
+
+  return r;
+}
+
+/**
+ * @brief Construct an #io_props from its parameters
+ *
+ * @param name Name of the field to read
+ * @param type The type of the data
+ * @param dimension Dataset dimension (1D, 3D, ...)
+ * @param units The units of the dataset
+ * @param partSize The size in byte of the particle
+ * @param parts The particle array
+ * @param xparts The xparticle array
+ * @param functionPtr The function used to convert a particle to a long long
+ *
+ * Do not call this function directly. Use the macro defined above.
+ */
+INLINE static struct io_props io_make_output_field_convert_part_LONGLONG(
+    const char name[FIELD_BUFFER_SIZE], enum IO_DATA_TYPE type, int dimension,
+    enum unit_conversion_factor units, size_t partSize,
+    const struct part* parts, const struct xpart* xparts,
+    conversion_func_part_long_long functionPtr) {
+
+  struct io_props r;
+  strcpy(r.name, name);
+  r.type = type;
+  r.dimension = dimension;
+  r.importance = UNUSED;
+  r.units = units;
+  r.field = NULL;
+  r.partSize = partSize;
+  r.parts = parts;
+  r.xparts = xparts;
+  r.gparts = NULL;
+  r.sparts = NULL;
+  r.conversion = 1;
+  r.convert_part_f = NULL;
+  r.convert_part_d = NULL;
+  r.convert_part_l = functionPtr;
+  r.convert_gpart_f = NULL;
+  r.convert_gpart_d = NULL;
+  r.convert_gpart_l = NULL;
+  r.convert_spart_f = NULL;
+  r.convert_spart_d = NULL;
+  r.convert_spart_l = NULL;
 
   return r;
 }
@@ -334,10 +405,13 @@ INLINE static struct io_props io_make_output_field_convert_gpart_FLOAT(
   r.conversion = 1;
   r.convert_part_f = NULL;
   r.convert_part_d = NULL;
+  r.convert_part_l = NULL;
   r.convert_gpart_f = functionPtr;
   r.convert_gpart_d = NULL;
+  r.convert_gpart_l = NULL;
   r.convert_spart_f = NULL;
   r.convert_spart_d = NULL;
+  r.convert_spart_l = NULL;
 
   return r;
 }
@@ -375,10 +449,57 @@ INLINE static struct io_props io_make_output_field_convert_gpart_DOUBLE(
   r.conversion = 1;
   r.convert_part_f = NULL;
   r.convert_part_d = NULL;
+  r.convert_part_l = NULL;
   r.convert_gpart_f = NULL;
   r.convert_gpart_d = functionPtr;
+  r.convert_gpart_l = NULL;
   r.convert_spart_f = NULL;
   r.convert_spart_d = NULL;
+  r.convert_spart_l = NULL;
+
+  return r;
+}
+
+/**
+ * @brief Construct an #io_props from its parameters
+ *
+ * @param name Name of the field to read
+ * @param type The type of the data
+ * @param dimension Dataset dimension (1D, 3D, ...)
+ * @param units The units of the dataset
+ * @param gpartSize The size in byte of the particle
+ * @param gparts The particle array
+ * @param functionPtr The function used to convert a g-particle to a long long
+ *
+ * Do not call this function directly. Use the macro defined above.
+ */
+INLINE static struct io_props io_make_output_field_convert_gpart_LONGLONG(
+    const char name[FIELD_BUFFER_SIZE], enum IO_DATA_TYPE type, int dimension,
+    enum unit_conversion_factor units, size_t gpartSize,
+    const struct gpart* gparts, conversion_func_gpart_long_long functionPtr) {
+
+  struct io_props r;
+  strcpy(r.name, name);
+  r.type = type;
+  r.dimension = dimension;
+  r.importance = UNUSED;
+  r.units = units;
+  r.field = NULL;
+  r.partSize = gpartSize;
+  r.parts = NULL;
+  r.xparts = NULL;
+  r.gparts = gparts;
+  r.sparts = NULL;
+  r.conversion = 1;
+  r.convert_part_f = NULL;
+  r.convert_part_d = NULL;
+  r.convert_part_l = NULL;
+  r.convert_gpart_f = NULL;
+  r.convert_gpart_d = NULL;
+  r.convert_gpart_l = functionPtr;
+  r.convert_spart_f = NULL;
+  r.convert_spart_d = NULL;
+  r.convert_spart_l = NULL;
 
   return r;
 }
@@ -424,10 +545,13 @@ INLINE static struct io_props io_make_output_field_convert_spart_FLOAT(
   r.conversion = 1;
   r.convert_part_f = NULL;
   r.convert_part_d = NULL;
+  r.convert_part_l = NULL;
   r.convert_gpart_f = NULL;
   r.convert_gpart_d = NULL;
+  r.convert_gpart_l = NULL;
   r.convert_spart_f = functionPtr;
   r.convert_spart_d = NULL;
+  r.convert_spart_l = NULL;
 
   return r;
 }
@@ -465,10 +589,57 @@ INLINE static struct io_props io_make_output_field_convert_spart_DOUBLE(
   r.conversion = 1;
   r.convert_part_f = NULL;
   r.convert_part_d = NULL;
+  r.convert_part_l = NULL;
   r.convert_gpart_f = NULL;
   r.convert_gpart_d = NULL;
+  r.convert_gpart_l = NULL;
   r.convert_spart_f = NULL;
   r.convert_spart_d = functionPtr;
+  r.convert_spart_l = NULL;
+
+  return r;
+}
+
+/**
+ * @brief Construct an #io_props from its parameters
+ *
+ * @param name Name of the field to read
+ * @param type The type of the data
+ * @param dimension Dataset dimension (1D, 3D, ...)
+ * @param units The units of the dataset
+ * @param spartSize The size in byte of the particle
+ * @param sparts The particle array
+ * @param functionPtr The function used to convert a s-particle to a long long
+ *
+ * Do not call this function directly. Use the macro defined above.
+ */
+INLINE static struct io_props io_make_output_field_convert_spart_LONGLONG(
+    const char name[FIELD_BUFFER_SIZE], enum IO_DATA_TYPE type, int dimension,
+    enum unit_conversion_factor units, size_t spartSize,
+    const struct spart* sparts, conversion_func_spart_long_long functionPtr) {
+
+  struct io_props r;
+  strcpy(r.name, name);
+  r.type = type;
+  r.dimension = dimension;
+  r.importance = UNUSED;
+  r.units = units;
+  r.field = NULL;
+  r.partSize = spartSize;
+  r.parts = NULL;
+  r.xparts = NULL;
+  r.gparts = NULL;
+  r.sparts = sparts;
+  r.conversion = 1;
+  r.convert_part_f = NULL;
+  r.convert_part_d = NULL;
+  r.convert_part_l = NULL;
+  r.convert_gpart_f = NULL;
+  r.convert_gpart_d = NULL;
+  r.convert_gpart_l = NULL;
+  r.convert_spart_f = NULL;
+  r.convert_spart_d = NULL;
+  r.convert_spart_l = functionPtr;
 
   return r;
 }
diff --git a/src/kick.h b/src/kick.h
index 4cd0cee56750c96ce1f1d2be66148d68e69f055e..b33ee059f56a1e979834cb4bc784d55de2130fbe 100644
--- a/src/kick.h
+++ b/src/kick.h
@@ -86,8 +86,8 @@ __attribute__((always_inline)) INLINE static void kick_part(
   if (p->ti_kick != ti_start)
     error(
         "particle has not been kicked to the current time p->ti_kick=%lld, "
-        "ti_start=%lld, ti_end=%lld id=%lld",
-        p->ti_kick, ti_start, ti_end, p->id);
+        "ti_start=%lld, ti_end=%lld id=%lld time_bin=%d wakeup=%d",
+        p->ti_kick, ti_start, ti_end, p->id, p->time_bin, p->wakeup);
 
   p->ti_kick = ti_end;
 #endif
diff --git a/src/memswap.h b/src/memswap.h
index 2f7b9215ed48535fab9e8331303457c2f92859cd..91d83d231692a2b8f540c3e6b9334bc89e1ee87b 100644
--- a/src/memswap.h
+++ b/src/memswap.h
@@ -1,7 +1,7 @@
 /*******************************************************************************
  * This file is part of SWIFT.
  * Copyright (c) 2016 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
- *
+ *               2018 STFC (author email aidan.chalk@stfc.ac.uk)
  * This program is free software: you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published
  * by the Free Software Foundation, either version 3 of the License, or
@@ -20,6 +20,7 @@
 #define SWIFT_MEMSWAP_H
 
 /* Config parameters. */
+#include <stdint.h>
 #include "../config.h"
 
 #ifdef HAVE_IMMINTRIN_H
@@ -33,7 +34,7 @@
 #endif
 
 /* Macro for in-place swap of two values a and b of type t. a and b are
-   assumed to be of type char* so that the pointer arithmetic works. */
+   assumed to be of type int8_t* so that the pointer arithmetic works. */
 #define swap_loop(type, a, b, count) \
   while (count >= sizeof(type)) {    \
     register type temp = *(type *)a; \
@@ -60,9 +61,10 @@
  * @param void_b Pointer to the second element.
  * @param bytes Size, in bytes, of the data pointed to by @c a and @c b.
  */
-__attribute__((always_inline)) inline void memswap(void *void_a, void *void_b,
+__attribute__((always_inline)) inline void memswap(void *restrict void_a,
+                                                   void *restrict void_b,
                                                    size_t bytes) {
-  char *a = (char *)void_a, *b = (char *)void_b;
+  int8_t *restrict a = (int8_t *)void_a, *restrict b = (int8_t *)void_b;
 #if defined(__AVX512F__) && defined(__INTEL_COMPILER)
   swap_loop(__m512i, a, b, bytes);
 #endif
@@ -75,10 +77,10 @@ __attribute__((always_inline)) inline void memswap(void *void_a, void *void_b,
 #ifdef __ALTIVEC__
   swap_loop(vector int, a, b, bytes);
 #endif
-  swap_loop(size_t, a, b, bytes);
-  swap_loop(int, a, b, bytes);
-  swap_loop(short, a, b, bytes);
-  swap_loop(char, a, b, bytes);
+  swap_loop(int_least64_t, a, b, bytes);
+  swap_loop(int_least32_t, a, b, bytes);
+  swap_loop(int_least16_t, a, b, bytes);
+  swap_loop(int_least8_t, a, b, bytes);
 }
 
 /**
@@ -93,10 +95,9 @@ __attribute__((always_inline)) inline void memswap(void *void_a, void *void_b,
  * @param void_b Pointer to the second element.
  * @param bytes Size, in bytes, of the data pointed to by @c a and @c b.
  */
-__attribute__((always_inline)) inline void memswap_unaligned(void *void_a,
-                                                             void *void_b,
-                                                             size_t bytes) {
-  char *a = (char *)void_a, *b = (char *)void_b;
+__attribute__((always_inline)) inline void memswap_unaligned(
+    void *restrict void_a, void *restrict void_b, size_t bytes) {
+  int8_t *restrict a = (int8_t *)void_a, *restrict b = (int8_t *)void_b;
 #ifdef __AVX512F__
   while (bytes >= sizeof(__m512i)) {
     register __m512i temp;
@@ -134,10 +135,10 @@ __attribute__((always_inline)) inline void memswap_unaligned(void *void_a,
   // Power8 supports unaligned load/stores, but not sure what it will do here.
   swap_loop(vector int, a, b, bytes);
 #endif
-  swap_loop(size_t, a, b, bytes);
-  swap_loop(int, a, b, bytes);
-  swap_loop(short, a, b, bytes);
-  swap_loop(char, a, b, bytes);
+  swap_loop(int_least64_t, a, b, bytes);
+  swap_loop(int_least32_t, a, b, bytes);
+  swap_loop(int_least16_t, a, b, bytes);
+  swap_loop(int_least8_t, a, b, bytes);
 }
 
 #endif /* SWIFT_MEMSWAP_H */
diff --git a/src/parallel_io.c b/src/parallel_io.c
index c826d13646c4196f40a77401cfe44f38c10e377b..e06ffaddc7c8a5b225fda6dd81af756b0ab76189 100644
--- a/src/parallel_io.c
+++ b/src/parallel_io.c
@@ -56,6 +56,7 @@
 #include "stars_io.h"
 #include "tracers_io.h"
 #include "units.h"
+#include "velociraptor_io.h"
 #include "xmf.h"
 
 /* The current limit of ROMIO (the underlying MPI-IO layer) is 2GB */
@@ -957,9 +958,16 @@ void prepare_file(struct engine* e, const char* baseName, long long N_total[6],
   const struct spart* sparts = e->s->sparts;
   struct swift_params* params = e->parameter_file;
   const int with_cosmology = e->policy & engine_policy_cosmology;
+  const int with_cooling = e->policy & engine_policy_cooling;
+  const int with_temperature = e->policy & engine_policy_temperature;
+#ifdef HAVE_VELOCIRAPTOR
+  const int with_stf = (e->policy & engine_policy_structure_finding) &&
+                       (e->s->gpart_group_data != NULL);
+#else
+  const int with_stf = 0;
+#endif
 
   FILE* xmfFile = 0;
-  int periodic = e->s->periodic;
   int numFiles = 1;
 
   /* First time, we need to create the XMF file */
@@ -985,28 +993,26 @@ void prepare_file(struct engine* e, const char* baseName, long long N_total[6],
    * specific output */
   xmf_write_outputheader(xmfFile, fileName, e->time);
 
-  /* Open header to write simulation properties */
-  /* message("Writing runtime parameters..."); */
-  hid_t h_grp =
-      H5Gcreate(h_file, "/RuntimePars", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-  if (h_grp < 0) error("Error while creating runtime parameters group\n");
-
-  /* Write the relevant information */
-  io_write_attribute(h_grp, "PeriodicBoundariesOn", INT, &periodic, 1);
-
-  /* Close runtime parameters */
-  H5Gclose(h_grp);
-
   /* Open header to write simulation properties */
   /* message("Writing file header..."); */
-  h_grp = H5Gcreate(h_file, "/Header", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+  hid_t h_grp =
+      H5Gcreate(h_file, "/Header", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
   if (h_grp < 0) error("Error while creating file header\n");
 
+  /* Convert basic output information to snapshot units */
+  const double factor_time =
+      units_conversion_factor(internal_units, snapshot_units, UNIT_CONV_TIME);
+  const double factor_length =
+      units_conversion_factor(internal_units, snapshot_units, UNIT_CONV_LENGTH);
+  const double dblTime = e->time * factor_time;
+  const double dim[3] = {e->s->dim[0] * factor_length,
+                         e->s->dim[1] * factor_length,
+                         e->s->dim[2] * factor_length};
+
   /* Print the relevant information and print status */
-  io_write_attribute(h_grp, "BoxSize", DOUBLE, e->s->dim, 3);
-  double dblTime = e->time;
+  io_write_attribute(h_grp, "BoxSize", DOUBLE, dim, 3);
   io_write_attribute(h_grp, "Time", DOUBLE, &dblTime, 1);
-  int dimension = (int)hydro_dimension;
+  const int dimension = (int)hydro_dimension;
   io_write_attribute(h_grp, "Dimension", INT, &dimension, 1);
   io_write_attribute(h_grp, "Redshift", DOUBLE, &e->cosmology->z, 1);
   io_write_attribute(h_grp, "Scale-factor", DOUBLE, &e->cosmology->a, 1);
@@ -1143,17 +1149,26 @@ void prepare_file(struct engine* e, const char* baseName, long long N_total[6],
       case swift_type_gas:
         hydro_write_particles(parts, xparts, list, &num_fields);
         num_fields += chemistry_write_particles(parts, list + num_fields);
-        num_fields += cooling_write_particles(parts, xparts, list + num_fields,
-                                              e->cooling_func);
+        if (with_cooling || with_temperature) {
+          num_fields += cooling_write_particles(
+              parts, xparts, list + num_fields, e->cooling_func);
+        }
         num_fields += tracers_write_particles(parts, xparts, list + num_fields,
                                               with_cosmology);
-        num_fields += sftracers_write_particles(
+      num_fields += sftracers_write_particles(
             parts, xparts, list + num_fields, with_cosmology);
-
+        if (with_stf) {
+          num_fields +=
+              velociraptor_write_parts(parts, xparts, list + num_fields);
+        }
         break;
 
       case swift_type_dark_matter:
         darkmatter_write_particles(gparts, list, &num_fields);
+        if (with_stf) {
+          num_fields += velociraptor_write_gparts(e->s->gpart_group_data,
+                                                  list + num_fields);
+        }
         break;
 
       case swift_type_stars:
@@ -1161,6 +1176,9 @@ void prepare_file(struct engine* e, const char* baseName, long long N_total[6],
         num_fields += chemistry_write_sparticles(sparts, list + num_fields);
         num_fields +=
             tracers_write_sparticles(sparts, list + num_fields, with_cosmology);
+        if (with_stf) {
+          num_fields += velociraptor_write_sparts(sparts, list + num_fields);
+        }
         break;
 
       default:
@@ -1228,6 +1246,14 @@ void write_output_parallel(struct engine* e, const char* baseName,
   const struct spart* sparts = e->s->sparts;
   struct swift_params* params = e->parameter_file;
   const int with_cosmology = e->policy & engine_policy_cosmology;
+  const int with_cooling = e->policy & engine_policy_cooling;
+  const int with_temperature = e->policy & engine_policy_temperature;
+#ifdef HAVE_VELOCIRAPTOR
+  const int with_stf = (e->policy & engine_policy_structure_finding) &&
+                       (e->s->gpart_group_data != NULL);
+#else
+  const int with_stf = 0;
+#endif
 
   /* Number of particles currently in the arrays */
   const size_t Ntot = e->s->nr_gparts;
@@ -1290,6 +1316,32 @@ void write_output_parallel(struct engine* e, const char* baseName,
     snprintf(fileName, FILENAME_BUFFER_SIZE, "%s_%04i.hdf5", baseName,
              e->snapshot_output_count);
 
+  /* Now write the top-level cell structure */
+  hid_t h_file_cells = 0, h_grp_cells = 0;
+  if (mpi_rank == 0) {
+
+    /* Open the snapshot on rank 0 */
+    h_file_cells = H5Fopen(fileName, H5F_ACC_RDWR, H5P_DEFAULT);
+    if (h_file_cells < 0)
+      error("Error while opening file '%s' on rank %d.", fileName, mpi_rank);
+
+    /* Create the group we want in the file */
+    h_grp_cells = H5Gcreate(h_file_cells, "/Cells", H5P_DEFAULT, H5P_DEFAULT,
+                            H5P_DEFAULT);
+    if (h_grp_cells < 0) error("Error while creating cells group");
+  }
+
+  /* Write the location of the particles in the arrays */
+  io_write_cell_offsets(h_grp_cells, e->s->cdim, e->s->cells_top,
+                        e->s->nr_cells, e->s->width, mpi_rank, N_total, offset,
+                        internal_units, snapshot_units);
+
+  /* Close everything */
+  if (mpi_rank == 0) {
+    H5Gclose(h_grp_cells);
+    H5Fclose(h_file_cells);
+  }
+
   /* Prepare some file-access properties */
   hid_t plist_id = H5Pcreate(H5P_FILE_ACCESS);
 
@@ -1403,6 +1455,7 @@ void write_output_parallel(struct engine* e, const char* baseName,
     struct part* parts_written = NULL;
     struct xpart* xparts_written = NULL;
     struct gpart* gparts_written = NULL;
+    struct velociraptor_gpart_data* gpart_group_data_written = NULL;
     struct spart* sparts_written = NULL;
 
     /* Write particle fields from the particle structure */
@@ -1415,8 +1468,14 @@ void write_output_parallel(struct engine* e, const char* baseName,
           Nparticles = Ngas;
           hydro_write_particles(parts, xparts, list, &num_fields);
           num_fields += chemistry_write_particles(parts, list + num_fields);
-          num_fields += cooling_write_particles(
-              parts, xparts, list + num_fields, e->cooling_func);
+          if (with_cooling || with_temperature) {
+            num_fields += cooling_write_particles(
+                parts, xparts, list + num_fields, e->cooling_func);
+          }
+          if (with_stf) {
+            num_fields +=
+                velociraptor_write_parts(parts, xparts, list + num_fields);
+          }
           num_fields += tracers_write_particles(
               parts, xparts, list + num_fields, with_cosmology);
           num_fields += sftracers_write_particles(
@@ -1444,9 +1503,15 @@ void write_output_parallel(struct engine* e, const char* baseName,
                                 &num_fields);
           num_fields +=
               chemistry_write_particles(parts_written, list + num_fields);
-          num_fields +=
-              cooling_write_particles(parts_written, xparts_written,
-                                      list + num_fields, e->cooling_func);
+          if (with_cooling || with_temperature) {
+            num_fields +=
+                cooling_write_particles(parts_written, xparts_written,
+                                        list + num_fields, e->cooling_func);
+          }
+          if (with_stf) {
+            num_fields += velociraptor_write_parts(
+                parts_written, xparts_written, list + num_fields);
+          }
           num_fields += tracers_write_particles(
               parts_written, xparts_written, list + num_fields, with_cosmology);
           num_fields += sftracers_write_particles(
@@ -1460,6 +1525,10 @@ void write_output_parallel(struct engine* e, const char* baseName,
           /* This is a DM-only run without inhibited particles */
           Nparticles = Ntot;
           darkmatter_write_particles(gparts, list, &num_fields);
+          if (with_stf) {
+            num_fields += velociraptor_write_gparts(e->s->gpart_group_data,
+                                                    list + num_fields);
+          }
         } else {
 
           /* Ok, we need to fish out the particles we want */
@@ -1470,11 +1539,28 @@ void write_output_parallel(struct engine* e, const char* baseName,
                              Ndm_written * sizeof(struct gpart)) != 0)
             error("Error while allocating temporart memory for gparts");
 
+          if (with_stf) {
+            if (posix_memalign(
+                    (void**)&gpart_group_data_written, gpart_align,
+                    Ndm_written * sizeof(struct velociraptor_gpart_data)) != 0)
+              error(
+                  "Error while allocating temporart memory for gparts STF "
+                  "data");
+          }
+
           /* Collect the non-inhibited DM particles from gpart */
-          io_collect_gparts_to_write(gparts, gparts_written, Ntot, Ndm_written);
+          io_collect_gparts_to_write(gparts, e->s->gpart_group_data,
+                                     gparts_written, gpart_group_data_written,
+                                     Ntot, Ndm_written, with_stf);
 
-          /* Write DM particles */
+          /* Select the fields to write */
           darkmatter_write_particles(gparts_written, list, &num_fields);
+          if (with_stf) {
+#ifdef HAVE_VELOCIRAPTOR
+            num_fields += velociraptor_write_gparts(gpart_group_data_written,
+                                                    list + num_fields);
+#endif
+          }
         }
       } break;
 
@@ -1487,6 +1573,9 @@ void write_output_parallel(struct engine* e, const char* baseName,
           num_fields += chemistry_write_sparticles(sparts, list + num_fields);
           num_fields += tracers_write_sparticles(sparts, list + num_fields,
                                                  with_cosmology);
+          if (with_stf) {
+            num_fields += velociraptor_write_sparts(sparts, list + num_fields);
+          }
         } else {
 
           /* Ok, we need to fish out the particles we want */
@@ -1503,9 +1592,13 @@ void write_output_parallel(struct engine* e, const char* baseName,
 
           /* Select the fields to write */
           stars_write_particles(sparts_written, list, &num_fields);
-          num_fields += chemistry_write_sparticles(sparts, list + num_fields);
+	  num_fields += chemistry_write_sparticles(sparts, list + num_fields);
           num_fields += tracers_write_sparticles(sparts, list + num_fields,
                                                  with_cosmology);
+          if (with_stf) {
+            num_fields +=
+                velociraptor_write_sparts(sparts_written, list + num_fields);
+          }
         }
       } break;
 
@@ -1532,6 +1625,7 @@ void write_output_parallel(struct engine* e, const char* baseName,
     if (parts_written) free(parts_written);
     if (xparts_written) free(xparts_written);
     if (gparts_written) free(gparts_written);
+    if (gpart_group_data_written) free(gpart_group_data_written);
     if (sparts_written) free(sparts_written);
 
 #ifdef IO_SPEED_MEASUREMENT
diff --git a/src/partition.c b/src/partition.c
index bbd7454dd63be6ab5192558fb4a2e3399ea03cfc..60ee7716efb25188b3a09f44f93a65a3ccbd5893 100644
--- a/src/partition.c
+++ b/src/partition.c
@@ -330,22 +330,28 @@ static void accumulate_sizes(struct space *s, double *counts) {
   mapper_data.s = s;
 
   double hsize = (double)sizeof(struct part);
-  mapper_data.size = hsize;
-  threadpool_map(&s->e->threadpool, accumulate_sizes_mapper_part, s->parts,
-                 s->nr_parts, sizeof(struct part), space_splitsize,
-                 &mapper_data);
+  if (s->nr_parts > 0) {
+    mapper_data.size = hsize;
+    threadpool_map(&s->e->threadpool, accumulate_sizes_mapper_part, s->parts,
+                   s->nr_parts, sizeof(struct part), space_splitsize,
+                   &mapper_data);
+  }
 
   double gsize = (double)sizeof(struct gpart);
-  mapper_data.size = gsize;
-  threadpool_map(&s->e->threadpool, accumulate_sizes_mapper_gpart, s->gparts,
-                 s->nr_gparts, sizeof(struct gpart), space_splitsize,
-                 &mapper_data);
+  if (s->nr_gparts > 0) {
+    mapper_data.size = gsize;
+    threadpool_map(&s->e->threadpool, accumulate_sizes_mapper_gpart, s->gparts,
+                   s->nr_gparts, sizeof(struct gpart), space_splitsize,
+                   &mapper_data);
+  }
 
   double ssize = (double)sizeof(struct spart);
-  mapper_data.size = ssize;
-  threadpool_map(&s->e->threadpool, accumulate_sizes_mapper_spart, s->sparts,
-                 s->nr_sparts, sizeof(struct spart), space_splitsize,
-                 &mapper_data);
+  if (s->nr_sparts > 0) {
+    mapper_data.size = ssize;
+    threadpool_map(&s->e->threadpool, accumulate_sizes_mapper_spart, s->sparts,
+                   s->nr_sparts, sizeof(struct spart), space_splitsize,
+                   &mapper_data);
+  }
 
   /* Keep the sum of particles across all ranks in the range of IDX_MAX. */
   if ((s->e->total_nr_parts * hsize + s->e->total_nr_gparts * gsize +
diff --git a/src/physical_constants.c b/src/physical_constants.c
index 7752f4d3130b7174863d520b3d4d3c6a3e8eb433..3e3c72812c552aba1204086353dc7d239a5c36f9 100644
--- a/src/physical_constants.c
+++ b/src/physical_constants.c
@@ -32,7 +32,8 @@
 /**
  * @brief Converts physical constants to the internal unit system
  *
- * Some constants can be overwritten by the YAML file values.
+ * Some constants can be overwritten by the YAML file values. If the
+ * params argument is NULL, no overwriting is done.
  *
  * @param us The current internal system of units.
  * @param params The parsed parameter file.
@@ -48,8 +49,10 @@ void phys_const_init(const struct unit_system *us, struct swift_params *params,
       const_newton_G_cgs / units_general_cgs_conversion_factor(us, dimension_G);
 
   /* Overwrite G if present in the file */
-  internal_const->const_newton_G = parser_get_opt_param_double(
-      params, "PhysicalConstants:G", internal_const->const_newton_G);
+  if (params != NULL) {
+    internal_const->const_newton_G = parser_get_opt_param_double(
+        params, "PhysicalConstants:G", internal_const->const_newton_G);
+  }
 
   const float dimension_c[5] = {0, 1, -1, 0, 0}; /* [cm s^-1] */
   internal_const->const_speed_light_c =
diff --git a/src/runner.c b/src/runner.c
index 69f7577a7a3a33b73a6492ba52abfea7e874614e..0d0765af3a1315d9554af0a2eaebb10be8931c2c 100644
--- a/src/runner.c
+++ b/src/runner.c
@@ -66,6 +66,7 @@
 #include "task.h"
 #include "timers.h"
 #include "timestep.h"
+#include "timestep_limiter.h"
 #include "tracers.h"
 
 #define TASK_LOOP_DENSITY 0
@@ -96,6 +97,13 @@
 #undef FUNCTION
 #undef FUNCTION_TASK_LOOP
 
+/* Import the limiter loop functions. */
+#define FUNCTION limiter
+#define FUNCTION_TASK_LOOP TASK_LOOP_LIMITER
+#include "runner_doiact.h"
+#undef FUNCTION
+#undef FUNCTION_TASK_LOOP
+
 /* Import the gravity loop functions. */
 #include "runner_doiact_grav.h"
 
@@ -1227,14 +1235,22 @@ void runner_do_ghost(struct runner *r, struct cell *c, int timer) {
      * current smoothing lengths. */
     int *pid = NULL;
     float *h_0 = NULL;
+    float *left = NULL;
+    float *right = NULL;
     if ((pid = (int *)malloc(sizeof(int) * c->hydro.count)) == NULL)
       error("Can't allocate memory for pid.");
     if ((h_0 = (float *)malloc(sizeof(float) * c->hydro.count)) == NULL)
       error("Can't allocate memory for h_0.");
+    if ((left = (float *)malloc(sizeof(float) * c->hydro.count)) == NULL)
+      error("Can't allocate memory for left.");
+    if ((right = (float *)malloc(sizeof(float) * c->hydro.count)) == NULL)
+      error("Can't allocate memory for right.");
     for (int k = 0; k < c->hydro.count; k++)
       if (part_is_active(&parts[k], e)) {
         pid[count] = k;
         h_0[count] = parts[k].h;
+        left[count] = 0.f;
+        right[count] = hydro_h_max;
         ++count;
       }
 
@@ -1287,6 +1303,16 @@ void runner_do_ghost(struct runner *r, struct cell *c, int timer) {
               p->density.wcount_dh * h_old_dim +
               hydro_dimension * p->density.wcount * h_old_dim_minus_one;
 
+          /* Improve the bisection bounds */
+          if (n_sum < n_target) left[i] = max(left[i], h_old);
+          if (n_sum > n_target) right[i] = min(right[i], h_old);
+
+#ifdef SWIFT_DEBUG_CHECKS
+          /* Check the validity of the left and right bounds */
+          if (left[i] > right[i])
+            error("Invalid left (%e) and right (%e)", left[i], right[i]);
+#endif
+
           /* Skip if h is already h_max and we don't have enough neighbours */
           if ((p->h >= hydro_h_max) && (f < 0.f)) {
 
@@ -1349,13 +1375,15 @@ void runner_do_ghost(struct runner *r, struct cell *c, int timer) {
           /* Avoid floating point exception from f_prime = 0 */
           h_new = h_old - f / (f_prime + FLT_MIN);
 
-          /* Be verbose about the particles that struggle to converve */
+          /* Be verbose about the particles that struggle to converge */
           if (num_reruns > max_smoothing_iter - 10) {
 
             message(
-                "iter=%d p->id=%lld h_old=%12.8e h_new=%12.8e f=%f f_prime=%f "
-                "n_sum=%f n_target=%f",
-                num_reruns, p->id, h_old, h_new, f, f_prime, n_sum, n_target);
+                "Smoothing length convergence problem: iter=%d p->id=%lld "
+                "h_init=%12.8e h_old=%12.8e h_new=%12.8e f=%f f_prime=%f "
+                "n_sum=%12.8e n_target=%12.8e left=%12.8e right=%12.8e",
+                num_reruns, p->id, h_init, h_old, h_new, f, f_prime, n_sum,
+                n_target, left[i], right[i]);
           }
 
 #ifdef SWIFT_DEBUG_CHECKS
@@ -1367,13 +1395,30 @@ void runner_do_ghost(struct runner *r, struct cell *c, int timer) {
           /* Safety check: truncate to the range [ h_old/2 , 2h_old ]. */
           h_new = min(h_new, 2.f * h_old);
           h_new = max(h_new, 0.5f * h_old);
+
+          /* Verify that we are actually progressing towards the answer */
+          h_new = max(h_new, left[i]);
+          h_new = min(h_new, right[i]);
         }
 
         /* Check whether the particle has an inappropriate smoothing length */
         if (fabsf(h_new - h_old) > eps * h_init) {
 
           /* Ok, correct then */
-          p->h = h_new;
+
+          /* Case where we have been oscillating around the solution */
+          if ((h_new == left[i] && h_old == right[i]) ||
+              (h_old == left[i] && h_new == right[i])) {
+
+            /* Bisect the remaining interval */
+            p->h = pow_inv_dimension(
+                0.5f * (pow_dimension(left[i]) + pow_dimension(right[i])));
+
+          } else {
+
+            /* Normal case */
+            p->h = h_new;
+          }
 
           /* If below the absolute maximum, try again */
           if (p->h < hydro_h_max) {
@@ -1381,6 +1426,8 @@ void runner_do_ghost(struct runner *r, struct cell *c, int timer) {
             /* Flag for another round of fun */
             pid[redo] = pid[i];
             h_0[redo] = h_0[i];
+            left[redo] = left[i];
+            right[redo] = right[i];
             redo += 1;
 
             /* Re-initialise everything */
@@ -1511,6 +1558,8 @@ void runner_do_ghost(struct runner *r, struct cell *c, int timer) {
     }
 
     /* Be clean */
+    free(left);
+    free(right);
     free(pid);
     free(h_0);
   }
@@ -1712,19 +1761,26 @@ void runner_do_kick1(struct runner *r, struct cell *c, int timer) {
       /* If particle needs to be kicked */
       if (part_is_starting(p, e)) {
 
+#ifdef SWIFT_DEBUG_CHECKS
+        if (p->wakeup == time_bin_awake)
+          error("Woken-up particle that has not been processed in kick1");
+#endif
+
+        /* Skip particles that have been woken up and treated by the limiter. */
+        if (p->wakeup != time_bin_not_awake) continue;
+
         const integertime_t ti_step = get_integer_timestep(p->time_bin);
         const integertime_t ti_begin =
             get_integer_time_begin(ti_current + 1, p->time_bin);
 
 #ifdef SWIFT_DEBUG_CHECKS
-        const integertime_t ti_end =
-            get_integer_time_end(ti_current + 1, p->time_bin);
+        const integertime_t ti_end = ti_begin + ti_step;
 
         if (ti_begin != ti_current)
           error(
               "Particle in wrong time-bin, ti_end=%lld, ti_begin=%lld, "
-              "ti_step=%lld time_bin=%d ti_current=%lld",
-              ti_end, ti_begin, ti_step, p->time_bin, ti_current);
+              "ti_step=%lld time_bin=%d wakeup=%d ti_current=%lld",
+              ti_end, ti_begin, ti_step, p->time_bin, p->wakeup, ti_current);
 #endif
 
         /* Time interval for this half-kick */
@@ -1887,39 +1943,60 @@ void runner_do_kick2(struct runner *r, struct cell *c, int timer) {
       /* If particle needs to be kicked */
       if (part_is_active(p, e)) {
 
-        const integertime_t ti_step = get_integer_timestep(p->time_bin);
-        const integertime_t ti_begin =
-            get_integer_time_begin(ti_current, p->time_bin);
+        integertime_t ti_begin, ti_end, ti_step;
+
+#ifdef SWIFT_DEBUG_CHECKS
+        if (p->wakeup == time_bin_awake)
+          error("Woken-up particle that has not been processed in kick1");
+#endif
+
+        if (p->wakeup == time_bin_not_awake) {
+
+          /* Time-step from a regular kick */
+          ti_step = get_integer_timestep(p->time_bin);
+          ti_begin = get_integer_time_begin(ti_current, p->time_bin);
+          ti_end = ti_begin + ti_step;
+
+        } else {
+
+          /* Time-step that follows a wake-up call */
+          ti_begin = get_integer_time_begin(ti_current, p->wakeup);
+          ti_end = get_integer_time_end(ti_current, p->time_bin);
+          ti_step = ti_end - ti_begin;
+
+          /* Reset the flag. Everything is back to normal from now on. */
+          p->wakeup = time_bin_awake;
+        }
 
 #ifdef SWIFT_DEBUG_CHECKS
         if (ti_begin + ti_step != ti_current)
           error(
               "Particle in wrong time-bin, ti_begin=%lld, ti_step=%lld "
-              "time_bin=%d ti_current=%lld",
-              ti_begin, ti_step, p->time_bin, ti_current);
+              "time_bin=%d wakeup=%d ti_current=%lld",
+              ti_begin, ti_step, p->time_bin, p->wakeup, ti_current);
 #endif
         /* Time interval for this half-kick */
         double dt_kick_grav, dt_kick_hydro, dt_kick_therm, dt_kick_corr;
         if (with_cosmology) {
           dt_kick_hydro = cosmology_get_hydro_kick_factor(
-              cosmo, ti_begin + ti_step / 2, ti_begin + ti_step);
+              cosmo, ti_begin + ti_step / 2, ti_end);
           dt_kick_grav = cosmology_get_grav_kick_factor(
-              cosmo, ti_begin + ti_step / 2, ti_begin + ti_step);
+              cosmo, ti_begin + ti_step / 2, ti_end);
           dt_kick_therm = cosmology_get_therm_kick_factor(
-              cosmo, ti_begin + ti_step / 2, ti_begin + ti_step);
+              cosmo, ti_begin + ti_step / 2, ti_end);
           dt_kick_corr = cosmology_get_corr_kick_factor(
-              cosmo, ti_begin + ti_step / 2, ti_begin + ti_step);
+              cosmo, ti_begin + ti_step / 2, ti_end);
         } else {
-          dt_kick_hydro = (ti_step / 2) * time_base;
-          dt_kick_grav = (ti_step / 2) * time_base;
-          dt_kick_therm = (ti_step / 2) * time_base;
-          dt_kick_corr = (ti_step / 2) * time_base;
+          dt_kick_hydro = (ti_end - (ti_begin + ti_step / 2)) * time_base;
+          dt_kick_grav = (ti_end - (ti_begin + ti_step / 2)) * time_base;
+          dt_kick_therm = (ti_end - (ti_begin + ti_step / 2)) * time_base;
+          dt_kick_corr = (ti_end - (ti_begin + ti_step / 2)) * time_base;
         }
 
         /* Finish the time-step with a second half-kick */
         kick_part(p, xp, dt_kick_hydro, dt_kick_grav, dt_kick_therm,
                   dt_kick_corr, cosmo, hydro_props, entropy_floor,
-                  ti_begin + ti_step / 2, ti_begin + ti_step);
+                  ti_begin + ti_step / 2, ti_end);
 
 #ifdef SWIFT_DEBUG_CHECKS
         /* Check that kick and the drift are synchronized */
@@ -2321,6 +2398,144 @@ void runner_do_timestep(struct runner *r, struct cell *c, int timer) {
   if (timer) TIMER_TOC(timer_timestep);
 }
 
+/**
+ * @brief Apply the time-step limiter to all woken-up particles in a cell
+ * hierarchy and update the cells' ti_end_min/ti_end_max/ti_beg_max values.
+ *
+ * @param r The task #runner.
+ * @param c The #cell (its progeny may be recursed into).
+ * @param force Limit the particles irrespective of the #cell flags.
+ * @param timer Are we timing this ?
+ */
+void runner_do_limiter(struct runner *r, struct cell *c, int force, int timer) {
+
+  const struct engine *e = r->e;
+  const integertime_t ti_current = e->ti_current;
+  const int count = c->hydro.count;
+  struct part *restrict parts = c->hydro.parts;
+  struct xpart *restrict xparts = c->hydro.xparts;
+
+  TIMER_TIC;
+
+#ifdef SWIFT_DEBUG_CHECKS
+  /* Check that we only limit local cells. */
+  if (c->nodeID != engine_rank) error("Limiting dt of a foreign cell is nope.");
+#endif
+
+  integertime_t ti_hydro_end_min = max_nr_timesteps, ti_hydro_end_max = 0,
+                ti_hydro_beg_max = 0;
+  integertime_t ti_gravity_end_min = max_nr_timesteps, ti_gravity_end_max = 0,
+                ti_gravity_beg_max = 0;
+
+  /* Also force the limiter if this cell's own do_limiter flag is set. */
+  force |= c->hydro.do_limiter;
+
+  /* Early abort? Cells without hydro particles have nothing to limit. */
+  if (c->hydro.count == 0) {
+
+    /* Clear the limiter flags. */
+    c->hydro.do_limiter = 0;
+    c->hydro.do_sub_limiter = 0;
+    return;
+  }
+
+  /* Loop over the progeny ? (split cell with force or do_sub_limiter set) */
+  if (c->split && (force || c->hydro.do_sub_limiter)) {
+    for (int k = 0; k < 8; k++) {
+      if (c->progeny[k] != NULL) {
+        struct cell *restrict cp = c->progeny[k];
+
+        /* Recurse (the sub-calls are never timed) */
+        runner_do_limiter(r, cp, force, 0);
+
+        /* And aggregate the values stored in the progeny */
+        ti_hydro_end_min = min(cp->hydro.ti_end_min, ti_hydro_end_min);
+        ti_hydro_end_max = max(cp->hydro.ti_end_max, ti_hydro_end_max);
+        ti_hydro_beg_max = max(cp->hydro.ti_beg_max, ti_hydro_beg_max);
+        ti_gravity_end_min = min(cp->grav.ti_end_min, ti_gravity_end_min);
+        ti_gravity_end_max = max(cp->grav.ti_end_max, ti_gravity_end_max);
+        ti_gravity_beg_max = max(cp->grav.ti_beg_max, ti_gravity_beg_max);
+      }
+    }
+
+    /* Store the updated values (end_min only shrinks, the max-es only grow) */
+    c->hydro.ti_end_min = min(c->hydro.ti_end_min, ti_hydro_end_min);
+    c->hydro.ti_end_max = max(c->hydro.ti_end_max, ti_hydro_end_max);
+    c->hydro.ti_beg_max = max(c->hydro.ti_beg_max, ti_hydro_beg_max);
+    c->grav.ti_end_min = min(c->grav.ti_end_min, ti_gravity_end_min);
+    c->grav.ti_end_max = max(c->grav.ti_end_max, ti_gravity_end_max);
+    c->grav.ti_beg_max = max(c->grav.ti_beg_max, ti_gravity_beg_max);
+
+  } else if (!c->split && force) {
+
+    ti_hydro_end_min = c->hydro.ti_end_min;
+    ti_hydro_end_max = c->hydro.ti_end_max;
+    ti_hydro_beg_max = c->hydro.ti_beg_max;
+    ti_gravity_end_min = c->grav.ti_end_min;
+    ti_gravity_end_max = c->grav.ti_end_max;
+    ti_gravity_beg_max = c->grav.ti_beg_max;
+
+    /* Loop over the gas particles in this cell. */
+    for (int k = 0; k < count; k++) {
+
+      /* Get a handle on the part and its xpart. */
+      struct part *restrict p = &parts[k];
+      struct xpart *restrict xp = &xparts[k];
+
+      /* Avoid inhibited particles */
+      if (part_is_inhibited(p, e)) continue;
+
+      /* If the particle is active, no need to wake it up: clear its flag */
+      if (part_is_active(p, e) && p->wakeup != time_bin_not_awake)
+        p->wakeup = time_bin_not_awake;
+
+      /* Bip, bip, bip... wake-up time (particle flagged as time_bin_awake) */
+      if (p->wakeup == time_bin_awake) {
+
+        /* Apply the limiter and get the new time-step size */
+        const integertime_t ti_new_step = timestep_limit_part(p, xp, e);
+
+        /* What is the next sync-point ? */
+        ti_hydro_end_min = min(ti_current + ti_new_step, ti_hydro_end_min);
+        ti_hydro_end_max = max(ti_current + ti_new_step, ti_hydro_end_max);
+
+        /* What is the next starting point for this cell ? */
+        ti_hydro_beg_max = max(ti_current, ti_hydro_beg_max);
+
+        /* Also limit the gpart counter-part, if there is one */
+        if (p->gpart != NULL) {
+
+          /* Register the time-bin: the gpart takes the part's time-bin */
+          p->gpart->time_bin = p->time_bin;
+
+          /* What is the next sync-point ? */
+          ti_gravity_end_min =
+              min(ti_current + ti_new_step, ti_gravity_end_min);
+          ti_gravity_end_max =
+              max(ti_current + ti_new_step, ti_gravity_end_max);
+
+          /* What is the next starting point for this cell ? */
+          ti_gravity_beg_max = max(ti_current, ti_gravity_beg_max);
+        }
+      }
+    }
+
+    /* Store the updated values (end_min only shrinks, the max-es only grow) */
+    c->hydro.ti_end_min = min(c->hydro.ti_end_min, ti_hydro_end_min);
+    c->hydro.ti_end_max = max(c->hydro.ti_end_max, ti_hydro_end_max);
+    c->hydro.ti_beg_max = max(c->hydro.ti_beg_max, ti_hydro_beg_max);
+    c->grav.ti_end_min = min(c->grav.ti_end_min, ti_gravity_end_min);
+    c->grav.ti_end_max = max(c->grav.ti_end_max, ti_gravity_end_max);
+    c->grav.ti_beg_max = max(c->grav.ti_beg_max, ti_gravity_beg_max);
+  }
+
+  /* Clear the limiter flags. */
+  c->hydro.do_limiter = 0;
+  c->hydro.do_sub_limiter = 0;
+
+  if (timer) TIMER_TOC(timer_do_limiter);
+}
+
 /**
  * @brief End the force calculation of all active particles in a cell
  * by multiplying the acccelerations by the relevant constants
@@ -2773,6 +2988,8 @@ void *runner_main(void *data) {
 #endif
           else if (t->subtype == task_subtype_force)
             runner_doself2_branch_force(r, ci);
+          else if (t->subtype == task_subtype_limiter)
+            runner_doself2_branch_limiter(r, ci);
           else if (t->subtype == task_subtype_grav)
             runner_doself_recursive_grav(r, ci, 1);
           else if (t->subtype == task_subtype_external_grav)
@@ -2794,6 +3011,8 @@ void *runner_main(void *data) {
 #endif
           else if (t->subtype == task_subtype_force)
             runner_dopair2_branch_force(r, ci, cj);
+          else if (t->subtype == task_subtype_limiter)
+            runner_dopair2_branch_limiter(r, ci, cj);
           else if (t->subtype == task_subtype_grav)
             runner_dopair_recursive_grav(r, ci, cj, 1);
           else if (t->subtype == task_subtype_stars_density)
@@ -2813,6 +3032,8 @@ void *runner_main(void *data) {
 #endif
           else if (t->subtype == task_subtype_force)
             runner_dosub_self2_force(r, ci, 1);
+          else if (t->subtype == task_subtype_limiter)
+            runner_dosub_self2_limiter(r, ci, 1);
           else if (t->subtype == task_subtype_stars_density)
             runner_dosub_self_stars_density(r, ci, 1);
           else if (t->subtype == task_subtype_stars_feedback)
@@ -2830,6 +3051,8 @@ void *runner_main(void *data) {
 #endif
           else if (t->subtype == task_subtype_force)
             runner_dosub_pair2_force(r, ci, cj, t->flags, 1);
+          else if (t->subtype == task_subtype_limiter)
+            runner_dosub_pair2_limiter(r, ci, cj, t->flags, 1);
           else if (t->subtype == task_subtype_stars_density)
             runner_dosub_pair_stars_density(r, ci, cj, t->flags, 1);
           else if (t->subtype == task_subtype_stars_feedback)
@@ -2889,6 +3112,9 @@ void *runner_main(void *data) {
         case task_type_timestep:
           runner_do_timestep(r, ci, 1);
           break;
+        case task_type_timestep_limiter:
+          runner_do_limiter(r, ci, 0, 1);
+          break;
 #ifdef WITH_MPI
         case task_type_send:
           if (t->subtype == task_subtype_tend) {
@@ -2905,6 +3131,8 @@ void *runner_main(void *data) {
             runner_do_recv_part(r, ci, 0, 1);
           } else if (t->subtype == task_subtype_gradient) {
             runner_do_recv_part(r, ci, 0, 1);
+          } else if (t->subtype == task_subtype_limiter) {
+            runner_do_recv_part(r, ci, 0, 1);
           } else if (t->subtype == task_subtype_gpart) {
             runner_do_recv_gpart(r, ci, 1);
           } else if (t->subtype == task_subtype_spart) {
diff --git a/src/runner_doiact.h b/src/runner_doiact.h
index 53cf51ed400f82d0e195e38dd08fcc5af16f1ad7..861798b70b8ba90b9267375253bd8570baec3e9a 100644
--- a/src/runner_doiact.h
+++ b/src/runner_doiact.h
@@ -168,8 +168,11 @@ void DOPAIR1_NAIVE(struct runner *r, struct cell *restrict ci,
 
     /* Get a hold of the ith part in ci. */
     struct part *restrict pi = &parts_i[pid];
+
+    /* Skip inhibited particles. */
+    if (part_is_inhibited(pi, e)) continue;
+
     const int pi_active = part_is_active(pi, e);
-    const int pi_inhibited = part_is_inhibited(pi, e);
     const float hi = pi->h;
     const float hig2 = hi * hi * kernel_gamma2;
     const float pix[3] = {(float)(pi->x[0] - (cj->loc[0] + shift[0])),
@@ -181,10 +184,13 @@ void DOPAIR1_NAIVE(struct runner *r, struct cell *restrict ci,
 
       /* Get a pointer to the jth particle. */
       struct part *restrict pj = &parts_j[pjd];
+
+      /* Skip inhibited particles. */
+      if (part_is_inhibited(pj, e)) continue;
+
       const float hj = pj->h;
       const float hjg2 = hj * hj * kernel_gamma2;
       const int pj_active = part_is_active(pj, e);
-      const int pj_inhibited = part_is_inhibited(pj, e);
 
       /* Compute the pairwise distance. */
       const float pjx[3] = {(float)(pj->x[0] - cj->loc[0]),
@@ -195,21 +201,21 @@ void DOPAIR1_NAIVE(struct runner *r, struct cell *restrict ci,
 
 #ifdef SWIFT_DEBUG_CHECKS
       /* Check that particles have been drifted to the current time */
-      if (pi->ti_drift != e->ti_current && !pi_inhibited)
+      if (pi->ti_drift != e->ti_current)
         error("Particle pi not drifted to current time");
-      if (pj->ti_drift != e->ti_current && !pj_inhibited)
+      if (pj->ti_drift != e->ti_current)
         error("Particle pj not drifted to current time");
 #endif
 
       /* Hit or miss? */
-      if (r2 < hig2 && pi_active && !pj_inhibited) {
+      if (r2 < hig2 && pi_active) {
 
         IACT_NONSYM(r2, dx, hi, hj, pi, pj, a, H);
 #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
         runner_iact_nonsym_chemistry(r2, dx, hi, hj, pi, pj, a, H);
 #endif
       }
-      if (r2 < hjg2 && pj_active && !pi_inhibited) {
+      if (r2 < hjg2 && pj_active) {
 
         dx[0] = -dx[0];
         dx[1] = -dx[1];
@@ -270,8 +276,11 @@ void DOPAIR2_NAIVE(struct runner *r, struct cell *restrict ci,
 
     /* Get a hold of the ith part in ci. */
     struct part *restrict pi = &parts_i[pid];
+
+    /* Skip inhibited particles. */
+    if (part_is_inhibited(pi, e)) continue;
+
     const int pi_active = part_is_active(pi, e);
-    const int pi_inhibited = part_is_inhibited(pi, e);
     const float hi = pi->h;
     const float hig2 = hi * hi * kernel_gamma2;
     const float pix[3] = {(float)(pi->x[0] - (cj->loc[0] + shift[0])),
@@ -283,8 +292,11 @@ void DOPAIR2_NAIVE(struct runner *r, struct cell *restrict ci,
 
       /* Get a pointer to the jth particle. */
       struct part *restrict pj = &parts_j[pjd];
+
+      /* Skip inhibited particles. */
+      if (part_is_inhibited(pj, e)) continue;
+
       const int pj_active = part_is_active(pj, e);
-      const int pj_inhibited = part_is_inhibited(pj, e);
       const float hj = pj->h;
       const float hjg2 = hj * hj * kernel_gamma2;
 
@@ -297,28 +309,28 @@ void DOPAIR2_NAIVE(struct runner *r, struct cell *restrict ci,
 
 #ifdef SWIFT_DEBUG_CHECKS
       /* Check that particles have been drifted to the current time */
-      if (pi->ti_drift != e->ti_current && !pj_inhibited)
+      if (pi->ti_drift != e->ti_current)
         error("Particle pi not drifted to current time");
-      if (pj->ti_drift != e->ti_current && !pi_inhibited)
+      if (pj->ti_drift != e->ti_current)
         error("Particle pj not drifted to current time");
 #endif
 
       /* Hit or miss? */
       if (r2 < hig2 || r2 < hjg2) {
 
-        if (pi_active && pj_active && !pi_inhibited && !pj_inhibited) {
+        if (pi_active && pj_active) {
 
           IACT(r2, dx, hi, hj, pi, pj, a, H);
 #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
           runner_iact_chemistry(r2, dx, hi, hj, pi, pj, a, H);
 #endif
-        } else if (pi_active && !pj_inhibited) {
+        } else if (pi_active) {
 
           IACT_NONSYM(r2, dx, hi, hj, pi, pj, a, H);
 #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
           runner_iact_nonsym_chemistry(r2, dx, hi, hj, pi, pj, a, H);
 #endif
-        } else if (pj_active && !pi_inhibited) {
+        } else if (pj_active) {
 
           dx[0] = -dx[0];
           dx[1] = -dx[1];
@@ -366,8 +378,11 @@ void DOSELF1_NAIVE(struct runner *r, struct cell *restrict c) {
 
     /* Get a hold of the ith part in ci. */
     struct part *restrict pi = &parts[pid];
+
+    /* Skip inhibited particles. */
+    if (part_is_inhibited(pi, e)) continue;
+
     const int pi_active = part_is_active(pi, e);
-    const int pi_inhibited = part_is_inhibited(pi, e);
     const float hi = pi->h;
     const float hig2 = hi * hi * kernel_gamma2;
     const float pix[3] = {(float)(pi->x[0] - c->loc[0]),
@@ -379,10 +394,13 @@ void DOSELF1_NAIVE(struct runner *r, struct cell *restrict c) {
 
       /* Get a pointer to the jth particle. */
       struct part *restrict pj = &parts[pjd];
+
+      /* Skip inhibited particles. */
+      if (part_is_inhibited(pj, e)) continue;
+
       const float hj = pj->h;
       const float hjg2 = hj * hj * kernel_gamma2;
       const int pj_active = part_is_active(pj, e);
-      const int pj_inhibited = part_is_inhibited(pj, e);
 
       /* Compute the pairwise distance. */
       const float pjx[3] = {(float)(pj->x[0] - c->loc[0]),
@@ -391,14 +409,14 @@ void DOSELF1_NAIVE(struct runner *r, struct cell *restrict c) {
       float dx[3] = {pix[0] - pjx[0], pix[1] - pjx[1], pix[2] - pjx[2]};
       const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
 
-      const int doi = pi_active && (r2 < hig2) && !pj_inhibited;
-      const int doj = pj_active && (r2 < hjg2) && !pi_inhibited;
+      const int doi = pi_active && (r2 < hig2);
+      const int doj = pj_active && (r2 < hjg2);
 
 #ifdef SWIFT_DEBUG_CHECKS
       /* Check that particles have been drifted to the current time */
-      if (pi->ti_drift != e->ti_current && !pi_inhibited)
+      if (pi->ti_drift != e->ti_current)
         error("Particle pi not drifted to current time");
-      if (pj->ti_drift != e->ti_current && !pj_inhibited)
+      if (pj->ti_drift != e->ti_current)
         error("Particle pj not drifted to current time");
 #endif
 
@@ -462,8 +480,11 @@ void DOSELF2_NAIVE(struct runner *r, struct cell *restrict c) {
 
     /* Get a hold of the ith part in ci. */
     struct part *restrict pi = &parts[pid];
+
+    /* Skip inhibited particles. */
+    if (part_is_inhibited(pi, e)) continue;
+
     const int pi_active = part_is_active(pi, e);
-    const int pi_inhibited = part_is_inhibited(pi, e);
     const float hi = pi->h;
     const float hig2 = hi * hi * kernel_gamma2;
     const float pix[3] = {(float)(pi->x[0] - c->loc[0]),
@@ -475,10 +496,13 @@ void DOSELF2_NAIVE(struct runner *r, struct cell *restrict c) {
 
       /* Get a pointer to the jth particle. */
       struct part *restrict pj = &parts[pjd];
+
+      /* Skip inhibited particles. */
+      if (part_is_inhibited(pj, e)) continue;
+
       const float hj = pj->h;
       const float hjg2 = hj * hj * kernel_gamma2;
       const int pj_active = part_is_active(pj, e);
-      const int pj_inhibited = part_is_inhibited(pj, e);
 
       /* Compute the pairwise distance. */
       const float pjx[3] = {(float)(pj->x[0] - c->loc[0]),
@@ -487,16 +511,14 @@ void DOSELF2_NAIVE(struct runner *r, struct cell *restrict c) {
       float dx[3] = {pix[0] - pjx[0], pix[1] - pjx[1], pix[2] - pjx[2]};
       const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
 
-      const int doi =
-          pi_active && ((r2 < hig2) || (r2 < hjg2)) && !pj_inhibited;
-      const int doj =
-          pj_active && ((r2 < hig2) || (r2 < hjg2)) && !pi_inhibited;
+      const int doi = pi_active && ((r2 < hig2) || (r2 < hjg2));
+      const int doj = pj_active && ((r2 < hig2) || (r2 < hjg2));
 
 #ifdef SWIFT_DEBUG_CHECKS
       /* Check that particles have been drifted to the current time */
-      if (pi->ti_drift != e->ti_current && !pi_inhibited)
+      if (pi->ti_drift != e->ti_current)
         error("Particle pi not drifted to current time");
-      if (pj->ti_drift != e->ti_current && !pj_inhibited)
+      if (pj->ti_drift != e->ti_current)
         error("Particle pj not drifted to current time");
 #endif
 
@@ -581,7 +603,9 @@ void DOPAIR_SUBSET_NAIVE(struct runner *r, struct cell *restrict ci,
 
       /* Get a pointer to the jth particle. */
       struct part *restrict pj = &parts_j[pjd];
-      const int pj_inhibited = part_is_inhibited(pj, e);
+
+      /* Skip inhibited particles. */
+      if (part_is_inhibited(pj, e)) continue;
 
       /* Compute the pairwise distance. */
       float r2 = 0.0f;
@@ -595,12 +619,12 @@ void DOPAIR_SUBSET_NAIVE(struct runner *r, struct cell *restrict ci,
       /* Check that particles have been drifted to the current time */
       if (pi->ti_drift != e->ti_current)
         error("Particle pi not drifted to current time");
-      if (pj->ti_drift != e->ti_current && !pj_inhibited)
+      if (pj->ti_drift != e->ti_current)
         error("Particle pj not drifted to current time");
 #endif
 
       /* Hit or miss? */
-      if (r2 < hig2 && !pj_inhibited) {
+      if (r2 < hig2) {
 
         IACT_NONSYM(r2, dx, hi, pj->h, pi, pj, a, H);
 #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
@@ -669,7 +693,10 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci,
 
         /* Get a pointer to the jth particle. */
         struct part *restrict pj = &parts_j[sort_j[pjd].i];
-        const int pj_inhibited = part_is_inhibited(pj, e);
+
+        /* Skip inhibited particles. */
+        if (part_is_inhibited(pj, e)) continue;
+
         const float hj = pj->h;
         const double pjx = pj->x[0];
         const double pjy = pj->x[1];
@@ -684,12 +711,12 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci,
         /* Check that particles have been drifted to the current time */
         if (pi->ti_drift != e->ti_current)
           error("Particle pi not drifted to current time");
-        if (pj->ti_drift != e->ti_current && !pj_inhibited)
+        if (pj->ti_drift != e->ti_current)
           error("Particle pj not drifted to current time");
 #endif
 
         /* Hit or miss? */
-        if (r2 < hig2 && !pj_inhibited) {
+        if (r2 < hig2) {
 
           IACT_NONSYM(r2, dx, hi, hj, pi, pj, a, H);
 #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
@@ -721,7 +748,10 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci,
 
         /* Get a pointer to the jth particle. */
         struct part *restrict pj = &parts_j[sort_j[pjd].i];
-        const int pj_inhibited = part_is_inhibited(pj, e);
+
+        /* Skip inhibited particles. */
+        if (part_is_inhibited(pj, e)) continue;
+
         const float hj = pj->h;
         const double pjx = pj->x[0];
         const double pjy = pj->x[1];
@@ -736,12 +766,12 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci,
         /* Check that particles have been drifted to the current time */
         if (pi->ti_drift != e->ti_current)
           error("Particle pi not drifted to current time");
-        if (pj->ti_drift != e->ti_current && !pj_inhibited)
+        if (pj->ti_drift != e->ti_current)
           error("Particle pj not drifted to current time");
 #endif
 
         /* Hit or miss? */
-        if (r2 < hig2 && !pj_inhibited) {
+        if (r2 < hig2) {
 
           IACT_NONSYM(r2, dx, hi, hj, pi, pj, a, H);
 #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
@@ -858,7 +888,10 @@ void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci,
 
       /* Get a pointer to the jth particle. */
       struct part *restrict pj = &parts_j[pjd];
-      const int pj_inhibited = part_is_inhibited(pj, e);
+
+      /* Skip inhibited particles. */
+      if (part_is_inhibited(pj, e)) continue;
+
       const float hj = pj->h;
 
       /* Compute the pairwise distance. */
@@ -872,12 +905,12 @@ void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci,
       /* Check that particles have been drifted to the current time */
       if (pi->ti_drift != e->ti_current)
         error("Particle pi not drifted to current time");
-      if (pj->ti_drift != e->ti_current && !pj_inhibited)
+      if (pj->ti_drift != e->ti_current)
         error("Particle pj not drifted to current time");
 #endif
 
       /* Hit or miss? */
-      if (r2 > 0.f && r2 < hig2 && !pj_inhibited) {
+      if (r2 > 0.f && r2 < hig2) {
 
         IACT_NONSYM(r2, dx, hi, hj, pi, pj, a, H);
 #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
@@ -992,7 +1025,10 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid,
 
         /* Recover pj */
         struct part *pj = &parts_j[sort_j[pjd].i];
-        const int pj_inhibited = part_is_inhibited(pj, e);
+
+        /* Skip inhibited particles. */
+        if (part_is_inhibited(pj, e)) continue;
+
         const float hj = pj->h;
         const float pjx = pj->x[0] - cj->loc[0];
         const float pjy = pj->x[1] - cj->loc[1];
@@ -1032,12 +1068,12 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid,
         /* Check that particles have been drifted to the current time */
         if (pi->ti_drift != e->ti_current)
           error("Particle pi not drifted to current time");
-        if (pj->ti_drift != e->ti_current && !pj_inhibited)
+        if (pj->ti_drift != e->ti_current)
           error("Particle pj not drifted to current time");
 #endif
 
         /* Hit or miss? */
-        if (r2 < hig2 && !pj_inhibited) {
+        if (r2 < hig2) {
 
           IACT_NONSYM(r2, dx, hi, hj, pi, pj, a, H);
 #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
@@ -1076,7 +1112,10 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid,
 
         /* Recover pi */
         struct part *pi = &parts_i[sort_i[pid].i];
-        const int pi_inhibited = part_is_inhibited(pi, e);
+
+        /* Skip inhibited particles. */
+        if (part_is_inhibited(pi, e)) continue;
+
         const float hi = pi->h;
         const float pix = pi->x[0] - (cj->loc[0] + shift[0]);
         const float piy = pi->x[1] - (cj->loc[1] + shift[1]);
@@ -1114,14 +1153,14 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid,
               pjz, ci->width[2]);
 
         /* Check that particles have been drifted to the current time */
-        if (pi->ti_drift != e->ti_current && !pi_inhibited)
+        if (pi->ti_drift != e->ti_current)
           error("Particle pi not drifted to current time");
         if (pj->ti_drift != e->ti_current)
           error("Particle pj not drifted to current time");
 #endif
 
         /* Hit or miss? */
-        if (r2 < hjg2 && !pi_inhibited) {
+        if (r2 < hjg2) {
 
           IACT_NONSYM(r2, dx, hj, hi, pj, pi, a, H);
 #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
@@ -1335,7 +1374,10 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid,
 
     /* Get a hold of the ith part in ci. */
     struct part *pi = &parts_i[sort_i[pid].i];
-    const int pi_inhibited = part_is_inhibited(pi, e);
+
+    /* Skip inhibited particles. */
+    if (part_is_inhibited(pi, e)) continue;
+
     const float hi = pi->h;
 
     /* Is there anything we need to interact with (for this specific hi) ? */
@@ -1397,7 +1439,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid,
               pjz, ci->width[2]);
 
         /* Check that particles have been drifted to the current time */
-        if (pi->ti_drift != e->ti_current && !pi_inhibited)
+        if (pi->ti_drift != e->ti_current)
           error("Particle pi not drifted to current time");
         if (pj->ti_drift != e->ti_current)
           error("Particle pj not drifted to current time");
@@ -1405,7 +1447,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid,
 
         /* Hit or miss?
            (note that we will do the other condition in the reverse loop) */
-        if (r2 < hig2 && !pi_inhibited) {
+        if (r2 < hig2) {
           IACT_NONSYM(r2, dx, hj, hi, pj, pi, a, H);
 #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
           runner_iact_nonsym_chemistry(r2, dx, hj, hi, pj, pi, a, H);
@@ -1421,7 +1463,10 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid,
 
         /* Recover pj */
         struct part *pj = &parts_j[sort_j[pjd].i];
-        const int pj_inhibited = part_is_inhibited(pj, e);
+
+        /* Skip inhibited particles. */
+        if (part_is_inhibited(pj, e)) continue;
+
         const float hj = pj->h;
 
         /* Get the position of pj in the right frame */
@@ -1461,14 +1506,14 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid,
               pjz, ci->width[2]);
 
         /* Check that particles have been drifted to the current time */
-        if (pi->ti_drift != e->ti_current && !pi_inhibited)
+        if (pi->ti_drift != e->ti_current)
           error("Particle pi not drifted to current time");
-        if (pj->ti_drift != e->ti_current && !pj_inhibited)
+        if (pj->ti_drift != e->ti_current)
           error("Particle pj not drifted to current time");
 #endif
         /* Hit or miss?
            (note that we will do the other condition in the reverse loop) */
-        if (r2 < hig2 && !pj_inhibited) {
+        if (r2 < hig2) {
 
           /* Does pj need to be updated too? */
           if (part_is_active(pj, e)) {
@@ -1496,7 +1541,10 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid,
 
     /* Get a hold of the jth part in cj. */
     struct part *pj = &parts_j[sort_j[pjd].i];
-    const int pj_inhibited = part_is_inhibited(pj, e);
+
+    /* Skip inhibited particles. */
+    if (part_is_inhibited(pj, e)) continue;
+
     const float hj = pj->h;
 
     /* Is there anything we need to interact with (for this specific hj) ? */
@@ -1561,13 +1609,13 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid,
         /* Check that particles have been drifted to the current time */
         if (pi->ti_drift != e->ti_current)
           error("Particle pi not drifted to current time");
-        if (pj->ti_drift != e->ti_current && !pj_inhibited)
+        if (pj->ti_drift != e->ti_current)
           error("Particle pj not drifted to current time");
 #endif
 
         /* Hit or miss?
            (note that we must avoid the r2 < hig2 cases we already processed) */
-        if (r2 < hjg2 && r2 >= hig2 && !pj_inhibited) {
+        if (r2 < hjg2 && r2 >= hig2) {
           IACT_NONSYM(r2, dx, hi, hj, pi, pj, a, H);
 #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
           runner_iact_nonsym_chemistry(r2, dx, hi, hj, pi, pj, a, H);
@@ -1584,7 +1632,10 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid,
 
         /* Recover pi */
         struct part *pi = &parts_i[sort_i[pid].i];
-        const int pi_inhibited = part_is_inhibited(pi, e);
+
+        /* Skip inhibited particles. */
+        if (part_is_inhibited(pi, e)) continue;
+
         const float hi = pi->h;
         const float hig2 = hi * hi * kernel_gamma2;
 
@@ -1625,15 +1676,15 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid,
               pjz, ci->width[2]);
 
         /* Check that particles have been drifted to the current time */
-        if (pi->ti_drift != e->ti_current && !pi_inhibited)
+        if (pi->ti_drift != e->ti_current)
           error("Particle pi not drifted to current time");
-        if (pj->ti_drift != e->ti_current && !pj_inhibited)
+        if (pj->ti_drift != e->ti_current)
           error("Particle pj not drifted to current time");
 #endif
 
         /* Hit or miss?
            (note that we must avoid the r2 < hig2 cases we already processed) */
-        if (r2 < hjg2 && r2 >= hig2 && !pi_inhibited) {
+        if (r2 < hjg2 && r2 >= hig2) {
 
           /* Does pi need to be updated too? */
           if (part_is_active(pi, e)) {
@@ -1788,7 +1839,9 @@ void DOSELF1(struct runner *r, struct cell *restrict c) {
 
     /* Get a pointer to the ith particle. */
     struct part *restrict pi = &parts[pid];
-    const int pi_inhibited = part_is_inhibited(pi, e);
+
+    /* Skip inhibited particles. */
+    if (part_is_inhibited(pi, e)) continue;
 
     /* Get the particle position and radius. */
     double pix[3];
@@ -1808,7 +1861,7 @@ void DOSELF1(struct runner *r, struct cell *restrict c) {
 
 #ifdef SWIFT_DEBUG_CHECKS
         /* Check that particles have been drifted to the current time */
-        if (pi->ti_drift != e->ti_current && !pi_inhibited)
+        if (pi->ti_drift != e->ti_current)
           error("Particle pi not drifted to current time");
         if (pj->ti_drift != e->ti_current)
           error("Particle pj not drifted to current time");
@@ -1823,7 +1876,7 @@ void DOSELF1(struct runner *r, struct cell *restrict c) {
         }
 
         /* Hit or miss? */
-        if (r2 < hj * hj * kernel_gamma2 && !pi_inhibited) {
+        if (r2 < hj * hj * kernel_gamma2) {
 
           IACT_NONSYM(r2, dx, hj, hi, pj, pi, a, H);
 #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
@@ -1844,7 +1897,10 @@ void DOSELF1(struct runner *r, struct cell *restrict c) {
 
         /* Get a pointer to the jth particle. */
         struct part *restrict pj = &parts[pjd];
-        const int pj_inhibited = part_is_inhibited(pj, e);
+
+        /* Skip inhibited particles. */
+        if (part_is_inhibited(pj, e)) continue;
+
         const float hj = pj->h;
 
         /* Compute the pairwise distance. */
@@ -1861,9 +1917,9 @@ void DOSELF1(struct runner *r, struct cell *restrict c) {
 
 #ifdef SWIFT_DEBUG_CHECKS
         /* Check that particles have been drifted to the current time */
-        if (pi->ti_drift != e->ti_current && !pi_inhibited)
+        if (pi->ti_drift != e->ti_current)
           error("Particle pi not drifted to current time");
-        if (pj->ti_drift != e->ti_current && !pj_inhibited)
+        if (pj->ti_drift != e->ti_current)
           error("Particle pj not drifted to current time");
 #endif
 
@@ -1877,13 +1933,13 @@ void DOSELF1(struct runner *r, struct cell *restrict c) {
 #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
             runner_iact_chemistry(r2, dx, hi, hj, pi, pj, a, H);
 #endif
-          } else if (doi && !pj_inhibited) {
+          } else if (doi) {
 
             IACT_NONSYM(r2, dx, hi, hj, pi, pj, a, H);
 #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
             runner_iact_nonsym_chemistry(r2, dx, hi, hj, pi, pj, a, H);
 #endif
-          } else if (doj && !pi_inhibited) {
+          } else if (doj) {
 
             dx[0] = -dx[0];
             dx[1] = -dx[1];
@@ -1972,7 +2028,9 @@ void DOSELF2(struct runner *r, struct cell *restrict c) {
 
     /* Get a pointer to the ith particle. */
     struct part *restrict pi = &parts[pid];
-    const int pi_inhibited = part_is_inhibited(pi, e);
+
+    /* Skip inhibited particles. */
+    if (part_is_inhibited(pi, e)) continue;
 
     /* Get the particle position and radius. */
     double pix[3];
@@ -2000,14 +2058,14 @@ void DOSELF2(struct runner *r, struct cell *restrict c) {
 
 #ifdef SWIFT_DEBUG_CHECKS
         /* Check that particles have been drifted to the current time */
-        if (pi->ti_drift != e->ti_current && !pi_inhibited)
+        if (pi->ti_drift != e->ti_current)
           error("Particle pi not drifted to current time");
         if (pj->ti_drift != e->ti_current)
           error("Particle pj not drifted to current time");
 #endif
 
         /* Hit or miss? */
-        if ((r2 < hig2 || r2 < hj * hj * kernel_gamma2) && !pi_inhibited) {
+        if (r2 < hig2 || r2 < hj * hj * kernel_gamma2) {
 
           IACT_NONSYM(r2, dx, hj, hi, pj, pi, a, H);
 #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
@@ -2028,7 +2086,10 @@ void DOSELF2(struct runner *r, struct cell *restrict c) {
 
         /* Get a pointer to the jth particle. */
         struct part *restrict pj = &parts[pjd];
-        const int pj_inhibited = part_is_inhibited(pj, e);
+
+        /* Skip inhibited particles. */
+        if (part_is_inhibited(pj, e)) continue;
+
         const float hj = pj->h;
 
         /* Compute the pairwise distance. */
@@ -2041,14 +2102,14 @@ void DOSELF2(struct runner *r, struct cell *restrict c) {
 
 #ifdef SWIFT_DEBUG_CHECKS
         /* Check that particles have been drifted to the current time */
-        if (pi->ti_drift != e->ti_current && !pi_inhibited)
+        if (pi->ti_drift != e->ti_current)
           error("Particle pi not drifted to current time");
-        if (pj->ti_drift != e->ti_current && !pj_inhibited)
+        if (pj->ti_drift != e->ti_current)
           error("Particle pj not drifted to current time");
 #endif
 
         /* Hit or miss? */
-        if ((r2 < hig2 || r2 < hj * hj * kernel_gamma2) && !pj_inhibited) {
+        if (r2 < hig2 || r2 < hj * hj * kernel_gamma2) {
 
           /* Does pj need to be updated too? */
           if (part_is_active(pj, e)) {
diff --git a/src/runner_doiact_vec.c b/src/runner_doiact_vec.c
index c74fa7c8f53576f2e80578488fdf3378c59c0400..75bd86a6b4c5aa6c229ffeffa0d43c61e2948b72 100644
--- a/src/runner_doiact_vec.c
+++ b/src/runner_doiact_vec.c
@@ -23,9 +23,6 @@
 /* This object's header. */
 #include "runner_doiact_vec.h"
 
-/* Local headers. */
-#include "active.h"
-
 #if defined(WITH_VECTORIZATION) && defined(GADGET2_SPH)
 
 static const vector kernel_gamma2_vec = FILL_VEC(kernel_gamma2);
@@ -68,8 +65,6 @@ __attribute__((always_inline)) INLINE static void calcRemInteractions(
     vector *v_curlvzSum, vector v_hi_inv, vector v_vix, vector v_viy,
     vector v_viz, int *icount_align) {
 
-  mask_t int_mask, int_mask2;
-
   /* Work out the number of remainder interactions and pad secondary cache. */
   *icount_align = icount;
   int rem = icount % (NUM_VEC_PROC * VEC_SIZE);
@@ -78,6 +73,7 @@ __attribute__((always_inline)) INLINE static void calcRemInteractions(
     *icount_align += pad;
 
     /* Initialise masks to true. */
+    mask_t int_mask, int_mask2;
     vec_init_mask_true(int_mask);
     vec_init_mask_true(int_mask2);
 
@@ -654,7 +650,6 @@ void runner_doself1_density_vec(struct runner *r, struct cell *restrict c) {
 
   /* Get some local variables */
   const struct engine *e = r->e;
-  const timebin_t max_active_bin = e->max_active_bin;
   struct part *restrict parts = c->hydro.parts;
   const int count = c->hydro.count;
 
@@ -663,12 +658,13 @@ void runner_doself1_density_vec(struct runner *r, struct cell *restrict c) {
   /* Anything to do here? */
   if (!cell_is_active_hydro(c, e)) return;
 
+  /* Check that everybody was drifted here */
   if (!cell_are_part_drifted(c, e)) error("Interacting undrifted cell.");
 
 #ifdef SWIFT_DEBUG_CHECKS
   for (int i = 0; i < count; i++) {
     /* Check that particles have been drifted to the current time */
-    if (parts[i].ti_drift != e->ti_current)
+    if (parts[i].ti_drift != e->ti_current && !part_is_inhibited(&parts[i], e))
       error("Particle pi not drifted to current time");
   }
 #endif
@@ -679,7 +675,7 @@ void runner_doself1_density_vec(struct runner *r, struct cell *restrict c) {
   if (cell_cache->count < count) cache_init(cell_cache, count);
 
   /* Read the particles from the cell and store them locally in the cache. */
-  cache_read_particles(c, cell_cache);
+  const int count_align = cache_read_particles(c, cell_cache);
 
   /* Create secondary cache to store particle interactions. */
   struct c2_cache int_cache;
@@ -690,25 +686,23 @@ void runner_doself1_density_vec(struct runner *r, struct cell *restrict c) {
     /* Get a pointer to the ith particle. */
     struct part *restrict pi = &parts[pid];
 
-    /* Is the ith particle active? */
-    if (!part_is_active_no_debug(pi, max_active_bin)) continue;
-
-    const float hi = cell_cache->h[pid];
+    /* Is the i^th particle active? */
+    if (!part_is_active(pi, e)) continue;
 
     /* Fill particle pi vectors. */
     const vector v_pix = vector_set1(cell_cache->x[pid]);
     const vector v_piy = vector_set1(cell_cache->y[pid]);
     const vector v_piz = vector_set1(cell_cache->z[pid]);
-    const vector v_hi = vector_set1(hi);
+    const vector v_hi = vector_set1(cell_cache->h[pid]);
     const vector v_vix = vector_set1(cell_cache->vx[pid]);
     const vector v_viy = vector_set1(cell_cache->vy[pid]);
     const vector v_viz = vector_set1(cell_cache->vz[pid]);
 
+    /* Some useful multiples of h */
+    const float hi = cell_cache->h[pid];
     const float hig2 = hi * hi * kernel_gamma2;
     const vector v_hig2 = vector_set1(hig2);
-
-    /* Get the inverse of hi. */
-    vector v_hi_inv = vec_reciprocal(v_hi);
+    const vector v_hi_inv = vec_reciprocal(v_hi);
 
     /* Reset cumulative sums of update vectors. */
     vector v_rhoSum = vector_setzero();
@@ -720,21 +714,6 @@ void runner_doself1_density_vec(struct runner *r, struct cell *restrict c) {
     vector v_curlvySum = vector_setzero();
     vector v_curlvzSum = vector_setzero();
 
-    /* Pad cache if there is a serial remainder. */
-    int count_align = count;
-    const int rem = count % (NUM_VEC_PROC * VEC_SIZE);
-    if (rem != 0) {
-      count_align += (NUM_VEC_PROC * VEC_SIZE) - rem;
-
-      /* Set positions to the same as particle pi so when the r2 > 0 mask is
-       * applied these extra contributions are masked out.*/
-      for (int i = count; i < count_align; i++) {
-        cell_cache->x[i] = v_pix.f[0];
-        cell_cache->y[i] = v_piy.f[0];
-        cell_cache->z[i] = v_piz.f[0];
-      }
-    }
-
     /* The number of interactions for pi and the padded version of it to
      * make it a multiple of VEC_SIZE. */
     int icount = 0, icount_align = 0;
@@ -771,8 +750,8 @@ void runner_doself1_density_vec(struct runner *r, struct cell *restrict c) {
       v_r2_2.v = vec_fma(v_dz_2.v, v_dz_2.v, v_r2_2.v);
 
       /* Form a mask from r2 < hig2 and r2 > 0.*/
-      mask_t v_doi_mask, v_doi_mask_self_check, v_doi_mask2,
-          v_doi_mask2_self_check;
+      mask_t v_doi_mask, v_doi_mask2;
+      mask_t v_doi_mask_self_check, v_doi_mask2_self_check;
 
       /* Form r2 > 0 mask and r2 < hig2 mask. */
       vec_create_mask(v_doi_mask_self_check, vec_cmp_gt(v_r2.v, vec_setzero()));
@@ -789,6 +768,25 @@ void runner_doself1_density_vec(struct runner *r, struct cell *restrict c) {
       const int doi_mask2 = vec_is_mask_true(v_doi_mask2) &
                             vec_is_mask_true(v_doi_mask2_self_check);
 
+#ifdef SWIFT_DEBUG_CHECKS
+      /* Verify that we have no inhibited particles in the interaction cache */
+      for (int bit_index = 0; bit_index < VEC_SIZE; bit_index++) {
+        if (doi_mask & (1 << bit_index)) {
+          if (parts[pjd + bit_index].time_bin >= time_bin_inhibited) {
+            error("Inhibited particle in interaction cache!");
+          }
+        }
+      }
+      for (int bit_index = 0; bit_index < VEC_SIZE; bit_index++) {
+        if (doi_mask2 & (1 << bit_index)) {
+          if (parts[pjd + VEC_SIZE + bit_index].time_bin >=
+              time_bin_inhibited) {
+            error("Inhibited particle in interaction cache2!");
+          }
+        }
+      }
+#endif
+
 #ifdef DEBUG_INTERACTIONS_SPH
       for (int bit_index = 0; bit_index < VEC_SIZE; bit_index++) {
         if (doi_mask & (1 << bit_index)) {
@@ -837,7 +835,7 @@ void runner_doself1_density_vec(struct runner *r, struct cell *restrict c) {
     vec_init_mask_true(int_mask);
     vec_init_mask_true(int_mask2);
 
-    /* Perform interaction with 2 vectors. */
+    /* Perform interaction with NUM_VEC_PROC vectors. */
     for (int pjd = 0; pjd < icount_align; pjd += (NUM_VEC_PROC * VEC_SIZE)) {
       runner_iact_nonsym_2_vec_density(
           &int_cache.r2q[pjd], &int_cache.dxq[pjd], &int_cache.dyq[pjd],
@@ -848,8 +846,7 @@ void runner_doself1_density_vec(struct runner *r, struct cell *restrict c) {
           &v_curlvzSum, int_mask, int_mask2, 0);
     }
 
-    /* Perform horizontal adds on vector sums and store result in particle pi.
-     */
+    /* Perform horizontal adds on vector sums and store result in pi. */
     VEC_HADD(v_rhoSum, pi->rho);
     VEC_HADD(v_rho_dhSum, pi->density.rho_dh);
     VEC_HADD(v_wcountSum, pi->density.wcount);
@@ -899,7 +896,7 @@ void runner_doself_subset_density_vec(struct runner *r, struct cell *restrict c,
   if (cell_cache->count < count) cache_init(cell_cache, count);
 
   /* Read the particles from the cell and store them locally in the cache. */
-  cache_read_particles(c, cell_cache);
+  const int count_align = cache_read_particles(c, cell_cache);
 
   /* Create secondary cache to store particle interactions. */
   struct c2_cache int_cache;
@@ -942,23 +939,6 @@ void runner_doself_subset_density_vec(struct runner *r, struct cell *restrict c,
     vector v_curlvySum = vector_setzero();
     vector v_curlvzSum = vector_setzero();
 
-    /* Pad cache if there is a serial remainder. */
-    int count_align = count;
-    const int rem = count % (NUM_VEC_PROC * VEC_SIZE);
-    if (rem != 0) {
-      const int pad = (NUM_VEC_PROC * VEC_SIZE) - rem;
-
-      count_align += pad;
-
-      /* Set positions to the same as particle pi so when the r2 > 0 mask is
-       * applied these extra contributions are masked out.*/
-      for (int i = count; i < count_align; i++) {
-        cell_cache->x[i] = v_pix.f[0];
-        cell_cache->y[i] = v_piy.f[0];
-        cell_cache->z[i] = v_piz.f[0];
-      }
-    }
-
     /* The number of interactions for pi and the padded version of it to
      * make it a multiple of VEC_SIZE. */
     int icount = 0, icount_align = 0;
@@ -1015,9 +995,33 @@ void runner_doself_subset_density_vec(struct runner *r, struct cell *restrict c,
       const int doi_mask2 = vec_is_mask_true(v_doi_mask2) &
                             vec_is_mask_true(v_doi_mask2_self_check);
 
+#ifdef SWIFT_DEBUG_CHECKS
+      /* Verify that we have no inhibited particles in the interaction cache */
+      for (int bit_index = 0; bit_index < VEC_SIZE; bit_index++) {
+        struct part *restrict parts_i = c->hydro.parts;
+
+        if (doi_mask & (1 << bit_index)) {
+          if (parts_i[pjd + bit_index].time_bin >= time_bin_inhibited) {
+            error("Inhibited particle in interaction cache!");
+          }
+        }
+      }
+      for (int bit_index = 0; bit_index < VEC_SIZE; bit_index++) {
+        struct part *restrict parts_i = c->hydro.parts;
+
+        if (doi_mask2 & (1 << bit_index)) {
+          if (parts_i[pjd + VEC_SIZE + bit_index].time_bin >=
+              time_bin_inhibited) {
+            error("Inhibited particle in interaction cache2!");
+          }
+        }
+      }
+#endif
+
 #ifdef DEBUG_INTERACTIONS_SPH
-      struct part *restrict parts_i = c->hydro.parts;
       for (int bit_index = 0; bit_index < VEC_SIZE; bit_index++) {
+        struct part *restrict parts_i = c->hydro.parts;
+
         if (doi_mask & (1 << bit_index)) {
           if (pi->num_ngb_density < MAX_NUM_OF_NEIGHBOURS)
             pi->ids_ngbs_density[pi->num_ngb_density] =
@@ -1112,7 +1116,6 @@ void runner_doself2_force_vec(struct runner *r, struct cell *restrict c) {
 
   const struct engine *e = r->e;
   const struct cosmology *restrict cosmo = e->cosmology;
-  const timebin_t max_active_bin = e->max_active_bin;
   struct part *restrict parts = c->hydro.parts;
   const int count = c->hydro.count;
 
@@ -1126,7 +1129,7 @@ void runner_doself2_force_vec(struct runner *r, struct cell *restrict c) {
 #ifdef SWIFT_DEBUG_CHECKS
   for (int i = 0; i < count; i++) {
     /* Check that particles have been drifted to the current time */
-    if (parts[i].ti_drift != e->ti_current)
+    if (parts[i].ti_drift != e->ti_current && !part_is_inhibited(&parts[i], e))
       error("Particle pi not drifted to current time");
   }
 #endif
@@ -1138,7 +1141,7 @@ void runner_doself2_force_vec(struct runner *r, struct cell *restrict c) {
   if (cell_cache->count < count) cache_init(cell_cache, count);
 
   /* Read the particles from the cell and store them locally in the cache. */
-  cache_read_force_particles(c, cell_cache);
+  const int count_align = cache_read_force_particles(c, cell_cache);
 
   /* Cosmological terms */
   const float a = cosmo->a;
@@ -1150,16 +1153,14 @@ void runner_doself2_force_vec(struct runner *r, struct cell *restrict c) {
     /* Get a pointer to the ith particle. */
     struct part *restrict pi = &parts[pid];
 
-    /* Is the ith particle active? */
-    if (!part_is_active_no_debug(pi, max_active_bin)) continue;
-
-    const float hi = cell_cache->h[pid];
+    /* Is the i^th particle active? */
+    if (!part_is_active(pi, e)) continue;
 
     /* Fill particle pi vectors. */
     const vector v_pix = vector_set1(cell_cache->x[pid]);
     const vector v_piy = vector_set1(cell_cache->y[pid]);
     const vector v_piz = vector_set1(cell_cache->z[pid]);
-    const vector v_hi = vector_set1(hi);
+    const vector v_hi = vector_set1(cell_cache->h[pid]);
     const vector v_vix = vector_set1(cell_cache->vx[pid]);
     const vector v_viy = vector_set1(cell_cache->vy[pid]);
     const vector v_viz = vector_set1(cell_cache->vz[pid]);
@@ -1170,11 +1171,11 @@ void runner_doself2_force_vec(struct runner *r, struct cell *restrict c) {
     const vector v_balsara_i = vector_set1(cell_cache->balsara[pid]);
     const vector v_ci = vector_set1(cell_cache->soundspeed[pid]);
 
+    /* Some useful powers of h */
+    const float hi = cell_cache->h[pid];
     const float hig2 = hi * hi * kernel_gamma2;
     const vector v_hig2 = vector_set1(hig2);
-
-    /* Get the inverse of hi. */
-    vector v_hi_inv = vec_reciprocal(v_hi);
+    const vector v_hi_inv = vec_reciprocal(v_hi);
 
     /* Reset cumulative sums of update vectors. */
     vector v_a_hydro_xSum = vector_setzero();
@@ -1184,39 +1185,18 @@ void runner_doself2_force_vec(struct runner *r, struct cell *restrict c) {
     vector v_sigSum = vector_set1(pi->force.v_sig);
     vector v_entropy_dtSum = vector_setzero();
 
-    /* Pad cache if there is a serial remainder. */
-    int count_align = count;
-    int rem = count % VEC_SIZE;
-    if (rem != 0) {
-      int pad = VEC_SIZE - rem;
-
-      count_align += pad;
-
-      /* Set positions to the same as particle pi so when the r2 > 0 mask is
-       * applied these extra contributions are masked out.*/
-      for (int i = count; i < count_align; i++) {
-        cell_cache->x[i] = v_pix.f[0];
-        cell_cache->y[i] = v_piy.f[0];
-        cell_cache->z[i] = v_piz.f[0];
-        cell_cache->h[i] = 1.f;
-        cell_cache->rho[i] = 1.f;
-        cell_cache->grad_h[i] = 1.f;
-        cell_cache->pOrho2[i] = 1.f;
-        cell_cache->balsara[i] = 1.f;
-        cell_cache->soundspeed[i] = 1.f;
-      }
-    }
-
     /* Find all of particle pi's interacions and store needed values in the
      * secondary cache.*/
     for (int pjd = 0; pjd < count_align; pjd += VEC_SIZE) {
 
       /* Load 1 set of vectors from the particle cache. */
-      vector hjg2;
       const vector v_pjx = vector_load(&cell_cache->x[pjd]);
       const vector v_pjy = vector_load(&cell_cache->y[pjd]);
       const vector v_pjz = vector_load(&cell_cache->z[pjd]);
       const vector hj = vector_load(&cell_cache->h[pjd]);
+
+      /* (hj * gamma)^2 */
+      vector hjg2;
       hjg2.v = vec_mul(vec_mul(hj.v, hj.v), kernel_gamma2_vec.v);
 
       /* Compute the pairwise distance. */
@@ -1229,20 +1209,33 @@ void runner_doself2_force_vec(struct runner *r, struct cell *restrict c) {
       v_r2.v = vec_fma(v_dy.v, v_dy.v, v_r2.v);
       v_r2.v = vec_fma(v_dz.v, v_dz.v, v_r2.v);
 
-      /* Form r2 > 0 mask, r2 < hig2 mask and r2 < hjg2 mask. */
-      mask_t v_doi_mask, v_doi_mask_self_check;
-
-      /* Form r2 > 0 mask.*/
+      /* Form r2 > 0 mask.
+       * This is used to avoid self-interactions */
+      mask_t v_doi_mask_self_check;
       vec_create_mask(v_doi_mask_self_check, vec_cmp_gt(v_r2.v, vec_setzero()));
 
-      /* Form a mask from r2 < hig2 mask and r2 < hjg2 mask. */
-      vector v_h2;
-      v_h2.v = vec_fmax(v_hig2.v, hjg2.v);
-      vec_create_mask(v_doi_mask, vec_cmp_lt(v_r2.v, v_h2.v));
+      /* Form a mask from r2 < hig2 mask and r2 < hjg2 mask.
+       * This is written as r2 < max(hig2, hjg2) */
+      mask_t v_doi_mask;
+      vec_create_mask(v_doi_mask,
+                      vec_cmp_lt(v_r2.v, vec_fmax(v_hig2.v, hjg2.v)));
 
-      /* Combine all 3 masks. */
+      /* Combine both masks. */
       vec_combine_masks(v_doi_mask, v_doi_mask_self_check);
 
+#ifdef SWIFT_DEBUG_CHECKS
+      /* Verify that we have no inhibited particles in the interaction cache */
+      for (int bit_index = 0; bit_index < VEC_SIZE; bit_index++) {
+        if (vec_is_mask_true(v_doi_mask) & (1 << bit_index)) {
+          if ((pjd + bit_index < count) &&
+              (parts[pjd + bit_index].time_bin >= time_bin_inhibited)) {
+            error("Inhibited particle in interaction cache! id=%lld",
+                  parts[pjd + bit_index].id);
+          }
+        }
+      }
+#endif
+
 #ifdef DEBUG_INTERACTIONS_SPH
       for (int bit_index = 0; bit_index < VEC_SIZE; bit_index++) {
         if (vec_is_mask_true(v_doi_mask) & (1 << bit_index)) {
@@ -1255,10 +1248,14 @@ void runner_doself2_force_vec(struct runner *r, struct cell *restrict c) {
 
       /* If there are any interactions perform them. */
       if (vec_is_mask_true(v_doi_mask)) {
-        vector v_hj_inv = vec_reciprocal(hj);
 
-        /* To stop floating point exceptions for when particle separations are
-         * 0. */
+        /* 1 / hj */
+        const vector v_hj_inv = vec_reciprocal(hj);
+
+        /* To stop floating point exceptions when particle separations are 0.
+         * Note that the results for r2==0 are masked out but may still raise
+         * an FPE as only the final operation is masked, not the whole math
+         * operations sequence. */
         v_r2.v = vec_add(v_r2.v, vec_set1(FLT_MIN));
 
         runner_iact_nonsym_1_vec_force(
@@ -1278,9 +1275,10 @@ void runner_doself2_force_vec(struct runner *r, struct cell *restrict c) {
     VEC_HADD(v_a_hydro_ySum, pi->a_hydro[1]);
     VEC_HADD(v_a_hydro_zSum, pi->a_hydro[2]);
     VEC_HADD(v_h_dtSum, pi->force.h_dt);
-    VEC_HMAX(v_sigSum, pi->force.v_sig);
     VEC_HADD(v_entropy_dtSum, pi->entropy_dt);
 
+    VEC_HMAX(v_sigSum, pi->force.v_sig);
+
   } /* loop over all particles. */
 
   TIMER_TOC(timer_doself_force);
@@ -1341,10 +1339,12 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
 #ifdef SWIFT_DEBUG_CHECKS
   /* Check that particles have been drifted to the current time */
   for (int pid = 0; pid < count_i; pid++)
-    if (parts_i[pid].ti_drift != e->ti_current)
+    if (parts_i[pid].ti_drift != e->ti_current &&
+        !part_is_inhibited(&parts_i[pid], e))
       error("Particle pi not drifted to current time");
   for (int pjd = 0; pjd < count_j; pjd++)
-    if (parts_j[pjd].ti_drift != e->ti_current)
+    if (parts_j[pjd].ti_drift != e->ti_current &&
+        !part_is_inhibited(&parts_j[pjd], e))
       error("Particle pj not drifted to current time");
 #endif
 
@@ -1497,6 +1497,21 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
         /* Form r2 < hig2 mask. */
         vec_create_mask(v_doi_mask, vec_cmp_lt(v_r2.v, v_hig2.v));
 
+#ifdef SWIFT_DEBUG_CHECKS
+        /* Verify that we have no inhibited particles in the interaction cache
+         */
+        for (int bit_index = 0; bit_index < VEC_SIZE; bit_index++) {
+          if (vec_is_mask_true(v_doi_mask) & (1 << bit_index)) {
+            if ((pjd + bit_index < count_j) &&
+                (parts_j[sort_j[pjd + bit_index].i].time_bin >=
+                 time_bin_inhibited)) {
+              error("Inhibited particle in interaction cache! id=%lld",
+                    parts_j[sort_j[pjd + bit_index].i].id);
+            }
+          }
+        }
+#endif
+
 #ifdef DEBUG_INTERACTIONS_SPH
         for (int bit_index = 0; bit_index < VEC_SIZE; bit_index++) {
           if (vec_is_mask_true(v_doi_mask) & (1 << bit_index)) {
@@ -1623,6 +1638,21 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
         /* Form r2 < hig2 mask. */
         vec_create_mask(v_doj_mask, vec_cmp_lt(v_r2.v, v_hjg2.v));
 
+#ifdef SWIFT_DEBUG_CHECKS
+        /* Verify that we have no inhibited particles in the interaction cache
+         */
+        for (int bit_index = 0; bit_index < VEC_SIZE; bit_index++) {
+          if (vec_is_mask_true(v_doj_mask) & (1 << bit_index)) {
+            if ((ci_cache_idx + first_pi + bit_index < count_i) &&
+                (parts_i[sort_i[ci_cache_idx + first_pi + bit_index].i]
+                     .time_bin >= time_bin_inhibited)) {
+              error("Inhibited particle in interaction cache! id=%lld",
+                    parts_i[sort_i[ci_cache_idx + first_pi + bit_index].i].id);
+            }
+          }
+        }
+#endif
+
 #ifdef DEBUG_INTERACTIONS_SPH
         for (int bit_index = 0; bit_index < VEC_SIZE; bit_index++) {
           if (vec_is_mask_true(v_doj_mask) & (1 << bit_index)) {
@@ -1805,9 +1835,27 @@ void runner_dopair_subset_density_vec(struct runner *r,
         mask_t v_doi_mask;
         vec_create_mask(v_doi_mask, vec_cmp_lt(v_r2.v, v_hig2.v));
 
+#ifdef SWIFT_DEBUG_CHECKS
+        /* Verify that we have no inhibited particles in the interaction cache
+         */
+        for (int bit_index = 0; bit_index < VEC_SIZE; bit_index++) {
+          struct part *restrict parts_j = cj->hydro.parts;
+
+          if (vec_is_mask_true(v_doi_mask) & (1 << bit_index)) {
+            if ((pjd + bit_index < count_j) &&
+                (parts_j[sort_j[pjd + bit_index].i].time_bin >=
+                 time_bin_inhibited)) {
+              error("Inhibited particle in interaction cache! id=%lld",
+                    parts_j[sort_j[pjd + bit_index].i].id);
+            }
+          }
+        }
+#endif
+
 #ifdef DEBUG_INTERACTIONS_SPH
-        struct part *restrict parts_j = cj->hydro.parts;
         for (int bit_index = 0; bit_index < VEC_SIZE; bit_index++) {
+          struct part *restrict parts_j = cj->hydro.parts;
+
           if (vec_is_mask_true(v_doi_mask) & (1 << bit_index)) {
             if (pi->num_ngb_density < MAX_NUM_OF_NEIGHBOURS) {
               pi->ids_ngbs_density[pi->num_ngb_density] =
@@ -1934,9 +1982,27 @@ void runner_dopair_subset_density_vec(struct runner *r,
         mask_t v_doi_mask;
         vec_create_mask(v_doi_mask, vec_cmp_lt(v_r2.v, v_hig2.v));
 
+#ifdef SWIFT_DEBUG_CHECKS
+        /* Verify that we have no inhibited particles in the interaction cache
+         */
+        for (int bit_index = 0; bit_index < VEC_SIZE; bit_index++) {
+          struct part *restrict parts_j = cj->hydro.parts;
+
+          if (vec_is_mask_true(v_doi_mask) & (1 << bit_index)) {
+            if ((cj_cache_idx + bit_index < count_j) &&
+                (parts_j[sort_j[cj_cache_idx + first_pj + bit_index].i]
+                     .time_bin >= time_bin_inhibited)) {
+              error("Inhibited particle in interaction cache! id=%lld",
+                    parts_j[sort_j[cj_cache_idx + first_pj + bit_index].i].id);
+            }
+          }
+        }
+#endif
+
 #ifdef DEBUG_INTERACTIONS_SPH
-        struct part *restrict parts_j = cj->hydro.parts;
         for (int bit_index = 0; bit_index < VEC_SIZE; bit_index++) {
+          struct part *restrict parts_j = cj->hydro.parts;
+
           if (vec_is_mask_true(v_doi_mask) & (1 << bit_index)) {
             if (pi->num_ngb_density < MAX_NUM_OF_NEIGHBOURS) {
               pi->ids_ngbs_density[pi->num_ngb_density] =
@@ -2032,10 +2098,12 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
 #ifdef SWIFT_DEBUG_CHECKS
   /* Check that particles have been drifted to the current time */
   for (int pid = 0; pid < count_i; pid++)
-    if (parts_i[pid].ti_drift != e->ti_current)
+    if (parts_i[pid].ti_drift != e->ti_current &&
+        !part_is_inhibited(&parts_i[pid], e))
       error("Particle pi not drifted to current time");
   for (int pjd = 0; pjd < count_j; pjd++)
-    if (parts_j[pjd].ti_drift != e->ti_current)
+    if (parts_j[pjd].ti_drift != e->ti_current &&
+        !part_is_inhibited(&parts_j[pjd], e))
       error("Particle pj not drifted to current time");
 #endif
 
@@ -2200,6 +2268,21 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
         v_h2.v = vec_fmax(v_hig2.v, v_hjg2.v);
         vec_create_mask(v_doi_mask, vec_cmp_lt(v_r2.v, v_h2.v));
 
+#ifdef SWIFT_DEBUG_CHECKS
+        /* Verify that we have no inhibited particles in the interaction cache
+         */
+        for (int bit_index = 0; bit_index < VEC_SIZE; bit_index++) {
+          if (vec_is_mask_true(v_doi_mask) & (1 << bit_index)) {
+            if ((pjd + bit_index < count_j) &&
+                (parts_j[sort_j[pjd + bit_index].i].time_bin >=
+                 time_bin_inhibited)) {
+              error("Inhibited particle in interaction cache! id=%lld",
+                    parts_j[sort_j[pjd + bit_index].i].id);
+            }
+          }
+        }
+#endif
+
 #ifdef DEBUG_INTERACTIONS_SPH
         for (int bit_index = 0; bit_index < VEC_SIZE; bit_index++) {
           if (vec_is_mask_true(v_doi_mask) & (1 << bit_index)) {
@@ -2336,6 +2419,21 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
         v_h2.v = vec_fmax(v_hjg2.v, v_hig2.v);
         vec_create_mask(v_doj_mask, vec_cmp_lt(v_r2.v, v_h2.v));
 
+#ifdef SWIFT_DEBUG_CHECKS
+        /* Verify that we have no inhibited particles in the interaction cache
+         */
+        for (int bit_index = 0; bit_index < VEC_SIZE; bit_index++) {
+          if (vec_is_mask_true(v_doj_mask) & (1 << bit_index)) {
+            if ((ci_cache_idx + first_pi + bit_index < count_i) &&
+                (parts_i[sort_i[ci_cache_idx + first_pi + bit_index].i]
+                     .time_bin >= time_bin_inhibited)) {
+              error("Inhibited particle in interaction cache! id=%lld",
+                    parts_i[sort_i[ci_cache_idx + first_pi + bit_index].i].id);
+            }
+          }
+        }
+#endif
+
 #ifdef DEBUG_INTERACTIONS_SPH
         for (int bit_index = 0; bit_index < VEC_SIZE; bit_index++) {
           if (vec_is_mask_true(v_doj_mask) & (1 << bit_index)) {
diff --git a/src/scheduler.c b/src/scheduler.c
index 4e3eb4e29e6cd4a2cd91032d4ee81d203977a59e..ad6af73aec209a19106794636c3b6599baca21e1 100644
--- a/src/scheduler.c
+++ b/src/scheduler.c
@@ -59,12 +59,43 @@
  */
 void scheduler_clear_active(struct scheduler *s) { s->active_count = 0; }
 
+/**
+ * @brief Increase the space available for unlocks. Only call when
+ *        current index == s->size_unlocks.
+ */
+static void scheduler_extend_unlocks(struct scheduler *s) {
+
+  /* Allocate the new buffer. */
+  const int size_unlocks_new = s->size_unlocks * 2;
+  struct task **unlocks_new =
+      (struct task **)malloc(sizeof(struct task *) * size_unlocks_new);
+  int *unlock_ind_new = (int *)malloc(sizeof(int) * size_unlocks_new);
+  if (unlocks_new == NULL || unlock_ind_new == NULL)
+    error("Failed to re-allocate unlocks.");
+
+  /* Wait for all writes to the old buffer to complete. */
+  while (s->completed_unlock_writes < s->size_unlocks)
+    ;
+
+  /* Copy the buffers. */
+  memcpy(unlocks_new, s->unlocks, sizeof(struct task *) * s->size_unlocks);
+  memcpy(unlock_ind_new, s->unlock_ind, sizeof(int) * s->size_unlocks);
+  free(s->unlocks);
+  free(s->unlock_ind);
+  s->unlocks = unlocks_new;
+  s->unlock_ind = unlock_ind_new;
+
+  /* Publish the new buffer size. */
+  s->size_unlocks = size_unlocks_new;
+}
+
 /**
  * @brief Add an unlock_task to the given task.
  *
  * @param s The #scheduler.
  * @param ta The unlocking #task.
  * @param tb The #task that will be unlocked.
+
  */
 void scheduler_addunlock(struct scheduler *s, struct task *ta,
                          struct task *tb) {
@@ -77,37 +108,21 @@ void scheduler_addunlock(struct scheduler *s, struct task *ta,
   const int ind = atomic_inc(&s->nr_unlocks);
 
   /* Does the buffer need to be grown? */
-  if (ind == s->size_unlocks) {
-    /* Allocate the new buffer. */
-    struct task **unlocks_new;
-    int *unlock_ind_new;
-    const int size_unlocks_new = s->size_unlocks * 2;
-    if ((unlocks_new = (struct task **)malloc(sizeof(struct task *) *
-                                              size_unlocks_new)) == NULL ||
-        (unlock_ind_new = (int *)malloc(sizeof(int) * size_unlocks_new)) ==
-            NULL)
-      error("Failed to re-allocate unlocks.");
-
-    /* Wait for all writes to the old buffer to complete. */
-    while (s->completed_unlock_writes < ind)
-      ;
-
-    /* Copy the buffers. */
-    memcpy(unlocks_new, s->unlocks, sizeof(struct task *) * ind);
-    memcpy(unlock_ind_new, s->unlock_ind, sizeof(int) * ind);
-    free(s->unlocks);
-    free(s->unlock_ind);
-    s->unlocks = unlocks_new;
-    s->unlock_ind = unlock_ind_new;
-
-    /* Publish the new buffer size. */
-    s->size_unlocks = size_unlocks_new;
-  }
+  if (ind == s->size_unlocks) scheduler_extend_unlocks(s);
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (ind > s->size_unlocks * 2)
+    message("unlocks guard enabled: %d / %d", ind, s->size_unlocks);
+#endif
 
   /* Wait for there to actually be space at my index. */
   while (ind > s->size_unlocks)
     ;
 
+  /* Guard against case when more than (old) s->size_unlocks unlocks
+   * are now pending. */
+  if (ind == s->size_unlocks) scheduler_extend_unlocks(s);
+
   /* Write the unlock to the scheduler. */
   s->unlocks[ind] = tb;
   s->unlock_ind[ind] = ta - s->tasks;
@@ -115,7 +130,7 @@ void scheduler_addunlock(struct scheduler *s, struct task *ta,
 }
 
 /**
- * @brief compute the number of same dependencies
+ * @brief compute the number of similar dependencies
  *
  * @param s The #scheduler
  * @param ta The #task
@@ -513,7 +528,7 @@ void scheduler_write_dependencies(struct scheduler *s, int verbose) {
   /* Be clean */
   free(task_dep);
 
-  if (verbose && s->nodeID == 0)
+  if (verbose)
     message("Printing task graph took %.3f %s.",
             clocks_from_ticks(getticks() - tic), clocks_getunit());
 }
diff --git a/src/serial_io.c b/src/serial_io.c
index 609f9aaf05a7f673a324a3e64848d89bdb248d16..0753e171cc4784c3c38fd5ea0e2a2c39dc4da1b7 100644
--- a/src/serial_io.c
+++ b/src/serial_io.c
@@ -56,6 +56,7 @@
 #include "stars_io.h"
 #include "tracers_io.h"
 #include "units.h"
+#include "velociraptor_io.h"
 #include "xmf.h"
 
 /**
@@ -778,7 +779,6 @@ void write_output_serial(struct engine* e, const char* baseName,
                          int mpi_size, MPI_Comm comm, MPI_Info info) {
 
   hid_t h_file = 0, h_grp = 0;
-  int periodic = e->s->periodic;
   int numFiles = 1;
   const struct part* parts = e->s->parts;
   const struct xpart* xparts = e->s->xparts;
@@ -786,6 +786,15 @@ void write_output_serial(struct engine* e, const char* baseName,
   const struct spart* sparts = e->s->sparts;
   struct swift_params* params = e->parameter_file;
   const int with_cosmology = e->policy & engine_policy_cosmology;
+  const int with_cooling = e->policy & engine_policy_cooling;
+  const int with_temperature = e->policy & engine_policy_temperature;
+#ifdef HAVE_VELOCIRAPTOR
+  const int with_stf = (e->policy & engine_policy_structure_finding) &&
+                       (e->s->gpart_group_data != NULL);
+#else
+  const int with_stf = 0;
+#endif
+
   FILE* xmfFile = 0;
 
   /* Number of particles currently in the arrays */
@@ -847,28 +856,25 @@ void write_output_serial(struct engine* e, const char* baseName,
     h_file = H5Fcreate(fileName, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
     if (h_file < 0) error("Error while opening file '%s'.", fileName);
 
-    /* Open header to write simulation properties */
-    /* message("Writing runtime parameters..."); */
-    h_grp = H5Gcreate(h_file, "/RuntimePars", H5P_DEFAULT, H5P_DEFAULT,
-                      H5P_DEFAULT);
-    if (h_grp < 0) error("Error while creating runtime parameters group\n");
-
-    /* Write the relevant information */
-    io_write_attribute(h_grp, "PeriodicBoundariesOn", INT, &periodic, 1);
-
-    /* Close runtime parameters */
-    H5Gclose(h_grp);
-
     /* Open header to write simulation properties */
     /* message("Writing file header..."); */
     h_grp = H5Gcreate(h_file, "/Header", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
     if (h_grp < 0) error("Error while creating file header\n");
 
+    /* Convert basic output information to snapshot units */
+    const double factor_time =
+        units_conversion_factor(internal_units, snapshot_units, UNIT_CONV_TIME);
+    const double factor_length = units_conversion_factor(
+        internal_units, snapshot_units, UNIT_CONV_LENGTH);
+    const double dblTime = e->time * factor_time;
+    const double dim[3] = {e->s->dim[0] * factor_length,
+                           e->s->dim[1] * factor_length,
+                           e->s->dim[2] * factor_length};
+
     /* Print the relevant information and print status */
-    io_write_attribute(h_grp, "BoxSize", DOUBLE, e->s->dim, 3);
-    double dblTime = e->time;
+    io_write_attribute(h_grp, "BoxSize", DOUBLE, dim, 3);
     io_write_attribute(h_grp, "Time", DOUBLE, &dblTime, 1);
-    int dimension = (int)hydro_dimension;
+    const int dimension = (int)hydro_dimension;
     io_write_attribute(h_grp, "Dimension", INT, &dimension, 1);
     io_write_attribute(h_grp, "Redshift", DOUBLE, &e->cosmology->z, 1);
     io_write_attribute(h_grp, "Scale-factor", DOUBLE, &e->cosmology->a, 1);
@@ -1031,6 +1037,32 @@ void write_output_serial(struct engine* e, const char* baseName,
     H5Fclose(h_file);
   }
 
+  /* Now write the top-level cell structure */
+  hid_t h_file_cells = 0, h_grp_cells = 0;
+  if (mpi_rank == 0) {
+
+    /* Open the snapshot on rank 0 */
+    h_file_cells = H5Fopen(fileName, H5F_ACC_RDWR, H5P_DEFAULT);
+    if (h_file_cells < 0)
+      error("Error while opening file '%s' on rank %d.", fileName, mpi_rank);
+
+    /* Create the group we want in the file */
+    h_grp_cells = H5Gcreate(h_file_cells, "/Cells", H5P_DEFAULT, H5P_DEFAULT,
+                            H5P_DEFAULT);
+    if (h_grp_cells < 0) error("Error while creating cells group");
+  }
+
+  /* Write the location of the particles in the arrays */
+  io_write_cell_offsets(h_grp_cells, e->s->cdim, e->s->cells_top,
+                        e->s->nr_cells, e->s->width, mpi_rank, N_total, offset,
+                        internal_units, snapshot_units);
+
+  /* Close everything */
+  if (mpi_rank == 0) {
+    H5Gclose(h_grp_cells);
+    H5Fclose(h_file_cells);
+  }
+
   /* Now loop over ranks and write the data */
   for (int rank = 0; rank < mpi_size; ++rank) {
 
@@ -1068,6 +1100,7 @@ void write_output_serial(struct engine* e, const char* baseName,
         struct part* parts_written = NULL;
         struct xpart* xparts_written = NULL;
         struct gpart* gparts_written = NULL;
+        struct velociraptor_gpart_data* gpart_group_data_written = NULL;
         struct spart* sparts_written = NULL;
 
         /* Write particle fields from the particle structure */
@@ -1080,8 +1113,14 @@ void write_output_serial(struct engine* e, const char* baseName,
               Nparticles = Ngas;
               hydro_write_particles(parts, xparts, list, &num_fields);
               num_fields += chemistry_write_particles(parts, list + num_fields);
-              num_fields += cooling_write_particles(
-                  parts, xparts, list + num_fields, e->cooling_func);
+              if (with_cooling || with_temperature) {
+                num_fields += cooling_write_particles(
+                    parts, xparts, list + num_fields, e->cooling_func);
+              }
+              if (with_stf) {
+                num_fields +=
+                    velociraptor_write_parts(parts, xparts, list + num_fields);
+              }
               num_fields += tracers_write_particles(
                   parts, xparts, list + num_fields, with_cosmology);
               num_fields += sftracers_write_particles(
@@ -1109,9 +1148,15 @@ void write_output_serial(struct engine* e, const char* baseName,
                                     &num_fields);
               num_fields +=
                   chemistry_write_particles(parts_written, list + num_fields);
-              num_fields +=
-                  cooling_write_particles(parts_written, xparts_written,
-                                          list + num_fields, e->cooling_func);
+              if (with_cooling || with_temperature) {
+                num_fields +=
+                    cooling_write_particles(parts_written, xparts_written,
+                                            list + num_fields, e->cooling_func);
+              }
+              if (with_stf) {
+                num_fields += velociraptor_write_parts(
+                    parts_written, xparts_written, list + num_fields);
+              }
               num_fields +=
                   tracers_write_particles(parts_written, xparts_written,
                                           list + num_fields, with_cosmology);
@@ -1127,6 +1172,10 @@ void write_output_serial(struct engine* e, const char* baseName,
               /* This is a DM-only run without inhibited particles */
               Nparticles = Ntot;
               darkmatter_write_particles(gparts, list, &num_fields);
+              if (with_stf) {
+                num_fields += velociraptor_write_gparts(e->s->gpart_group_data,
+                                                        list + num_fields);
+              }
             } else {
 
               /* Ok, we need to fish out the particles we want */
@@ -1137,12 +1186,27 @@ void write_output_serial(struct engine* e, const char* baseName,
                                  Ndm_written * sizeof(struct gpart)) != 0)
                 error("Error while allocating temporart memory for gparts");
 
+              if (with_stf) {
+                if (posix_memalign(
+                        (void**)&gpart_group_data_written, gpart_align,
+                        Ndm_written * sizeof(struct velociraptor_gpart_data)) !=
+                    0)
+                  error(
+                      "Error while allocating temporart memory for gparts STF "
+                      "data");
+              }
+
               /* Collect the non-inhibited DM particles from gpart */
-              io_collect_gparts_to_write(gparts, gparts_written, Ntot,
-                                         Ndm_written);
+              io_collect_gparts_to_write(
+                  gparts, e->s->gpart_group_data, gparts_written,
+                  gpart_group_data_written, Ntot, Ndm_written, with_stf);
 
-              /* Write DM particles */
+              /* Select the fields to write */
               darkmatter_write_particles(gparts_written, list, &num_fields);
+              if (with_stf) {
+                num_fields += velociraptor_write_gparts(
+                    gpart_group_data_written, list + num_fields);
+              }
             }
           } break;
 
@@ -1156,6 +1220,10 @@ void write_output_serial(struct engine* e, const char* baseName,
                   chemistry_write_sparticles(sparts, list + num_fields);
               num_fields += tracers_write_sparticles(sparts, list + num_fields,
                                                      with_cosmology);
+              if (with_stf) {
+                num_fields +=
+                    velociraptor_write_sparts(sparts, list + num_fields);
+              }
             } else {
 
               /* Ok, we need to fish out the particles we want */
@@ -1176,6 +1244,10 @@ void write_output_serial(struct engine* e, const char* baseName,
                   chemistry_write_sparticles(sparts, list + num_fields);
               num_fields += tracers_write_sparticles(sparts, list + num_fields,
                                                      with_cosmology);
+              if (with_stf) {
+                num_fields += velociraptor_write_sparts(sparts_written,
+                                                        list + num_fields);
+              }
             }
           } break;
 
@@ -1202,6 +1274,7 @@ void write_output_serial(struct engine* e, const char* baseName,
         if (parts_written) free(parts_written);
         if (xparts_written) free(xparts_written);
         if (gparts_written) free(gparts_written);
+        if (gpart_group_data_written) free(gpart_group_data_written);
         if (sparts_written) free(sparts_written);
 
         /* Close particle group */
diff --git a/src/single_io.c b/src/single_io.c
index 110b5d95862ba6c3f3d5ff653395d09721957e07..a7a701d8bd3dc9371ece8f7dc298e2708741a163 100644
--- a/src/single_io.c
+++ b/src/single_io.c
@@ -55,6 +55,7 @@
 #include "stars_io.h"
 #include "tracers_io.h"
 #include "units.h"
+#include "velociraptor_io.h"
 #include "xmf.h"
 
 /**
@@ -641,7 +642,6 @@ void write_output_single(struct engine* e, const char* baseName,
                          const struct unit_system* snapshot_units) {
 
   hid_t h_file = 0, h_grp = 0;
-  int periodic = e->s->periodic;
   int numFiles = 1;
   const struct part* parts = e->s->parts;
   const struct xpart* xparts = e->s->xparts;
@@ -649,6 +649,14 @@ void write_output_single(struct engine* e, const char* baseName,
   const struct spart* sparts = e->s->sparts;
   struct swift_params* params = e->parameter_file;
   const int with_cosmology = e->policy & engine_policy_cosmology;
+  const int with_cooling = e->policy & engine_policy_cooling;
+  const int with_temperature = e->policy & engine_policy_temperature;
+#ifdef HAVE_VELOCIRAPTOR
+  const int with_stf = (e->policy & engine_policy_structure_finding) &&
+                       (e->s->gpart_group_data != NULL);
+#else
+  const int with_stf = 0;
+#endif
 
   /* Number of particles currently in the arrays */
   const size_t Ntot = e->s->nr_gparts;
@@ -700,28 +708,25 @@ void write_output_single(struct engine* e, const char* baseName,
   h_file = H5Fcreate(fileName, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
   if (h_file < 0) error("Error while opening file '%s'.", fileName);
 
-  /* Open header to write simulation properties */
-  /* message("Writing runtime parameters..."); */
-  h_grp =
-      H5Gcreate(h_file, "/RuntimePars", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-  if (h_grp < 0) error("Error while creating runtime parameters group\n");
-
-  /* Write the relevant information */
-  io_write_attribute(h_grp, "PeriodicBoundariesOn", INT, &periodic, 1);
-
-  /* Close runtime parameters */
-  H5Gclose(h_grp);
-
   /* Open header to write simulation properties */
   /* message("Writing file header..."); */
   h_grp = H5Gcreate(h_file, "/Header", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
   if (h_grp < 0) error("Error while creating file header\n");
 
+  /* Convert basic output information to snapshot units */
+  const double factor_time =
+      units_conversion_factor(internal_units, snapshot_units, UNIT_CONV_TIME);
+  const double factor_length =
+      units_conversion_factor(internal_units, snapshot_units, UNIT_CONV_LENGTH);
+  const double dblTime = e->time * factor_time;
+  const double dim[3] = {e->s->dim[0] * factor_length,
+                         e->s->dim[1] * factor_length,
+                         e->s->dim[2] * factor_length};
+
   /* Print the relevant information and print status */
-  io_write_attribute(h_grp, "BoxSize", DOUBLE, e->s->dim, 3);
-  double dblTime = e->time;
+  io_write_attribute(h_grp, "BoxSize", DOUBLE, dim, 3);
   io_write_attribute(h_grp, "Time", DOUBLE, &dblTime, 1);
-  int dimension = (int)hydro_dimension;
+  const int dimension = (int)hydro_dimension;
   io_write_attribute(h_grp, "Dimension", INT, &dimension, 1);
   io_write_attribute(h_grp, "Redshift", DOUBLE, &e->cosmology->z, 1);
   io_write_attribute(h_grp, "Scale-factor", DOUBLE, &e->cosmology->a, 1);
@@ -830,6 +835,17 @@ void write_output_single(struct engine* e, const char* baseName,
   /* Print the system of Units used internally */
   io_write_unit_system(h_file, internal_units, "InternalCodeUnits");
 
+  /* Now write the top-level cell structure */
+  long long global_offsets[swift_type_count] = {0};
+  h_grp = H5Gcreate(h_file, "/Cells", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+  if (h_grp < 0) error("Error while creating cells group");
+
+  /* Write the location of the particles in the arrays */
+  io_write_cell_offsets(h_grp, e->s->cdim, e->s->cells_top, e->s->nr_cells,
+                        e->s->width, e->nodeID, N_total, global_offsets,
+                        internal_units, snapshot_units);
+  H5Gclose(h_grp);
+
   /* Tell the user if a conversion will be needed */
   if (e->verbose) {
     if (units_are_equal(snapshot_units, internal_units)) {
@@ -888,6 +904,7 @@ void write_output_single(struct engine* e, const char* baseName,
     struct part* parts_written = NULL;
     struct xpart* xparts_written = NULL;
     struct gpart* gparts_written = NULL;
+    struct velociraptor_gpart_data* gpart_group_data_written = NULL;
     struct spart* sparts_written = NULL;
 
     /* Write particle fields from the particle structure */
@@ -900,8 +917,14 @@ void write_output_single(struct engine* e, const char* baseName,
           N = Ngas;
           hydro_write_particles(parts, xparts, list, &num_fields);
           num_fields += chemistry_write_particles(parts, list + num_fields);
-          num_fields += cooling_write_particles(
-              parts, xparts, list + num_fields, e->cooling_func);
+          if (with_cooling || with_temperature) {
+            num_fields += cooling_write_particles(
+                parts, xparts, list + num_fields, e->cooling_func);
+          }
+          if (with_stf) {
+            num_fields +=
+                velociraptor_write_parts(parts, xparts, list + num_fields);
+          }
           num_fields += tracers_write_particles(
               parts, xparts, list + num_fields, with_cosmology);
           num_fields += sftracers_write_particles(
@@ -929,9 +952,15 @@ void write_output_single(struct engine* e, const char* baseName,
                                 &num_fields);
           num_fields +=
               chemistry_write_particles(parts_written, list + num_fields);
-          num_fields +=
-              cooling_write_particles(parts_written, xparts_written,
-                                      list + num_fields, e->cooling_func);
+          if (with_cooling || with_temperature) {
+            num_fields +=
+                cooling_write_particles(parts_written, xparts_written,
+                                        list + num_fields, e->cooling_func);
+          }
+          if (with_stf) {
+            num_fields += velociraptor_write_parts(
+                parts_written, xparts_written, list + num_fields);
+          }
           num_fields += tracers_write_particles(
               parts_written, xparts_written, list + num_fields, with_cosmology);
           num_fields += sftracers_write_particles(
@@ -945,6 +974,10 @@ void write_output_single(struct engine* e, const char* baseName,
           /* This is a DM-only run without inhibited particles */
           N = Ntot;
           darkmatter_write_particles(gparts, list, &num_fields);
+          if (with_stf) {
+            num_fields += velociraptor_write_gparts(e->s->gpart_group_data,
+                                                    list + num_fields);
+          }
         } else {
 
           /* Ok, we need to fish out the particles we want */
@@ -955,11 +988,26 @@ void write_output_single(struct engine* e, const char* baseName,
                              Ndm_written * sizeof(struct gpart)) != 0)
             error("Error while allocating temporart memory for gparts");
 
+          if (with_stf) {
+            if (posix_memalign(
+                    (void**)&gpart_group_data_written, gpart_align,
+                    Ndm_written * sizeof(struct velociraptor_gpart_data)) != 0)
+              error(
+                  "Error while allocating temporart memory for gparts STF "
+                  "data");
+          }
+
           /* Collect the non-inhibited DM particles from gpart */
-          io_collect_gparts_to_write(gparts, gparts_written, Ntot, Ndm_written);
+          io_collect_gparts_to_write(gparts, e->s->gpart_group_data,
+                                     gparts_written, gpart_group_data_written,
+                                     Ntot, Ndm_written, with_stf);
 
-          /* Write DM particles */
+          /* Select the fields to write */
           darkmatter_write_particles(gparts_written, list, &num_fields);
+          if (with_stf) {
+            num_fields += velociraptor_write_gparts(gpart_group_data_written,
+                                                    list + num_fields);
+          }
         }
       } break;
 
@@ -972,6 +1020,9 @@ void write_output_single(struct engine* e, const char* baseName,
           num_fields += chemistry_write_sparticles(sparts, list + num_fields);
           num_fields += tracers_write_sparticles(sparts, list + num_fields,
                                                  with_cosmology);
+          if (with_stf) {
+            num_fields += velociraptor_write_sparts(sparts, list + num_fields);
+          }
         } else {
 
           /* Ok, we need to fish out the particles we want */
@@ -992,6 +1043,10 @@ void write_output_single(struct engine* e, const char* baseName,
               chemistry_write_sparticles(sparts_written, list + num_fields);
           num_fields += tracers_write_sparticles(
               sparts_written, list + num_fields, with_cosmology);
+          if (with_stf) {
+            num_fields +=
+                velociraptor_write_sparts(sparts_written, list + num_fields);
+          }
         }
       } break;
 
@@ -1017,6 +1072,7 @@ void write_output_single(struct engine* e, const char* baseName,
     if (parts_written) free(parts_written);
     if (xparts_written) free(xparts_written);
     if (gparts_written) free(gparts_written);
+    if (gpart_group_data_written) free(gpart_group_data_written);
     if (sparts_written) free(sparts_written);
 
     /* Close particle group */
diff --git a/src/space.c b/src/space.c
index 35aaffa66d5b921ec687350588ac91e3a52bb59f..d930bcdcabada454c86719b66eb029f4e231d6b3 100644
--- a/src/space.c
+++ b/src/space.c
@@ -189,6 +189,7 @@ void space_rebuild_recycle_mapper(void *map_data, int num_elements,
     c->hydro.density = NULL;
     c->hydro.gradient = NULL;
     c->hydro.force = NULL;
+    c->hydro.limiter = NULL;
     c->grav.grav = NULL;
     c->grav.mm = NULL;
     c->hydro.dx_max_part = 0.0f;
@@ -223,12 +224,12 @@ void space_rebuild_recycle_mapper(void *map_data, int num_elements,
     c->kick1 = NULL;
     c->kick2 = NULL;
     c->timestep = NULL;
+    c->timestep_limiter = NULL;
     c->end_force = NULL;
     c->hydro.drift = NULL;
     c->grav.drift = NULL;
     c->grav.drift_out = NULL;
     c->hydro.cooling = NULL;
-    c->sourceterms = NULL;
     c->grav.long_range = NULL;
     c->grav.down_in = NULL;
     c->grav.down = NULL;
@@ -244,6 +245,8 @@ void space_rebuild_recycle_mapper(void *map_data, int num_elements,
     c->stars.do_sub_sort = 0;
     c->grav.do_sub_drift = 0;
     c->hydro.do_sub_drift = 0;
+    c->hydro.do_sub_limiter = 0;
+    c->hydro.do_limiter = 0;
     c->hydro.ti_end_min = -1;
     c->hydro.ti_end_max = -1;
     c->grav.ti_end_min = -1;
@@ -272,12 +275,14 @@ void space_rebuild_recycle_mapper(void *map_data, int num_elements,
     c->mpi.hydro.recv_gradient = NULL;
     c->mpi.grav.recv = NULL;
     c->mpi.recv_ti = NULL;
+    c->mpi.limiter.recv = NULL;
 
     c->mpi.hydro.send_xv = NULL;
     c->mpi.hydro.send_rho = NULL;
     c->mpi.hydro.send_gradient = NULL;
     c->mpi.grav.send = NULL;
     c->mpi.send_ti = NULL;
+    c->mpi.limiter.send = NULL;
 #endif
   }
 }
@@ -2707,6 +2712,8 @@ void space_split_recursive(struct space *s, struct cell *c,
       cp->stars.do_sub_sort = 0;
       cp->grav.do_sub_drift = 0;
       cp->hydro.do_sub_drift = 0;
+      cp->hydro.do_sub_limiter = 0;
+      cp->hydro.do_limiter = 0;
 #ifdef WITH_MPI
       cp->mpi.tag = -1;
 #endif  // WITH_MPI
@@ -4301,6 +4308,49 @@ void space_check_timesteps(struct space *s) {
 #endif
 }
 
+/**
+ * @brief #threadpool mapper function for the limiter debugging check
+ */
+void space_check_limiter_mapper(void *map_data, int nr_parts,
+                                void *extra_data) {
+#ifdef SWIFT_DEBUG_CHECKS
+  /* Unpack the data */
+  struct part *restrict parts = (struct part *)map_data;
+
+  /* Verify that all limited particles have been treated */
+  for (int k = 0; k < nr_parts; k++) {
+
+    if (parts[k].time_bin == time_bin_inhibited) continue;
+
+    if (parts[k].wakeup == time_bin_awake)
+      error("Particle still woken up! id=%lld", parts[k].id);
+
+    if (parts[k].gpart != NULL)
+      if (parts[k].time_bin != parts[k].gpart->time_bin)
+        error("Gpart not on the same time-bin as part");
+  }
+#else
+  error("Calling debugging code without debugging flag activated.");
+#endif
+}
+
+/**
+ * @brief Checks that all particles have their wakeup flag in a correct state.
+ *
+ * Should only be used for debugging purposes.
+ *
+ * @param s The #space to check.
+ */
+void space_check_limiter(struct space *s) {
+#ifdef SWIFT_DEBUG_CHECKS
+
+  threadpool_map(&s->e->threadpool, space_check_limiter_mapper, s->parts,
+                 s->nr_parts, sizeof(struct part), 1000, NULL);
+#else
+  error("Calling debugging code without debugging flag activated.");
+#endif
+}
+
 /**
  * @brief Resets all the individual cell task counters to 0.
  *
@@ -4384,7 +4434,6 @@ void space_struct_restore(struct space *s, FILE *stream) {
   s->local_cells_with_tasks_top = NULL;
   s->cells_with_particles_top = NULL;
   s->local_cells_with_particles_top = NULL;
-  s->grav_top_level = NULL;
   s->nr_local_cells_with_tasks = 0;
   s->nr_cells_with_particles = 0;
 #ifdef WITH_MPI
diff --git a/src/space.h b/src/space.h
index a1280945d2aa232cbb5e5b519266bc7058e5dc57..98ab2523668c9789bb644f0ebe300cf73ef6f182 100644
--- a/src/space.h
+++ b/src/space.h
@@ -35,6 +35,7 @@
 #include "lock.h"
 #include "parser.h"
 #include "part.h"
+#include "velociraptor_struct.h"
 
 /* Avoid cyclic inclusions */
 struct cell;
@@ -207,9 +208,6 @@ struct space {
   /*! The s-particle data (cells have pointers to this). */
   struct spart *sparts;
 
-  /*! The top-level FFT task */
-  struct task *grav_top_level;
-
   /*! Minimal mass of all the #part */
   float min_part_mass;
 
@@ -237,6 +235,9 @@ struct space {
   /*! The associated engine. */
   struct engine *e;
 
+  /*! The group information returned by VELOCIraptor for each #gpart. */
+  struct velociraptor_gpart_data *gpart_group_data;
+
 #ifdef WITH_MPI
 
   /*! Buffers for parts that we will receive from foreign cells. */
@@ -317,6 +318,7 @@ void space_check_drift_point(struct space *s, integertime_t ti_drift,
 void space_check_top_multipoles_drift_point(struct space *s,
                                             integertime_t ti_drift);
 void space_check_timesteps(struct space *s);
+void space_check_limiter(struct space *s);
 void space_replicate(struct space *s, int replicate, int verbose);
 void space_generate_gas(struct space *s, const struct cosmology *cosmo,
                         int periodic, const double dim[3], int verbose);
diff --git a/src/swift_velociraptor_part.h b/src/swift_velociraptor_part.h
index adae884c2f930c44edf4d48f47f168475bc65885..700842ac5a13e5bee4af15cc0d8726fc668ce421 100644
--- a/src/swift_velociraptor_part.h
+++ b/src/swift_velociraptor_part.h
@@ -21,7 +21,13 @@
 
 #include "part_type.h"
 
-/* SWIFT/VELOCIraptor particle. */
+/**
+ * @brief SWIFT/VELOCIraptor particle.
+ *
+ * This should match the structure Swift::swift_vel_part
+ * defined in the file NBodylib/src/NBody/SwiftParticle.h
+ * of the VELOCIraptor code.
+ */
 struct swift_vel_part {
 
   /*! Particle ID. */
@@ -42,8 +48,18 @@ struct swift_vel_part {
   /*! Internal energy of gas particle */
   float u;
 
+  /*! Temperature of a gas particle */
+  float T;
+
   /*! Type of the #gpart (DM, gas, star, ...) */
   enum part_type type;
+
+  /*! MPI rank on which this #gpart lives on the SWIFT side. */
+  int task;
+
+  /*! Index of this #gpart in the global array of this rank on the SWIFT
+    side. */
+  int index;
 };
 
 #endif /* SWIFT_VELOCIRAPTOR_PART_H */
diff --git a/src/task.c b/src/task.c
index f16aadc8afb7a2f811c4790688fb849ba1601ce3..4d5695f64c81e710c39fcc460a642a0887856814 100644
--- a/src/task.c
+++ b/src/task.c
@@ -66,6 +66,7 @@ const char *taskID_names[task_type_count] = {"none",
                                              "kick1",
                                              "kick2",
                                              "timestep",
+                                             "timestep_limiter",
                                              "send",
                                              "recv",
                                              "grav_long_range",
@@ -83,10 +84,10 @@ const char *taskID_names[task_type_count] = {"none",
 
 /* Sub-task type names. */
 const char *subtaskID_names[task_subtype_count] = {
-    "none",          "density",       "gradient",  "force",
-    "grav",          "external_grav", "tend",      "xv",
-    "rho",           "gpart",         "multipole", "spart",
-    "stars_density", "stars_feedback"};
+    "none",    "density",       "gradient",      "force",
+    "limiter", "grav",          "external_grav", "tend",
+    "xv",      "rho",           "gpart",         "multipole",
+    "spart",   "stars_density", "stars_feedback"};
 
 #ifdef WITH_MPI
 /* MPI communicators for the subtypes. */
@@ -140,6 +141,7 @@ __attribute__((always_inline)) INLINE static enum task_actions task_acts_on(
     case task_type_sort:
     case task_type_ghost:
     case task_type_extra_ghost:
+    case task_type_timestep_limiter:
     case task_type_cooling:
       return task_action_part;
       break;
@@ -161,6 +163,7 @@ __attribute__((always_inline)) INLINE static enum task_actions task_acts_on(
         case task_subtype_density:
         case task_subtype_gradient:
         case task_subtype_force:
+        case task_subtype_limiter:
           return task_action_part;
           break;
 
@@ -337,6 +340,8 @@ void task_unlock(struct task *t) {
 
     case task_type_drift_part:
     case task_type_sort:
+    case task_type_ghost:
+    case task_type_timestep_limiter:
       cell_unlocktree(ci);
       break;
 
@@ -462,6 +467,8 @@ int task_lock(struct task *t) {
 
     case task_type_drift_part:
     case task_type_sort:
+    case task_type_ghost:
+    case task_type_timestep_limiter:
       if (ci->hydro.hold) return 0;
       if (cell_locktree(ci) != 0) return 0;
       break;
@@ -655,6 +662,9 @@ void task_get_group_name(int type, int subtype, char *cluster) {
     case task_subtype_grav:
       strcpy(cluster, "Gravity");
       break;
+    case task_subtype_limiter:
+      strcpy(cluster, "Timestep_limiter");
+      break;
     case task_subtype_stars_density:
       strcpy(cluster, "Stars");
       break;
diff --git a/src/task.h b/src/task.h
index a6782a6302e2f234f02d2b4e3052a11cb388dc31..100ac225bd5956e8d59d6a197c1257cb3e796ebb 100644
--- a/src/task.h
+++ b/src/task.h
@@ -58,6 +58,7 @@ enum task_types {
   task_type_kick1,
   task_type_kick2,
   task_type_timestep,
+  task_type_timestep_limiter,
   task_type_send,
   task_type_recv,
   task_type_grav_long_range,
@@ -83,6 +84,7 @@ enum task_subtypes {
   task_subtype_density,
   task_subtype_gradient,
   task_subtype_force,
+  task_subtype_limiter,
   task_subtype_grav,
   task_subtype_external_grav,
   task_subtype_tend,
diff --git a/src/timestep_limiter.h b/src/timestep_limiter.h
new file mode 100644
index 0000000000000000000000000000000000000000..cfadc2e62a872a2d2a8a578fe6bb48fd24c5ba29
--- /dev/null
+++ b/src/timestep_limiter.h
@@ -0,0 +1,143 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+#ifndef SWIFT_TIMESTEP_LIMITER_H
+#define SWIFT_TIMESTEP_LIMITER_H
+
+/* Config parameters. */
+#include "../config.h"
+
+/**
+ * @brief Wakes up a particle by rewinding its kick1 back in time and applying
+ * a new one such that the particle becomes active again in the next time-step.
+ *
+ * @param p The #part to update.
+ * @param xp Its #xpart companion.
+ * @param e The #engine (to extract time-line information).
+ */
+__attribute__((always_inline)) INLINE static integertime_t timestep_limit_part(
+    struct part *restrict p, struct xpart *restrict xp,
+    const struct engine *e) {
+
+  const struct cosmology *cosmo = e->cosmology;
+  const int with_cosmology = e->policy & engine_policy_cosmology;
+  const double time_base = e->time_base;
+
+  integertime_t old_ti_beg, old_ti_end;
+  timebin_t old_time_bin;
+
+  /* Let's see when this particle started and used to end */
+  if (p->wakeup == time_bin_awake) {
+
+    /* Normal case */
+    old_ti_beg = get_integer_time_begin(e->ti_current, p->time_bin);
+    old_ti_end = get_integer_time_end(e->ti_current, p->time_bin);
+    old_time_bin = p->time_bin;
+  } else {
+
+    /* Particle that was limited in the previous step already */
+    old_ti_beg = get_integer_time_begin(e->ti_current, -p->wakeup);
+    old_ti_end = get_integer_time_end(e->ti_current, p->time_bin);
+    old_time_bin = -p->wakeup;
+  }
+
+  const integertime_t old_dti = old_ti_end - old_ti_beg;
+
+  /* The new fake time-step the particle will be on */
+  const integertime_t new_fake_ti_step =
+      get_integer_timestep(e->min_active_bin);
+
+  /* The actual time-step size this particle will use */
+  const integertime_t new_ti_beg = old_ti_beg;
+  const integertime_t new_ti_end = e->ti_current + new_fake_ti_step;
+  const integertime_t new_dti = new_ti_end - new_ti_beg;
+
+#ifdef SWIFT_DEBUG_CHECKS
+  /* Some basic safety checks */
+  if (old_ti_beg >= e->ti_current)
+    error(
+        "Incorrect value for old time-step beginning ti_current=%lld, "
+        "old_ti_beg=%lld",
+        e->ti_current, old_ti_beg);
+
+  if (old_ti_end <= e->ti_current)
+    error(
+        "Incorrect value for old time-step end ti_current=%lld, "
+        "old_ti_end=%lld",
+        e->ti_current, old_ti_end);
+
+  if (new_ti_end > old_ti_end) error("New end of time-step after the old one");
+
+  if (new_dti > old_dti) error("New time-step larger than old one");
+
+  if (new_fake_ti_step == 0) error("Wakeup call too early");
+#endif
+
+  double dt_kick_grav = 0., dt_kick_hydro = 0., dt_kick_therm = 0.,
+         dt_kick_corr = 0.;
+
+  /* Now we need to reverse the kick1... (the dt are negative here) */
+  if (with_cosmology) {
+    dt_kick_hydro = -cosmology_get_hydro_kick_factor(cosmo, old_ti_beg,
+                                                     old_ti_beg + old_dti / 2);
+    dt_kick_grav = -cosmology_get_grav_kick_factor(cosmo, old_ti_beg,
+                                                   old_ti_beg + old_dti / 2);
+    dt_kick_therm = -cosmology_get_therm_kick_factor(cosmo, old_ti_beg,
+                                                     old_ti_beg + old_dti / 2);
+    dt_kick_corr = -cosmology_get_corr_kick_factor(cosmo, old_ti_beg,
+                                                   old_ti_beg + old_dti / 2);
+  } else {
+    dt_kick_hydro = -(old_dti / 2) * time_base;
+    dt_kick_grav = -(old_dti / 2) * time_base;
+    dt_kick_therm = -(old_dti / 2) * time_base;
+    dt_kick_corr = -(old_dti / 2) * time_base;
+  }
+  kick_part(p, xp, dt_kick_hydro, dt_kick_grav, dt_kick_therm, dt_kick_corr,
+            e->cosmology, e->hydro_properties, old_ti_beg + old_dti / 2,
+            old_ti_beg);
+
+  /* ...and apply the new one (dt is positive) */
+  if (with_cosmology) {
+    dt_kick_hydro = cosmology_get_hydro_kick_factor(cosmo, new_ti_beg,
+                                                    new_ti_beg + new_dti / 2);
+    dt_kick_grav = cosmology_get_grav_kick_factor(cosmo, new_ti_beg,
+                                                  new_ti_beg + new_dti / 2);
+    dt_kick_therm = cosmology_get_therm_kick_factor(cosmo, new_ti_beg,
+                                                    new_ti_beg + new_dti / 2);
+    dt_kick_corr = cosmology_get_corr_kick_factor(cosmo, new_ti_beg,
+                                                  new_ti_beg + new_dti / 2);
+  } else {
+    dt_kick_hydro = (new_dti / 2) * time_base;
+    dt_kick_grav = (new_dti / 2) * time_base;
+    dt_kick_therm = (new_dti / 2) * time_base;
+    dt_kick_corr = (new_dti / 2) * time_base;
+  }
+  kick_part(p, xp, dt_kick_hydro, dt_kick_grav, dt_kick_therm, dt_kick_corr,
+            e->cosmology, e->hydro_properties, new_ti_beg,
+            new_ti_beg + new_dti / 2);
+
+  /* Remember the old time-bin */
+  p->wakeup = old_time_bin;
+
+  /* Update the time bin of this particle */
+  p->time_bin = e->min_active_bin;
+
+  return new_fake_ti_step;
+}
+
+#endif /* SWIFT_TIMESTEP_LIMITER_H */
diff --git a/src/tools.c b/src/tools.c
index c0400aa7b42322fce276a5e788af7bcb9e7f3625..ca531671a7a2522eab760c1eb4896a6bd522a073 100644
--- a/src/tools.c
+++ b/src/tools.c
@@ -217,7 +217,7 @@ void pairs_all_density(struct runner *r, struct cell *ci, struct cell *cj) {
       }
 
       /* Hit or miss? */
-      if (r2 < hig2) {
+      if (r2 < hig2 && !part_is_inhibited(pj, e)) {
 
         /* Interact */
         runner_iact_nonsym_density(r2, dx, hi, pj->h, pi, pj, a, H);
@@ -249,7 +249,7 @@ void pairs_all_density(struct runner *r, struct cell *ci, struct cell *cj) {
       }
 
       /* Hit or miss? */
-      if (r2 < hjg2) {
+      if (r2 < hjg2 && !part_is_inhibited(pi, e)) {
 
         /* Interact */
         runner_iact_nonsym_density(r2, dx, hj, pi->h, pj, pi, a, H);
@@ -438,7 +438,7 @@ void self_all_density(struct runner *r, struct cell *ci) {
       }
 
       /* Hit or miss? */
-      if (r2 < hig2 && part_is_active(pi, e)) {
+      if (r2 < hig2 && part_is_active(pi, e) && !part_is_inhibited(pj, e)) {
 
         /* Interact */
         runner_iact_nonsym_density(r2, dxi, hi, hj, pi, pj, a, H);
@@ -446,7 +446,7 @@ void self_all_density(struct runner *r, struct cell *ci) {
       }
 
       /* Hit or miss? */
-      if (r2 < hjg2 && part_is_active(pj, e)) {
+      if (r2 < hjg2 && part_is_active(pj, e) && !part_is_inhibited(pi, e)) {
 
         dxi[0] = -dxi[0];
         dxi[1] = -dxi[1];
diff --git a/src/tracers/EAGLE/tracers_io.h b/src/tracers/EAGLE/tracers_io.h
index c593524c31d652e9aeb586a838d39d7eef7668ad..0b0e372ffc35ec5729affd4f6a86e358c914ca6d 100644
--- a/src/tracers/EAGLE/tracers_io.h
+++ b/src/tracers/EAGLE/tracers_io.h
@@ -65,7 +65,7 @@ __attribute__((always_inline)) INLINE static int tracers_write_particles(
 
   } else {
 
-    list[1] = io_make_output_field("MaxTemperature time", FLOAT, 1,
+    list[1] = io_make_output_field("Maximal Temperature time", FLOAT, 1,
                                    UNIT_CONV_NO_UNITS, xparts,
                                    tracers_data.maximum_temperature_time);
   }
diff --git a/src/velociraptor_dummy.c b/src/velociraptor_dummy.c
index 8f14a3230d341993122f09f2bccf3d8232550fd9..36cb65bfbe6931464f33d7e4b641f8882fdf65d0 100644
--- a/src/velociraptor_dummy.c
+++ b/src/velociraptor_dummy.c
@@ -20,9 +20,6 @@
 /* Config parameters. */
 #include "../config.h"
 
-/* Some standard headers. */
-#include <stddef.h>
-
 /* Local includes. */
 #include "error.h"
 #include "swift_velociraptor_part.h"
@@ -36,19 +33,41 @@ struct unitinfo {};
 struct cell_loc {};
 struct siminfo {};
 
+/*
 int InitVelociraptor(char *config_name, char *output_name,
                      struct cosmoinfo cosmo_info, struct unitinfo unit_info,
-                     struct siminfo sim_info) {
+                     struct siminfo sim_info, const int numthreads) {
 
   error("This is only a dummy. Call the real one!");
   return 0;
 }
+
 int InvokeVelociraptor(const size_t num_gravity_parts,
-                       const size_t num_hydro_parts,
+                       const size_t num_hydro_parts, const int snapnum,
                        struct swift_vel_part *swift_parts,
-                       const int *cell_node_ids, char *output_name) {
+                       const int *cell_node_ids, char *output_name,
+                       const int numthreads) {
+
+  error("This is only a dummy. Call the real one!");
+  return 0;
+}
+*/
+int InitVelociraptor(char *config_name, struct unitinfo unit_info,
+                     struct siminfo sim_info, const int numthreads) {
 
   error("This is only a dummy. Call the real one!");
   return 0;
 }
+
+struct groupinfo *InvokeVelociraptor(
+    const int snapnum, char *output_name, struct cosmoinfo cosmo_info,
+    struct siminfo sim_info, const size_t num_gravity_parts,
+    const size_t num_hydro_parts, const size_t num_star_parts,
+    struct swift_vel_part *swift_parts, const int *cell_node_ids,
+    const int numthreads, const int return_group_flags,
+    int *const num_in_groups) {
+  error("This is only a dummy. Call the real one!");
+  return 0;
+}
+
 #endif /* HAVE_DUMMY_VELOCIRAPTOR */
diff --git a/src/velociraptor_interface.c b/src/velociraptor_interface.c
index 7756fe4b937986c108d223c56183f7d31cdfaa98..1049c4730e996112c9b4dc88effad3732af9025d 100644
--- a/src/velociraptor_interface.c
+++ b/src/velociraptor_interface.c
@@ -21,21 +21,23 @@
 #include "../config.h"
 
 /* Some standard headers. */
-#include <errno.h>
 #include <unistd.h>
 
 /* This object's header. */
 #include "velociraptor_interface.h"
 
 /* Local includes. */
-#include "common_io.h"
+#include "cooling.h"
 #include "engine.h"
 #include "hydro.h"
 #include "swift_velociraptor_part.h"
+#include "velociraptor_struct.h"
 
 #ifdef HAVE_VELOCIRAPTOR
 
-/* Structure for passing cosmological information to VELOCIraptor. */
+/**
+ * @brief Structure for passing cosmological information to VELOCIraptor.
+ */
 struct cosmoinfo {
 
   /*! Current expansion factor of the Universe. (cosmology.a) */
@@ -47,6 +49,15 @@ struct cosmoinfo {
   /*! Matter density parameter (cosmology.Omega_m) */
   double Omega_m;
 
+  /*! Radiation density parameter (cosmology.Omega_r) */
+  double Omega_r;
+
+  /*! Neutrino density parameter (0 in SWIFT) */
+  double Omega_nu;
+
+  /*! Curvature density parameter (cosmology.Omega_k) */
+  double Omega_k;
+
   /*! Baryon density parameter (cosmology.Omega_b) */
   double Omega_b;
 
@@ -60,19 +71,21 @@ struct cosmoinfo {
   double w_de;
 };
 
-/* Structure for passing unit information to VELOCIraptor. */
+/**
+ * @brief Structure for passing unit information to VELOCIraptor.
+ */
 struct unitinfo {
 
-  /* Length conversion factor to kpc. */
+  /*! Length conversion factor to kpc. */
   double lengthtokpc;
 
-  /* Velocity conversion factor to km/s. */
+  /*! Velocity conversion factor to km/s. */
   double velocitytokms;
 
-  /* Mass conversion factor to solar masses. */
+  /*! Mass conversion factor to solar masses. */
   double masstosolarmass;
 
-  /* Potential conversion factor. */
+  /*! Potential conversion factor to (km/s)^2. */
   double energyperunitmass;
 
   /*! Newton's gravitationl constant (phys_const.const_newton_G)*/
@@ -82,18 +95,34 @@ struct unitinfo {
   double hubbleunit;
 };
 
-/* Structure to hold the location of a top-level cell. */
+/**
+ * @brief Structure to hold the location of a top-level cell.
+ */
 struct cell_loc {
 
-  /* Coordinates x,y,z */
+  /*! Coordinates x,y,z */
   double loc[3];
 };
 
-/* Structure for passing simulation information to VELOCIraptor. */
+/**
+ * @brief Structure for passing simulation information to VELOCIraptor for a
+ * given call.
+ */
 struct siminfo {
-  double period, zoomhigresolutionmass, interparticlespacing, spacedimension[3];
 
-  /* Number of top-cells. */
+  /*! Size of periodic replications */
+  double period;
+
+  /*! Mass of the high-resolution DM particles in a zoom-in run. */
+  double zoomhigresolutionmass;
+
+  /*! Mean inter-particle separation of the DM particles */
+  double interparticlespacing;
+
+  /*! Spatial extent of the simulation volume */
+  double spacedimension[3];
+
+  /*! Number of top-level cells. */
   int numcells;
 
   /*! Locations of top-level cells. */
@@ -105,142 +134,135 @@ struct siminfo {
   /*! Inverse of the top-level cell width. */
   double icellwidth[3];
 
+  /*! Holds the node ID of each top-level cell. */
+  int *cellnodeids;
+
+  /*! Is this a cosmological simulation? */
   int icosmologicalsim;
+
+  /*! Is this a zoom-in simulation? */
+  int izoomsim;
+
+  /*! Do we have DM particles? */
+  int idarkmatter;
+
+  /*! Do we have gas particles? */
+  int igas;
+
+  /*! Do we have star particles? */
+  int istar;
+
+  /*! Do we have BH particles? */
+  int ibh;
+
+  /*! Do we have other particles? */
+  int iother;
 };
 
-/* VELOCIraptor interface. */
-int InitVelociraptor(char *config_name, char *output_name,
-                     struct cosmoinfo cosmo_info, struct unitinfo unit_info,
-                     struct siminfo sim_info);
-int InvokeVelociraptor(const size_t num_gravity_parts,
-                       const size_t num_hydro_parts,
-                       struct swift_vel_part *swift_parts,
-                       const int *cell_node_ids, char *output_name);
+/**
+ * @brief Structure for passing group information back to SWIFT.
+ */
+struct groupinfo {
+
+  /*! Index of a #gpart in the global array on this MPI rank */
+  int index;
+
+  /*! Group number of the #gpart. */
+  long long groupID;
+};
+
+int InitVelociraptor(char *config_name, struct unitinfo unit_info,
+                     struct siminfo sim_info, const int numthreads);
+
+struct groupinfo *InvokeVelociraptor(
+    const int snapnum, char *output_name, struct cosmoinfo cosmo_info,
+    struct siminfo sim_info, const size_t num_gravity_parts,
+    const size_t num_hydro_parts, const size_t num_star_parts,
+    struct swift_vel_part *swift_parts, const int *cell_node_ids,
+    const int numthreads, const int return_group_flags,
+    int *const num_in_groups);
 
 #endif /* HAVE_VELOCIRAPTOR */
 
 /**
- * @brief Initialise VELOCIraptor with input and output file names along with
- * cosmological info needed to run.
+ * @brief Initialise VELOCIraptor with the configuration, units and
+ * simulation info needed to run.
  *
  * @param e The #engine.
- *
  */
 void velociraptor_init(struct engine *e) {
 
 #ifdef HAVE_VELOCIRAPTOR
-  struct space *s = e->s;
-  struct cosmoinfo cosmo_info;
-  struct unitinfo unit_info;
-  struct siminfo sim_info;
-
-  /* Set cosmological constants. */
-  cosmo_info.atime = e->cosmology->a;
-  cosmo_info.littleh = e->cosmology->h;
-  cosmo_info.Omega_m = e->cosmology->Omega_m;
-  cosmo_info.Omega_b = e->cosmology->Omega_b;
-  cosmo_info.Omega_Lambda = e->cosmology->Omega_lambda;
-  cosmo_info.Omega_cdm = e->cosmology->Omega_m - e->cosmology->Omega_b;
-  cosmo_info.w_de = e->cosmology->w;
+  const ticks tic = getticks();
 
-  message("Scale factor: %e", cosmo_info.atime);
-  message("Little h: %e", cosmo_info.littleh);
-  message("Omega_m: %e", cosmo_info.Omega_m);
-  message("Omega_b: %e", cosmo_info.Omega_b);
-  message("Omega_Lambda: %e", cosmo_info.Omega_Lambda);
-  message("Omega_cdm: %e", cosmo_info.Omega_cdm);
-  message("w_de: %e", cosmo_info.w_de);
+  /* Internal SWIFT units */
+  const struct unit_system *swift_us = e->internal_units;
 
-  if (e->cosmology->w != -1.)
-    error("w_de is not 1. It is: %lf", e->cosmology->w);
+  /* CGS units and physical constants in CGS */
+  struct unit_system cgs_us;
+  units_init_cgs(&cgs_us);
+  struct phys_const cgs_pc;
+  phys_const_init(&cgs_us, /*params=*/NULL, &cgs_pc);
 
   /* Set unit conversions. */
-  unit_info.lengthtokpc = 1.0;
-  unit_info.velocitytokms = 1.0;
-  unit_info.masstosolarmass = 1.0;
-  unit_info.energyperunitmass = 1.0;
+  struct unitinfo unit_info;
+  unit_info.lengthtokpc =
+      units_cgs_conversion_factor(swift_us, UNIT_CONV_LENGTH) /
+      (1000. * cgs_pc.const_parsec);
+  unit_info.velocitytokms =
+      units_cgs_conversion_factor(swift_us, UNIT_CONV_VELOCITY) / 1.0e5;
+  unit_info.masstosolarmass =
+      units_cgs_conversion_factor(swift_us, UNIT_CONV_MASS) /
+      cgs_pc.const_solar_mass;
+  unit_info.energyperunitmass =
+      units_cgs_conversion_factor(swift_us, UNIT_CONV_ENERGY_PER_UNIT_MASS) /
+      (1.0e10);
   unit_info.gravity = e->physical_constants->const_newton_G;
   unit_info.hubbleunit = e->cosmology->H0 / e->cosmology->h;
 
-  message("Length conversion factor: %e", unit_info.lengthtokpc);
-  message("Velocity conversion factor: %e", unit_info.velocitytokms);
-  message("Mass conversion factor: %e", unit_info.masstosolarmass);
-  message("Potential conversion factor: %e", unit_info.energyperunitmass);
-  message("G: %e", unit_info.gravity);
-  message("H: %e", unit_info.hubbleunit);
-
-  /* TODO: Find the total number of DM particles when running with star
-   * particles and BHs. */
-  const int total_nr_dmparts = e->total_nr_gparts - e->total_nr_parts;
+  /* Gather some information about the simulation */
+  struct siminfo sim_info;
 
-  /* Set simulation information. */
-  if (e->s->periodic) {
-    sim_info.period =
-        unit_info.lengthtokpc *
-        s->dim[0]; /* Physical size of box in VELOCIraptor units (kpc). */
-  } else
-    sim_info.period = 0.0;
-  sim_info.zoomhigresolutionmass = -1.0; /* Placeholder. */
-  sim_info.interparticlespacing = sim_info.period / cbrt(total_nr_dmparts);
-  if (e->policy & engine_policy_cosmology)
+  /* Are we running with cosmology? */
+  if (e->policy & engine_policy_cosmology) {
     sim_info.icosmologicalsim = 1;
-  else
+  } else {
     sim_info.icosmologicalsim = 0;
-  sim_info.spacedimension[0] = unit_info.lengthtokpc * s->dim[0];
-  sim_info.spacedimension[1] = unit_info.lengthtokpc * s->dim[1];
-  sim_info.spacedimension[2] = unit_info.lengthtokpc * s->dim[2];
-  sim_info.numcells = s->nr_cells;
-
-  sim_info.cellwidth[0] = unit_info.lengthtokpc * s->cells_top[0].width[0];
-  sim_info.cellwidth[1] = unit_info.lengthtokpc * s->cells_top[0].width[1];
-  sim_info.cellwidth[2] = unit_info.lengthtokpc * s->cells_top[0].width[2];
-
-  sim_info.icellwidth[0] = s->iwidth[0] / unit_info.lengthtokpc;
-  sim_info.icellwidth[1] = s->iwidth[1] / unit_info.lengthtokpc;
-  sim_info.icellwidth[2] = s->iwidth[2] / unit_info.lengthtokpc;
-
-  /* Only allocate cell location array on first call to velociraptor_init(). */
-  if (e->cell_loc == NULL) {
-    /* Allocate and populate top-level cell locations. */
-    if (posix_memalign((void **)&(e->cell_loc), 32,
-                       s->nr_cells * sizeof(struct cell_loc)) != 0)
-      error("Failed to allocate top-level cell locations for VELOCIraptor.");
-
-    for (int i = 0; i < s->nr_cells; i++) {
-      e->cell_loc[i].loc[0] = unit_info.lengthtokpc * s->cells_top[i].loc[0];
-      e->cell_loc[i].loc[1] = unit_info.lengthtokpc * s->cells_top[i].loc[1];
-      e->cell_loc[i].loc[2] = unit_info.lengthtokpc * s->cells_top[i].loc[2];
-    }
   }
-
-  sim_info.cell_loc = e->cell_loc;
-
-  char configfilename[PARSER_MAX_LINE_SIZE],
-      outputFileName[PARSER_MAX_LINE_SIZE + 128];
-  parser_get_param_string(e->parameter_file,
-                          "StructureFinding:config_file_name", configfilename);
-  snprintf(outputFileName, PARSER_MAX_LINE_SIZE + 128, "%s.VELOCIraptor",
-           e->stfBaseName);
-
-  message("Config file name: %s", configfilename);
-  message("Period: %e", sim_info.period);
-  message("Zoom high res mass: %e", sim_info.zoomhigresolutionmass);
-  message("Inter-particle spacing: %e", sim_info.interparticlespacing);
-  message("Cosmological: %d", sim_info.icosmologicalsim);
-  message("Space dimensions: (%e,%e,%e)", sim_info.spacedimension[0],
-          sim_info.spacedimension[1], sim_info.spacedimension[2]);
-  message("No. of top-level cells: %d", sim_info.numcells);
-  message("Top-level cell locations range: (%e,%e,%e) -> (%e,%e,%e)",
-          sim_info.cell_loc[0].loc[0], sim_info.cell_loc[0].loc[1],
-          sim_info.cell_loc[0].loc[2],
-          sim_info.cell_loc[sim_info.numcells - 1].loc[0],
-          sim_info.cell_loc[sim_info.numcells - 1].loc[1],
-          sim_info.cell_loc[sim_info.numcells - 1].loc[2]);
+  sim_info.izoomsim = 0;
+
+  /* Tell VELOCIraptor what we have in the simulation */
+  sim_info.idarkmatter = (e->total_nr_gparts - e->total_nr_parts > 0);
+  sim_info.igas = (e->policy & engine_policy_hydro);
+  sim_info.istar = (e->policy & engine_policy_stars);
+  sim_info.ibh = 0;  // sim_info.ibh = (e->policy&engine_policy_bh);
+  sim_info.iother = 0;
+
+  /* Be nice, talk! */
+  if (e->verbose) {
+    message("VELOCIraptor conf: Length conversion factor: %e",
+            unit_info.lengthtokpc);
+    message("VELOCIraptor conf: Velocity conversion factor: %e",
+            unit_info.velocitytokms);
+    message("VELOCIraptor conf: Mass conversion factor: %e",
+            unit_info.masstosolarmass);
+    message("VELOCIraptor conf: Internal energy conversion factor: %e",
+            unit_info.energyperunitmass);
+    message("VELOCIraptor conf: G: %e", unit_info.gravity);
+    message("VELOCIraptor conf: H0/h: %e", unit_info.hubbleunit);
+    message("VELOCIraptor conf: Config file name: %s", e->stf_config_file_name);
+    message("VELOCIraptor conf: Cosmological Simulation: %d",
+            sim_info.icosmologicalsim);
+  }
 
   /* Initialise VELOCIraptor. */
-  if (!InitVelociraptor(configfilename, outputFileName, cosmo_info, unit_info,
-                        sim_info))
-    error("Exiting. VELOCIraptor initialisation failed.");
+  if (InitVelociraptor(e->stf_config_file_name, unit_info, sim_info,
+                       e->nr_threads) != 1)
+    error("VELOCIraptor initialisation failed.");
+
+  if (e->verbose)
+    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
+            clocks_getunit());
 #else
   error("SWIFT not configure to run with VELOCIraptor.");
 #endif /* HAVE_VELOCIRAPTOR */
@@ -250,118 +272,287 @@ void velociraptor_init(struct engine *e) {
  * @brief Run VELOCIraptor with current particle data.
  *
  * @param e The #engine.
- *
+ * @param linked_with_snap Are we running at the same time as a snapshot dump?
  */
-void velociraptor_invoke(struct engine *e) {
+void velociraptor_invoke(struct engine *e, const int linked_with_snap) {
 
 #ifdef HAVE_VELOCIRAPTOR
-  struct space *s = e->s;
-  struct gpart *gparts = s->gparts;
-  struct part *parts = s->parts;
-  struct xpart *xparts = s->xparts;
+
+  const struct cosmology *cosmo = e->cosmology;
+  const struct hydro_props *hydro_props = e->hydro_properties;
+  const struct unit_system *us = e->internal_units;
+  const struct phys_const *phys_const = e->physical_constants;
+  const struct cooling_function_data *cool_func = e->cooling_func;
+
+  /* Handle on the particles */
+  const struct space *s = e->s;
+  const struct part *parts = s->parts;
+  const struct xpart *xparts = s->xparts;
+  const struct gpart *gparts = s->gparts;
+  const struct spart *sparts = s->sparts;
   const size_t nr_gparts = s->nr_gparts;
-  const size_t nr_hydro_parts = s->nr_parts;
+  const size_t nr_parts = s->nr_parts;
+  const size_t nr_sparts = s->nr_sparts;
   const int nr_cells = s->nr_cells;
-  int *cell_node_ids = NULL;
-  static int stf_output_count = 0;
+
+  const ticks tic = getticks();
 
   /* Allow thread to run on any core for the duration of the call to
-   * VELOCIraptor so that
-   * when OpenMP threads are spawned they can run on any core on the processor.
-   */
+   * VELOCIraptor so that when OpenMP threads are spawned
+   * they can run on any core on the processor. */
   const int nr_cores = sysconf(_SC_NPROCESSORS_ONLN);
-  cpu_set_t cpuset;
   pthread_t thread = pthread_self();
 
   /* Set affinity mask to include all cores on the CPU for VELOCIraptor. */
+  cpu_set_t cpuset;
   CPU_ZERO(&cpuset);
   for (int j = 0; j < nr_cores; j++) CPU_SET(j, &cpuset);
-
   pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuset);
 
-  ticks tic = getticks();
+  /* Set cosmology information for this point in time */
+  struct cosmoinfo cosmo_info;
+  cosmo_info.atime = e->cosmology->a;
+  cosmo_info.littleh = e->cosmology->h;
+  cosmo_info.Omega_m = e->cosmology->Omega_m;
+  cosmo_info.Omega_b = e->cosmology->Omega_b;
+  cosmo_info.Omega_r = e->cosmology->Omega_r;
+  cosmo_info.Omega_k = e->cosmology->Omega_k;
+  cosmo_info.Omega_nu = 0.;
+  cosmo_info.Omega_Lambda = e->cosmology->Omega_lambda;
+  cosmo_info.Omega_cdm = e->cosmology->Omega_m - e->cosmology->Omega_b;
+  cosmo_info.w_de = e->cosmology->w;
+
+  /* Report the cosmo info we use */
+  if (e->verbose) {
+    message("VELOCIraptor conf: Scale factor: %e", cosmo_info.atime);
+    message("VELOCIraptor conf: Little h: %e", cosmo_info.littleh);
+    message("VELOCIraptor conf: Omega_m: %e", cosmo_info.Omega_m);
+    message("VELOCIraptor conf: Omega_b: %e", cosmo_info.Omega_b);
+    message("VELOCIraptor conf: Omega_Lambda: %e", cosmo_info.Omega_Lambda);
+    message("VELOCIraptor conf: Omega_cdm: %e", cosmo_info.Omega_cdm);
+    message("VELOCIraptor conf: w_de: %e", cosmo_info.w_de);
+  }
+
+  /* Update the simulation information */
+  struct siminfo sim_info;
+
+  /* Period of the box (Note we assume a cubic box!) */
+  if (e->s->periodic) {
+    sim_info.period = s->dim[0];
+  } else {
+    sim_info.period = 0.0;
+  }
+
+  /* Tell VELOCIraptor this is not a zoom-in simulation */
+  sim_info.zoomhigresolutionmass = -1.0;
+
+  /* Are we running with cosmology? */
+  if (e->policy & engine_policy_cosmology) {
+    sim_info.icosmologicalsim = 1;
+    sim_info.izoomsim = 0;
+    const size_t total_nr_baryons = e->total_nr_parts + e->total_nr_sparts;
+    const size_t total_nr_dmparts = e->total_nr_gparts - total_nr_baryons;
+    sim_info.interparticlespacing = sim_info.period / cbrt(total_nr_dmparts);
+  } else {
+    sim_info.icosmologicalsim = 0;
+    sim_info.izoomsim = 0;
+    sim_info.interparticlespacing = -1;
+  }
+
+  /* Set the spatial extent of the simulation volume */
+  sim_info.spacedimension[0] = s->dim[0];
+  sim_info.spacedimension[1] = s->dim[1];
+  sim_info.spacedimension[2] = s->dim[2];
+
+  /* Store number of top-level cells */
+  sim_info.numcells = s->nr_cells;
+
+  /* Size and inverse size of the top-level cells in VELOCIraptor units */
+  sim_info.cellwidth[0] = s->cells_top[0].width[0];
+  sim_info.cellwidth[1] = s->cells_top[0].width[1];
+  sim_info.cellwidth[2] = s->cells_top[0].width[2];
+  sim_info.icellwidth[0] = s->iwidth[0];
+  sim_info.icellwidth[1] = s->iwidth[1];
+  sim_info.icellwidth[2] = s->iwidth[2];
+
+  /* Copy the position of the top-level cells */
+  if (posix_memalign((void **)&sim_info.cell_loc, 32,
+                     s->nr_cells * sizeof(struct cell_loc)) != 0)
+    error("Failed to allocate top-level cell locations for VELOCIraptor.");
+  for (int i = 0; i < s->nr_cells; i++) {
+    sim_info.cell_loc[i].loc[0] = s->cells_top[i].loc[0];
+    sim_info.cell_loc[i].loc[1] = s->cells_top[i].loc[1];
+    sim_info.cell_loc[i].loc[2] = s->cells_top[i].loc[2];
+  }
+
+  if (e->verbose) {
+    message("VELOCIraptor conf: Space dimensions: (%e,%e,%e)",
+            sim_info.spacedimension[0], sim_info.spacedimension[1],
+            sim_info.spacedimension[2]);
+    message("VELOCIraptor conf: No. of top-level cells: %d", sim_info.numcells);
+    message(
+        "VELOCIraptor conf: Top-level cell locations range: (%e,%e,%e) -> "
+        "(%e,%e,%e)",
+        sim_info.cell_loc[0].loc[0], sim_info.cell_loc[0].loc[1],
+        sim_info.cell_loc[0].loc[2],
+        sim_info.cell_loc[sim_info.numcells - 1].loc[0],
+        sim_info.cell_loc[sim_info.numcells - 1].loc[1],
+        sim_info.cell_loc[sim_info.numcells - 1].loc[2]);
+  }
 
   /* Allocate and populate array of cell node IDs. */
+  int *cell_node_ids = NULL;
   if (posix_memalign((void **)&cell_node_ids, 32, nr_cells * sizeof(int)) != 0)
     error("Failed to allocate list of cells node IDs for VELOCIraptor.");
-
   for (int i = 0; i < nr_cells; i++) cell_node_ids[i] = s->cells_top[i].nodeID;
 
-  message("MPI rank %d sending %zu gparts to VELOCIraptor.", engine_rank,
-          nr_gparts);
+  /* Mention the number of particles being sent */
+  if (e->verbose)
+    message(
+        "VELOCIraptor conf: MPI rank %d sending %zu gparts to VELOCIraptor.",
+        engine_rank, nr_gparts);
 
-  /* Append base name with either the step number or time depending on what
-   * format is specified in the parameter file. */
+  /* Append base name with the current output number */
   char outputFileName[PARSER_MAX_LINE_SIZE + 128];
-  if (e->stf_output_freq_format == io_stf_steps) {
-    snprintf(outputFileName, PARSER_MAX_LINE_SIZE + 128, "%s_%04i.VELOCIraptor",
-             e->stfBaseName, e->step);
-  } else if (e->stf_output_freq_format == io_stf_time) {
+
+  /* What should the filename be? */
+  if (linked_with_snap) {
+    snprintf(outputFileName, PARSER_MAX_LINE_SIZE + 128,
+             "stf_%s_%04i.VELOCIraptor", e->snapshot_base_name,
+             e->snapshot_output_count);
+  } else {
     snprintf(outputFileName, PARSER_MAX_LINE_SIZE + 128, "%s_%04i.VELOCIraptor",
-             e->stfBaseName, stf_output_count);
+             e->stf_base_name, e->stf_output_count);
+  }
+
+  /* What is the snapshot number? */
+  int snapnum;
+  if (linked_with_snap) {
+    snapnum = e->snapshot_output_count;
+  } else {
+    snapnum = e->stf_output_count;
   }
 
   /* Allocate and populate an array of swift_vel_parts to be passed to
    * VELOCIraptor. */
   struct swift_vel_part *swift_parts = NULL;
-
   if (posix_memalign((void **)&swift_parts, part_align,
                      nr_gparts * sizeof(struct swift_vel_part)) != 0)
     error("Failed to allocate array of particles for VELOCIraptor.");
 
-  bzero(swift_parts, nr_gparts * sizeof(struct swift_vel_part));
-
-  const float energy_scale = 1.0;
-  const float a = e->cosmology->a;
-
-  message("Energy scaling factor: %f", energy_scale);
-  message("a: %f", a);
+  const float a_inv = e->cosmology->a_inv;
 
-  /* Convert particle properties into VELOCIraptor units */
+  /* Convert particle properties into VELOCIraptor units.
+   * VELOCIraptor wants:
+   * - Co-moving positions,
+   * - Peculiar velocities,
+   * - Co-moving potential,
+   * - Physical internal energy (for the gas),
+   * - Temperatures (for the gas).
+   */
   for (size_t i = 0; i < nr_gparts; i++) {
+
     swift_parts[i].x[0] = gparts[i].x[0];
     swift_parts[i].x[1] = gparts[i].x[1];
     swift_parts[i].x[2] = gparts[i].x[2];
-    swift_parts[i].v[0] = gparts[i].v_full[0] / a;
-    swift_parts[i].v[1] = gparts[i].v_full[1] / a;
-    swift_parts[i].v[2] = gparts[i].v_full[2] / a;
+
+    swift_parts[i].v[0] = gparts[i].v_full[0] * a_inv;
+    swift_parts[i].v[1] = gparts[i].v_full[1] * a_inv;
+    swift_parts[i].v[2] = gparts[i].v_full[2] * a_inv;
+
     swift_parts[i].mass = gravity_get_mass(&gparts[i]);
     swift_parts[i].potential = gravity_get_comoving_potential(&gparts[i]);
+
     swift_parts[i].type = gparts[i].type;
 
+    swift_parts[i].index = i;
+#ifdef WITH_MPI
+    swift_parts[i].task = e->nodeID;
+#else
+    swift_parts[i].task = 0;
+#endif
+
     /* Set gas particle IDs from their hydro counterparts and set internal
      * energies. */
-    if (gparts[i].type == swift_type_gas) {
-      swift_parts[i].id = parts[-gparts[i].id_or_neg_offset].id;
-      swift_parts[i].u =
-          hydro_get_physical_internal_energy(
-              &parts[-gparts[i].id_or_neg_offset],
-              &xparts[-gparts[i].id_or_neg_offset], e->cosmology) *
-          energy_scale;
-    } else if (gparts[i].type == swift_type_dark_matter) {
-      swift_parts[i].id = gparts[i].id_or_neg_offset;
-      swift_parts[i].u = 0.f;
-    } else {
-      error("Particle type not handled by velociraptor (yet?) !");
+    switch (gparts[i].type) {
+
+      case swift_type_gas: {
+        const struct part *p = &parts[-gparts[i].id_or_neg_offset];
+        const struct xpart *xp = &xparts[-gparts[i].id_or_neg_offset];
+
+        swift_parts[i].id = parts[-gparts[i].id_or_neg_offset].id;
+        swift_parts[i].u = hydro_get_drifted_physical_internal_energy(p, cosmo);
+        swift_parts[i].T = cooling_get_temperature(phys_const, hydro_props, us,
+                                                   cosmo, cool_func, p, xp);
+      } break;
+
+      case swift_type_stars:
+
+        swift_parts[i].id = sparts[-gparts[i].id_or_neg_offset].id;
+        swift_parts[i].u = 0.f;
+        swift_parts[i].T = 0.f;
+        break;
+
+      case swift_type_dark_matter:
+
+        swift_parts[i].id = gparts[i].id_or_neg_offset;
+        swift_parts[i].u = 0.f;
+        swift_parts[i].T = 0.f;
+        break;
+
+      default:
+        error("Particle type not handled by VELOCIraptor.");
     }
   }
 
+  /* Values returned by VELOCIraptor */
+  int num_gparts_in_groups = -1;
+  struct groupinfo *group_info = NULL;
+
   /* Call VELOCIraptor. */
-  if (!InvokeVelociraptor(nr_gparts, nr_hydro_parts, swift_parts, cell_node_ids,
-                          outputFileName))
+  group_info = (struct groupinfo *)InvokeVelociraptor(
+      snapnum, outputFileName, cosmo_info, sim_info, nr_gparts, nr_parts,
+      nr_sparts, swift_parts, cell_node_ids, e->nr_threads, linked_with_snap,
+      &num_gparts_in_groups);
+
+  /* Check that the output is valid */
+  if (linked_with_snap && group_info == NULL && num_gparts_in_groups < 0) {
     error("Exiting. Call to VELOCIraptor failed on rank: %d.", e->nodeID);
+  }
+  if (!linked_with_snap && group_info != NULL) {
+    error("VELOCIraptor returned an array whilst it should not have.");
+  }
+
+  /* Assign the group IDs back to the gparts */
+  if (linked_with_snap) {
+
+    if (posix_memalign((void **)&s->gpart_group_data, part_align,
+                       nr_gparts * sizeof(struct velociraptor_gpart_data)) != 0)
+      error("Failed to allocate array of gpart data for VELOCIraptor i/o.");
+
+    struct velociraptor_gpart_data *data = s->gpart_group_data;
+
+    /* Zero the array (gparts not in groups have an ID of 0) */
+    bzero(data, nr_gparts * sizeof(struct velociraptor_gpart_data));
+
+    /* Copy the data at the right place */
+    for (int i = 0; i < num_gparts_in_groups; i++) {
+      data[group_info[i].index].groupID = group_info[i].groupID;
+    }
+
+    /* Free the array returned by VELOCIraptor */
+    free(group_info);
+  }
 
   /* Reset the pthread affinity mask after VELOCIraptor returns. */
   pthread_setaffinity_np(thread, sizeof(cpu_set_t), engine_entry_affinity());
 
-  /* Free cell node ids after VELOCIraptor has copied them. */
-  free(cell_node_ids);
-  free(swift_parts);
-
-  stf_output_count++;
+  /* Increase output counter (if not linked with snapshots) */
+  if (!linked_with_snap) e->stf_output_count++;
 
-  message("VELOCIraptor took %.3f %s on rank %d.",
-          clocks_from_ticks(getticks() - tic), clocks_getunit(), engine_rank);
+  if (e->verbose)
+    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
+            clocks_getunit());
 #else
   error("SWIFT not configure to run with VELOCIraptor.");
 #endif /* HAVE_VELOCIRAPTOR */
diff --git a/src/velociraptor_interface.h b/src/velociraptor_interface.h
index 1f29be11c9dd8e267c87201b0a438979fec3775b..2547fa56c1677e93b1c59a1435e9a6ab92c1f308 100644
--- a/src/velociraptor_interface.h
+++ b/src/velociraptor_interface.h
@@ -22,19 +22,11 @@
 /* Config parameters. */
 #include "../config.h"
 
-/**
- * @brief The different formats for when to run structure finding.
- */
-enum io_stf_output_format {
-  io_stf_steps = 0, /*!< Output every N steps */
-  io_stf_time       /*!< Output at fixed time intervals */
-};
-
 /* Forward declaration */
 struct engine;
 
 /* VELOCIraptor wrapper functions. */
 void velociraptor_init(struct engine *e);
-void velociraptor_invoke(struct engine *e);
+void velociraptor_invoke(struct engine *e, const int linked_with_snap);
 
 #endif /* SWIFT_VELOCIRAPTOR_INTERFACE_H */
diff --git a/src/velociraptor_io.h b/src/velociraptor_io.h
new file mode 100644
index 0000000000000000000000000000000000000000..f18398219bfbc5cd6bb58a37b103f29527fa5589
--- /dev/null
+++ b/src/velociraptor_io.h
@@ -0,0 +1,78 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2019 Matthieu Schaller (schaller@strw.leidenuniv.nl)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+#ifndef SWIFT_VELOCIRAPTOR_IO_H
+#define SWIFT_VELOCIRAPTOR_IO_H
+
+/* Config parameters. */
+#include "../config.h"
+
+INLINE static void velociraptor_convert_part_groupID(const struct engine* e,
+                                                     const struct part* p,
+                                                     const struct xpart* xp,
+                                                     long long* ret) {
+  if (p->gpart == NULL)
+    ret[0] = 0.f;
+  else {
+    const ptrdiff_t offset = p->gpart - e->s->gparts;
+    *ret = (e->s->gpart_group_data + offset)->groupID;
+  }
+}
+
+INLINE static void velociraptor_convert_spart_groupID(const struct engine* e,
+                                                      const struct spart* sp,
+                                                      long long* ret) {
+  if (sp->gpart == NULL)
+    ret[0] = 0.f;
+  else {
+    const ptrdiff_t offset = sp->gpart - e->s->gparts;
+    *ret = (e->s->gpart_group_data + offset)->groupID;
+  }
+}
+
+__attribute__((always_inline)) INLINE static int velociraptor_write_parts(
+    const struct part* parts, const struct xpart* xparts,
+    struct io_props* list) {
+
+  list[0] = io_make_output_field_convert_part(
+      "GroupID", LONGLONG, 1, UNIT_CONV_NO_UNITS, parts, xparts,
+      velociraptor_convert_part_groupID);
+
+  return 1;
+}
+
+__attribute__((always_inline)) INLINE static int velociraptor_write_gparts(
+    const struct velociraptor_gpart_data* group_data, struct io_props* list) {
+
+  list[0] = io_make_output_field("GroupID", LONGLONG, 1, UNIT_CONV_NO_UNITS,
+                                 group_data, groupID);
+
+  return 1;
+}
+
+__attribute__((always_inline)) INLINE static int velociraptor_write_sparts(
+    const struct spart* sparts, struct io_props* list) {
+
+  list[0] = io_make_output_field_convert_spart(
+      "GroupID", LONGLONG, 1, UNIT_CONV_NO_UNITS, sparts,
+      velociraptor_convert_spart_groupID);
+
+  return 1;
+}
+
+#endif /* SWIFT_VELOCIRAPTOR_IO_H */
diff --git a/src/velociraptor_struct.h b/src/velociraptor_struct.h
new file mode 100644
index 0000000000000000000000000000000000000000..b998263a6ba2fe0aaa6552f274cb8f4ee85d3b1c
--- /dev/null
+++ b/src/velociraptor_struct.h
@@ -0,0 +1,34 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2019 Matthieu Schaller (schaller@strw.leidenuniv.nl)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+#ifndef SWIFT_VELOCIRAPTOR_STRUCT_H
+#define SWIFT_VELOCIRAPTOR_STRUCT_H
+
+/* Config parameters. */
+#include "../config.h"
+
+/**
+ * @brief Data returned by VELOCIraptor for each #gpart.
+ */
+struct velociraptor_gpart_data {
+
+  /*! Group ID of that #gpart. */
+  long long groupID;
+};
+
+#endif /* SWIFT_VELOCIRAPTOR_STRUCT_H */
diff --git a/tests/test125cells.c b/tests/test125cells.c
index 5a9c4ea9511b5d75a3098f7997b83607cdcbd715..5b518970ea118c98a8354e816f86ecd16a5f85cf 100644
--- a/tests/test125cells.c
+++ b/tests/test125cells.c
@@ -459,11 +459,10 @@ void dump_particle_fields(char *fileName, struct cell *main_cell,
 /* Just a forward declaration... */
 void runner_dopair1_branch_density(struct runner *r, struct cell *ci,
                                    struct cell *cj);
-void runner_doself1_density(struct runner *r, struct cell *ci);
+void runner_doself1_branch_density(struct runner *r, struct cell *ci);
 void runner_dopair2_branch_force(struct runner *r, struct cell *ci,
                                  struct cell *cj);
-void runner_doself2_force(struct runner *r, struct cell *ci);
-void runner_doself2_force_vec(struct runner *r, struct cell *ci);
+void runner_doself2_branch_force(struct runner *r, struct cell *ci);
 
 /* And go... */
 int main(int argc, char *argv[]) {
@@ -707,7 +706,7 @@ int main(int argc, char *argv[]) {
 
     /* And now the self-interaction for the central cells*/
     for (int j = 0; j < 27; ++j)
-      runner_doself1_density(&runner, inner_cells[j]);
+      runner_doself1_branch_density(&runner, inner_cells[j]);
 
     /* Ghost to finish everything on the central cells */
     for (int j = 0; j < 27; ++j) runner_do_ghost(&runner, inner_cells[j], 0);
@@ -745,7 +744,7 @@ int main(int argc, char *argv[]) {
     ticks self_tic = getticks();
 
     /* And now the self-interaction for the main cell */
-    runner_doself2_force(&runner, main_cell);
+    runner_doself2_branch_force(&runner, main_cell);
 
     timings[26] += getticks() - self_tic;
 
diff --git a/tests/tolerance_125_perturbed.dat b/tests/tolerance_125_perturbed.dat
index 9987f8a0703a6106f41b73c1a16b4cea8af3bc1e..95f5f78246a82b7c326c87f9b4edbac4f51c65e9 100644
--- a/tests/tolerance_125_perturbed.dat
+++ b/tests/tolerance_125_perturbed.dat
@@ -1,4 +1,4 @@
 #   ID    pos_x    pos_y    pos_z      v_x      v_y      v_z        h      rho    div_v        S        u        P        c      a_x      a_y      a_z     h_dt    v_sig    dS/dt    du/dt
     0	  1e-4	   1e-4	    1e-4       1e-4	1e-4	 1e-4	    1e-4   1e-4	  1e-4	       1e-4	1e-4	 1e-4	  1e-4	 1e-4	  1e-4	   1e-4	   1e-4	   1e-4	    1e-4     1e-4
-    0	  1e-4	   1e-4	    1e-4       1e-4	1e-4	 1e-4	    1e-4   1e-4	  1e-4	       1e-4	1e-4	 1e-4	  1e-4	 2.3e-3	  2e-3	   2e-3	   1e-4	   1e-4	    1e-4     1e-4
+    0	  1e-4	   1e-4	    1e-4       1e-4	1e-4	 1e-4	    1e-4   1e-4	  1e-4	       1e-4	1e-4	 1e-4	  1e-4	 3.6e-3	  2e-3	   2e-3	   1e-4	   1e-4	    1e-4     1e-4
     0	  1e-6	   1e-6	    1e-6       1e-6	1e-6	 1e-6	    1e-6   1e-6	  1e-6	       1e-6	1e-6	 1e-6	  1e-6	 2e-4	  2e-4	   2e-4	   1e-6	   1e-6	    1e-6     1e-6
diff --git a/tools/analyse_runtime.py b/tools/analyse_runtime.py
index f2f198dfb80d6373e63296b6350fe6768191dd39..a2c3dd0f201fc47518d6bb0a6a918627db2f3e96 100755
--- a/tools/analyse_runtime.py
+++ b/tools/analyse_runtime.py
@@ -53,92 +53,52 @@ threshold = 0.008
 num_files = len(sys.argv) - 1
 
 labels = [
-    "Gpart assignment",
-    "Mesh comunication",
-    "Forward Fourier transform",
-    "Green function",
-    "Backwards Fourier transform",
-    "engine_recompute_displacement_constraint:",
-    "engine_exchange_top_multipoles:",
-    "updating particle counts",
-    "Making gravity tasks",
-    "Making hydro tasks",
-    "Splitting tasks",
-    "Counting and linking tasks",
-    "Setting super-pointers",
-    "Making extra hydroloop tasks",
-    "Making extra starsloop tasks",
-    "Linking gravity tasks",
-    "Creating send tasks",
-    "Exchanging cell tags",
-    "Creating recv tasks",
-    "Setting unlocks",
-    "Ranking the tasks",
-    "scheduler_reweight:",
-    "space_list_useful_top_level_cells:",
-    "space_rebuild:",
-    "engine_drift_all:",
-    "engine_unskip:",
-    "engine_collect_end_of_step:",
-    "engine_launch:",
-    "writing particle properties",
-    "engine_repartition:",
-    "engine_exchange_cells:",
-    "Dumping restart files",
-    "engine_print_stats:",
-    "engine_marktasks:",
-    "Reading initial conditions",
-    "engine_print_task_counts:",
-    "engine_drift_top_multipoles:",
-    "Communicating rebuild flag",
-    "engine_split:",
-    "space_init",
-    "engine_init",
-    "engine_repartition_trigger:"
-]
-is_rebuild = [
-    1,
-    1,
-    1,
-    1,
-    1,
-    1,
-    1,
-    1,
-    1,
-    1,
-    1,
-    1,
-    1,
-    1,
-    1,
-    1,
-    1,
-    1,
-    1,
-    1,
-    1,
-    1,
-    1,
-    1,
-    0,
-    0,
-    0,
-    0,
-    0,
-    0,
-    1,
-    0,
-    0,
-    1,
-    0,
-    0,
-    0,
-    0,
-    0,
-    0,
-    0,
-    0
+    ["Gpart assignment", 1],
+    ["Mesh comunication", 1],
+    ["Forward Fourier transform", 1],
+    ["Green function", 1],
+    ["Backwards Fourier transform", 1],
+    ["engine_recompute_displacement_constraint:", 1],
+    ["engine_exchange_top_multipoles:", 1],
+    ["updating particle counts", 1],
+    ["engine_estimate_nr_tasks:", 1],
+    ["Making gravity tasks", 1],
+    ["Making hydro tasks", 1],
+    ["Splitting tasks", 1],
+    ["Counting and linking tasks", 1],
+    ["Setting super-pointers", 1],
+    ["Making extra hydroloop tasks", 1],
+    ["Making extra starsloop tasks", 1],
+    ["Linking gravity tasks", 1],
+    ["Creating send tasks", 1],
+    ["Exchanging cell tags", 1],
+    ["Creating recv tasks", 1],
+    ["Counting number of foreign particles", 1],
+    ["Recursively linking foreign arrays", 1],
+    ["Setting unlocks", 1],
+    ["Ranking the tasks", 1],
+    ["scheduler_reweight:", 1],
+    ["space_list_useful_top_level_cells:", 1],
+    ["space_rebuild:", 1],
+    ["engine_drift_all:", 0],
+    ["engine_unskip:", 0],
+    ["engine_collect_end_of_step:", 0],
+    ["engine_launch:", 0],
+    ["writing particle properties", 0],
+    ["engine_repartition:", 0],
+    ["engine_exchange_cells:", 1],
+    ["Dumping restart files", 0],
+    ["engine_print_stats:", 0],
+    ["engine_marktasks:", 1],
+    ["Reading initial conditions", 0],
+    ["engine_print_task_counts:", 0],
+    ["engine_drift_top_multipoles:", 0],
+    ["Communicating rebuild flag", 0],
+    ["engine_split:", 0],
+    ["space_init", 0],
+    ["engine_init", 0],
+    ["engine_repartition_trigger:", 0],
+    ["velociraptor_invoke:", 0]
 ]
 times = np.zeros(len(labels))
 counts = np.zeros(len(labels))
@@ -178,20 +138,20 @@ for i in range(num_files):
         for i in range(len(labels)):
 
             # Extract the different blocks
-            if re.search("%s took" % labels[i], line):
+            if re.search("%s took" % labels[i][0], line):
                 counts[i] += 1.0
                 times[i] += float(
                     re.findall(r"[+-]?((\d+\.?\d*)|(\.\d+))", line)[-1][0]
                 )
 
-        # Find the last line with meaningful output (avoid crash report, batch system stuf....)
+        # Find the last line with meaningful output (avoid crash report, batch system stuff....)
         if re.findall(r"\[[0-9]{4}\][ ]\[*", line) or re.findall(
             r"^\[[0-9]*[.][0-9]+\][ ]", line
         ):
             lastline = line
 
     # Total run time
-    total_time += float(re.findall(r"[+-]?([0-9]*[.])?[0-9]+", lastline)[1])
+    total_time += float(re.findall(r"[+-]?(\[[0-9]\])?(\[[0-9]*[.][0-9]*\])+", lastline)[0][1][1:-1])
 
 # Conver to seconds
 times /= 1000.0
@@ -207,35 +167,33 @@ time_ratios = times / total_time
 
 # Better looking labels
 for i in range(len(labels)):
-    labels[i] = labels[i].replace("_", " ")
-    labels[i] = labels[i].replace(":", "")
-    labels[i] = labels[i].title()
+    labels[i][0] = labels[i][0].replace("_", " ")
+    labels[i][0] = labels[i][0].replace(":", "")
+    labels[i][0] = labels[i][0].title()
 
 times = np.array(times)
 time_ratios = np.array(time_ratios)
-is_rebuild = np.array(is_rebuild)
 
 # Sort in order of importance
 order = np.argsort(-times)
 times = times[order]
 counts = counts[order]
 time_ratios = time_ratios[order]
-is_rebuild = is_rebuild[order]
-labels = np.take(labels, order)
+labels = [labels[i] for i in order]
 
 # Keep only the important components
 important_times = [0.0]
 important_ratios = [0.0]
-important_labels = ["Others (all below %.1f\%%)" % (threshold * 100)]
 important_is_rebuild = [0]
+important_labels = ["Others (all below %.1f\%%)" % (threshold * 100)]
 need_print = True
 print("Time spent in the different code sections:")
 for i in range(len(labels)):
     if time_ratios[i] > threshold:
         important_times.append(times[i])
         important_ratios.append(time_ratios[i])
-        important_labels.append(labels[i])
-        important_is_rebuild.append(is_rebuild[i])
+        important_is_rebuild.append(labels[i][1])
+        important_labels.append(labels[i][0])
     else:
         if need_print:
             print("Elements in 'Other' category (<%.1f%%):" % (threshold * 100))
@@ -243,7 +201,7 @@ for i in range(len(labels)):
         important_times[0] += times[i]
         important_ratios[0] += time_ratios[i]
 
-    print(" - '%-40s' (%5d calls, time: %.4fs): %.4f%%" % (labels[i], counts[i], times[i], time_ratios[i] * 100))
+    print(" - '%-40s' (%5d calls, time: %.4fs): %.4f%%" % (labels[i][0], counts[i], times[i], time_ratios[i] * 100))
 
 # Anything unaccounted for?
 print(
@@ -254,8 +212,8 @@ print(
 important_ratios = np.array(important_ratios)
 important_is_rebuild = np.array(important_is_rebuild)
 
-figure()
 
+figure()
 
 def func(pct):
     return "$%4.2f\\%%$" % pct
diff --git a/tools/task_plots/analyse_tasks.py b/tools/task_plots/analyse_tasks.py
index 5738ca068c215a78c6fb4ef2524ce3d73565633e..e897424a95be8937073bd16adf108fa4fa1456ad 100755
--- a/tools/task_plots/analyse_tasks.py
+++ b/tools/task_plots/analyse_tasks.py
@@ -82,6 +82,7 @@ TASKTYPES = [
     "kick1",
     "kick2",
     "timestep",
+    "timestep_limiter",
     "send",
     "recv",
     "grav_long_range",
@@ -104,6 +105,7 @@ SUBTYPES = [
     "density",
     "gradient",
     "force",
+    "limiter",
     "grav",
     "external_grav",
     "tend",
diff --git a/tools/task_plots/plot_tasks.py b/tools/task_plots/plot_tasks.py
index 82dc882becfc2a7a8a537b822aceb8d9d226792d..12fd4d241a268c9d45fd72f5cdda2727221ba94d 100755
--- a/tools/task_plots/plot_tasks.py
+++ b/tools/task_plots/plot_tasks.py
@@ -167,6 +167,7 @@ TASKTYPES = [
     "kick1",
     "kick2",
     "timestep",
+    "timestep_limiter",
     "send",
     "recv",
     "grav_long_range",
@@ -189,6 +190,7 @@ SUBTYPES = [
     "density",
     "gradient",
     "force",
+    "limiter",
     "grav",
     "external_grav",
     "tend",
@@ -204,15 +206,23 @@ SUBTYPES = [
 
 #  Task/subtypes of interest.
 FULLTYPES = [
+    "self/limiter",
     "self/force",
+    "self/gradient",
     "self/density",
     "self/grav",
+    "sub_self/limiter",
     "sub_self/force",
+    "sub_self/gradient",
     "sub_self/density",
+    "pair/limiter",
     "pair/force",
+    "pair/gradient",
     "pair/density",
     "pair/grav",
+    "sub_pair/limiter",
     "sub_pair/force",
+    "sub_pair/gradient",
     "sub_pair/density",
     "recv/xv",
     "send/xv",