diff --git a/AUTHORS b/AUTHORS
index 6f283405b69a7d3a5397916f0a3afa7f4fb54a4a..7cbdaffcf7813aae0488a4f284c789cf6f3e30d9 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -2,12 +2,13 @@ Pedro Gonnet            gonnet@google.com
 Matthieu Schaller       matthieu.schaller@durham.ac.uk
 Aidan Chalk             aidan.chalk@durham.ac.uk
 Peter W. Draper         p.w.draper@durham.ac.uk
-Bert Vandenbrouck       bert.vandenbroucke@gmail.com
+Bert Vandenbroucke      bert.vandenbroucke@gmail.com
 James S. Willis 	james.s.willis@durham.ac.uk
 John A. Regan 		john.a.regan@durham.ac.uk
 Angus Lepper		angus.lepper@ed.ac.uk
 Tom Theuns 		tom.theuns@durham.ac.uk
 Richard G. Bower	r.g.bower@durham.ac.uk
 Stefan Arridge		stefan.arridge@durham.ac.uk
-Massimiliano Culpo	massimiliano.culpo@googlemail.com
+Josh Borrow             joshua.borrow@durham.ac.uk
+Loic Hausammann		loic.hausammann@epfl.ch
 Yves Revaz   		yves.revaz@epfl.ch
diff --git a/README b/README
index 1ac1624b6a55fad43c73a8936b1a711ff956ca4d..b9209684d65c826ce94812871495743dc8e5cfba 100644
--- a/README
+++ b/README
@@ -29,7 +29,9 @@ Valid options are:
   -G                Run with self-gravity.
   -M                Reconstruct the multipoles every time-step.
   -n          {int} Execute a fixed number of time steps. When unset use the time_end parameter to stop.
+  -o          {str} Generate a default output parameter file.
   -P  {sec:par:val} Set parameter value and overwrites values read from the parameters file. Can be used more than once.
+  -r                Continue using restart files.
   -s                Run with hydrodynamics.
   -S                Run with stars.
   -t          {int} The number of threads to use on each MPI rank. Defaults to 1 if not specified.
@@ -37,6 +39,7 @@ Valid options are:
   -v           [12] Increase the level of verbosity:
                     1: MPI-rank 0 writes,
                     2: All MPI-ranks write.
+  -x                Run with structure finding.
   -y          {int} Time-step frequency at which task graphs are dumped.
   -Y          {int} Time-step frequency at which threadpool tasks are dumped.
   -h                Print this help message and exit.
diff --git a/README.md b/README.md
index e9ce99f10901f4a3aa5fe93d14dea4f36a54fe34..25f8e14b5b881149270a7e7b8a14ffe9535149ef 100644
--- a/README.md
+++ b/README.md
@@ -77,7 +77,9 @@ Valid options are:
   -G                Run with self-gravity.
   -M                Reconstruct the multipoles every time-step.
   -n          {int} Execute a fixed number of time steps. When unset use the time_end parameter to stop.
+  -o          {str} Generate a default output parameter file.
   -P  {sec:par:val} Set parameter value and overwrites values read from the parameters file. Can be used more than once.
+  -r                Continue using restart files.
   -s                Run with hydrodynamics.
   -S                Run with stars.
   -t          {int} The number of threads to use on each MPI rank. Defaults to 1 if not specified.
@@ -85,6 +87,7 @@ Valid options are:
   -v           [12] Increase the level of verbosity:
                     1: MPI-rank 0 writes,
                     2: All MPI-ranks write.
+  -x                Run with structure finding.
   -y          {int} Time-step frequency at which task graphs are dumped.
   -Y          {int} Time-step frequency at which threadpool tasks are dumped.
   -h                Print this help message and exit.
diff --git a/configure.ac b/configure.ac
index 6ec1ece5ed2fef20ceab484eff4e09fce808e635..b9416805b8071e3ad9aaceb6a0a75c27e2a25b2a 100644
--- a/configure.ac
+++ b/configure.ac
@@ -270,6 +270,18 @@ elif test "$gravity_force_checks" != "no"; then
    AC_DEFINE_UNQUOTED([SWIFT_GRAVITY_FORCE_CHECKS], [$enableval] ,[Enable gravity brute-force checks])
 fi
 
+# Check whether we want to switch on glass making
+AC_ARG_ENABLE([glass-making],
+   [AS_HELP_STRING([--enable-glass-making],
+     [Activate the glass-making procedure by reversing the sign of gravity @<:@yes/no@:>@]
+   )],
+   [gravity_glass_making="$enableval"],
+   [gravity_glass_making="no"]
+)
+if test "$gravity_glass_making" == "yes"; then
+   AC_DEFINE([SWIFT_MAKE_GRAVITY_GLASS], 1, [Make the code run in a way to produce a glass file for gravity/cosmology])
+fi
+
 # Check if we want to zero the gravity forces for all particles below some ID.
 AC_ARG_ENABLE([no-gravity-below-id],
    [AS_HELP_STRING([--enable-no-gravity-below-id],
@@ -1360,6 +1372,7 @@ AC_CONFIG_FILES([tests/testPeriodicBCPerturbed.sh], [chmod +x tests/testPeriodic
 AC_CONFIG_FILES([tests/testInteractions.sh], [chmod +x tests/testInteractions.sh])
 AC_CONFIG_FILES([tests/testParser.sh], [chmod +x tests/testParser.sh])
 AC_CONFIG_FILES([tests/testSelectOutput.sh], [chmod +x tests/testSelectOutput.sh])
+AC_CONFIG_FILES([tests/testFormat.sh], [chmod +x tests/testFormat.sh])
 
 # Save the compilation options
 AC_DEFINE_UNQUOTED([SWIFT_CONFIG_FLAGS],["$swift_config_flags"],[Flags passed to configure])
@@ -1408,6 +1421,7 @@ AC_MSG_RESULT([
    Gravity scheme      : $with_gravity
    Multipole order     : $with_multipole_order
    No gravity below ID : $no_gravity_below_id
+   Make gravity glass  : $gravity_glass_making
    External potential  : $with_potential
 
    Cooling function   : $with_cooling
diff --git a/doc/RTD/source/GettingStarted/index.rst b/doc/RTD/source/GettingStarted/index.rst
index d15a8eee2f3b9089a1c8ee033f9aa3ee7ad92a5f..36de8ea740490c16bc9d6b69d871290e80dc2091 100644
--- a/doc/RTD/source/GettingStarted/index.rst
+++ b/doc/RTD/source/GettingStarted/index.rst
@@ -23,3 +23,4 @@ and keep on your desk.
    parameter_file
    what_about_mpi
    running_on_large_systems
+   special_modes
diff --git a/doc/RTD/source/GettingStarted/parameter_file.rst b/doc/RTD/source/GettingStarted/parameter_file.rst
index 1bf4cbd3940a104c126aa256759edc24ca996338..550040ed25ec307633d6fade81eced58ed65a254 100644
--- a/doc/RTD/source/GettingStarted/parameter_file.rst
+++ b/doc/RTD/source/GettingStarted/parameter_file.rst
@@ -19,38 +19,51 @@ In the sections ``Snapshots`` and ``Statistics``, you can specify the options ``
 The ``output_list_on`` enable or not the output list and ``output_list`` is the filename containing the output times.
 With the file header, you can choose between writing redshifts, scale factors or times.
 
-Example of file containing with times (in internal units):
-::
-   # Time
-   0.5
-   1.5
-   3.0
-   12.5
-
-Example of file with scale factors:
-::
-   # Scale Factor
-   0.1
-   0.2
-   0.3
-
-Example of file with redshift: 
-::
-   # Redshift
-   20
-   15
-   10
-   5
+Example of file containing with times (in internal units)::
+
+  # Time
+  0.5
+  1.5
+  3.0
+  12.5
+
+Example of file with scale factors::
+
+  # Scale Factor
+  0.1
+  0.2
+  0.3
+
+Example of file with redshift::
+
+  # Redshift
+  20
+  15
+  10
+  5
 
 Output Selection
 ~~~~~~~~~~~~~~~~
 
-With SWIFT, you can select the particle fields to output in snapshot using the parameter file.
-In section ``SelectOutput``, you can remove a field by adding a parameter formatted in the
-following way ``field_parttype`` where ``field`` is the name of the field that you
-want to remove (e.g. ``Masses``) and ``parttype`` is the type of particles that
-contains this field (e.g. ``Gas``, ``DM`` or ``Star``).  For a parameter, the only
-values accepted are 0 (skip this field when writing) or 1 (default, do not skip
-this field when writing).
+With SWIFT, you can select the particle fields to output in snapshot
+using the parameter file.  In section ``SelectOutput``, you can remove
+a field by adding a parameter formatted in the following way
+``field_parttype`` where ``field`` is the name of the field that you
+want to remove (e.g. ``Masses``) and ``parttype`` is the type of
+particles that contains this field (e.g. ``Gas``, ``DM`` or ``Star``).
+For a parameter, the only values accepted are 0 (skip this field when
+writing) or 1 (default, do not skip this field when writing). By
+default all fields are written.
+
+This field is mostly used to remove unnecessary output by listing them
+with 0's. A classic use-case for this feature is a DM-only simulation
+(pure n-body) where all particles have the same mass. Outputing the
+mass field in the snapshots results in extra i/o time and unnecessary
+waste of disk space. The corresponding section of the ``yaml``
+parameter file would look like this::
+
+  SelectOutput:
+    Masses_DM:   0 
 
-You can generate a ``yaml`` file containing all the possible fields with ``./swift -o output.yml``. By default, all the fields are written.
+You can generate a ``yaml`` file containing all the possible fields
+available for a given configuration of SWIFT by running ``./swift -o output.yml``. 
diff --git a/doc/RTD/source/GettingStarted/special_modes.rst b/doc/RTD/source/GettingStarted/special_modes.rst
new file mode 100644
index 0000000000000000000000000000000000000000..636fc5e5d2237a6eaf4a2f4811cd17fa3e7010f5
--- /dev/null
+++ b/doc/RTD/source/GettingStarted/special_modes.rst
@@ -0,0 +1,43 @@
+.. Special modes
+   Matthieu Schaller, 20/08/2018
+
+Special modes
+=============
+
+SWIFT comes with a few special modes of operating to perform additional tasks.
+
+Static particles
+~~~~~~~~~~~~~~~~
+
+For some test problems it is convenient to have a set of particles that do not
+perceive any gravitational forces and just act as sources for the force
+calculation. This can be achieved by configuring SWIFT with the option
+``--enable-no-gravity-below-id=N``. This will zero the *accelerations* of all
+particles with ``id`` (strictly) lower than ``N`` at every time-step. Note that
+if these particles have an initial velocity they will keep moving at that
+speed.
+
+This will also naturally set their time-step to the maximal value
+(``TimeIntegration:dt_max``) set in the parameter file.
+
+A typical use-case for this feature is to study the evolution of one particle
+orbiting a static halo made of particles. This can be used to assess the
+quality of the gravity tree and time integration. As more particles are added
+to the halo, the orbits will get closer to the analytic solution as the noise
+in the sampling of the halo is reduced.
+
+Note also that this does not affect the hydrodynamic forces. This mode is
+purely designed for gravity-only accuracy tests.
+
+Gravity glasses
+~~~~~~~~~~~~~~~
+
+For many problems in cosmology, it is important to start a simulation with no
+initial noise in the particle distribution. Such a "glass" can be created by
+starting from a random distribution of particles and running with the sign of
+gravity reversed until all the particles reach a steady state. To run SWIFT in
+this mode, configure the code with ``--enable-glass-making``.
+
+Note that this will *not* generate the initial random distribution of
+particles. An initial condition file with particles still has to be provided.
+
diff --git a/examples/ConstantCosmoVolume/plotSolution.py b/examples/ConstantCosmoVolume/plotSolution.py
index 6f772b85d76890dcdcd8f9eb4fdb53305dd26732..f77889d7cb19c230accf25290b88a321e0f59616 100644
--- a/examples/ConstantCosmoVolume/plotSolution.py
+++ b/examples/ConstantCosmoVolume/plotSolution.py
@@ -27,7 +27,7 @@ z_i = 100.           # Initial redshift
 gas_gamma = 5./3.    # Gas adiabatic index
 
 # Physical constants needed for internal energy to temperature conversion
-k_in_J_K = 1.38064852e-23
+kB_in_SI = 1.38064852e-23
 mH_in_kg = 1.6737236e-27
 
 import matplotlib
@@ -45,12 +45,12 @@ params = {'axes.labelsize': 10,
 'ytick.labelsize': 10,
 'text.usetex': True,
  'figure.figsize' : (9.90,6.45),
-'figure.subplot.left'    : 0.08,
+'figure.subplot.left'    : 0.06,
 'figure.subplot.right'   : 0.99,
 'figure.subplot.bottom'  : 0.06,
 'figure.subplot.top'     : 0.99,
-'figure.subplot.wspace'  : 0.2,
-'figure.subplot.hspace'  : 0.12,
+'figure.subplot.wspace'  : 0.21,
+'figure.subplot.hspace'  : 0.13,
 'lines.markersize' : 6,
 'lines.linewidth' : 3.,
 'text.latex.unicode': True
@@ -73,28 +73,37 @@ H_0 = sim["/Cosmology"].attrs["H0 [internal units]"][0]
 unit_length_in_cgs = sim["/Units"].attrs["Unit length in cgs (U_L)"]
 unit_mass_in_cgs = sim["/Units"].attrs["Unit mass in cgs (U_M)"]
 unit_time_in_cgs = sim["/Units"].attrs["Unit time in cgs (U_t)"]
+m_gas = sim["/PartType0/Masses"][0]
+N = sim["/Header"].attrs["NumPart_Total"][0]
 sim.close()
 
+# Expected comoving quantities
+rho_0 = N * m_gas / boxSize**3
+u_0 = kB_in_SI * T_i / (gas_gamma - 1.) / mH_in_kg
+u_0 *= 1e-6 # conversion to internal units
+u_0 *= a**(-3*(1-gas_gamma))
+S_0 = (gas_gamma - 1.) * u_0 * rho_0**(-(gas_gamma - 1.))
+
 # Mean quantities over time
-z = np.zeros(120)
-a = np.zeros(120)
-S_mean = np.zeros(120)
-S_std = np.zeros(120)
-u_mean = np.zeros(120)
-u_std = np.zeros(120)
-P_mean = np.zeros(120)
-P_std = np.zeros(120)
-rho_mean = np.zeros(120)
-rho_std = np.zeros(120)
-
-vx_mean = np.zeros(120)
-vy_mean = np.zeros(120)
-vz_mean = np.zeros(120)
-vx_std = np.zeros(120)
-vy_std = np.zeros(120)
-vz_std = np.zeros(120)
-
-for i in range(120):
+z = np.zeros(119)
+a = np.zeros(119)
+S_mean = np.zeros(119)
+S_std = np.zeros(119)
+u_mean = np.zeros(119)
+u_std = np.zeros(119)
+P_mean = np.zeros(119)
+P_std = np.zeros(119)
+rho_mean = np.zeros(119)
+rho_std = np.zeros(119)
+
+vx_mean = np.zeros(119)
+vy_mean = np.zeros(119)
+vz_mean = np.zeros(119)
+vx_std = np.zeros(119)
+vy_std = np.zeros(119)
+vz_std = np.zeros(119)
+
+for i in range(119):
     sim = h5py.File("box_%04d.hdf5"%i, "r")
 
     z[i] = sim["/Cosmology"].attrs["Redshift"][0]
@@ -136,21 +145,45 @@ figure()
 
 # Density evolution --------------------------------
 subplot(231)#, yscale="log")
-semilogx(a, rho_mean, '-', color='r', lw=1)
+semilogx(a, rho_mean / rho_0, '-', color='r', lw=1)
+semilogx([1e-10, 1e10], np.ones(2), '-', color='0.6', lw=1.)
+semilogx([1e-10, 1e10], np.ones(2)*0.99, '--', color='0.6', lw=1.)
+semilogx([1e-10, 1e10], np.ones(2)*1.01, '--', color='0.6', lw=1.)
+text(1e-2, 1.0105, "+1\\%", color='0.6', fontsize=9)
+text(1e-2, 0.9895, "-1\\%", color='0.6', fontsize=9, va="top")
+text(1e-2, 1.015, "$\\rho_0=%.3f$"%rho_0) 
+ylim(0.98, 1.02)
+xlim(8e-3, 1.1)
 xlabel("${\\rm Scale-factor}$", labelpad=0.)
-ylabel("${\\rm Comoving~density}$", labelpad=0.)
+ylabel("${\\rm Comoving~density}~\\rho / \\rho_0$", labelpad=0.)
 
 # Thermal energy evolution --------------------------------
 subplot(232)#, yscale="log")
-semilogx(a, u_mean, '-', color='r', lw=1)
+semilogx(a, u_mean / u_0, '-', color='r', lw=1)
+semilogx([1e-10, 1e10], np.ones(2), '-', color='0.6', lw=1.)
+semilogx([1e-10, 1e10], np.ones(2)*0.99, '--', color='0.6', lw=1.)
+semilogx([1e-10, 1e10], np.ones(2)*1.01, '--', color='0.6', lw=1.)
+text(1e-2, 1.0105, "+1\\%", color='0.6', fontsize=9)
+text(1e-2, 0.9895, "-1\\%", color='0.6', fontsize=9, va="top")
+text(1e-2, 1.015, "$u_0=%.3e$"%(u_0)) 
+ylim(0.98, 1.02)
+xlim(8e-3, 1.1)
 xlabel("${\\rm Scale-factor}$", labelpad=0.)
-ylabel("${\\rm Comoving~internal~energy}$", labelpad=0.)
+ylabel("${\\rm Comoving~internal~energy}~u / u_0$", labelpad=0.)
 
 # Entropy evolution --------------------------------
 subplot(233)#, yscale="log")
-semilogx(a, S_mean, '-', color='r', lw=1)
+semilogx(a, S_mean / S_0, '-', color='r', lw=1)
+semilogx([1e-10, 1e10], np.ones(2), '-', color='0.6', lw=1.)
+semilogx([1e-10, 1e10], np.ones(2)*0.99, '--', color='0.6', lw=1.)
+semilogx([1e-10, 1e10], np.ones(2)*1.01, '--', color='0.6', lw=1.)
+text(1e-2, 1.0105, "+1\\%", color='0.6', fontsize=9)
+text(1e-2, 0.9895, "-1\\%", color='0.6', fontsize=9, va="top")
+text(1e-2, 1.015, "$A_0=%.3e$"%(S_0)) 
+ylim(0.98, 1.02)
+xlim(8e-3, 1.1)
 xlabel("${\\rm Scale-factor}$", labelpad=0.)
-ylabel("${\\rm Comoving~entropy}$", labelpad=0.)
+ylabel("${\\rm Comoving~entropy}~A / A_0$", labelpad=0.)
 
 # Peculiar velocity evolution ---------------------
 subplot(234)
@@ -158,7 +191,7 @@ semilogx(a, vx_mean, '-', color='r', lw=1)
 semilogx(a, vy_mean, '-', color='g', lw=1)
 semilogx(a, vz_mean, '-', color='b', lw=1)
 xlabel("${\\rm Scale-factor}$", labelpad=0.)
-ylabel("${\\rm Peculiar~velocity~mean}$", labelpad=0.)
+ylabel("${\\rm Peculiar~velocity~mean}$", labelpad=-5.)
 
 # Peculiar velocity evolution ---------------------
 subplot(235)
diff --git a/examples/Gravity_glass/README b/examples/Gravity_glass/README
new file mode 100644
index 0000000000000000000000000000000000000000..2df2eb1e72cd2979750f9232936a6e183e8636da
--- /dev/null
+++ b/examples/Gravity_glass/README
@@ -0,0 +1,8 @@
+This example can be used to generate a glass file for gravity calculation.
+The makeIC.py script will generate a uniform Poisson distribution of particles
+in a cubic box with zero initial velocities.
+
+By running the code with the SWIFT configuration option --enable-glass-making
+the code will run with gravity as a repulsive force and the particles will
+moe towards a state of minimal energy. These glass files can be used to then
+start simulations with a minimal level of noise.
diff --git a/examples/UniformDMBox/makeIC.py b/examples/Gravity_glass/makeIC.py
similarity index 77%
rename from examples/UniformDMBox/makeIC.py
rename to examples/Gravity_glass/makeIC.py
index 8f3cd943b3cf19c4ae231d125c5ef97d076e0e8e..1a3fde9e2868c8881923fa61d1c308bca0f2f095 100644
--- a/examples/UniformDMBox/makeIC.py
+++ b/examples/Gravity_glass/makeIC.py
@@ -20,7 +20,7 @@
 
 import h5py
 import sys
-from numpy import *
+import numpy as np
 
 # Generates a swift IC file containing a cartesian distribution of DM particles
 # with a density of 1
@@ -30,7 +30,7 @@ periodic= 1           # 1 For periodic box
 boxSize = 1.
 rho = 1.
 L = int(sys.argv[1])  # Number of particles along one axis
-fileName = "uniformDMBox_%d.hdf5"%L 
+fileName = "uniform_DM_box.hdf5"
 
 #---------------------------------------------------
 numPart = L**3
@@ -69,27 +69,16 @@ grp.attrs["Unit temperature in cgs (U_T)"] = 1.
 #Particle group
 grp = file.create_group("/PartType1")
 
-v  = zeros((numPart, 3))
-ds = grp.create_dataset('Velocities', (numPart, 3), 'f')
-ds[()] = v
-v = zeros(1)
+v  = np.zeros((numPart, 3))
+ds = grp.create_dataset('Velocities', (numPart, 3), 'f', data=v)
 
-m = full((numPart, 1), mass)
-ds = grp.create_dataset('Masses', (numPart,1), 'f')
-ds[()] = m
-m = zeros(1)
+m = np.full((numPart, 1), mass)
+ds = grp.create_dataset('Masses', (numPart,1), 'f', data=m)
 
-ids = linspace(0, numPart, numPart, endpoint=False).reshape((numPart,1))
+ids = np.linspace(0, numPart, numPart, endpoint=False).reshape((numPart,1))
 ds = grp.create_dataset('ParticleIDs', (numPart, 1), 'L')
 ds[()] = ids + 1
-x      = ids % L;
-y      = ((ids - x) / L) % L;
-z      = (ids - x - L * y) / L**2;
-coords = zeros((numPart, 3))
-coords[:,0] = z[:,0] * boxSize / L + boxSize / (2*L)
-coords[:,1] = y[:,0] * boxSize / L + boxSize / (2*L)
-coords[:,2] = x[:,0] * boxSize / L + boxSize / (2*L)
-ds = grp.create_dataset('Coordinates', (numPart, 3), 'd')
-ds[()] = coords
+coords = np.random.rand(numPart, 3) * boxSize
+ds = grp.create_dataset('Coordinates', (numPart, 3), 'd', data=coords)
 
 file.close()
diff --git a/examples/Gravity_glass/uniform_DM_box.yml b/examples/Gravity_glass/uniform_DM_box.yml
new file mode 100644
index 0000000000000000000000000000000000000000..8f3ef6f025afb1a92320eeb702b50e8bf4befce6
--- /dev/null
+++ b/examples/Gravity_glass/uniform_DM_box.yml
@@ -0,0 +1,44 @@
+# Define the system of units to use internally. 
+InternalUnitSystem:
+  UnitMass_in_cgs:     1   # Grams
+  UnitLength_in_cgs:   1   # Centimeters
+  UnitVelocity_in_cgs: 1   # Centimeters per second
+  UnitCurrent_in_cgs:  1   # Amperes
+  UnitTemp_in_cgs:     1   # Kelvin
+
+# Let's overwrite G to make this more effective
+PhysicalConstants:
+  G:                   1.
+
+# Parameters governing the time integration
+TimeIntegration:
+  time_begin: 0.    
+  time_end:   100.  
+  dt_min:     1e-6  
+  dt_max:     1.  
+
+Scheduler:
+  max_top_level_cells: 8
+  
+# Parameters governing the snapshots
+Snapshots:
+  basename:            uniform_DM_box
+  time_first:          0. 
+  delta_time:          1.
+  compression:         4
+
+# Parameters for the self-gravity scheme
+Gravity:
+  eta:                     0.025  
+  theta:                   0.3    
+  mesh_side_length:        32
+  comoving_softening:      0.001
+  max_physical_softening:  0.001
+ 
+# Parameters governing the conserved quantities statistics
+Statistics:
+  delta_time:          0.1
+
+# Parameters related to the initial conditions
+InitialConditions:
+  file_name:  ./uniform_DM_box.hdf5
diff --git a/examples/Makefile.am b/examples/Makefile.am
index e4a12fc1a90c0f52c767efd378f4bea646c0d425..02664eea177048a67690b9e6af04d10b2e22aa31 100644
--- a/examples/Makefile.am
+++ b/examples/Makefile.am
@@ -57,6 +57,7 @@ swift_mpi_LDADD =  ../src/.libs/libswiftsim_mpi.a $(MPI_LIBS) $(EXTRA_LIBS)
 
 # Scripts to generate ICs
 EXTRA_DIST = CoolingBox/coolingBox.yml CoolingBox/energy_plot.py CoolingBox/makeIC.py CoolingBox/run.sh \
+             ConstantCosmoVolume/run.sh ConstantCosmoVolume/makeIC.py ConstantCosmoVolume/plotSolution.py ConstantCosmoVolume/constant_volume.yml \
 	     EAGLE_6/eagle_6.yml EAGLE_6/getIC.sh EAGLE_6/README EAGLE_6/run.sh \
 	     EAGLE_12/eagle_12.yml EAGLE_12/getIC.sh EAGLE_12/README EAGLE_12/run.sh \
 	     EAGLE_25/eagle_25.yml EAGLE_25/getIC.sh EAGLE_25/README EAGLE_25/run.sh \
@@ -94,7 +95,7 @@ EXTRA_DIST = CoolingBox/coolingBox.yml CoolingBox/energy_plot.py CoolingBox/make
 	     SquareTest_2D/makeIC.py SquareTest_2D/plotSolution.py SquareTest_2D/run.sh SquareTest_2D/square.yml \
 	     UniformBox_2D/makeIC.py UniformBox_2D/run.sh UniformBox_2D/uniformPlane.yml \
 	     UniformBox_3D/makeICbig.py UniformBox_3D/makeIC.py UniformBox_3D/run.sh UniformBox_3D/uniformBox.yml \
-	     UniformDMBox/makeIC.py \
+	     Gravity_glass/makeIC.py Gravity_glass/README Gravity_glass/uniform_DM_box.yml \
              ZeldovichPancake_3D/makeIC.py ZeldovichPancake_3D/zeldovichPancake.yml ZeldovichPancake_3D/run.sh ZeldovichPancake_3D/plotSolution.py
 
 # Default parameter file
diff --git a/examples/UniformDMBox/plot_gravity_checks.py b/examples/UniformDMBox/plot_gravity_checks.py
deleted file mode 100644
index 5efd5847ca9749fffaee48e586c0a1976fbac9d5..0000000000000000000000000000000000000000
--- a/examples/UniformDMBox/plot_gravity_checks.py
+++ /dev/null
@@ -1,219 +0,0 @@
-#!/usr/bin/env python
-
-import sys
-import glob
-import re
-import numpy as np
-import matplotlib.pyplot as plt
-
-params = {'axes.labelsize': 14,
-'axes.titlesize': 18,
-'font.size': 12,
-'legend.fontsize': 12,
-'xtick.labelsize': 14,
-'ytick.labelsize': 14,
-'text.usetex': True,
-'figure.figsize': (10, 10),
-'figure.subplot.left'    : 0.06,
-'figure.subplot.right'   : 0.99  ,
-'figure.subplot.bottom'  : 0.06  ,
-'figure.subplot.top'     : 0.985  ,
-'figure.subplot.wspace'  : 0.14  ,
-'figure.subplot.hspace'  : 0.14  ,
-'lines.markersize' : 6,
-'lines.linewidth' : 3.,
-'text.latex.unicode': True
-}
-plt.rcParams.update(params)
-plt.rc('font',**{'family':'sans-serif','sans-serif':['Times']})
-
-min_error = 1e-6
-max_error = 1e-1
-num_bins = 51
-
-# Construct the bins
-bin_edges = np.linspace(np.log10(min_error), np.log10(max_error), num_bins + 1)
-bin_size = (np.log10(max_error) - np.log10(min_error)) / num_bins
-bins = 0.5*(bin_edges[1:] + bin_edges[:-1])
-bin_edges = 10**bin_edges
-bins = 10**bins
-
-# Colours
-cols = ['b', 'g', 'r', 'm']
-
-# Time-step to plot
-step = int(sys.argv[1])
-
-# Find the files for the different expansion orders
-order_list = glob.glob("gravity_checks_step%d_order*.dat"%step)
-num_order = len(order_list)
-
-# Get the multipole orders
-order = np.zeros(num_order)
-for i in range(num_order):
-    order[i] = int(order_list[i][26])
-    
-# Start the plot
-plt.figure()
-
-# Get the Gadget-2 data if existing
-gadget2_file_list = glob.glob("forcetest_gadget2.txt")
-if len(gadget2_file_list) != 0:
-
-    gadget2_data = np.loadtxt(gadget2_file_list[0])
-    gadget2_ids = gadget2_data[:,0]
-    gadget2_pos = gadget2_data[:,1:4]
-    gadget2_a_exact = gadget2_data[:,4:7]
-    gadget2_a_grav = gadget2_data[:, 7:10]
-
-    # Sort stuff
-    sort_index = np.argsort(gadget2_ids)
-    gadget2_ids = gadget2_ids[sort_index]
-    gadget2_pos = gadget2_pos[sort_index, :]
-    gadget2_a_exact = gadget2_a_exact[sort_index, :]
-    gadget2_a_grav = gadget2_a_grav[sort_index, :]
-    
-    # Compute the error norm
-    diff = gadget2_a_exact - gadget2_a_grav
-
-    norm_diff = np.sqrt(diff[:,0]**2 + diff[:,1]**2 + diff[:,2]**2)
-    norm_a = np.sqrt(gadget2_a_exact[:,0]**2 + gadget2_a_exact[:,1]**2 + gadget2_a_exact[:,2]**2)
-
-    norm_error = norm_diff / norm_a
-    error_x = abs(diff[:,0]) / norm_a
-    error_y = abs(diff[:,1]) / norm_a
-    error_z = abs(diff[:,2]) / norm_a
-    
-    # Bin the error
-    norm_error_hist,_ = np.histogram(norm_error, bins=bin_edges, density=False) / (np.size(norm_error) * bin_size)
-    error_x_hist,_ = np.histogram(error_x, bins=bin_edges, density=False) / (np.size(norm_error) * bin_size)
-    error_y_hist,_ = np.histogram(error_y, bins=bin_edges, density=False) / (np.size(norm_error) * bin_size)
-    error_z_hist,_ = np.histogram(error_z, bins=bin_edges, density=False) / (np.size(norm_error) * bin_size)
-    
-    norm_median = np.median(norm_error)
-    median_x = np.median(error_x)
-    median_y = np.median(error_y)
-    median_z = np.median(error_z)
-
-    norm_per95 = np.percentile(norm_error,95)
-    per95_x = np.percentile(error_x,95)
-    per95_y = np.percentile(error_y,95)
-    per95_z = np.percentile(error_z,95)
-
-    plt.subplot(221)    
-    plt.semilogx(bins, norm_error_hist, 'k--', label="Gadget-2")
-    plt.plot([norm_median, norm_median], [2.7, 3], 'k-', lw=1)
-    plt.plot([norm_per95, norm_per95], [2.7, 3], 'k:', lw=1)
-    plt.subplot(222)
-    plt.semilogx(bins, error_x_hist, 'k--', label="Gadget-2")
-    plt.plot([median_x, median_x], [1.8, 2], 'k-', lw=1)
-    plt.plot([per95_x, per95_x], [1.8, 2], 'k:', lw=1)
-    plt.subplot(223)    
-    plt.semilogx(bins, error_y_hist, 'k--', label="Gadget-2")
-    plt.plot([median_y, median_y], [1.8, 2], 'k-', lw=1)
-    plt.plot([per95_y, per95_y], [1.8, 2], 'k:', lw=1)
-    plt.subplot(224)    
-    plt.semilogx(bins, error_z_hist, 'k--', label="Gadget-2")
-    plt.plot([median_z, median_z], [1.8, 2], 'k-', lw=1)
-    plt.plot([per95_z, per95_z], [1.8, 2], 'k:', lw=1)
-
-
-# Plot the different histograms
-for i in range(num_order-1, -1, -1):
-    data = np.loadtxt(order_list[i])
-    ids = data[:,0]
-    pos = data[:,1:4]
-    a_exact = data[:,4:7]
-    a_grav = data[:, 7:10]
-
-    # Sort stuff
-    sort_index = np.argsort(ids)
-    ids = ids[sort_index]
-    pos = pos[sort_index, :]
-    a_exact = a_exact[sort_index, :]
-    a_grav = a_grav[sort_index, :]
-
-    # Cross-checks
-    if not np.array_equal(ids, gadget2_ids):
-        print "Comparing different IDs !"
-
-    if not np.array_equal(pos, gadget2_pos):
-        print "Comparing different positions ! max difference:", np.max(pos - gadget2_pos)
-
-    if not np.array_equal(a_exact, gadget2_a_exact):
-        print "Comparing different exact accelerations ! max difference:", np.max(a_exact - gadget2_a_exact)
-        
-        
-    # Compute the error norm
-    diff = a_exact - a_grav
-
-    norm_diff = np.sqrt(diff[:,0]**2 + diff[:,1]**2 + diff[:,2]**2)
-    norm_a = np.sqrt(a_exact[:,0]**2 + a_exact[:,1]**2 + a_exact[:,2]**2)
-
-    norm_error = norm_diff / norm_a
-    error_x = abs(diff[:,0]) / norm_a
-    error_y = abs(diff[:,1]) / norm_a
-    error_z = abs(diff[:,2]) / norm_a
-    
-    # Bin the error
-    norm_error_hist,_ = np.histogram(norm_error, bins=bin_edges, density=False) / (np.size(norm_error) * bin_size)
-    error_x_hist,_ = np.histogram(error_x, bins=bin_edges, density=False) / (np.size(norm_error) * bin_size)
-    error_y_hist,_ = np.histogram(error_y, bins=bin_edges, density=False) / (np.size(norm_error) * bin_size)
-    error_z_hist,_ = np.histogram(error_z, bins=bin_edges, density=False) / (np.size(norm_error) * bin_size)
-
-    norm_median = np.median(norm_error)
-    median_x = np.median(error_x)
-    median_y = np.median(error_y)
-    median_z = np.median(error_z)
-
-    norm_per95 = np.percentile(norm_error,95)
-    per95_x = np.percentile(error_x,95)
-    per95_y = np.percentile(error_y,95)
-    per95_z = np.percentile(error_z,95)
-    
-    plt.subplot(221)
-    plt.semilogx(bins, norm_error_hist, color=cols[i],label="SWIFT m-poles order %d"%order[i])
-    plt.plot([norm_median, norm_median], [2.7, 3],'-', color=cols[i], lw=1)
-    plt.plot([norm_per95, norm_per95], [2.7, 3],':', color=cols[i], lw=1)
-    plt.subplot(222)    
-    plt.semilogx(bins, error_x_hist, color=cols[i],label="SWIFT m-poles order %d"%order[i])
-    plt.plot([median_x, median_x], [1.8, 2],'-', color=cols[i], lw=1)
-    plt.plot([per95_x, per95_x], [1.8, 2],':', color=cols[i], lw=1)
-    plt.subplot(223)    
-    plt.semilogx(bins, error_y_hist, color=cols[i],label="SWIFT m-poles order %d"%order[i])
-    plt.plot([median_y, median_y], [1.8, 2],'-', color=cols[i], lw=1)
-    plt.plot([per95_y, per95_y], [1.8, 2],':', color=cols[i], lw=1)
-    plt.subplot(224)    
-    plt.semilogx(bins, error_z_hist, color=cols[i],label="SWIFT m-poles order %d"%order[i])
-    plt.plot([median_z, median_z], [1.8, 2],'-', color=cols[i], lw=1)
-    plt.plot([per95_z, per95_z], [1.8, 2],':', color=cols[i], lw=1)
-
-    
-plt.subplot(221)
-plt.xlabel("$|\delta \overrightarrow{a}|/|\overrightarrow{a}_{exact}|$")
-plt.ylabel("Density")
-plt.xlim(min_error, 2*max_error)
-plt.ylim(0,3)
-plt.legend(loc="upper left")
-plt.subplot(222)    
-plt.xlabel("$\delta a_x/|\overrightarrow{a}_{exact}|$")
-plt.ylabel("Density")
-plt.xlim(min_error, 2*max_error)
-plt.ylim(0,2)
-#plt.legend(loc="center left")
-plt.subplot(223)    
-plt.xlabel("$\delta a_y/|\overrightarrow{a}_{exact}|$")
-plt.ylabel("Density")
-plt.xlim(min_error, 2*max_error)
-plt.ylim(0,2)
-#plt.legend(loc="center left")
-plt.subplot(224)    
-plt.xlabel("$\delta a_z/|\overrightarrow{a}_{exact}|$")
-plt.ylabel("Density")
-plt.xlim(min_error, 2*max_error)
-plt.ylim(0,2)
-#plt.legend(loc="center left")
-
-
-
-plt.savefig("gravity_checks_step%d.png"%step)
diff --git a/examples/UniformDMBox/uniformBox.yml b/examples/UniformDMBox/uniformBox.yml
deleted file mode 100644
index 1abb256671f1cc8c87daa711bd63f7ea6abdbbab..0000000000000000000000000000000000000000
--- a/examples/UniformDMBox/uniformBox.yml
+++ /dev/null
@@ -1,38 +0,0 @@
-# Define the system of units to use internally. 
-InternalUnitSystem:
-  UnitMass_in_cgs:     1   # Grams
-  UnitLength_in_cgs:   1   # Centimeters
-  UnitVelocity_in_cgs: 1   # Centimeters per second
-  UnitCurrent_in_cgs:  1   # Amperes
-  UnitTemp_in_cgs:     1   # Kelvin
-
-# Parameters governing the time integration
-TimeIntegration:
-  time_begin: 0.    # The starting time of the simulation (in internal units).
-  time_end:   100.    # The end time of the simulation (in internal units).
-  dt_min:     1e-6  # The minimal time-step size of the simulation (in internal units).
-  dt_max:     1.  # The maximal time-step size of the simulation (in internal units).
-
-Scheduler:
-  max_top_level_cells: 8
-  cell_split_size:     50
-  
-# Parameters governing the snapshots
-Snapshots:
-  basename:            uniformDMBox # Common part of the name of output files
-  time_first:          0.           # Time of the first output (in internal units)
-  delta_time:          10.         # Time difference between consecutive outputs (in internal units)
-
-# Parameters for the self-gravity scheme
-Gravity:
-  eta:                   0.025    # Constant dimensionless multiplier for time integration. 
-  theta:                 0.8      # Opening angle (Multipole acceptance criterion)
-  epsilon:               0.01     # Softening length (in internal units).
- 
-# Parameters governing the conserved quantities statistics
-Statistics:
-  delta_time:          5. # Time between statistics output
-
-# Parameters related to the initial conditions
-InitialConditions:
-  file_name:  ./uniformDMBox_16.hdf5     # The file to read
diff --git a/examples/main.c b/examples/main.c
index 89171561695d7b7031904064099ebc036dcde4bc..c0faa6ef84b03921080ace21818018201bba8d24 100644
--- a/examples/main.c
+++ b/examples/main.c
@@ -634,7 +634,7 @@ int main(int argc, char *argv[]) {
     /* Initialize unit system and constants */
     units_init_from_params(&us, params, "InternalUnitSystem");
     phys_const_init(&us, params, &prog_const);
-    if (myrank == 0 && verbose > 0) {
+    if (myrank == 0) {
       message("Internal unit system: U_M = %e g.", us.UnitMass_in_cgs);
       message("Internal unit system: U_L = %e cm.", us.UnitLength_in_cgs);
       message("Internal unit system: U_t = %e s.", us.UnitTime_in_cgs);
@@ -653,11 +653,20 @@ int main(int argc, char *argv[]) {
     /* Initialise the hydro properties */
     if (with_hydro)
       hydro_props_init(&hydro_properties, &prog_const, &us, params);
-    if (with_hydro) eos_init(&eos, &prog_const, &us, params);
+    else
+      bzero(&hydro_properties, sizeof(struct hydro_props));
+
+    /* Initialise the equation of state */
+    if (with_hydro)
+      eos_init(&eos, &prog_const, &us, params);
+    else
+      bzero(&eos, sizeof(struct eos_parameters));
 
     /* Initialise the gravity properties */
     if (with_self_gravity)
       gravity_props_init(&gravity_properties, params, &cosmo, with_cosmology);
+    else
+      bzero(&gravity_properties, sizeof(struct gravity_props));
 
     /* Read particles and space information from (GADGET) ICs */
     char ICfileName[200] = "";
@@ -936,6 +945,9 @@ int main(int argc, char *argv[]) {
     engine_dump_snapshot(&e);
     engine_print_stats(&e);
 
+    /* Is there a dump before the end of the first time-step? */
+    engine_check_for_dumps(&e);
+
 #ifdef HAVE_VELOCIRAPTOR
     /* Call VELOCIraptor for the first time after the first snapshot dump. */
     // if (e.policy & engine_policy_structure_finding) {
diff --git a/format.sh b/format.sh
index 9fea13bf363b1513f0e4356a67b2c9d1166771d1..91346334c9b2eaf9fbb343aba44f8a02d866d1ef 100755
--- a/format.sh
+++ b/format.sh
@@ -1,3 +1,80 @@
 #!/bin/bash
 
-clang-format-5.0 -style=file -i src/*.[ch] src/*/*.[ch] src/*/*/*.[ch] examples/main.c tests/*.[ch]
+# Clang format command, can be overridden using CLANG_FORMAT_CMD.
+# We currrently use version 5.0 so any overrides should provide that.
+clang=${CLANG_FORMAT_CMD:="clang-format-5.0"}
+
+# Formatting command
+cmd="$clang -style=file $(git ls-files | grep '\.[ch]$')"
+
+# Test if `clang-format-5.0` works
+command -v $clang > /dev/null
+if [[ $? -ne 0 ]]
+then
+    echo "ERROR: cannot find $clang"
+    exit 1
+fi
+
+# Print the help
+function show_help {
+    echo -e "This script formats SWIFT according to Google style"
+    echo -e "  -h, --help \t Show this help"
+    echo -e "  -t, --test \t Test if SWIFT is well formatted"
+}
+
+# Parse arguments (based on https://stackoverflow.com/questions/192249)
+TEST=0
+while [[ $# -gt 0 ]]
+do
+    key="$1"
+
+    case $key in
+	# print the help and exit
+	-h|--help)
+	    show_help
+	    exit
+	    ;;
+	# check if the code is well formatted
+	-t|--test)
+	    TEST=1
+	    shift
+	    ;;
+	# unknown option
+	*)
+	    echo "Argument '$1' not implemented"
+	    show_help
+	    exit
+	    ;;
+    esac
+done
+
+# Run the required commands
+if [[ $TEST -eq 1 ]]
+then
+    # Note trapping the exit status from both commands in the pipe. Also note
+    # do not use -q in grep as that closes the pipe on first match and we get
+    # a SIGPIPE error.
+    echo "Testing if SWIFT is correctly formatted"
+    $cmd -output-replacements-xml | grep "<replacement " > /dev/null
+    status=("${PIPESTATUS[@]}")
+
+    #  Trap if first command failed. Note 141 is SIGPIPE, that happens when no
+    #  output
+    if [[ ${status[0]} -ne 0 ]]
+    then
+       echo "ERROR: $clang command failed"
+       exit 1
+    fi
+
+    # Check formatting
+    if [[ ${status[1]} -eq 0 ]]
+    then
+ 	echo "ERROR: needs formatting"
+ 	exit 1
+    else
+        echo "...is correctly formatted"
+    fi
+else
+    echo "Formatting SWIFT"
+    $cmd -i
+fi
diff --git a/src/cell.c b/src/cell.c
index 81fe668daa142dfbe078039836c8019ba44db9fb..78b9fdd2a2b6f252264b041b726488e59cc0350b 100644
--- a/src/cell.c
+++ b/src/cell.c
@@ -186,7 +186,7 @@ int cell_pack(struct cell *restrict c, struct pcell *restrict pc) {
   pc->count = c->count;
   pc->gcount = c->gcount;
   pc->scount = c->scount;
-  c->tag = pc->tag = atomic_inc(&cell_next_tag) % cell_max_tag;
+
 #ifdef SWIFT_DEBUG_CHECKS
   pc->cellID = c->cellID;
 #endif
@@ -197,8 +197,9 @@ int cell_pack(struct cell *restrict c, struct pcell *restrict pc) {
     if (c->progeny[k] != NULL) {
       pc->progeny[k] = count;
       count += cell_pack(c->progeny[k], &pc[count]);
-    } else
+    } else {
       pc->progeny[k] = -1;
+    }
 
   /* Return the number of packed cells used. */
   c->pcell_size = count;
@@ -210,6 +211,40 @@ int cell_pack(struct cell *restrict c, struct pcell *restrict pc) {
 #endif
 }
 
+/**
+ * @brief Pack the tag of the given cell and all it's sub-cells.
+ *
+ * @param c The #cell.
+ * @param tags Pointer to an array of packed tags.
+ *
+ * @return The number of packed tags.
+ */
+int cell_pack_tags(const struct cell *c, int *tags) {
+
+#ifdef WITH_MPI
+
+  /* Start by packing the data of the current cell. */
+  tags[0] = c->tag;
+
+  /* Fill in the progeny, depth-first recursion. */
+  int count = 1;
+  for (int k = 0; k < 8; k++)
+    if (c->progeny[k] != NULL)
+      count += cell_pack_tags(c->progeny[k], &tags[count]);
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (c->pcell_size != count) error("Inconsistent tag and pcell count!");
+#endif  // SWIFT_DEBUG_CHECKS
+
+  /* Return the number of packed tags used. */
+  return count;
+
+#else
+  error("SWIFT was not compiled with MPI support.");
+  return 0;
+#endif
+}
+
 /**
  * @brief Unpack the data of a given cell and its sub-cells.
  *
@@ -236,7 +271,6 @@ int cell_unpack(struct pcell *restrict pc, struct cell *restrict c,
   c->count = pc->count;
   c->gcount = pc->gcount;
   c->scount = pc->scount;
-  c->tag = pc->tag;
 #ifdef SWIFT_DEBUG_CHECKS
   c->cellID = pc->cellID;
 #endif
@@ -283,6 +317,43 @@ int cell_unpack(struct pcell *restrict pc, struct cell *restrict c,
 #endif
 }
 
+/**
+ * @brief Unpack the tags of a given cell and its sub-cells.
+ *
+ * @param tags An array of tags.
+ * @param c The #cell in which to unpack the tags.
+ *
+ * @return The number of tags created.
+ */
+int cell_unpack_tags(const int *tags, struct cell *restrict c) {
+
+#ifdef WITH_MPI
+
+  /* Unpack the current pcell. */
+  c->tag = tags[0];
+
+  /* Number of new cells created. */
+  int count = 1;
+
+  /* Fill the progeny recursively, depth-first. */
+  for (int k = 0; k < 8; k++)
+    if (c->progeny[k] != NULL) {
+      count += cell_unpack_tags(&tags[count], c->progeny[k]);
+    }
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (c->pcell_size != count) error("Inconsistent tag and pcell count!");
+#endif  // SWIFT_DEBUG_CHECKS
+
+  /* Return the total number of unpacked tags. */
+  return count;
+
+#else
+  error("SWIFT was not compiled with MPI support.");
+  return 0;
+#endif
+}
+
 /**
  * @brief Pack the time information of the given cell and all it's sub-cells.
  *
@@ -2456,6 +2527,11 @@ void cell_set_super_mapper(void *map_data, int num_elements, void *extra_data) {
   for (int ind = 0; ind < num_elements; ind++) {
     struct cell *c = &((struct cell *)map_data)[ind];
 
+    /* All top-level cells get an MPI tag. */
+#ifdef WITH_MPI
+    if (c->tag < 0 && c->sendto) cell_tag(c);
+#endif
+
     /* Super-pointer for hydro */
     if (e->policy & engine_policy_hydro) cell_set_super_hydro(c, NULL);
 
diff --git a/src/cell.h b/src/cell.h
index 6a7662b46d40298e9ce7c84cce15c4cea1bb7ac8..356e03773c7b7018db9a1a746988934158e10016 100644
--- a/src/cell.h
+++ b/src/cell.h
@@ -116,9 +116,6 @@ struct pcell {
   /*! Number of #spart in this cell. */
   int scount;
 
-  /*! tag used for MPI communication. */
-  int tag;
-
   /*! Relative indices of the cell's progeny. */
   int progeny[8];
 
@@ -490,6 +487,8 @@ int cell_slocktree(struct cell *c);
 void cell_sunlocktree(struct cell *c);
 int cell_pack(struct cell *c, struct pcell *pc);
 int cell_unpack(struct pcell *pc, struct cell *c, struct space *s);
+int cell_pack_tags(const struct cell *c, int *tags);
+int cell_unpack_tags(const int *tags, struct cell *c);
 int cell_pack_end_step(struct cell *c, struct pcell_step *pcell);
 int cell_unpack_end_step(struct cell *c, struct pcell_step *pcell);
 int cell_pack_multipoles(struct cell *c, struct gravity_tensors *m);
@@ -642,4 +641,21 @@ __attribute__((always_inline)) INLINE static int cell_need_rebuild_for_pair(
           cj->dmin);
 }
 
+/**
+ * @brief Add a unique tag to a cell.
+ */
+__attribute((always_inline)) INLINE static void cell_tag(struct cell *c) {
+#ifdef WITH_MPI
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (c->tag > 0) error("setting tag for already tagged cell");
+#endif
+
+  if (c->tag < 0 && (c->tag = atomic_inc(&cell_next_tag)) > cell_max_tag)
+    error("Ran out of cell tags.");
+#else
+  error("SWIFT was not compiled with MPI enabled.");
+#endif  // WITH_MPI
+}
+
 #endif /* SWIFT_CELL_H */
diff --git a/src/cosmology.c b/src/cosmology.c
index e9d35125f65777ffb7f143896029c5720c14cc54..f209997dd3faae3ba884632ffa41cca5d6ae0710 100644
--- a/src/cosmology.c
+++ b/src/cosmology.c
@@ -567,8 +567,10 @@ void cosmology_init_no_cosmo(struct cosmology *c) {
   c->drift_fac_interp_table = NULL;
   c->grav_kick_fac_interp_table = NULL;
   c->hydro_kick_fac_interp_table = NULL;
+  c->hydro_kick_corr_interp_table = NULL;
   c->time_interp_table = NULL;
   c->time_interp_table_offset = 0.;
+  c->scale_factor_interp_table = NULL;
 
   c->time_begin = 0.;
   c->time_end = 0.;
diff --git a/src/engine.c b/src/engine.c
index d34da676e2705962abf4106061ae172b59f0f566..ab8c4dd691ef5bec4658db49573fe5b0273384df 100644
--- a/src/engine.c
+++ b/src/engine.c
@@ -1324,13 +1324,16 @@ void engine_addtasks_send_hydro(struct engine *e, struct cell *ci,
     /* Create the tasks and their dependencies? */
     if (t_xv == NULL) {
 
-      t_xv = scheduler_addtask(s, task_type_send, task_subtype_xv,
-                               6 * ci->tag + 0, 0, ci, cj);
-      t_rho = scheduler_addtask(s, task_type_send, task_subtype_rho,
-                                6 * ci->tag + 1, 0, ci, cj);
+      /* Create a tag for this cell. */
+      if (ci->tag < 0) cell_tag(ci);
+
+      t_xv = scheduler_addtask(s, task_type_send, task_subtype_xv, ci->tag, 0,
+                               ci, cj);
+      t_rho = scheduler_addtask(s, task_type_send, task_subtype_rho, ci->tag, 0,
+                                ci, cj);
 #ifdef EXTRA_HYDRO_LOOP
       t_gradient = scheduler_addtask(s, task_type_send, task_subtype_gradient,
-                                     6 * ci->tag + 3, 0, ci, cj);
+                                     ci->tag, 0, ci, cj);
 #endif
 
 #ifdef EXTRA_HYDRO_LOOP
@@ -1413,8 +1416,11 @@ void engine_addtasks_send_gravity(struct engine *e, struct cell *ci,
     /* Create the tasks and their dependencies? */
     if (t_grav == NULL) {
 
-      t_grav = scheduler_addtask(s, task_type_send, task_subtype_gpart,
-                                 6 * ci->tag + 4, 0, ci, cj);
+      /* Create a tag for this cell. */
+      if (ci->tag < 0) cell_tag(ci);
+
+      t_grav = scheduler_addtask(s, task_type_send, task_subtype_gpart, ci->tag,
+                                 0, ci, cj);
 
       /* The sends should unlock the down pass. */
       scheduler_addunlock(s, t_grav, ci->super_gravity->grav_down);
@@ -1473,8 +1479,11 @@ void engine_addtasks_send_timestep(struct engine *e, struct cell *ci,
     /* Create the tasks and their dependencies? */
     if (t_ti == NULL) {
 
-      t_ti = scheduler_addtask(s, task_type_send, task_subtype_tend,
-                               6 * ci->tag + 2, 0, ci, cj);
+      /* Create a tag for this cell. */
+      if (ci->tag < 0) cell_tag(ci);
+
+      t_ti = scheduler_addtask(s, task_type_send, task_subtype_tend, ci->tag, 0,
+                               ci, cj);
 
       /* The super-cell's timestep task should unlock the send_ti task. */
       scheduler_addunlock(s, ci->super->timestep, t_ti);
@@ -1514,14 +1523,19 @@ void engine_addtasks_recv_hydro(struct engine *e, struct cell *c,
   /* Have we reached a level where there are any hydro tasks ? */
   if (t_xv == NULL && c->density != NULL) {
 
+#ifdef SWIFT_DEBUG_CHECKS
+    /* Make sure this cell has a valid tag. */
+    if (c->tag < 0) error("Trying to receive from untagged cell.");
+#endif  // SWIFT_DEBUG_CHECKS
+
     /* Create the tasks. */
-    t_xv = scheduler_addtask(s, task_type_recv, task_subtype_xv, 6 * c->tag + 0,
-                             0, c, NULL);
-    t_rho = scheduler_addtask(s, task_type_recv, task_subtype_rho,
-                              6 * c->tag + 1, 0, c, NULL);
+    t_xv = scheduler_addtask(s, task_type_recv, task_subtype_xv, c->tag, 0, c,
+                             NULL);
+    t_rho = scheduler_addtask(s, task_type_recv, task_subtype_rho, c->tag, 0, c,
+                              NULL);
 #ifdef EXTRA_HYDRO_LOOP
     t_gradient = scheduler_addtask(s, task_type_recv, task_subtype_gradient,
-                                   6 * c->tag + 3, 0, c, NULL);
+                                   c->tag, 0, c, NULL);
 #endif
   }
 
@@ -1575,9 +1589,14 @@ void engine_addtasks_recv_gravity(struct engine *e, struct cell *c,
   /* Have we reached a level where there are any gravity tasks ? */
   if (t_grav == NULL && c->grav != NULL) {
 
+#ifdef SWIFT_DEBUG_CHECKS
+    /* Make sure this cell has a valid tag. */
+    if (c->tag < 0) error("Trying to receive from untagged cell.");
+#endif  // SWIFT_DEBUG_CHECKS
+
     /* Create the tasks. */
-    t_grav = scheduler_addtask(s, task_type_recv, task_subtype_gpart,
-                               6 * c->tag + 4, 0, c, NULL);
+    t_grav = scheduler_addtask(s, task_type_recv, task_subtype_gpart, c->tag, 0,
+                               c, NULL);
   }
 
   c->recv_grav = t_grav;
@@ -1612,8 +1631,13 @@ void engine_addtasks_recv_timestep(struct engine *e, struct cell *c,
   /* Have we reached a level where there are any self/pair tasks ? */
   if (t_ti == NULL && (c->grav != NULL || c->density != NULL)) {
 
-    t_ti = scheduler_addtask(s, task_type_recv, task_subtype_tend,
-                             6 * c->tag + 2, 0, c, NULL);
+#ifdef SWIFT_DEBUG_CHECKS
+    /* Make sure this cell has a valid tag. */
+    if (c->tag < 0) error("Trying to receive from untagged cell.");
+#endif  // SWIFT_DEBUG_CHECKS
+
+    t_ti = scheduler_addtask(s, task_type_recv, task_subtype_tend, c->tag, 0, c,
+                             NULL);
   }
 
   c->recv_ti = t_ti;
@@ -1645,80 +1669,11 @@ void engine_exchange_cells(struct engine *e) {
 #ifdef WITH_MPI
 
   struct space *s = e->s;
-  struct cell *cells = s->cells_top;
-  const int nr_cells = s->nr_cells;
   const int nr_proxies = e->nr_proxies;
-  int offset[nr_cells];
-  MPI_Request reqs_in[engine_maxproxies];
-  MPI_Request reqs_out[engine_maxproxies];
-  MPI_Status status;
   const ticks tic = getticks();
 
-  /* Run through the cells and get the size of the ones that will be sent off.
-   */
-  int count_out = 0;
-  for (int k = 0; k < nr_cells; k++) {
-    offset[k] = count_out;
-    if (cells[k].sendto)
-      count_out += (cells[k].pcell_size = cell_getsize(&cells[k]));
-  }
-
-  /* Allocate the pcells. */
-  struct pcell *pcells = NULL;
-  if (posix_memalign((void **)&pcells, SWIFT_CACHE_ALIGNMENT,
-                     sizeof(struct pcell) * count_out) != 0)
-    error("Failed to allocate pcell buffer.");
-
-  /* Pack the cells. */
-  cell_next_tag = 0;
-  for (int k = 0; k < nr_cells; k++)
-    if (cells[k].sendto) {
-      cell_pack(&cells[k], &pcells[offset[k]]);
-      cells[k].pcell = &pcells[offset[k]];
-    }
-
-  /* Launch the proxies. */
-  for (int k = 0; k < nr_proxies; k++) {
-    proxy_cells_exch1(&e->proxies[k]);
-    reqs_in[k] = e->proxies[k].req_cells_count_in;
-    reqs_out[k] = e->proxies[k].req_cells_count_out;
-  }
-
-  /* Wait for each count to come in and start the recv. */
-  for (int k = 0; k < nr_proxies; k++) {
-    int pid = MPI_UNDEFINED;
-    if (MPI_Waitany(nr_proxies, reqs_in, &pid, &status) != MPI_SUCCESS ||
-        pid == MPI_UNDEFINED)
-      error("MPI_Waitany failed.");
-    // message( "request from proxy %i has arrived." , pid );
-    proxy_cells_exch2(&e->proxies[pid]);
-  }
-
-  /* Wait for all the sends to have finished too. */
-  if (MPI_Waitall(nr_proxies, reqs_out, MPI_STATUSES_IGNORE) != MPI_SUCCESS)
-    error("MPI_Waitall on sends failed.");
-
-  /* Set the requests for the cells. */
-  for (int k = 0; k < nr_proxies; k++) {
-    reqs_in[k] = e->proxies[k].req_cells_in;
-    reqs_out[k] = e->proxies[k].req_cells_out;
-  }
-
-  /* Wait for each pcell array to come in from the proxies. */
-  for (int k = 0; k < nr_proxies; k++) {
-    int pid = MPI_UNDEFINED;
-    if (MPI_Waitany(nr_proxies, reqs_in, &pid, &status) != MPI_SUCCESS ||
-        pid == MPI_UNDEFINED)
-      error("MPI_Waitany failed.");
-    // message( "cell data from proxy %i has arrived." , pid );
-    for (int count = 0, j = 0; j < e->proxies[pid].nr_cells_in; j++)
-      count += cell_unpack(&e->proxies[pid].pcells_in[count],
-                           e->proxies[pid].cells_in[j], e->s);
-  }
-
-  /* Wait for all the sends to have finished too. */
-  if (MPI_Waitall(nr_proxies, reqs_out, MPI_STATUSES_IGNORE) != MPI_SUCCESS)
-    error("MPI_Waitall on sends failed.");
+  /* Exchange the cell structure with neighbouring ranks. */
+  proxy_cells_exchange(e->proxies, e->nr_proxies, e->s);
 
   /* Count the number of particles we need to import and re-allocate
      the buffer if needed. */
@@ -1778,9 +1733,6 @@ void engine_exchange_cells(struct engine *e) {
   s->nr_gparts_foreign = gparts - s->gparts_foreign;
   s->nr_sparts_foreign = sparts - s->sparts_foreign;
 
-  /* Free the pcell buffer. */
-  free(pcells);
-
   if (e->verbose)
     message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
             clocks_getunit());
@@ -1904,7 +1856,7 @@ void engine_exchange_strays(struct engine *e, size_t offset_parts,
   MPI_Request reqs_in[4 * engine_maxproxies];
   MPI_Request reqs_out[4 * engine_maxproxies];
   for (int k = 0; k < e->nr_proxies; k++) {
-    proxy_parts_exch1(&e->proxies[k]);
+    proxy_parts_exchange_first(&e->proxies[k]);
     reqs_in[k] = e->proxies[k].req_parts_count_in;
     reqs_out[k] = e->proxies[k].req_parts_count_out;
   }
@@ -1917,7 +1869,7 @@ void engine_exchange_strays(struct engine *e, size_t offset_parts,
         pid == MPI_UNDEFINED)
       error("MPI_Waitany failed.");
     // message( "request from proxy %i has arrived." , pid );
-    proxy_parts_exch2(&e->proxies[pid]);
+    proxy_parts_exchange_second(&e->proxies[pid]);
   }
 
   /* Wait for all the sends to have finished too. */
@@ -3280,6 +3232,12 @@ void engine_maketasks(struct engine *e) {
 
   tic2 = getticks();
 
+  /* Re-set the tag counter. MPI tags are defined for top-level cells in
+   * cell_set_super_mapper. */
+#ifdef WITH_MPI
+  cell_next_tag = 0;
+#endif
+
   /* Now that the self/pair tasks are at the right level, set the super
    * pointers. */
   threadpool_map(&e->threadpool, cell_set_super_mapper, cells, nr_cells,
@@ -3321,33 +3279,16 @@ void engine_maketasks(struct engine *e) {
   /* Add the communication tasks if MPI is being used. */
   if (e->policy & engine_policy_mpi) {
 
-    /* Loop over the proxies. */
+    /* Loop over the proxies and add the send tasks, which also generates the
+     * cell tags for super-cells. */
     for (int pid = 0; pid < e->nr_proxies; pid++) {
 
       /* Get a handle on the proxy. */
       struct proxy *p = &e->proxies[pid];
 
-      for (int k = 0; k < p->nr_cells_in; k++)
-        engine_addtasks_recv_timestep(e, p->cells_in[k], NULL);
-
       for (int k = 0; k < p->nr_cells_out; k++)
         engine_addtasks_send_timestep(e, p->cells_out[k], p->cells_in[0], NULL);
 
-      /* Loop through the proxy's incoming cells and add the
-         recv tasks for the cells in the proxy that have a hydro connection. */
-      if (e->policy & engine_policy_hydro)
-        for (int k = 0; k < p->nr_cells_in; k++)
-          if (p->cells_in_type[k] & proxy_cell_type_hydro)
-            engine_addtasks_recv_hydro(e, p->cells_in[k], NULL, NULL, NULL);
-
-      /* Loop through the proxy's incoming cells and add the
-         recv tasks for the cells in the proxy that have a gravity connection.
-         */
-      if (e->policy & engine_policy_self_gravity)
-        for (int k = 0; k < p->nr_cells_in; k++)
-          if (p->cells_in_type[k] & proxy_cell_type_gravity)
-            engine_addtasks_recv_gravity(e, p->cells_in[k], NULL);
-
       /* Loop through the proxy's outgoing cells and add the
          send tasks for the cells in the proxy that have a hydro connection. */
       if (e->policy & engine_policy_hydro)
@@ -3365,6 +3306,35 @@ void engine_maketasks(struct engine *e) {
             engine_addtasks_send_gravity(e, p->cells_out[k], p->cells_in[0],
                                          NULL);
     }
+
+    /* Exchange the cell tags. */
+    proxy_tags_exchange(e->proxies, e->nr_proxies, s);
+
+    /* Loop over the proxies and add the recv tasks, which relies on having the
+     * cell tags. */
+    for (int pid = 0; pid < e->nr_proxies; pid++) {
+
+      /* Get a handle on the proxy. */
+      struct proxy *p = &e->proxies[pid];
+
+      for (int k = 0; k < p->nr_cells_in; k++)
+        engine_addtasks_recv_timestep(e, p->cells_in[k], NULL);
+
+      /* Loop through the proxy's incoming cells and add the
+         recv tasks for the cells in the proxy that have a hydro connection. */
+      if (e->policy & engine_policy_hydro)
+        for (int k = 0; k < p->nr_cells_in; k++)
+          if (p->cells_in_type[k] & proxy_cell_type_hydro)
+            engine_addtasks_recv_hydro(e, p->cells_in[k], NULL, NULL, NULL);
+
+      /* Loop through the proxy's incoming cells and add the
+         recv tasks for the cells in the proxy that have a gravity connection.
+         */
+      if (e->policy & engine_policy_self_gravity)
+        for (int k = 0; k < p->nr_cells_in; k++)
+          if (p->cells_in_type[k] & proxy_cell_type_gravity)
+            engine_addtasks_recv_gravity(e, p->cells_in[k], NULL);
+    }
   }
 #endif
 
@@ -3981,14 +3951,15 @@ void engine_rebuild(struct engine *e, int clean_smoothing_length_values) {
   engine_exchange_cells(e);
 
   if (e->policy & engine_policy_self_gravity) engine_exchange_top_multipoles(e);
-
-  if (e->policy & engine_policy_self_gravity)
-    engine_exchange_proxy_multipoles(e);
 #endif
 
   /* Re-build the tasks. */
   engine_maketasks(e);
 
+#ifdef WITH_MPI
+  if (e->policy & engine_policy_self_gravity) engine_exchange_proxy_multipoles(e);
+#endif
+
   /* Make the list of top-level cells that have tasks */
   space_list_cells_with_tasks(e->s);
 
@@ -4791,7 +4762,7 @@ void engine_step(struct engine *e) {
   if (e->verbose) engine_print_task_counts(e);
 
     /* Dump local cells and active particle counts. */
-    /* dumpCells("cells", 0, 0, 0, 0, e->s, e->nodeID, e->step); */
+    // dumpCells("cells", 1, 0, 0, 0, e->s, e->nodeID, e->step);
 
 #ifdef SWIFT_DEBUG_CHECKS
   /* Check that we have the correct total mass in the top-level multipoles */
@@ -4841,6 +4812,30 @@ void engine_step(struct engine *e) {
   /* Create a restart file if needed. */
   engine_dump_restarts(e, 0, e->restart_onexit && engine_is_done(e));
 
+  engine_check_for_dumps(e);
+
+  TIMER_TOC2(timer_step);
+
+  clocks_gettime(&time2);
+  e->wallclock_time = (float)clocks_diff(&time1, &time2);
+
+#ifdef SWIFT_DEBUG_TASKS
+  /* Time in ticks at the end of this step. */
+  e->toc_step = getticks();
+#endif
+}
+
+/**
+ * @brief Check whether any kind of i/o has to be performed during this
+ * step.
+ *
+ * This includes snapshots, stats and halo finder. We also handle the case
+ * of multiple outputs between two steps.
+ *
+ * @param e The #engine.
+ */
+void engine_check_for_dumps(struct engine *e) {
+
   /* Save some statistics ? */
   int save_stats = 0;
   if (e->ti_end_min > e->ti_next_stats && e->ti_next_stats > 0) save_stats = 1;
@@ -4865,52 +4860,85 @@ void engine_step(struct engine *e) {
   timebin_t max_active_bin = e->max_active_bin;
   double time = e->time;
 
-  /* Write some form of output */
-  if (dump_snapshot && save_stats) {
+  while (save_stats || dump_snapshot || run_stf) {
 
-    /* If both, need to figure out which one occurs first */
-    if (e->ti_next_stats == e->ti_next_snapshot) {
+    /* Write some form of output */
+    if (dump_snapshot && save_stats) {
 
-      /* Let's fake that we are at the common dump time */
-      e->ti_current = e->ti_next_snapshot;
-      e->max_active_bin = 0;
-      if (!(e->policy & engine_policy_cosmology))
-        e->time = e->ti_next_snapshot * e->time_base + e->time_begin;
+      /* If both, need to figure out which one occurs first */
+      if (e->ti_next_stats == e->ti_next_snapshot) {
 
-      /* Drift everyone */
-      engine_drift_all(e);
+        /* Let's fake that we are at the common dump time */
+        e->ti_current = e->ti_next_snapshot;
+        e->max_active_bin = 0;
+        if (!(e->policy & engine_policy_cosmology))
+          e->time = e->ti_next_snapshot * e->time_base + e->time_begin;
 
-      /* Dump everything */
-      engine_print_stats(e);
-      engine_dump_snapshot(e);
+        /* Drift everyone */
+        engine_drift_all(e);
 
-    } else if (e->ti_next_stats < e->ti_next_snapshot) {
+        /* Dump everything */
+        engine_print_stats(e);
+        engine_dump_snapshot(e);
 
-      /* Let's fake that we are at the stats dump time */
-      e->ti_current = e->ti_next_stats;
-      e->max_active_bin = 0;
-      if (!(e->policy & engine_policy_cosmology))
-        e->time = e->ti_next_stats * e->time_base + e->time_begin;
+      } else if (e->ti_next_stats < e->ti_next_snapshot) {
 
-      /* Drift everyone */
-      engine_drift_all(e);
+        /* Let's fake that we are at the stats dump time */
+        e->ti_current = e->ti_next_stats;
+        e->max_active_bin = 0;
+        if (!(e->policy & engine_policy_cosmology))
+          e->time = e->ti_next_stats * e->time_base + e->time_begin;
 
-      /* Dump stats */
-      engine_print_stats(e);
+        /* Drift everyone */
+        engine_drift_all(e);
 
-      /* Let's fake that we are at the snapshot dump time */
-      e->ti_current = e->ti_next_snapshot;
-      e->max_active_bin = 0;
-      if (!(e->policy & engine_policy_cosmology))
-        e->time = e->ti_next_snapshot * e->time_base + e->time_begin;
+        /* Dump stats */
+        engine_print_stats(e);
 
-      /* Drift everyone */
-      engine_drift_all(e);
+        /* Let's fake that we are at the snapshot dump time */
+        e->ti_current = e->ti_next_snapshot;
+        e->max_active_bin = 0;
+        if (!(e->policy & engine_policy_cosmology))
+          e->time = e->ti_next_snapshot * e->time_base + e->time_begin;
 
-      /* Dump snapshot */
-      engine_dump_snapshot(e);
+        /* Drift everyone */
+        engine_drift_all(e);
+
+        /* Dump snapshot */
+        engine_dump_snapshot(e);
+
+      } else if (e->ti_next_stats > e->ti_next_snapshot) {
+
+        /* Let's fake that we are at the snapshot dump time */
+        e->ti_current = e->ti_next_snapshot;
+        e->max_active_bin = 0;
+        if (!(e->policy & engine_policy_cosmology))
+          e->time = e->ti_next_snapshot * e->time_base + e->time_begin;
+
+        /* Drift everyone */
+        engine_drift_all(e);
+
+        /* Dump snapshot */
+        engine_dump_snapshot(e);
+
+        /* Let's fake that we are at the stats dump time */
+        e->ti_current = e->ti_next_stats;
+        e->max_active_bin = 0;
+        if (!(e->policy & engine_policy_cosmology))
+          e->time = e->ti_next_stats * e->time_base + e->time_begin;
 
-    } else if (e->ti_next_stats > e->ti_next_snapshot) {
+        /* Drift everyone */
+        engine_drift_all(e);
+
+        /* Dump stats */
+        engine_print_stats(e);
+      }
+
+      /* Let's compute the time of the next outputs */
+      engine_compute_next_snapshot_time(e);
+      engine_compute_next_statistics_time(e);
+
+    } else if (dump_snapshot) {
 
       /* Let's fake that we are at the snapshot dump time */
       e->ti_current = e->ti_next_snapshot;
@@ -4921,9 +4949,14 @@ void engine_step(struct engine *e) {
       /* Drift everyone */
       engine_drift_all(e);
 
-      /* Dump snapshot */
+      /* Dump... */
       engine_dump_snapshot(e);
 
+      /* ... and find the next output time */
+      engine_compute_next_snapshot_time(e);
+
+    } else if (save_stats) {
+
       /* Let's fake that we are at the stats dump time */
       e->ti_current = e->ti_next_stats;
       e->max_active_bin = 0;
@@ -4933,77 +4966,56 @@ void engine_step(struct engine *e) {
       /* Drift everyone */
       engine_drift_all(e);
 
-      /* Dump stats */
+      /* Dump */
       engine_print_stats(e);
-    }
-
-    /* Let's compute the time of the next outputs */
-    engine_compute_next_snapshot_time(e);
-    engine_compute_next_statistics_time(e);
-
-  } else if (dump_snapshot) {
-
-    /* Let's fake that we are at the snapshot dump time */
-    e->ti_current = e->ti_next_snapshot;
-    e->max_active_bin = 0;
-    if (!(e->policy & engine_policy_cosmology))
-      e->time = e->ti_next_snapshot * e->time_base + e->time_begin;
-
-    /* Drift everyone */
-    engine_drift_all(e);
-
-    /* Dump... */
-    engine_dump_snapshot(e);
-
-    /* ... and find the next output time */
-    engine_compute_next_snapshot_time(e);
-  } else if (save_stats) {
-
-    /* Let's fake that we are at the stats dump time */
-    e->ti_current = e->ti_next_stats;
-    e->max_active_bin = 0;
-    if (!(e->policy & engine_policy_cosmology))
-      e->time = e->ti_next_stats * e->time_base + e->time_begin;
-
-    /* Drift everyone */
-    engine_drift_all(e);
 
-    /* Dump */
-    engine_print_stats(e);
-
-    /* and move on */
-    engine_compute_next_statistics_time(e);
-  }
+      /* and move on */
+      engine_compute_next_statistics_time(e);
+    }
 
-  /* Perform structure finding? */
-  if (run_stf) {
+    /* Perform structure finding? */
+    if (run_stf) {
 
-  // MATTHIEU: Add a drift_all here. And check the order with the order i/o
-  // options.
+    // MATTHIEU: Add a drift_all here. And check the order with the order i/o
+    // options.
 
 #ifdef HAVE_VELOCIRAPTOR
-    velociraptor_init(e);
-    velociraptor_invoke(e);
+      velociraptor_init(e);
+      velociraptor_invoke(e);
 
-    /* ... and find the next output time */
-    if (e->stf_output_freq_format == TIME) engine_compute_next_stf_time(e);
+      /* ... and find the next output time */
+      if (e->stf_output_freq_format == TIME) engine_compute_next_stf_time(e);
 #endif
+    }
+
+    /* We need to see whether whether we are in the pathological case
+     * where there can be another dump before the next step. */
+
+    /* Save some statistics ? */
+    save_stats = 0;
+    if (e->ti_end_min > e->ti_next_stats && e->ti_next_stats > 0)
+      save_stats = 1;
+
+    /* Do we want a snapshot? */
+    dump_snapshot = 0;
+    if (e->ti_end_min > e->ti_next_snapshot && e->ti_next_snapshot > 0)
+      dump_snapshot = 1;
+
+    /* Do we want to perform structure finding? */
+    run_stf = 0;
+    if ((e->policy & engine_policy_structure_finding)) {
+      if (e->stf_output_freq_format == STEPS && e->step % e->deltaStepSTF == 0)
+        run_stf = 1;
+      else if (e->stf_output_freq_format == TIME &&
+               e->ti_end_min > e->ti_nextSTF && e->ti_nextSTF > 0)
+        run_stf = 1;
+    }
   }
 
   /* Restore the information we stored */
   e->ti_current = ti_current;
   e->max_active_bin = max_active_bin;
   e->time = time;
-
-  TIMER_TOC2(timer_step);
-
-  clocks_gettime(&time2);
-  e->wallclock_time = (float)clocks_diff(&time1, &time2);
-
-#ifdef SWIFT_DEBUG_TASKS
-  /* Time in ticks at the end of this step. */
-  e->toc_step = getticks();
-#endif
 }
 
 /**
@@ -6280,6 +6292,7 @@ void engine_config(int restart, struct engine *e, struct swift_params *params,
 #ifdef WITH_MPI
   part_create_mpi_types();
   stats_create_MPI_type();
+  task_create_mpi_comms();
 #endif
 
   /* Initialise the collection group. */
diff --git a/src/engine.h b/src/engine.h
index cfd656712bdea84484101cf7e83795f795200f5f..65cb7f22a715d01c745268948c796db3a1f735ab 100644
--- a/src/engine.h
+++ b/src/engine.h
@@ -382,6 +382,7 @@ void engine_drift_all(struct engine *e);
 void engine_drift_top_multipoles(struct engine *e);
 void engine_reconstruct_multipoles(struct engine *e);
 void engine_print_stats(struct engine *e);
+void engine_check_for_dumps(struct engine *e);
 void engine_dump_snapshot(struct engine *e);
 void engine_init_output_lists(struct engine *e, struct swift_params *params);
 void engine_init(struct engine *e, struct space *s, struct swift_params *params,
diff --git a/src/hydro_properties.c b/src/hydro_properties.c
index 905bf6973447b1ddd1c174b2e65d6841917ef736..9520781be45f7d9b59534c57e542e0802759aaec 100644
--- a/src/hydro_properties.c
+++ b/src/hydro_properties.c
@@ -93,7 +93,7 @@ void hydro_props_init(struct hydro_props *p,
   p->initial_temperature = parser_get_opt_param_float(
       params, "SPH:initial_temperature", hydro_props_default_init_temp);
 
-  /* Initial temperature */
+  /* Minimal temperature */
   p->minimal_temperature = parser_get_opt_param_float(
       params, "SPH:minimal_temperature", hydro_props_default_min_temp);
 
@@ -106,11 +106,12 @@ void hydro_props_init(struct hydro_props *p,
       params, "SPH:H_mass_fraction", hydro_props_default_H_fraction);
 
   /* Compute the initial energy (Note the temp. read is in internal units) */
+  /* u_init = k_B T_init / (mu m_p (gamma - 1)) */
   double u_init = phys_const->const_boltzmann_k / phys_const->const_proton_mass;
   u_init *= p->initial_temperature;
   u_init *= hydro_one_over_gamma_minus_one;
 
-  /* Correct for hydrogen mass fraction */
+  /* Correct for hydrogen mass fraction (mu) */
   double mean_molecular_weight;
   if (p->initial_temperature *
           units_cgs_conversion_factor(us, UNIT_CONV_TEMPERATURE) >
@@ -122,11 +123,12 @@ void hydro_props_init(struct hydro_props *p,
   p->initial_internal_energy = u_init / mean_molecular_weight;
 
   /* Compute the minimal energy (Note the temp. read is in internal units) */
+  /* u_min = k_B T_min / (mu m_p (gamma - 1)) */
   double u_min = phys_const->const_boltzmann_k / phys_const->const_proton_mass;
   u_min *= p->minimal_temperature;
   u_min *= hydro_one_over_gamma_minus_one;
 
-  /* Correct for hydrogen mass fraction */
+  /* Correct for hydrogen mass fraction (mu) */
   if (p->minimal_temperature *
           units_cgs_conversion_factor(us, UNIT_CONV_TEMPERATURE) >
       1e4)
@@ -135,14 +137,6 @@ void hydro_props_init(struct hydro_props *p,
     mean_molecular_weight = 4. / (1. + 3. * p->hydrogen_mass_fraction);
 
   p->minimal_internal_energy = u_min / mean_molecular_weight;
-
-#ifdef PLANETARY_SPH
-#ifdef PLANETARY_SPH_BALSARA
-  message("Planetary SPH: Balsara switch enabled");
-#else
-  message("Planetary SPH: Balsara switch disabled");
-#endif  // PLANETARY_SPH_BALSARA
-#endif  // PLANETARY_SPH
 }
 
 /**
@@ -184,6 +178,15 @@ void hydro_props_print(const struct hydro_props *p) {
 
   if (p->minimal_temperature != hydro_props_default_min_temp)
     message("Minimal gas temperature set to %f", p->minimal_temperature);
+
+    // Matthieu: Temporary location for this i/o business.
+#ifdef PLANETARY_SPH
+#ifdef PLANETARY_SPH_BALSARA
+  message("Planetary SPH: Balsara switch enabled");
+#else
+  message("Planetary SPH: Balsara switch disabled");
+#endif  // PLANETARY_SPH_BALSARA
+#endif  // PLANETARY_SPH
 }
 
 #if defined(HAVE_HDF5)
diff --git a/src/parallel_io.c b/src/parallel_io.c
index a3a71d0ac5703f194bfa8e53c9b0b62b25e6a602..449756293abc8345a092341217a9e36bca14c1d5 100644
--- a/src/parallel_io.c
+++ b/src/parallel_io.c
@@ -31,6 +31,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <time.h>
 
 /* This object's header. */
 #include "parallel_io.h"
@@ -968,6 +969,8 @@ void prepare_file(struct engine* e, const char* baseName, long long N_total[6],
   io_write_attribute(h_grp, "Redshift", DOUBLE, &e->cosmology->z, 1);
   io_write_attribute(h_grp, "Scale-factor", DOUBLE, &e->cosmology->a, 1);
   io_write_attribute_s(h_grp, "Code", "SWIFT");
+  time_t tm = time(NULL);
+  io_write_attribute_s(h_grp, "Snapshot date", ctime(&tm));
 
   /* GADGET-2 legacy values */
   /* Number of particles of each type */
diff --git a/src/proxy.c b/src/proxy.c
index 9d170a517f6f24c907b10330c6d1e33215bcce1b..61562913780f3b20650e4ab05f0a4583b5f0a115 100644
--- a/src/proxy.c
+++ b/src/proxy.c
@@ -38,14 +38,234 @@
 #include "proxy.h"
 
 /* Local headers. */
+#include "cell.h"
 #include "error.h"
+#include "space.h"
 
 /**
- * @brief Exchange cells with a remote node.
+ * @brief Exchange tags between nodes.
+ *
+ * Note that this function assumes that the cell structures have already
+ * been exchanged, e.g. via #proxy_cells_exchange.
+ *
+ * @param proxies The list of #proxy that will send/recv tags
+ * @param num_proxies The number of proxies.
+ * @param s The space into which the tags will be unpacked.
+ */
+void proxy_tags_exchange(struct proxy *proxies, int num_proxies,
+                         struct space *s) {
+
+#ifdef WITH_MPI
+
+  /* Run through the cells and get the size of the tags that will be sent off.
+   */
+  int count_out = 0;
+  int offset_out[s->nr_cells];
+  for (int k = 0; k < s->nr_cells; k++) {
+    offset_out[k] = count_out;
+    if (s->cells_top[k].sendto) {
+      count_out += s->cells_top[k].pcell_size;
+    }
+  }
+
+  /* Run through the proxies and get the count of incoming tags. */
+  int count_in = 0;
+  int offset_in[s->nr_cells];
+  for (int k = 0; k < num_proxies; k++) {
+    for (int j = 0; j < proxies[k].nr_cells_in; j++) {
+      offset_in[proxies[k].cells_in[j] - s->cells_top] = count_in;
+      count_in += proxies[k].cells_in[j]->pcell_size;
+    }
+  }
+
+  /* Allocate the tags. */
+  int *tags_in = NULL;
+  int *tags_out = NULL;
+  if (posix_memalign((void **)&tags_in, SWIFT_CACHE_ALIGNMENT,
+                     sizeof(int) * count_in) != 0 ||
+      posix_memalign((void **)&tags_out, SWIFT_CACHE_ALIGNMENT,
+                     sizeof(int) * count_out) != 0)
+    error("Failed to allocate tags buffers.");
+
+  /* Pack the local tags. */
+  for (int k = 0; k < s->nr_cells; k++) {
+    if (s->cells_top[k].sendto) {
+      cell_pack_tags(&s->cells_top[k], &tags_out[offset_out[k]]);
+    }
+  }
+
+  /* Allocate the incoming and outgoing request handles. */
+  int num_reqs_out = 0;
+  int num_reqs_in = 0;
+  for (int k = 0; k < num_proxies; k++) {
+    num_reqs_in += proxies[k].nr_cells_in;
+    num_reqs_out += proxies[k].nr_cells_out;
+  }
+  MPI_Request *reqs_in;
+  int *cids_in;
+  if ((reqs_in = (MPI_Request *)malloc(sizeof(MPI_Request) *
+                                       (num_reqs_in + num_reqs_out))) == NULL ||
+      (cids_in = (int *)malloc(sizeof(int) * (num_reqs_in + num_reqs_out))) ==
+          NULL)
+    error("Failed to allocate MPI_Request arrays.");
+  MPI_Request *reqs_out = &reqs_in[num_reqs_in];
+  int *cids_out = &cids_in[num_reqs_in];
+
+  /* Emit the sends and recvs. */
+  for (int send_rid = 0, recv_rid = 0, k = 0; k < num_proxies; k++) {
+    for (int j = 0; j < proxies[k].nr_cells_in; j++) {
+      const int cid = proxies[k].cells_in[j] - s->cells_top;
+      cids_in[recv_rid] = cid;
+      int err = MPI_Irecv(
+          &tags_in[offset_in[cid]], proxies[k].cells_in[j]->pcell_size, MPI_INT,
+          proxies[k].nodeID, cid, MPI_COMM_WORLD, &reqs_in[recv_rid]);
+      if (err != MPI_SUCCESS) mpi_error(err, "Failed to irecv tags.");
+      recv_rid += 1;
+    }
+    for (int j = 0; j < proxies[k].nr_cells_out; j++) {
+      const int cid = proxies[k].cells_out[j] - s->cells_top;
+      cids_out[send_rid] = cid;
+      int err = MPI_Isend(
+          &tags_out[offset_out[cid]], proxies[k].cells_out[j]->pcell_size, MPI_INT,
+          proxies[k].nodeID, cid, MPI_COMM_WORLD, &reqs_out[send_rid]);
+      if (err != MPI_SUCCESS) mpi_error(err, "Failed to isend tags.");
+      send_rid += 1;
+    }
+  }
+
+  /* Wait for each recv and unpack the tags into the local cells. */
+  for (int k = 0; k < num_reqs_in; k++) {
+    int pid = MPI_UNDEFINED;
+    MPI_Status status;
+    if (MPI_Waitany(num_reqs_in, reqs_in, &pid, &status) != MPI_SUCCESS ||
+        pid == MPI_UNDEFINED)
+      error("MPI_Waitany failed.");
+    const int cid = cids_in[pid];
+    cell_unpack_tags(&tags_in[offset_in[cid]], &s->cells_top[cid]);
+  }
+
+  /* Wait for all the sends to have completed. */
+  if (MPI_Waitall(num_reqs_out, reqs_out, MPI_STATUSES_IGNORE) != MPI_SUCCESS)
+    error("MPI_Waitall on sends failed.");
+
+  /* Clean up. */
+  free(tags_in);
+  free(tags_out);
+  free(reqs_in);
+  free(cids_in);
+
+#else
+  error("SWIFT was not compiled with MPI support.");
+#endif
+}
+
+/**
+ * @brief Exchange the cell structures with all proxies.
+ *
+ * @param proxies The list of #proxy that will send/recv cells.
+ * @param num_proxies The number of proxies.
+ * @param s The space into which the particles will be unpacked.
+ */
+void proxy_cells_exchange(struct proxy *proxies, int num_proxies,
+                          struct space *s) {
+
+#ifdef WITH_MPI
+
+  MPI_Request *reqs;
+  if ((reqs = (MPI_Request *)malloc(sizeof(MPI_Request) * 2 * num_proxies)) ==
+      NULL)
+    error("Failed to allocate request buffers.");
+  MPI_Request *reqs_in = reqs;
+  MPI_Request *reqs_out = &reqs[num_proxies];
+
+  /* Run through the cells and get the size of the ones that will be sent off.
+   */
+  int count_out = 0;
+  int offset[s->nr_cells];
+  for (int k = 0; k < s->nr_cells; k++) {
+    offset[k] = count_out;
+    if (s->cells_top[k].sendto)
+      count_out +=
+          (s->cells_top[k].pcell_size = cell_getsize(&s->cells_top[k]));
+  }
+
+  /* Allocate the pcells. */
+  struct pcell *pcells = NULL;
+  if (posix_memalign((void **)&pcells, SWIFT_CACHE_ALIGNMENT,
+                     sizeof(struct pcell) * count_out) != 0)
+    error("Failed to allocate pcell buffer.");
+
+  /* Pack the cells. */
+  for (int k = 0; k < s->nr_cells; k++)
+    if (s->cells_top[k].sendto) {
+      cell_pack(&s->cells_top[k], &pcells[offset[k]]);
+      s->cells_top[k].pcell = &pcells[offset[k]];
+    }
+
+  /* Launch the first part of the exchange. */
+  for (int k = 0; k < num_proxies; k++) {
+    proxy_cells_exchange_first(&proxies[k]);
+    reqs_in[k] = proxies[k].req_cells_count_in;
+    reqs_out[k] = proxies[k].req_cells_count_out;
+  }
+
+  /* Wait for each count to come in and start the recv. */
+  for (int k = 0; k < num_proxies; k++) {
+    int pid = MPI_UNDEFINED;
+    MPI_Status status;
+    if (MPI_Waitany(num_proxies, reqs_in, &pid, &status) != MPI_SUCCESS ||
+        pid == MPI_UNDEFINED)
+      error("MPI_Waitany failed.");
+    // message( "request from proxy %i has arrived." , pid );
+    proxy_cells_exchange_second(&proxies[pid]);
+  }
+
+  /* Wait for all the sends to have finished too. */
+  if (MPI_Waitall(num_proxies, reqs_out, MPI_STATUSES_IGNORE) != MPI_SUCCESS)
+    error("MPI_Waitall on sends failed.");
+
+  /* Set the requests for the cells. */
+  for (int k = 0; k < num_proxies; k++) {
+    reqs_in[k] = proxies[k].req_cells_in;
+    reqs_out[k] = proxies[k].req_cells_out;
+  }
+
+  /* Wait for each pcell array to come in from the proxies. */
+  for (int k = 0; k < num_proxies; k++) {
+    int pid = MPI_UNDEFINED;
+    MPI_Status status;
+    if (MPI_Waitany(num_proxies, reqs_in, &pid, &status) != MPI_SUCCESS ||
+        pid == MPI_UNDEFINED)
+      error("MPI_Waitany failed.");
+    // message( "cell data from proxy %i has arrived." , pid );
+    for (int count = 0, j = 0; j < proxies[pid].nr_cells_in; j++)
+      count += cell_unpack(&proxies[pid].pcells_in[count],
+                           proxies[pid].cells_in[j], s);
+  }
+
+  /* Wait for all the sends to have finished too. */
+  if (MPI_Waitall(num_proxies, reqs_out, MPI_STATUSES_IGNORE) != MPI_SUCCESS)
+    error("MPI_Waitall on sends failed.");
+
+  /* Clean up. */
+  free(reqs);
+  free(pcells);
+
+#else
+  error("SWIFT was not compiled with MPI support.");
+#endif
+}
+
+/**
+ * @brief Exchange cells with a remote node, first part.
+ *
+ * The first part of the transaction sends the local cell count and the packed
+ * #pcell array to the destination node, and enqueues an @c MPI_Irecv for
+ * the foreign cell counts.
  *
  * @param p The #proxy.
  */
-void proxy_cells_exch1(struct proxy *p) {
+void proxy_cells_exchange_first(struct proxy *p) {
 
 #ifdef WITH_MPI
 
@@ -96,7 +316,16 @@ void proxy_cells_exch1(struct proxy *p) {
 #endif
 }
 
-void proxy_cells_exch2(struct proxy *p) {
+/**
+ * @brief Exchange cells with a remote node, second part.
+ *
+ * Once the incomming cell count has been received, allocate a buffer
+ * for the foreign packed #pcell array and emit the @c MPI_Irecv for
+ * it.
+ *
+ * @param p The #proxy.
+ */
+void proxy_cells_exchange_second(struct proxy *p) {
 
 #ifdef WITH_MPI
 
@@ -219,7 +448,7 @@ void proxy_addcell_out(struct proxy *p, struct cell *c, int type) {
  *
  * @param p The #proxy.
  */
-void proxy_parts_exch1(struct proxy *p) {
+void proxy_parts_exchange_first(struct proxy *p) {
 
 #ifdef WITH_MPI
 
@@ -279,7 +508,7 @@ void proxy_parts_exch1(struct proxy *p) {
 #endif
 }
 
-void proxy_parts_exch2(struct proxy *p) {
+void proxy_parts_exchange_second(struct proxy *p) {
 
 #ifdef WITH_MPI
 
diff --git a/src/proxy.h b/src/proxy.h
index b45f6fcca86b0320a49b5c2b879539cbf8c73116..63f51846d0bfb646e1f2f9209413437b4c966983 100644
--- a/src/proxy.h
+++ b/src/proxy.h
@@ -22,6 +22,7 @@
 /* Includes. */
 #include "cell.h"
 #include "part.h"
+#include "space.h"
 
 /* Some constants. */
 #define proxy_buffgrow 1.5
@@ -96,11 +97,15 @@ void proxy_parts_load(struct proxy *p, const struct part *parts,
                       const struct xpart *xparts, int N);
 void proxy_gparts_load(struct proxy *p, const struct gpart *gparts, int N);
 void proxy_sparts_load(struct proxy *p, const struct spart *sparts, int N);
-void proxy_parts_exch1(struct proxy *p);
-void proxy_parts_exch2(struct proxy *p);
+void proxy_parts_exchange_first(struct proxy *p);
+void proxy_parts_exchange_second(struct proxy *p);
 void proxy_addcell_in(struct proxy *p, struct cell *c, int type);
 void proxy_addcell_out(struct proxy *p, struct cell *c, int type);
-void proxy_cells_exch1(struct proxy *p);
-void proxy_cells_exch2(struct proxy *p);
+void proxy_cells_exchange(struct proxy *proxies, int num_proxies,
+                          struct space *s);
+void proxy_cells_exchange_first(struct proxy *p);
+void proxy_cells_exchange_second(struct proxy *p);
+void proxy_tags_exchange(struct proxy *proxies, int num_proxies,
+                         struct space *s);
 
 #endif /* SWIFT_PROXY_H */
diff --git a/src/runner.c b/src/runner.c
index 60c2999b9a3dcc3907eeea17406401f3f56e5f7f..94d36f0f9c0c450c20a961327808cf203f13bafb 100644
--- a/src/runner.c
+++ b/src/runner.c
@@ -1757,6 +1757,14 @@ void runner_do_end_force(struct runner *r, struct cell *c, int timer) {
         /* Finish the force calculation */
         gravity_end_force(gp, G_newton, potential_normalisation, periodic);
 
+#ifdef SWIFT_MAKE_GRAVITY_GLASS
+
+        /* Negate the gravity forces */
+        gp->a_grav[0] *= -1.f;
+        gp->a_grav[1] *= -1.f;
+        gp->a_grav[2] *= -1.f;
+#endif
+
 #ifdef SWIFT_NO_GRAVITY_BELOW_ID
 
         /* Get the ID of the gpart */
diff --git a/src/runner_doiact.h b/src/runner_doiact.h
index 79c7760b86a7e8cc92bc7aef5d0bab093464033c..942455194da947c3d3ef170f674d9d7b816f9381 100644
--- a/src/runner_doiact.h
+++ b/src/runner_doiact.h
@@ -2297,10 +2297,16 @@ void DOSUB_PAIR1(struct runner *r, struct cell *ci, struct cell *cj, int sid,
     /* Do any of the cells need to be sorted first? */
     if (!(ci->sorted & (1 << sid)) ||
         ci->dx_max_sort_old > ci->dmin * space_maxreldx)
-      error("Interacting unsorted cell.");
+      error(
+          "Interacting unsorted cell. ci->dx_max_sort_old=%e ci->dmin=%e "
+          "ci->sorted=%d sid=%d",
+          ci->dx_max_sort_old, ci->dmin, ci->sorted, sid);
     if (!(cj->sorted & (1 << sid)) ||
         cj->dx_max_sort_old > cj->dmin * space_maxreldx)
-      error("Interacting unsorted cell.");
+      error(
+          "Interacting unsorted cell. cj->dx_max_sort_old=%e cj->dmin=%e "
+          "cj->sorted=%d sid=%d",
+          cj->dx_max_sort_old, cj->dmin, cj->sorted, sid);
 
     /* Compute the interactions. */
     DOPAIR1_BRANCH(r, ci, cj);
@@ -2588,10 +2594,16 @@ void DOSUB_PAIR2(struct runner *r, struct cell *ci, struct cell *cj, int sid,
     /* Do any of the cells need to be sorted first? */
     if (!(ci->sorted & (1 << sid)) ||
         ci->dx_max_sort_old > ci->dmin * space_maxreldx)
-      error("Interacting unsorted cells.");
+      error(
+          "Interacting unsorted cell. ci->dx_max_sort_old=%e ci->dmin=%e "
+          "ci->sorted=%d sid=%d",
+          ci->dx_max_sort_old, ci->dmin, ci->sorted, sid);
     if (!(cj->sorted & (1 << sid)) ||
         cj->dx_max_sort_old > cj->dmin * space_maxreldx)
-      error("Interacting unsorted cells.");
+      error(
+          "Interacting unsorted cell. cj->dx_max_sort_old=%e cj->dmin=%e "
+          "cj->sorted=%d sid=%d",
+          cj->dx_max_sort_old, cj->dmin, cj->sorted, sid);
 
     /* Compute the interactions. */
     DOPAIR2_BRANCH(r, ci, cj);
diff --git a/src/runner_doiact_grav.h b/src/runner_doiact_grav.h
index 69213095add9b9f4bf4425ddf0cd3f723497deb3..34d3e690dd27f1d55511847b8a921b9c9ba84aac 100644
--- a/src/runner_doiact_grav.h
+++ b/src/runner_doiact_grav.h
@@ -1573,6 +1573,11 @@ static INLINE void runner_do_grav_long_range(struct runner *r, struct cell *ci,
       /* Need to account for the interactions we missed */
       multi_i->pot.num_interacted += multi_j->m_pole.num_gpart;
 #endif
+
+      /* Record that this multipole received a contribution */
+      multi_i->pot.interacted = 1;
+
+      /* We are done here. */
       continue;
     }
 
diff --git a/src/scheduler.c b/src/scheduler.c
index a641df326805c42f6de431fdb6985cfcc6317d10..a90a3f1b0fa2401a6bc128069486c679df535435 100644
--- a/src/scheduler.c
+++ b/src/scheduler.c
@@ -1550,27 +1550,31 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) {
                                                 t->ci->pcell_size);
           err = MPI_Irecv(
               t->buff, t->ci->pcell_size * sizeof(struct pcell_step), MPI_BYTE,
-              t->ci->nodeID, t->flags, MPI_COMM_WORLD, &t->req);
+              t->ci->nodeID, t->flags, subtaskMPI_comms[t->subtype], &t->req);
         } else if (t->subtype == task_subtype_xv ||
                    t->subtype == task_subtype_rho ||
                    t->subtype == task_subtype_gradient) {
           err = MPI_Irecv(t->ci->parts, t->ci->count, part_mpi_type,
-                          t->ci->nodeID, t->flags, MPI_COMM_WORLD, &t->req);
+                          t->ci->nodeID, t->flags, subtaskMPI_comms[t->subtype],
+                          &t->req);
           // message( "receiving %i parts with tag=%i from %i to %i." ,
           //     t->ci->count , t->flags , t->ci->nodeID , s->nodeID );
           // fflush(stdout);
         } else if (t->subtype == task_subtype_gpart) {
           err = MPI_Irecv(t->ci->gparts, t->ci->gcount, gpart_mpi_type,
-                          t->ci->nodeID, t->flags, MPI_COMM_WORLD, &t->req);
+                          t->ci->nodeID, t->flags, subtaskMPI_comms[t->subtype],
+                          &t->req);
         } else if (t->subtype == task_subtype_spart) {
           err = MPI_Irecv(t->ci->sparts, t->ci->scount, spart_mpi_type,
-                          t->ci->nodeID, t->flags, MPI_COMM_WORLD, &t->req);
+                          t->ci->nodeID, t->flags, subtaskMPI_comms[t->subtype],
+                          &t->req);
         } else if (t->subtype == task_subtype_multipole) {
           t->buff = (struct gravity_tensors *)malloc(
               sizeof(struct gravity_tensors) * t->ci->pcell_size);
-          err = MPI_Irecv(
-              t->buff, sizeof(struct gravity_tensors) * t->ci->pcell_size,
-              MPI_BYTE, t->ci->nodeID, t->flags, MPI_COMM_WORLD, &t->req);
+          err = MPI_Irecv(t->buff,
+                          sizeof(struct gravity_tensors) * t->ci->pcell_size,
+                          MPI_BYTE, t->ci->nodeID, t->flags,
+                          subtaskMPI_comms[t->subtype], &t->req);
         } else {
           error("Unknown communication sub-type");
         }
@@ -1590,46 +1594,55 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) {
           cell_pack_end_step(t->ci, (struct pcell_step *)t->buff);
           if ((t->ci->pcell_size * sizeof(struct pcell_step)) >
               s->mpi_message_limit)
-            err = MPI_Isend(
-                t->buff, t->ci->pcell_size * sizeof(struct pcell_step),
-                MPI_BYTE, t->cj->nodeID, t->flags, MPI_COMM_WORLD, &t->req);
+            err = MPI_Isend(t->buff,
+                            t->ci->pcell_size * sizeof(struct pcell_step),
+                            MPI_BYTE, t->cj->nodeID, t->flags,
+                            subtaskMPI_comms[t->subtype], &t->req);
           else
-            err = MPI_Issend(
-                t->buff, t->ci->pcell_size * sizeof(struct pcell_step),
-                MPI_BYTE, t->cj->nodeID, t->flags, MPI_COMM_WORLD, &t->req);
+            err = MPI_Issend(t->buff,
+                             t->ci->pcell_size * sizeof(struct pcell_step),
+                             MPI_BYTE, t->cj->nodeID, t->flags,
+                             subtaskMPI_comms[t->subtype], &t->req);
         } else if (t->subtype == task_subtype_xv ||
                    t->subtype == task_subtype_rho ||
                    t->subtype == task_subtype_gradient) {
           if ((t->ci->count * sizeof(struct part)) > s->mpi_message_limit)
             err = MPI_Isend(t->ci->parts, t->ci->count, part_mpi_type,
-                            t->cj->nodeID, t->flags, MPI_COMM_WORLD, &t->req);
+                            t->cj->nodeID, t->flags,
+                            subtaskMPI_comms[t->subtype], &t->req);
           else
             err = MPI_Issend(t->ci->parts, t->ci->count, part_mpi_type,
-                             t->cj->nodeID, t->flags, MPI_COMM_WORLD, &t->req);
+                             t->cj->nodeID, t->flags,
+                             subtaskMPI_comms[t->subtype], &t->req);
           // message( "sending %i parts with tag=%i from %i to %i." ,
           //     t->ci->count , t->flags , s->nodeID , t->cj->nodeID );
           // fflush(stdout);
         } else if (t->subtype == task_subtype_gpart) {
           if ((t->ci->gcount * sizeof(struct gpart)) > s->mpi_message_limit)
             err = MPI_Isend(t->ci->gparts, t->ci->gcount, gpart_mpi_type,
-                            t->cj->nodeID, t->flags, MPI_COMM_WORLD, &t->req);
+                            t->cj->nodeID, t->flags,
+                            subtaskMPI_comms[t->subtype], &t->req);
           else
             err = MPI_Issend(t->ci->gparts, t->ci->gcount, gpart_mpi_type,
-                             t->cj->nodeID, t->flags, MPI_COMM_WORLD, &t->req);
+                             t->cj->nodeID, t->flags,
+                             subtaskMPI_comms[t->subtype], &t->req);
         } else if (t->subtype == task_subtype_spart) {
           if ((t->ci->scount * sizeof(struct spart)) > s->mpi_message_limit)
             err = MPI_Isend(t->ci->sparts, t->ci->scount, spart_mpi_type,
-                            t->cj->nodeID, t->flags, MPI_COMM_WORLD, &t->req);
+                            t->cj->nodeID, t->flags,
+                            subtaskMPI_comms[t->subtype], &t->req);
           else
             err = MPI_Issend(t->ci->sparts, t->ci->scount, spart_mpi_type,
-                             t->cj->nodeID, t->flags, MPI_COMM_WORLD, &t->req);
+                             t->cj->nodeID, t->flags,
+                             subtaskMPI_comms[t->subtype], &t->req);
         } else if (t->subtype == task_subtype_multipole) {
           t->buff = (struct gravity_tensors *)malloc(
               sizeof(struct gravity_tensors) * t->ci->pcell_size);
           cell_pack_multipoles(t->ci, (struct gravity_tensors *)t->buff);
-          err = MPI_Isend(
-              t->buff, t->ci->pcell_size * sizeof(struct gravity_tensors),
-              MPI_BYTE, t->cj->nodeID, t->flags, MPI_COMM_WORLD, &t->req);
+          err = MPI_Isend(t->buff,
+                          t->ci->pcell_size * sizeof(struct gravity_tensors),
+                          MPI_BYTE, t->cj->nodeID, t->flags,
+                          subtaskMPI_comms[t->subtype], &t->req);
         } else {
           error("Unknown communication sub-type");
         }
diff --git a/src/serial_io.c b/src/serial_io.c
index 3a7d2e5a68873ca9523fe09bbf19fb2e185482dd..fa7fbb220b9b56a3b5ea87660f618dc1a47bb886 100644
--- a/src/serial_io.c
+++ b/src/serial_io.c
@@ -31,6 +31,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <time.h>
 
 /* This object's header. */
 #include "serial_io.h"
@@ -822,6 +823,8 @@ void write_output_serial(struct engine* e, const char* baseName,
     io_write_attribute(h_grp, "Redshift", DOUBLE, &e->cosmology->z, 1);
     io_write_attribute(h_grp, "Scale-factor", DOUBLE, &e->cosmology->a, 1);
     io_write_attribute_s(h_grp, "Code", "SWIFT");
+    time_t tm = time(NULL);
+    io_write_attribute_s(h_grp, "Snapshot date", ctime(&tm));
 
     /* GADGET-2 legacy values */
     /* Number of particles of each type */
diff --git a/src/single_io.c b/src/single_io.c
index 8cbb6743d38a022581273a0a0b03c9b3b6fda32e..0238c21b10b7f35bd2e2618868ce8126562f736e 100644
--- a/src/single_io.c
+++ b/src/single_io.c
@@ -30,6 +30,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <time.h>
 
 /* This object's header. */
 #include "single_io.h"
@@ -672,6 +673,8 @@ void write_output_single(struct engine* e, const char* baseName,
   io_write_attribute(h_grp, "Redshift", DOUBLE, &e->cosmology->z, 1);
   io_write_attribute(h_grp, "Scale-factor", DOUBLE, &e->cosmology->a, 1);
   io_write_attribute_s(h_grp, "Code", "SWIFT");
+  time_t tm = time(NULL);
+  io_write_attribute_s(h_grp, "Snapshot date", ctime(&tm));
 
   /* GADGET-2 legacy values */
   /* Number of particles of each type */
diff --git a/src/space.c b/src/space.c
index f9f8411b9dd8d7f4ff9541da20626ece6fd97a36..9a9dd05c237a69d034bf618490c834ade5ee33f7 100644
--- a/src/space.c
+++ b/src/space.c
@@ -218,6 +218,8 @@ void space_rebuild_recycle_mapper(void *map_data, int num_elements,
         c->sort[i] = NULL;
       }
 #if WITH_MPI
+    c->tag = -1;
+
     c->recv_xv = NULL;
     c->recv_rho = NULL;
     c->recv_gradient = NULL;
@@ -431,6 +433,9 @@ void space_regrid(struct space *s, int verbose) {
           c->ti_old_part = ti_current;
           c->ti_old_gpart = ti_current;
           c->ti_old_multipole = ti_current;
+#ifdef WITH_MPI
+          c->tag = -1;
+#endif  // WITH_MPI
           if (s->gravity) c->multipole = &s->multipoles_top[cid];
 #ifdef SWIFT_DEBUG_CHECKS
           c->cellID = -last_cell_id;
@@ -1852,6 +1857,9 @@ void space_split_recursive(struct space *s, struct cell *c,
       cp->do_sub_sort = 0;
       cp->do_grav_sub_drift = 0;
       cp->do_sub_drift = 0;
+#ifdef WITH_MPI
+      cp->tag = -1;
+#endif  // WITH_MPI
 #ifdef SWIFT_DEBUG_CHECKS
       cp->cellID = last_cell_id++;
 #endif
diff --git a/src/task.c b/src/task.c
index 2782dabfc1369dedd43e9b42855a8b43acf2f1b7..10f2ddf5cec885ec23c4f65db5cdea50f0e5097b 100644
--- a/src/task.c
+++ b/src/task.c
@@ -63,6 +63,11 @@ const char *subtaskID_names[task_subtype_count] = {
     "none", "density", "gradient", "force", "grav",      "external_grav",
     "tend", "xv",      "rho",      "gpart", "multipole", "spart"};
 
+#ifdef WITH_MPI
+/* MPI communicators for the subtypes. */
+MPI_Comm subtaskMPI_comms[task_subtype_count];
+#endif
+
 /**
  * @brief Computes the overlap between the parts array of two given cells.
  *
@@ -485,3 +490,14 @@ void task_print(const struct task *t) {
           taskID_names[t->type], subtaskID_names[t->subtype], t->wait,
           t->nr_unlock_tasks, t->skip);
 }
+
+#ifdef WITH_MPI
+/**
+ * @brief Create global communicators for each of the subtasks.
+ */
+void task_create_mpi_comms(void) {
+  for (int i = 0; i < task_subtype_count; i++) {
+    MPI_Comm_dup(MPI_COMM_WORLD, &subtaskMPI_comms[i]);
+  }
+}
+#endif
diff --git a/src/task.h b/src/task.h
index 072d3979ce04990aaef46c5cc5eb0b8c62fdc860..58ea3a8cbb93b38b47ab7b6a243c3ee6c85d40b7 100644
--- a/src/task.h
+++ b/src/task.h
@@ -110,6 +110,13 @@ extern const char *taskID_names[];
  */
 extern const char *subtaskID_names[];
 
+/**
+ *  @brief The MPI communicators for the different subtypes.
+ */
+#ifdef WITH_MPI
+extern MPI_Comm subtaskMPI_comms[task_subtype_count];
+#endif
+
 /**
  * @brief A task to be run by the #scheduler.
  */
@@ -187,5 +194,7 @@ float task_overlap(const struct task *ta, const struct task *tb);
 int task_lock(struct task *t);
 void task_do_rewait(struct task *t);
 void task_print(const struct task *t);
-
+#ifdef WITH_MPI
+void task_create_mpi_comms(void);
+#endif
 #endif /* SWIFT_TASK_H */
diff --git a/tests/Makefile.am b/tests/Makefile.am
index ae234581228b2ea6035af845292e9cc22d6bcaa8..d99b68f224f542dcdc60ae59fc6e2042ae20d9b7 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -28,7 +28,7 @@ TESTS = testGreetings testMaths testReading.sh testSingle testKernel testSymmetr
         testVoronoi1D testVoronoi2D testVoronoi3D testGravityDerivatives \
 	testPeriodicBC.sh testPeriodicBCPerturbed.sh testPotentialSelf \
 	testPotentialPair testEOS testUtilities testSelectOutput.sh \
-	testCbrt testCosmology testOutputList
+	testCbrt testCosmology testOutputList testFormat.sh
 
 # List of test programs to compile
 check_PROGRAMS = testGreetings testReading testSingle testTimeIntegration \
diff --git a/tests/testFormat.sh.in b/tests/testFormat.sh.in
new file mode 100644
index 0000000000000000000000000000000000000000..1d0fdeb1334ea7e9ac7b6605c23d7567a2c8c62b
--- /dev/null
+++ b/tests/testFormat.sh.in
@@ -0,0 +1,6 @@
+#!/bin/bash
+
+set -e
+
+cd @srcdir@/..
+./format.sh --test