diff --git a/.gitignore b/.gitignore
index 1a43373acd789366119becb30662be2855db4a51..db06575cf9f291d1fb9fa253fb5146c065523dd9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -36,6 +36,7 @@ examples/*/*/*.xmf
 examples/*/*/*.hdf5
 examples/*/*/*.txt
 examples/*/*/used_parameters.yml
+examples/*/*.png
 
 tests/testPair
 tests/brute_force_standard.dat
@@ -62,6 +63,7 @@ tests/testFFT
 tests/testInteractions
 tests/testSymmetry
 tests/testMaths
+tests/testThreadpool
 tests/testParser
 tests/parser_output.yml
 tests/test27cells.sh
@@ -71,7 +73,11 @@ tests/testPair.sh
 tests/testPairPerturbed.sh
 tests/testParser.sh
 tests/testReading.sh
-
+tests/testAdiabaticIndex
+tests/testRiemannExact
+tests/testRiemannTRRS
+tests/testRiemannHLLC
+tests/testMatrixInversion
 
 theory/latex/swift.pdf
 theory/kernel/kernels.pdf
diff --git a/AUTHORS b/AUTHORS
index 4d43745609fc9d0ac2f149256a76ed6c581c9144..6f283405b69a7d3a5397916f0a3afa7f4fb54a4a 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -8,3 +8,6 @@ John A. Regan 		john.a.regan@durham.ac.uk
 Angus Lepper		angus.lepper@ed.ac.uk
 Tom Theuns 		tom.theuns@durham.ac.uk
 Richard G. Bower	r.g.bower@durham.ac.uk
+Stefan Arridge		stefan.arridge@durham.ac.uk
+Massimiliano Culpo	massimiliano.culpo@googlemail.com
+Yves Revaz   		yves.revaz@epfl.ch
diff --git a/README b/README
index cd2a397a18e872e7914b24fd58cc588ec1d6c8c0..562a54f3104884b1bf4c5e607700279161fad4c9 100644
--- a/README
+++ b/README
@@ -19,19 +19,21 @@ Usage: swift [OPTION]... PARAMFILE
 Valid options are:
   -a          Pin runners using processor affinity
   -c          Run with cosmological time integration
+  -C          Run with cooling
   -d          Dry run. Read the parameter file, allocate memory but does not read 
               the particles from ICs and exit before the start of time integration.
               Allows user to check validy of parameter and IC files as well as memory limits.
+  -D          Always drift all particles even the ones far from active particles.
   -e          Enable floating-point exceptions (debugging mode)
   -f    {int} Overwrite the CPU frequency (Hz) to be used for time measurements
   -g          Run with an external gravitational potential
   -G          Run with self-gravity
-  -n    {int} Execute a fixed number of time steps. When unset use the time_end
-              parameter to stop. 
+  -n    {int} Execute a fixed number of time steps. When unset use the time_end parameter to stop. 
   -s          Run with SPH
   -t    {int} The number of threads to use on each MPI rank. Defaults to 1 if not specified.
-  -v     [12] Increase the level of verbosity 1: MPI-rank 0 writes 
-              2: All MPI-ranks write
+  -v     [12] Increase the level of verbosity
+  	      1: MPI-rank 0 writes
+	      2: All MPI-ranks write
   -y    {int} Time-step frequency at which task graphs are dumped
   -h          Print this help message and exit
 
diff --git a/configure.ac b/configure.ac
index a798372c2f219a8da71f2b79321c140aba23e790..82382447fde7c411f61dbd62f7db388a6a8d9cf9 100644
--- a/configure.ac
+++ b/configure.ac
@@ -16,7 +16,7 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
 # Init the project.
-AC_INIT([SWIFT],[0.3.0])
+AC_INIT([SWIFT],[0.4.0])
 AC_CONFIG_SRCDIR([src/space.c])
 AC_CONFIG_AUX_DIR([.])
 AM_INIT_AUTOMAKE
@@ -466,7 +466,7 @@ if test "$enable_warn" != "no"; then
     # We will do this by hand instead and only default to the macro for unknown compilers
     case "$ax_cv_c_compiler_vendor" in
           gnu | clang)
-             CFLAGS="$CFLAGS -Wall"
+             CFLAGS="$CFLAGS -Wall -Wextra -Wno-unused-parameter"
           ;;
 	  intel)
              CFLAGS="$CFLAGS -w2 -Wunused-variable"
diff --git a/doc/Doxyfile.in b/doc/Doxyfile.in
index 4703c7091550c8c496952d9e96b623e180c78a69..2a5aeba7d1db0b1e1e56a9a6eed3059aba6a09ff 100644
--- a/doc/Doxyfile.in
+++ b/doc/Doxyfile.in
@@ -760,8 +760,11 @@ WARN_LOGFILE           =
 # Note: If this tag is empty the current directory is searched.
 
 INPUT                  =  @top_srcdir@ @top_srcdir@/src @top_srcdir@/tests @top_srcdir@/examples
-INPUT		       += @top_srcdir@/src/hydro/Minimal @top_srcdir@/src/gravity/Default
-INPUT		       += @top_srcdir@/src/riemann 
+INPUT		       += @top_srcdir@/src/hydro/Minimal
+INPUT		       += @top_srcdir@/src/gravity/Default
+INPUT		       += @top_srcdir@/src/riemann
+INPUT		       += @top_srcdir@/src/potential/point_mass
+INPUT		       += @top_srcdir@/src/cooling/const_du
 
 # This tag can be used to specify the character encoding of the source files
 # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses
diff --git a/examples/BigCosmoVolume/makeIC.py b/examples/BigCosmoVolume/makeIC.py
index 411ac54b41fadc4209b314b5b9976e5ac95d8000..c141337c06fb28aa4049e2823fcc7cd3e9d5513c 100644
--- a/examples/BigCosmoVolume/makeIC.py
+++ b/examples/BigCosmoVolume/makeIC.py
@@ -133,6 +133,7 @@ grp.attrs["Time"] = 0.0
 grp.attrs["NumFilesPerSnapshot"] = 1
 grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
 grp.attrs["Flag_Entropy_ICs"] = 0
+grp.attrs["Dimension"] = 3
 
 #Runtime parameters
 grp = file.create_group("/RuntimePars")
diff --git a/examples/CoolingBox/coolingBox.yml b/examples/CoolingBox/coolingBox.yml
new file mode 100644
index 0000000000000000000000000000000000000000..e13de6095066836853d9e9068330938f6260f38e
--- /dev/null
+++ b/examples/CoolingBox/coolingBox.yml
@@ -0,0 +1,44 @@
+# Define the system of units to use internally. 
+InternalUnitSystem:
+  UnitMass_in_cgs:     2.0e33   # Solar masses
+  UnitLength_in_cgs:   3.01e21   # Kiloparsecs
+  UnitVelocity_in_cgs: 1.0e5   # Time unit is cooling time
+  UnitCurrent_in_cgs:  1   # Amperes
+  UnitTemp_in_cgs:     1   # Kelvin
+
+# Parameters governing the time integration
+TimeIntegration:
+  time_begin: 0.    # The starting time of the simulation (in internal units).
+  time_end:   1.0    # The end time of the simulation (in internal units).
+  dt_min:     1e-6  # The minimal time-step size of the simulation (in internal units).
+  dt_max:     1e-2  # The maximal time-step size of the simulation (in internal units).
+
+# Parameters governing the snapshots
+Snapshots:
+  basename:            coolingBox # Common part of the name of output files
+  time_first:          0.         # Time of the first output (in internal units)
+  delta_time:          1.0e-1       # Time difference between consecutive outputs (in internal units)
+
+# Parameters governing the conserved quantities statistics
+Statistics:
+  delta_time:          1e-2 # Time between statistics output
+
+# Parameters for the hydrodynamics scheme
+SPH:
+  resolution_eta:        1.2348   # Target smoothing length in units of the mean inter-particle separation (1.2348 == 48Ngbs with the cubic spline kernel).
+  delta_neighbours:      0.1      # The tolerance for the targeted number of neighbours.
+  max_smoothing_length:  0.1      # Maximal smoothing length allowed (in internal units).
+  CFL_condition:         0.1      # Courant-Friedrichs-Lewy condition for time integration.
+  
+# Parameters related to the initial conditions
+InitialConditions:
+  file_name:  ./coolingBox.hdf5     # The file to read
+
+# Parameters governing the cooling (LambdaCooling model)
+LambdaCooling:
+  lambda:                      0.0    # Cooling rate (in cgs units)
+  minimum_temperature:         1.0e4  # Minimal temperature (Kelvin)
+  mean_molecular_weight:       0.59   # Mean molecular weight
+  hydrogen_mass_abundance:     0.75   # Hydrogen mass abundance (dimensionless)
+  cooling_tstep_mult:          1.0    # Dimensionless pre-factor for the time-step condition
+  
diff --git a/examples/CoolingBox/energy_plot.py b/examples/CoolingBox/energy_plot.py
new file mode 100644
index 0000000000000000000000000000000000000000..28cf9ab64decb5b56e98118a407221fac2bd4f16
--- /dev/null
+++ b/examples/CoolingBox/energy_plot.py
@@ -0,0 +1,84 @@
+import numpy as np
+import matplotlib.pyplot as plt
+import h5py as h5
+import sys
+
+stats_filename = "./energy.txt"
+snap_filename = "coolingBox_000.hdf5"
+#plot_dir = "./"
+
+#some constants in cgs units
+k_b = 1.38E-16 #boltzmann
+m_p = 1.67e-24 #proton mass
+#initial conditions set in makeIC.py
+rho = 3.2e3
+P = 4.5e6
+n_H_cgs = 0.0001
+gamma = 5./3.
+T_init = 1.0e5
+
+#Read the units parameters from the snapshot
+f = h5.File(snap_filename,'r')
+units = f["InternalCodeUnits"]
+unit_mass = units.attrs["Unit mass in cgs (U_M)"]
+unit_length = units.attrs["Unit length in cgs (U_L)"]
+unit_time = units.attrs["Unit time in cgs (U_t)"]
+parameters = f["Parameters"]
+cooling_lambda = float(parameters.attrs["LambdaCooling:lambda"])
+min_T = float(parameters.attrs["LambdaCooling:minimum_temperature"])
+mu = float(parameters.attrs["LambdaCooling:mean_molecular_weight"])
+X_H = float(parameters.attrs["LambdaCooling:hydrogen_mass_abundance"])
+
+#get number of particles
+header = f["Header"]
+n_particles = header.attrs["NumPart_ThisFile"][0]
+#read energy and time arrays
+array = np.genfromtxt(stats_filename,skip_header = 1)
+time = array[:,0]
+total_energy = array[:,2]
+total_mass = array[:,1]
+
+time = time[1:]
+total_energy = total_energy[1:]
+total_mass = total_mass[1:]
+
+#conversions to cgs
+rho_cgs = rho * unit_mass / (unit_length)**3
+time_cgs = time * unit_time
+u_init_cgs = total_energy[0]/(total_mass[0]) * unit_length**2 / (unit_time)**2 
+
+#find the energy floor
+print min_T
+u_floor_cgs = k_b * min_T / (mu * m_p * (gamma - 1.))
+#find analytic solution
+analytic_time = np.linspace(time_cgs[0],time_cgs[-1],1000)
+print time_cgs[1]
+print analytic_time[1]
+du_dt_cgs = -cooling_lambda * n_H_cgs**2 / rho_cgs
+u_analytic = du_dt_cgs*(analytic_time - analytic_time[0]) + u_init_cgs
+cooling_time = u_init_cgs/(-du_dt_cgs)
+#rescale energy to initial energy
+total_energy /= total_energy[0]
+u_analytic /= u_init_cgs
+u_floor_cgs /= u_init_cgs
+# plot_title = r"$\Lambda \, = \, %1.1g \mathrm{erg}\mathrm{cm^3}\mathrm{s^{-1}} \, \, T_{init} = %1.1g\mathrm{K} \, \, T_{floor} = %1.1g\mathrm{K} \, \, n_H = %1.1g\mathrm{cm^{-3}}$" %(cooling_lambda,T_init,T_floor,n_H)
+# plot_filename = "energy_plot_creasey_no_cooling_T_init_1p0e5_n_H_0p1.png"
+#analytic_solution = np.zeros(n_snaps-1)
+for i in range(u_analytic.size):
+    if u_analytic[i]<u_floor_cgs:
+        u_analytic[i] = u_floor_cgs
+plt.plot(time_cgs,total_energy,'k',label = "Numerical solution")
+plt.plot(analytic_time,u_analytic,'--r',lw = 2.0,label = "Analytic Solution")
+plt.plot((cooling_time,cooling_time),(0,1),'b',label = "Cooling time")
+plt.plot((time_cgs[0],time_cgs[0]),(0,1),'m',label = "First output")
+plt.title(r"$n_H = %1.1e \, \mathrm{cm}^{-3}$" %n_H_cgs)
+plt.xlabel("Time (seconds)")
+plt.ylabel("Energy/Initial energy")
+plt.ylim(0.999,1.001)
+plt.xlim(0,min(10.0*cooling_time,time_cgs[-1]))
+plt.legend(loc = "upper right")    
+if (int(sys.argv[1])==0):  # first command-line argument 0: interactive window, otherwise: write a PNG
+    plt.show()
+else:
+    plt.savefig("energy_plot.png",format = "png")  # 'full_plot_filename' was never defined (NameError)
+    plt.close()
diff --git a/examples/CoolingBox/makeIC.py b/examples/CoolingBox/makeIC.py
new file mode 100644
index 0000000000000000000000000000000000000000..f35c9243d4fa71f872fd27520de14a23073c4b9d
--- /dev/null
+++ b/examples/CoolingBox/makeIC.py
@@ -0,0 +1,109 @@
+###############################################################################
+ # This file is part of SWIFT.
+ # Copyright (c) 2013 Pedro Gonnet (pedro.gonnet@durham.ac.uk),
+ #                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ # 
+ # This program is free software: you can redistribute it and/or modify
+ # it under the terms of the GNU Lesser General Public License as published
+ # by the Free Software Foundation, either version 3 of the License, or
+ # (at your option) any later version.
+ # 
+ # This program is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ # GNU General Public License for more details.
+ # 
+ # You should have received a copy of the GNU Lesser General Public License
+ # along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ # 
+ ##############################################################################
+
+import h5py
+import sys
+from numpy import *
+
+# Generates a swift IC file containing a cartesian distribution of particles
+# at a constant density and pressure in a cubic box
+
+# Parameters
+periodic= 1           # 1 For periodic box
+boxSize = 1           #1 kiloparsec    
+L = int(sys.argv[1])  # Number of particles along one axis
+rho = 3.2e3          # Density in code units (0.01 hydrogen atoms per cm^3)
+P = 4.5e6          # Pressure in code units (at 10^5K)
+gamma = 5./3.         # Gas adiabatic index
+eta = 1.2349          # 48 ngbs with cubic spline kernel
+fileName = "coolingBox.hdf5" 
+
+#---------------------------------------------------
+numPart = L**3
+mass = boxSize**3 * rho / numPart
+print mass
+internalEnergy = P / ((gamma - 1.)*rho)
+
+#--------------------------------------------------
+
+#File
+file = h5py.File(fileName, 'w')
+
+# Header
+grp = file.create_group("/Header")
+grp.attrs["BoxSize"] = boxSize
+grp.attrs["NumPart_Total"] =  [numPart, 0, 0, 0, 0, 0]
+grp.attrs["NumPart_Total_HighWord"] = [0, 0, 0, 0, 0, 0]
+grp.attrs["NumPart_ThisFile"] = [numPart, 0, 0, 0, 0, 0]
+grp.attrs["Time"] = 0.0
+grp.attrs["NumFilesPerSnapshot"] = 1
+grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
+grp.attrs["Flag_Entropy_ICs"] = 0
+
+#Runtime parameters
+grp = file.create_group("/RuntimePars")
+grp.attrs["PeriodicBoundariesOn"] = periodic
+
+#Units
+grp = file.create_group("/Units")
+grp.attrs["Unit length in cgs (U_L)"] = 3.08e21 
+grp.attrs["Unit mass in cgs (U_M)"] = 2.0e33 
+grp.attrs["Unit time in cgs (U_t)"] = 3.08e16 
+grp.attrs["Unit current in cgs (U_I)"] = 1.
+grp.attrs["Unit temperature in cgs (U_T)"] = 1.
+
+#Particle group
+grp = file.create_group("/PartType0")
+
+v  = zeros((numPart, 3))
+ds = grp.create_dataset('Velocities', (numPart, 3), 'f')
+ds[()] = v
+v = zeros(1)
+
+m = full((numPart, 1), mass)
+ds = grp.create_dataset('Masses', (numPart,1), 'f')
+ds[()] = m
+m = zeros(1)
+
+h = full((numPart, 1), eta * boxSize / L)
+ds = grp.create_dataset('SmoothingLength', (numPart,1), 'f')
+ds[()] = h
+h = zeros(1)
+
+u = full((numPart, 1), internalEnergy)
+ds = grp.create_dataset('InternalEnergy', (numPart,1), 'f')
+ds[()] = u
+u = zeros(1)
+
+
+ids = linspace(0, numPart, numPart, endpoint=False).reshape((numPart,1))
+ds = grp.create_dataset('ParticleIDs', (numPart, 1), 'L')
+ds[()] = ids + 1
+x      = ids % L;
+y      = ((ids - x) / L) % L;
+z      = (ids - x - L * y) / L**2;
+coords = zeros((numPart, 3))
+coords[:,0] = z[:,0] * boxSize / L + boxSize / (2*L)
+coords[:,1] = y[:,0] * boxSize / L + boxSize / (2*L)
+coords[:,2] = x[:,0] * boxSize / L + boxSize / (2*L)
+ds = grp.create_dataset('Coordinates', (numPart, 3), 'd')
+ds[()] = coords
+
+file.close()
diff --git a/examples/CoolingBox/run.sh b/examples/CoolingBox/run.sh
new file mode 100755
index 0000000000000000000000000000000000000000..c78eec9da6c486bc31a60ab7a8521ce6a6a63165
--- /dev/null
+++ b/examples/CoolingBox/run.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+
+# Generate the initial conditions (always regenerated, even if already present).
+echo "Generating initial conditions for the cooling box example..."
+
+python makeIC.py 10
+
+../swift -s -t 1 coolingBox.yml -C 2>&1 | tee output.log
+
+python energy_plot.py 0
diff --git a/examples/CosmoVolume/cosmoVolume.yml b/examples/CosmoVolume/cosmoVolume.yml
index 548b85078952954f2bf97280be6feb25eb6ef444..13cea318144d296183d630a53d78c69d050c1abe 100644
--- a/examples/CosmoVolume/cosmoVolume.yml
+++ b/examples/CosmoVolume/cosmoVolume.yml
@@ -6,11 +6,6 @@ InternalUnitSystem:
   UnitCurrent_in_cgs:  1   # Amperes
   UnitTemp_in_cgs:     1   # Kelvin
 
-# Parameters for the task scheduling
-Scheduler:
-  cell_sub_size:    6000     # Value used for the original scaling tests
-  cell_split_size:  300      # Value used for the original scaling tests
-
 # Parameters governing the time integration
 TimeIntegration:
   time_begin: 0.    # The starting time of the simulation (in internal units).
diff --git a/examples/DiscPatch/GravityOnly/README b/examples/DiscPatch/GravityOnly/README
new file mode 100644
index 0000000000000000000000000000000000000000..5bf2638fc5ed48ebe248773223dde888af0c3bc8
--- /dev/null
+++ b/examples/DiscPatch/GravityOnly/README
@@ -0,0 +1,10 @@
+Setup for a potential of a patch disk, see Creasey, Theuns &
+Bower, 2013, MNRAS, Volume 429, Issue 3, p.1922-1948
+
+The density is given by
+rho(z) = (Sigma/2b) / cosh^2(z/b)
+where Sigma is the surface density, and b the scale height.
+
+The corresponding force is
+dphi/dz = 2 pi G Sigma tanh(z/b),
+which satisfies d^2phi/dz^2 = 4 pi G rho.
diff --git a/examples/DiscPatch/GravityOnly/disc-patch.yml b/examples/DiscPatch/GravityOnly/disc-patch.yml
new file mode 100644
index 0000000000000000000000000000000000000000..c76e4f612250d180f2ba2fccd0c6209878173433
--- /dev/null
+++ b/examples/DiscPatch/GravityOnly/disc-patch.yml
@@ -0,0 +1,43 @@
+# Define the system of units to use internally. 
+InternalUnitSystem:
+  UnitMass_in_cgs:     1.9885e33     # Grams
+  UnitLength_in_cgs:   3.0856776e18  # Centimeters
+  UnitVelocity_in_cgs: 1e5           # Centimeters per second
+  UnitCurrent_in_cgs:  1   # Amperes
+  UnitTemp_in_cgs:     1   # Kelvin
+
+# Parameters governing the time integration
+TimeIntegration:
+  time_begin: 0.    # The starting time of the simulation (in internal units).
+  time_end:   480.  # The end time of the simulation (in internal units).
+  dt_min:     1e-3  # The minimal time-step size of the simulation (in internal units).
+  dt_max:     1     # The maximal time-step size of the simulation (in internal units).
+
+# Parameters governing the conserved quantities statistics
+Statistics:
+  delta_time:          1.0 # Time between statistics output
+  
+# Parameters governing the snapshots
+Snapshots:
+  basename:            Disc-Patch # Common part of the name of output files
+  time_first:          0.         # Time of the first output (in internal units)
+  delta_time:          8.         # Time difference between consecutive outputs (in internal units)
+
+# Parameters for the hydrodynamics scheme
+SPH:
+  resolution_eta:        1.2349   # Target smoothing length in units of the mean inter-particle separation (1.2349 == 48Ngbs with the cubic spline kernel).
+  delta_neighbours:      1.       # The tolerance for the targeted number of neighbours.
+  CFL_condition:         0.1      # Courant-Friedrichs-Lewy condition for time integration.
+  max_ghost_iterations:  30       # Maximal number of iterations allowed to converge towards the smoothing length.
+  max_smoothing_length:  40.      # Maximal smoothing length allowed (in internal units).
+
+# Parameters related to the initial conditions
+InitialConditions:
+  file_name:  Disc-Patch.hdf5       # The file to read
+
+# External potential parameters
+DiscPatchPotential:
+  surface_density: 10.
+  scale_height:    100.
+  z_disc:          300.
+  timestep_mult:   0.03
diff --git a/examples/DiscPatch/GravityOnly/makeIC.py b/examples/DiscPatch/GravityOnly/makeIC.py
new file mode 100644
index 0000000000000000000000000000000000000000..42cd26e235deb17a899a65050ef5caa9c832c59c
--- /dev/null
+++ b/examples/DiscPatch/GravityOnly/makeIC.py
@@ -0,0 +1,161 @@
+###############################################################################
+ # This file is part of SWIFT.
+ # Copyright (c) 2016 John A. Regan (john.a.regan@durham.ac.uk)
+ #                    Tom Theuns (tom.theuns@durham.ac.uk)
+ # 
+ # This program is free software: you can redistribute it and/or modify
+ # it under the terms of the GNU Lesser General Public License as published
+ # by the Free Software Foundation, either version 3 of the License, or
+ # (at your option) any later version.
+ # 
+ # This program is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ # GNU General Public License for more details.
+ # 
+ # You should have received a copy of the GNU Lesser General Public License
+ # along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ # 
+ ##############################################################################
+
+import h5py
+import sys
+import numpy
+import math
+import random
+
+# Generates N particles in a box of [0:BoxSize,0:BoxSize,-2scale_height:2scale_height]
+# see Creasey, Theuns & Bower, 2013, for the equations:
+# disc parameters are: surface density sigma
+#                      scale height b
+# density: rho(z) = (sigma/2b) sech^2(z/b)
+# isothermal velocity dispersion = <v_z^2> = b pi G sigma
+# grad potential  = 2 pi G sigma tanh(z/b)
+# potential       = 2 pi G sigma b ln(cosh(z/b)) + const
+# Dynamical time  = sqrt(b / (G sigma))
+# to obtain the 1/ch^2(z/b) profile from a uniform profile (a glass, say, or a uniform random variable), note that, when integrating in z
+# \int_0^z dz/ch^2(z) = tanh(z)-tanh(0) = \int_0^x dx = x (where the last integral refers to a uniform density distribution), so that z = atanh(x)
+# usage: python makeIC.py 1000 
+
+# physical constants in cgs
+NEWTON_GRAVITY_CGS  = 6.672e-8        # cm^3 g^-1 s^-2
+SOLAR_MASS_IN_CGS   = 1.9885e33       # g
+PARSEC_IN_CGS       = 3.0856776e18    # cm
+PROTON_MASS_IN_CGS  = 1.6726231e-24   # g (exponent sign was wrong: e24)
+YEAR_IN_CGS         = 3.154e+7        # s
+
+# choice of units
+const_unit_length_in_cgs   =   (PARSEC_IN_CGS)
+const_unit_mass_in_cgs     =   (SOLAR_MASS_IN_CGS)
+const_unit_velocity_in_cgs =   (1e5)
+
+print "UnitMass_in_cgs:     ", const_unit_mass_in_cgs 
+print "UnitLength_in_cgs:   ", const_unit_length_in_cgs
+print "UnitVelocity_in_cgs: ", const_unit_velocity_in_cgs
+
+
+# parameters of potential
+surface_density = 10.
+scale_height    = 100.
+
+# derived units
+const_unit_time_in_cgs = (const_unit_length_in_cgs / const_unit_velocity_in_cgs)
+const_G                = ((NEWTON_GRAVITY_CGS*const_unit_mass_in_cgs*const_unit_time_in_cgs*const_unit_time_in_cgs/(const_unit_length_in_cgs*const_unit_length_in_cgs*const_unit_length_in_cgs)))
+print 'G=', const_G
+v_disp                 = numpy.sqrt(scale_height * math.pi * const_G * surface_density)
+t_dyn                  = numpy.sqrt(scale_height / (const_G * surface_density))
+print 'dynamical time = ',t_dyn
+print ' velocity dispersion = ',v_disp
+
+# Parameters
+periodic= 1             # 1 For periodic box
+boxSize = 600.          #  
+Radius  = 100.          # maximum radius of particles [kpc]
+G       = const_G 
+
+N       = int(sys.argv[1])  # Number of particles
+
+# these are not used but necessary for I/O
+rho = 2.              # Density
+P = 1.                # Pressure
+gamma = 5./3.         # Gas adiabatic index
+fileName = "Disc-Patch.hdf5" 
+
+
+#---------------------------------------------------
+numPart        = N
+mass           = 1
+internalEnergy = P / ((gamma - 1.)*rho)
+
+#--------------------------------------------------
+
+#File
+file = h5py.File(fileName, 'w')
+
+#Units
+grp = file.create_group("/Units")
+grp.attrs["Unit length in cgs (U_L)"] = const_unit_length_in_cgs
+grp.attrs["Unit mass in cgs (U_M)"] = const_unit_mass_in_cgs 
+grp.attrs["Unit time in cgs (U_t)"] = const_unit_length_in_cgs / const_unit_velocity_in_cgs
+grp.attrs["Unit current in cgs (U_I)"] = 1.
+grp.attrs["Unit temperature in cgs (U_T)"] = 1.
+
+# Header
+grp = file.create_group("/Header")
+grp.attrs["BoxSize"] = boxSize
+grp.attrs["NumPart_Total"] =  [0, numPart, 0, 0, 0, 0]
+grp.attrs["NumPart_Total_HighWord"] = [0, 0, 0, 0, 0, 0]
+grp.attrs["NumPart_ThisFile"] = [0, numPart, 0, 0, 0, 0]
+grp.attrs["Time"] = 0.0
+grp.attrs["NumFilesPerSnapshot"] = 1
+grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
+grp.attrs["Flag_Entropy_ICs"] = [0, 0, 0, 0, 0, 0]
+grp.attrs["Dimension"] = 3
+
+#Runtime parameters
+grp = file.create_group("/RuntimePars")
+grp.attrs["PeriodicBoundariesOn"] = periodic
+
+# set seed for random number
+numpy.random.seed(1234)
+
+#Particle group
+#grp0 = file.create_group("/PartType0")
+grp1 = file.create_group("/PartType1")
+
+#generate particle positions
+r      = numpy.zeros((numPart, 3))
+r[:,0] = numpy.random.rand(N) * boxSize
+r[:,1] = numpy.random.rand(N) * boxSize
+z      = scale_height * numpy.arctanh(numpy.random.rand(2*N))
+gd     = z < boxSize / 2
+r[:,2] = z[gd][0:N]
+random = numpy.random.rand(N) > 0.5
+r[random,2] *= -1
+r[:,2] += 0.5 * boxSize
+
+#generate particle velocities (all particles start at rest)
+v      = numpy.zeros((numPart, 3))
+#v[:,2] = 
+
+
+ds = grp1.create_dataset('Velocities', (numPart, 3), 'f')
+ds[()] = v                # write the full (numPart, 3) array, not a 1-element placeholder
+v      = numpy.zeros(1)   # drop the reference to the large array only after it has been written
+
+
+m = numpy.ones((numPart, ), dtype=numpy.float32) * mass
+ds = grp1.create_dataset('Masses', (numPart,), 'f')
+ds[()] = m
+m = numpy.zeros(1)
+
+
+ids = 1 + numpy.linspace(0, numPart, numPart, endpoint=False, dtype='L')
+ds = grp1.create_dataset('ParticleIDs', (numPart, ), 'L')
+ds[()] = ids
+
+ds = grp1.create_dataset('Coordinates', (numPart, 3), 'd')
+ds[()] = r
+
+
+file.close()
diff --git a/examples/DiscPatch/GravityOnly/run.sh b/examples/DiscPatch/GravityOnly/run.sh
new file mode 100755
index 0000000000000000000000000000000000000000..9af1011ee653253f0d1b2cd26db0ac13cf11adc0
--- /dev/null
+++ b/examples/DiscPatch/GravityOnly/run.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+
+# Generate the initial conditions if they are not present.
+if [ ! -e Disc-Patch.hdf5 ]
+then
+    echo "Generating initial conditions for the disc-patch example..."
+    python makeIC.py 1000
+fi
+
+../../swift -g -t 2 disc-patch.yml
diff --git a/examples/DiscPatch/GravityOnly/test.pro b/examples/DiscPatch/GravityOnly/test.pro
new file mode 100644
index 0000000000000000000000000000000000000000..04e0afdf7e6d2b4f0122a3d7d1bd1084539c405e
--- /dev/null
+++ b/examples/DiscPatch/GravityOnly/test.pro
@@ -0,0 +1,158 @@
+;
+;  test energy / angular momentum conservation of test problem
+;
+
+iplot = 1 ; if iplot = 1, make plot of E/Lz conservation, else, simply compare final and initial energy
+
+; set physical constants
+@physunits
+
+indir    = './'
+basefile = 'Disc-Patch_'
+
+; set properties of potential
+uL   = phys.pc                  ; unit of length
+uM   = phys.msun                ; unit of mass
+uV   = 1d5                      ; unit of velocity
+
+; properties of patch
+surface_density = 10.
+scale_height    = 100.
+
+; derived units
+constG   = 10.^(alog10(phys.g)+alog10(uM)-2d0*alog10(uV)-alog10(uL)) ;
+pcentre  = [0.,0.,300.] * pc / uL
+
+;
+infile = indir + basefile + '*'
+spawn,'ls -1 '+infile,res
+nfiles = n_elements(res)
+
+
+; choose: calculate change of energy and Lz, comparing first and last
+; snapshots for all particles, or do so for a subset
+
+; compare all
+ifile   = 0
+inf     = indir + basefile + strtrim(string(ifile,'(i3.3)'),1) + '.hdf5'
+id      = h5rd(inf,'PartType1/ParticleIDs')
+nfollow = n_elements(id)
+
+; follow a subset
+nfollow  = 500                    ; number of particles to follow
+
+;
+if (iplot eq 1) then begin
+   nskip = 1
+   nsave = nfiles
+endif else begin
+   nskip = nfiles - 2
+   nsave = 2
+endelse
+
+;
+lout     = fltarr(nfollow, nsave) ; Lz
+xout     = fltarr(nfollow, nsave) ; x
+yout     = fltarr(nfollow, nsave) ; y
+zout     = fltarr(nfollow, nsave) ; z
+eout     = fltarr(nfollow, nsave) ; energies
+ekin     = fltarr(nfollow, nsave)
+epot     = fltarr(nfollow, nsave) ; 2 pi G Sigma b ln(cosh(z/b)) + const
+tout     = fltarr(nsave)
+
+
+
+ifile  = 0
+isave = 0
+for ifile=0,nfiles-1,nskip do begin
+   inf    = indir + basefile + strtrim(string(ifile,'(i3.3)'),1) + '.hdf5'
+   time   = h5ra(inf, 'Header','Time')
+   p      = h5rd(inf,'PartType1/Coordinates')
+   v      = h5rd(inf,'PartType1/Velocities')
+   id     = h5rd(inf,'PartType1/ParticleIDs')
+   indx   = sort(id)
+
+;; ;  if you want to sort particles by ID
+;;    id     = id[indx]
+;;    for ic=0,2 do begin
+;;       tmp = reform(p[ic,*]) & p[ic,*] = tmp[indx]
+;;       tmp = reform(v[ic,*]) & v[ic,*] = tmp[indx]
+;;    endfor
+   
+
+; calculate energy
+   dd  = size(p,/dimen) & npart = dd[1]
+   ener = fltarr(npart)
+   dr   = fltarr(npart) & dv = dr
+   for ic=0,2 do dr[*] = dr[*] + (p[ic,*]-pcentre[ic])^2
+   for ic=0,2 do dv[*] = dv[*] + v[ic,*]^2
+   xout[*,isave] = p[0,0:nfollow-1]-pcentre[0]
+   yout[*,isave] = p[1,0:nfollow-1]-pcentre[1]
+   zout[*,isave] = p[2,0:nfollow-1]-pcentre[2]
+   Lz  = (p[0,*]-pcentre[0]) * v[1,*] - (p[1,*]-pcentre[1]) * v[0,*]
+   dz  = reform(p[2,0:nfollow-1]-pcentre[2])
+;   print,'time = ',time,p[0,0],v[0,0],id[0]
+   ek   = 0.5 * dv
+   ep   = fltarr(nfollow)
+   ep   = 2 * !pi * constG * surface_density * scale_height * alog(cosh(abs(dz)/scale_height))
+   ener = ek + ep
+   tout(isave) = time
+   lout[*,isave] = lz[0:nfollow-1]
+   eout(*,isave) = ener[0:nfollow-1]
+   ekin(*,isave) = ek[0:nfollow-1]
+   epot(*,isave) = ep[0:nfollow-1]
+   print,format='('' time= '',f7.1,'' E= '',f9.2,'' Lz= '',e9.2)', time,eout[0],lz[0]
+   isave = isave + 1
+   
+endfor
+
+x0 = reform(xout[0,*]) ; x(t) of the first followed particle
+y0 = reform(yout[0,*]) ; y(t) of the same particle (was xout[1,*]: x of particle 1)
+z0 = reform(zout[0,*]) ; z(t) of the same particle (was xout[2,*]: x of particle 2)
+
+; calculate relative energy change
+de    = 0.0 * eout
+dl    = 0.0 * lout
+nsave = isave
+for ifile=1, nsave-1 do de[*,ifile] = (eout[*,ifile]-eout[*,0])/eout[*,0]
+for ifile=1, nsave-1 do dl[*,ifile] = (lout[*,ifile] - lout[*,0])/lout[*,0]
+
+
+; calculate statistics of energy changes
+print,' relatve energy change: (per cent) ',minmax(de) * 100.
+print,' relative Lz    change: (per cent) ',minmax(dl) * 100.
+
+; plot energy and Lz conservation for some particles
+if(iplot eq 1) then begin
+; plot results on energy conservation for some particles
+   nplot = min(10, nfollow)
+   win,0
+   xr = [min(tout), max(tout)]
+   yr = [-2,2]*1d-2             ; in percent
+   plot,[0],[0],xr=xr,yr=yr,/xs,/ys,/nodata,xtitle='time',ytitle='dE/E, dL/L (%)'
+   for i=0,nplot-1 do oplot,tout,de[i,*]
+   for i=0,nplot-1 do oplot,tout,dl[i,*],color=red
+   legend,['dE/E','dL/L'],linestyle=[0,0],color=[black,red],box=0,/bottom,/left
+   screen_to_png,'e-time.png'
+
+;  plot vertical oscillation
+   win,2
+   xr = [min(tout), max(tout)]
+   yr = [-3,3]*scale_height
+   plot,[0],[0],xr=xr,yr=yr,/xs,/ys,/iso,/nodata,xtitle='x',ytitle='y'
+   color = floor(findgen(nplot)*255/float(nplot))
+   for i=0,nplot-1 do oplot,tout,zout[i,*],color=color(i)
+   screen_to_png,'orbit.png'
+
+; make histogram of energy changes at end
+   win,6
+   ohist,de,x,y,-0.05,0.05,0.001
+   plot,x,y,psym=10,xtitle='de (%)'
+   screen_to_png,'de-hist.png'
+
+
+endif
+
+end
+
+
diff --git a/examples/DiscPatch/HydroStatic/README b/examples/DiscPatch/HydroStatic/README
new file mode 100644
index 0000000000000000000000000000000000000000..42853e6b51983f2868528202adec3fc829c2ddbc
--- /dev/null
+++ b/examples/DiscPatch/HydroStatic/README
@@ -0,0 +1,20 @@
+Generates and evolves a disc-patch, where gas is in hydrostatic
+equilibrium with an imposed external gravitational force, using the
+equations from Creasey, Theuns & Bower, 2013, MNRAS, Volume 429,
+Issue 3, p.1922-1948.
+
+To generate ICs ready for a scientific run:
+
+1) Recover a uniform glass file by running 'getGlass.sh'.
+
+2) Generate pre-ICs by running the 'makeIC.py' script.
+
+3) Run SWIFT with an isothermal EoS, no cooling nor feedback, and the
+disc-patch potential switched on and using the parameters from
+'disc-patch-icc.yml'
+
+4) The ICs are then ready to be run for a science problem. Rename the last 
+output to 'Disc-Patch-dynamic.hdf5'. These are now the ICs for the actual test.
+
+When running SWIFT with the parameters from 'disc-patch.yml' and an
+ideal gas EoS on these ICs the disc should stay in equilibrium.
diff --git a/examples/DiscPatch/HydroStatic/disc-patch-icc.yml b/examples/DiscPatch/HydroStatic/disc-patch-icc.yml
new file mode 100644
index 0000000000000000000000000000000000000000..6a27016b8a3f484b7c1c9b74594073d5f28efe90
--- /dev/null
+++ b/examples/DiscPatch/HydroStatic/disc-patch-icc.yml
@@ -0,0 +1,44 @@
+# Define the system of units to use internally. 
+InternalUnitSystem:
+  UnitMass_in_cgs:     1.9885e33     # Grams
+  UnitLength_in_cgs:   3.0856776e18  # Centimeters
+  UnitVelocity_in_cgs: 1e5           # Centimeters per second
+  UnitCurrent_in_cgs:  1   # Amperes
+  UnitTemp_in_cgs:     1   # Kelvin
+
+# Parameters governing the time integration
+TimeIntegration:
+  time_begin: 0     # The starting time of the simulation (in internal units).
+  time_end:   968.  # The end time of the simulation (in internal units).
+  dt_min:     1e-4  # The minimal time-step size of the simulation (in internal units).
+  dt_max:     1.    # The maximal time-step size of the simulation (in internal units).
+
+# Parameters governing the conserved quantities statistics
+Statistics:
+  delta_time:          1 # Time between statistics output
+  
+# Parameters governing the snapshots
+Snapshots:
+  basename:            Disc-Patch   # Common part of the name of output files
+  time_first:          0.           # Time of the first output (in internal units)
+  delta_time:          12.          # Time difference between consecutive outputs (in internal units)
+
+# Parameters for the hydrodynamics scheme
+SPH:
+  resolution_eta:        1.2349   # Target smoothing length in units of the mean inter-particle separation (1.2349 == 48Ngbs with the cubic spline kernel).
+  delta_neighbours:      0.1      # The tolerance for the targeted number of neighbours.
+  CFL_condition:         0.1      # Courant-Friedrichs-Lewy condition for time integration.
+  max_ghost_iterations:  30       # Maximal number of iterations allowed to converge towards the smoothing length.
+  max_smoothing_length:  70.      # Maximal smoothing length allowed (in internal units).
+
+# Parameters related to the initial conditions
+InitialConditions:
+  file_name:  Disc-Patch.hdf5       # The file to read
+
+# External potential parameters
+DiscPatchPotential:
+  surface_density: 10.
+  scale_height:    100.
+  z_disc:          200.
+  timestep_mult:   0.03
+  growth_time:     5.
diff --git a/examples/DiscPatch/HydroStatic/disc-patch.yml b/examples/DiscPatch/HydroStatic/disc-patch.yml
new file mode 100644
index 0000000000000000000000000000000000000000..8bd67c5b08de82bb6a3d47ccf3419f85e3e5c6b1
--- /dev/null
+++ b/examples/DiscPatch/HydroStatic/disc-patch.yml
@@ -0,0 +1,43 @@
+# Define the system of units to use internally. 
+InternalUnitSystem:
+  UnitMass_in_cgs:     1.9885e33     # Grams
+  UnitLength_in_cgs:   3.0856776e18  # Centimeters
+  UnitVelocity_in_cgs: 1e5           # Centimeters per second
+  UnitCurrent_in_cgs:  1   # Amperes
+  UnitTemp_in_cgs:     1   # Kelvin
+
+# Parameters governing the time integration
+TimeIntegration:
+  time_begin: 968   # The starting time of the simulation (in internal units).
+  time_end:   12000.  # The end time of the simulation (in internal units).
+  dt_min:     1e-4  # The minimal time-step size of the simulation (in internal units).
+  dt_max:     1.    # The maximal time-step size of the simulation (in internal units).
+
+# Parameters governing the conserved quantities statistics
+Statistics:
+  delta_time:          1 # Time between statistics output
+  
+# Parameters governing the snapshots
+Snapshots:
+  basename:           Disc-Patch-dynamic # Common part of the name of output files
+  time_first:         968.               # Time of the first output (in internal units)
+  delta_time:         24.                 # Time difference between consecutive outputs (in internal units)
+
+# Parameters for the hydrodynamics scheme
+SPH:
+  resolution_eta:        1.2349   # Target smoothing length in units of the mean inter-particle separation (1.2349 == 48Ngbs with the cubic spline kernel).
+  delta_neighbours:      0.1      # The tolerance for the targeted number of neighbours.
+  CFL_condition:         0.1      # Courant-Friedrichs-Lewy condition for time integration.
+  max_ghost_iterations:  30       # Maximal number of iterations allowed to converge towards the smoothing length.
+  max_smoothing_length:  70.      # Maximal smoothing length allowed (in internal units).
+
+# Parameters related to the initial conditions
+InitialConditions:
+  file_name:  Disc-Patch-dynamic.hdf5       # The file to read
+
+# External potential parameters
+DiscPatchPotential:
+  surface_density: 10.
+  scale_height:    100.
+  z_disc:          200.
+  timestep_mult:   0.03
diff --git a/examples/DiscPatch/HydroStatic/dynamic.pro b/examples/DiscPatch/HydroStatic/dynamic.pro
new file mode 100644
index 0000000000000000000000000000000000000000..c02c65fe418e84cdd62978dbddcf5a641fa4c156
--- /dev/null
+++ b/examples/DiscPatch/HydroStatic/dynamic.pro
@@ -0,0 +1,129 @@
+;
+;  compare the simulated density profile with the analytic one and histogram the particle velocities
+;
+
+iplot = 1 ; if iplot = 1, make plot of E/Lz conservation, else, simply compare final and initial energy
+
+; set physical constants
+@physunits
+
+indir    = './'                     ; directory holding the snapshots
+basefile = 'Disc-Patch-dynamic_'    ; snapshot prefix: must match Snapshots:basename in disc-patch.yml
+
+; set properties of potential
+uL   = phys.pc                  ; unit of length
+uM   = phys.msun                ; unit of mass
+uV   = 1d5                      ; unit of velocity
+
+; properties of patch
+surface_density = 10.
+scale_height    = 100.
+z_disk          = 200.;
+gamma           = 5./3.
+
+; derived units
+constG   = 10.^(alog10(phys.g)+alog10(uM)-2d0*alog10(uV)-alog10(uL)) ;
+pcentre  = [0.,0.,z_disk] * pc / uL
+utherm     = !pi * constG * surface_density * scale_height / (gamma-1.)
+soundspeed = sqrt(gamma * (gamma-1.) * utherm)
+t_dyn      = sqrt(scale_height / (constG * surface_density))
+
+;
+infile = indir + basefile + '*'
+spawn,'ls -1 '+infile,res
+nfiles = n_elements(res)
+
+
+; choose: calculate change of energy and Lz, comparing first and last
+; snapshots for all particles, or do so for a subset
+
+; compare all
+ifile   = 0
+inf     = indir + basefile + strtrim(string(ifile,'(i3.3)'),1) + '.hdf5'
+id      = h5rd(inf,'PartType0/ParticleIDs')
+nfollow = n_elements(id)
+
+
+; compute analytic profile
+nbins = 100
+zbins = findgen(nbins)/float(nbins-1) * 2 * scale_height
+rbins = (surface_density/(2.*scale_height)) / cosh(abs(zbins)/scale_height)^2
+
+
+; plot analytic profile
+wset,0
+plot,[0],[0],xr=[0,2*scale_height],yr=[0,max(rbins)],/nodata,xtitle='|z|',ytitle=textoidl('\rho')
+oplot,zbins,rbins,color=blue
+
+ifile  = 0
+nskip   = nfiles - 1
+isave  = 0
+nplot  = 8192 ; randomly plot particles
+color = floor(findgen(nfiles)/float(nfiles-1)*255)
+;for ifile=0,nfiles-1,nskip do begin
+tsave  = [0.]
+toplot = [1,nfiles-1]
+for iplot=0,1 do begin
+   ifile  = toplot[iplot]
+   inf    = indir + basefile + strtrim(string(ifile,'(i3.3)'),1) + '.hdf5'
+   time   = h5ra(inf, 'Header','Time')
+   tsave  = [tsave, time]
+   print,' time= ',time
+   p      = h5rd(inf,'PartType0/Coordinates')
+   v      = h5rd(inf,'PartType0/Velocities')
+   id     = h5rd(inf,'PartType0/ParticleIDs')
+   rho    = h5rd(inf,'PartType0/Density')
+   h      = h5rd(inf,'PartType0/SmoothingLength')
+   utherm = h5rd(inf,'PartType0/InternalEnergy')
+   indx   = sort(id)
+
+; subtract disk centre
+   for ic=0,2 do p[ic,*]=p[ic,*] - pcentre[ic]
+
+
+;; ;  if you want to sort particles by ID
+;;    id     = id[indx]
+;;    rho    = rho[indx]
+;;    utherm = utherm[indx]
+;;    h      = h[indx]
+;;    for ic=0,2 do begin
+;;       tmp = reform(p[ic,*]) & p[ic,*] = tmp[indx]
+;;       tmp = reform(v[ic,*]) & v[ic,*] = tmp[indx]
+;;    endfor
+   
+   ip = floor(randomu(ifile+1,nplot)*n_elements(rho))
+   color = red
+   if(ifile eq 1) then begin
+      color=black
+   endif else begin
+      color=red
+   endelse
+   oplot,abs(p[2,ip]), rho[ip], psym=3, color=color
+
+   isave = isave + 1
+   
+endfor
+
+; time in units of dynamical time
+tsave = tsave[1:*] / t_dyn
+
+label = ['']
+for i=0,n_elements(tsave)-1 do label=[label,'time/t_dynamic='+string(tsave[i],format='(f8.0)')]
+label = label[1:*]
+legend,['analytic',label[0],label[1]],linestyle=[0,0,0],color=[blue,black,red],box=0,/top,/right
+
+; make histograms of particle velocities
+xr    = 1d-3 * [-1,1]
+bsize = 1.d-5
+ohist,v[0,*]/soundspeed,x,vx,xr[0],xr[1],bsize
+ohist,v[1,*]/soundspeed,y,vy,xr[0],xr[1],bsize
+ohist,v[2,*]/soundspeed,z,vz,xr[0],xr[1],bsize
+wset,2
+plot,x,vx,psym=10,xtitle='velocity/soundspeed',ytitle='pdf',/nodata,xr=xr,/xs
+oplot,x,vx,psym=10,color=black
+oplot,y,vy,psym=10,color=blue
+oplot,z,vz,psym=10,color=red
+legend,['vx/c','vy/c','vz/c'],linestyle=[0,0,0],color=[black,blue,red],box=0,/top,/right
+end
+
+
diff --git a/examples/DiscPatch/HydroStatic/getGlass.sh b/examples/DiscPatch/HydroStatic/getGlass.sh
new file mode 100755
index 0000000000000000000000000000000000000000..ffd92e88deae6e91237059adac2a6c2067caee46
--- /dev/null
+++ b/examples/DiscPatch/HydroStatic/getGlass.sh
@@ -0,0 +1,2 @@
+#!/bin/bash
+wget http://virgodb.cosma.dur.ac.uk/swift-webstorage/ICs/glassCube_32.hdf5
diff --git a/examples/DiscPatch/HydroStatic/makeIC.py b/examples/DiscPatch/HydroStatic/makeIC.py
new file mode 100644
index 0000000000000000000000000000000000000000..48cc578658a9520477d40bf504e3eb7c3c8e5164
--- /dev/null
+++ b/examples/DiscPatch/HydroStatic/makeIC.py
@@ -0,0 +1,255 @@
+###############################################################################
+ # This file is part of SWIFT.
+ # Copyright (c) 2016 John A. Regan (john.a.regan@durham.ac.uk)
+ #                    Tom Theuns (tom.theuns@durham.ac.uk)
+ # 
+ # This program is free software: you can redistribute it and/or modify
+ # it under the terms of the GNU Lesser General Public License as published
+ # by the Free Software Foundation, either version 3 of the License, or
+ # (at your option) any later version.
+ # 
+ # This program is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ # GNU General Public License for more details.
+ # 
+ # You should have received a copy of the GNU Lesser General Public License
+ # along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ # 
+ ##############################################################################
+
+import h5py
+import sys
+import numpy
+import math
+import random
+import matplotlib.pyplot as plt
+
+# Generates a disc-patch in hydrostatic equilibrium
+# see Creasey, Theuns & Bower, 2013, for the equations:
+# disc parameters are: surface density sigma
+#                      scale height b
+# density: rho(z) = (sigma/2b) sech^2(z/b)
+# isothermal velocity dispersion = <v_z^2> = b pi G sigma
+# grad potential  = 2 pi G sigma tanh(z/b)
+# potential       = 2 pi G sigma b ln(cosh(z/b)) + const
+# Dynamical time  = sqrt(b / (G sigma))
+# to obtain the 1/ch^2(z/b) profile from a uniform profile (a glass, say, or a uniform random variable), note that, when integrating in z
+# \int 0^z dz/ch^2(z) = tanh(z)-tanh(0) = \int_0^x dx = x (where the last integral refers to a uniform density distribution), so that z = atanh(x)
+# usage: python makeIC.py
+
+# physical constants in cgs
+NEWTON_GRAVITY_CGS  = 6.672e-8
+SOLAR_MASS_IN_CGS   = 1.9885e33
+PARSEC_IN_CGS       = 3.0856776e18
+PROTON_MASS_IN_CGS  = 1.6726231e-24  # grams (negative exponent)
+YEAR_IN_CGS         = 3.154e+7
+
+# choice of units
+const_unit_length_in_cgs   =   (PARSEC_IN_CGS)
+const_unit_mass_in_cgs     =   (SOLAR_MASS_IN_CGS)
+const_unit_velocity_in_cgs =   (1e5)
+
+print "UnitMass_in_cgs:     ", const_unit_mass_in_cgs 
+print "UnitLength_in_cgs:   ", const_unit_length_in_cgs
+print "UnitVelocity_in_cgs: ", const_unit_velocity_in_cgs
+
+
+# parameters of potential
+surface_density = 10.
+scale_height    = 100.
+gamma           = 5./3.
+
+# derived units (everything below is expressed in the internal unit system chosen above)
+const_unit_time_in_cgs = (const_unit_length_in_cgs / const_unit_velocity_in_cgs)
+const_G                = ((NEWTON_GRAVITY_CGS*const_unit_mass_in_cgs*const_unit_time_in_cgs*const_unit_time_in_cgs/(const_unit_length_in_cgs*const_unit_length_in_cgs*const_unit_length_in_cgs)))
+print 'G=', const_G
+utherm                 = math.pi * const_G * surface_density * scale_height / (gamma-1)
+v_disp                 = numpy.sqrt(2 * utherm)
+soundspeed             = numpy.sqrt(gamma * (gamma-1.) * utherm)  # ideal gas: c_s^2 = gamma * (gamma-1) * u
+t_dyn                  = numpy.sqrt(scale_height / (const_G * surface_density))
+t_cross                = scale_height / soundspeed
+print 'dynamical time = ',t_dyn,' sound crossing time = ',t_cross,' sound speed= ',soundspeed,' 3D velocity dispersion = ',v_disp,' thermal_energy= ',utherm
+
+
+# Parameters
+periodic= 1            # 1 For periodic box
+boxSize = 400.         #  [pc] (the unit length chosen above is one parsec)
+Radius  = 100.         # maximum radius of particles [pc]
+G       = const_G 
+
+# File
+fileName = "Disc-Patch.hdf5" 
+
+#---------------------------------------------------
+mass           = 1
+
+#--------------------------------------------------
+
+
+# using glass ICs
+# read glass file and generate gas positions and tile it ntile times in each dimension
+ntile   = 1
+inglass = 'glassCube_32.hdf5'
+infile  = h5py.File(inglass, "r")
+one_glass_p = infile["/PartType0/Coordinates"][:,:]
+one_glass_h = infile["/PartType0/SmoothingLength"][:]
+
+# scale in [-0.5,0.5]*BoxSize / ntile
+one_glass_p[:,:] -= 0.5
+one_glass_p      *= boxSize / ntile
+one_glass_h      *= boxSize / ntile
+ndens_glass       = (one_glass_h.shape[0]) / (boxSize/ntile)**3
+h_glass           = numpy.amin(one_glass_h) * (boxSize/ntile)
+
+glass_p = []
+glass_h = []
+for ix in range(0,ntile):
+    for iy in range(0,ntile):
+        for iz in range(0,ntile):
+            shift = one_glass_p.copy()
+            shift[:,0] += (ix-(ntile-1)/2.) * boxSize / ntile
+            shift[:,1] += (iy-(ntile-1)/2.) * boxSize / ntile
+            shift[:,2] += (iz-(ntile-1)/2.) * boxSize / ntile
+            glass_p.append(shift)
+            glass_h.append(one_glass_h.copy())
+
+glass_p = numpy.concatenate(glass_p, axis=0)
+glass_h = numpy.concatenate(glass_h, axis=0)
+
+# random shuffle of glass ICs
+numpy.random.seed(12345)
+indx   = numpy.random.rand(numpy.shape(glass_h)[0])
+indx   = numpy.argsort(indx)
+glass_p = glass_p[indx, :]
+glass_h = glass_h[indx]
+
+# select numGas of them
+numGas = 8192
+pos    = glass_p[0:numGas,:]
+h      = glass_h[0:numGas]
+numGas = numpy.shape(pos)[0]
+
+# compute further properties of ICs
+column_density = surface_density * numpy.tanh(boxSize/2./scale_height)
+enclosed_mass  = column_density * boxSize * boxSize
+pmass          = enclosed_mass / numGas
+meanrho        = enclosed_mass / boxSize**3
+print 'pmass= ',pmass,' mean(rho) = ', meanrho,' entropy= ', (gamma-1) * utherm / meanrho**(gamma-1)
+
+# desired density
+rho            = surface_density / (2. * scale_height) / numpy.cosh(abs(pos[:,2])/scale_height)**2
+u              = (1. + 0 * h) * utherm 
+entropy        = (gamma-1) * u / rho**(gamma-1)
+mass           = 0.*h + pmass
+entropy_flag   = 0
+vel            = 0 + 0 * pos
+
+# move centre of disc to middle of box
+pos[:,:]     += boxSize/2
+
+
+# create numPart dm particles
+numPart = 0
+
+# Create and write output file
+
+#File
+file = h5py.File(fileName, 'w')
+
+#Units
+grp = file.create_group("/Units")
+grp.attrs["Unit length in cgs (U_L)"] = const_unit_length_in_cgs
+grp.attrs["Unit mass in cgs (U_M)"] = const_unit_mass_in_cgs 
+grp.attrs["Unit time in cgs (U_t)"] = const_unit_length_in_cgs / const_unit_velocity_in_cgs
+grp.attrs["Unit current in cgs (U_I)"] = 1.
+grp.attrs["Unit temperature in cgs (U_T)"] = 1.
+
+# Header
+grp = file.create_group("/Header")
+grp.attrs["BoxSize"] = boxSize
+grp.attrs["NumPart_Total"] =  [numGas, numPart, 0, 0, 0, 0]
+grp.attrs["NumPart_Total_HighWord"] = [0, 0, 0, 0, 0, 0]
+grp.attrs["NumPart_ThisFile"] = [numGas, numPart, 0, 0, 0, 0]
+grp.attrs["Time"] = 0.0
+grp.attrs["NumFilesPerSnapshot"] = 1
+grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
+grp.attrs["Flag_Entropy_ICs"] = [entropy_flag]
+grp.attrs["Dimension"] = 3
+
+#Runtime parameters
+grp = file.create_group("/RuntimePars")
+grp.attrs["PeriodicBoundariesOn"] = periodic
+
+
+# write gas particles
+grp0   = file.create_group("/PartType0")
+
+ds     = grp0.create_dataset('Coordinates', (numGas, 3), 'f')
+ds[()] = pos
+
+ds     = grp0.create_dataset('Velocities', (numGas, 3), 'f')
+ds[()] = vel
+
+ds     = grp0.create_dataset('Masses', (numGas,), 'f')
+ds[()] = mass
+
+ds     = grp0.create_dataset('SmoothingLength', (numGas,), 'f')
+ds[()] = h
+
+ds = grp0.create_dataset('InternalEnergy', (numGas,), 'f')
+u = numpy.full((numGas, ), utherm)
+if (entropy_flag == 1):
+    ds[()] = entropy
+else:
+    ds[()] = u    
+
+ids = 1 + numpy.linspace(0, numGas, numGas, endpoint=False, dtype='L')
+ds = grp0.create_dataset('ParticleIDs', (numGas, ), 'L')
+ds[()] = ids
+
+print "Internal energy:", u[0]
+
+# generate dark matter particles if needed
+if(numPart > 0):
+    
+    # set seed for random number
+    numpy.random.seed(1234)
+    
+    grp1 = file.create_group("/PartType1")
+    
+    radius = Radius * (numpy.random.rand(numPart))**(1./3.)  # one sample per dark matter particle
+    ctheta = -1. + 2 * numpy.random.rand(numPart)
+    stheta = numpy.sqrt(1.-ctheta**2)
+    phi    =  2 * math.pi * numpy.random.rand(numPart)
+    r      = numpy.zeros((numPart, 3))  # NOTE(review): never filled from radius/ctheta/stheta/phi, so positions stay at the origin
+
+    speed  = vrot  # NOTE(review): vrot is not defined in this script -- TODO confirm the intended value
+    v      = numpy.zeros((numPart, 3))
+    omega  = speed / radius
+    period = 2.*math.pi/omega
+    print 'period = minimum = ',min(period), ' maximum = ',max(period)
+    
+    v[:,0] = -omega * r[:,1]
+    v[:,1] =  omega * r[:,0]
+    
+    ds = grp1.create_dataset('Coordinates', (numPart, 3), 'd')
+    ds[()] = r
+    
+    ds = grp1.create_dataset('Velocities', (numPart, 3), 'f')
+    ds[()] = v
+    v = numpy.zeros(1)
+    
+    m = numpy.full((numPart, ),10)
+    ds = grp1.create_dataset('Masses', (numPart,), 'f')
+    ds[()] = m
+    m = numpy.zeros(1)
+        
+    ids = 1 + numpy.linspace(0, numPart, numPart, endpoint=False, dtype='L')
+    ds = grp1.create_dataset('ParticleIDs', (numPart, ), 'L')
+    ds[()] = ids
+
+
+file.close()
+
+sys.exit()
diff --git a/examples/DiscPatch/HydroStatic/test.pro b/examples/DiscPatch/HydroStatic/test.pro
new file mode 100644
index 0000000000000000000000000000000000000000..950aebc65d7d34cd7aaeb2368734e5492902a912
--- /dev/null
+++ b/examples/DiscPatch/HydroStatic/test.pro
@@ -0,0 +1,142 @@
+;
+;  test energy / angular momentum conservation of test problem
+;
+
+iplot = 1 ; if iplot = 1, make plot of E/Lz conservation, else, simply compare final and initial energy
+
+; set physical constants
+@physunits
+
+indir    = './'
+basefile = 'Disc-Patch_'
+
+; set properties of potential
+uL   = phys.pc                  ; unit of length
+uM   = phys.msun                ; unit of mass
+uV   = 1d5                      ; unit of velocity
+
+; properties of patch
+surface_density = 10.
+scale_height    = 100.
+
+; derived units
+constG   = 10.^(alog10(phys.g)+alog10(uM)-2d0*alog10(uV)-alog10(uL)) ;
+pcentre  = [0.,0.,200.] * pc / uL
+
+;
+infile = indir + basefile + '*'
+spawn,'ls -1 '+infile,res
+nfiles = n_elements(res)
+
+
+; choose: calculate change of energy and Lz, comparing first and last
+; snapshots for all particles, or do so for a subset
+
+; compare all
+ifile   = 0
+inf     = indir + basefile + strtrim(string(ifile,'(i3.3)'),1) + '.hdf5'
+id      = h5rd(inf,'PartType0/ParticleIDs')
+nfollow = n_elements(id)
+
+; follow a subset
+; nfollow  = min(4000, nfollow)   ; number of particles to follow
+
+;
+if (iplot eq 1) then begin
+   nskip = 1            ; read every snapshot
+   nsave = nfiles
+endif else begin
+   nskip = nfiles - 1   ; read only the first (0) and the last (nfiles-1) snapshot
+   nsave = 2
+endelse
+
+;
+lout     = fltarr(nfollow, nsave) ; Lz
+xout     = fltarr(nfollow, nsave) ; x
+yout     = fltarr(nfollow, nsave) ; y
+zout     = fltarr(nfollow, nsave) ; z
+vzout    = fltarr(nfollow, nsave) ; vz
+rout     = fltarr(nfollow, nsave) ; rho
+hout     = fltarr(nfollow, nsave) ; h
+uout     = fltarr(nfollow, nsave) ; thermal energy
+eout     = fltarr(nfollow, nsave) ; energies
+ekin     = fltarr(nfollow, nsave) ; kinetic energy per unit mass (0.5 v^2)
+epot     = fltarr(nfollow, nsave) ; 2 pi G Sigma b ln(cosh(z/b)) + const
+tout     = fltarr(nsave) ; snapshot times
+
+ifile  = 0
+isave = 0
+for ifile=0,nfiles-1,nskip do begin
+   inf    = indir + basefile + strtrim(string(ifile,'(i3.3)'),1) + '.hdf5'
+   time   = h5ra(inf, 'Header','Time')
+   p      = h5rd(inf,'PartType0/Coordinates')
+   v      = h5rd(inf,'PartType0/Velocities')
+   id     = h5rd(inf,'PartType0/ParticleIDs')
+   rho    = h5rd(inf,'PartType0/Density')
+   h      = h5rd(inf,'PartType0/SmoothingLength')
+   utherm = h5rd(inf,'PartType0/InternalEnergy')
+   indx   = sort(id)
+
+;  if you want to sort particles by ID
+   id     = id[indx]
+   rho    = rho[indx]
+   utherm = utherm[indx]
+   h      = h[indx]
+   for ic=0,2 do begin
+      tmp = reform(p[ic,*]) & p[ic,*] = tmp[indx]
+      tmp = reform(v[ic,*]) & v[ic,*] = tmp[indx]
+   endfor
+
+; calculate energy
+   dd  = size(p,/dimen) & npart = dd[1]
+   ener = fltarr(npart)
+   dr   = fltarr(npart) & dv = dr
+   for ic=0,2 do dr[*] = dr[*] + (p[ic,*]-pcentre[ic])^2
+   for ic=0,2 do dv[*] = dv[*] + v[ic,*]^2
+   xout[*,isave] = p[0,0:nfollow-1]-pcentre[0]
+   yout[*,isave] = p[1,0:nfollow-1]-pcentre[1]
+   zout[*,isave] = p[2,0:nfollow-1]-pcentre[2]
+   vzout[*,isave]= v[2,0:nfollow-1]
+   rout[*,isave] = rho[0:nfollow-1]
+   hout[*,isave] = h[0:nfollow-1]
+   uout[*,isave] = utherm[0:nfollow-1]
+   Lz  = (p[0,*]-pcentre[0]) * v[1,*] - (p[1,*]-pcentre[1]) * v[0,*]
+   dz  = reform(p[2,0:nfollow-1]-pcentre[2])
+;   print,'time = ',time,p[0,0],v[0,0],id[0]
+   ek   = 0.5 * dv
+   ep   = fltarr(nfollow)
+   ep   = 2 * !pi * constG * surface_density * scale_height * alog(cosh(abs(dz)/scale_height))
+   ener = ek + ep
+   tout(isave) = time
+   lout[*,isave] = lz[0:nfollow-1]
+   eout(*,isave) = ener[0:nfollow-1]
+   ekin(*,isave) = ek[0:nfollow-1]
+   epot(*,isave) = ep[0:nfollow-1]
+   print,format='('' time= '',f7.1,'' E= '',f9.2,'' Lz= '',e9.2)', time,eout[0],lz[0]
+   isave = isave + 1
+   
+endfor
+
+x0 = reform(xout[0,*]) ; x, y, z track of the first followed particle over all saved snapshots
+y0 = reform(yout[0,*])
+z0 = reform(zout[0,*])
+
+
+; plot density profile and compare to analytic profile
+nplot = nfollow
+
+                                ; plot density profile
+wset,0
+xr   = [0, 3*scale_height]
+nbins = 100
+zpos  = findgen(nbins)/float(nbins-1) * max(xr)
+dens  = (surface_density/(2.d0*scale_height)) * 1./cosh(zpos/scale_height)^2
+plot,[0],[0],xr=xr,/xs,yr=[0,max(dens)*1.4],/ys,/nodata,xtitle='|z|',ytitle='density'
+oplot,zpos,dens,color=black,thick=3
+;oplot,abs(zout[*,1]),rout[*,1],psym=3 ; initial profile
+oplot,abs(zout[*,nsave-1]),rout[*,nsave-1],psym=3,color=red
+
+
+end
+
+
diff --git a/examples/EAGLE_12/eagle_12.yml b/examples/EAGLE_12/eagle_12.yml
index bac3e4684d4436403bb2497afd34a865ea4bab87..80714d87f4afa7d7e4d41ce7bf56faed856208ef 100644
--- a/examples/EAGLE_12/eagle_12.yml
+++ b/examples/EAGLE_12/eagle_12.yml
@@ -6,11 +6,6 @@ InternalUnitSystem:
   UnitCurrent_in_cgs:  1             # Amperes
   UnitTemp_in_cgs:     1             # Kelvin
 
-# Parameters for the task scheduling
-Scheduler:
-  cell_sub_size:    6000     # Value used for the original scaling tests
-  cell_split_size:  300      # Value used for the original scaling tests
-
 # Parameters governing the time integration
 TimeIntegration:
   time_begin: 0.    # The starting time of the simulation (in internal units).
diff --git a/examples/EAGLE_25/eagle_25.yml b/examples/EAGLE_25/eagle_25.yml
index cd5a1211b68c4ec2bebcd565713cc52a7b8de6df..6afb737677040ba0605d4e3116800f079a059be4 100644
--- a/examples/EAGLE_25/eagle_25.yml
+++ b/examples/EAGLE_25/eagle_25.yml
@@ -6,11 +6,6 @@ InternalUnitSystem:
   UnitCurrent_in_cgs:  1             # Amperes
   UnitTemp_in_cgs:     1             # Kelvin
 
-# Parameters for the task scheduling
-Scheduler:
-  cell_sub_size:    6000     # Value used for the original scaling tests
-  cell_split_size:  300      # Value used for the original scaling tests
-
 # Parameters governing the time integration
 TimeIntegration:
   time_begin: 0.    # The starting time of the simulation (in internal units).
diff --git a/examples/EAGLE_50/eagle_50.yml b/examples/EAGLE_50/eagle_50.yml
index 38a9c165796359dfdabef423154299ee04ae8c76..d9e5d46326780fe5b2abde025b50a7ec667b19b1 100644
--- a/examples/EAGLE_50/eagle_50.yml
+++ b/examples/EAGLE_50/eagle_50.yml
@@ -6,11 +6,6 @@ InternalUnitSystem:
   UnitCurrent_in_cgs:  1             # Amperes
   UnitTemp_in_cgs:     1             # Kelvin
 
-# Parameters for the task scheduling
-Scheduler:
-  cell_sub_size:    6000     # Value used for the original scaling tests
-  cell_split_size:  300      # Value used for the original scaling tests
-
 # Parameters governing the time integration
 TimeIntegration:
   time_begin: 0.    # The starting time of the simulation (in internal units).
diff --git a/examples/ExternalPointMass/externalPointMass.yml b/examples/ExternalPointMass/externalPointMass.yml
index d06c165651ce8f33692d1512ddd8fdae80ffb556..ce300b32157361e7860d201c186823471a179c0a 100644
--- a/examples/ExternalPointMass/externalPointMass.yml
+++ b/examples/ExternalPointMass/externalPointMass.yml
@@ -38,7 +38,7 @@ InitialConditions:
   shift_z:    50.
 
 # External potential parameters
-PointMass:
+PointMassPotential:
   position_x:      50.     # location of external point mass in internal units
   position_y:      50.
   position_z:      50.	
diff --git a/examples/ExternalPointMass/makeIC.py b/examples/ExternalPointMass/makeIC.py
index 37fc46a9243b2a4c42029de4587082f9efb11f43..326183398933c88d7348e72e00343064b3e3a64c 100644
--- a/examples/ExternalPointMass/makeIC.py
+++ b/examples/ExternalPointMass/makeIC.py
@@ -83,7 +83,7 @@ grp.attrs["Time"] = 0.0
 grp.attrs["NumFilesPerSnapshot"] = 1
 grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
 grp.attrs["Flag_Entropy_ICs"] = [0, 0, 0, 0, 0, 0]
-
+grp.attrs["Dimension"] = 3
 
 #Runtime parameters
 grp = file.create_group("/RuntimePars")
diff --git a/examples/Gradients/gradientsCartesian.yml b/examples/Gradients/gradientsCartesian.yml
new file mode 100644
index 0000000000000000000000000000000000000000..917a4803004c2ce89984beb857cb1691d9a1ec1b
--- /dev/null
+++ b/examples/Gradients/gradientsCartesian.yml
@@ -0,0 +1,36 @@
+# Define the system of units to use internally. 
+InternalUnitSystem:
+  UnitMass_in_cgs:     1   # Grams
+  UnitLength_in_cgs:   1   # Centimeters
+  UnitVelocity_in_cgs: 1   # Centimeters per second
+  UnitCurrent_in_cgs:  1   # Amperes
+  UnitTemp_in_cgs:     1   # Kelvin
+
+# Parameters governing the time integration
+TimeIntegration:
+  time_begin: 0.    # The starting time of the simulation (in internal units).
+  time_end:   1e-6    # The end time of the simulation (in internal units).
+  dt_min:     1e-6  # The minimal time-step size of the simulation (in internal units).
+  dt_max:     1e-6  # The maximal time-step size of the simulation (in internal units).
+
+# Parameters governing the snapshots
+Snapshots:
+  basename:            gradients_cartesian # Common part of the name of output files
+  time_first:          0.  # Time of the first output (in internal units)
+  delta_time:          5e-7 # Time difference between consecutive outputs (in internal units)
+
+# Parameters governing the conserved quantities statistics
+Statistics:
+  delta_time:          1e-6 # Time between statistics output
+
+# Parameters for the hydrodynamics scheme
+SPH:
+  resolution_eta:        1.2348   # Target smoothing length in units of the mean inter-particle separation (1.2348 == 48Ngbs with the cubic spline kernel).
+  delta_neighbours:      0.1      # The tolerance for the targeted number of neighbours.
+  max_smoothing_length:  0.01     # Maximal smoothing length allowed (in internal units).
+  CFL_condition:         0.1      # Courant-Friedrichs-Lewy condition for time integration.
+
+# Parameters related to the initial conditions
+InitialConditions:
+  file_name:  ./Gradients_cartesian.hdf5       # The file to read
+
diff --git a/examples/Gradients/gradientsRandom.yml b/examples/Gradients/gradientsRandom.yml
new file mode 100644
index 0000000000000000000000000000000000000000..209f30060f031f7d50a15ffbf8ad0e7fe5b013b8
--- /dev/null
+++ b/examples/Gradients/gradientsRandom.yml
@@ -0,0 +1,36 @@
+# Define the system of units to use internally. 
+InternalUnitSystem:
+  UnitMass_in_cgs:     1   # Grams
+  UnitLength_in_cgs:   1   # Centimeters
+  UnitVelocity_in_cgs: 1   # Centimeters per second
+  UnitCurrent_in_cgs:  1   # Amperes
+  UnitTemp_in_cgs:     1   # Kelvin
+
+# Parameters governing the time integration
+TimeIntegration:
+  time_begin: 0.    # The starting time of the simulation (in internal units).
+  time_end:   1e-6    # The end time of the simulation (in internal units).
+  dt_min:     1e-6  # The minimal time-step size of the simulation (in internal units).
+  dt_max:     1e-6  # The maximal time-step size of the simulation (in internal units).
+
+# Parameters governing the snapshots
+Snapshots:
+  basename:            gradients_random # Common part of the name of output files
+  time_first:          0.  # Time of the first output (in internal units)
+  delta_time:          5e-7 # Time difference between consecutive outputs (in internal units)
+
+# Parameters governing the conserved quantities statistics
+Statistics:
+  delta_time:          1e-6 # Time between statistics output
+
+# Parameters for the hydrodynamics scheme
+SPH:
+  resolution_eta:        1.2348   # Target smoothing length in units of the mean inter-particle separation (1.2348 == 48Ngbs with the cubic spline kernel).
+  delta_neighbours:      0.1      # The tolerance for the targeted number of neighbours.
+  max_smoothing_length:  0.01     # Maximal smoothing length allowed (in internal units).
+  CFL_condition:         0.1      # Courant-Friedrichs-Lewy condition for time integration.
+
+# Parameters related to the initial conditions
+InitialConditions:
+  file_name:  ./Gradients_random.hdf5       # The file to read
+
diff --git a/examples/Gradients/gradientsStretched.yml b/examples/Gradients/gradientsStretched.yml
new file mode 100644
index 0000000000000000000000000000000000000000..592a70762988fca764c3ec7dcbc9bfcc9a8f2751
--- /dev/null
+++ b/examples/Gradients/gradientsStretched.yml
@@ -0,0 +1,36 @@
+# Define the system of units to use internally. 
+InternalUnitSystem:
+  UnitMass_in_cgs:     1   # Grams
+  UnitLength_in_cgs:   1   # Centimeters
+  UnitVelocity_in_cgs: 1   # Centimeters per second
+  UnitCurrent_in_cgs:  1   # Amperes
+  UnitTemp_in_cgs:     1   # Kelvin
+
+# Parameters governing the time integration
+TimeIntegration:
+  time_begin: 0.    # The starting time of the simulation (in internal units).
+  time_end:   1e-6    # The end time of the simulation (in internal units).
+  dt_min:     1e-6  # The minimal time-step size of the simulation (in internal units).
+  dt_max:     1e-6  # The maximal time-step size of the simulation (in internal units).
+
+# Parameters governing the snapshots
+Snapshots:
+  basename:            gradients_stretched # Common part of the name of output files
+  time_first:          0.  # Time of the first output (in internal units)
+  delta_time:          5e-7 # Time difference between consecutive outputs (in internal units)
+
+# Parameters governing the conserved quantities statistics
+Statistics:
+  delta_time:          1e-6 # Time between statistics output
+
+# Parameters for the hydrodynamics scheme
+SPH:
+  resolution_eta:        1.2348   # Target smoothing length in units of the mean inter-particle separation (1.2348 == 48Ngbs with the cubic spline kernel).
+  delta_neighbours:      0.1      # The tolerance for the targeted number of neighbours.
+  max_smoothing_length:  0.01     # Maximal smoothing length allowed (in internal units).
+  CFL_condition:         0.1      # Courant-Friedrichs-Lewy condition for time integration.
+
+# Parameters related to the initial conditions
+InitialConditions:
+  file_name:  ./Gradients_stretched.hdf5       # The file to read
+
diff --git a/examples/Gradients/makeICs.py b/examples/Gradients/makeICs.py
new file mode 100644
index 0000000000000000000000000000000000000000..38d035d2ad2dd3dd6daacfd6f58d824e9daf6742
--- /dev/null
+++ b/examples/Gradients/makeICs.py
@@ -0,0 +1,177 @@
+################################################################################
+# This file is part of SWIFT.
+# Copyright (c) 2015 Bert Vandenbroucke (bert.vandenbroucke@ugent.be)
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published
+# by the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+################################################################################
+
+import h5py
+import random
+import numpy as np
+import sys
+
+# Generates a swift IC file with some density gradients, to check the gradient
+# reconstruction
+
+# Parameters
+periodic= 1      # 1 For periodic box
+gamma = 5./3.     # Gas adiabatic index
+gridtype = "cartesian"
+if len(sys.argv) > 1:
+    gridtype = sys.argv[1]
+
+# stretched cartesian box ######################################################
+if gridtype == "stretched":
+    fileName = "Gradients_stretched.hdf5"
+    factor = 8   # box is 'factor' times longer in x than in y and z
+    boxSize = [ 1.0 , 1.0/factor , 1.0/factor ]
+    L = 20
+    nx1 = factor*L//2   # floor division: nx1 feeds range() and the array sizes, so it must stay an int
+    ny1 = L
+    nz1 = L
+    numfac = 2.   # the x >= 0.5 half has 'numfac' times fewer cells per dimension
+    nx2 = int(nx1/numfac)
+    ny2 = int(ny1/numfac)
+    nz2 = int(nz1/numfac)
+    npart = nx1*ny1*nz1 + nx2*ny2*nz2
+    vol = boxSize[0] * boxSize[1] * boxSize[2]
+    partVol1 = 0.5*vol/(nx1*ny1*nz1)   # each half of the box holds half the volume
+    partVol2 = 0.5*vol/(nx2*ny2*nz2)
+
+    coords = np.zeros((npart,3))
+    h = np.zeros((npart,1))
+    ids = np.zeros((npart,1), dtype='L')
+    idx = 0
+    dcell = 0.5/nx1   # cell size of the fine grid filling x < 0.5
+    for i in range(nx1):
+        for j in range(ny1):
+            for k in range(nz1):
+                coords[idx,0] = (i+0.5)*dcell
+                coords[idx,1] = (j+0.5)*dcell
+                coords[idx,2] = (k+0.5)*dcell
+                h[idx] = 0.56/nx1
+                ids[idx] = idx
+                idx += 1
+    dcell = 0.5/nx2   # cell size of the coarse grid filling x >= 0.5
+    for i in range(nx2):
+        for j in range(ny2):
+            for k in range(nz2):
+                coords[idx,0] = 0.5+(i+0.5)*dcell
+                coords[idx,1] = (j+0.5)*dcell
+                coords[idx,2] = (k+0.5)*dcell
+                h[idx] = 0.56/nx2
+                ids[idx] = idx
+                idx += 1
+
+# cartesian box ################################################################
+if gridtype == "cartesian":
+    fileName = "Gradients_cartesian.hdf5"
+    boxSize = [ 1.0 , 1.0 , 1.0 ]
+    nx = 20
+    npart = nx**3
+    partVol = 1./npart
+    coords = np.zeros((npart,3))
+    h = np.zeros((npart,1))
+    ids = np.zeros((npart,1), dtype='L')
+    idx = 0
+    dcell = 1./nx
+    for i in range(nx):
+        for j in range(nx):
+            for k in range(nx):
+                coords[idx,0] = (i+0.5)*dcell
+                coords[idx,1] = (j+0.5)*dcell
+                coords[idx,2] = (k+0.5)*dcell
+                h[idx] = 1.12/nx
+                ids[idx] = idx
+                idx += 1
+
+# random box ###################################################################
+if gridtype == "random":
+    fileName = "Gradients_random.hdf5"
+    boxSize = [ 1.0 , 1.0 , 1.0 ]
+    glass = h5py.File("../Glass/glass_50000.hdf5", "r")
+    coords = np.array(glass["/PartType0/Coordinates"])
+    npart = len(coords)
+    partVol = 1./npart
+    h = np.zeros((npart,1))
+    ids = np.zeros((npart,1), dtype='L')
+    for i in range(npart):
+        h[i] = 0.019
+        ids[i] = i
+
+v = np.zeros((npart,3))
+m = np.zeros((npart,1))
+rho = np.zeros((npart,1))
+u = np.zeros((npart,1))
+
+for i in range(npart):
+    rhox = coords[i,0]
+    if coords[i,0] < 0.75:
+        rhox = 0.75
+    if coords[i,0] < 0.25:
+        rhox = 1.-coords[i,0]
+    rhoy = 1.+boxSize[1]-coords[i,1]
+    if coords[i,1] < 0.75*boxSize[1]:
+        rhoy = 1. + 0.25*boxSize[1]
+    if coords[i,1] < 0.25*boxSize[1]:
+        rhoy = 1.+coords[i,1]
+    rhoz = 1.
+    rho[i] = rhox + rhoy + rhoz
+    P = 1.
+    u[i] = P / ((gamma-1.)*rho[i])
+    if gridtype == "stretched":
+        if coords[i,0] < 0.5:
+            m[i] = rho[i] * partVol1
+        else:
+            m[i] = rho[i] * partVol2
+    else:
+        m[i] = rho[i] * partVol
+
+#File
+file = h5py.File(fileName, 'w')
+
+# Header
+grp = file.create_group("/Header")
+grp.attrs["BoxSize"] = boxSize
+grp.attrs["NumPart_Total"] =  [npart, 0, 0, 0, 0, 0]
+grp.attrs["NumPart_Total_HighWord"] = [0, 0, 0, 0, 0, 0]
+grp.attrs["NumPart_ThisFile"] = [npart, 0, 0, 0, 0, 0]
+grp.attrs["Time"] = 0.0
+grp.attrs["NumFilesPerSnapshot"] = 1
+grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
+grp.attrs["Flag_Entropy_ICs"] = [0, 0, 0, 0, 0, 0]
+
+#Runtime parameters
+grp = file.create_group("/RuntimePars")
+grp.attrs["PeriodicBoundariesOn"] = periodic
+
+#Particle group
+grp = file.create_group("/PartType0")
+ds = grp.create_dataset('Coordinates', (npart, 3), 'd')
+ds[()] = coords
+ds = grp.create_dataset('Velocities', (npart, 3), 'f')
+ds[()] = v
+ds = grp.create_dataset('Masses', (npart,1), 'f')
+ds[()] = m
+ds = grp.create_dataset('Density', (npart,1), 'd')
+ds[()] = rho
+ds = grp.create_dataset('SmoothingLength', (npart,1), 'f')
+ds[()] = h
+ds = grp.create_dataset('InternalEnergy', (npart,1), 'd')
+ds[()] = u
+ds = grp.create_dataset('ParticleIDs', (npart,1), 'L')
+ds[()] = ids[:]
+
+file.close()
diff --git a/examples/Gradients/plot.py b/examples/Gradients/plot.py
new file mode 100644
index 0000000000000000000000000000000000000000..d6750ffc581437ebbf402ec44bcb1d14cb82a698
--- /dev/null
+++ b/examples/Gradients/plot.py
@@ -0,0 +1,50 @@
+################################################################################
+# This file is part of SWIFT.
+# Copyright (c) 2015 Bert Vandenbroucke (bert.vandenbroucke@ugent.be)
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published
+# by the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+################################################################################
+
+import scipy as sp
+import pylab as pl
+import numpy as np
+import h5py
+import sys
+
+# this file plots the gradients of the density in the x and y direction for
+# the given input file and saves the result as gradients_NAME.png
+
+inputfile = sys.argv[1]
+outputfile = "gradients_{0}.png".format(sys.argv[2])
+
+f = h5py.File(inputfile, "r")
+rho = np.array(f["/PartType0/Density"])
+gradrho = np.array(f["/PartType0/GradDensity"])
+coords = np.array(f["/PartType0/Coordinates"])
+
+fig, ax = pl.subplots(1,2, sharey=True)
+
+ax[0].plot(coords[:,0], rho, "r.", label="density")
+ax[0].plot(coords[:,0], gradrho[:,0], "b.", label="grad density x")
+ax[0].set_xlabel("x")
+ax[0].legend(loc="best")
+
+ax[1].plot(coords[:,1], rho, "r.", label="density")
+ax[1].plot(coords[:,1], gradrho[:,1], "b.", label="grad density y")
+ax[1].set_xlabel("y")
+ax[1].legend(loc="best")
+
+pl.tight_layout()
+pl.savefig(outputfile)
diff --git a/examples/Gradients/run.sh b/examples/Gradients/run.sh
new file mode 100755
index 0000000000000000000000000000000000000000..cc1adc676427b257445f64a011ed8ebee87285ab
--- /dev/null
+++ b/examples/Gradients/run.sh
@@ -0,0 +1,13 @@
+#! /bin/bash
+
+python makeICs.py stretched
+../swift -s -t 2 gradientsStretched.yml
+python plot.py gradients_stretched_001.hdf5 stretched
+
+python makeICs.py cartesian
+../swift -s -t 2 gradientsCartesian.yml
+python plot.py gradients_cartesian_001.hdf5 cartesian
+
+python makeICs.py random
+../swift -s -t 2 gradientsRandom.yml
+python plot.py gradients_random_001.hdf5 random
diff --git a/examples/GreshoVortex_2D/makeIC.py b/examples/GreshoVortex_2D/makeIC.py
index 96fa7f098d5eb26c42a984e2b5ec94bafc710dc3..4f4ec3407b04971882fbf3d7d7479e74bf56c762 100644
--- a/examples/GreshoVortex_2D/makeIC.py
+++ b/examples/GreshoVortex_2D/makeIC.py
@@ -87,6 +87,7 @@ grp.attrs["Time"] = 0.0
 grp.attrs["NumFileOutputsPerSnapshot"] = 1
 grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
 grp.attrs["Flag_Entropy_ICs"] = [0, 0, 0, 0, 0, 0]
+grp.attrs["Dimension"] = 2
 
 #Runtime parameters
 grp = fileOutput.create_group("/RuntimePars")
diff --git a/examples/IsothermalPotential/GravityOnly/makeIC.py b/examples/IsothermalPotential/GravityOnly/makeIC.py
index 88115668bd11e19f93765860540dcf33c6ae5c64..07993f19d40a9a3b9a4b86c9dd8c44f7e6fa3d7e 100644
--- a/examples/IsothermalPotential/GravityOnly/makeIC.py
+++ b/examples/IsothermalPotential/GravityOnly/makeIC.py
@@ -101,7 +101,7 @@ grp.attrs["Time"] = 0.0
 grp.attrs["NumFilesPerSnapshot"] = 1
 grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
 grp.attrs["Flag_Entropy_ICs"] = [0, 0, 0, 0, 0, 0]
-
+grp.attrs["Dimension"] = 3
 
 #Runtime parameters
 grp = file.create_group("/RuntimePars")
diff --git a/examples/IsothermalPotential/GravityOnly/test.pro b/examples/IsothermalPotential/GravityOnly/test.pro
index ac1f5e915c38c107408ffed9579c52944295f079..edfa50121d2e5adb7e039f3c38d6d4c0b4d5e34f 100644
--- a/examples/IsothermalPotential/GravityOnly/test.pro
+++ b/examples/IsothermalPotential/GravityOnly/test.pro
@@ -127,7 +127,7 @@ print,' relative Lz    change: (per cent) ',minmax(dl) * 100.
 if(iplot eq 1) then begin
 ; plot results on energy conservation for some particles
    nplot = min(10, nfollow)
-   wset,0
+   win,0
    xr = [min(tout), max(tout)]
    yr = [-2,2]*1d-2             ; in percent
    plot,[0],[0],xr=xr,yr=yr,/xs,/ys,/nodata,xtitle='time',ytitle='dE/E, dL/L (%)'
@@ -137,7 +137,7 @@ if(iplot eq 1) then begin
    screen_to_png,'e-time.png'
 
 ;  plot orbits of those particles
-   wset,2
+   win,2
    xr = [-100,100]
    yr = xr
    plot,[0],[0],xr=xr,yr=yr,/xs,/ys,/iso,/nodata,xtitle='x',ytitle='y'
@@ -146,7 +146,7 @@ if(iplot eq 1) then begin
    screen_to_png,'orbit.png'
 
 ; plot radial position of these particles
-   wset,4
+   win,4
    xr = [min(tout), max(tout)]
    yr = [0,80]
    plot,[0],[0],xr=xr,yr=yr,/xs,/ys,/nodata,xtitle='t',ytitle='r'
@@ -155,7 +155,7 @@ for i=0,nplot-1 do begin dr = sqrt(reform(xout[i,*])^2 + reform(yout[i,*])^2) &
    screen_to_png,'r-time.png'
 
 ; make histogram of energy changes at end
-   wset,6
+   win,6
    ohist,de,x,y,-0.05,0.05,0.001
    plot,x,y,psym=10,xtitle='de (%)'
    screen_to_png,'de-hist.png'
diff --git a/examples/KelvinHelmholtz_2D/makeIC.py b/examples/KelvinHelmholtz_2D/makeIC.py
index 5c8632dea52ef301c453cfbf21c35923f12e2d5a..bd0f39ed90faf0d67ff4a508bff83067bf748d43 100644
--- a/examples/KelvinHelmholtz_2D/makeIC.py
+++ b/examples/KelvinHelmholtz_2D/makeIC.py
@@ -120,6 +120,7 @@ grp.attrs["Time"] = 0.0
 grp.attrs["NumFileOutputsPerSnapshot"] = 1
 grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
 grp.attrs["Flag_Entropy_ICs"] = [0, 0, 0, 0, 0, 0]
+grp.attrs["Dimension"] = 2
 
 #Runtime parameters
 grp = fileOutput.create_group("/RuntimePars")
diff --git a/examples/Makefile.am b/examples/Makefile.am
index 187abcf9898b8024b4e4f5089de7ca51e6dd2e3c..d9e1f2fe741098fe2051155fc1ff2d66d4751cee 100644
--- a/examples/Makefile.am
+++ b/examples/Makefile.am
@@ -65,19 +65,37 @@ swift_fixdt_mpi_CFLAGS = $(MYFLAGS) $(AM_CFLAGS) $(MPI_FLAGS) -DENGINE_POLICY="e
 swift_fixdt_mpi_LDADD =  ../src/.libs/libswiftsim_mpi.a $(MPI_LIBS) $(EXTRA_LIBS)
 
 # Scripts to generate ICs
-EXTRA_DIST = UniformBox/makeIC.py UniformBox/run.sh UniformBox/uniformBox.yml \
-	     UniformDMBox/makeIC.py \
-	     PerturbedBox/makeIC.py \
-	     SedovBlast/makeIC.py SedovBlast/makeIC_fcc.py SedovBlast/solution.py SedovBlast/run.sh SedovBlast/sedov.yml \
-	     SodShock/makeIC.py SodShock/solution.py SodShock/glass_001.hdf5 SodShock/glass_002.hdf5 SodShock/rhox.py SodShock/run.sh SodShock/sodShock.yml \
-	     CosmoVolume/getIC.sh CosmoVolume/run.sh CosmoVolume/cosmoVolume.yml \
-	     BigCosmoVolume/makeIC.py \
+EXTRA_DIST = BigCosmoVolume/makeIC.py \
 	     BigPerturbedBox/makeIC_fcc.py \
-             GreshoVortex/makeIC.py GreshoVortex/solution.py \
-             MultiTypes/makeIC.py \
-             parameter_example.yml
+	     CosmoVolume/cosmoVolume.yml CosmoVolume/getIC.sh CosmoVolume/run.sh \
+	     CoolingBox/coolingBox.yml CoolingBox/energy_plot.py CoolingBox/makeIC.py CoolingBox/run.sh \
+	     EAGLE_12/eagle_12.yml EAGLE_12/getIC.sh EAGLE_12/README EAGLE_12/run.sh \
+	     EAGLE_25/eagle_25.yml EAGLE_25/getIC.sh EAGLE_25/README EAGLE_25/run.sh \
+	     EAGLE_50/eagle_50.yml EAGLE_50/getIC.sh EAGLE_50/README EAGLE_50/run.sh \
+	     ExternalPointMass/externalPointMass.yml ExternalPointMass/makeIC.py ExternalPointMass/run.sh ExternalPointMass/test.pro \
+	     GreshoVortex_2D/getGlass.sh GreshoVortex_2D/gresho.yml GreshoVortex_2D/makeIC.py GreshoVortex_2D/plotSolution.py GreshoVortex_2D/run.sh \
+	     KelvinHelmholtz_2D/kelvinHelmholtz.yml KelvinHelmholtz_2D/makeIC.py KelvinHelmholtz_2D/plotSolution.py KelvinHelmholtz_2D/run.sh \
+	     MultiTypes/makeIC.py  MultiTypes/multiTypes.yml MultiTypes/run.sh \
+	     PerturbedBox_2D/makeIC.py PerturbedBox_2D/perturbedPlane.yml \
+	     PerturbedBox_3D/makeIC.py PerturbedBox_3D/perturbedBox.yml PerturbedBox_3D/run.sh \
+	     SedovBlast_1D/makeIC.py SedovBlast_1D/plotSolution.py SedovBlast_1D/run.sh SedovBlast_1D/sedov.yml \
+	     SedovBlast_2D/getGlass.sh SedovBlast_2D/makeIC.py SedovBlast_2D/plotSolution.py SedovBlast_2D/run.sh SedovBlast_2D/sedov.yml \
+	     SedovBlast_3D/getGlass.sh SedovBlast_3D/makeIC.py SedovBlast_3D/plotSolution.py SedovBlast_3D/run.sh SedovBlast_3D/sedov.yml \
+	     SodShock_1D/makeIC.py SodShock_1D/plotSolution.py SodShock_1D/run.sh SodShock_1D/sodShock.yml \
+	     SodShock_2D/getGlass.sh SodShock_2D/makeIC.py SodShock_2D/plotSolution.py SodShock_2D/run.sh SodShock_2D/sodShock.yml \
+	     SodShock_3D/getGlass.sh SodShock_3D/makeIC.py SodShock_3D/plotSolution.py SodShock_3D/run.sh SodShock_3D/sodShock.yml \
+	     SquareTest_2D/makeIC.py SquareTest_2D/plotSolution.py SquareTest_2D/run.sh SquareTest_2D/square.yml \
+	     UniformBox_2D/makeIC.py UniformBox_2D/run.sh UniformBox_2D/uniformPlane.yml \
+	     UniformBox_3D/makeICbig.py UniformBox_3D/makeIC.py UniformBox_3D/run.sh UniformBox_3D/uniformBox.yml \
+	     UniformDMBox/makeIC.py
+
+# Default parameter file
+EXTRA_DIST += parameter_example.yml
 
 # Scripts to plot task graphs
 EXTRA_DIST += plot_tasks_MPI.py plot_tasks.py \
 	      process_plot_tasks_MPI process_plot_tasks
 
+# Script for scaling plot
+EXTRA_DIST += plot_scaling_results.py
+
diff --git a/examples/MultiTypes/makeIC.py b/examples/MultiTypes/makeIC.py
index cf889f9b6eab502f692cd6c8b4506c31664ecdcb..229450b67c02258553b588483d7cbd4fef887817 100644
--- a/examples/MultiTypes/makeIC.py
+++ b/examples/MultiTypes/makeIC.py
@@ -61,6 +61,7 @@ grp.attrs["Time"] = 0.0
 grp.attrs["NumFilesPerSnapshot"] = 1
 grp.attrs["MassTable"] = [0.0, massDM, 0.0, 0.0, 0.0, 0.0]
 grp.attrs["Flag_Entropy_ICs"] = 0
+grp.attrs["Dimension"] = 3
 
 #Runtime parameters
 grp = file.create_group("/RuntimePars")
diff --git a/examples/MultiTypes/multiTypes.yml b/examples/MultiTypes/multiTypes.yml
index 28d02fefa8168e35af696975a7c73a1bf767155e..51a6d2b478681e2e1c61e199f758e35c507ec195 100644
--- a/examples/MultiTypes/multiTypes.yml
+++ b/examples/MultiTypes/multiTypes.yml
@@ -35,8 +35,9 @@ InitialConditions:
   file_name:  ./multiTypes.hdf5     # The file to read
 
 # External potential parameters
-PointMass:
+PointMassPotential:
   position_x:      50.     # location of external point mass in internal units
   position_y:      50.
   position_z:      50.	
   mass:            1e10     # mass of external point mass in internal units
+  timestep_mult:   1e-2
diff --git a/examples/PerturbedBox_2D/makeIC.py b/examples/PerturbedBox_2D/makeIC.py
index 20b720419ff095016daad23828b81ca880ea9c2e..87a41517772570870e04c79d3694c115a909e214 100644
--- a/examples/PerturbedBox_2D/makeIC.py
+++ b/examples/PerturbedBox_2D/makeIC.py
@@ -84,6 +84,7 @@ grp.attrs["NumFilesPerSnapshot"] = 1
 grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
 grp.attrs["Flag_Entropy_ICs"] = [0, 0, 0, 0, 0, 0]
 grp.attrs["NumPart_Total"] = numPart
+grp.attrs["Dimension"] = 2
 
 #Runtime parameters
 grp = file.create_group("/RuntimePars")
diff --git a/examples/PerturbedBox_3D/makeIC.py b/examples/PerturbedBox_3D/makeIC.py
index cc7fffe14d4f361153a07101ddcec20a3c979b4a..1b0fc284e4c40b51fca45f117b92175a0ea45f31 100644
--- a/examples/PerturbedBox_3D/makeIC.py
+++ b/examples/PerturbedBox_3D/makeIC.py
@@ -86,6 +86,7 @@ grp.attrs["NumFilesPerSnapshot"] = 1
 grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
 grp.attrs["Flag_Entropy_ICs"] = [0, 0, 0, 0, 0, 0]
 grp.attrs["NumPart_Total"] = numPart
+grp.attrs["Dimension"] = 3
 
 #Runtime parameters
 grp = file.create_group("/RuntimePars")
diff --git a/examples/SedovBlast_1D/makeIC.py b/examples/SedovBlast_1D/makeIC.py
index 4bdf69eee99d98956d5e657be3f963d0cf9ea15b..0c3a311703651003dbf17da099e53bf8a607b881 100644
--- a/examples/SedovBlast_1D/makeIC.py
+++ b/examples/SedovBlast_1D/makeIC.py
@@ -70,6 +70,7 @@ grp.attrs["Time"] = 0.0
 grp.attrs["NumFilesPerSnapshot"] = 1
 grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
 grp.attrs["Flag_Entropy_ICs"] = 0
+grp.attrs["Dimension"] = 1
 
 #Runtime parameters
 grp = file.create_group("/RuntimePars")
diff --git a/examples/SedovBlast_2D/makeIC.py b/examples/SedovBlast_2D/makeIC.py
index 05233576f63f90b8d448aaa75fa6bfe7fce1f0e8..0e83c7b19b9ac9bd69e20950a64e8a49dd8d0df9 100644
--- a/examples/SedovBlast_2D/makeIC.py
+++ b/examples/SedovBlast_2D/makeIC.py
@@ -70,6 +70,7 @@ grp.attrs["Time"] = 0.0
 grp.attrs["NumFilesPerSnapshot"] = 1
 grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
 grp.attrs["Flag_Entropy_ICs"] = 0
+grp.attrs["Dimension"] = 2
 
 #Runtime parameters
 grp = file.create_group("/RuntimePars")
diff --git a/examples/SedovBlast_3D/makeIC.py b/examples/SedovBlast_3D/makeIC.py
index 3c1e36a74b53ece8b886e2fcbe5d9178d9deefbc..e1b743c6cdcd8dcc2f8da14d1d5589fb9ed111f0 100644
--- a/examples/SedovBlast_3D/makeIC.py
+++ b/examples/SedovBlast_3D/makeIC.py
@@ -70,6 +70,7 @@ grp.attrs["Time"] = 0.0
 grp.attrs["NumFilesPerSnapshot"] = 1
 grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
 grp.attrs["Flag_Entropy_ICs"] = 0
+grp.attrs["Dimension"] = 3
 
 #Runtime parameters
 grp = file.create_group("/RuntimePars")
diff --git a/examples/SedovBlast_3D/plotSolution.py b/examples/SedovBlast_3D/plotSolution.py
index f86ce17206ae1d15ff846fb14c61bbb6926e03bf..1eea372b08e084a37c001f9c53a61667277c765b 100644
--- a/examples/SedovBlast_3D/plotSolution.py
+++ b/examples/SedovBlast_3D/plotSolution.py
@@ -260,7 +260,7 @@ ylim(-5, 50)
 # Information -------------------------------------
 subplot(236, frameon=False)
 
-text(-0.49, 0.9, "Sedov blast with  $\\gamma=%.3f$ in 2D at $t=%.2f$"%(gas_gamma,time), fontsize=10)
+text(-0.49, 0.9, "Sedov blast with  $\\gamma=%.3f$ in 3D at $t=%.2f$"%(gas_gamma,time), fontsize=10)
 text(-0.49, 0.8, "Background $\\rho_0=%.2f$"%(rho_0), fontsize=10)
 text(-0.49, 0.7, "Energy injected $E_0=%.2f$"%(E_0), fontsize=10)
 plot([-0.49, 0.1], [0.62, 0.62], 'k-', lw=1)
diff --git a/examples/SodShock_1D/makeIC.py b/examples/SodShock_1D/makeIC.py
index e024188e867c1f2636187c2c53157c214752d6f7..a5c7f03b24d10e81057dbe25855f33f795218f19 100644
--- a/examples/SodShock_1D/makeIC.py
+++ b/examples/SodShock_1D/makeIC.py
@@ -90,6 +90,7 @@ grp.attrs["Time"] = 0.0
 grp.attrs["NumFilesPerSnapshot"] = 1
 grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
 grp.attrs["Flag_Entropy_ICs"] = 0
+grp.attrs["Dimension"] = 1
 
 #Runtime parameters
 grp = file.create_group("/RuntimePars")
diff --git a/examples/SodShock_2D/makeIC.py b/examples/SodShock_2D/makeIC.py
index ac2b9ab45fb68921bce7971c46048b344955140d..fdc1610df8cb87b3057323b1330e4c3044f36241 100644
--- a/examples/SodShock_2D/makeIC.py
+++ b/examples/SodShock_2D/makeIC.py
@@ -96,6 +96,7 @@ grp.attrs["Time"] = 0.0
 grp.attrs["NumFilesPerSnapshot"] = 1
 grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
 grp.attrs["Flag_Entropy_ICs"] = 0
+grp.attrs["Dimension"] = 2
 
 #Runtime parameters
 grp = file.create_group("/RuntimePars")
diff --git a/examples/SodShock_3D/makeIC.py b/examples/SodShock_3D/makeIC.py
index 84283732afc497825417546be8bc25e183ecb1cb..c71c07c6c97bb715c580f747cf8d39ddf08445c3 100644
--- a/examples/SodShock_3D/makeIC.py
+++ b/examples/SodShock_3D/makeIC.py
@@ -96,6 +96,7 @@ grp.attrs["Time"] = 0.0
 grp.attrs["NumFilesPerSnapshot"] = 1
 grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
 grp.attrs["Flag_Entropy_ICs"] = 0
+grp.attrs["Dimension"] = 3
 
 #Runtime parameters
 grp = file.create_group("/RuntimePars")
diff --git a/examples/SquareTest_2D/makeIC.py b/examples/SquareTest_2D/makeIC.py
index 2cb624f9944fcd421d95934ffeded089613e8bc9..186e653124a6ff62a964c37cf0fb2220f1152a0e 100644
--- a/examples/SquareTest_2D/makeIC.py
+++ b/examples/SquareTest_2D/makeIC.py
@@ -94,6 +94,7 @@ grp.attrs["Time"] = 0.0
 grp.attrs["NumFilesPerSnapshot"] = 1
 grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
 grp.attrs["Flag_Entropy_ICs"] = [0, 0, 0, 0, 0, 0]
+grp.attrs["Dimension"] = 2
 
 #Runtime parameters
 grp = fileOutput.create_group("/RuntimePars")
diff --git a/examples/UniformBox_2D/makeIC.py b/examples/UniformBox_2D/makeIC.py
index 41b7d695a2376b990703706977ef111be8f3a355..642896c6ec406a5a75127e024d19775ea4a8e09b 100644
--- a/examples/UniformBox_2D/makeIC.py
+++ b/examples/UniformBox_2D/makeIC.py
@@ -83,6 +83,7 @@ grp.attrs["NumFilesPerSnapshot"] = 1
 grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
 grp.attrs["Flag_Entropy_ICs"] = [0, 0, 0, 0, 0, 0]
 grp.attrs["NumPart_Total"] = numPart
+grp.attrs["Dimension"] = 2
 
 #Runtime parameters
 grp = file.create_group("/RuntimePars")
diff --git a/examples/UniformBox_3D/makeIC.py b/examples/UniformBox_3D/makeIC.py
index 1484f60596e68734f0f98685ab2ab845f2e0b407..01e37c67b6e2eec2984d62f4ffd503b23b5bd9ec 100644
--- a/examples/UniformBox_3D/makeIC.py
+++ b/examples/UniformBox_3D/makeIC.py
@@ -55,6 +55,7 @@ grp.attrs["Time"] = 0.0
 grp.attrs["NumFilesPerSnapshot"] = 1
 grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
 grp.attrs["Flag_Entropy_ICs"] = 0
+grp.attrs["Dimension"] = 3
 
 #Runtime parameters
 grp = file.create_group("/RuntimePars")
diff --git a/examples/UniformBox_3D/uniformBox.yml b/examples/UniformBox_3D/uniformBox.yml
index 7c9c74e1342bffb939131a265188cae269cc773f..8aaa802b64de46244f7066bce00f342cad8c5ef0 100644
--- a/examples/UniformBox_3D/uniformBox.yml
+++ b/examples/UniformBox_3D/uniformBox.yml
@@ -33,11 +33,3 @@ SPH:
 # Parameters related to the initial conditions
 InitialConditions:
   file_name:  ./uniformBox.hdf5     # The file to read
-
-
-  # External potential parameters
-PointMass:
-  position_x:      50.     # location of external point mass in internal units
-  position_y:      50.
-  position_z:      50.	
-  mass:            1e10     # mass of external point mass in internal units
diff --git a/examples/UniformDMBox/makeIC.py b/examples/UniformDMBox/makeIC.py
index 2aee89798a5b8bbd425a6b73528779fb1aa7db23..8e032500016eb6cc8e0decc54968bb5b841d7f93 100644
--- a/examples/UniformDMBox/makeIC.py
+++ b/examples/UniformDMBox/makeIC.py
@@ -51,6 +51,7 @@ grp.attrs["Time"] = 0.0
 grp.attrs["NumFilesPerSnapshot"] = 1
 grp.attrs["MassTable"] = [0.0, mass, 0.0, 0.0, 0.0, 0.0]
 grp.attrs["Flag_Entropy_ICs"] = 0
+grp.attrs["Dimension"] = 3
 
 #Runtime parameters
 grp = file.create_group("/RuntimePars")
diff --git a/examples/main.c b/examples/main.c
index efd40c3da9802ebfab72a51eb841c035c8490eba..895bb9003e04a4723b13ea1eaa360c5aa44e7228 100644
--- a/examples/main.c
+++ b/examples/main.c
@@ -58,6 +58,7 @@ void print_help_message() {
   printf("Valid options are:\n");
   printf("  %2s %8s %s\n", "-a", "", "Pin runners using processor affinity");
   printf("  %2s %8s %s\n", "-c", "", "Run with cosmological time integration");
+  printf("  %2s %8s %s\n", "-C", "", "Run with cooling");
   printf(
       "  %2s %8s %s\n", "-d", "",
       "Dry run. Read the parameter file, allocate memory but does not read ");
@@ -67,6 +68,8 @@ void print_help_message() {
   printf("  %2s %8s %s\n", "", "",
          "Allows user to check validy of parameter and IC files as well as "
          "memory limits.");
+  printf("  %2s %8s %s\n", "-D", "",
+         "Always drift all particles even the ones far from active particles.");
   printf("  %2s %8s %s\n", "-e", "",
          "Enable floating-point exceptions (debugging mode)");
   printf("  %2s %8s %s\n", "-f", "{int}",
@@ -75,13 +78,14 @@ void print_help_message() {
          "Run with an external gravitational potential");
   printf("  %2s %8s %s\n", "-G", "", "Run with self-gravity");
   printf("  %2s %8s %s\n", "-n", "{int}",
-         "Execute a fixed number of time steps");
+         "Execute a fixed number of time steps. When unset use the time_end "
+         "parameter to stop.");
   printf("  %2s %8s %s\n", "-s", "", "Run with SPH");
   printf("  %2s %8s %s\n", "-t", "{int}",
          "The number of threads to use on each MPI rank. Defaults to 1 if not "
          "specified.");
-  printf("  %2s %8s %s\n", "-v", "[12]",
-         "Increase the level of verbosity 1: MPI-rank 0 writes ");
+  printf("  %2s %8s %s\n", "-v", "[12]", "Increase the level of verbosity");
+  printf("  %2s %8s %s\n", "", "", "1: MPI-rank 0 writes ");
   printf("  %2s %8s %s\n", "", "", "2: All MPI-ranks write");
   printf("  %2s %8s %s\n", "-y", "{int}",
          "Time-step frequency at which task graphs are dumped");
@@ -143,9 +147,11 @@ int main(int argc, char *argv[]) {
   int nsteps = -2;
   int with_cosmology = 0;
   int with_external_gravity = 0;
+  int with_cooling = 0;
   int with_self_gravity = 0;
   int with_hydro = 0;
   int with_fp_exceptions = 0;
+  int with_drift_all = 0;
   int verbose = 0;
   int nr_threads = 1;
   char paramFileName[200] = "";
@@ -153,16 +159,22 @@ int main(int argc, char *argv[]) {
 
   /* Parse the parameters */
   int c;
-  while ((c = getopt(argc, argv, "acdef:gGhn:st:v:y:")) != -1) switch (c) {
+  while ((c = getopt(argc, argv, "acCdDef:gGhn:st:v:y:")) != -1) switch (c) {
       case 'a':
         with_aff = 1;
         break;
       case 'c':
         with_cosmology = 1;
         break;
+      case 'C':
+        with_cooling = 1;
+        break;
       case 'd':
         dry_run = 1;
         break;
+      case 'D':
+        with_drift_all = 1;
+        break;
       case 'e':
         with_fp_exceptions = 1;
         break;
@@ -253,7 +265,7 @@ int main(int argc, char *argv[]) {
 
   /* Do we choke on FP-exceptions ? */
   if (with_fp_exceptions) {
-    feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW);
+    feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW | FE_UNDERFLOW);
     if (myrank == 0) message("Floating point exceptions will be reported.");
   }
 
@@ -266,18 +278,6 @@ int main(int argc, char *argv[]) {
     message("sizeof(struct cell)  is %4zi bytes.", sizeof(struct cell));
   }
 
-/* Temporary abort to handle absence of vectorized functions */
-#ifdef WITH_VECTORIZATION
-
-#ifdef MINIMAL_SPH
-  error(
-      "Vectorized version of Minimal SPH routines not implemented yet. "
-      "Reconfigure with --disable-vec and recompile or use DEFAULT_SPH.");
-#endif
-
-#endif
-  /* End temporary fix */
-
   /* How vocal are we ? */
   const int talking = (verbose == 1 && myrank == 0) || (verbose == 2);
 
@@ -330,15 +330,12 @@ int main(int argc, char *argv[]) {
   struct hydro_props hydro_properties;
   hydro_props_init(&hydro_properties, params);
 
-  /* Initialise the external potential properties */
-  struct external_potential potential;
-  if (with_external_gravity) potential_init(params, &us, &potential);
-  if (with_external_gravity && myrank == 0) potential_print(&potential);
-
   /* Read particles and space information from (GADGET) ICs */
   char ICfileName[200] = "";
   parser_get_param_string(params, "InitialConditions:file_name", ICfileName);
   if (myrank == 0) message("Reading ICs from file '%s'", ICfileName);
+  fflush(stdout);
+
   struct part *parts = NULL;
   struct gpart *gparts = NULL;
   size_t Ngas = 0, Ngpart = 0;
@@ -435,19 +432,32 @@ int main(int argc, char *argv[]) {
     message("nr of cells at depth %i is %i.", data[0], data[1]);
   }
 
+  /* Initialise the external potential properties */
+  struct external_potential potential;
+  if (with_external_gravity)
+    potential_init(params, &prog_const, &us, &potential);
+  if (with_external_gravity && myrank == 0) potential_print(&potential);
+
+  /* Initialise the cooling function properties */
+  struct cooling_function_data cooling_func;
+  if (with_cooling) cooling_init(params, &us, &prog_const, &cooling_func);
+  if (with_cooling && myrank == 0) cooling_print(&cooling_func);
+
   /* Construct the engine policy */
   int engine_policies = ENGINE_POLICY | engine_policy_steal;
+  if (with_drift_all) engine_policies |= engine_policy_drift_all;
   if (with_hydro) engine_policies |= engine_policy_hydro;
   if (with_self_gravity) engine_policies |= engine_policy_self_gravity;
   if (with_external_gravity) engine_policies |= engine_policy_external_gravity;
   if (with_cosmology) engine_policies |= engine_policy_cosmology;
+  if (with_cooling) engine_policies |= engine_policy_cooling;
 
   /* Initialize the engine with the space and policies. */
   if (myrank == 0) clocks_gettime(&tic);
   struct engine e;
   engine_init(&e, &s, params, nr_nodes, myrank, nr_threads, with_aff,
               engine_policies, talking, &us, &prog_const, &hydro_properties,
-              &potential);
+              &potential, &cooling_func);
   if (myrank == 0) {
     clocks_gettime(&toc);
     message("engine_init took %.3f %s.", clocks_diff(&tic, &toc),
@@ -478,6 +488,8 @@ int main(int argc, char *argv[]) {
 #endif
     if (myrank == 0)
       message("Time integration ready to start. End of dry-run.");
+    engine_clean(&e);
+    free(params);
     return 0;
   }
 
@@ -518,7 +530,7 @@ int main(int argc, char *argv[]) {
 
       /* Make sure output file is empty, only on one rank. */
       char dumpfile[30];
-      snprintf(dumpfile, 30, "thread_info_MPI-step%d.dat", j);
+      snprintf(dumpfile, 30, "thread_info_MPI-step%d.dat", j + 1);
       FILE *file_thread;
       if (myrank == 0) {
         file_thread = fopen(dumpfile, "w");
@@ -544,7 +556,7 @@ int main(int argc, char *argv[]) {
             if (!e.sched.tasks[l].skip && !e.sched.tasks[l].implicit) {
               fprintf(
                   file_thread, " %03i %i %i %i %i %lli %lli %i %i %i %i %i\n",
-                  myrank, e.sched.tasks[l].last_rid, e.sched.tasks[l].type,
+                  myrank, e.sched.tasks[l].rid, e.sched.tasks[l].type,
                   e.sched.tasks[l].subtype, (e.sched.tasks[l].cj == NULL),
                   e.sched.tasks[l].tic, e.sched.tasks[l].toc,
                   (e.sched.tasks[l].ci != NULL) ? e.sched.tasks[l].ci->count
@@ -570,7 +582,7 @@ int main(int argc, char *argv[]) {
 
 #else
       char dumpfile[30];
-      snprintf(dumpfile, 30, "thread_info-step%d.dat", j);
+      snprintf(dumpfile, 30, "thread_info-step%d.dat", j + 1);
       FILE *file_thread;
       file_thread = fopen(dumpfile, "w");
       /* Add some information to help with the plots */
@@ -580,7 +592,7 @@ int main(int argc, char *argv[]) {
         if (!e.sched.tasks[l].skip && !e.sched.tasks[l].implicit)
           fprintf(
               file_thread, " %i %i %i %i %lli %lli %i %i %i %i\n",
-              e.sched.tasks[l].last_rid, e.sched.tasks[l].type,
+              e.sched.tasks[l].rid, e.sched.tasks[l].type,
               e.sched.tasks[l].subtype, (e.sched.tasks[l].cj == NULL),
               e.sched.tasks[l].tic, e.sched.tasks[l].toc,
               (e.sched.tasks[l].ci == NULL) ? 0 : e.sched.tasks[l].ci->count,
diff --git a/examples/parameter_example.yml b/examples/parameter_example.yml
index edb0885d621975850a09e4298b8e035ebb45a3cd..be2f512613edffd72eaf03689bccee0dd755726b 100644
--- a/examples/parameter_example.yml
+++ b/examples/parameter_example.yml
@@ -9,8 +9,8 @@ InternalUnitSystem:
 # Parameters for the task scheduling
 Scheduler:
   nr_queues:        0        # (Optional) The number of task queues to use. Use 0  to let the system decide.
-  cell_max_size:    8000000  # (Optional) Maximal number of interactions per task (this is the default value).
-  cell_sub_size:    8000000  # (Optional) Maximal number of interactions per sub-task  (this is the default value).
+  cell_max_size:    8000000  # (Optional) Maximal number of interactions per task if we force the split (this is the default value).
+  cell_sub_size:    64000000 # (Optional) Maximal number of interactions per sub-task  (this is the default value).
   cell_split_size:  400      # (Optional) Maximal number of particles per cell (this is the default value).
 
 # Parameters governing the time integration
@@ -25,6 +25,7 @@ Snapshots:
   basename:   output      # Common part of the name of output files
   time_first: 0.          # Time of the first output (in internal units)
   delta_time: 0.01        # Time difference between consecutive outputs (in internal units)
+  compression: 0          # (Optional) Set the level of compression of the HDF5 datasets [0-9]. 0 does no compression.
   UnitMass_in_cgs:     1  # (Optional) Unit system for the outputs (Grams)
   UnitLength_in_cgs:   1  # (Optional) Unit system for the outputs (Centimeters)
   UnitVelocity_in_cgs: 1  # (Optional) Unit system for the outputs (Centimeters per second)
@@ -62,19 +63,45 @@ DomainDecomposition:
   initial_grid_z:    10
   repartition_type:   b     # (Optional) The re-decomposition strategy ("n", "b", "v", "e" or "x").
  
-# Parameters related to external potentials
+# Parameters related to external potentials --------------------------------------------
   
 # Point mass external potentials
-PointMass:
-  position_x:      50.     # location of external point mass in internal units
+PointMassPotential:
+  position_x:      50.      # location of external point mass (internal units)
   position_y:      50.
   position_z:      50.
-  mass:            1e10     # mass of external point mass in internal units
-  timestep_mult:   0.03     # Pre-factor for the time-step condition
+  mass:            1e10     # mass of external point mass (internal units)
+  timestep_mult:   0.03     # Dimensionless pre-factor for the time-step condition
 
+# Isothermal potential parameters
 IsothermalPotential:
-  position_x:      100.     # Location of centre of isothermal potential in internal units
+  position_x:      100.     # Location of centre of isothermal potential (internal units)
   position_y:      100.
   position_z:      100.
-  vrot:            200.     # Rotation speed of isothermal potential in internal units
-  timestep_mult:   0.03     # Pre-factor for the time-step condition
+  vrot:            200.     # Rotation speed of isothermal potential (internal units)
+  timestep_mult:   0.03     # Dimensionless pre-factor for the time-step condition
+
+# Disc-patch potential parameters
+DiscPatchPotential:
+  surface_density: 10.      # Surface density of the disc (internal units)
+  scale_height:    100.     # Scale height of the disc (internal units)
+  z_disc:          200.     # Position of the disc along the z-axis (internal units)
+  timestep_mult:   0.03     # Dimensionless pre-factor for the time-step condition
+  growth_time:     5.       # (Optional) Time for the disc to grow to its final size (multiple of the dynamical time)
+
+# Parameters related to cooling function  ----------------------------------------------
+
+# Constant du/dt cooling function
+ConstCooling:
+  cooling_rate: 1.          # Cooling rate (du/dt) (internal units)
+  min_energy:   1.          # Minimal internal energy per unit mass (internal units)
+  cooling_tstep_mult: 1.    # Dimensionless pre-factor for the time-step condition
+
+# Constant lambda cooling function
+LambdaCooling:
+  lambda:                      2.0   # Cooling rate (in cgs units)
+  minimum_temperature:         1.0e4 # Minimal temperature (Kelvin)
+  mean_molecular_weight:       0.59  # Mean molecular weight
+  hydrogen_mass_abundance:     0.75  # Hydrogen mass abundance (dimensionless)
+  cooling_tstep_mult:          1.0   # Dimensionless pre-factor for the time-step condition
+
diff --git a/examples/plot_scaling_results.py b/examples/plot_scaling_results.py
index 66365bfad2edb38efa4b90c0c1602fd38bd750fd..5a76e9870bd3ec55807c7b79c475c62b14119e5c 100755
--- a/examples/plot_scaling_results.py
+++ b/examples/plot_scaling_results.py
@@ -17,6 +17,26 @@ import re
 import numpy as np
 import matplotlib.pyplot as plt
 
+params = {'axes.labelsize': 14,
+'axes.titlesize': 18,
+'font.size': 12,
+'legend.fontsize': 12,
+'xtick.labelsize': 14,
+'ytick.labelsize': 14,
+'text.usetex': True,
+'figure.subplot.left'    : 0.055,
+'figure.subplot.right'   : 0.98  ,
+'figure.subplot.bottom'  : 0.05  ,
+'figure.subplot.top'     : 0.95  ,
+'figure.subplot.wspace'  : 0.14  ,
+'figure.subplot.hspace'  : 0.12  ,
+'lines.markersize' : 6,
+'lines.linewidth' : 3.,
+'text.latex.unicode': True
+}
+plt.rcParams.update(params)
+plt.rc('font',**{'family':'sans-serif','sans-serif':['Times']})
+
 version = []
 branch = []
 revision = []
@@ -25,7 +45,10 @@ hydro_kernel = []
 hydro_neighbours = []
 hydro_eta = []
 threadList = []
-linestyle = ('ro-','bo-','go-','yo-','mo-')
+hexcols = ['#332288', '#88CCEE', '#44AA99', '#117733', '#999933', '#DDCC77',
+           '#CC6677', '#882255', '#AA4499', '#661100', '#6699CC', '#AA4466',
+           '#4477AA']
+linestyle = (hexcols[0],hexcols[1],hexcols[3],hexcols[5],hexcols[6],hexcols[8])
 #cmdLine = './swift_fixdt -s -t 16 cosmoVolume.yml'
 #platform = 'KNL'
 
@@ -45,6 +68,9 @@ elif len(sys.argv) == 5:
 elif len(sys.argv) == 6:
   inputFileNames = (sys.argv[1],sys.argv[2],sys.argv[3],sys.argv[4],sys.argv[5])
   numOfSeries = 5
+elif len(sys.argv) == 7:
+  inputFileNames = (sys.argv[1],sys.argv[2],sys.argv[3],sys.argv[4],sys.argv[5],sys.argv[6])
+  numOfSeries = 6
 
 # Get the names of the branch, Git revision, hydro scheme and hydro kernel
 def parse_header(inputFile):
@@ -53,7 +79,8 @@ def parse_header(inputFile):
     for line in f:
       if 'Branch:' in line:
         s = line.split()
-        branch.append(s[2])
+        line = s[2:]
+        branch.append(" ".join(line))
       elif 'Revision:' in line:
         s = line.split() 
         revision.append(s[2])
@@ -106,8 +133,8 @@ def parse_files():
     parse_header(file_list[0])
     
     version.append(branch[i] + " " + revision[i] + "\n" + hydro_scheme[i] + 
-                   "\n" + hydro_kernel[i] + r", $N_{neigh}$=" + hydro_neighbours[i] + 
-                   r", $\eta$=" + hydro_eta[i] + "\n")                  
+                   "\n" + hydro_kernel[i] + r", $N_{ngb}=%d$"%float(hydro_neighbours[i]) + 
+                   r", $\eta=%.3f$"%float(hydro_eta[i]))
     times.append([])
     totalTime.append([])
     speedUp.append([])
@@ -116,7 +143,7 @@ def parse_files():
     # Loop over all files for a given series and load the times
     for j in range(0,len(file_list)):
       times[i].append([])
-      times[i][j].append(np.loadtxt(file_list[j],usecols=(5,)))
+      times[i][j].append(np.loadtxt(file_list[j],usecols=(5,), skiprows=11))
       totalTime[i].append(np.sum(times[i][j]))
 
     serialTime.append(totalTime[i][0])
@@ -153,47 +180,59 @@ def print_results(times,totalTime,parallelEff,version):
 
 def plot_results(times,totalTime,speedUp,parallelEff):
   
-  fig, axarr = plt.subplots(2, 2,figsize=(15,15))
+  fig, axarr = plt.subplots(2, 2, figsize=(10,10), frameon=True)
   speedUpPlot = axarr[0, 0]
   parallelEffPlot = axarr[0, 1]
   totalTimePlot = axarr[1, 0]
   emptyPlot = axarr[1, 1]
   
   # Plot speed up
+  speedUpPlot.plot(threadList[0],threadList[0], linestyle='--', lw=1.5, color='0.2')
   for i in range(0,numOfSeries):
     speedUpPlot.plot(threadList[i],speedUp[i],linestyle[i],label=version[i])
   
-  speedUpPlot.plot(threadList[i],threadList[i],color='k',linestyle='--')
-  speedUpPlot.set_ylabel("Speed Up")
-  speedUpPlot.set_xlabel("No. of Threads")
+  speedUpPlot.set_ylabel("${\\rm Speed\\textendash up}$", labelpad=0.)
+  speedUpPlot.set_xlabel("${\\rm Threads}$", labelpad=0.)
+  speedUpPlot.set_xlim([0.7,threadList[i][-1]+1])
+  speedUpPlot.set_ylim([0.7,threadList[i][-1]+1])
 
   # Plot parallel efficiency
+  parallelEffPlot.plot([threadList[0][0], 10**np.floor(np.log10(threadList[0][-1])+1)], [1,1], 'k--', lw=1.5, color='0.2')
+  parallelEffPlot.plot([threadList[0][0], 10**np.floor(np.log10(threadList[0][-1])+1)], [0.9,0.9], 'k--', lw=1.5, color='0.2')
+  parallelEffPlot.plot([threadList[0][0], 10**np.floor(np.log10(threadList[0][-1])+1)], [0.75,0.75], 'k--', lw=1.5, color='0.2')
+  parallelEffPlot.plot([threadList[0][0], 10**np.floor(np.log10(threadList[0][-1])+1)], [0.5,0.5], 'k--', lw=1.5, color='0.2')
   for i in range(0,numOfSeries):
     parallelEffPlot.plot(threadList[i],parallelEff[i],linestyle[i])
-  
+
   parallelEffPlot.set_xscale('log')
-  parallelEffPlot.set_ylabel("Parallel Efficiency")
-  parallelEffPlot.set_xlabel("No. of Threads")
+  parallelEffPlot.set_ylabel("${\\rm Parallel~efficiency}$", labelpad=0.)
+  parallelEffPlot.set_xlabel("${\\rm Threads}$", labelpad=0.)
   parallelEffPlot.set_ylim([0,1.1])
+  parallelEffPlot.set_xlim([0.9,10**(np.floor(np.log10(threadList[i][-1]))+0.5)])
 
   # Plot time to solution     
   for i in range(0,numOfSeries):
+    pts = [1, 10**np.floor(np.log10(threadList[i][-1])+1)]
+    totalTimePlot.loglog(pts,totalTime[i][0]/pts, 'k--', lw=1., color='0.2')
     totalTimePlot.loglog(threadList[i],totalTime[i],linestyle[i],label=version[i])
   
   totalTimePlot.set_xscale('log')
-  totalTimePlot.set_xlabel("No. of Threads")
-  totalTimePlot.set_ylabel("Time to Solution (ms)")
+  totalTimePlot.set_xlabel("${\\rm Threads}$", labelpad=0.)
+  totalTimePlot.set_ylabel("${\\rm Time~to~solution}~[{\\rm ms}]$", labelpad=0.)
+  totalTimePlot.set_xlim([0.9, 10**(np.floor(np.log10(threadList[i][-1]))+0.5)])
+  totalTimePlot.set_ylim([10**np.floor(np.log10(np.min(totalTime)*0.6)), 1.2*10**np.floor(np.log10(np.max(totalTime) * 1.5)+1)])
   
-  totalTimePlot.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.,prop={'size':14})
+  totalTimePlot.legend(bbox_to_anchor=(1.14, 0.97), loc=2, borderaxespad=0.,prop={'size':12}, frameon=False)
   emptyPlot.axis('off')
   
   for i, txt in enumerate(threadList[0]):
-    speedUpPlot.annotate(txt, (threadList[0][i],speedUp[0][i]))
-    parallelEffPlot.annotate(txt, (threadList[0][i],parallelEff[0][i]))
-    totalTimePlot.annotate(txt, (threadList[0][i],totalTime[0][i]))
+    if 2**np.floor(np.log2(threadList[0][i])) == threadList[0][i]: # only powers of 2
+      speedUpPlot.annotate("$%s$"%txt, (threadList[0][i],speedUp[0][i]), (threadList[0][i],speedUp[0][i] + 0.3), color=hexcols[0])
+      parallelEffPlot.annotate("$%s$"%txt, (threadList[0][i],parallelEff[0][i]), (threadList[0][i], parallelEff[0][i]+0.02), color=hexcols[0])
+      totalTimePlot.annotate("$%s$"%txt, (threadList[0][i],totalTime[0][i]), (threadList[0][i], totalTime[0][i]*1.1), color=hexcols[0])
 
   #fig.suptitle("Thread Speed Up, Parallel Efficiency and Time To Solution for {} Time Steps of Cosmo Volume\n Cmd Line: {}, Platform: {}".format(len(times[0][0][0]),cmdLine,platform))
-  fig.suptitle("Thread Speed Up, Parallel Efficiency and Time To Solution for {} Time Steps".format(len(times[0][0][0])))
+  fig.suptitle("${\\rm Speed\\textendash up,~parallel~efficiency~and~time~to~solution~for}~%d~{\\rm time\\textendash steps}$"%len(times[0][0][0]), fontsize=16)
 
   return
 
@@ -204,4 +243,5 @@ plot_results(times,totalTime,speedUp,parallelEff)
 
 print_results(times,totalTime,parallelEff,version)
 
+# And plot
 plt.show()
diff --git a/examples/plot_tasks.py b/examples/plot_tasks.py
index 6e71f476a106937f43bd4bd5973af01f65218afe..fb8b2ce57a47b4d397284bba9960b098c1e3ce62 100755
--- a/examples/plot_tasks.py
+++ b/examples/plot_tasks.py
@@ -56,9 +56,8 @@ pl.rcParams.update(PLOT_PARAMS)
 
 #  Tasks and subtypes. Indexed as in tasks.h.
 TASKTYPES = ["none", "sort", "self", "pair", "sub_self", "sub_pair", "init", "ghost",
-             "drift", "kick", "kick_fixdt", "send", "recv", "grav_gather_m", "grav_fft",
-             "grav_mm", "grav_up", "grav_external", "part_sort", "gpart_sort",
-             "split_cell", "rewait", "count"]
+             "kick", "kick_fixdt", "send", "recv", "grav_gather_m", "grav_fft",
+             "grav_mm", "grav_up", "grav_external", "count"]
 
 TASKCOLOURS = {"none": "black",
                "sort": "lightblue",
@@ -68,7 +67,6 @@ TASKCOLOURS = {"none": "black",
                "sub_pair": "navy",
                "init": "indigo",
                "ghost": "cyan",
-               "drift": "maroon",
                "kick": "green",
                "kick_fixdt": "green",
                "send": "yellow",
@@ -78,20 +76,17 @@ TASKCOLOURS = {"none": "black",
                "grav_mm": "mediumturquoise",
                "grav_up": "mediumvioletred",
                "grav_external": "darkred",
-               "part_sort": "steelblue",
-               "gpart_sort": "teal" ,
-               "split_cell": "seagreen",
-               "rewait": "olive",
                "count": "powerblue"}
 
-SUBTYPES = ["none", "density", "force", "grav", "tend", "count"]
+SUBTYPES = ["none", "density", "gradient", "force", "grav", "tend", "count"]
 
 SUBCOLOURS = {"none": "black",
               "density": "red",
+              "gradient": "powderblue",  # valid matplotlib (CSS4) colour name
               "force": "blue",
               "grav": "indigo",
-              "tend": "grey"
-              "count": "purple"}
+              "tend": "grey",
+              "count": "black"}
 
 #  Show docs if help is requested.
 if len( sys.argv ) == 2 and ( sys.argv[1][0:2] == "-h" or sys.argv[1][0:3] == "--h" ):
diff --git a/examples/plot_tasks_MPI.py b/examples/plot_tasks_MPI.py
index 7550899da2d4a34a5f73b192cbd7c348426786b7..398324cdc773d1dc4b7f26c58866c9df6469cc0b 100755
--- a/examples/plot_tasks_MPI.py
+++ b/examples/plot_tasks_MPI.py
@@ -62,9 +62,8 @@ pl.rcParams.update(PLOT_PARAMS)
 
 #  Tasks and subtypes. Indexed as in tasks.h.
 TASKTYPES = ["none", "sort", "self", "pair", "sub_self", "sub_pair", "init", "ghost",
-             "drift", "kick", "kick_fixdt", "send", "recv", "grav_gather_m", "grav_fft",
-             "grav_mm", "grav_up", "grav_external", "part_sort", "gpart_sort",
-             "split_cell", "rewait", "count"]
+             "kick", "kick_fixdt", "send", "recv", "grav_gather_m", "grav_fft",
+             "grav_mm", "grav_up", "grav_external", "count"]
 
 TASKCOLOURS = {"none": "black",
                "sort": "lightblue",
@@ -74,7 +73,6 @@ TASKCOLOURS = {"none": "black",
                "sub_pair": "navy",
                "init": "indigo",
                "ghost": "cyan",
-               "drift": "maroon",
                "kick": "green",
                "kick_fixdt": "green",
                "send": "yellow",
@@ -84,20 +82,17 @@ TASKCOLOURS = {"none": "black",
                "grav_mm": "mediumturquoise",
                "grav_up": "mediumvioletred",
                "grav_external": "darkred",
-               "part_sort": "steelblue",
-               "gpart_sort": "teal" ,
-               "split_cell": "seagreen",
-               "rewait": "olive",
                "count": "powerblue"}
 
-SUBTYPES = ["none", "density", "force", "grav", "tend", "count"]
+SUBTYPES = ["none", "density", "gradient", "force", "grav", "tend", "count"]
 
 SUBCOLOURS = {"none": "black",
               "density": "red",
+              "gradient": "powderblue",  # valid matplotlib (CSS4) colour name
               "force": "blue",
               "grav": "indigo",
-              "tend": "grey"
-              "count": "purple"}
+              "tend": "grey",
+              "count": "black"}
 
 #  Show docs if help is requested.
 if len( sys.argv ) == 2 and ( sys.argv[1][0:2] == "-h" or sys.argv[1][0:3] == "--h" ):
diff --git a/src/Makefile.am b/src/Makefile.am
index 4d6a67c7569464486a017d8306c3e54730ebb3b2..f7cb52ba40a34269173ab8c5019d3011b0c34b61 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -42,18 +42,20 @@ endif
 include_HEADERS = space.h runner.h queue.h task.h lock.h cell.h part.h const.h \
     engine.h swift.h serial_io.h timers.h debug.h scheduler.h proxy.h parallel_io.h \
     common_io.h single_io.h multipole.h map.h tools.h partition.h clocks.h parser.h \
-    physical_constants.h physical_constants_cgs.h potentials.h version.h hydro_properties.h
+    physical_constants.h physical_constants_cgs.h potential.h version.h \
+    hydro_properties.h riemann.h threadpool.h cooling.h cooling_struct.h
+
 
 # Common source files
 AM_SOURCES = space.c runner.c queue.c task.c cell.c engine.c \
     serial_io.c timers.c debug.c scheduler.c proxy.c parallel_io.c \
     units.c common_io.c single_io.c multipole.c version.c map.c \
     kernel_hydro.c tools.c part.c partition.c clocks.c parser.c \
-    physical_constants.c potentials.c hydro_properties.c \
-    runner_doiact_fft.c
+    physical_constants.c potential.c hydro_properties.c \
+    runner_doiact_fft.c threadpool.c cooling.c
 
 # Include files for distribution, not installation.
-nobase_noinst_HEADERS = approx_math.h atomic.h cycle.h error.h inline.h kernel_hydro.h kernel_gravity.h \
+nobase_noinst_HEADERS = align.h approx_math.h atomic.h cycle.h error.h inline.h kernel_hydro.h kernel_gravity.h \
 		 kernel_long_gravity.h vector.h runner_doiact.h runner_doiact_grav.h runner_doiact_fft.h \
                  units.h intrinsics.h minmax.h kick.h timestep.h drift.h adiabatic_index.h io_properties.h \
 		 dimension.h equation_of_state.h \
@@ -69,8 +71,14 @@ nobase_noinst_HEADERS = approx_math.h atomic.h cycle.h error.h inline.h kernel_h
                  hydro/Gadget2/hydro_debug.h hydro/Gadget2/hydro_part.h \
 		 hydro/Gizmo/hydro.h hydro/Gizmo/hydro_iact.h hydro/Gizmo/hydro_io.h \
                  hydro/Gizmo/hydro_debug.h hydro/Gizmo/hydro_part.h \
-	         riemann.h \
-		 riemann/riemann_hllc.h riemann/riemann_trrs.h riemann/riemann_exact.h
+	         riemann/riemann_hllc.h riemann/riemann_trrs.h \
+		 riemann/riemann_exact.h riemann/riemann_vacuum.h \
+	         potential/none/potential.h potential/point_mass/potential.h \
+                 potential/isothermal/potential.h potential/disc_patch/potential.h \
+		 cooling/none/cooling.h cooling/none/cooling_struct.h \
+	         cooling/const_du/cooling.h cooling/const_du/cooling_struct.h \
+                 cooling/const_lambda/cooling.h cooling/const_lambda/cooling_struct.h 
+
 
 # Sources and flags for regular library
 libswiftsim_la_SOURCES = $(AM_SOURCES)
diff --git a/src/adiabatic_index.h b/src/adiabatic_index.h
index 74eedb163b3e5a1fc56debcec8a3259d52fe7999..a0c9ce09e3e004af07e8b208ef9f1af5f46c9e81 100644
--- a/src/adiabatic_index.h
+++ b/src/adiabatic_index.h
@@ -1,6 +1,7 @@
 /*******************************************************************************
  * This file is part of SWIFT.
  * Copyright (c) 2016   Matthieu Schaller (matthieu.schaller@durham.ac.uk).
+ *                      Bert Vandenbroucke (bert.vandenbroucke@gmail.com).
  *
  * This program is free software: you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published
@@ -34,6 +35,7 @@
 /* Local headers. */
 #include "const.h"
 #include "debug.h"
+#include "error.h"
 #include "inline.h"
 
 /* First define some constants */
@@ -42,18 +44,56 @@
 #define hydro_gamma 1.66666666666666667f
 #define hydro_gamma_minus_one 0.66666666666666667f
 #define hydro_one_over_gamma_minus_one 1.5f
+#define hydro_gamma_plus_one_over_two_gamma 0.8f
+#define hydro_gamma_minus_one_over_two_gamma 0.2f
+#define hydro_gamma_minus_one_over_gamma_plus_one 0.25f
+#define hydro_two_over_gamma_plus_one 0.75f
+#define hydro_two_over_gamma_minus_one 3.f
+#define hydro_gamma_minus_one_over_two 0.33333333333333333f
+#define hydro_two_gamma_over_gamma_minus_one 5.f
+#define hydro_one_over_gamma 0.6f
+
+#elif defined(HYDRO_GAMMA_7_5)
+
+#define hydro_gamma 1.4f
+#define hydro_gamma_minus_one 0.4f
+#define hydro_one_over_gamma_minus_one 2.5f
+#define hydro_gamma_plus_one_over_two_gamma 0.857142857f
+#define hydro_gamma_minus_one_over_two_gamma 0.142857143f
+#define hydro_gamma_minus_one_over_gamma_plus_one 0.166666667f
+#define hydro_two_over_gamma_plus_one 0.833333333f /* 2/(gamma+1) = 5/6 */
+#define hydro_two_over_gamma_minus_one 5.f
+#define hydro_gamma_minus_one_over_two 0.2f
+#define hydro_two_gamma_over_gamma_minus_one 7.f
+#define hydro_one_over_gamma 0.714285714f
 
 #elif defined(HYDRO_GAMMA_4_3)
 
 #define hydro_gamma 1.33333333333333333f
 #define hydro_gamma_minus_one 0.33333333333333333f
 #define hydro_one_over_gamma_minus_one 3.f
+#define hydro_gamma_plus_one_over_two_gamma 0.875f
+#define hydro_gamma_minus_one_over_two_gamma 0.125f
+#define hydro_gamma_minus_one_over_gamma_plus_one 0.142857143f
+#define hydro_two_over_gamma_plus_one 0.857142857f
+#define hydro_two_over_gamma_minus_one 6.f
+#define hydro_gamma_minus_one_over_two 0.166666666666666666f
+#define hydro_two_gamma_over_gamma_minus_one 8.f
+#define hydro_one_over_gamma 0.75f
 
 #elif defined(HYDRO_GAMMA_2_1)
 
 #define hydro_gamma 2.f
 #define hydro_gamma_minus_one 1.f
 #define hydro_one_over_gamma_minus_one 1.f
+#define hydro_gamma_plus_one_over_two_gamma 0.75f
+#define hydro_gamma_minus_one_over_two_gamma 0.25f
+#define hydro_gamma_minus_one_over_gamma_plus_one 0.33333333333333333f
+#define hydro_two_over_gamma_plus_one 0.66666666666666666f
+#define hydro_two_over_gamma_minus_one 2.f
+#define hydro_gamma_minus_one_over_two 0.5f
+#define hydro_two_gamma_over_gamma_minus_one 4.f
+#define hydro_one_over_gamma 0.5f
 
 #else
 
@@ -73,6 +113,10 @@ __attribute__((always_inline)) INLINE static float pow_gamma(float x) {
   const float cbrt = cbrtf(x); /* x^(1/3) */
   return cbrt * cbrt * x;      /* x^(5/3) */
 
+#elif defined(HYDRO_GAMMA_7_5)
+
+  return powf(x, 1.4f); /* x^(7/5) */
+
 #elif defined(HYDRO_GAMMA_4_3)
 
   return cbrtf(x) * x; /* x^(4/3) */
@@ -103,6 +147,10 @@ __attribute__((always_inline)) INLINE static float pow_gamma_minus_one(
   const float cbrt = cbrtf(x); /* x^(1/3) */
   return cbrt * cbrt;          /* x^(2/3) */
 
+#elif defined(HYDRO_GAMMA_7_5)
+
+  return powf(x, 0.4f); /* x^(2/5) */
+
 #elif defined(HYDRO_GAMMA_4_3)
 
   return cbrtf(x); /* x^(1/3) */
@@ -133,6 +181,10 @@ __attribute__((always_inline)) INLINE static float pow_minus_gamma_minus_one(
   const float cbrt_inv = 1.f / cbrtf(x); /* x^(-1/3) */
   return cbrt_inv * cbrt_inv;            /* x^(-2/3) */
 
+#elif defined(HYDRO_GAMMA_7_5)
+
+  return powf(x, -0.4f); /* x^(-2/5) */
+
 #elif defined(HYDRO_GAMMA_4_3)
 
   return 1.f / cbrtf(x); /* x^(-1/3) */
@@ -150,24 +202,36 @@ __attribute__((always_inline)) INLINE static float pow_minus_gamma_minus_one(
 }
 
 /**
- * @brief Returns one over the argument to the power given by one over the
- * adiabatic index.
+ * @brief Returns one over the argument to the power given by the adiabatic
+ * index
  *
- * Computes \f$x^{\frac{1}{\gamma}}\f$.
+ * Computes \f$x^{-\gamma}\f$.
+ *
+ * @param x Argument
+ * @return One over the argument to the power given by the adiabatic index
  */
-__attribute__((always_inline)) INLINE static float pow_one_over_gamma(float x) {
+__attribute__((always_inline)) INLINE static float pow_minus_gamma(float x) {
 
 #if defined(HYDRO_GAMMA_5_3)
 
-  return powf(x, 0.6f); /* x^(3/5) */
+  const float cbrt_inv = 1.f / cbrtf(x);       /* x^(-1/3) */
+  const float cbrt_inv2 = cbrt_inv * cbrt_inv; /* x^(-2/3) */
+  return cbrt_inv * cbrt_inv2 * cbrt_inv2;     /* x^(-5/3) */
+
+#elif defined(HYDRO_GAMMA_7_5)
+
+  return powf(x, -1.4f); /* x^(-7/5) */
 
 #elif defined(HYDRO_GAMMA_4_3)
 
-  return powf(x, 0.75f); /* x^(3/4) */
+  const float cbrt_inv = 1.f / cbrtf(x);       /* x^(-1/3) */
+  const float cbrt_inv2 = cbrt_inv * cbrt_inv; /* x^(-2/3) */
+  return cbrt_inv2 * cbrt_inv2;                /* x^(-4/3) */
 
 #elif defined(HYDRO_GAMMA_2_1)
 
-  return sqrtf(x); /* x^(1/2) */
+  const float inv = 1.f / x;
+  return inv * inv;
 
 #else
 
@@ -178,25 +242,35 @@ __attribute__((always_inline)) INLINE static float pow_one_over_gamma(float x) {
 }
 
 /**
- * @brief Returns the argument to the power given by two over the adiabatic
- * index.
+ * @brief Return the argument to the power given by two divided by the adiabatic
+ * index minus one
+ *
+ * Computes \f$x^{\frac{2}{\gamma - 1}}\f$.
  *
- * Computes \f$x^{\frac{2}{\gamma}}\f$.
+ * @param x Argument
+ * @return Argument to the power two divided by the adiabatic index minus one
  */
-__attribute__((always_inline)) INLINE static float pow_two_over_gamma(float x) {
+__attribute__((always_inline)) INLINE static float pow_two_over_gamma_minus_one(
+    float x) {
 
 #if defined(HYDRO_GAMMA_5_3)
 
-  return powf(x, 1.2f); /* x^(6/5) */
+  return x * x * x; /* x^3 */
+
+#elif defined(HYDRO_GAMMA_7_5)
+
+  const float x2 = x * x;
+  const float x3 = x2 * x;
+  return x2 * x3;
 
 #elif defined(HYDRO_GAMMA_4_3)
 
-  const float sqrt = sqrtf(x);
-  return sqrt * sqrt * sqrt;
+  const float x3 = x * x * x; /* x^3 */
+  return x3 * x3;             /* x^6 */
 
 #elif defined(HYDRO_GAMMA_2_1)
 
-  return x; /* x^(2/2) */
+  return x * x; /* x^2 */
 
 #else
 
@@ -207,26 +281,148 @@ __attribute__((always_inline)) INLINE static float pow_two_over_gamma(float x) {
 }
 
 /**
- * @brief Returns the argument to the power one minus two over the
- * adiabatic index.
+ * @brief Return the argument to the power given by two times the adiabatic
+ * index divided by the adiabatic index minus one
  *
- * Computes \f$x^{1 - \frac{2}{\gamma}}\f$.
+ * Computes \f$x^{\frac{2\gamma}{\gamma - 1}}\f$.
+ *
+ * @param x Argument
+ * @return Argument to the power two times the adiabatic index divided by the
+ * adiabatic index minus one
  */
-__attribute__((always_inline)) INLINE static float pow_one_minus_two_over_gamma(
-    float x) {
+__attribute__((always_inline)) INLINE static float
+pow_two_gamma_over_gamma_minus_one(float x) {
 
 #if defined(HYDRO_GAMMA_5_3)
 
-  return powf(x, -0.2f); /* x^(-1/5) */
+  const float x2 = x * x;  /* x^2 */
+  const float x3 = x2 * x; /* x^3 */
+  return x2 * x3;          /* x^5 */
+
+#elif defined(HYDRO_GAMMA_7_5)
+
+  const float x2 = x * x;   /* x^2 */
+  const float x4 = x2 * x2; /* x^4 */
+  return x4 * x2 * x;       /* x^7 */
 
 #elif defined(HYDRO_GAMMA_4_3)
 
-  const float sqrt = sqrtf(x);
-  return 1.f / sqrt;
+  const float x2 = x * x;   /* x^2 */
+  const float x4 = x2 * x2; /* x^4 */
+  return x4 * x4; /* x^8 */
 
 #elif defined(HYDRO_GAMMA_2_1)
 
-  return 1.f; /* x^0 */
+  const float x2 = x * x; /* x^2 */
+  return x2 * x2; /* x^4 */
+
+#else
+
+  error("The adiabatic index is not defined !");
+  return 0.f;
+
+#endif
+}
+
+/**
+ * @brief Return the argument to the power given by the adiabatic index minus
+ * one divided by two times the adiabatic index
+ *
+ * Computes \f$x^{\frac{\gamma - 1}{2\gamma}}\f$.
+ *
+ * @param x Argument
+ * @return Argument to the power the adiabatic index minus one divided by two
+ * times the adiabatic index
+ */
+__attribute__((always_inline)) INLINE static float
+pow_gamma_minus_one_over_two_gamma(float x) {
+
+#if defined(HYDRO_GAMMA_5_3)
+
+  return powf(x, 0.2f); /* x^0.2 = x^(1/5) */
+
+#elif defined(HYDRO_GAMMA_7_5)
+
+  return powf(x, hydro_gamma_minus_one_over_two_gamma); /* x^(1/7) */
+
+#elif defined(HYDRO_GAMMA_4_3)
+
+  return powf(x, 0.125f); /* x^0.125 = x^(1/8) */
+
+#elif defined(HYDRO_GAMMA_2_1)
+
+  return powf(x, 0.25f); /* x^0.25 = x^(1/4) */
+
+#else
+
+  error("The adiabatic index is not defined !");
+  return 0.f;
+
+#endif
+}
+
+/**
+ * @brief Return the inverse argument to the power given by the adiabatic index
+ * plus one divided by two times the adiabatic index
+ *
+ * Computes \f$x^{-\frac{\gamma + 1}{2\gamma}}\f$.
+ *
+ * @param x Argument
+ * @return Inverse argument to the power the adiabatic index plus one divided by
+ * two times the adiabatic index
+ */
+__attribute__((always_inline)) INLINE static float
+pow_minus_gamma_plus_one_over_two_gamma(float x) {
+
+#if defined(HYDRO_GAMMA_5_3)
+
+  return powf(x, -0.8f); /* x^-0.8 = x^(-4/5) */
+
+#elif defined(HYDRO_GAMMA_7_5)
+
+  return powf(x, -hydro_gamma_plus_one_over_two_gamma); /* x^(-6/7) */
+
+#elif defined(HYDRO_GAMMA_4_3)
+
+  return powf(x, -0.875f); /* x^-0.875 = x^(-7/8) */
+
+#elif defined(HYDRO_GAMMA_2_1)
+
+  return powf(x, -0.75f); /* x^-0.75 = x^(-3/4) */
+
+#else
+
+  error("The adiabatic index is not defined !");
+  return 0.f;
+
+#endif
+}
+
+/**
+ * @brief Return the argument to the power one over the adiabatic index
+ *
+ * Computes \f$x^{\frac{1}{\gamma}}\f$.
+ *
+ * @param x Argument
+ * @return Argument to the power one over the adiabatic index
+ */
+__attribute__((always_inline)) INLINE static float pow_one_over_gamma(float x) {
+
+#if defined(HYDRO_GAMMA_5_3)
+
+  return powf(x, 0.6f); /* x^(3/5) */
+
+#elif defined(HYDRO_GAMMA_7_5)
+
+  return powf(x, hydro_one_over_gamma);
+
+#elif defined(HYDRO_GAMMA_4_3)
+
+  return powf(x, 0.75f); /* x^(3/4) */
+
+#elif defined(HYDRO_GAMMA_2_1)
+
+  return sqrtf(x); /* x^(1/2) */
 
 #else
 
diff --git a/src/align.h b/src/align.h
new file mode 100644
index 0000000000000000000000000000000000000000..84e2909c0866c18f0f8378df9d0efc8d0f6545b5
--- /dev/null
+++ b/src/align.h
@@ -0,0 +1,27 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk).
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+#ifndef SWIFT_ALIGN_H
+#define SWIFT_ALIGN_H
+
+/**
+ * @brief Defines alignment of structures
+ */
+#define SWIFT_STRUCT_ALIGN __attribute__((aligned(32)))
+
+#endif /* SWIFT_ALIGN_H */
diff --git a/src/atomic.h b/src/atomic.h
index 0b87a0f77e17bafc64a2a59b3c70bda782fc14d4..be24f96e5a9d2e955132f0d6d34bdfa58bc1649c 100644
--- a/src/atomic.h
+++ b/src/atomic.h
@@ -23,6 +23,7 @@
 #include "inline.h"
 
 #define atomic_add(v, i) __sync_fetch_and_add(v, i)
+#define atomic_or(v, i) __sync_fetch_and_or(v, i)
 #define atomic_inc(v) atomic_add(v, 1)
 #define atomic_dec(v) atomic_add(v, -1)
 #define atomic_cas(v, o, n) __sync_val_compare_and_swap(v, o, n)
diff --git a/src/cell.c b/src/cell.c
index 7df55ce04ca739da2de6e6061048ad1fe3998a1e..7ce6fb81a8fa6875884d3f5c840c36e5177cdf6b 100644
--- a/src/cell.c
+++ b/src/cell.c
@@ -63,7 +63,6 @@ int cell_next_tag = 0;
  *
  * @param c The #cell.
  */
-
 int cell_getsize(struct cell *c) {
 
   /* Number of cells in this subtree. */
@@ -87,9 +86,10 @@ int cell_getsize(struct cell *c) {
  *
  * @return The number of cells created.
  */
-
 int cell_unpack(struct pcell *pc, struct cell *c, struct space *s) {
 
+#ifdef WITH_MPI
+
   /* Unpack the current pcell. */
   c->h_max = pc->h_max;
   c->ti_end_min = pc->ti_end_min;
@@ -130,6 +130,11 @@ int cell_unpack(struct pcell *pc, struct cell *c, struct space *s) {
   /* Return the total number of unpacked cells. */
   c->pcell_size = count;
   return count;
+
+#else
+  error("SWIFT was not compiled with MPI support.");
+  return 0;
+#endif
 }
 
 /**
@@ -140,7 +145,6 @@ int cell_unpack(struct pcell *pc, struct cell *c, struct space *s) {
  *
  * @return The number of particles linked.
  */
-
 int cell_link_parts(struct cell *c, struct part *parts) {
 
   c->parts = parts;
@@ -166,7 +170,6 @@ int cell_link_parts(struct cell *c, struct part *parts) {
  *
  * @return The number of particles linked.
  */
-
 int cell_link_gparts(struct cell *c, struct gpart *gparts) {
 
   c->gparts = gparts;
@@ -193,9 +196,10 @@ int cell_link_gparts(struct cell *c, struct gpart *gparts) {
  *
  * @return The number of packed cells.
  */
-
 int cell_pack(struct cell *c, struct pcell *pc) {
 
+#ifdef WITH_MPI
+
   /* Start by packing the data of the current cell. */
   pc->h_max = c->h_max;
   pc->ti_end_min = c->ti_end_min;
@@ -216,10 +220,25 @@ int cell_pack(struct cell *c, struct pcell *pc) {
   /* Return the number of packed cells used. */
   c->pcell_size = count;
   return count;
+
+#else
+  error("SWIFT was not compiled with MPI support.");
+  return 0;
+#endif
 }
 
+/**
+ * @brief Pack the time information of the given cell and all its sub-cells.
+ *
+ * @param c The #cell.
+ * @param ti_ends (output) The time information we pack into
+ *
+ * @return The number of packed cells.
+ */
 int cell_pack_ti_ends(struct cell *c, int *ti_ends) {
 
+#ifdef WITH_MPI
+
   /* Pack this cell's data. */
   ti_ends[0] = c->ti_end_min;
 
@@ -232,10 +251,25 @@ int cell_pack_ti_ends(struct cell *c, int *ti_ends) {
 
   /* Return the number of packed values. */
   return count;
+
+#else
+  error("SWIFT was not compiled with MPI support.");
+  return 0;
+#endif
 }
 
+/**
+ * @brief Unpack the time information of a given cell and its sub-cells.
+ *
+ * @param c The #cell
+ * @param ti_ends The time information to unpack
+ *
+ * @return The number of cells created.
+ */
 int cell_unpack_ti_ends(struct cell *c, int *ti_ends) {
 
+#ifdef WITH_MPI
+
   /* Unpack this cell's data. */
   c->ti_end_min = ti_ends[0];
 
@@ -248,14 +282,19 @@ int cell_unpack_ti_ends(struct cell *c, int *ti_ends) {
 
   /* Return the number of packed values. */
   return count;
+
+#else
+  error("SWIFT was not compiled with MPI support.");
+  return 0;
+#endif
 }
 
 /**
- * @brief Lock a cell and hold its parents.
+ * @brief Lock a cell for access to its array of #part and hold its parents.
  *
  * @param c The #cell.
+ * @return 0 on success, 1 on failure
  */
-
 int cell_locktree(struct cell *c) {
 
   TIMER_TIC
@@ -314,6 +353,12 @@ int cell_locktree(struct cell *c) {
   }
 }
 
+/**
+ * @brief Lock a cell for access to its array of #gpart and hold its parents.
+ *
+ * @param c The #cell.
+ * @return 0 on success, 1 on failure
+ */
 int cell_glocktree(struct cell *c) {
 
   TIMER_TIC
@@ -373,11 +418,10 @@ int cell_glocktree(struct cell *c) {
 }
 
 /**
- * @brief Unlock a cell's parents.
+ * @brief Unlock a cell's parents for access to #part array.
  *
  * @param c The #cell.
  */
-
 void cell_unlocktree(struct cell *c) {
 
   TIMER_TIC
@@ -392,6 +436,11 @@ void cell_unlocktree(struct cell *c) {
   TIMER_TOC(timer_locktree);
 }
 
+/**
+ * @brief Unlock a cell's parents for access to #gpart array.
+ *
+ * @param c The #cell.
+ */
 void cell_gunlocktree(struct cell *c) {
 
   TIMER_TIC
@@ -413,7 +462,6 @@ void cell_gunlocktree(struct cell *c) {
  * @param parts_offset Offset of the cell parts array relative to the
  *        space's parts array, i.e. c->parts - s->parts.
  */
-
 void cell_split(struct cell *c, ptrdiff_t parts_offset) {
 
   int i, j;
@@ -532,7 +580,7 @@ void cell_split(struct cell *c, ptrdiff_t parts_offset) {
   }
 
   /* Re-link the gparts. */
-  part_relink_gparts(parts, count, parts_offset);
+  if (count > 0 && gcount > 0) part_relink_gparts(parts, count, parts_offset);
 
 #ifdef SWIFT_DEBUG_CHECKS
   /* Verify that _all_ the parts have been assigned to a cell. */
@@ -627,60 +675,13 @@ void cell_split(struct cell *c, ptrdiff_t parts_offset) {
   }
 
   /* Re-link the parts. */
-  part_relink_parts(gparts, gcount, parts - parts_offset);
-}
-
-/**
- * @brief Initialises all particles to a valid state even if the ICs were stupid
- *
- * @param c Cell to act upon
- * @param data Unused parameter
- */
-void cell_init_parts(struct cell *c, void *data) {
-
-  struct part *restrict p = c->parts;
-  struct xpart *restrict xp = c->xparts;
-  const size_t count = c->count;
-
-  for (size_t i = 0; i < count; ++i) {
-    p[i].ti_begin = 0;
-    p[i].ti_end = 0;
-    xp[i].v_full[0] = p[i].v[0];
-    xp[i].v_full[1] = p[i].v[1];
-    xp[i].v_full[2] = p[i].v[2];
-    hydro_first_init_part(&p[i], &xp[i]);
-    hydro_init_part(&p[i]);
-    hydro_reset_acceleration(&p[i]);
-  }
-  c->ti_end_min = 0;
-  c->ti_end_max = 0;
-}
-
-/**
- * @brief Initialises all g-particles to a valid state even if the ICs were
- *stupid
- *
- * @param c Cell to act upon
- * @param data Unused parameter
- */
-void cell_init_gparts(struct cell *c, void *data) {
-
-  struct gpart *restrict gp = c->gparts;
-  const size_t gcount = c->gcount;
-
-  for (size_t i = 0; i < gcount; ++i) {
-    gp[i].ti_begin = 0;
-    gp[i].ti_end = 0;
-    gravity_first_init_gpart(&gp[i]);
-    gravity_init_gpart(&gp[i]);
-  }
-  c->ti_end_min = 0;
-  c->ti_end_max = 0;
+  if (count > 0 && gcount > 0)
+    part_relink_parts(gparts, gcount, parts - parts_offset);
 }
 
 /**
  * @brief Converts hydro quantities to a valid state after the initial density
- *calculation
+ * calculation
  *
  * @param c Cell to act upon
  * @param data Unused parameter
@@ -704,12 +705,14 @@ void cell_clean_links(struct cell *c, void *data) {
   c->density = NULL;
   c->nr_density = 0;
 
+  c->gradient = NULL;
+  c->nr_gradient = 0;
+
   c->force = NULL;
   c->nr_force = 0;
 }
 
 /**
-<<<<<<< HEAD
  * @brief Checks whether the cells are direct neighbours ot not. Both cells have
  * to be of the same size
  *
@@ -794,8 +797,10 @@ void cell_check_multipole(struct cell *c, void *data) {
   }
 }
 
-/*
- * @brief Frees up the memory allocated for this #cell
+/**
+ * @brief Frees up the memory allocated for this #cell.
+ *
+ * @param c The #cell.
  */
 void cell_clean(struct cell *c) {
 
@@ -805,3 +810,32 @@ void cell_clean(struct cell *c) {
   for (int k = 0; k < 8; k++)
     if (c->progeny[k]) cell_clean(c->progeny[k]);
 }
+
+/**
+ * @brief Tells whether the content of a cell has to be drifted.
+ *
+ * @param c The #cell to test.
+ * @param ti_current The current point on the integer time-line.
+ *
+ * @return 1 if this cell or a cell paired with it is active, 0 otherwise.
+ */
+int cell_is_drift_needed(struct cell *c, int ti_current) {
+
+  /* A cell whose earliest time-step ends now always needs drifting. */
+  if (c->ti_end_min == ti_current) return 1;
+
+  /* Walk the list of density tasks attached to this cell. */
+  for (struct link *lnk = c->density; lnk != NULL; lnk = lnk->next) {
+    const struct task *t = lnk->t;
+    /* Only pair and sub-pair tasks have a partner cell to inspect. */
+    if (t->type == task_type_pair || t->type == task_type_sub_pair) {
+      /* Does the partner's earliest time-step end now? */
+      if ((t->ci == c && t->cj->ti_end_min == ti_current) ||
+          (t->cj == c && t->ci->ti_end_min == ti_current))
+        return 1;
+    }
+  }
+
+  /* Neither this cell nor any paired cell ends a time-step now. */
+  return 0;
+}
diff --git a/src/cell.h b/src/cell.h
index 150718eeb4bd3857e37d517718fe53661033a330..b78cc0a8f842770f60777e3986616a175d2f33ca 100644
--- a/src/cell.h
+++ b/src/cell.h
@@ -23,10 +23,14 @@
 #ifndef SWIFT_CELL_H
 #define SWIFT_CELL_H
 
+/* Config parameters. */
+#include "../config.h"
+
 /* Includes. */
 #include <stddef.h>
 
 /* Local includes. */
+#include "align.h"
 #include "lock.h"
 #include "multipole.h"
 #include "part.h"
@@ -41,9 +45,21 @@ struct space;
  * The maximum was lowered by a further factor of 2 to be on the safe side.*/
 #define cell_max_tag (1 << 29)
 
+#define cell_align 128
+
 /* Global variables. */
 extern int cell_next_tag;
 
+/* Node of a singly-linked list attaching tasks to a cell. */
+struct link {
+
+  /* The task referenced by this node. */
+  struct task *t;
+
+  /* The following node in the list; loops over the list stop at NULL. */
+  struct link *next;
+};
+
 /* Packed cell. */
 struct pcell {
 
@@ -59,7 +75,8 @@ struct pcell {
 
   /* Relative indices of the cell's progeny. */
   int progeny[8];
-};
+
+} SWIFT_STRUCT_ALIGN;
 
 /* Structure to store the data of a single cell. */
 struct cell {
@@ -70,19 +87,22 @@ struct cell {
   /* The cell dimensions. */
   double width[3];
 
-  /* Max radii in this cell. */
+  /* Max smoothing length in this cell. */
   double h_max;
 
   /* Minimum and maximum end of time step in this cell. */
   int ti_end_min, ti_end_max;
 
+  /* Last time the cell's content was drifted forward in time. */
+  int ti_old;
+
   /* Minimum dimension, i.e. smallest edge of this cell. */
   float dmin;
 
   /* Maximum slack allowed for particle movement. */
   float slack;
 
-  /* Maximum particle movement in this cell. */
+  /* Maximum particle movement in this cell since last construction. */
   float dx_max;
 
   /* The depth of this cell in the tree. */
@@ -101,8 +121,8 @@ struct cell {
   struct gpart *gparts;
 
   /* Pointers for the sorted indices. */
-  struct entry *sort, *gsort;
-  unsigned int sorted, gsorted;
+  struct entry *sort;
+  unsigned int sorted;
 
   /* Pointers to the next level of cells. */
   struct cell *progeny[8];
@@ -110,25 +130,34 @@ struct cell {
   /* Parent cell. */
   struct cell *parent;
 
-  /* Super cell, i.e. the highest-level supercell that has interactions. */
+  /* Super cell, i.e. the highest-level supercell that has hydro interactions.
+   */
   struct cell *super;
 
+  /* Super cell, i.e. the highest-level supercell that has gravity interactions.
+   */
+  struct cell *gsuper;
+
   /* The task computing this cell's sorts. */
-  struct task *sorts, *gsorts;
-  int sortsize, gsortsize;
+  struct task *sorts;
+  int sortsize;
 
   /* The tasks computing this cell's density. */
-  struct link *density, *force, *grav;
-  int nr_density, nr_force, nr_grav;
+  struct link *density, *gradient, *force, *grav;
+  int nr_density, nr_gradient, nr_force, nr_grav;
 
   /* The hierarchical tasks. */
-  struct task *ghost, *init, *drift, *kick;
+  struct task *extra_ghost, *ghost, *init, *kick;
+
+#ifdef WITH_MPI
 
   /* Task receiving data. */
-  struct task *recv_xv, *recv_rho, *recv_ti;
+  struct task *recv_xv, *recv_rho, *recv_gradient, *recv_ti;
 
   /* Task send data. */
-  struct link *send_xv, *send_rho, *send_ti;
+  struct link *send_xv, *send_rho, *send_gradient, *send_ti;
+
+#endif
 
   /* Tasks for gravity tree. */
   struct task *grav_up, *grav_down;
@@ -136,6 +165,9 @@ struct cell {
   /* Task for external gravity */
   struct task *grav_external;
 
+  /* Task for cooling */
+  struct task *cooling;
+
   /* Number of tasks that are associated with this cell. */
   int nr_tasks;
 
@@ -152,7 +184,7 @@ struct cell {
   double mom[3], ang_mom[3];
 
   /* Mass, potential, internal  and kinetic energy of particles in this cell. */
-  double mass, e_pot, e_int, e_kin, entropy;
+  double mass, e_pot, e_int, e_kin, e_rad, entropy;
 
   /* Number of particles updated in this cell. */
   int updated, g_updated;
@@ -160,9 +192,14 @@ struct cell {
   /* Linking pointer for "memory management". */
   struct cell *next;
 
+  /* This cell's multipole. */
+  struct multipole multipole;
+
   /* ID of the node this cell lives on. */
   int nodeID;
 
+#ifdef WITH_MPI
+
   /* Bit mask of the proxies this cell is registered with. */
   unsigned long long int sendto;
 
@@ -171,10 +208,9 @@ struct cell {
   int pcell_size;
   int tag;
 
-  /* This cell's multipole. */
-  struct multipole multipole;
+#endif
 
-} __attribute__((aligned(64)));
+} SWIFT_STRUCT_ALIGN;
 
 /* Convert cell location to ID. */
 #define cell_getid(cdim, i, j, k) \
@@ -193,13 +229,12 @@ int cell_unpack_ti_ends(struct cell *c, int *ti_ends);
 int cell_getsize(struct cell *c);
 int cell_link_parts(struct cell *c, struct part *parts);
 int cell_link_gparts(struct cell *c, struct gpart *gparts);
-void cell_init_parts(struct cell *c, void *data);
-void cell_init_gparts(struct cell *c, void *data);
 void cell_convert_hydro(struct cell *c, void *data);
 void cell_clean_links(struct cell *c, void *data);
 int cell_are_neighbours(const struct cell *restrict ci,
                         const struct cell *restrict cj);
 void cell_check_multipole(struct cell *c, void *data);
 void cell_clean(struct cell *c);
+int cell_is_drift_needed(struct cell *c, int ti_current);
 
 #endif /* SWIFT_CELL_H */
diff --git a/src/common_io.c b/src/common_io.c
index 3c001d9da106a46ef5033c8cdec9346d68c54ecd..37e2837fbaeee87916ddea9264439c824149479c 100644
--- a/src/common_io.c
+++ b/src/common_io.c
@@ -42,6 +42,7 @@
 /* Local includes. */
 #include "const.h"
 #include "error.h"
+#include "hydro.h"
 #include "kernel_hydro.h"
 #include "part.h"
 #include "units.h"
@@ -515,13 +516,13 @@ void writeXMFgroupheader(FILE* xmfFile, char* hdfFileName, size_t N,
   fprintf(xmfFile, "\n<Grid Name=\"%s\" GridType=\"Uniform\">\n",
           particle_type_names[ptype]);
   fprintf(xmfFile,
-          "<Topology TopologyType=\"Polyvertex\" Dimensions=\"%zi\"/>\n", N);
+          "<Topology TopologyType=\"Polyvertex\" Dimensions=\"%zu\"/>\n", N);
   fprintf(xmfFile, "<Geometry GeometryType=\"XYZ\">\n");
   fprintf(xmfFile,
-          "<DataItem Dimensions=\"%zi 3\" NumberType=\"Double\" "
+          "<DataItem Dimensions=\"%zu 3\" NumberType=\"Double\" "
           "Precision=\"8\" "
           "Format=\"HDF\">%s:/PartType%d/Coordinates</DataItem>\n",
-          N, hdfFileName, ptype);
+          N, hdfFileName, (int)ptype);
   fprintf(xmfFile,
           "</Geometry>\n <!-- Done geometry for %s, start of particle fields "
           "list -->\n",
@@ -555,12 +556,12 @@ void writeXMFline(FILE* xmfFile, const char* fileName,
           name, dim == 1 ? "Scalar" : "Vector");
   if (dim == 1)
     fprintf(xmfFile,
-            "<DataItem Dimensions=\"%zi\" NumberType=\"Double\" "
+            "<DataItem Dimensions=\"%zu\" NumberType=\"Double\" "
             "Precision=\"%d\" Format=\"HDF\">%s:%s/%s</DataItem>\n",
             N, type == FLOAT ? 4 : 8, fileName, partTypeGroupName, name);
   else
     fprintf(xmfFile,
-            "<DataItem Dimensions=\"%zi %d\" NumberType=\"Double\" "
+            "<DataItem Dimensions=\"%zu %d\" NumberType=\"Double\" "
             "Precision=\"%d\" Format=\"HDF\">%s:%s/%s</DataItem>\n",
             N, dim, type == FLOAT ? 4 : 8, fileName, partTypeGroupName, name);
   fprintf(xmfFile, "</Attribute>\n");
@@ -582,7 +583,7 @@ void prepare_dm_gparts(struct gpart* const gparts, size_t Ndm) {
   for (size_t i = 0; i < Ndm; ++i) {
     /* 0 or negative ids are not allowed */
     if (gparts[i].id_or_neg_offset <= 0)
-      error("0 or negative ID for DM particle %zd: ID=%lld", i,
+      error("0 or negative ID for DM particle %zu: ID=%lld", i,
             gparts[i].id_or_neg_offset);
   }
 }
@@ -614,7 +615,7 @@ void duplicate_hydro_gparts(struct part* const parts,
     gparts[i + Ndm].v_full[1] = parts[i].v[1];
     gparts[i + Ndm].v_full[2] = parts[i].v[2];
 
-    gparts[i + Ndm].mass = parts[i].mass;
+    gparts[i + Ndm].mass = hydro_get_mass(&parts[i]);
 
     /* Link the particles */
     gparts[i + Ndm].id_or_neg_offset = -i;
@@ -650,7 +651,7 @@ void collect_dm_gparts(const struct gpart* const gparts, size_t Ntot,
 
   /* Check that everything is fine */
   if (count != Ndm)
-    error("Collected the wrong number of dm particles (%zd vs. %zd expected)",
+    error("Collected the wrong number of dm particles (%zu vs. %zu expected)",
           count, Ndm);
 }
 
diff --git a/src/const.h b/src/const.h
index 26f6231811acf1e8e905ef568693c71a7ba32dd3..2fc1068d415b40d4bc6f6acd2fe04ee6e0f34587 100644
--- a/src/const.h
+++ b/src/const.h
@@ -36,6 +36,9 @@
 /* Time integration constants. */
 #define const_max_u_change 0.1f
 
+/* Thermal energy per unit mass used as a constant for the isothermal EoS */
+#define const_isothermal_internal_energy 20.2615290634f
+
 /* Dimensionality of the problem */
 //#define HYDRO_DIMENSION_3D
 #define HYDRO_DIMENSION_2D
@@ -43,6 +46,7 @@
 
 /* Hydrodynamical adiabatic index. */
 #define HYDRO_GAMMA_5_3
+//#define HYDRO_GAMMA_7_5
 //#define HYDRO_GAMMA_4_3
 //#define HYDRO_GAMMA_2_1
 
@@ -63,6 +67,22 @@
 //#define GADGET2_SPH
 #define HOPKINS_PE_SPH
 //#define DEFAULT_SPH
+//#define GIZMO_SPH
+
+/* Riemann solver to use (GIZMO_SPH only) */
+#define RIEMANN_SOLVER_EXACT
+//#define RIEMANN_SOLVER_TRRS
+//#define RIEMANN_SOLVER_HLLC
+
+/* Type of gradients to use (GIZMO_SPH only) */
+/* If no option is chosen, no gradients are used (first order scheme) */
+//#define GRADIENTS_SPH
+#define GRADIENTS_GIZMO
+
+/* Types of slope limiter to use (GIZMO_SPH only) */
+/* Different slope limiters can be combined */
+#define SLOPE_LIMITER_PER_FACE
+#define SLOPE_LIMITER_CELL_WIDE
 
 /* Self gravity stuff. */
 #define const_gravity_multipole_order 2
@@ -71,8 +91,16 @@
 #define const_gravity_eta 0.025f
 
 /* External gravity properties */
-#define EXTERNAL_POTENTIAL_POINTMASS
+#define EXTERNAL_POTENTIAL_NONE
+//#define EXTERNAL_POTENTIAL_POINTMASS
 //#define EXTERNAL_POTENTIAL_ISOTHERMALPOTENTIAL
+//#define EXTERNAL_POTENTIAL_DISC_PATCH
+
+/* Cooling properties */
+#define COOLING_NONE
+//#define COOLING_CONST_DU
+//#define COOLING_CONST_LAMBDA
+//#define COOLING_GRACKLE
 
 /* Are we debugging ? */
 //#define SWIFT_DEBUG_CHECKS
diff --git a/src/cooling.c b/src/cooling.c
new file mode 100644
index 0000000000000000000000000000000000000000..e0208dbb591445d0877ef1e703d6e8cf349ddfd6
--- /dev/null
+++ b/src/cooling.c
@@ -0,0 +1,54 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+/* Config parameters. */
+#include "../config.h"
+
+/* This object's header. */
+#include "cooling.h"
+
+/**
+ * @brief Initialises the cooling properties.
+ *
+ * Forwards all arguments to the cooling_init_backend() of the selected model.
+ *
+ * @param parameter_file The parsed parameter file.
+ * @param us The current internal system of units.
+ * @param phys_const The physical constants in internal units.
+ * @param cooling (output) The cooling properties to initialize.
+ */
+void cooling_init(const struct swift_params* parameter_file,
+                  const struct UnitSystem* us,
+                  const struct phys_const* phys_const,
+                  struct cooling_function_data* cooling) {
+
+  cooling_init_backend(parameter_file, us, phys_const, cooling);
+}
+
+/**
+ * @brief Prints the properties of the cooling model.
+ *
+ * Forwards to the cooling_print_backend() of the selected cooling model.
+ *
+ * @param cooling The properties of the cooling function.
+ */
+void cooling_print(const struct cooling_function_data* cooling) {
+
+  cooling_print_backend(cooling);
+}
diff --git a/src/cooling.h b/src/cooling.h
new file mode 100644
index 0000000000000000000000000000000000000000..1b326f6dc4fdf796dd1587e73e9b591f0f500ccb
--- /dev/null
+++ b/src/cooling.h
@@ -0,0 +1,54 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+#ifndef SWIFT_COOLING_H
+#define SWIFT_COOLING_H
+
+/**
+ * @file src/cooling.h
+ * @brief Branches between the different cooling functions.
+ */
+
+/* Config parameters. */
+#include "../config.h"
+
+/* Local headers. */
+#include "const.h"
+
+/* Import the cooling model selected by the COOLING_* macro from const.h */
+#if defined(COOLING_NONE)
+#include "./cooling/none/cooling.h"
+#elif defined(COOLING_CONST_DU)
+#include "./cooling/const_du/cooling.h"
+#elif defined(COOLING_CONST_LAMBDA)
+#include "./cooling/const_lambda/cooling.h"
+#elif defined(COOLING_GRACKLE)
+#include "./cooling/grackle/cooling.h"
+#else
+#error "Invalid choice of cooling function."
+#endif
+
+/* Model-independent entry points, each wrapping the matching _backend call */
+void cooling_init(const struct swift_params* parameter_file,
+                  const struct UnitSystem* us,
+                  const struct phys_const* phys_const,
+                  struct cooling_function_data* cooling);
+
+void cooling_print(const struct cooling_function_data* cooling);
+
+#endif /* SWIFT_COOLING_H */
diff --git a/src/cooling/const_du/cooling.h b/src/cooling/const_du/cooling.h
new file mode 100644
index 0000000000000000000000000000000000000000..b25980ff2269ca9ea176edcc2a3c771647819133
--- /dev/null
+++ b/src/cooling/const_du/cooling.h
@@ -0,0 +1,176 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *                    Richard Bower (r.g.bower@durham.ac.uk)
+ *                    Stefan Arridge  (stefan.arridge@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+#ifndef SWIFT_COOLING_CONST_DU_H
+#define SWIFT_COOLING_CONST_DU_H
+
+/**
+ * @file src/cooling/const_du/cooling.h
+ * @brief Routines related to the "constant cooling" cooling function.
+ *
+ * This is the simplest possible cooling function. A constant cooling rate with
+ * a minimal energy floor is applied. Should be used as a template for more
+ * realistic functions.
+ */
+
+/* Some standard headers. */
+#include <math.h>
+
+/* Local includes. */
+#include "const.h"
+#include "cooling_struct.h"
+#include "error.h"
+#include "hydro.h"
+#include "parser.h"
+#include "part.h"
+#include "physical_constants.h"
+#include "units.h"
+
+/**
+ * @brief Apply the cooling function to a particle.
+ *
+ * In this simple example we just apply the const cooling rate over dt and
+ * clamp the cooled internal energy so that it does not go below the floor.
+ *
+ * @param phys_const The physical constants in internal units (unused here).
+ * @param us The internal system of units (unused here).
+ * @param cooling The #cooling_function_data used in the run.
+ * @param p Pointer to the particle data; its internal energy is updated.
+ * @param xp Pointer to the extended particle data; accumulates the loss.
+ * @param dt The time-step of this particle.
+ */
+__attribute__((always_inline)) INLINE static void cooling_cool_part(
+    const struct phys_const* restrict phys_const,
+    const struct UnitSystem* restrict us,
+    const struct cooling_function_data* restrict cooling,
+    struct part* restrict p, struct xpart* restrict xp, float dt) {
+
+  /* Get current internal energy (dt=0) */
+  const float u_old = hydro_get_internal_energy(p, 0.f);
+
+  /* Get cooling function properties (du_dt < 0 for a positive rate) */
+  const float du_dt = -cooling->cooling_rate;
+  const float u_floor = cooling->min_energy;
+
+  /* Constant cooling; the floor is tested against the cooled energy itself */
+  float u_new;
+  if (u_old + du_dt * dt > u_floor) {
+    u_new = u_old + du_dt * dt;
+  } else {
+    u_new = u_floor;
+  }
+
+  /* Update the internal energy */
+  hydro_set_internal_energy(p, u_new);
+
+  /* Store the radiated energy */
+  xp->cooling_data.radiated_energy += hydro_get_mass(p) * (u_old - u_new);
+}
+
+/**
+ * @brief Time-step limit imposed on a particle by the cooling.
+ *
+ * The value returned is \f$ \alpha \frac{u}{du/dt} \f$, with \f$ \alpha \f$
+ * the cooling_tstep_mult parameter, u the current internal energy of the
+ * particle and du/dt the constant cooling_rate of the model.
+ *
+ * @param cooling The #cooling_function_data used in the run.
+ * @param phys_const The physical constants in internal units (unused here).
+ * @param us The internal system of units (unused here).
+ * @param p Pointer to the particle data.
+ */
+__attribute__((always_inline)) INLINE static float cooling_timestep(
+    const struct cooling_function_data* restrict cooling,
+    const struct phys_const* restrict phys_const,
+    const struct UnitSystem* restrict us, const struct part* restrict p) {
+
+  const float rate = cooling->cooling_rate;
+  const float u = hydro_get_internal_energy(p, 0);
+  return cooling->cooling_tstep_mult * u / rate;
+}
+
+/**
+ * @brief Sets the cooling properties of the (x-)particles to a valid start
+ * state.
+ *
+ * For this model, the accumulated radiated energy of the particle is reset
+ * to 0. The particle structure itself is not read here; it is only part of
+ * the signature for cases where information needs to be read from there.
+ *
+ * @param p Pointer to the particle data (not used by this model).
+ * @param xp Pointer to the extended particle data whose cooling_data is reset.
+ */
+__attribute__((always_inline)) INLINE static void cooling_init_part(
+    const struct part* restrict p, struct xpart* restrict xp) {
+
+  xp->cooling_data.radiated_energy = 0.f;
+}
+
+/**
+ * @brief Returns the total radiated energy by this particle.
+ *
+ * In this simple example we just return the quantity accumulated in the
+ * #cooling_xpart_data of this particle.
+ *
+ * @param xp The extended particle data
+ */
+__attribute__((always_inline)) INLINE static float cooling_get_radiated_energy(
+    const struct xpart* restrict xp) {
+
+  return xp->cooling_data.radiated_energy;
+}
+
+/**
+ * @brief Initialises the cooling function properties from the parameter file
+ *
+ * The cooling_rate, min_energy and cooling_tstep_mult entries of the
+ * ConstCooling section are read and stored without any unit conversion.
+ *
+ * @param parameter_file The parsed parameter file.
+ * @param us The current internal system of units (not used by this model).
+ * @param phys_const The physical constants in internal units (not used here).
+ * @param cooling The cooling properties to initialize
+ */
+static INLINE void cooling_init_backend(
+    const struct swift_params* parameter_file, const struct UnitSystem* us,
+    const struct phys_const* phys_const,
+    struct cooling_function_data* cooling) {
+
+  cooling->cooling_rate =
+      parser_get_param_double(parameter_file, "ConstCooling:cooling_rate");
+  cooling->min_energy =
+      parser_get_param_double(parameter_file, "ConstCooling:min_energy");
+  cooling->cooling_tstep_mult = parser_get_param_double(
+      parameter_file, "ConstCooling:cooling_tstep_mult");
+}
+
+/**
+ * @brief Reports the cooling rate and energy floor of the model via message().
+ *
+ * @param cooling The properties of the cooling function.
+ */
+static INLINE void cooling_print_backend(
+    const struct cooling_function_data* cooling) {
+
+  message("Cooling function is 'Constant cooling' with rate %f and floor %f.",
+          cooling->cooling_rate, cooling->min_energy);
+}
+
+#endif /* SWIFT_COOLING_CONST_DU_H */
diff --git a/src/cooling/const_du/cooling_struct.h b/src/cooling/const_du/cooling_struct.h
new file mode 100644
index 0000000000000000000000000000000000000000..cc00b001cf6b576266de02dac885f87d089bd8e4
--- /dev/null
+++ b/src/cooling/const_du/cooling_struct.h
@@ -0,0 +1,60 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *                    Richard Bower (r.g.bower@durham.ac.uk)
+ *                    Stefan Arridge  (stefan.arridge@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+#ifndef SWIFT_COOLING_STRUCT_CONST_DU_H
+#define SWIFT_COOLING_STRUCT_CONST_DU_H
+
+/**
+ * @file src/cooling/const_du/cooling_struct.h
+ * @brief Structure related to the "constant cooling" cooling function.
+ *
+ * This is the simplest possible cooling function. A constant cooling rate with
+ * a minimal energy floor is applied. Should be used as a template for more
+ * realistic functions.
+ */
+
+/**
+ * @brief Parameters of the constant cooling function (ConstCooling section).
+ */
+struct cooling_function_data {
+
+  /*! Cooling rate in internal units, applied as du_dt = -cooling_rate */
+  float cooling_rate;
+
+  /*! Minimally allowed internal energy of the particles (cooling floor) */
+  float min_energy;
+
+  /*! Factor multiplying u / cooling_rate in the cooling time-step */
+  float cooling_tstep_mult;
+};
+
+/**
+ * @brief Properties of the cooling stored in the particle data.
+ *
+ * Accessed as the cooling_data member of the extended particle data and
+ * used to carry the total amount of energy radiated away.
+ */
+struct cooling_xpart_data {
+
+  /*! Running sum of mass * (u_old - u_new) over the cooling steps so far */
+  float radiated_energy;
+};
+
+#endif /* SWIFT_COOLING_STRUCT_CONST_DU_H */
diff --git a/src/cooling/const_lambda/cooling.h b/src/cooling/const_lambda/cooling.h
new file mode 100644
index 0000000000000000000000000000000000000000..11cf2cae51f1ab2646d1391d2164c399c77a7bba
--- /dev/null
+++ b/src/cooling/const_lambda/cooling.h
@@ -0,0 +1,220 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2016 Tom Theuns (tom.theuns@durham.ac.uk)
+ *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *                    Richard Bower (r.g.bower@durham.ac.uk)
+ *                    Stefan Arridge  (stefan.arridge@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+#ifndef SWIFT_COOLING_CONST_LAMBDA_H
+#define SWIFT_COOLING_CONST_LAMBDA_H
+
+/* Some standard headers. */
+#include <math.h>
+
+/* Local includes. */
+#include "const.h"
+#include "error.h"
+#include "hydro.h"
+#include "parser.h"
+#include "part.h"
+#include "physical_constants.h"
+#include "units.h"
+
+/**
+ * @brief Evaluates du/dt of a particle for a constant lambda, in code units.
+ *
+ * @param phys_const The physical constants in internal units.
+ * @param us The internal system of units.
+ * @param cooling The #cooling_function_data used in the run.
+ * @param p Pointer to the particle data.
+ */
+__attribute__((always_inline)) INLINE static float cooling_rate(
+    const struct phys_const* const phys_const, const struct UnitSystem* us,
+    const struct cooling_function_data* cooling, const struct part* p) {
+
+  /* Inputs: particle density in code units, hydrogen mass fraction and
+   * lambda, the latter being always expressed in cgs units */
+  const float density = hydro_get_density(p);
+  const float hydrogen_fraction = cooling->hydrogen_mass_abundance;
+  const float lambda = cooling->lambda;
+
+  /* Density and proton mass, taken from code units to cgs */
+  const float density_cgs =
+      density * units_cgs_conversion_factor(us, UNIT_CONV_DENSITY);
+  const float proton_mass_cgs =
+      phys_const->const_proton_mass *
+      units_cgs_conversion_factor(us, UNIT_CONV_MASS);
+
+  /* Number density of hydrogen in cgs */
+  const float nH_cgs = hydrogen_fraction * density_cgs / proton_mass_cgs;
+
+  /* Cooling rate in cgs, from rho * du/dt = -lambda * n_H^2 */
+  const float rate_cgs = -lambda * nH_cgs * nH_cgs / density_cgs;
+
+  /* Express the rate in code units again before handing it back */
+  const float rate =
+      rate_cgs * units_cgs_conversion_factor(us, UNIT_CONV_TIME) /
+      units_cgs_conversion_factor(us, UNIT_CONV_ENERGY_PER_UNIT_MASS);
+  return rate;
+}
+
+/**
+ * @brief Cools a particle over one time-step, enforcing the energy floor.
+ *
+ * The internal energy is advanced with the rate given by cooling_rate(). If
+ * the result does not lie above the floor, the floor value is used instead.
+ *
+ * The mass-weighted change in internal energy is added to the radiated
+ * energy stored in the extended particle data.
+ *
+ * @param phys_const The physical constants in internal units.
+ * @param us The internal system of units.
+ * @param cooling The #cooling_function_data used in the run.
+ * @param p Pointer to the particle data.
+ * @param xp Pointer to the extended particle data.
+ * @param dt The time-step of this particle.
+ */
+__attribute__((always_inline)) INLINE static void cooling_cool_part(
+    const struct phys_const* restrict phys_const,
+    const struct UnitSystem* restrict us,
+    const struct cooling_function_data* restrict cooling,
+    struct part* restrict p, struct xpart* restrict xp, float dt) {
+
+  /* Internal energy before cooling (dt=0) */
+  const float u_start = hydro_get_internal_energy(p, 0.f);
+
+  /* Lowest internal energy a particle is allowed to have */
+  const float u_min = cooling->min_energy;
+
+  /* Rate of change of the internal energy */
+  const float rate = cooling_rate(phys_const, us, cooling, p);
+
+  /* Integrate the cooling equation over the time-step with a single
+   * explicit update... */
+  const float u_cooled = u_start + rate * dt;
+
+  /* ...and keep the result only if it lies strictly above the floor */
+  const float u_end = (u_cooled > u_min) ? u_cooled : u_min;
+
+  /* Update the internal energy of the particle */
+  hydro_set_internal_energy(p, u_end);
+
+  /* Store the radiated energy. The increment is negative when the floor
+   * lies above the initial energy, since u_end then exceeds u_start. */
+  xp->cooling_data.radiated_energy += hydro_get_mass(p) * (u_start - u_end);
+}
+
+/**
+ * @brief Computes the time-step due to cooling, a multiple of u / |du/dt|.
+ *
+ * @param cooling The #cooling_function_data used in the run.
+ * @param phys_const The physical constants in internal units.
+ * @param us The internal system of units.
+ * @param p Pointer to the particle data.
+ */
+__attribute__((always_inline)) INLINE static float cooling_timestep(
+    const struct cooling_function_data* restrict cooling,
+    const struct phys_const* restrict phys_const,
+    const struct UnitSystem* restrict us, const struct part* restrict p) {
+
+  /* Get du_dt, which cooling_rate() returns negative for a positive lambda */
+  const float du_dt = cooling_rate(phys_const, us, cooling, p);
+
+  /* Get current internal energy (dt=0) */
+  const float u = hydro_get_internal_energy(p, 0.f);
+  /* Use |du_dt| so the step is positive, scaled by the user's multiplier */
+  return cooling->cooling_tstep_mult * u / fabsf(du_dt);
+}
+
+/**
+ * @brief Sets the cooling properties of the (x-)particles to a valid start
+ * state.
+ *
+ * @param p Pointer to the particle data (not used by this cooling model).
+ * @param xp Pointer to the extended particle data.
+ */
+__attribute__((always_inline)) INLINE static void cooling_init_part(
+    const struct part* restrict p, struct xpart* restrict xp) {
+  /* No energy has been radiated yet at the start of the run */
+  xp->cooling_data.radiated_energy = 0.f;
+}
+
+/**
+ * @brief Returns the total energy radiated by this particle.
+ *
+ * @param xp The extended particle data
+ * @return The radiated energy accumulated in the particle's cooling data.
+ */
+__attribute__((always_inline)) INLINE static float cooling_get_radiated_energy(
+    const struct xpart* restrict xp) {
+  return xp->cooling_data.radiated_energy;
+}
+
+/**
+ * @brief Reads the cooling parameters and derives the energy floor.
+ *
+ * @param parameter_file The parsed parameter file.
+ * @param us The current internal system of units.
+ * @param phys_const The physical constants in internal units.
+ * @param cooling The cooling properties to initialize
+ */
+static INLINE void cooling_init_backend(
+    const struct swift_params* parameter_file, const struct UnitSystem* us,
+    const struct phys_const* phys_const,
+    struct cooling_function_data* cooling) {
+
+  cooling->lambda =
+      parser_get_param_double(parameter_file, "LambdaCooling:lambda");
+  cooling->min_temperature = parser_get_param_double(
+      parameter_file, "LambdaCooling:minimum_temperature");
+  cooling->hydrogen_mass_abundance = parser_get_param_double(
+      parameter_file, "LambdaCooling:hydrogen_mass_abundance");
+  cooling->mean_molecular_weight = parser_get_param_double(
+      parameter_file, "LambdaCooling:mean_molecular_weight");
+  cooling->cooling_tstep_mult = parser_get_param_double(
+      parameter_file, "LambdaCooling:cooling_tstep_mult");
+
+  /* Minimum internal energy: u = k_B * T_min / ((gamma - 1) * mu * m_p) */
+  cooling->min_energy =
+      phys_const->const_boltzmann_k * cooling->min_temperature /
+      (hydro_gamma_minus_one * cooling->mean_molecular_weight *
+       phys_const->const_proton_mass);
+
+  /* The same floor, converted from internal units to cgs */
+  cooling->min_energy_cgs =
+      cooling->min_energy *
+      units_cgs_conversion_factor(us, UNIT_CONV_ENERGY_PER_UNIT_MASS);
+}
+
+/**
+ * @brief Reports the parameters of the cooling model through message().
+ *
+ * @param cooling The properties of the cooling function.
+ */
+static INLINE void cooling_print_backend(
+    const struct cooling_function_data* cooling) {
+
+  /* The format string is assembled from adjacent literals */
+  message(
+      "Cooling function is 'Constant lambda' with (lambda,min_temperature,"
+      "hydrogen_mass_abundance,mean_molecular_weight) =  (%g,%g,%g,%g)",
+      cooling->lambda, cooling->min_temperature,
+      cooling->hydrogen_mass_abundance, cooling->mean_molecular_weight);
+}
+
+#endif /* SWIFT_COOLING_CONST_LAMBDA_H */
diff --git a/src/cooling/const_lambda/cooling_struct.h b/src/cooling/const_lambda/cooling_struct.h
new file mode 100644
index 0000000000000000000000000000000000000000..27c5df16bffbe7d165237d201ca68ea4ba89dd73
--- /dev/null
+++ b/src/cooling/const_lambda/cooling_struct.h
@@ -0,0 +1,60 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2016 Tom Theuns (tom.theuns@durham.ac.uk)
+ *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *                    Richard Bower (r.g.bower@durham.ac.uk)
+ *                    Stefan Arridge  (stefan.arridge@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+#ifndef SWIFT_COOLING_STRUCT_CONST_LAMBDA_H
+#define SWIFT_COOLING_STRUCT_CONST_LAMBDA_H
+
+/**
+ * @brief Properties of the cooling function.
+ */
+struct cooling_function_data {
+  /*! Cooling rate in cgs units. Defined by 'rho * du/dt = -lambda * n_H^2'*/
+  float lambda;
+
+  /*! Minimum temperature (in Kelvin) for all gas particles */
+  float min_temperature;
+
+  /*! Fraction of gas mass that is Hydrogen. Used to calculate n_H */
+  float hydrogen_mass_abundance;
+
+  /*! 'mu', used to convert min_temperature to min_energy */
+  float mean_molecular_weight;
+
+  /*! Minimally allowed internal energy of the particles */
+  float min_energy;
+  /*! The same minimal internal energy, converted to cgs units */
+  float min_energy_cgs;
+
+  /*! Constant multiplication factor for time-step criterion */
+  float cooling_tstep_mult;
+};
+
+/**
+ * @brief Per-particle cooling quantities stored in the particle data.
+ */
+struct cooling_xpart_data {
+
+  /*! Energy radiated away by this particle since the start of the run */
+  float radiated_energy;
+};
+
+#endif /* SWIFT_COOLING_STRUCT_CONST_LAMBDA_H */
diff --git a/src/cooling/none/cooling.h b/src/cooling/none/cooling.h
new file mode 100644
index 0000000000000000000000000000000000000000..0461100dc11e7ffbb4616766923142442b4ac943
--- /dev/null
+++ b/src/cooling/none/cooling.h
@@ -0,0 +1,125 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+#ifndef SWIFT_COOLING_NONE_H
+#define SWIFT_COOLING_NONE_H
+
+/**
+ * @file src/cooling/none/cooling.h
+ * @brief Empty infrastructure for the cases without cooling function
+ */
+
+/* Some standard headers. */
+#include <float.h>
+#include <math.h>
+
+/* Local includes. */
+#include "error.h"
+#include "hydro.h"
+#include "parser.h"
+#include "part.h"
+#include "physical_constants.h"
+#include "units.h"
+
+/**
+ * @brief Apply the cooling function to a particle.
+ *
+ * We do nothing.
+ * @param phys_const The physical constants in internal units.
+ * @param us The internal system of units.
+ * @param cooling The #cooling_function_data used in the run.
+ * @param p Pointer to the particle data.
+ * @param xp Pointer to the extended particle data.
+ * @param dt The time-step of this particle.
+ */
+__attribute__((always_inline)) INLINE static void cooling_cool_part(
+    const struct phys_const* restrict phys_const,
+    const struct UnitSystem* restrict us,
+    const struct cooling_function_data* restrict cooling,
+    struct part* restrict p, struct xpart* restrict xp, float dt) {}
+
+/**
+ * @brief Computes the cooling time-step.
+ *
+ * We return FLT_MAX so as to impose no limit on the time-step.
+ *
+ * @param cooling The #cooling_function_data used in the run.
+ * @param phys_const The physical constants in internal units.
+ * @param us The internal system of units.
+ * @param p Pointer to the particle data.
+ * @return FLT_MAX, whatever the arguments.
+ */
+__attribute__((always_inline)) INLINE static float cooling_timestep(
+    const struct cooling_function_data* restrict cooling,
+    const struct phys_const* restrict phys_const,
+    const struct UnitSystem* restrict us, const struct part* restrict p) {
+  return FLT_MAX;
+}
+
+/**
+ * @brief Sets the cooling properties of the (x-)particles to a valid start
+ * state.
+ *
+ * Nothing to do here: this model keeps no per-particle cooling data.
+ *
+ * @param p Pointer to the particle data.
+ * @param xp Pointer to the extended particle data.
+ */
+__attribute__((always_inline)) INLINE static void cooling_init_part(
+    const struct part* restrict p, struct xpart* restrict xp) {}
+
+/**
+ * @brief Returns the total radiated energy by this particle.
+ *
+ * No cooling, so return 0.
+ *
+ * @param xp The extended particle data
+ * @return Always 0.
+ */
+__attribute__((always_inline)) INLINE static float cooling_get_radiated_energy(
+    const struct xpart* restrict xp) {
+  return 0.f;
+}
+
+/**
+ * @brief Initialises the cooling properties.
+ *
+ * Nothing to do here: all arguments are ignored, cooling is left untouched.
+ *
+ * @param parameter_file The parsed parameter file.
+ * @param us The current internal system of units.
+ * @param phys_const The physical constants in internal units.
+ * @param cooling The cooling properties to initialize
+ */
+static INLINE void cooling_init_backend(
+    const struct swift_params* parameter_file, const struct UnitSystem* us,
+    const struct phys_const* phys_const,
+    struct cooling_function_data* cooling) {}
+
+/**
+ * @brief Prints the properties of the cooling model to stdout.
+ *
+ * @param cooling The properties of the cooling function (not read here).
+ */
+static INLINE void cooling_print_backend(
+    const struct cooling_function_data* cooling) {
+  /* There is no parameter to report, only the name of the model */
+  message("Cooling function is 'No cooling'.");
+}
+
+#endif /* SWIFT_COOLING_NONE_H */
diff --git a/src/cooling/none/cooling_struct.h b/src/cooling/none/cooling_struct.h
new file mode 100644
index 0000000000000000000000000000000000000000..a08530c44d7405df934136f2861f84ba619d2595
--- /dev/null
+++ b/src/cooling/none/cooling_struct.h
@@ -0,0 +1,37 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+#ifndef SWIFT_COOLING_STRUCT_NONE_H
+#define SWIFT_COOLING_STRUCT_NONE_H
+
+/**
+ * @file src/cooling/none/cooling_struct.h
+ * @brief Empty infrastructure for the cases without cooling function
+ */
+
+/**
+ * @brief Properties of the cooling function (empty, a GNU C extension).
+ */
+struct cooling_function_data {};
+
+/**
+ * @brief Properties of the cooling stored in the particle data (none here).
+ */
+struct cooling_xpart_data {};
+
+#endif /* SWIFT_COOLING_STRUCT_NONE_H */
diff --git a/src/cooling_struct.h b/src/cooling_struct.h
new file mode 100644
index 0000000000000000000000000000000000000000..0c567788423ae39507864de8b4a687eeed358cb6
--- /dev/null
+++ b/src/cooling_struct.h
@@ -0,0 +1,46 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+#ifndef SWIFT_COOLING_STRUCT_H
+#define SWIFT_COOLING_STRUCT_H
+
+/**
+ * @file src/cooling_struct.h
+ * @brief Branches between the different cooling functions.
+ */
+
+/* Config parameters. */
+#include "../config.h"
+
+/* Local headers. */
+#include "const.h"
+
+/* Import the right cooling definition */
+#if defined(COOLING_NONE)
+#include "./cooling/none/cooling_struct.h"
+#elif defined(COOLING_CONST_DU)
+#include "./cooling/const_du/cooling_struct.h"
+#elif defined(COOLING_CONST_LAMBDA)
+#include "./cooling/const_lambda/cooling_struct.h"
+#elif defined(COOLING_GRACKLE)
+#include "./cooling/grackle/cooling_struct.h"
+#else
+#error "Invalid choice of cooling function."
+#endif
+
+#endif /* SWIFT_COOLING_STRUCT_H */
diff --git a/src/debug.c b/src/debug.c
index 9e14ad27e09e083a1b60a49e1eb2009a1761bdc7..3de4ccd048bc67247cdc8cb1b327effc354c160b 100644
--- a/src/debug.c
+++ b/src/debug.c
@@ -24,17 +24,20 @@
 #include "../config.h"
 
 /* Some standard headers. */
+#include <float.h>
 #include <stdio.h>
 
 /* This object's header. */
 #include "debug.h"
 
 /* Local includes. */
-#include "config.h"
+#include "cell.h"
 #include "const.h"
+#include "engine.h"
 #include "hydro.h"
 #include "inline.h"
 #include "part.h"
+#include "space.h"
 
 /* Import the right hydro definition */
 #if defined(MINIMAL_SPH)
@@ -45,6 +48,8 @@
 #include "./hydro/PressureEntropy/hydro_debug.h"
 #elif defined(DEFAULT_SPH)
 #include "./hydro/Default/hydro_debug.h"
+#elif defined(GIZMO_SPH)
+#include "./hydro/Gizmo/hydro_debug.h"
 #else
 #error "Invalid choice of SPH variant"
 #endif
@@ -70,7 +75,7 @@ void printParticle(const struct part *parts, struct xpart *xparts,
   /* Look for the particle. */
   for (size_t i = 0; i < N; i++)
     if (parts[i].id == id) {
-      printf("## Particle[%zd]:\n id=%lld ", i, parts[i].id);
+      printf("## Particle[%zu]:\n id=%lld ", i, parts[i].id);
       hydro_debug_particle(&parts[i], &xparts[i]);
       found = 1;
       break;
@@ -99,13 +104,13 @@ void printgParticle(const struct gpart *gparts, const struct part *parts,
   /* Look for the particle. */
   for (size_t i = 0; i < N; i++)
     if (gparts[i].id_or_neg_offset == id) {
-      printf("## gParticle[%zd] (DM) :\n id=%lld", i, id);
+      printf("## gParticle[%zu] (DM) :\n id=%lld", i, id);
       gravity_debug_particle(&gparts[i]);
       found = 1;
       break;
     } else if (gparts[i].id_or_neg_offset < 0 &&
                parts[-gparts[i].id_or_neg_offset].id == id) {
-      printf("## gParticle[%zd] (hydro) :\n id=%lld", i, id);
+      printf("## gParticle[%zu] (hydro) :\n id=%lld", i, id);
       gravity_debug_particle(&gparts[i]);
       found = 1;
       break;
@@ -139,6 +144,55 @@ void printgParticle_single(struct gpart *gp) {
   printf("\n");
 }
 
+/**
+ * @brief Check that the cells and particles of a space agree on h_max.
+ *
+ * @param s the space.
+ * @return 1 if both maxima agree to within FLT_EPSILON, 0 otherwise.
+ */
+int checkSpacehmax(struct space *s) {
+
+  /* Largest h_max over the top-level cells owned by this node. */
+  float cell_h_max = 0.0f;
+  for (int k = 0; k < s->nr_cells; k++) {
+    if (s->cells_top[k].nodeID == s->e->nodeID &&
+        s->cells_top[k].h_max > cell_h_max) {
+      cell_h_max = s->cells_top[k].h_max;
+    }
+  }
+
+  /* Now all particles. */
+  float part_h_max = 0.0f;
+  for (size_t k = 0; k < s->nr_parts; k++) {
+    if (s->parts[k].h > part_h_max) {
+      part_h_max = s->parts[k].h;
+    }
+  }
+
+  /* If within some epsilon we are OK. Integer abs() would truncate here. */
+  const float h_max_diff = cell_h_max - part_h_max;
+  if (h_max_diff <= FLT_EPSILON && h_max_diff >= -FLT_EPSILON) return 1;
+
+  /* There is a problem. Hunt it down. */
+  for (int k = 0; k < s->nr_cells; k++) {
+    if (s->cells_top[k].nodeID == s->e->nodeID) {
+      if (s->cells_top[k].h_max > part_h_max) {
+        message("cell %d is inconsistent (%f > %f)", k, s->cells_top[k].h_max,
+                part_h_max);
+      }
+    }
+  }
+
+  for (size_t k = 0; k < s->nr_parts; k++) {
+    if (s->parts[k].h > cell_h_max) {
+      message("part %lld is inconsistent (%f > %f)", s->parts[k].id,
+              s->parts[k].h, cell_h_max);
+    }
+  }
+
+  return 0;
+}
+
 #ifdef HAVE_METIS
 
 /**
diff --git a/src/debug.h b/src/debug.h
index 367241201977d9b79a8c2913dbae5d08f1148529..22b63820745ca7282b7699f0be09e493238d83c2 100644
--- a/src/debug.h
+++ b/src/debug.h
@@ -22,6 +22,7 @@
 /* Includes. */
 #include "cell.h"
 #include "part.h"
+#include "space.h"
 
 void printParticle(const struct part *parts, struct xpart *xparts,
                    long long int id, size_t N);
@@ -30,6 +31,8 @@ void printgParticle(const struct gpart *gparts, const struct part *parts,
 void printParticle_single(const struct part *p, const struct xpart *xp);
 void printgParticle_single(struct gpart *gp);
 
+int checkSpacehmax(struct space *s);
+
 #ifdef HAVE_METIS
 #include "metis.h"
 void dumpMETISGraph(const char *prefix, idx_t nvtxs, idx_t ncon, idx_t *xadj,
diff --git a/src/dimension.h b/src/dimension.h
index 6395d4d04e50d40b733e7a74dafb7d0ab277d204..0fae2c5602b87622ff67f6f5feb325efc6422472 100644
--- a/src/dimension.h
+++ b/src/dimension.h
@@ -33,6 +33,8 @@
 #include "inline.h"
 #include "vector.h"
 
+#include <math.h>
+
 /* First define some constants */
 #if defined(HYDRO_DIMENSION_3D)
 
@@ -114,6 +116,92 @@ __attribute__((always_inline)) INLINE static float pow_dimension_plus_one(
 #endif
 }
 
+/**
+ * @brief Inverts the given dimension by dimension matrix (in place)
+ *
+ * Only the top left dxd part of A is read and written, d being the number of
+ * dimensions selected at compile time by the HYDRO_DIMENSION_xD macros.
+ *
+ * The inverse is built from the determinant and the cofactors. When the
+ * determinant is zero or NaN, the dxd part is filled with zeros instead.
+ *
+ * @param A A 3x3 matrix of which we want to invert the top left dxd part
+ */
+__attribute__((always_inline)) INLINE static void
+invert_dimension_by_dimension_matrix(float A[3][3]) {
+
+#if defined(HYDRO_DIMENSION_3D)
+
+  /* Determinant of the 3x3 matrix */
+  const float det =
+      A[0][0] * A[1][1] * A[2][2] + A[0][1] * A[1][2] * A[2][0] +
+      A[0][2] * A[1][0] * A[2][1] - A[0][2] * A[1][1] * A[2][0] -
+      A[0][1] * A[1][0] * A[2][2] - A[0][0] * A[1][2] * A[2][1];
+
+  /* Start from the zero matrix, which is what the caller gets back if the
+   * determinant turns out to be zero or NaN */
+  float inv[3][3] = {{0.0f}};
+
+  /* Entries of the adjugate divided by the determinant, computed only for
+   * a usable determinant */
+  if (det != 0.0f && !isnan(det)) {
+    inv[0][0] = (A[1][1] * A[2][2] - A[1][2] * A[2][1]) / det;
+    inv[0][1] = (A[0][2] * A[2][1] - A[0][1] * A[2][2]) / det;
+    inv[0][2] = (A[0][1] * A[1][2] - A[0][2] * A[1][1]) / det;
+    inv[1][0] = (A[1][2] * A[2][0] - A[1][0] * A[2][2]) / det;
+    inv[1][1] = (A[0][0] * A[2][2] - A[0][2] * A[2][0]) / det;
+    inv[1][2] = (A[0][2] * A[1][0] - A[0][0] * A[1][2]) / det;
+    inv[2][0] = (A[1][0] * A[2][1] - A[1][1] * A[2][0]) / det;
+    inv[2][1] = (A[0][1] * A[2][0] - A[0][0] * A[2][1]) / det;
+    inv[2][2] = (A[0][0] * A[1][1] - A[0][1] * A[1][0]) / det;
+  }
+
+  /* Overwrite the input with the result */
+  for (int i = 0; i < 3; i++) {
+    for (int j = 0; j < 3; j++) {
+      A[i][j] = inv[i][j];
+    }
+  }
+
+#elif defined(HYDRO_DIMENSION_2D)
+
+  /* Determinant of the top left 2x2 part */
+  const float det = A[0][0] * A[1][1] - A[0][1] * A[1][0];
+
+  /* Start from the zero matrix, which is what the caller gets back if the
+   * determinant turns out to be zero or NaN */
+  float inv[2][2] = {{0.0f}};
+
+  /* Swap the diagonal, negate the off-diagonal, divide by the determinant */
+  if (det != 0.0f && !isnan(det)) {
+    inv[0][0] = A[1][1] / det;
+    inv[0][1] = -A[0][1] / det;
+    inv[1][0] = -A[1][0] / det;
+    inv[1][1] = A[0][0] / det;
+  }
+
+  /* Overwrite the top left 2x2 part only, the rest of A is left alone */
+  for (int i = 0; i < 2; i++) {
+    for (int j = 0; j < 2; j++) {
+      A[i][j] = inv[i][j];
+    }
+  }
+
+#elif defined(HYDRO_DIMENSION_1D)
+
+  /* A 1x1 matrix is inverted by taking the reciprocal of its only entry,
+   * zero being used when that entry is zero or NaN */
+  const float a00 = A[0][0];
+  A[0][0] = (a00 != 0.0f && !isnan(a00)) ? 1.0f / a00 : 0.0f;
+
+#else
+
+  /* None of the HYDRO_DIMENSION_xD macros is defined */
+  error("The dimension is not defined !");
+
+#endif
+}
+
 /* ------------------------------------------------------------------------- */
 #ifdef WITH_VECTORIZATION
 
diff --git a/src/drift.h b/src/drift.h
index 880595dc59e3e5174ee5e888e595a9204ad383e2..bd1b35926740d49a67291ede4676f3387cd66748 100644
--- a/src/drift.h
+++ b/src/drift.h
@@ -65,8 +65,6 @@ __attribute__((always_inline)) INLINE static void drift_gpart(
 __attribute__((always_inline)) INLINE static void drift_part(
     struct part *restrict p, struct xpart *restrict xp, float dt,
     double timeBase, int ti_old, int ti_current) {
-  /* Useful quantity */
-  const float h_inv = 1.0f / p->h;
 
   /* Drift... */
   p->x[0] += xp->v_full[0] * dt;
@@ -78,22 +76,8 @@ __attribute__((always_inline)) INLINE static void drift_part(
   p->v[1] += p->a_hydro[1] * dt;
   p->v[2] += p->a_hydro[2] * dt;
 
-  /* Predict smoothing length */
-  const float w1 = p->force.h_dt * h_inv * dt;
-  if (fabsf(w1) < 0.2f)
-    p->h *= approx_expf(w1); /* 4th order expansion of exp(w) */
-  else
-    p->h *= expf(w1);
-
-  /* Predict density */
-  const float w2 = -hydro_dimension * w1;
-  if (fabsf(w2) < 0.2f)
-    p->rho *= approx_expf(w2); /* 4th order expansion of exp(w) */
-  else
-    p->rho *= expf(w2);
-
   /* Predict the values of the extra fields */
-  hydro_predict_extra(p, xp, ti_old, ti_current, timeBase);
+  hydro_predict_extra(p, xp, dt, ti_old, ti_current, timeBase);
 
   /* Compute offset since last cell construction */
   xp->x_diff[0] -= xp->v_full[0] * dt;
diff --git a/src/engine.c b/src/engine.c
index a980bcbc9c2f42127fb43b30c7d5573a8e9957ef..8217bd79778a16422e5eb570afa3fb0329f8de38 100644
--- a/src/engine.c
+++ b/src/engine.c
@@ -68,7 +68,7 @@
 #include "units.h"
 #include "version.h"
 
-const char *engine_policy_names[13] = {"none",
+const char *engine_policy_names[15] = {"none",
                                        "rand",
                                        "steal",
                                        "keep",
@@ -80,35 +80,34 @@ const char *engine_policy_names[13] = {"none",
                                        "hydro",
                                        "self_gravity",
                                        "external_gravity",
-                                       "cosmology_integration"};
+                                       "cosmology_integration",
+                                       "drift_all",
+                                       "cooling"};
 
 /** The rank of the engine as a global variable (for messages). */
 int engine_rank;
 
-#ifdef HAVE_SETAFFINITY
-/** The initial affinity of the main thread (set by engin_pin()) */
-static cpu_set_t entry_affinity;
-#endif
-
 /**
  * @brief Link a density/force task to a cell.
  *
  * @param e The #engine.
- * @param l The #link.
+ * @param l A pointer to the #link, will be modified atomically.
  * @param t The #task.
  *
- * @return The new #link pointer.
+ * Nothing is returned, the new #link is stored through @c l instead.
  */
-struct link *engine_addlink(struct engine *e, struct link *l, struct task *t) {
+void engine_addlink(struct engine *e, struct link **l, struct task *t) {
 
+  /* Get the next free link. */
   const int ind = atomic_inc(&e->nr_links);
   if (ind >= e->size_links) {
     error("Link table overflow.");
   }
   struct link *res = &e->links[ind];
-  res->next = l;
+
+  /* Set it atomically. */
   res->t = t;
-  return res;
+  res->next = atomic_swap(l, res);
 }
 
 /**
@@ -119,10 +118,10 @@ struct link *engine_addlink(struct engine *e, struct link *l, struct task *t) {
  *
  * @param e The #engine.
  * @param c The #cell.
- * @param super The super #cell.
+ * @param gsuper The gsuper #cell.
  */
 void engine_make_gravity_hierarchical_tasks(struct engine *e, struct cell *c,
-                                            struct cell *super) {
+                                            struct cell *gsuper) {
 
   struct scheduler *s = &e->sched;
   const int is_with_external_gravity =
@@ -131,10 +130,10 @@ void engine_make_gravity_hierarchical_tasks(struct engine *e, struct cell *c,
   const int is_fixdt = (e->policy & engine_policy_fixdt) == engine_policy_fixdt;
 
   /* Is this the super-cell? */
-  if (super == NULL && (c->grav != NULL || (!c->split && c->gcount > 0))) {
+  if (gsuper == NULL && (c->grav != NULL || (!c->split && c->gcount > 0))) {
 
     /* This is the super cell, i.e. the first with gravity tasks attached. */
-    super = c;
+    gsuper = c;
 
     /* Local tasks only... */
     if (c->nodeID == e->nodeID) {
@@ -144,11 +143,6 @@ void engine_make_gravity_hierarchical_tasks(struct engine *e, struct cell *c,
         c->init = scheduler_addtask(s, task_type_init, task_subtype_none, 0, 0,
                                     c, NULL, 0);
 
-      /* Add the drift task. */
-      if (c->drift == NULL)
-        c->drift = scheduler_addtask(s, task_type_drift, task_subtype_none, 0,
-                                     0, c, NULL, 0);
-
       /* Add the kick task that matches the policy. */
       if (is_fixdt) {
         if (c->kick == NULL)
@@ -167,13 +161,13 @@ void engine_make_gravity_hierarchical_tasks(struct engine *e, struct cell *c,
   }
 
   /* Set the super-cell. */
-  c->super = super;
+  c->gsuper = gsuper;
 
   /* Recurse. */
   if (c->split)
     for (int k = 0; k < 8; k++)
       if (c->progeny[k] != NULL)
-        engine_make_gravity_hierarchical_tasks(e, c->progeny[k], super);
+        engine_make_gravity_hierarchical_tasks(e, c->progeny[k], gsuper);
 }
 
 /**
@@ -191,6 +185,8 @@ void engine_make_hydro_hierarchical_tasks(struct engine *e, struct cell *c,
 
   struct scheduler *s = &e->sched;
   const int is_fixdt = (e->policy & engine_policy_fixdt) == engine_policy_fixdt;
+  const int is_with_cooling =
+      (e->policy & engine_policy_cooling) == engine_policy_cooling;
 
   /* Is this the super-cell? */
   if (super == NULL && (c->density != NULL || (c->count > 0 && !c->split))) {
@@ -206,11 +202,6 @@ void engine_make_hydro_hierarchical_tasks(struct engine *e, struct cell *c,
         c->init = scheduler_addtask(s, task_type_init, task_subtype_none, 0, 0,
                                     c, NULL, 0);
 
-      /* Add the drift task. */
-      if (c->drift == NULL)
-        c->drift = scheduler_addtask(s, task_type_drift, task_subtype_none, 0,
-                                     0, c, NULL, 0);
-
       /* Add the kick task that matches the policy. */
       if (is_fixdt) {
         if (c->kick == NULL)
@@ -225,6 +216,16 @@ void engine_make_hydro_hierarchical_tasks(struct engine *e, struct cell *c,
       /* Generate the ghost task. */
       c->ghost = scheduler_addtask(s, task_type_ghost, task_subtype_none, 0, 0,
                                    c, NULL, 0);
+
+#ifdef EXTRA_HYDRO_LOOP
+      /* Generate the extra ghost task. */
+      c->extra_ghost = scheduler_addtask(s, task_type_extra_ghost,
+                                         task_subtype_none, 0, 0, c, NULL, 0);
+#endif
+
+      if (is_with_cooling)
+        c->cooling = scheduler_addtask(s, task_type_cooling, task_subtype_none,
+                                       0, 0, c, NULL, 0);
     }
   }
 
@@ -261,7 +262,7 @@ void engine_redistribute(struct engine *e) {
   const int nr_nodes = e->nr_nodes;
   const int nodeID = e->nodeID;
   struct space *s = e->s;
-  struct cell *cells = s->cells;
+  struct cell *cells = s->cells_top;
   const int nr_cells = s->nr_cells;
   const int *cdim = s->cdim;
   const double iwidth[3] = {s->iwidth[0], s->iwidth[1], s->iwidth[2]};
@@ -303,7 +304,7 @@ void engine_redistribute(struct engine *e) {
                    parts[k].x[2] * iwidth[2]);
 #ifdef SWIFT_DEBUG_CHECKS
     if (cid < 0 || cid >= s->nr_cells)
-      error("Bad cell id %i for part %zi at [%.3e,%.3e,%.3e].", cid, k,
+      error("Bad cell id %i for part %zu at [%.3e,%.3e,%.3e].", cid, k,
             parts[k].x[0], parts[k].x[1], parts[k].x[2]);
 #endif
 
@@ -359,7 +360,7 @@ void engine_redistribute(struct engine *e) {
                    gparts[k].x[2] * iwidth[2]);
 #ifdef SWIFT_DEBUG_CHECKS
     if (cid < 0 || cid >= s->nr_cells)
-      error("Bad cell id %i for part %zi at [%.3e,%.3e,%.3e].", cid, k,
+      error("Bad cell id %i for part %zu at [%.3e,%.3e,%.3e].", cid, k,
             gparts[k].x[0], gparts[k].x[1], gparts[k].x[2]);
 #endif
 
@@ -540,12 +541,12 @@ void engine_redistribute(struct engine *e) {
 
 #ifdef SWIFT_DEBUG_CHECKS
   /* Verify that all parts are in the right place. */
-  for (int k = 0; k < nr_parts; k++) {
-    int cid = cell_getid(cdim, parts_new[k].x[0] * iwidth[0],
-                         parts_new[k].x[1] * iwidth[1],
-                         parts_new[k].x[2] * iwidth[2]);
+  for (size_t k = 0; k < nr_parts; k++) {
+    const int cid = cell_getid(cdim, parts_new[k].x[0] * iwidth[0],
+                               parts_new[k].x[1] * iwidth[1],
+                               parts_new[k].x[2] * iwidth[2]);
     if (cells[cid].nodeID != nodeID)
-      error("Received particle (%i) that does not belong here (nodeID=%i).", k,
+      error("Received particle (%zu) that does not belong here (nodeID=%i).", k,
             cells[cid].nodeID);
   }
 
@@ -566,7 +567,7 @@ void engine_redistribute(struct engine *e) {
   for (size_t k = 0; k < nr_parts; ++k) {
 
     if (parts_new[k].gpart != NULL &&
-        parts_new[k].gpart->id_or_neg_offset != -k) {
+        parts_new[k].gpart->id_or_neg_offset != -(ptrdiff_t)k) {
       error("Linking problem !");
     }
   }
@@ -594,7 +595,7 @@ void engine_redistribute(struct engine *e) {
     int my_cells = 0;
     for (int k = 0; k < nr_cells; k++)
       if (cells[k].nodeID == nodeID) my_cells += 1;
-    message("node %i now has %zi parts and %zi gparts in %i cells.", nodeID,
+    message("node %i now has %zu parts and %zu gparts in %i cells.", nodeID,
             nr_parts, nr_gparts, my_cells);
   }
 
@@ -682,11 +683,12 @@ void engine_addtasks_grav(struct engine *e, struct cell *c, struct task *up,
  * @param cj Dummy cell containing the nodeID of the receiving node.
  * @param t_xv The send_xv #task, if it has already been created.
  * @param t_rho The send_rho #task, if it has already been created.
+ * @param t_gradient The send_gradient #task, if it has already been created.
  * @param t_ti The send_ti #task, if required and has already been created.
  */
 void engine_addtasks_send(struct engine *e, struct cell *ci, struct cell *cj,
                           struct task *t_xv, struct task *t_rho,
-                          struct task *t_ti) {
+                          struct task *t_gradient, struct task *t_ti) {
 
 #ifdef WITH_MPI
   struct link *l = NULL;
@@ -705,37 +707,62 @@ void engine_addtasks_send(struct engine *e, struct cell *ci, struct cell *cj,
     /* Create the tasks and their dependencies? */
     if (t_xv == NULL) {
       t_xv = scheduler_addtask(s, task_type_send, task_subtype_none,
-                               3 * ci->tag, 0, ci, cj, 0);
+                               4 * ci->tag, 0, ci, cj, 0);
       t_rho = scheduler_addtask(s, task_type_send, task_subtype_none,
-                                3 * ci->tag + 1, 0, ci, cj, 0);
+                                4 * ci->tag + 1, 0, ci, cj, 0);
       if (!(e->policy & engine_policy_fixdt))
         t_ti = scheduler_addtask(s, task_type_send, task_subtype_tend,
-                                 3 * ci->tag + 2, 0, ci, cj, 0);
+                                 4 * ci->tag + 2, 0, ci, cj, 0);
+#ifdef EXTRA_HYDRO_LOOP
+      t_gradient = scheduler_addtask(s, task_type_send, task_subtype_none,
+                                     4 * ci->tag + 3, 0, ci, cj, 0);
+#endif
+
+#ifdef EXTRA_HYDRO_LOOP
+
+      scheduler_addunlock(s, t_gradient, ci->super->kick);
+
+      scheduler_addunlock(s, ci->super->extra_ghost, t_gradient);
+
+      /* The send_rho task should unlock the super-cell's extra_ghost task. */
+      scheduler_addunlock(s, t_rho, ci->super->extra_ghost);
 
       /* The send_rho task depends on the cell's ghost task. */
       scheduler_addunlock(s, ci->super->ghost, t_rho);
 
+      /* The send_xv task should unlock the super-cell's ghost task. */
+      scheduler_addunlock(s, t_xv, ci->super->ghost);
+
+#else
       /* The send_rho task should unlock the super-cell's kick task. */
       scheduler_addunlock(s, t_rho, ci->super->kick);
 
+      /* The send_rho task depends on the cell's ghost task. */
+      scheduler_addunlock(s, ci->super->ghost, t_rho);
+
       /* The send_xv task should unlock the super-cell's ghost task. */
       scheduler_addunlock(s, t_xv, ci->super->ghost);
+#endif
 
       /* The super-cell's kick task should unlock the send_ti task. */
       if (t_ti != NULL) scheduler_addunlock(s, ci->super->kick, t_ti);
     }
 
     /* Add them to the local cell. */
-    ci->send_xv = engine_addlink(e, ci->send_xv, t_xv);
-    ci->send_rho = engine_addlink(e, ci->send_rho, t_rho);
-    if (t_ti != NULL) ci->send_ti = engine_addlink(e, ci->send_ti, t_ti);
+    engine_addlink(e, &ci->send_xv, t_xv);
+    engine_addlink(e, &ci->send_rho, t_rho);
+#ifdef EXTRA_HYDRO_LOOP
+    engine_addlink(e, &ci->send_gradient, t_gradient);
+#endif
+    if (t_ti != NULL) engine_addlink(e, &ci->send_ti, t_ti);
   }
 
   /* Recurse? */
   if (ci->split)
     for (int k = 0; k < 8; k++)
       if (ci->progeny[k] != NULL)
-        engine_addtasks_send(e, ci->progeny[k], cj, t_xv, t_rho, t_ti);
+        engine_addtasks_send(e, ci->progeny[k], cj, t_xv, t_rho, t_gradient,
+                             t_ti);
 
 #else
   error("SWIFT was not compiled with MPI support.");
@@ -749,10 +776,12 @@ void engine_addtasks_send(struct engine *e, struct cell *ci, struct cell *cj,
  * @param c The foreign #cell.
  * @param t_xv The recv_xv #task, if it has already been created.
  * @param t_rho The recv_rho #task, if it has already been created.
+ * @param t_gradient The recv_gradient #task, if it has already been created.
  * @param t_ti The recv_ti #task, if required and has already been created.
  */
 void engine_addtasks_recv(struct engine *e, struct cell *c, struct task *t_xv,
-                          struct task *t_rho, struct task *t_ti) {
+                          struct task *t_rho, struct task *t_gradient,
+                          struct task *t_ti) {
 
 #ifdef WITH_MPI
   struct scheduler *s = &e->sched;
@@ -763,19 +792,39 @@ void engine_addtasks_recv(struct engine *e, struct cell *c, struct task *t_xv,
   if (t_xv == NULL && c->density != NULL) {
 
     /* Create the tasks. */
-    t_xv = scheduler_addtask(s, task_type_recv, task_subtype_none, 3 * c->tag,
+    t_xv = scheduler_addtask(s, task_type_recv, task_subtype_none, 4 * c->tag,
                              0, c, NULL, 0);
     t_rho = scheduler_addtask(s, task_type_recv, task_subtype_none,
-                              3 * c->tag + 1, 0, c, NULL, 0);
+                              4 * c->tag + 1, 0, c, NULL, 0);
     if (!(e->policy & engine_policy_fixdt))
       t_ti = scheduler_addtask(s, task_type_recv, task_subtype_tend,
-                               3 * c->tag + 2, 0, c, NULL, 0);
+                               4 * c->tag + 2, 0, c, NULL, 0);
+#ifdef EXTRA_HYDRO_LOOP
+    t_gradient = scheduler_addtask(s, task_type_recv, task_subtype_none,
+                                   4 * c->tag + 3, 0, c, NULL, 0);
+#endif
   }
   c->recv_xv = t_xv;
   c->recv_rho = t_rho;
+  c->recv_gradient = t_gradient;
   c->recv_ti = t_ti;
 
-  /* Add dependencies. */
+/* Add dependencies. */
+#ifdef EXTRA_HYDRO_LOOP
+  for (struct link *l = c->density; l != NULL; l = l->next) {
+    scheduler_addunlock(s, t_xv, l->t);
+    scheduler_addunlock(s, l->t, t_rho);
+  }
+  for (struct link *l = c->gradient; l != NULL; l = l->next) {
+    scheduler_addunlock(s, t_rho, l->t);
+    scheduler_addunlock(s, l->t, t_gradient);
+  }
+  for (struct link *l = c->force; l != NULL; l = l->next) {
+    scheduler_addunlock(s, t_gradient, l->t);
+    if (t_ti != NULL) scheduler_addunlock(s, l->t, t_ti);
+  }
+  if (c->sorts != NULL) scheduler_addunlock(s, t_xv, c->sorts);
+#else
   for (struct link *l = c->density; l != NULL; l = l->next) {
     scheduler_addunlock(s, t_xv, l->t);
     scheduler_addunlock(s, l->t, t_rho);
@@ -785,12 +834,13 @@ void engine_addtasks_recv(struct engine *e, struct cell *c, struct task *t_xv,
     if (t_ti != NULL) scheduler_addunlock(s, l->t, t_ti);
   }
   if (c->sorts != NULL) scheduler_addunlock(s, t_xv, c->sorts);
+#endif
 
   /* Recurse? */
   if (c->split)
     for (int k = 0; k < 8; k++)
       if (c->progeny[k] != NULL)
-        engine_addtasks_recv(e, c->progeny[k], t_xv, t_rho, t_ti);
+        engine_addtasks_recv(e, c->progeny[k], t_xv, t_rho, t_gradient, t_ti);
 
 #else
   error("SWIFT was not compiled with MPI support.");
@@ -807,7 +857,7 @@ void engine_exchange_cells(struct engine *e) {
 #ifdef WITH_MPI
 
   struct space *s = e->s;
-  struct cell *cells = s->cells;
+  struct cell *cells = s->cells_top;
   const int nr_cells = s->nr_cells;
   const int nr_proxies = e->nr_proxies;
   int offset[nr_cells];
@@ -884,7 +934,7 @@ void engine_exchange_cells(struct engine *e) {
 
   /* Count the number of particles we need to import and re-allocate
      the buffer if needed. */
-  int count_parts_in = 0, count_gparts_in = 0;
+  size_t count_parts_in = 0, count_gparts_in = 0;
   for (int k = 0; k < nr_proxies; k++)
     for (int j = 0; j < e->proxies[k].nr_cells_in; j++) {
       count_parts_in += e->proxies[k].cells_in[j]->count;
@@ -967,14 +1017,14 @@ void engine_exchange_strays(struct engine *e, size_t offset_parts,
   /* Put the parts and gparts into the corresponding proxies. */
   for (size_t k = 0; k < *Npart; k++) {
     /* Get the target node and proxy ID. */
-    const int node_id = e->s->cells[ind_part[k]].nodeID;
+    const int node_id = e->s->cells_top[ind_part[k]].nodeID;
     if (node_id < 0 || node_id >= e->nr_nodes)
       error("Bad node ID %i.", node_id);
     const int pid = e->proxy_ind[node_id];
     if (pid < 0) {
       error(
           "Do not have a proxy for the requested nodeID %i for part with "
-          "id=%llu, x=[%e,%e,%e].",
+          "id=%lld, x=[%e,%e,%e].",
           node_id, s->parts[offset_parts + k].id,
           s->parts[offset_parts + k].x[0], s->parts[offset_parts + k].x[1],
           s->parts[offset_parts + k].x[2]);
@@ -991,7 +1041,7 @@ void engine_exchange_strays(struct engine *e, size_t offset_parts,
                      &s->xparts[offset_parts + k], 1);
   }
   for (size_t k = 0; k < *Ngpart; k++) {
-    const int node_id = e->s->cells[ind_gpart[k]].nodeID;
+    const int node_id = e->s->cells_top[ind_gpart[k]].nodeID;
     if (node_id < 0 || node_id >= e->nr_nodes)
       error("Bad node ID %i.", node_id);
     const int pid = e->proxy_ind[node_id];
@@ -1038,7 +1088,7 @@ void engine_exchange_strays(struct engine *e, size_t offset_parts,
     count_gparts_in += e->proxies[k].nr_gparts_in;
   }
   if (e->verbose) {
-    message("sent out %zi/%zi parts/gparts, got %i/%i back.", *Npart, *Ngpart,
+    message("sent out %zu/%zu parts/gparts, got %i/%i back.", *Npart, *Ngpart,
             count_parts_in, count_gparts_in);
   }
   if (offset_parts + count_parts_in > s->size_parts) {
@@ -1048,7 +1098,7 @@ void engine_exchange_strays(struct engine *e, size_t offset_parts,
     struct xpart *xparts_new = NULL;
     if (posix_memalign((void **)&parts_new, part_align,
                        sizeof(struct part) * s->size_parts) != 0 ||
-        posix_memalign((void **)&xparts_new, part_align,
+        posix_memalign((void **)&xparts_new, xpart_align,
                        sizeof(struct xpart) * s->size_parts) != 0)
       error("Failed to allocate new part data.");
     memcpy(parts_new, s->parts, sizeof(struct part) * offset_parts);
@@ -1132,13 +1182,13 @@ void engine_exchange_strays(struct engine *e, size_t offset_parts,
         reqs_in[pid + 1] == MPI_REQUEST_NULL &&
         reqs_in[pid + 2] == MPI_REQUEST_NULL) {
       /* Copy the particle data to the part/xpart/gpart arrays. */
-      struct proxy *p = &e->proxies[pid / 3];
-      memcpy(&s->parts[offset_parts + count_parts], p->parts_in,
-             sizeof(struct part) * p->nr_parts_in);
-      memcpy(&s->xparts[offset_parts + count_parts], p->xparts_in,
-             sizeof(struct xpart) * p->nr_parts_in);
-      memcpy(&s->gparts[offset_gparts + count_gparts], p->gparts_in,
-             sizeof(struct gpart) * p->nr_gparts_in);
+      struct proxy *prox = &e->proxies[pid / 3];
+      memcpy(&s->parts[offset_parts + count_parts], prox->parts_in,
+             sizeof(struct part) * prox->nr_parts_in);
+      memcpy(&s->xparts[offset_parts + count_parts], prox->xparts_in,
+             sizeof(struct xpart) * prox->nr_parts_in);
+      memcpy(&s->gparts[offset_gparts + count_gparts], prox->gparts_in,
+             sizeof(struct gpart) * prox->nr_gparts_in);
       /* for (int k = offset; k < offset + count; k++)
          message(
             "received particle %lli, x=[%.3e %.3e %.3e], h=%.3e, from node %i.",
@@ -1146,8 +1196,8 @@ void engine_exchange_strays(struct engine *e, size_t offset_parts,
             s->parts[k].x[2], s->parts[k].h, p->nodeID); */
 
       /* Re-link the gparts. */
-      for (int k = 0; k < p->nr_gparts_in; k++) {
-        struct gpart *gp = &s->gparts[offset_gparts + count_gparts + k];
+      for (int kk = 0; kk < prox->nr_gparts_in; kk++) {
+        struct gpart *gp = &s->gparts[offset_gparts + count_gparts + kk];
         if (gp->id_or_neg_offset <= 0) {
           struct part *p =
               &s->parts[offset_gparts + count_parts - gp->id_or_neg_offset];
@@ -1157,8 +1207,8 @@ void engine_exchange_strays(struct engine *e, size_t offset_parts,
       }
 
       /* Advance the counters. */
-      count_parts += p->nr_parts_in;
-      count_gparts += p->nr_gparts_in;
+      count_parts += prox->nr_parts_in;
+      count_gparts += prox->nr_gparts_in;
     }
   }
 
@@ -1196,7 +1246,7 @@ void engine_make_gravity_tasks(struct engine *e) {
   struct space *s = e->s;
   struct scheduler *sched = &e->sched;
   const int nodeID = e->nodeID;
-  struct cell *cells = s->cells;
+  struct cell *cells = s->cells_top;
   const int nr_cells = s->nr_cells;
 
   for (int cid = 0; cid < nr_cells; ++cid) {
@@ -1251,7 +1301,7 @@ void engine_make_hydroloop_tasks(struct engine *e) {
   struct scheduler *sched = &e->sched;
   const int nodeID = e->nodeID;
   const int *cdim = s->cdim;
-  struct cell *cells = s->cells;
+  struct cell *cells = s->cells_top;
 
   /* Run through the highest level of cells and add pairs. */
   for (int i = 0; i < cdim[0]; i++) {
@@ -1317,12 +1367,11 @@ void engine_make_hydroloop_tasks(struct engine *e) {
 void engine_count_and_link_tasks(struct engine *e) {
 
   struct scheduler *sched = &e->sched;
-  const int nr_tasks = sched->nr_tasks;
 
-  for (int k = 0; k < nr_tasks; k++) {
+  for (int ind = 0; ind < sched->nr_tasks; ind++) {
+
+    struct task *t = &sched->tasks[ind];
 
-    /* Get the current task. */
-    struct task *t = &sched->tasks[k];
     if (t->skip) continue;
 
     /* Link sort tasks together. */
@@ -1337,31 +1386,31 @@ void engine_count_and_link_tasks(struct engine *e) {
     if (t->type == task_type_self) {
       atomic_inc(&t->ci->nr_tasks);
       if (t->subtype == task_subtype_density) {
-        t->ci->density = engine_addlink(e, t->ci->density, t);
+        engine_addlink(e, &t->ci->density, t);
         atomic_inc(&t->ci->nr_density);
       }
     } else if (t->type == task_type_pair) {
       atomic_inc(&t->ci->nr_tasks);
       atomic_inc(&t->cj->nr_tasks);
       if (t->subtype == task_subtype_density) {
-        t->ci->density = engine_addlink(e, t->ci->density, t);
+        engine_addlink(e, &t->ci->density, t);
         atomic_inc(&t->ci->nr_density);
-        t->cj->density = engine_addlink(e, t->cj->density, t);
+        engine_addlink(e, &t->cj->density, t);
         atomic_inc(&t->cj->nr_density);
       }
     } else if (t->type == task_type_sub_self) {
       atomic_inc(&t->ci->nr_tasks);
       if (t->subtype == task_subtype_density) {
-        t->ci->density = engine_addlink(e, t->ci->density, t);
+        engine_addlink(e, &t->ci->density, t);
         atomic_inc(&t->ci->nr_density);
       }
     } else if (t->type == task_type_sub_pair) {
       atomic_inc(&t->ci->nr_tasks);
       atomic_inc(&t->cj->nr_tasks);
       if (t->subtype == task_subtype_density) {
-        t->ci->density = engine_addlink(e, t->ci->density, t);
+        engine_addlink(e, &t->ci->density, t);
         atomic_inc(&t->ci->nr_density);
-        t->cj->density = engine_addlink(e, t->cj->density, t);
+        engine_addlink(e, &t->cj->density, t);
         atomic_inc(&t->cj->nr_density);
       }
     }
@@ -1380,11 +1429,11 @@ static inline void engine_make_gravity_dependencies(struct scheduler *sched,
                                                     struct cell *c) {
 
   /* init --> gravity --> kick */
-  scheduler_addunlock(sched, c->super->init, gravity);
-  scheduler_addunlock(sched, gravity, c->super->kick);
+  scheduler_addunlock(sched, c->gsuper->init, gravity);
+  scheduler_addunlock(sched, gravity, c->gsuper->kick);
 
   /* grav_up --> gravity ( --> kick) */
-  scheduler_addunlock(sched, c->super->grav_up, gravity);
+  scheduler_addunlock(sched, c->gsuper->grav_up, gravity);
 }
 
 /**
@@ -1426,10 +1475,10 @@ void engine_link_gravity_tasks(struct engine *e) {
 
       /* Gather the multipoles --> mm interaction --> kick */
       scheduler_addunlock(sched, gather, t);
-      scheduler_addunlock(sched, t, t->ci->super->kick);
+      scheduler_addunlock(sched, t, t->ci->gsuper->kick);
 
       /* init --> mm interaction */
-      scheduler_addunlock(sched, t->ci->super->init, t);
+      scheduler_addunlock(sched, t->ci->gsuper->init, t);
     }
 
     /* Self-interaction? */
@@ -1447,7 +1496,7 @@ void engine_link_gravity_tasks(struct engine *e) {
         engine_make_gravity_dependencies(sched, t, t->ci);
       }
 
-      if (t->cj->nodeID == nodeID && t->ci->super != t->cj->super) {
+      if (t->cj->nodeID == nodeID && t->ci->gsuper != t->cj->gsuper) {
 
         engine_make_gravity_dependencies(sched, t, t->cj);
       }
@@ -1469,7 +1518,7 @@ void engine_link_gravity_tasks(struct engine *e) {
 
         engine_make_gravity_dependencies(sched, t, t->ci);
       }
-      if (t->cj->nodeID == nodeID && t->ci->super != t->cj->super) {
+      if (t->cj->nodeID == nodeID && t->ci->gsuper != t->cj->gsuper) {
 
         engine_make_gravity_dependencies(sched, t, t->cj);
       }
@@ -1477,19 +1526,46 @@ void engine_link_gravity_tasks(struct engine *e) {
   }
 }
 
+#ifdef EXTRA_HYDRO_LOOP
+
 /**
  * @brief Creates the dependency network for the hydro tasks of a given cell.
  *
  * @param sched The #scheduler.
  * @param density The density task to link.
+ * @param gradient The gradient task to link.
  * @param force The force task to link.
  * @param c The cell.
  */
 static inline void engine_make_hydro_loops_dependencies(struct scheduler *sched,
                                                         struct task *density,
+                                                        struct task *gradient,
                                                         struct task *force,
                                                         struct cell *c) {
+  /* init --> density loop --> ghost --> gradient loop --> extra_ghost */
+  /* extra_ghost --> force loop --> kick */
+  scheduler_addunlock(sched, c->super->init, density);
+  scheduler_addunlock(sched, density, c->super->ghost);
+  scheduler_addunlock(sched, c->super->ghost, gradient);
+  scheduler_addunlock(sched, gradient, c->super->extra_ghost);
+  scheduler_addunlock(sched, c->super->extra_ghost, force);
+  scheduler_addunlock(sched, force, c->super->kick);
+}
 
+#else
+
+/**
+ * @brief Creates the dependency network for the hydro tasks of a given cell.
+ *
+ * @param sched The #scheduler.
+ * @param density The density task to link.
+ * @param force The force task to link.
+ * @param c The cell.
+ */
+static inline void engine_make_hydro_loops_dependencies(struct scheduler *sched,
+                                                        struct task *density,
+                                                        struct task *force,
+                                                        struct cell *c) {
   /* init --> density loop --> ghost --> force loop --> kick */
   scheduler_addunlock(sched, c->super->init, density);
   scheduler_addunlock(sched, density, c->super->ghost);
@@ -1497,6 +1573,7 @@ static inline void engine_make_hydro_loops_dependencies(struct scheduler *sched,
   scheduler_addunlock(sched, force, c->super->kick);
 }
 
+#endif
 /**
  * @brief Duplicates the first hydro loop and construct all the
  * dependencies for the hydro part
@@ -1512,13 +1589,11 @@ static inline void engine_make_hydro_loops_dependencies(struct scheduler *sched,
 void engine_make_extra_hydroloop_tasks(struct engine *e) {
 
   struct scheduler *sched = &e->sched;
-  const int nodeID = e->nodeID;
   const int nr_tasks = sched->nr_tasks;
+  const int nodeID = e->nodeID;
 
-  for (int k = 0; k < nr_tasks; k++) {
-
-    /* Get a pointer to the task. */
-    struct task *t = &sched->tasks[k];
+  for (int ind = 0; ind < nr_tasks; ind++) {
+    struct task *t = &sched->tasks[ind];
 
     /* Skip? */
     if (t->skip) continue;
@@ -1526,29 +1601,76 @@ void engine_make_extra_hydroloop_tasks(struct engine *e) {
     /* Self-interaction? */
     if (t->type == task_type_self && t->subtype == task_subtype_density) {
 
+#ifdef EXTRA_HYDRO_LOOP
+      /* Start by constructing the task for the second and third hydro loop */
+      struct task *t2 = scheduler_addtask(
+          sched, task_type_self, task_subtype_gradient, 0, 0, t->ci, NULL, 0);
+      struct task *t3 = scheduler_addtask(
+          sched, task_type_self, task_subtype_force, 0, 0, t->ci, NULL, 0);
+
+      /* Add the link between the new loops and the cell */
+      engine_addlink(e, &t->ci->gradient, t2);
+      atomic_inc(&t->ci->nr_gradient);
+      engine_addlink(e, &t->ci->force, t3);
+      atomic_inc(&t->ci->nr_force);
+
+      /* Now, build all the dependencies for the hydro */
+      engine_make_hydro_loops_dependencies(sched, t, t2, t3, t->ci);
+
+#else
+
       /* Start by constructing the task for the second hydro loop */
       struct task *t2 = scheduler_addtask(
           sched, task_type_self, task_subtype_force, 0, 0, t->ci, NULL, 0);
 
       /* Add the link between the new loop and the cell */
-      t->ci->force = engine_addlink(e, t->ci->force, t2);
+      engine_addlink(e, &t->ci->force, t2);
       atomic_inc(&t->ci->nr_force);
 
       /* Now, build all the dependencies for the hydro */
       engine_make_hydro_loops_dependencies(sched, t, t2, t->ci);
+#endif
     }
 
     /* Otherwise, pair interaction? */
     else if (t->type == task_type_pair && t->subtype == task_subtype_density) {
 
+#ifdef EXTRA_HYDRO_LOOP
+      /* Start by constructing the task for the second and third hydro loop */
+      struct task *t2 = scheduler_addtask(
+          sched, task_type_pair, task_subtype_gradient, 0, 0, t->ci, t->cj, 0);
+      struct task *t3 = scheduler_addtask(
+          sched, task_type_pair, task_subtype_force, 0, 0, t->ci, t->cj, 0);
+
+      /* Add the link between the new loop and both cells */
+      engine_addlink(e, &t->ci->gradient, t2);
+      atomic_inc(&t->ci->nr_gradient);
+      engine_addlink(e, &t->cj->gradient, t2);
+      atomic_inc(&t->cj->nr_gradient);
+      engine_addlink(e, &t->ci->force, t3);
+      atomic_inc(&t->ci->nr_force);
+      engine_addlink(e, &t->cj->force, t3);
+      atomic_inc(&t->cj->nr_force);
+
+      /* Now, build all the dependencies for the hydro for the cells */
+      /* that are local and are not descendant of the same super-cells */
+      if (t->ci->nodeID == nodeID) {
+        engine_make_hydro_loops_dependencies(sched, t, t2, t3, t->ci);
+      }
+      if (t->cj->nodeID == nodeID && t->ci->super != t->cj->super) {
+        engine_make_hydro_loops_dependencies(sched, t, t2, t3, t->cj);
+      }
+
+#else
+
       /* Start by constructing the task for the second hydro loop */
       struct task *t2 = scheduler_addtask(
           sched, task_type_pair, task_subtype_force, 0, 0, t->ci, t->cj, 0);
 
       /* Add the link between the new loop and both cells */
-      t->ci->force = engine_addlink(e, t->ci->force, t2);
+      engine_addlink(e, &t->ci->force, t2);
       atomic_inc(&t->ci->nr_force);
-      t->cj->force = engine_addlink(e, t->cj->force, t2);
+      engine_addlink(e, &t->cj->force, t2);
       atomic_inc(&t->cj->nr_force);
 
       /* Now, build all the dependencies for the hydro for the cells */
@@ -1559,19 +1681,45 @@ void engine_make_extra_hydroloop_tasks(struct engine *e) {
       if (t->cj->nodeID == nodeID && t->ci->super != t->cj->super) {
         engine_make_hydro_loops_dependencies(sched, t, t2, t->cj);
       }
+
+#endif
+
     }
 
     /* Otherwise, sub-self interaction? */
     else if (t->type == task_type_sub_self &&
              t->subtype == task_subtype_density) {
 
+#ifdef EXTRA_HYDRO_LOOP
+
+      /* Start by constructing the task for the second and third hydro loop */
+      struct task *t2 =
+          scheduler_addtask(sched, task_type_sub_self, task_subtype_gradient,
+                            t->flags, 0, t->ci, t->cj, 0);
+      struct task *t3 =
+          scheduler_addtask(sched, task_type_sub_self, task_subtype_force,
+                            t->flags, 0, t->ci, t->cj, 0);
+
+      /* Add the link between the new loop and the cell */
+      engine_addlink(e, &t->ci->gradient, t2);
+      atomic_inc(&t->ci->nr_gradient);
+      engine_addlink(e, &t->ci->force, t3);
+      atomic_inc(&t->ci->nr_force);
+
+      /* Now, build all the dependencies for the hydro for the cells */
+      /* that are local and are not descendant of the same super-cells */
+      if (t->ci->nodeID == nodeID) {
+        engine_make_hydro_loops_dependencies(sched, t, t2, t3, t->ci);
+      }
+
+#else
       /* Start by constructing the task for the second hydro loop */
       struct task *t2 =
           scheduler_addtask(sched, task_type_sub_self, task_subtype_force,
                             t->flags, 0, t->ci, t->cj, 0);
 
       /* Add the link between the new loop and the cell */
-      t->ci->force = engine_addlink(e, t->ci->force, t2);
+      engine_addlink(e, &t->ci->force, t2);
       atomic_inc(&t->ci->nr_force);
 
       /* Now, build all the dependencies for the hydro for the cells */
@@ -1579,21 +1727,52 @@ void engine_make_extra_hydroloop_tasks(struct engine *e) {
       if (t->ci->nodeID == nodeID) {
         engine_make_hydro_loops_dependencies(sched, t, t2, t->ci);
       }
+#endif
     }
 
     /* Otherwise, sub-pair interaction? */
     else if (t->type == task_type_sub_pair &&
              t->subtype == task_subtype_density) {
 
+#ifdef EXTRA_HYDRO_LOOP
+
+      /* Start by constructing the task for the second and third hydro loop */
+      struct task *t2 =
+          scheduler_addtask(sched, task_type_sub_pair, task_subtype_gradient,
+                            t->flags, 0, t->ci, t->cj, 0);
+      struct task *t3 =
+          scheduler_addtask(sched, task_type_sub_pair, task_subtype_force,
+                            t->flags, 0, t->ci, t->cj, 0);
+
+      /* Add the link between the new loop and both cells */
+      engine_addlink(e, &t->ci->gradient, t2);
+      atomic_inc(&t->ci->nr_gradient);
+      engine_addlink(e, &t->cj->gradient, t2);
+      atomic_inc(&t->cj->nr_gradient);
+      engine_addlink(e, &t->ci->force, t3);
+      atomic_inc(&t->ci->nr_force);
+      engine_addlink(e, &t->cj->force, t3);
+      atomic_inc(&t->cj->nr_force);
+
+      /* Now, build all the dependencies for the hydro for the cells */
+      /* that are local and are not descendant of the same super-cells */
+      if (t->ci->nodeID == nodeID) {
+        engine_make_hydro_loops_dependencies(sched, t, t2, t3, t->ci);
+      }
+      if (t->cj->nodeID == nodeID && t->ci->super != t->cj->super) {
+        engine_make_hydro_loops_dependencies(sched, t, t2, t3, t->cj);
+      }
+
+#else
       /* Start by constructing the task for the second hydro loop */
       struct task *t2 =
           scheduler_addtask(sched, task_type_sub_pair, task_subtype_force,
                             t->flags, 0, t->ci, t->cj, 0);
 
       /* Add the link between the new loop and both cells */
-      t->ci->force = engine_addlink(e, t->ci->force, t2);
+      engine_addlink(e, &t->ci->force, t2);
       atomic_inc(&t->ci->nr_force);
-      t->cj->force = engine_addlink(e, t->cj->force, t2);
+      engine_addlink(e, &t->cj->force, t2);
       atomic_inc(&t->cj->nr_force);
 
       /* Now, build all the dependencies for the hydro for the cells */
@@ -1604,6 +1783,7 @@ void engine_make_extra_hydroloop_tasks(struct engine *e) {
       if (t->cj->nodeID == nodeID && t->ci->super != t->cj->super) {
         engine_make_hydro_loops_dependencies(sched, t, t2, t->cj);
       }
+#endif
     }
 
     /* External gravity tasks should depend on init and unlock the kick */
@@ -1611,6 +1791,12 @@ void engine_make_extra_hydroloop_tasks(struct engine *e) {
       scheduler_addunlock(sched, t->ci->init, t);
       scheduler_addunlock(sched, t, t->ci->kick);
     }
+
+    /* Cooling tasks should depend on kick and do not unlock anything since
+     they are the last tasks. */
+    else if (t->type == task_type_cooling) {
+      scheduler_addunlock(sched, t->ci->kick, t);
+    }
   }
 }
 
@@ -1628,7 +1814,7 @@ void engine_make_gravityrecursive_tasks(struct engine *e) {
   struct scheduler *sched = &e->sched;
   const int nodeID = e->nodeID;
   const int nr_cells = s->nr_cells;
-  struct cell *cells = s->cells;
+  struct cell *cells = s->cells_top;
 
   for (int k = 0; k < nr_cells; k++) {
 
@@ -1661,21 +1847,13 @@ void engine_maketasks(struct engine *e) {
 
   struct space *s = e->s;
   struct scheduler *sched = &e->sched;
-  struct cell *cells = s->cells;
+  struct cell *cells = s->cells_top;
   const int nr_cells = s->nr_cells;
   const ticks tic = getticks();
 
   /* Re-set the scheduler. */
   scheduler_reset(sched, s->tot_cells * engine_maxtaskspercell);
 
-  /* Add the space sorting tasks. */
-  for (int i = 0; i < e->nr_threads; i++) {
-    scheduler_addtask(sched, task_type_part_sort, task_subtype_none, i, 0, NULL,
-                      NULL, 0);
-    scheduler_addtask(sched, task_type_gpart_sort, task_subtype_none, i, 0,
-                      NULL, NULL, 0);
-  }
-
   /* Construct the firt hydro loop over neighbours */
   if (e->policy & engine_policy_hydro) engine_make_hydroloop_tasks(e);
 
@@ -1689,7 +1867,11 @@ void engine_maketasks(struct engine *e) {
      is the number of cells (s->tot_cells) times the number of neighbours (27)
      times the number of interaction types (2, density and force). */
   if (e->links != NULL) free(e->links);
+#ifdef EXTRA_HYDRO_LOOP
+  e->size_links = s->tot_cells * 27 * 3;
+#else
   e->size_links = s->tot_cells * 27 * 2;
+#endif
   if ((e->links = malloc(sizeof(struct link) * e->size_links)) == NULL)
     error("Failed to allocate cell-task links.");
   e->nr_links = 0;
@@ -1735,13 +1917,13 @@ void engine_maketasks(struct engine *e) {
       /* Loop through the proxy's incoming cells and add the
          recv tasks. */
       for (int k = 0; k < p->nr_cells_in; k++)
-        engine_addtasks_recv(e, p->cells_in[k], NULL, NULL, NULL);
+        engine_addtasks_recv(e, p->cells_in[k], NULL, NULL, NULL, NULL);
 
       /* Loop through the proxy's outgoing cells and add the
          send tasks. */
       for (int k = 0; k < p->nr_cells_out; k++)
         engine_addtasks_send(e, p->cells_out[k], p->cells_in[0], NULL, NULL,
-                             NULL);
+                             NULL, NULL);
     }
   }
 #endif
@@ -1753,223 +1935,257 @@ void engine_maketasks(struct engine *e) {
   scheduler_ranktasks(sched);
 
   /* Weight the tasks. */
-  scheduler_reweight(sched);
+  scheduler_reweight(sched, e->verbose);
 
   /* Set the tasks age. */
   e->tasks_age = 0;
 
   if (e->verbose)
-    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
-            clocks_getunit());
+    message("took %.3f %s (including reweight).",
+            clocks_from_ticks(getticks() - tic), clocks_getunit());
 }
 
 /**
  * @brief Mark tasks to be skipped and set the sort flags accordingly.
+ *        Threadpool mapper function for fixdt version.
  *
- * @return 1 if the space has to be rebuilt, 0 otherwise.
+ * @param map_data pointer to the tasks
+ * @param num_elements number of tasks
+ * @param extra_data pointer to int that will define if a rebuild is needed.
  */
-int engine_marktasks(struct engine *e) {
-
-  struct scheduler *s = &e->sched;
-  const int ti_end = e->ti_current;
-  const int nr_tasks = s->nr_tasks;
-  const int *const ind = s->tasks_ind;
-  struct task *tasks = s->tasks;
-  const ticks tic = getticks();
-
-  /* Much less to do here if we're on a fixed time-step. */
-  if (e->policy & engine_policy_fixdt) {
+void engine_marktasks_fixdt_mapper(void *map_data, int num_elements,
+                                   void *extra_data) {
+  /* Unpack the arguments. */
+  struct task *tasks = (struct task *)map_data;
+  int *rebuild_space = (int *)extra_data;
 
-    /* Run through the tasks and mark as skip or not. */
-    for (int k = 0; k < nr_tasks; k++) {
+  for (int ind = 0; ind < num_elements; ind++) {
+    struct task *t = &tasks[ind];
 
-      /* Get a handle on the kth task. */
-      struct task *t = &tasks[ind[k]];
+    /* Pair? */
+    if (t->type == task_type_pair || t->type == task_type_sub_pair) {
 
-      /* Pair? */
-      if (t->type == task_type_pair || t->type == task_type_sub_pair) {
+      /* Local pointers. */
+      const struct cell *ci = t->ci;
+      const struct cell *cj = t->cj;
 
-        /* Local pointers. */
-        const struct cell *ci = t->ci;
-        const struct cell *cj = t->cj;
+      /* Too much particle movement? */
+      if (t->tight &&
+          (max(ci->h_max, cj->h_max) + ci->dx_max + cj->dx_max > cj->dmin ||
+           ci->dx_max > space_maxreldx * ci->h_max ||
+           cj->dx_max > space_maxreldx * cj->h_max))
+        *rebuild_space = 1;
 
-        /* Too much particle movement? */
-        if (t->tight &&
-            (fmaxf(ci->h_max, cj->h_max) + ci->dx_max + cj->dx_max > cj->dmin ||
-             ci->dx_max > space_maxreldx * ci->h_max ||
-             cj->dx_max > space_maxreldx * cj->h_max))
-          return 1;
+    }
 
-      }
+    /* Sort? */
+    else if (t->type == task_type_sort) {
 
-      /* Sort? */
-      else if (t->type == task_type_sort) {
+      /* If all the sorts have been done, make this task implicit. */
+      if (!(t->flags & (t->flags ^ t->ci->sorted))) t->implicit = 1;
+    }
+  }
+}
 
-        /* If all the sorts have been done, make this task implicit. */
-        if (!(t->flags & (t->flags ^ t->ci->sorted))) t->implicit = 1;
-      }
+/**
+ * @brief Reset the flags of every sort task and mark it as skipped.
+ *        Threadpool mapper function.
+ *
+ * @param map_data pointer to the tasks
+ * @param num_elements number of tasks
+ * @param extra_data unused
+ */
+void engine_marktasks_sorts_mapper(void *map_data, int num_elements,
+                                   void *extra_data) {
+  /* Unpack the arguments. */
+  struct task *tasks = (struct task *)map_data;
+  for (int ind = 0; ind < num_elements; ind++) {
+    struct task *t = &tasks[ind];
+    if (t->type == task_type_sort) {
+      t->flags = 0;
+      t->skip = 1;
     }
+  }
+}
 
-    /* Multiple-timestep case */
-  } else {
+/**
+ * @brief Mark tasks to be skipped and set the sort flags accordingly.
+ *        Threadpool mapper function.
+ *
+ * @param map_data pointer to the tasks
+ * @param num_elements number of tasks
+ * @param extra_data int[2]: [0] the current ti_end (in), [1] rebuild flag (out).
+ */
+void engine_marktasks_mapper(void *map_data, int num_elements,
+                             void *extra_data) {
+  /* Unpack the arguments. */
+  struct task *tasks = (struct task *)map_data;
+  const int ti_end = ((int *)extra_data)[0];
+  int *rebuild_space = &((int *)extra_data)[1];
+
+  for (int ind = 0; ind < num_elements; ind++) {
+    struct task *t = &tasks[ind];
+
+    /* Single-cell task? */
+    if (t->type == task_type_self || t->type == task_type_ghost ||
+        t->type == task_type_sub_self) {
+
+      /* Set this task's skip. */
+      t->skip = (t->ci->ti_end_min > ti_end);
+    }
 
-    /* Run through the tasks and mark as skip or not. */
-    for (int k = 0; k < nr_tasks; k++) {
+    /* Pair? */
+    else if (t->type == task_type_pair || t->type == task_type_sub_pair) {
 
-      /* Get a handle on the kth task. */
-      struct task *t = &tasks[k];
+      /* Local pointers. */
+      const struct cell *ci = t->ci;
+      const struct cell *cj = t->cj;
 
-      /* Sort-task? */
-      if (t->type == task_type_sort) {
+      /* Too much particle movement? */
+      if (t->tight &&
+          (max(ci->h_max, cj->h_max) + ci->dx_max + cj->dx_max > cj->dmin ||
+           ci->dx_max > space_maxreldx * ci->h_max ||
+           cj->dx_max > space_maxreldx * cj->h_max))
+        *rebuild_space = 1;
 
-        /* Re-set the flags. */
-        t->flags = 0;
-        t->skip = 1;
+      /* Set this task's skip. */
+      if ((t->skip = (ci->ti_end_min > ti_end && cj->ti_end_min > ti_end)) == 1)
+        continue;
 
+      /* Set the sort flags. */
+      if (t->type == task_type_pair && t->subtype != task_subtype_grav) {
+        if (!(ci->sorted & (1 << t->flags))) {
+          atomic_or(&ci->sorts->flags, (1 << t->flags));
+          ci->sorts->skip = 0;
+        }
+        if (!(cj->sorted & (1 << t->flags))) {
+          atomic_or(&cj->sorts->flags, (1 << t->flags));
+          cj->sorts->skip = 0;
+        }
       }
 
-      /* Send/recv-task? */
-      else if (t->type == task_type_send || t->type == task_type_recv) {
-        t->skip = 1;
+#ifdef WITH_MPI
+
+      /* Activate the send/recv flags. */
+      if (ci->nodeID != engine_rank) {
+
+        /* Activate the tasks to recv foreign cell ci's data. */
+        ci->recv_xv->skip = 0;
+        ci->recv_rho->skip = 0;
+        ci->recv_ti->skip = 0;
+
+        /* Look for the local cell cj's send tasks. */
+        struct link *l = NULL;
+        for (l = cj->send_xv; l != NULL && l->t->cj->nodeID != ci->nodeID;
+             l = l->next)
+          ;
+        if (l == NULL) error("Missing link to send_xv task.");
+        l->t->skip = 0;
+
+        for (l = cj->send_rho; l != NULL && l->t->cj->nodeID != ci->nodeID;
+             l = l->next)
+          ;
+        if (l == NULL) error("Missing link to send_rho task.");
+        l->t->skip = 0;
+
+        for (l = cj->send_ti; l != NULL && l->t->cj->nodeID != ci->nodeID;
+             l = l->next)
+          ;
+        if (l == NULL) error("Missing link to send_ti task.");
+        l->t->skip = 0;
+
+      } else if (cj->nodeID != engine_rank) {
+
+        /* Activate the tasks to recv foreign cell cj's data. */
+        cj->recv_xv->skip = 0;
+        cj->recv_rho->skip = 0;
+        cj->recv_ti->skip = 0;
+        /* Look for the local cell ci's send tasks. */
+        struct link *l = NULL;
+        for (l = ci->send_xv; l != NULL && l->t->cj->nodeID != cj->nodeID;
+             l = l->next)
+          ;
+        if (l == NULL) error("Missing link to send_xv task.");
+        l->t->skip = 0;
+
+        for (l = ci->send_rho; l != NULL && l->t->cj->nodeID != cj->nodeID;
+             l = l->next)
+          ;
+        if (l == NULL) error("Missing link to send_rho task.");
+        l->t->skip = 0;
+
+        for (l = ci->send_ti; l != NULL && l->t->cj->nodeID != cj->nodeID;
+             l = l->next)
+          ;
+        if (l == NULL) error("Missing link to send_ti task.");
+        l->t->skip = 0;
       }
+
+#endif
     }
 
-    /* Run through the tasks and mark as skip or not. */
-    for (int k = 0; k < nr_tasks; k++) {
+    /* Kick? */
+    else if (t->type == task_type_kick) {
+      t->skip = (t->ci->ti_end_min > ti_end);
+      t->ci->updated = 0;
+      t->ci->g_updated = 0;
+    }
 
-      /* Get a handle on the kth task. */
-      struct task *t = &tasks[k];
+    /* Init? */
+    else if (t->type == task_type_init) {
+      /* Set this task's skip. */
+      t->skip = (t->ci->ti_end_min > ti_end);
+    }
 
-      /* Skip sorts, sends, and recvs. */
-      if (t->type == task_type_sort || t->type == task_type_send ||
-          t->type == task_type_recv) {
-        continue;
-      }
+    /* None? */
+    else if (t->type == task_type_none)
+      t->skip = 1;
+  }
+}
 
-      /* Single-cell task? */
-      else if (t->type == task_type_self || t->type == task_type_ghost ||
-               t->type == task_type_sub_self) {
+/**
+ * @brief Mark tasks to be skipped and set the sort flags accordingly.
+ *
+ * @return 1 if the space has to be rebuilt, 0 otherwise.
+ */
+int engine_marktasks(struct engine *e) {
 
-        /* Set this task's skip. */
-        t->skip = (t->ci->ti_end_min > ti_end);
-      }
+  struct scheduler *s = &e->sched;
+  const ticks tic = getticks();
+  int rebuild_space = 0;
 
-      /* Pair? */
-      else if (t->type == task_type_pair || t->type == task_type_sub_pair) {
-
-        /* Local pointers. */
-        const struct cell *ci = t->ci;
-        const struct cell *cj = t->cj;
-
-        /* Too much particle movement? */
-        if (t->tight &&
-            (fmaxf(ci->h_max, cj->h_max) + ci->dx_max + cj->dx_max > cj->dmin ||
-             ci->dx_max > space_maxreldx * ci->h_max ||
-             cj->dx_max > space_maxreldx * cj->h_max))
-          return 1;
-
-        /* Set this task's skip. */
-        if ((t->skip = (ci->ti_end_min > ti_end && cj->ti_end_min > ti_end)) ==
-            1)
-          continue;
-
-        /* Set the sort flags. */
-        if (t->type == task_type_pair && t->subtype != task_subtype_grav) {
-          if (!(ci->sorted & (1 << t->flags))) {
-            ci->sorts->flags |= (1 << t->flags);
-            ci->sorts->skip = 0;
-          }
-          if (!(cj->sorted & (1 << t->flags))) {
-            cj->sorts->flags |= (1 << t->flags);
-            cj->sorts->skip = 0;
-          }
-        }
+  /* Much less to do here if we're on a fixed time-step. */
+  if (e->policy & engine_policy_fixdt) {
 
-        /* Activate the send/recv flags. */
-        if (ci->nodeID != e->nodeID) {
-
-          /* Activate the tasks to recv foreign cell ci's data. */
-          ci->recv_xv->skip = 0;
-          ci->recv_rho->skip = 0;
-          ci->recv_ti->skip = 0;
-
-          /* Look for the local cell cj's send tasks. */
-          struct link *l = NULL;
-          for (l = cj->send_xv; l != NULL && l->t->cj->nodeID != ci->nodeID;
-               l = l->next)
-            ;
-          if (l == NULL) {
-            abort();
-            error("Missing link to send_xv task.");
-          }
-          l->t->skip = 0;
-
-          for (l = cj->send_rho; l != NULL && l->t->cj->nodeID != ci->nodeID;
-               l = l->next)
-            ;
-          if (l == NULL) error("Missing link to send_rho task.");
-          l->t->skip = 0;
-
-          for (l = cj->send_ti; l != NULL && l->t->cj->nodeID != ci->nodeID;
-               l = l->next)
-            ;
-          if (l == NULL) error("Missing link to send_ti task.");
-          l->t->skip = 0;
-
-        } else if (cj->nodeID != e->nodeID) {
-
-          /* Activate the tasks to recv foreign cell cj's data. */
-          cj->recv_xv->skip = 0;
-          cj->recv_rho->skip = 0;
-          cj->recv_ti->skip = 0;
-
-          /* Look for the local cell ci's send tasks. */
-          struct link *l = NULL;
-          for (l = ci->send_xv; l != NULL && l->t->cj->nodeID != cj->nodeID;
-               l = l->next)
-            ;
-          if (l == NULL) {
-            abort();
-            error("Missing link to send_xv task.");
-          }
-          l->t->skip = 0;
-
-          for (l = ci->send_rho; l != NULL && l->t->cj->nodeID != cj->nodeID;
-               l = l->next)
-            ;
-          if (l == NULL) error("Missing link to send_rho task.");
-          l->t->skip = 0;
-
-          for (l = ci->send_ti; l != NULL && l->t->cj->nodeID != cj->nodeID;
-               l = l->next)
-            ;
-          if (l == NULL) error("Missing link to send_ti task.");
-          l->t->skip = 0;
-        }
+    /* Run through the tasks and mark as skip or not. */
+    threadpool_map(&e->threadpool, engine_marktasks_fixdt_mapper, s->tasks,
+                   s->nr_tasks, sizeof(struct task), 1000, &rebuild_space);
+    return rebuild_space;
 
-      }
+    /* Multiple-timestep case */
+  } else {
 
-      /* Kick? */
-      else if (t->type == task_type_kick) {
-        t->skip = (t->ci->ti_end_min > ti_end);
-        t->ci->updated = 0;
-        t->ci->g_updated = 0;
-      }
+    /* Run through the tasks and mark as skip or not. */
+    int extra_data[2] = {e->ti_current, rebuild_space};
+    threadpool_map(&e->threadpool, engine_marktasks_sorts_mapper, s->tasks,
+                   s->nr_tasks, sizeof(struct task), 10000, NULL);
 
-      /* Drift? */
-      else if (t->type == task_type_drift)
-        t->skip = 0;
+#ifdef WITH_MPI
+    if (e->policy & engine_policy_mpi) {
 
-      /* Init? */
-      else if (t->type == task_type_init) {
-        /* Set this task's skip. */
-        t->skip = (t->ci->ti_end_min > ti_end);
+      /* Skip all sends and recvs, we will unmark if needed. */
+      for (int k = 0; k < s->nr_tasks; k++) {
+        struct task *t = &s->tasks[k];
+        if (t->type == task_type_send || t->type == task_type_recv) {
+          t->skip = 1;
+        }
       }
-
-      /* None? */
-      else if (t->type == task_type_none)
-        t->skip = 1;
     }
+#endif
+
+    threadpool_map(&e->threadpool, engine_marktasks_mapper, s->tasks,
+                   s->nr_tasks, sizeof(struct task), 10000, extra_data);
+    rebuild_space = extra_data[1];
   }
 
   if (e->verbose)
@@ -1977,7 +2193,7 @@ int engine_marktasks(struct engine *e) {
             clocks_getunit());
 
   /* All is well... */
-  return 0;
+  return rebuild_space;
 }
 
 /**
@@ -2008,8 +2224,8 @@ void engine_print_task_counts(struct engine *e) {
     printf(" %s=%i", taskID_names[k], counts[k]);
   printf(" skipped=%i ]\n", counts[task_type_count]);
   fflush(stdout);
-  message("nr_parts = %zi.", e->s->nr_parts);
-  message("nr_gparts = %zi.", e->s->nr_gparts);
+  message("nr_parts = %zu.", e->s->nr_parts);
+  message("nr_gparts = %zu.", e->s->nr_gparts);
 }
 
 /**
@@ -2051,8 +2267,11 @@ void engine_rebuild(struct engine *e) {
  * @brief Prepare the #engine by re-building the cells and tasks.
  *
  * @param e The #engine to prepare.
+ * @param nodrift If non-zero, do not drift the particles before rebuilding.
+ *                Drifting is not necessary if all particles have already
+ *                been drifted (before repartitioning for instance).
  */
-void engine_prepare(struct engine *e) {
+void engine_prepare(struct engine *e, int nodrift) {
 
   TIMER_TIC;
 
@@ -2068,22 +2287,32 @@ void engine_prepare(struct engine *e) {
   rebuild = buff;
 #endif
 
-  /* Did this not go through? */
+  /* And rebuild if necessary. */
   if (rebuild) {
+
+    /* Drift all particles to the current time if needed. */
+    if (!nodrift) {
+      e->drift_all = 1;
+      engine_drift(e);
+
+      /* Restore the default drifting policy */
+      e->drift_all = (e->policy & engine_policy_drift_all);
+    }
+
     engine_rebuild(e);
   }
 
   /* Re-rank the tasks every now and then. */
   if (e->tasks_age % engine_tasksreweight == 1) {
-    scheduler_reweight(&e->sched);
+    scheduler_reweight(&e->sched, e->verbose);
   }
   e->tasks_age += 1;
 
   TIMER_TOC(timer_prepare);
 
   if (e->verbose)
-    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
-            clocks_getunit());
+    message("took %.3f %s (including marktask, rebuild and reweight).",
+            clocks_from_ticks(getticks() - tic), clocks_getunit());
 }
 
 /**
@@ -2175,14 +2404,15 @@ void engine_collect_kick(struct cell *c) {
  */
 void engine_collect_timestep(struct engine *e) {
 
+  const ticks tic = getticks();
   int updates = 0, g_updates = 0;
   int ti_end_min = max_nr_timesteps;
   const struct space *s = e->s;
 
   /* Collect the cell data. */
   for (int k = 0; k < s->nr_cells; k++)
-    if (s->cells[k].nodeID == e->nodeID) {
-      struct cell *c = &s->cells[k];
+    if (s->cells_top[k].nodeID == e->nodeID) {
+      struct cell *c = &s->cells_top[k];
 
       /* Make the top-cells recurse */
       engine_collect_kick(c);
@@ -2219,64 +2449,10 @@ void engine_collect_timestep(struct engine *e) {
   e->ti_end_min = ti_end_min;
   e->updates = updates;
   e->g_updates = g_updates;
-}
 
-/**
- * @brief Mapping function to collect the data from the drift.
- *
- * @param c A super-cell.
- */
-void engine_collect_drift(struct cell *c) {
-
-  /* Skip super-cells (Their values are already set) */
-  if (c->drift != NULL) return;
-
-  /* Counters for the different quantities. */
-  double e_kin = 0.0, e_int = 0.0, e_pot = 0.0, entropy = 0.0, mass = 0.0;
-  double mom[3] = {0.0, 0.0, 0.0}, ang_mom[3] = {0.0, 0.0, 0.0};
-
-  /* Only do something is the cell is non-empty */
-  if (c->count != 0 || c->gcount != 0) {
-
-    /* If this cell is not split, I'm in trouble. */
-    if (!c->split) error("Cell has no super-cell.");
-
-    /* Collect the values from the progeny. */
-    for (int k = 0; k < 8; k++) {
-      struct cell *cp = c->progeny[k];
-      if (cp != NULL) {
-
-        /* Recurse */
-        engine_collect_drift(cp);
-
-        /* And update */
-        mass += cp->mass;
-        e_kin += cp->e_kin;
-        e_int += cp->e_int;
-        e_pot += cp->e_pot;
-        entropy += cp->entropy;
-        mom[0] += cp->mom[0];
-        mom[1] += cp->mom[1];
-        mom[2] += cp->mom[2];
-        ang_mom[0] += cp->ang_mom[0];
-        ang_mom[1] += cp->ang_mom[1];
-        ang_mom[2] += cp->ang_mom[2];
-      }
-    }
-  }
-
-  /* Store the collected values in the cell. */
-  c->mass = mass;
-  c->e_kin = e_kin;
-  c->e_int = e_int;
-  c->e_pot = e_pot;
-  c->entropy = entropy;
-  c->mom[0] = mom[0];
-  c->mom[1] = mom[1];
-  c->mom[2] = mom[2];
-  c->ang_mom[0] = ang_mom[0];
-  c->ang_mom[1] = ang_mom[1];
-  c->ang_mom[2] = ang_mom[2];
+  if (e->verbose)
+    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
+            clocks_getunit());
 }
 
 /**
@@ -2286,24 +2462,22 @@ void engine_collect_drift(struct cell *c) {
  */
 void engine_print_stats(struct engine *e) {
 
+  const ticks tic = getticks();
   const struct space *s = e->s;
 
-  double e_kin = 0.0, e_int = 0.0, e_pot = 0.0, entropy = 0.0, mass = 0.0;
+  double e_kin = 0.0, e_int = 0.0, e_pot = 0.0, e_rad = 0.0;
+  double entropy = 0.0, mass = 0.0;
   double mom[3] = {0.0, 0.0, 0.0}, ang_mom[3] = {0.0, 0.0, 0.0};
 
   /* Collect the cell data. */
   for (int k = 0; k < s->nr_cells; k++)
-    if (s->cells[k].nodeID == e->nodeID) {
-      struct cell *c = &s->cells[k];
-
-      /* Make the top-cells recurse */
-      engine_collect_drift(c);
-
-      /* And aggregate */
+    if (s->cells_top[k].nodeID == e->nodeID) {
+      struct cell *c = &s->cells_top[k];
       mass += c->mass;
       e_kin += c->e_kin;
       e_int += c->e_int;
       e_pot += c->e_pot;
+      e_rad += c->e_rad;
       entropy += c->entropy;
       mom[0] += c->mom[0];
       mom[1] += c->mom[1];
@@ -2316,33 +2490,35 @@ void engine_print_stats(struct engine *e) {
 /* Aggregate the data from the different nodes. */
 #ifdef WITH_MPI
   {
-    double in[11] = {0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.};
-    double out[11];
+    double in[12] = {0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.};
+    double out[12];
     out[0] = e_kin;
     out[1] = e_int;
     out[2] = e_pot;
-    out[3] = mom[0];
-    out[4] = mom[1];
-    out[5] = mom[2];
-    out[6] = ang_mom[0];
-    out[7] = ang_mom[1];
-    out[8] = ang_mom[2];
-    out[9] = mass;
-    out[10] = entropy;
+    out[3] = e_rad;
+    out[4] = mom[0];
+    out[5] = mom[1];
+    out[6] = mom[2];
+    out[7] = ang_mom[0];
+    out[8] = ang_mom[1];
+    out[9] = ang_mom[2];
+    out[10] = mass;
+    out[11] = entropy;
-    if (MPI_Reduce(out, in, 11, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD) !=
+    if (MPI_Reduce(out, in, 12, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD) !=
         MPI_SUCCESS)
       error("Failed to aggregate stats.");
-    e_kin = out[0];
-    e_int = out[1];
-    e_pot = out[2];
-    mom[0] = out[3];
-    mom[1] = out[4];
-    mom[2] = out[5];
-    ang_mom[0] = out[6];
-    ang_mom[1] = out[7];
-    ang_mom[2] = out[8];
-    mass = out[9];
-    entropy = out[10];
+    e_kin = in[0]; /* in[] holds the sums over all ranks (valid on rank 0). */
+    e_int = in[1];
+    e_pot = in[2];
+    e_rad = in[3];
+    mom[0] = in[4];
+    mom[1] = in[5];
+    mom[2] = in[6];
+    ang_mom[0] = in[7];
+    ang_mom[1] = in[8];
+    ang_mom[2] = in[9];
+    mass = in[10];
+    entropy = in[11];
   }
 #endif
 
@@ -2350,13 +2526,17 @@ void engine_print_stats(struct engine *e) {
 
   /* Print info */
   if (e->nodeID == 0) {
-    fprintf(
-        e->file_stats,
-        " %14e %14e %14e %14e %14e %14e %14e %14e %14e %14e %14e %14e %14e\n",
-        e->time, mass, e_tot, e_kin, e_int, e_pot, entropy, mom[0], mom[1],
-        mom[2], ang_mom[0], ang_mom[1], ang_mom[2]);
+    fprintf(e->file_stats,
+            " %14e %14e %14e %14e %14e %14e %14e %14e %14e %14e %14e %14e %14e "
+            "%14e\n",
+            e->time, mass, e_tot, e_kin, e_int, e_pot, e_rad, entropy, mom[0],
+            mom[1], mom[2], ang_mom[0], ang_mom[1], ang_mom[2]);
     fflush(e->file_stats);
   }
+
+  if (e->verbose)
+    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
+            clocks_getunit());
 }
 
 /**
@@ -2370,6 +2550,8 @@ void engine_print_stats(struct engine *e) {
 void engine_launch(struct engine *e, int nr_runners, unsigned int mask,
                    unsigned int submask) {
 
+  const ticks tic = getticks();
+
   /* Prepare the scheduler. */
   atomic_inc(&e->sched.waiting);
 
@@ -2394,6 +2576,10 @@ void engine_launch(struct engine *e, int nr_runners, unsigned int mask,
   while (e->barrier_launch || e->barrier_running)
     if (pthread_cond_wait(&e->barrier_cond, &e->barrier_mutex) != 0)
       error("Error while waiting for barrier.");
+
+  if (e->verbose)
+    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
+            clocks_getunit());
 }
 
 /**
@@ -2413,7 +2599,7 @@ void engine_init_particles(struct engine *e, int flag_entropy_ICs) {
 
   if (e->nodeID == 0) message("Running initialisation fake time-step.");
 
-  engine_prepare(e);
+  engine_prepare(e, 1);
 
   engine_marktasks(e);
 
@@ -2479,6 +2665,8 @@ void engine_init_particles(struct engine *e, int flag_entropy_ICs) {
   /* Ready to go */
   e->step = -1;
   e->wallclock_time = (float)clocks_diff(&time1, &time2);
+
+  if (e->verbose) message("took %.3f %s.", e->wallclock_time, clocks_getunit());
 }
 
 /**
@@ -2511,7 +2699,11 @@ void engine_step(struct engine *e) {
     snapshot_drift_time = e->timeStep;
 
     /* Drift everybody to the snapshot position */
-    engine_launch(e, e->nr_threads, 1 << task_type_drift, 0);
+    e->drift_all = 1;
+    engine_drift(e);
+
+    /* Restore the default drifting policy */
+    e->drift_all = (e->policy & engine_policy_drift_all);
 
     /* Dump... */
     engine_dump_snapshot(e);
@@ -2528,17 +2720,14 @@ void engine_step(struct engine *e) {
   e->timeOld = e->ti_old * e->timeBase + e->timeBegin;
   e->timeStep = (e->ti_current - e->ti_old) * e->timeBase + snapshot_drift_time;
 
-  /* Drift everybody */
-  engine_launch(e, e->nr_threads, 1 << task_type_drift, 0);
-
   if (e->nodeID == 0) {
 
     /* Print some information to the screen */
-    printf("  %6d %14e %14e %10zd %10zd %21.3f\n", e->step, e->time,
+    printf("  %6d %14e %14e %10zu %10zu %21.3f\n", e->step, e->time,
            e->timeStep, e->updates, e->g_updates, e->wallclock_time);
     fflush(stdout);
 
-    fprintf(e->file_timesteps, "  %6d %14e %14e %10zd %10zd %21.3f\n", e->step,
+    fprintf(e->file_timesteps, "  %6d %14e %14e %10zu %10zu %21.3f\n", e->step,
             e->time, e->timeStep, e->updates, e->g_updates, e->wallclock_time);
     fflush(e->file_timesteps);
   }
@@ -2549,11 +2738,20 @@ void engine_step(struct engine *e) {
     e->timeLastStatistics += e->deltaTimeStatistics;
   }
 
+  /* Drift only the necessary particles, which means all particles
+   * if we are about to repartition. */
+  int repart = (e->forcerepart != REPART_NONE);
+  e->drift_all = repart || e->drift_all;
+  engine_drift(e);
+
   /* Re-distribute the particles amongst the nodes? */
-  if (e->forcerepart != REPART_NONE) engine_repartition(e);
+  if (repart) engine_repartition(e);
 
   /* Prepare the space. */
-  engine_prepare(e);
+  engine_prepare(e, e->drift_all);
+
+  /* Restore the default drifting policy */
+  e->drift_all = (e->policy & engine_policy_drift_all);
 
   /* Build the masks corresponding to the policy */
   unsigned int mask = 0, submask = 0;
@@ -2580,6 +2778,11 @@ void engine_step(struct engine *e) {
 
     submask |= 1 << task_subtype_density;
     submask |= 1 << task_subtype_force;
+
+#ifdef EXTRA_HYDRO_LOOP
+    mask |= 1 << task_type_extra_ghost;
+    submask |= 1 << task_subtype_gradient;
+#endif
   }
 
   /* Add the tasks corresponding to self-gravity to the masks */
@@ -2602,6 +2805,11 @@ void engine_step(struct engine *e) {
     mask |= 1 << task_type_grav_external;
   }
 
+  /* Add the tasks corresponding to cooling to the masks */
+  if (e->policy & engine_policy_cooling) {
+    mask |= 1 << task_type_cooling;
+  }
+
   /* Add MPI tasks if need be */
   if (e->policy & engine_policy_mpi) {
 
@@ -2630,6 +2838,21 @@ int engine_is_done(struct engine *e) {
   return !(e->ti_current < max_nr_timesteps);
 }
 
+/**
+ * @brief Run the drift mapper over the top-level cells of the space.
+ *
+ * @param e The #engine, also handed to the mapper as its extra data.
+ */
+void engine_drift(struct engine *e) {
+  const ticks start = getticks();
+  struct space *space = e->s;
+  threadpool_map(&e->threadpool, runner_do_drift_mapper, space->cells_top,
+                 space->nr_cells, sizeof(struct cell), 1, e);
+  if (!e->verbose) return;
+  message("took %.3f %s.", clocks_from_ticks(getticks() - start),
+          clocks_getunit());
+}
+
 /**
  * @brief Create and fill the proxies.
  *
@@ -2640,7 +2863,7 @@ void engine_makeproxies(struct engine *e) {
 #ifdef WITH_MPI
   const int *cdim = e->s->cdim;
   const struct space *s = e->s;
-  struct cell *cells = s->cells;
+  struct cell *cells = s->cells_top;
   struct proxy *proxies = e->proxies;
   ticks tic = getticks();
 
@@ -2753,7 +2976,7 @@ void engine_split(struct engine *e, struct partition *initial_partition) {
 
   /* Re-allocate the local parts. */
   if (e->verbose)
-    message("Re-allocating parts array from %zi to %zi.", s->size_parts,
+    message("Re-allocating parts array from %zu to %zu.", s->size_parts,
             (size_t)(s->nr_parts * 1.2));
   s->size_parts = s->nr_parts * 1.2;
   struct part *parts_new = NULL;
@@ -2771,11 +2994,12 @@ void engine_split(struct engine *e, struct partition *initial_partition) {
   s->xparts = xparts_new;
 
   /* Re-link the gparts. */
-  part_relink_gparts(s->parts, s->nr_parts, 0);
+  if (s->nr_parts > 0 && s->nr_gparts > 0)
+    part_relink_gparts(s->parts, s->nr_parts, 0);
 
   /* Re-allocate the local gparts. */
   if (e->verbose)
-    message("Re-allocating gparts array from %zi to %zi.", s->size_gparts,
+    message("Re-allocating gparts array from %zu to %zu.", s->size_gparts,
             (size_t)(s->nr_gparts * 1.2));
   s->size_gparts = s->nr_gparts * 1.2;
   struct gpart *gparts_new = NULL;
@@ -2787,9 +3011,11 @@ void engine_split(struct engine *e, struct partition *initial_partition) {
   s->gparts = gparts_new;
 
   /* Re-link the parts. */
-  part_relink_parts(s->gparts, s->nr_gparts, s->parts);
+  if (s->nr_parts > 0 && s->nr_gparts > 0)
+    part_relink_parts(s->gparts, s->nr_gparts, s->parts);
 
 #ifdef SWIFT_DEBUG_CHECKS
+
   /* Verify that the links are correct */
   for (size_t k = 0; k < s->nr_gparts; ++k) {
 
@@ -2806,9 +3032,11 @@ void engine_split(struct engine *e, struct partition *initial_partition) {
   }
   for (size_t k = 0; k < s->nr_parts; ++k) {
 
-    if (s->parts[k].gpart != NULL && s->parts[k].gpart->id_or_neg_offset != -k)
+    if (s->parts[k].gpart != NULL &&
+        s->parts[k].gpart->id_or_neg_offset != -(ptrdiff_t)k)
       error("Linking problem !");
   }
+
 #endif
 
 #else
@@ -2857,6 +3085,7 @@ void engine_dump_snapshot(struct engine *e) {
 static cpu_set_t *engine_entry_affinity() {
 
   static int use_entry_affinity = 0;
+  static cpu_set_t entry_affinity;
 
   if (!use_entry_affinity) {
     pthread_t engine = pthread_self();
@@ -2897,7 +3126,8 @@ void engine_pin() {
 void engine_unpin() {
 #ifdef HAVE_SETAFFINITY
   pthread_t main_thread = pthread_self();
-  pthread_setaffinity_np(main_thread, sizeof(entry_affinity), &entry_affinity);
+  cpu_set_t *entry_affinity = engine_entry_affinity();
+  pthread_setaffinity_np(main_thread, sizeof(*entry_affinity), entry_affinity);
 #else
   error("SWIFT was not compiled with support for pinning.");
 #endif
@@ -2920,6 +3150,7 @@ void engine_unpin() {
  * @param physical_constants The #phys_const used for this run.
  * @param hydro The #hydro_props used for this run.
  * @param potential The properties of the external potential.
+ * @param cooling_func The properties of the cooling function.
  */
 void engine_init(struct engine *e, struct space *s,
                  const struct swift_params *params, int nr_nodes, int nodeID,
@@ -2927,7 +3158,8 @@ void engine_init(struct engine *e, struct space *s,
                  const struct UnitSystem *internal_units,
                  const struct phys_const *physical_constants,
                  const struct hydro_props *hydro,
-                 const struct external_potential *potential) {
+                 const struct external_potential *potential,
+                 const struct cooling_function_data *cooling_func) {
 
   /* Clean-up everything */
   bzero(e, sizeof(struct engine));
@@ -2954,6 +3186,7 @@ void engine_init(struct engine *e, struct space *s,
   e->timeStep = 0.;
   e->timeBase = 0.;
   e->timeBase_inv = 0.;
+  e->drift_all = (policy & engine_policy_drift_all);
   e->internalUnits = internal_units;
   e->timeFirstSnapshot =
       parser_get_param_double(params, "Snapshots:time_first");
@@ -2961,6 +3194,8 @@ void engine_init(struct engine *e, struct space *s,
       parser_get_param_double(params, "Snapshots:delta_time");
   e->ti_nextSnapshot = 0;
   parser_get_param_string(params, "Snapshots:basename", e->snapshotBaseName);
+  e->snapshotCompression =
+      parser_get_opt_param_int(params, "Snapshots:compression", 0);
   e->snapshotUnits = malloc(sizeof(struct UnitSystem));
   units_init_default(e->snapshotUnits, params, "Snapshots", internal_units);
   e->dt_min = parser_get_param_double(params, "TimeIntegration:dt_min");
@@ -2976,6 +3211,7 @@ void engine_init(struct engine *e, struct space *s,
   e->physical_constants = physical_constants;
   e->hydro_properties = hydro;
   e->external_potential = potential;
+  e->cooling_func = cooling_func;
   e->parameter_file = params;
   engine_rank = nodeID;
 
@@ -3126,11 +3362,11 @@ void engine_init(struct engine *e, struct space *s,
                                 engine_default_energy_file_name);
     sprintf(energyfileName + strlen(energyfileName), ".txt");
     e->file_stats = fopen(energyfileName, "w");
-    fprintf(
-        e->file_stats,
-        "#%14s %14s %14s %14s %14s %14s %14s %14s %14s %14s %14s %14s %14s\n",
-        "Time", "Mass", "E_tot", "E_kin", "E_int", "E_pot", "Entropy", "p_x",
-        "p_y", "p_z", "ang_x", "ang_y", "ang_z");
+    fprintf(e->file_stats,
+            "#%14s %14s %14s %14s %14s %14s %14s %14s %14s %14s %14s %14s %14s "
+            "%14s\n",
+            "Time", "Mass", "E_tot", "E_kin", "E_int", "E_pot", "E_radcool",
+            "Entropy", "p_x", "p_y", "p_z", "ang_x", "ang_y", "ang_z");
     fflush(e->file_stats);
 
     char timestepsfileName[200] = "";
@@ -3240,6 +3476,9 @@ void engine_init(struct engine *e, struct space *s,
   part_create_mpi_types();
 #endif
 
+  /* Initialize the threadpool. */
+  threadpool_init(&e->threadpool, e->nr_threads);
+
   /* First of all, init the barrier and lock it. */
   if (pthread_mutex_init(&e->barrier_mutex, NULL) != 0)
     error("Failed to initialize barrier mutex.");
@@ -3254,18 +3493,7 @@ void engine_init(struct engine *e, struct space *s,
   /* Init the scheduler with enough tasks for the initial sorting tasks. */
   const int nr_tasks = 2 * s->tot_cells + 2 * e->nr_threads;
   scheduler_init(&e->sched, e->s, nr_tasks, nr_queues, scheduler_flag_steal,
-                 e->nodeID);
-
-  /* Create the sorting tasks. */
-  for (int i = 0; i < e->nr_threads; i++) {
-    scheduler_addtask(&e->sched, task_type_part_sort, task_subtype_none, i, 0,
-                      NULL, NULL, 0);
-
-    scheduler_addtask(&e->sched, task_type_gpart_sort, task_subtype_none, i, 0,
-                      NULL, NULL, 0);
-  }
-
-  scheduler_ranktasks(&e->sched);
+                 e->nodeID, &e->threadpool);
 
   /* Allocate and init the threads. */
   if ((e->runners = (struct runner *)malloc(sizeof(struct runner) *
@@ -3398,4 +3626,5 @@ void engine_clean(struct engine *e) {
   free(e->links);
   scheduler_clean(&e->sched);
   space_clean(e->s);
+  threadpool_clean(&e->threadpool);
 }
diff --git a/src/engine.h b/src/engine.h
index d708198c32b67c5118bbd7f4676f1ea0fe821c7d..d36914af611723bc4d496d5c2dc68050eea6ffe6 100644
--- a/src/engine.h
+++ b/src/engine.h
@@ -38,9 +38,10 @@
 
 /* Includes. */
 #include "clocks.h"
+#include "cooling_struct.h"
 #include "parser.h"
 #include "partition.h"
-#include "potentials.h"
+#include "potential.h"
 #include "runner.h"
 #include "scheduler.h"
 #include "space.h"
@@ -61,7 +62,9 @@ enum engine_policy {
   engine_policy_hydro = (1 << 8),
   engine_policy_self_gravity = (1 << 9),
   engine_policy_external_gravity = (1 << 10),
-  engine_policy_cosmology = (1 << 11)
+  engine_policy_cosmology = (1 << 11),
+  engine_policy_drift_all = (1 << 12),
+  engine_policy_cooling = (1 << 13),
 };
 
 extern const char *engine_policy_names[];
@@ -81,16 +84,6 @@ extern int engine_rank;
 /* The maximal number of timesteps in a simulation */
 #define max_nr_timesteps (1 << 28)
 
-/* Mini struct to link cells to density/force tasks. */
-struct link {
-
-  /* The task pointer. */
-  struct task *t;
-
-  /* The next pointer. */
-  struct link *next;
-};
-
 /* Data structure for the engine. */
 struct engine {
 
@@ -109,6 +102,9 @@ struct engine {
   /* The task scheduler. */
   struct scheduler sched;
 
+  /* Common threadpool for all the engine's tasks. */
+  struct threadpool threadpool;
+
   /* The minimum and maximum allowed dt */
   double dt_min, dt_max;
 
@@ -136,6 +132,9 @@ struct engine {
   /* Minimal ti_end for the next time-step */
   int ti_end_min;
 
+  /* Are we drifting all particles now ? */
+  int drift_all;
+
   /* Number of particles updated */
   size_t updates, g_updates;
 
@@ -147,6 +146,7 @@ struct engine {
   double deltaTimeSnapshot;
   int ti_nextSnapshot;
   char snapshotBaseName[200];
+  int snapshotCompression;
   struct UnitSystem *snapshotUnits;
 
   /* Statistics information */
@@ -204,6 +204,9 @@ struct engine {
   /* Properties of external gravitational potential */
   const struct external_potential *external_potential;
 
+  /* Properties of the cooling scheme */
+  const struct cooling_function_data *cooling_func;
+
   /* The (parsed) parameter file */
   const struct swift_params *parameter_file;
 };
@@ -211,6 +214,7 @@ struct engine {
 /* Function prototypes. */
 void engine_barrier(struct engine *e, int tid);
 void engine_compute_next_snapshot_time(struct engine *e);
+void engine_drift(struct engine *e);
 void engine_dump_snapshot(struct engine *e);
 void engine_init(struct engine *e, struct space *s,
                  const struct swift_params *params, int nr_nodes, int nodeID,
@@ -218,10 +222,11 @@ void engine_init(struct engine *e, struct space *s,
                  const struct UnitSystem *internal_units,
                  const struct phys_const *physical_constants,
                  const struct hydro_props *hydro,
-                 const struct external_potential *potential);
+                 const struct external_potential *potential,
+                 const struct cooling_function_data *cooling);
 void engine_launch(struct engine *e, int nr_runners, unsigned int mask,
                    unsigned int submask);
-void engine_prepare(struct engine *e);
+void engine_prepare(struct engine *e, int nodrift);
 void engine_print(struct engine *e);
 void engine_init_particles(struct engine *e, int flag_entropy_ICs);
 void engine_step(struct engine *e);
@@ -234,7 +239,6 @@ void engine_rebuild(struct engine *e);
 void engine_repartition(struct engine *e);
 void engine_makeproxies(struct engine *e);
 void engine_redistribute(struct engine *e);
-struct link *engine_addlink(struct engine *e, struct link *l, struct task *t);
 void engine_print_policy(struct engine *e);
 int engine_is_done(struct engine *e);
 void engine_pin();
diff --git a/src/equation_of_state.h b/src/equation_of_state.h
index b4a36e8a3ef0bcc281d1f939f89fde08ecf00be9..af59d8a2cad1632c67b6d377b5ed9dfe9484b4aa 100644
--- a/src/equation_of_state.h
+++ b/src/equation_of_state.h
@@ -132,66 +132,93 @@ gas_soundspeed_from_internal_energy(float density, float u) {
 /**
  * @brief Returns the internal energy given density and entropy
  *
- * @param density The density
- * @param entropy The entropy
+ * Since we are using an isothermal EoS, the entropy value is ignored.
+ * Computes \f$u = u_{cst}\f$.
+ *
+ * @param density The density \f$\rho\f$.
+ * @param entropy The entropy \f$S\f$.
  */
 __attribute__((always_inline)) INLINE static float
 gas_internal_energy_from_entropy(float density, float entropy) {
 
-  error("Missing definition !");
-  return 0.f;
+  return const_isothermal_internal_energy;
 }
-
 /**
  * @brief Returns the pressure given density and entropy
  *
- * @param density The density
- * @param entropy The entropy
+ * Since we are using an isothermal EoS, the entropy value is ignored.
+ * Computes \f$P = (\gamma - 1)u_{cst}\rho\f$.
+ *
+ * @param density The density \f$\rho\f$.
+ * @param entropy The entropy \f$S\f$.
  */
 __attribute__((always_inline)) INLINE static float gas_pressure_from_entropy(
     float density, float entropy) {
 
-  error("Missing definition !");
-  return 0.f;
+  return hydro_gamma_minus_one * const_isothermal_internal_energy * density;
+}
+
+/**
+ * @brief Returns the sound speed given density and entropy
+ *
+ * Since we are using an isothermal EoS, the entropy value is ignored.
+ * Computes \f$c = \sqrt{u_{cst} \gamma (\gamma-1)}\f$.
+ *
+ * @param density The density \f$\rho\f$.
+ * @param entropy The entropy \f$S\f$.
+ */
+__attribute__((always_inline)) INLINE static float gas_soundspeed_from_entropy(
+    float density, float entropy) {
+
+  return sqrtf(const_isothermal_internal_energy * hydro_gamma *
+               hydro_gamma_minus_one);
 }
 
 /**
  * @brief Returns the entropy given density and internal energy
  *
- * @param density The density
- * @param u The internal energy
+ * Since we are using an isothermal EoS, the energy value is ignored.
+ * Computes \f$S = \frac{(\gamma - 1)u_{cst}}{\rho^{\gamma-1}}\f$.
+ *
+ * @param density The density \f$\rho\f$
+ * @param u The internal energy \f$u\f$
  */
 __attribute__((always_inline)) INLINE static float
 gas_entropy_from_internal_energy(float density, float u) {
 
-  error("Missing definition !");
-  return 0.f;
+  return hydro_gamma_minus_one * const_isothermal_internal_energy *
+         pow_minus_gamma_minus_one(density);
 }
 
 /**
  * @brief Returns the pressure given density and internal energy
  *
- * @param density The density
- * @param u The internal energy
+ * Since we are using an isothermal EoS, the energy value is ignored.
+ * Computes \f$P = (\gamma - 1)u_{cst}\rho\f$.
+ *
+ * @param density The density \f$\rho\f$
+ * @param u The internal energy \f$u\f$
  */
 __attribute__((always_inline)) INLINE static float
 gas_pressure_from_internal_energy(float density, float u) {
 
-  error("Missing definition !");
-  return 0.f;
+  return hydro_gamma_minus_one * const_isothermal_internal_energy * density;
 }
 
 /**
  * @brief Returns the sound speed given density and internal energy
  *
- * @param density The density
- * @param u The internal energy
+ * Since we are using an isothermal EoS, the energy value is ignored.
+ * Computes \f$c = \sqrt{u_{cst} \gamma (\gamma-1)}\f$.
+ *
+ * @param density The density \f$\rho\f$
+ * @param u The internal energy \f$u\f$
  */
 __attribute__((always_inline)) INLINE static float
 gas_soundspeed_from_internal_energy(float density, float u) {
 
-  error("Missing definition !");
-  return 0.f;
+  return sqrtf(const_isothermal_internal_energy * hydro_gamma *
+               hydro_gamma_minus_one);
 }
 
 /* ------------------------------------------------------------------------- */
diff --git a/src/gravity/Default/gravity.h b/src/gravity/Default/gravity.h
index f9b67c96331e7572b0200093f0c32eee5d2391cd..9e0ca81edff06b8a32afb185f24a88b41dc87da7 100644
--- a/src/gravity/Default/gravity.h
+++ b/src/gravity/Default/gravity.h
@@ -21,46 +21,15 @@
 #define SWIFT_DEFAULT_GRAVITY_H
 
 #include <float.h>
-#include "potentials.h"
-
-/**
- * @brief Computes the gravity time-step of a given particle due to an external
- *potential.
- *
- * This function only branches towards the potential chosen by the user.
- *
- * @param potential The properties of the external potential.
- * @param phys_const The physical constants in internal units.
- * @param gp Pointer to the g-particle data.
- */
-__attribute__((always_inline)) INLINE static float
-gravity_compute_timestep_external(const struct external_potential* potential,
-                                  const struct phys_const* const phys_const,
-                                  const struct gpart* const gp) {
-
-  float dt = FLT_MAX;
-
-#ifdef EXTERNAL_POTENTIAL_POINTMASS
-  dt =
-      fminf(dt, external_gravity_pointmass_timestep(potential, phys_const, gp));
-#endif
-#ifdef EXTERNAL_POTENTIAL_ISOTHERMALPOTENTIAL
-  dt = fminf(dt, external_gravity_isothermalpotential_timestep(potential,
-                                                               phys_const, gp));
-#endif
-
-  return dt;
-}
+#include "minmax.h"
 
 /**
  * @brief Computes the gravity time-step of a given particle due to self-gravity
  *
- * @param phys_const The physical constants in internal units.
  * @param gp Pointer to the g-particle data.
  */
 __attribute__((always_inline)) INLINE static float
-gravity_compute_timestep_self(const struct phys_const* const phys_const,
-                              const struct gpart* const gp) {
+gravity_compute_timestep_self(const struct gpart* const gp) {
 
   const float ac2 = gp->a_grav[0] * gp->a_grav[0] +
                     gp->a_grav[1] * gp->a_grav[1] +
@@ -68,7 +37,7 @@ gravity_compute_timestep_self(const struct phys_const* const phys_const,
 
   const float ac = (ac2 > 0.f) ? sqrtf(ac2) : FLT_MIN;
 
-  const float dt = sqrt(2.f * const_gravity_eta * gp->epsilon / ac);
+  const float dt = sqrtf(2.f * const_gravity_eta * gp->epsilon / ac);
 
   return dt;
 }
@@ -112,10 +81,10 @@ __attribute__((always_inline)) INLINE static void gravity_init_gpart(
  * Multiplies the forces and accelerations by the appropiate constants
  *
  * @param gp The particle to act upon
- * @param const_G Newton's constant
+ * @param const_G Newton's constant in internal units
  */
 __attribute__((always_inline)) INLINE static void gravity_end_force(
-    struct gpart* gp, double const_G) {
+    struct gpart* gp, float const_G) {
 
   /* Let's get physical... */
   gp->a_grav[0] *= const_G;
@@ -123,27 +92,6 @@ __attribute__((always_inline)) INLINE static void gravity_end_force(
   gp->a_grav[2] *= const_G;
 }
 
-/**
- * @brief Computes the gravitational acceleration induced by external potentials
- *
- * This function only branches towards the potential chosen by the user.
- *
- * @param potential The properties of the external potential.
- * @param phys_const The physical constants in internal units.
- * @param gp The particle to act upon.
- */
-__attribute__((always_inline)) INLINE static void external_gravity(
-    const struct external_potential* potential,
-    const struct phys_const* const phys_const, struct gpart* gp) {
-
-#ifdef EXTERNAL_POTENTIAL_POINTMASS
-  external_gravity_pointmass(potential, phys_const, gp);
-#endif
-#ifdef EXTERNAL_POTENTIAL_ISOTHERMALPOTENTIAL
-  external_gravity_isothermalpotential(potential, phys_const, gp);
-#endif
-}
-
 /**
  * @brief Kick the additional variables
  *
diff --git a/src/gravity/Default/gravity_part.h b/src/gravity/Default/gravity_part.h
index 1850ff0a1644d3593f78f150646eae8b2f074e1e..f06e65e5b30ebcd609c0c6204de33da17b770add 100644
--- a/src/gravity/Default/gravity_part.h
+++ b/src/gravity/Default/gravity_part.h
@@ -53,6 +53,6 @@ struct gpart {
      which this gpart is linked. */
   long long id_or_neg_offset;
 
-} __attribute__((aligned(gpart_align)));
+} SWIFT_STRUCT_ALIGN;
 
 #endif /* SWIFT_DEFAULT_GRAVITY_PART_H */
diff --git a/src/hydro.h b/src/hydro.h
index 8f626050929ea01234a33d860a0ca035b28a9f6e..9e02c2009e307f0623ffb535ff9068603c2d4147 100644
--- a/src/hydro.h
+++ b/src/hydro.h
@@ -42,6 +42,10 @@
 #include "./hydro/Default/hydro.h"
 #include "./hydro/Default/hydro_iact.h"
 #define SPH_IMPLEMENTATION "Default version of SPH"
+#elif defined(GIZMO_SPH)
+#include "./hydro/Gizmo/hydro.h"
+#include "./hydro/Gizmo/hydro_iact.h"
+#define SPH_IMPLEMENTATION "GIZMO (Hopkins 2015)"
 #else
 #error "Invalid choice of SPH variant"
 #endif
diff --git a/src/hydro/Default/hydro.h b/src/hydro/Default/hydro.h
index 021599cd2daf61ff35e5f29e3f13b2ad61c8947a..ccdd0cee32b9386eff54da655b75285b8e08a598 100644
--- a/src/hydro/Default/hydro.h
+++ b/src/hydro/Default/hydro.h
@@ -22,6 +22,7 @@
 #include "adiabatic_index.h"
 #include "approx_math.h"
 #include "equation_of_state.h"
+#include "minmax.h"
 
 #include <float.h>
 
@@ -73,6 +74,28 @@ __attribute__((always_inline)) INLINE static float hydro_get_soundspeed(
   return p->force.soundspeed;
 }
 
+/**
+ * @brief Returns the density of a particle
+ *
+ * @param p The particle of interest
+ */
+__attribute__((always_inline)) INLINE static float hydro_get_density(
+    const struct part *restrict p) {
+
+  return p->rho;
+}
+
+/**
+ * @brief Returns the mass of a particle
+ *
+ * @param p The particle of interest
+ */
+__attribute__((always_inline)) INLINE static float hydro_get_mass(
+    const struct part *restrict p) {
+
+  return p->mass;
+}
+
 /**
  * @brief Modifies the thermal state of a particle to the imposed internal
  * energy
@@ -126,7 +149,7 @@ __attribute__((always_inline)) INLINE static float hydro_compute_timestep(
       (p->force.u_dt != 0.0f) ? fabsf(const_max_u_change * p->u / p->force.u_dt)
                               : FLT_MAX;
 
-  return fminf(dt_cfl, dt_u_change);
+  return min(dt_cfl, dt_u_change);
 }
 
 /**
@@ -251,7 +274,7 @@ __attribute__((always_inline)) INLINE static void hydro_prepare_force(
   const float tau = h / (2.f * const_viscosity_length * p->force.soundspeed);
 
   /* Viscosity source term */
-  const float S = fmaxf(-normDiv_v, 0.f);
+  const float S = max(-normDiv_v, 0.f);
 
   /* Compute the particle's viscosity parameter time derivative */
   const float alpha_dot = (const_viscosity_alpha_min - p->alpha) / tau +
@@ -288,16 +311,31 @@ __attribute__((always_inline)) INLINE static void hydro_reset_acceleration(
  *
  * @param p The particle
  * @param xp The extended data of the particle
+ * @param dt The drift time-step.
  * @param t0 The time at the start of the drift
  * @param t1 The time at the end of the drift
  * @param timeBase The minimal time-step size
  */
 __attribute__((always_inline)) INLINE static void hydro_predict_extra(
-    struct part *restrict p, struct xpart *restrict xp, int t0, int t1,
-    double timeBase) {
+    struct part *restrict p, struct xpart *restrict xp, float dt, int t0,
+    int t1, double timeBase) {
   float u, w;
 
-  const float dt = (t1 - t0) * timeBase;
+  const float h_inv = 1.f / p->h;
+
+  /* Predict smoothing length */
+  const float w1 = p->force.h_dt * h_inv * dt;
+  if (fabsf(w1) < 0.2f)
+    p->h *= approx_expf(w1); /* 4th order expansion of exp(w) */
+  else
+    p->h *= expf(w1);
+
+  /* Predict density */
+  const float w2 = -hydro_dimension * w1;
+  if (fabsf(w2) < 0.2f)
+    p->rho *= approx_expf(w2); /* 4th order expansion of exp(w) */
+  else
+    p->rho *= expf(w2);
 
   /* Predict internal energy */
   w = p->force.u_dt / p->u * dt;
diff --git a/src/hydro/Default/hydro_iact.h b/src/hydro/Default/hydro_iact.h
index 51fa7d07229f86918ef2d7019a9708110cef02e3..7b1c8c3b91ce917af46efc28f6001a4d47747e2a 100644
--- a/src/hydro/Default/hydro_iact.h
+++ b/src/hydro/Default/hydro_iact.h
@@ -395,7 +395,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_force(
 
   /* Compute the relative velocity. (This is 0 if the particles move away from
    * each other and negative otherwise) */
-  omega_ij = fminf(dvdr, 0.f);
+  omega_ij = min(dvdr, 0.f);
 
   /* Compute signal velocity */
   v_sig = pi->force.soundspeed + pj->force.soundspeed - 2.0f * omega_ij;
@@ -441,8 +441,8 @@ __attribute__((always_inline)) INLINE static void runner_iact_force(
   pj->force.h_dt -= mi * dvdr / rhoi * wj_dr;
 
   /* Update the signal velocity. */
-  pi->force.v_sig = fmaxf(pi->force.v_sig, v_sig);
-  pj->force.v_sig = fmaxf(pj->force.v_sig, v_sig);
+  pi->force.v_sig = max(pi->force.v_sig, v_sig);
+  pj->force.v_sig = max(pj->force.v_sig, v_sig);
 }
 
 /**
@@ -635,8 +635,8 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force(
     pj[k]->force.u_dt += pju_dt.f[k];
     pi[k]->force.h_dt -= pih_dt.f[k];
     pj[k]->force.h_dt -= pjh_dt.f[k];
-    pi[k]->force.v_sig = fmaxf(pi[k]->force.v_sig, v_sig.f[k]);
-    pj[k]->force.v_sig = fmaxf(pj[k]->force.v_sig, v_sig.f[k]);
+    pi[k]->force.v_sig = max(pi[k]->force.v_sig, v_sig.f[k]);
+    pj[k]->force.v_sig = max(pj[k]->force.v_sig, v_sig.f[k]);
     for (j = 0; j < 3; j++) {
       pi[k]->a_hydro[j] -= pia[j].f[k];
       pj[k]->a_hydro[j] += pja[j].f[k];
@@ -696,7 +696,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force(
 
   /* Compute the relative velocity. (This is 0 if the particles move away from
    * each other and negative otherwise) */
-  omega_ij = fminf(dvdr, 0.f);
+  omega_ij = min(dvdr, 0.f);
 
   /* Compute signal velocity */
   v_sig = pi->force.soundspeed + pj->force.soundspeed - 2.0f * omega_ij;
@@ -737,7 +737,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force(
   pi->force.h_dt -= mj * dvdr / rhoj * wi_dr;
 
   /* Update the signal velocity. */
-  pi->force.v_sig = fmaxf(pi->force.v_sig, v_sig);
+  pi->force.v_sig = max(pi->force.v_sig, v_sig);
 }
 
 /**
@@ -920,7 +920,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force(
   for (k = 0; k < VEC_SIZE; k++) {
     pi[k]->force.u_dt += piu_dt.f[k];
     pi[k]->force.h_dt -= pih_dt.f[k];
-    pi[k]->force.v_sig = fmaxf(pi[k]->force.v_sig, v_sig.f[k]);
+    pi[k]->force.v_sig = max(pi[k]->force.v_sig, v_sig.f[k]);
     for (j = 0; j < 3; j++) pi[k]->a_hydro[j] -= pia[j].f[k];
   }
 
diff --git a/src/hydro/Default/hydro_part.h b/src/hydro/Default/hydro_part.h
index a2f4453dc69ed06ca4f315b6be29844c177d0435..c7464bcf338b1c5b81ffa91d92264c2bd35e9313 100644
--- a/src/hydro/Default/hydro_part.h
+++ b/src/hydro/Default/hydro_part.h
@@ -19,6 +19,8 @@
 #ifndef SWIFT_DEFAULT_HYDRO_PART_H
 #define SWIFT_DEFAULT_HYDRO_PART_H
 
+#include "cooling_struct.h"
+
 /* Extra particle data not needed during the SPH loops over neighbours. */
 struct xpart {
 
@@ -28,10 +30,15 @@ struct xpart {
   /* Velocity at the last full step. */
   float v_full[3];
 
+  /* Additional data used to record cooling information */
+  struct cooling_xpart_data cooling_data;
+
+  float u_full;
+
   /* Old density. */
   float omega;
 
-} __attribute__((aligned(xpart_align)));
+} SWIFT_STRUCT_ALIGN;
 
 /* Data of a single particle. */
 struct part {
@@ -118,6 +125,6 @@ struct part {
   /* Pointer to corresponding gravity part. */
   struct gpart* gpart;
 
-} __attribute__((aligned(part_align)));
+} SWIFT_STRUCT_ALIGN;
 
 #endif /* SWIFT_DEFAULT_HYDRO_PART_H */
diff --git a/src/hydro/Gadget2/hydro.h b/src/hydro/Gadget2/hydro.h
index e9d626cb8c147c0cf4fa8d27f8bab31d2471beae..2b17505c36f345779b269c90758931a13f9b4e0d 100644
--- a/src/hydro/Gadget2/hydro.h
+++ b/src/hydro/Gadget2/hydro.h
@@ -19,7 +19,20 @@
 #ifndef SWIFT_GADGET2_HYDRO_H
 #define SWIFT_GADGET2_HYDRO_H
 
+/**
+ * @file Gadget2/hydro.h
+ * @brief SPH interaction functions following the Gadget-2 version of SPH.
+ *
+ * The interactions computed here are the ones presented in the Gadget-2 paper
+ * Springel, V., MNRAS, Volume 364, Issue 4, pp. 1105-1134.
+ * We use the same numerical coefficients as the Gadget-2 code. When used with
+ * the Spline-3 kernel, the results should be equivalent to the ones obtained
+ * with Gadget-2 up to the rounding errors and interactions missed by the
+ * Gadget-2 tree-code neighbours search.
+ */
+
 #include "adiabatic_index.h"
+#include "approx_math.h"
 #include "dimension.h"
 #include "equation_of_state.h"
 #include "hydro_properties.h"
@@ -77,6 +90,28 @@ __attribute__((always_inline)) INLINE static float hydro_get_soundspeed(
   return p->force.soundspeed;
 }
 
+/**
+ * @brief Returns the density of a particle
+ *
+ * @param p The particle of interest
+ */
+__attribute__((always_inline)) INLINE static float hydro_get_density(
+    const struct part *restrict p) {
+
+  return p->rho;
+}
+
+/**
+ * @brief Returns the mass of a particle
+ *
+ * @param p The particle of interest
+ */
+__attribute__((always_inline)) INLINE static float hydro_get_mass(
+    const struct part *restrict p) {
+
+  return p->mass;
+}
+
 /**
  * @brief Modifies the thermal state of a particle to the imposed internal
  * energy
@@ -285,13 +320,30 @@ __attribute__((always_inline)) INLINE static void hydro_reset_acceleration(
  *
  * @param p The particle
  * @param xp The extended data of the particle
+ * @param dt The drift time-step.
  * @param t0 The time at the start of the drift
  * @param t1 The time at the end of the drift
  * @param timeBase The minimal time-step size
  */
 __attribute__((always_inline)) INLINE static void hydro_predict_extra(
-    struct part *restrict p, const struct xpart *restrict xp, int t0, int t1,
-    double timeBase) {
+    struct part *restrict p, const struct xpart *restrict xp, float dt, int t0,
+    int t1, double timeBase) {
+
+  const float h_inv = 1.f / p->h;
+
+  /* Predict smoothing length */
+  const float w1 = p->force.h_dt * h_inv * dt;
+  if (fabsf(w1) < 0.2f)
+    p->h *= approx_expf(w1); /* 4th order expansion of exp(w) */
+  else
+    p->h *= expf(w1);
+
+  /* Predict density */
+  const float w2 = -hydro_dimension * w1;
+  if (fabsf(w2) < 0.2f)
+    p->rho *= approx_expf(w2); /* 4th order expansion of exp(w) */
+  else
+    p->rho *= expf(w2);
 
   /* Drift the pressure */
   const float dt_entr = (t1 - (p->ti_begin + p->ti_end) / 2) * timeBase;
diff --git a/src/hydro/Gadget2/hydro_iact.h b/src/hydro/Gadget2/hydro_iact.h
index b9807b6332e08012d47ad63652377f4fe5337bf9..5ee8cd0370a970aa83cef3d1c8909d923c12ba24 100644
--- a/src/hydro/Gadget2/hydro_iact.h
+++ b/src/hydro/Gadget2/hydro_iact.h
@@ -21,18 +21,19 @@
 #define SWIFT_GADGET2_HYDRO_IACT_H
 
 /**
+ * @file Gadget2/hydro_iact.h
  * @brief SPH interaction functions following the Gadget-2 version of SPH.
  *
  * The interactions computed here are the ones presented in the Gadget-2 paper
- *and use the same
- * numerical coefficients as the Gadget-2 code. When used with the Spline-3
- *kernel, the results
- * should be equivalent to the ones obtained with Gadget-2 up to the rounding
- *errors and interactions
- * missed by the Gadget-2 tree-code neighbours search.
- *
+ * Springel, V., MNRAS, Volume 364, Issue 4, pp. 1105-1134.
+ * We use the same numerical coefficients as the Gadget-2 code. When used with
+ * the Spline-3 kernel, the results should be equivalent to the ones obtained
+ * with Gadget-2 up to the rounding errors and interactions missed by the
+ * Gadget-2 tree-code neighbours search.
  */
 
+#include "minmax.h"
+
 /**
  * @brief Density loop
  */
@@ -432,7 +433,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_force(
   const float balsara_j = pj->force.balsara;
 
   /* Are the particles moving towards each others ? */
-  const float omega_ij = fminf(dvdr, 0.f);
+  const float omega_ij = (dvdr < 0.f) ? dvdr : 0.f;
   const float mu_ij = fac_mu * r_inv * omega_ij; /* This is 0 or negative */
 
   /* Signal velocity */
@@ -465,8 +466,8 @@ __attribute__((always_inline)) INLINE static void runner_iact_force(
   pj->force.h_dt -= mi * dvdr * r_inv / rhoi * wj_dr;
 
   /* Update the signal velocity. */
-  pi->force.v_sig = fmaxf(pi->force.v_sig, v_sig);
-  pj->force.v_sig = fmaxf(pj->force.v_sig, v_sig);
+  pi->force.v_sig = (pi->force.v_sig > v_sig) ? pi->force.v_sig : v_sig;
+  pj->force.v_sig = (pj->force.v_sig > v_sig) ? pj->force.v_sig : v_sig;
 
   /* Change in entropy */
   pi->entropy_dt += mj * visc_term * dvdr;
@@ -641,8 +642,8 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force(
     }
     pi[k]->force.h_dt -= pih_dt.f[k];
     pj[k]->force.h_dt -= pjh_dt.f[k];
-    pi[k]->force.v_sig = fmaxf(pi[k]->force.v_sig, v_sig.f[k]);
-    pj[k]->force.v_sig = fmaxf(pj[k]->force.v_sig, v_sig.f[k]);
+    pi[k]->force.v_sig = max(pi[k]->force.v_sig, v_sig.f[k]);
+    pj[k]->force.v_sig = max(pj[k]->force.v_sig, v_sig.f[k]);
     pi[k]->entropy_dt += entropy_dt.f[k] * mj.f[k];
     pj[k]->entropy_dt += entropy_dt.f[k] * mi.f[k];
   }
@@ -707,7 +708,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force(
   const float balsara_j = pj->force.balsara;
 
   /* Are the particles moving towards each others ? */
-  const float omega_ij = fminf(dvdr, 0.f);
+  const float omega_ij = (dvdr < 0.f) ? dvdr : 0.f;
   const float mu_ij = fac_mu * r_inv * omega_ij; /* This is 0 or negative */
 
   /* Signal velocity */
@@ -735,7 +736,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force(
   pi->force.h_dt -= mj * dvdr * r_inv / rhoj * wi_dr;
 
   /* Update the signal velocity. */
-  pi->force.v_sig = fmaxf(pi->force.v_sig, v_sig);
+  pi->force.v_sig = (pi->force.v_sig > v_sig) ? pi->force.v_sig : v_sig;
 
   /* Change in entropy */
   pi->entropy_dt += mj * visc_term * dvdr;
@@ -900,7 +901,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force(
   for (k = 0; k < VEC_SIZE; k++) {
     for (j = 0; j < 3; j++) pi[k]->a_hydro[j] -= pia[j].f[k];
     pi[k]->force.h_dt -= pih_dt.f[k];
-    pi[k]->force.v_sig = fmaxf(pi[k]->force.v_sig, v_sig.f[k]);
+    pi[k]->force.v_sig = max(pi[k]->force.v_sig, v_sig.f[k]);
     pi[k]->entropy_dt += entropy_dt.f[k];
   }
 
diff --git a/src/hydro/Gadget2/hydro_part.h b/src/hydro/Gadget2/hydro_part.h
index 484792438d2717413c1ca8d4f429eac2e6d21b20..2c12a71ad6256d7372256b6e590790ee0ff4e22e 100644
--- a/src/hydro/Gadget2/hydro_part.h
+++ b/src/hydro/Gadget2/hydro_part.h
@@ -19,6 +19,20 @@
 #ifndef SWIFT_GADGET2_HYDRO_PART_H
 #define SWIFT_GADGET2_HYDRO_PART_H
 
+/**
+ * @file Gadget2/hydro_part.h
+ * @brief Particle data structures for the Gadget-2 version of SPH.
+ *
+ * The interactions computed here are the ones presented in the Gadget-2 paper
+ * Springel, V., MNRAS, Volume 364, Issue 4, pp. 1105-1134.
+ * We use the same numerical coefficients as the Gadget-2 code. When used with
+ * the Spline-3 kernel, the results should be equivalent to the ones obtained
+ * with Gadget-2 up to the rounding errors and interactions missed by the
+ * Gadget-2 tree-code neighbours search.
+ */
+
+#include "cooling_struct.h"
+
 /* Extra particle data not needed during the SPH loops over neighbours. */
 struct xpart {
 
@@ -28,7 +42,10 @@ struct xpart {
   /* Velocity at the last full step. */
   float v_full[3];
 
-} __attribute__((aligned(xpart_align)));
+  /* Additional data used to record cooling information */
+  struct cooling_xpart_data cooling_data;
+
+} SWIFT_STRUCT_ALIGN;
 
 /* Data of a single particle. */
 struct part {
@@ -110,6 +127,6 @@ struct part {
   /* Pointer to corresponding gravity part. */
   struct gpart* gpart;
 
-} __attribute__((aligned(part_align)));
+} SWIFT_STRUCT_ALIGN;
 
 #endif /* SWIFT_GADGET2_HYDRO_PART_H */
diff --git a/src/hydro/Gizmo/hydro.h b/src/hydro/Gizmo/hydro.h
index f69dc3f1798f014e895c4a63760805b1739cec94..37ce34e5910a033fab82dbc69839888a28c5ab12 100644
--- a/src/hydro/Gizmo/hydro.h
+++ b/src/hydro/Gizmo/hydro.h
@@ -17,17 +17,27 @@
  *
  ******************************************************************************/
 
+#include <float.h>
+#include "adiabatic_index.h"
+#include "approx_math.h"
+#include "equation_of_state.h"
+#include "hydro_gradients.h"
+#include "minmax.h"
+
 /**
  * @brief Computes the hydro time-step of a given particle
  *
- * @param p Pointer to the particle data
- * @param xp Pointer to the extended particle data
- *
+ * @param p Pointer to the particle data.
+ * @param xp Pointer to the extended particle data.
+ * @param hydro_properties Pointer to the hydro parameters.
  */
 __attribute__((always_inline)) INLINE static float hydro_compute_timestep(
-    struct part* p, struct xpart* xp) {
+    const struct part* restrict p, const struct xpart* restrict xp,
+    const struct hydro_props* restrict hydro_properties) {
 
-  return const_cfl * p->h / fabs(p->timestepvars.vmax);
+  const float CFL_condition = hydro_properties->CFL_condition;
+
+  return CFL_condition * p->h / fabsf(p->timestepvars.vmax);
 }
 
 /**
@@ -36,39 +46,37 @@ __attribute__((always_inline)) INLINE static float hydro_compute_timestep(
  * This function is called only once just after the ICs have been
  * read in to do some conversions.
  *
+ * In this case, we copy the particle velocities into the corresponding
+ * primitive variable field. We do this because the particle velocities in GIZMO
+ * can be independent of the actual fluid velocity. The latter is stored as a
+ * primitive variable and integrated using the linear momentum, a conserved
+ * variable.
+ *
  * @param p The particle to act upon
  * @param xp The extended particle data to act upon
  */
 __attribute__((always_inline)) INLINE static void hydro_first_init_part(
-    struct part* p, struct xpart* xp) {}
+    struct part* p, struct xpart* xp) {
+
+  xp->v_full[0] = p->v[0];
+  xp->v_full[1] = p->v[1];
+  xp->v_full[2] = p->v[2];
+
+  p->primitives.v[0] = p->v[0];
+  p->primitives.v[1] = p->v[1];
+  p->primitives.v[2] = p->v[2];
+}
 
 /**
  * @brief Prepares a particle for the volume calculation.
  *
+ * Simply makes sure all necessary variables are initialized to zero.
+ *
  * @param p The particle to act upon
  */
 __attribute__((always_inline)) INLINE static void hydro_init_part(
     struct part* p) {
 
-#ifdef SPH_GRADIENTS
-  /* use the old volumes to estimate new primitive variables to be used for the
-     gradient calculation */
-  if (p->conserved.mass) {
-    p->primitives.rho = p->conserved.mass / p->geometry.volume;
-    p->primitives.v[0] = p->conserved.momentum[0] / p->conserved.mass;
-    p->primitives.v[1] = p->conserved.momentum[1] / p->conserved.mass;
-    p->primitives.v[2] = p->conserved.momentum[2] / p->conserved.mass;
-    p->primitives.P =
-        (const_hydro_gamma - 1.) *
-        (p->conserved.energy -
-         0.5 * (p->conserved.momentum[0] * p->conserved.momentum[0] +
-                p->conserved.momentum[1] * p->conserved.momentum[1] +
-                p->conserved.momentum[2] * p->conserved.momentum[2]) /
-             p->conserved.mass) /
-        p->geometry.volume;
-  }
-#endif
-
   p->density.wcount = 0.0f;
   p->density.wcount_dh = 0.0f;
   p->geometry.volume = 0.0f;
@@ -81,540 +89,451 @@ __attribute__((always_inline)) INLINE static void hydro_init_part(
   p->geometry.matrix_E[2][0] = 0.0f;
   p->geometry.matrix_E[2][1] = 0.0f;
   p->geometry.matrix_E[2][2] = 0.0f;
-
-#ifdef SPH_GRADIENTS
-  p->primitives.gradients.rho[0] = 0.0f;
-  p->primitives.gradients.rho[1] = 0.0f;
-  p->primitives.gradients.rho[2] = 0.0f;
-
-  p->primitives.gradients.v[0][0] = 0.0f;
-  p->primitives.gradients.v[0][1] = 0.0f;
-  p->primitives.gradients.v[0][2] = 0.0f;
-
-  p->primitives.gradients.v[1][0] = 0.0f;
-  p->primitives.gradients.v[1][1] = 0.0f;
-  p->primitives.gradients.v[1][2] = 0.0f;
-
-  p->primitives.gradients.v[2][0] = 0.0f;
-  p->primitives.gradients.v[2][1] = 0.0f;
-  p->primitives.gradients.v[2][2] = 0.0f;
-
-  p->primitives.gradients.P[0] = 0.0f;
-  p->primitives.gradients.P[1] = 0.0f;
-  p->primitives.gradients.P[2] = 0.0f;
-
-  p->primitives.limiter.rho[0] = FLT_MAX;
-  p->primitives.limiter.rho[1] = -FLT_MAX;
-  p->primitives.limiter.v[0][0] = FLT_MAX;
-  p->primitives.limiter.v[0][1] = -FLT_MAX;
-  p->primitives.limiter.v[1][0] = FLT_MAX;
-  p->primitives.limiter.v[1][1] = -FLT_MAX;
-  p->primitives.limiter.v[2][0] = FLT_MAX;
-  p->primitives.limiter.v[2][1] = -FLT_MAX;
-  p->primitives.limiter.P[0] = FLT_MAX;
-  p->primitives.limiter.P[1] = -FLT_MAX;
-
-  p->primitives.limiter.maxr = -FLT_MAX;
-#endif
 }
 
 /**
- * @brief Finishes the density calculation.
+ * @brief Finishes the volume calculation.
  *
  * Multiplies the density and number of neighbours by the appropiate constants
- * and add the self-contribution term.
+ * and adds the self-contribution term. Calculates the volume and uses it to
+ * update the primitive variables (based on the conserved variables). The latter
+ * should only be done for active particles. This is okay, since this method is
+ * only called for active particles.
  *
- * @param p The particle to act upon
- */
-__attribute__((always_inline)) INLINE static void hydro_end_volume(
-    struct part* p) {
-
-  /* Some smoothing length multiples. */
-  const float h = p->h;
-  const float ih = 1.0f / h;
-
-  /* Final operation on the density. */
-  p->density.wcount =
-      (p->density.wcount + kernel_root) * (4.0f / 3.0 * M_PI * kernel_gamma3);
-  p->density.wcount_dh =
-      p->density.wcount_dh * ih * (4.0f / 3.0 * M_PI * kernel_gamma3);
-}
-
-/**
- * @brief Prepare a particle for the force calculation.
- *
- * Computes viscosity term, conduction term and smoothing length gradient terms.
+ * Multiplies the components of the matrix E with the appropriate constants and
+ * inverts it. Initializes the variables used during the gradient loop. This
+ * cannot be done in hydro_prepare_force, since that method is called for all
+ * particles, and not just the active ones. If we would initialize the
+ * variables there, gradients for passive particles would be zero, while we
+ * actually use the old gradients in the flux calculation between active and
+ * passive particles.
  *
- * @param p The particle to act upon
- * @param xp The extended particle data to act upon
+ * @param p The particle to act upon.
+ * @param time The current physical time.
  */
-__attribute__((always_inline)) INLINE static void hydro_prepare_gradient(
-    struct part* p, struct xpart* xp) {
+__attribute__((always_inline)) INLINE static void hydro_end_density(
+    struct part* restrict p, float time) {
 
   /* Some smoothing length multiples. */
   const float h = p->h;
   const float ih = 1.0f / h;
-  const float ih2 = ih * ih;
 
-  float detE, volume;
-  float E[3][3];
-  GFLOAT m, momentum[3], energy;
+  /* Final operation on the density. */
+  p->density.wcount += kernel_root;
+  p->density.wcount *= kernel_norm;
 
-#ifndef THERMAL_ENERGY
-  GFLOAT momentum2;
-#endif
+  p->density.wcount_dh *= ih * kernel_gamma * kernel_norm;
 
-#if defined(SPH_GRADIENTS) && defined(SLOPE_LIMITER)
-  GFLOAT gradrho[3], gradv[3][3], gradP[3];
-  GFLOAT gradtrue, gradmax, gradmin, alpha;
-#endif
+  const float ihdim = pow_dimension(ih);
 
   /* Final operation on the geometry. */
   /* we multiply with the smoothing kernel normalization ih3 and calculate the
    * volume */
-  volume = ih * ih2 * (p->geometry.volume + kernel_root);
-  p->geometry.volume = volume = 1. / volume;
-  /* we multiply with the smoothing kernel normalization */
-  p->geometry.matrix_E[0][0] = E[0][0] = ih * ih2 * p->geometry.matrix_E[0][0];
-  p->geometry.matrix_E[0][1] = E[0][1] = ih * ih2 * p->geometry.matrix_E[0][1];
-  p->geometry.matrix_E[0][2] = E[0][2] = ih * ih2 * p->geometry.matrix_E[0][2];
-  p->geometry.matrix_E[1][0] = E[1][0] = ih * ih2 * p->geometry.matrix_E[1][0];
-  p->geometry.matrix_E[1][1] = E[1][1] = ih * ih2 * p->geometry.matrix_E[1][1];
-  p->geometry.matrix_E[1][2] = E[1][2] = ih * ih2 * p->geometry.matrix_E[1][2];
-  p->geometry.matrix_E[2][0] = E[2][0] = ih * ih2 * p->geometry.matrix_E[2][0];
-  p->geometry.matrix_E[2][1] = E[2][1] = ih * ih2 * p->geometry.matrix_E[2][1];
-  p->geometry.matrix_E[2][2] = E[2][2] = ih * ih2 * p->geometry.matrix_E[2][2];
-
-  /* invert the E-matrix */
-  /* code shamelessly stolen from the public version of GIZMO */
-  /* But since we should never invert a matrix, this code has to be replaced */
-  detE = E[0][0] * E[1][1] * E[2][2] + E[0][1] * E[1][2] * E[2][0] +
-         E[0][2] * E[1][0] * E[2][1] - E[0][2] * E[1][1] * E[2][0] -
-         E[0][1] * E[1][0] * E[2][2] - E[0][0] * E[1][2] * E[2][1];
-  /* check for zero determinant */
-  if ((detE != 0) && !isnan(detE)) {
-    p->geometry.matrix_E[0][0] = (E[1][1] * E[2][2] - E[1][2] * E[2][1]) / detE;
-    p->geometry.matrix_E[0][1] = (E[0][2] * E[2][1] - E[0][1] * E[2][2]) / detE;
-    p->geometry.matrix_E[0][2] = (E[0][1] * E[1][2] - E[0][2] * E[1][1]) / detE;
-    p->geometry.matrix_E[1][0] = (E[1][2] * E[2][0] - E[1][0] * E[2][2]) / detE;
-    p->geometry.matrix_E[1][1] = (E[0][0] * E[2][2] - E[0][2] * E[2][0]) / detE;
-    p->geometry.matrix_E[1][2] = (E[0][2] * E[1][0] - E[0][0] * E[1][2]) / detE;
-    p->geometry.matrix_E[2][0] = (E[1][0] * E[2][1] - E[1][1] * E[2][0]) / detE;
-    p->geometry.matrix_E[2][1] = (E[0][1] * E[2][0] - E[0][0] * E[2][1]) / detE;
-    p->geometry.matrix_E[2][2] = (E[0][0] * E[1][1] - E[0][1] * E[1][0]) / detE;
-  } else {
-    /* if the E-matrix is not well behaved, we cannot use it */
-    p->geometry.matrix_E[0][0] = 0.0f;
-    p->geometry.matrix_E[0][1] = 0.0f;
-    p->geometry.matrix_E[0][2] = 0.0f;
-    p->geometry.matrix_E[1][0] = 0.0f;
-    p->geometry.matrix_E[1][1] = 0.0f;
-    p->geometry.matrix_E[1][2] = 0.0f;
-    p->geometry.matrix_E[2][0] = 0.0f;
-    p->geometry.matrix_E[2][1] = 0.0f;
-    p->geometry.matrix_E[2][2] = 0.0f;
-  }
-
-#ifdef SPH_GRADIENTS
-  /* finalize gradients by multiplying with volume */
-  p->primitives.gradients.rho[0] *= ih2 * ih2 * volume;
-  p->primitives.gradients.rho[1] *= ih2 * ih2 * volume;
-  p->primitives.gradients.rho[2] *= ih2 * ih2 * volume;
-
-  p->primitives.gradients.v[0][0] *= ih2 * ih2 * volume;
-  p->primitives.gradients.v[0][1] *= ih2 * ih2 * volume;
-  p->primitives.gradients.v[0][2] *= ih2 * ih2 * volume;
-
-  p->primitives.gradients.v[1][0] *= ih2 * ih2 * volume;
-  p->primitives.gradients.v[1][1] *= ih2 * ih2 * volume;
-  p->primitives.gradients.v[1][2] *= ih2 * ih2 * volume;
-
-  p->primitives.gradients.v[2][0] *= ih2 * ih2 * volume;
-  p->primitives.gradients.v[2][1] *= ih2 * ih2 * volume;
-  p->primitives.gradients.v[2][2] *= ih2 * ih2 * volume;
-
-  p->primitives.gradients.P[0] *= ih2 * ih2 * volume;
-  p->primitives.gradients.P[1] *= ih2 * ih2 * volume;
-  p->primitives.gradients.P[2] *= ih2 * ih2 * volume;
-
-/* slope limiter */
-#ifdef SLOPE_LIMITER
-  gradrho[0] = p->primitives.gradients.rho[0];
-  gradrho[1] = p->primitives.gradients.rho[1];
-  gradrho[2] = p->primitives.gradients.rho[2];
-
-  gradv[0][0] = p->primitives.gradients.v[0][0];
-  gradv[0][1] = p->primitives.gradients.v[0][1];
-  gradv[0][2] = p->primitives.gradients.v[0][2];
-
-  gradv[1][0] = p->primitives.gradients.v[1][0];
-  gradv[1][1] = p->primitives.gradients.v[1][1];
-  gradv[1][2] = p->primitives.gradients.v[1][2];
-
-  gradv[2][0] = p->primitives.gradients.v[2][0];
-  gradv[2][1] = p->primitives.gradients.v[2][1];
-  gradv[2][2] = p->primitives.gradients.v[2][2];
-
-  gradP[0] = p->primitives.gradients.P[0];
-  gradP[1] = p->primitives.gradients.P[1];
-  gradP[2] = p->primitives.gradients.P[2];
-
-  gradtrue = sqrtf(gradrho[0] * gradrho[0] + gradrho[1] * gradrho[1] +
-                   gradrho[2] * gradrho[2]);
-  /* gradtrue might be zero. In this case, there is no gradient and we don't
-     need to slope limit anything... */
-  if (gradtrue) {
-    gradtrue *= p->primitives.limiter.maxr;
-    gradmax = p->primitives.limiter.rho[1] - p->primitives.rho;
-    gradmin = p->primitives.rho - p->primitives.limiter.rho[0];
-    alpha = fmin(1.0f, fmin(gradmax / gradtrue, gradmin / gradtrue));
-    p->primitives.gradients.rho[0] *= alpha;
-    p->primitives.gradients.rho[1] *= alpha;
-    p->primitives.gradients.rho[2] *= alpha;
-  }
-
-  gradtrue = sqrtf(gradv[0][0] * gradv[0][0] + gradv[0][1] * gradv[0][1] +
-                   gradv[0][2] * gradv[0][2]);
-  if (gradtrue) {
-    gradtrue *= p->primitives.limiter.maxr;
-    gradmax = p->primitives.limiter.v[0][1] - p->primitives.v[0];
-    gradmin = p->primitives.v[0] - p->primitives.limiter.v[0][0];
-    alpha = fmin(1.0f, fmin(gradmax / gradtrue, gradmin / gradtrue));
-    p->primitives.gradients.v[0][0] *= alpha;
-    p->primitives.gradients.v[0][1] *= alpha;
-    p->primitives.gradients.v[0][2] *= alpha;
-  }
+  const float volume = 1.f / (ihdim * (p->geometry.volume + kernel_root));
+  p->geometry.volume = volume;
 
-  gradtrue = sqrtf(gradv[1][0] * gradv[1][0] + gradv[1][1] * gradv[1][1] +
-                   gradv[1][2] * gradv[1][2]);
-  if (gradtrue) {
-    gradtrue *= p->primitives.limiter.maxr;
-    gradmax = p->primitives.limiter.v[1][1] - p->primitives.v[1];
-    gradmin = p->primitives.v[1] - p->primitives.limiter.v[1][0];
-    alpha = fmin(1.0f, fmin(gradmax / gradtrue, gradmin / gradtrue));
-    p->primitives.gradients.v[1][0] *= alpha;
-    p->primitives.gradients.v[1][1] *= alpha;
-    p->primitives.gradients.v[1][2] *= alpha;
-  }
+  /* we multiply with the smoothing kernel normalization */
+  p->geometry.matrix_E[0][0] = ihdim * p->geometry.matrix_E[0][0];
+  p->geometry.matrix_E[0][1] = ihdim * p->geometry.matrix_E[0][1];
+  p->geometry.matrix_E[0][2] = ihdim * p->geometry.matrix_E[0][2];
+  p->geometry.matrix_E[1][0] = ihdim * p->geometry.matrix_E[1][0];
+  p->geometry.matrix_E[1][1] = ihdim * p->geometry.matrix_E[1][1];
+  p->geometry.matrix_E[1][2] = ihdim * p->geometry.matrix_E[1][2];
+  p->geometry.matrix_E[2][0] = ihdim * p->geometry.matrix_E[2][0];
+  p->geometry.matrix_E[2][1] = ihdim * p->geometry.matrix_E[2][1];
+  p->geometry.matrix_E[2][2] = ihdim * p->geometry.matrix_E[2][2];
 
-  gradtrue = sqrtf(gradv[2][0] * gradv[2][0] + gradv[2][1] * gradv[2][1] +
-                   gradv[2][2] * gradv[2][2]);
-  if (gradtrue) {
-    gradtrue *= p->primitives.limiter.maxr;
-    gradmax = p->primitives.limiter.v[2][1] - p->primitives.v[2];
-    gradmin = p->primitives.v[2] - p->primitives.limiter.v[2][0];
-    alpha = fmin(1.0f, fmin(gradmax / gradtrue, gradmin / gradtrue));
-    p->primitives.gradients.v[2][0] *= alpha;
-    p->primitives.gradients.v[2][1] *= alpha;
-    p->primitives.gradients.v[2][2] *= alpha;
-  }
+  invert_dimension_by_dimension_matrix(p->geometry.matrix_E);
 
-  gradtrue =
-      sqrtf(gradP[0] * gradP[0] + gradP[1] * gradP[1] + gradP[2] * gradP[2]);
-  if (gradtrue) {
-    gradtrue *= p->primitives.limiter.maxr;
-    gradmax = p->primitives.limiter.P[1] - p->primitives.P;
-    gradmin = p->primitives.P - p->primitives.limiter.P[0];
-    alpha = fmin(1.0f, fmin(gradmax / gradtrue, gradmin / gradtrue));
-    p->primitives.gradients.P[0] *= alpha;
-    p->primitives.gradients.P[1] *= alpha;
-    p->primitives.gradients.P[2] *= alpha;
-  }
-#endif  // SLOPE_LIMITER
-#else   // SPH_GRADIENTS
-  p->primitives.gradients.rho[0] = 0.0f;
-  p->primitives.gradients.rho[1] = 0.0f;
-  p->primitives.gradients.rho[2] = 0.0f;
-
-  p->primitives.gradients.v[0][0] = 0.0f;
-  p->primitives.gradients.v[0][1] = 0.0f;
-  p->primitives.gradients.v[0][2] = 0.0f;
-
-  p->primitives.gradients.v[1][0] = 0.0f;
-  p->primitives.gradients.v[1][1] = 0.0f;
-  p->primitives.gradients.v[1][2] = 0.0f;
-
-  p->primitives.gradients.v[2][0] = 0.0f;
-  p->primitives.gradients.v[2][1] = 0.0f;
-  p->primitives.gradients.v[2][2] = 0.0f;
-
-  p->primitives.gradients.P[0] = 0.0f;
-  p->primitives.gradients.P[1] = 0.0f;
-  p->primitives.gradients.P[2] = 0.0f;
-
-  p->primitives.limiter.rho[0] = FLT_MAX;
-  p->primitives.limiter.rho[1] = -FLT_MAX;
-  p->primitives.limiter.v[0][0] = FLT_MAX;
-  p->primitives.limiter.v[0][1] = -FLT_MAX;
-  p->primitives.limiter.v[1][0] = FLT_MAX;
-  p->primitives.limiter.v[1][1] = -FLT_MAX;
-  p->primitives.limiter.v[2][0] = FLT_MAX;
-  p->primitives.limiter.v[2][1] = -FLT_MAX;
-  p->primitives.limiter.P[0] = FLT_MAX;
-  p->primitives.limiter.P[1] = -FLT_MAX;
-
-  p->primitives.limiter.maxr = -FLT_MAX;
-#endif  // SPH_GRADIENTS
+  hydro_gradients_init(p);
 
   /* compute primitive variables */
   /* eqns (3)-(5) */
-  m = p->conserved.mass;
-  if (m) {
+  const float m = p->conserved.mass;
+  if (m > 0.f) {
+    float momentum[3];
     momentum[0] = p->conserved.momentum[0];
     momentum[1] = p->conserved.momentum[1];
     momentum[2] = p->conserved.momentum[2];
-#ifndef THERMAL_ENERGY
-    momentum2 = (momentum[0] * momentum[0] + momentum[1] * momentum[1] +
-                 momentum[2] * momentum[2]);
-#endif
-    energy = p->conserved.energy;
     p->primitives.rho = m / volume;
     p->primitives.v[0] = momentum[0] / m;
     p->primitives.v[1] = momentum[1] / m;
     p->primitives.v[2] = momentum[2] / m;
-#ifndef THERMAL_ENERGY
-    p->primitives.P =
-        (const_hydro_gamma - 1.) * (energy - 0.5 * momentum2 / m) / volume;
-#else
-    p->primitives.P = (const_hydro_gamma - 1.) * energy / volume;
-#endif
+    const float energy = p->conserved.energy;
+    p->primitives.P = hydro_gamma_minus_one * energy / volume;
   }
 }
 
+/**
+ * @brief Prepare a particle for the gradient calculation.
+ *
+ * The name of this method is confusing, as this method is really called after
+ * the density loop and before the gradient loop.
+ *
+ * We use it to set the physical timestep for the particle and to copy the
+ * actual velocities, which we need to boost our interfaces during the flux
+ * calculation. We also initialize the variables used for the time step
+ * calculation.
+ *
+ * @param p The particle to act upon.
+ * @param xp The extended particle data to act upon.
+ * @param ti_current Current integer time.
+ * @param timeBase Conversion factor between integer time and physical time.
+ */
+__attribute__((always_inline)) INLINE static void hydro_prepare_force(
+    struct part* restrict p, struct xpart* restrict xp, int ti_current,
+    double timeBase) {
+
+  /* Set the physical time step */
+  p->force.dt = (p->ti_end - p->ti_begin) * timeBase;
+
+  /* Initialize time step criterion variables */
+  p->timestepvars.vmax = 0.0f;
+
+  /* Set the actual velocity of the particle */
+  p->force.v_full[0] = xp->v_full[0];
+  p->force.v_full[1] = xp->v_full[1];
+  p->force.v_full[2] = xp->v_full[2];
+}
+
 /**
  * @brief Finishes the gradient calculation.
  *
- * @param p The particle to act upon
+ * Just a wrapper around hydro_gradients_finalize, which can be an empty method,
+ * in which case no gradients are used.
+ *
+ * This method also initializes the force loop variables.
+ *
+ * @param p The particle to act upon.
  */
 __attribute__((always_inline)) INLINE static void hydro_end_gradient(
     struct part* p) {
 
-#ifndef SPH_GRADIENTS
-  float h, ih, ih2, ih3;
-#ifdef SLOPE_LIMITER
-  GFLOAT gradrho[3], gradv[3][3], gradP[3];
-  GFLOAT gradtrue, gradmax, gradmin, alpha;
-#endif
-
-  /* add kernel normalization to gradients */
-  h = p->h;
-  ih = 1.0f / h;
-  ih2 = ih * ih;
-  ih3 = ih * ih2;
-
-  p->primitives.gradients.rho[0] *= ih3;
-  p->primitives.gradients.rho[1] *= ih3;
-  p->primitives.gradients.rho[2] *= ih3;
-
-  p->primitives.gradients.v[0][0] *= ih3;
-  p->primitives.gradients.v[0][1] *= ih3;
-  p->primitives.gradients.v[0][2] *= ih3;
-  p->primitives.gradients.v[1][0] *= ih3;
-  p->primitives.gradients.v[1][1] *= ih3;
-  p->primitives.gradients.v[1][2] *= ih3;
-  p->primitives.gradients.v[2][0] *= ih3;
-  p->primitives.gradients.v[2][1] *= ih3;
-  p->primitives.gradients.v[2][2] *= ih3;
-
-  p->primitives.gradients.P[0] *= ih3;
-  p->primitives.gradients.P[1] *= ih3;
-  p->primitives.gradients.P[2] *= ih3;
-
-/* slope limiter */
-#ifdef SLOPE_LIMITER
-  gradrho[0] = p->primitives.gradients.rho[0];
-  gradrho[1] = p->primitives.gradients.rho[1];
-  gradrho[2] = p->primitives.gradients.rho[2];
-
-  gradv[0][0] = p->primitives.gradients.v[0][0];
-  gradv[0][1] = p->primitives.gradients.v[0][1];
-  gradv[0][2] = p->primitives.gradients.v[0][2];
-
-  gradv[1][0] = p->primitives.gradients.v[1][0];
-  gradv[1][1] = p->primitives.gradients.v[1][1];
-  gradv[1][2] = p->primitives.gradients.v[1][2];
-
-  gradv[2][0] = p->primitives.gradients.v[2][0];
-  gradv[2][1] = p->primitives.gradients.v[2][1];
-  gradv[2][2] = p->primitives.gradients.v[2][2];
-
-  gradP[0] = p->primitives.gradients.P[0];
-  gradP[1] = p->primitives.gradients.P[1];
-  gradP[2] = p->primitives.gradients.P[2];
-
-  gradtrue = gradrho[0] * gradrho[0] + gradrho[1] * gradrho[1] +
-             gradrho[2] * gradrho[2];
-  /* gradtrue might be zero. In this case, there is no gradient and we don't
-     need to slope limit anything... */
-  if (gradtrue) {
-    gradtrue = sqrtf(gradtrue);
-    gradtrue *= p->primitives.limiter.maxr;
-    gradmax = p->primitives.limiter.rho[1] - p->primitives.rho;
-    gradmin = p->primitives.rho - p->primitives.limiter.rho[0];
-    /* gradmin and gradmax might be negative if the value of the current
-       particle is larger/smaller than all neighbouring values */
-    gradmax = fabs(gradmax);
-    gradmin = fabs(gradmin);
-    alpha = fmin(1.0f, fmin(gradmax / gradtrue, gradmin / gradtrue));
-    p->primitives.gradients.rho[0] *= alpha;
-    p->primitives.gradients.rho[1] *= alpha;
-    p->primitives.gradients.rho[2] *= alpha;
-  }
+  hydro_gradients_finalize(p);
+
+  p->gravity.mflux[0] = 0.0f;
+  p->gravity.mflux[1] = 0.0f;
+  p->gravity.mflux[2] = 0.0f;
+}
+
+/**
+ * @brief Reset acceleration fields of a particle
+ *
+ * This is actually not necessary for GIZMO, since we just set the accelerations
+ * after the flux calculation.
+ *
+ * @param p The particle to act upon.
+ */
+__attribute__((always_inline)) INLINE static void hydro_reset_acceleration(
+    struct part* p) {
+
+  /* Reset the acceleration. */
+  p->a_hydro[0] = 0.0f;
+  p->a_hydro[1] = 0.0f;
+  p->a_hydro[2] = 0.0f;
+
+  /* Reset the time derivatives. */
+  p->force.h_dt = 0.0f;
+}
+
+/**
+ * @brief Converts the hydrodynamic variables from the initial condition file to
+ * conserved variables that can be used during the integration
+ *
+ * Requires the volume to be known.
+ *
+ * The initial condition file contains a mixture of primitive and conserved
+ * variables. Mass is a conserved variable, and we just copy the particle
+ * mass into the corresponding conserved quantity. We need the volume to
+ * also derive a density, which is then used to convert the internal energy
+ * to a pressure. However, we do not actually use these variables anymore.
+ * We do need to initialize the linear momentum, based on the mass and the
+ * velocity of the particle.
+ *
+ * @param p The particle to act upon.
+ */
+__attribute__((always_inline)) INLINE static void hydro_convert_quantities(
+    struct part* p) {
 
-  gradtrue = gradv[0][0] * gradv[0][0] + gradv[0][1] * gradv[0][1] +
-             gradv[0][2] * gradv[0][2];
-  if (gradtrue) {
-    gradtrue = sqrtf(gradtrue);
-    gradtrue *= p->primitives.limiter.maxr;
-    gradmax = p->primitives.limiter.v[0][1] - p->primitives.v[0];
-    gradmin = p->primitives.v[0] - p->primitives.limiter.v[0][0];
-    gradmax = fabs(gradmax);
-    gradmin = fabs(gradmin);
-    alpha = fmin(1.0f, fmin(gradmax / gradtrue, gradmin / gradtrue));
-    p->primitives.gradients.v[0][0] *= alpha;
-    p->primitives.gradients.v[0][1] *= alpha;
-    p->primitives.gradients.v[0][2] *= alpha;
+  const float volume = p->geometry.volume;
+  const float m = p->conserved.mass;
+  p->primitives.rho = m / volume;
+
+  p->conserved.momentum[0] = m * p->primitives.v[0];
+  p->conserved.momentum[1] = m * p->primitives.v[1];
+  p->conserved.momentum[2] = m * p->primitives.v[2];
+
+  p->primitives.P =
+      hydro_gamma_minus_one * p->conserved.energy * p->primitives.rho;
+
+  p->conserved.energy *= m;
+}
+
+/**
+ * @brief Extra operations to be done during the drift
+ *
+ * @param p Particle to act upon.
+ * @param xp The extended particle data to act upon.
+ * @param dt The drift time-step.
+ * @param t0 Integer start time of the drift interval.
+ * @param t1 Integer end time of the drift interval.
+ * @param timeBase Conversion factor between integer and physical time.
+ */
+__attribute__((always_inline)) INLINE static void hydro_predict_extra(
+    struct part* p, struct xpart* xp, float dt, int t0, int t1,
+    double timeBase) {
+
+  const float h_inv = 1.0f / p->h;
+
+  /* Predict smoothing length */
+  const float w1 = p->force.h_dt * h_inv * dt;
+  if (fabsf(w1) < 0.2f)
+    p->h *= approx_expf(w1); /* 4th order expansion of exp(w) */
+  else
+    p->h *= expf(w1);
+
+  const float w2 = -hydro_dimension * w1;
+  if (fabsf(w2) < 0.2f) {
+    p->primitives.rho *= approx_expf(w2);
+  } else {
+    p->primitives.rho *= expf(w2);
   }
 
-  gradtrue = gradv[1][0] * gradv[1][0] + gradv[1][1] * gradv[1][1] +
-             gradv[1][2] * gradv[1][2];
-  if (gradtrue) {
-    gradtrue = sqrtf(gradtrue);
-    gradtrue *= p->primitives.limiter.maxr;
-    gradmax = p->primitives.limiter.v[1][1] - p->primitives.v[1];
-    gradmin = p->primitives.v[1] - p->primitives.limiter.v[1][0];
-    gradmax = fabs(gradmax);
-    gradmin = fabs(gradmin);
-    alpha = fmin(1.0f, fmin(gradmax / gradtrue, gradmin / gradtrue));
-    p->primitives.gradients.v[1][0] *= alpha;
-    p->primitives.gradients.v[1][1] *= alpha;
-    p->primitives.gradients.v[1][2] *= alpha;
+  p->primitives.v[0] += (p->a_hydro[0] + p->gravity.old_a[0]) * dt;
+  p->primitives.v[1] += (p->a_hydro[1] + p->gravity.old_a[1]) * dt;
+  p->primitives.v[2] += (p->a_hydro[2] + p->gravity.old_a[2]) * dt;
+  const float u = p->conserved.energy + p->du_dt * dt;
+  p->primitives.P =
+      hydro_gamma_minus_one * u * p->primitives.rho / p->conserved.mass;
+}
+
+/**
+ * @brief Set the particle acceleration after the flux loop
+ *
+ * We use the new conserved variables to calculate the new velocity of the
+ * particle, and use that to derive the change of the velocity over the particle
+ * time step.
+ *
+ * If the particle time step is zero, we set the accelerations to zero. This
+ * should only happen at the start of the simulation.
+ *
+ * @param p Particle to act upon.
+ */
+__attribute__((always_inline)) INLINE static void hydro_end_force(
+    struct part* p) {
+
+  /* Add normalization to h_dt. */
+  p->force.h_dt *= p->h * hydro_dimension_inv;
+
+  /* Set the hydro acceleration, based on the new momentum and mass */
+  /* NOTE: the momentum and mass are only correct for active particles, since
+           only active particles have received flux contributions from all their
+           neighbours. Since this method is only called for active particles,
+           this is indeed the case. */
+  if (p->force.dt) {
+    float mnew;
+    float vnew[3];
+
+    mnew = p->conserved.mass + p->conserved.flux.mass;
+    vnew[0] = (p->conserved.momentum[0] + p->conserved.flux.momentum[0]) / mnew;
+    vnew[1] = (p->conserved.momentum[1] + p->conserved.flux.momentum[1]) / mnew;
+    vnew[2] = (p->conserved.momentum[2] + p->conserved.flux.momentum[2]) / mnew;
+
+    p->a_hydro[0] = (vnew[0] - p->force.v_full[0]) / p->force.dt;
+    p->a_hydro[1] = (vnew[1] - p->force.v_full[1]) / p->force.dt;
+    p->a_hydro[2] = (vnew[2] - p->force.v_full[2]) / p->force.dt;
+
+    p->du_dt = p->conserved.flux.energy / p->force.dt;
+  } else {
+    p->a_hydro[0] = 0.0f;
+    p->a_hydro[1] = 0.0f;
+    p->a_hydro[2] = 0.0f;
+
+    p->du_dt = 0.0f;
   }
+}
 
-  gradtrue = gradv[2][0] * gradv[2][0] + gradv[2][1] * gradv[2][1] +
-             gradv[2][2] * gradv[2][2];
-  if (gradtrue) {
-    gradtrue = sqrtf(gradtrue);
-    gradtrue *= p->primitives.limiter.maxr;
-    gradmax = p->primitives.limiter.v[2][1] - p->primitives.v[2];
-    gradmin = p->primitives.v[2] - p->primitives.limiter.v[2][0];
-    gradmax = fabs(gradmax);
-    gradmin = fabs(gradmin);
-    alpha = fmin(1.0f, fmin(gradmax / gradtrue, gradmin / gradtrue));
-    p->primitives.gradients.v[2][0] *= alpha;
-    p->primitives.gradients.v[2][1] *= alpha;
-    p->primitives.gradients.v[2][2] *= alpha;
+/**
+ * @brief Extra operations done during the kick
+ *
+ * Adds the fluxes, and gravity if a gpart exists, to the conserved variables.
+ *
+ * @param p Particle to act upon.
+ * @param xp Extended particle data to act upon.
+ * @param dt Physical time step.
+ * @param half_dt Half the physical time step.
+ */
+__attribute__((always_inline)) INLINE static void hydro_kick_extra(
+    struct part* p, struct xpart* xp, float dt, float half_dt) {
+
+  float oldm, oldp[3], anew[3];
+
+  /* Retrieve the current value of the gravitational acceleration from the
+     gpart. We are only allowed to do this because this is the kick. We still
+     need to check whether gpart exists though. */
+  if (p->gpart) {
+    anew[0] = p->gpart->a_grav[0];
+    anew[1] = p->gpart->a_grav[1];
+    anew[2] = p->gpart->a_grav[2];
+
+    /* Copy the old mass and momentum before updating the conserved variables */
+    oldm = p->conserved.mass;
+    oldp[0] = p->conserved.momentum[0];
+    oldp[1] = p->conserved.momentum[1];
+    oldp[2] = p->conserved.momentum[2];
   }
 
-  gradtrue = gradP[0] * gradP[0] + gradP[1] * gradP[1] + gradP[2] * gradP[2];
-  if (gradtrue) {
-    gradtrue = sqrtf(gradtrue);
-    gradtrue *= p->primitives.limiter.maxr;
-    gradmax = p->primitives.limiter.P[1] - p->primitives.P;
-    gradmin = p->primitives.P - p->primitives.limiter.P[0];
-    gradmax = fabs(gradmax);
-    gradmin = fabs(gradmin);
-    alpha = fmin(1.0f, fmin(gradmax / gradtrue, gradmin / gradtrue));
-    p->primitives.gradients.P[0] *= alpha;
-    p->primitives.gradients.P[1] *= alpha;
-    p->primitives.gradients.P[2] *= alpha;
+  /* Update conserved variables. */
+  p->conserved.mass += p->conserved.flux.mass;
+  p->conserved.momentum[0] += p->conserved.flux.momentum[0];
+  p->conserved.momentum[1] += p->conserved.flux.momentum[1];
+  p->conserved.momentum[2] += p->conserved.flux.momentum[2];
+  p->conserved.energy += p->conserved.flux.energy;
+
+  /* Add gravity. We only do this if we have gravity activated. */
+  if (p->gpart) {
+    p->conserved.momentum[0] +=
+        half_dt * (oldm * p->gravity.old_a[0] + p->conserved.mass * anew[0]);
+    p->conserved.momentum[1] +=
+        half_dt * (oldm * p->gravity.old_a[1] + p->conserved.mass * anew[1]);
+    p->conserved.momentum[2] +=
+        half_dt * (oldm * p->gravity.old_a[2] + p->conserved.mass * anew[2]);
+
+    float paold, panew;
+    paold = oldp[0] * p->gravity.old_a[0] + oldp[1] * p->gravity.old_a[1] +
+            oldp[2] * p->gravity.old_a[2];
+    panew = p->conserved.momentum[0] * anew[0] +
+            p->conserved.momentum[1] * anew[1] +
+            p->conserved.momentum[2] * anew[2];
+    p->conserved.energy += half_dt * (paold + panew);
+
+    float fluxaold, fluxanew;
+    fluxaold = p->gravity.old_a[0] * p->gravity.old_mflux[0] +
+               p->gravity.old_a[1] * p->gravity.old_mflux[1] +
+               p->gravity.old_a[2] * p->gravity.old_mflux[2];
+    fluxanew = anew[0] * p->gravity.mflux[0] + anew[1] * p->gravity.mflux[1] +
+               anew[2] * p->gravity.mflux[2];
+    p->conserved.energy += half_dt * (fluxaold + fluxanew);
+
+    /* Store gravitational acceleration and mass flux for next step */
+    p->gravity.old_a[0] = anew[0];
+    p->gravity.old_a[1] = anew[1];
+    p->gravity.old_a[2] = anew[2];
+    p->gravity.old_mflux[0] = p->gravity.mflux[0];
+    p->gravity.old_mflux[1] = p->gravity.mflux[1];
+    p->gravity.old_mflux[2] = p->gravity.mflux[2];
   }
-#endif  // SLOPE_LIMITER
 
-#endif  // SPH_GRADIENTS
+  /* reset fluxes */
+  /* we can only do this here, since we need to keep the fluxes for inactive
+     particles */
+  p->conserved.flux.mass = 0.0f;
+  p->conserved.flux.momentum[0] = 0.0f;
+  p->conserved.flux.momentum[1] = 0.0f;
+  p->conserved.flux.momentum[2] = 0.0f;
+  p->conserved.flux.energy = 0.0f;
 }
 
 /**
- * @brief Prepare a particle for the fluxes calculation.
+ * @brief Returns the internal energy of a particle
  *
- * @param p The particle to act upon
- * @param xp The extended particle data to act upon
+ * @param p The particle of interest.
+ * @param dt Time since the last kick.
  */
-__attribute__((always_inline)) INLINE static void hydro_prepare_fluxes(
-    struct part* p, struct xpart* xp) {
+__attribute__((always_inline)) INLINE static float hydro_get_internal_energy(
+    const struct part* restrict p, float dt) {
 
-  /* initialize variables used for timestep calculation */
-  p->timestepvars.vmax = 0.0f;
+  return p->primitives.P / hydro_gamma_minus_one / p->primitives.rho;
 }
 
 /**
- * @brief Reset acceleration fields of a particle
+ * @brief Returns the entropy of a particle, computed as P / pow_gamma(rho)
  *
- * Resets all hydro acceleration and time derivative fields in preparation
- * for the sums taking place in the variaous force tasks
- *
- * @param p The particle to act upon
+ * @param p The particle of interest (reads primitives.P and primitives.rho).
+ * @param dt Time since the last kick (not used by this implementation).
  */
-__attribute__((always_inline)) INLINE static void hydro_reset_acceleration(
-    struct part* p) {
+__attribute__((always_inline)) INLINE static float hydro_get_entropy(
+    const struct part* restrict p, float dt) {
 
-  /* figure out what to put here */
+  return p->primitives.P / pow_gamma(p->primitives.rho);
 }
 
 /**
- * @brief Finishes the fluxes calculation.
+ * @brief Returns the sound speed sqrt(hydro_gamma * P / rho) of a particle
  *
- * Multiplies the forces and accelerationsby the appropiate constants
+ * @param p The particle of interest (reads primitives.P and primitives.rho).
+ * @param dt Time since the last kick (not used by this implementation).
+ */
+__attribute__((always_inline)) INLINE static float hydro_get_soundspeed(
+    const struct part* restrict p, float dt) {
+
+  return sqrtf(hydro_gamma * p->primitives.P / p->primitives.rho);
+}
+
+/**
+ * @brief Returns the pressure stored in the primitive variables of a particle
  *
- * @param p The particle to act upon
+ * @param p The particle of interest (reads primitives.P)
+ * @param dt Time since the last kick (not used by this implementation)
  */
-__attribute__((always_inline)) INLINE static void hydro_end_fluxes(
-    struct part* p) {
+__attribute__((always_inline)) INLINE static float hydro_get_pressure(
+    const struct part* restrict p, float dt) {
 
-  /* do nothing */
+  return p->primitives.P;
 }
 
 /**
- * @brief Converts hydro quantity of a particle
+ * @brief Returns the mass of a particle (the conserved.mass field)
  *
- * Requires the volume to be known
+ * @param p The particle of interest; it is not modified
+ */
+__attribute__((always_inline)) INLINE static float hydro_get_mass(
+    const struct part* restrict p) {
+
+  return p->conserved.mass;
+}
+
+/**
+ * @brief Returns the density of a particle (the primitives.rho field)
  *
- * @param p The particle to act upon
+ * @param p The particle of interest; it is not modified
  */
-__attribute__((always_inline)) INLINE static void hydro_convert_quantities(
-    struct part* p) {
+__attribute__((always_inline)) INLINE static float hydro_get_density(
+    const struct part* restrict p) {
+
+  return p->primitives.rho;
+}
 
-  float volume;
-  GFLOAT m;
-  GFLOAT momentum[3];
-#ifndef THERMAL_ENERGY
-  GFLOAT momentum2;
-#endif
-  volume = p->geometry.volume;
+/**
+ * @brief Modifies the thermal state of a particle to the imposed internal
+ * energy
+ *
+ * This overrides the current state of the particle but does *not* change its
+ * time-derivatives. Only conserved.energy is written; primitives.P is not.
+ *
+ * @param p The particle; conserved.mass is read, conserved.energy is written
+ * @param u The new internal energy per unit mass
+ */
+__attribute__((always_inline)) INLINE static void hydro_set_internal_energy(
+    struct part* restrict p, float u) {
 
-  /* set hydro velocities */
-  p->primitives.v[0] = p->v[0];
-  p->primitives.v[1] = p->v[1];
-  p->primitives.v[2] = p->v[2];
-  /* P actually contains internal energy at this point */
-  p->primitives.P *= (const_hydro_gamma - 1.) * p->primitives.rho;
-
-  p->conserved.mass = m = p->primitives.rho * volume;
-  p->conserved.momentum[0] = momentum[0] = m * p->primitives.v[0];
-  p->conserved.momentum[1] = momentum[1] = m * p->primitives.v[1];
-  p->conserved.momentum[2] = momentum[2] = m * p->primitives.v[2];
-#ifndef THERMAL_ENERGY
-  momentum2 = momentum[0] * momentum[0] + momentum[1] * momentum[1] +
-              momentum[2] * momentum[2];
-  p->conserved.energy =
-      p->primitives.P / (const_hydro_gamma - 1.) * volume + 0.5 * momentum2 / m;
-#else
-  p->conserved.energy = p->primitives.P / (const_hydro_gamma - 1.) * volume;
-#endif
+  /* conserved.energy is NOT the specific energy (u), but the total thermal
+     energy (u*m), hence the multiplication by the particle mass */
+  p->conserved.energy = u * p->conserved.mass;
 }
 
-// MATTHIEU
-__attribute__((always_inline)) INLINE static void hydro_end_density(
-    struct part* p, float time) {}
-__attribute__((always_inline)) INLINE static void hydro_prepare_force(
-    struct part* p, struct xpart* xp, int ti_current, double timeBase) {}
-__attribute__((always_inline)) INLINE static void hydro_predict_extra(
-    struct part* p, struct xpart* xp, int t0, int t1, double timeBase) {}
-__attribute__((always_inline)) INLINE static void hydro_end_force(
-    struct part* p) {}
-__attribute__((always_inline)) INLINE static void hydro_kick_extra(
-    struct part* p, struct xpart* xp, float dt, float half_dt) {}
-__attribute__((always_inline)) INLINE static float hydro_get_internal_energy(
-    struct part* p) {
-  return 0.f;
+/**
+ * @brief Modifies the thermal state of a particle to the imposed entropy
+ *
+ * This overrides the current state of the particle but does *not* change its
+ * time-derivatives. Only conserved.energy is written; primitives.P is not.
+ *
+ * @param p The particle; primitives.rho and conserved.mass are read
+ * @param S The new entropy, converted by gas_internal_energy_from_entropy()
+ */
+__attribute__((always_inline)) INLINE static void hydro_set_entropy(
+    struct part* restrict p, float S) {
+
+  p->conserved.energy = gas_internal_energy_from_entropy(p->primitives.rho, S) *
+                        p->conserved.mass;
 }
diff --git a/src/hydro/Gizmo/hydro_debug.h b/src/hydro/Gizmo/hydro_debug.h
index 365d85a2f651cf98b0713e8d82f11ae70fa9beaa..f4c071023a627b177fd06373856f25611fc9485d 100644
--- a/src/hydro/Gizmo/hydro_debug.h
+++ b/src/hydro/Gizmo/hydro_debug.h
@@ -18,10 +18,65 @@
  ******************************************************************************/
 
 __attribute__((always_inline)) INLINE static void hydro_debug_particle(
-    struct part* p, struct xpart* xp) {
+    const struct part* p, const struct xpart* xp) {  /* xp is not printed */
   printf(
       "x=[%.16e,%.16e,%.16e], "
-      "v=[%.3e,%.3e,%.3e], a=[%.3e,%.3e,%.3e], volume=%.3e\n",
+      "v=[%.3e,%.3e,%.3e], "
+      "a=[%.3e,%.3e,%.3e], "
+      "h=%.3e, "
+      "ti_begin=%d, "
+      "ti_end=%d, "
+      "primitives={"
+      "v=[%.3e,%.3e,%.3e], "
+      "rho=%.3e, "
+      "P=%.3e, "
+      "gradients={"
+      "rho=[%.3e,%.3e,%.3e], "
+      "v=[[%.3e,%.3e,%.3e],[%.3e,%.3e,%.3e],[%.3e,%.3e,%.3e]], "
+      "P=[%.3e,%.3e,%.3e]}, "
+      "limiter={"
+      "rho=[%.3e,%.3e], "
+      "v=[[%.3e,%.3e],[%.3e,%.3e],[%.3e,%.3e]], "
+      "P=[%.3e,%.3e], "
+      "maxr=%.3e}}, "
+      "conserved={"
+      "momentum=[%.3e,%.3e,%.3e], "
+      "mass=%.3e, "
+      "energy=%.3e}, "
+      "geometry={"
+      "volume=%.3e, "
+      "matrix_E=[[%.3e,%.3e,%.3e],[%.3e,%.3e,%.3e],[%.3e,%.3e,%.3e]]}, "
+      "timestepvars={"
+      "vmax=%.3e}, "
+      "density={"
+      "div_v=%.3e, "
+      "wcount_dh=%.3e, "
+      "curl_v=[%.3e,%.3e,%.3e], "
+      "wcount=%.3e}\n", /* the arguments below follow the format order */
       p->x[0], p->x[1], p->x[2], p->v[0], p->v[1], p->v[2], p->a_hydro[0],
-      p->a_hydro[1], p->a_hydro[2], p->geometry.volume);
+      p->a_hydro[1], p->a_hydro[2], p->h, p->ti_begin, p->ti_end,
+      p->primitives.v[0], p->primitives.v[1], p->primitives.v[2],
+      p->primitives.rho, p->primitives.P, p->primitives.gradients.rho[0],
+      p->primitives.gradients.rho[1], p->primitives.gradients.rho[2],
+      p->primitives.gradients.v[0][0], p->primitives.gradients.v[0][1],
+      p->primitives.gradients.v[0][2], p->primitives.gradients.v[1][0],
+      p->primitives.gradients.v[1][1], p->primitives.gradients.v[1][2],
+      p->primitives.gradients.v[2][0], p->primitives.gradients.v[2][1],
+      p->primitives.gradients.v[2][2], p->primitives.gradients.P[0],
+      p->primitives.gradients.P[1], p->primitives.gradients.P[2],
+      p->primitives.limiter.rho[0], p->primitives.limiter.rho[1],
+      p->primitives.limiter.v[0][0], p->primitives.limiter.v[0][1],
+      p->primitives.limiter.v[1][0], p->primitives.limiter.v[1][1],
+      p->primitives.limiter.v[2][0], p->primitives.limiter.v[2][1],
+      p->primitives.limiter.P[0], p->primitives.limiter.P[1],
+      p->primitives.limiter.maxr, p->conserved.momentum[0],
+      p->conserved.momentum[1], p->conserved.momentum[2], p->conserved.mass,
+      p->conserved.energy, p->geometry.volume, p->geometry.matrix_E[0][0],
+      p->geometry.matrix_E[0][1], p->geometry.matrix_E[0][2],
+      p->geometry.matrix_E[1][0], p->geometry.matrix_E[1][1],
+      p->geometry.matrix_E[1][2], p->geometry.matrix_E[2][0],
+      p->geometry.matrix_E[2][1], p->geometry.matrix_E[2][2],
+      p->timestepvars.vmax, p->density.div_v, p->density.wcount_dh,
+      p->density.curl_v[0], p->density.curl_v[1], p->density.curl_v[2],
+      p->density.wcount);
 }
diff --git a/src/hydro/Gizmo/hydro_gradients.h b/src/hydro/Gizmo/hydro_gradients.h
new file mode 100644
index 0000000000000000000000000000000000000000..90448efc7adb8ccecaaa98c7388f89eaa8d16bcd
--- /dev/null
+++ b/src/hydro/Gizmo/hydro_gradients.h
@@ -0,0 +1,208 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2016 Bert Vandenbroucke (bert.vandenbroucke@gmail.com)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+#ifndef SWIFT_HYDRO_GRADIENTS_H
+#define SWIFT_HYDRO_GRADIENTS_H
+
+#include "hydro_slope_limiters.h"
+
+#if defined(GRADIENTS_SPH)
+
+#define HYDRO_GRADIENT_IMPLEMENTATION "SPH gradients (Price 2012)"
+#include "hydro_gradients_sph.h"
+
+#elif defined(GRADIENTS_GIZMO)
+
+#define HYDRO_GRADIENT_IMPLEMENTATION "GIZMO gradients (Hopkins 2015)"
+#include "hydro_gradients_gizmo.h"
+
+#else
+
+/* No gradients. Perfectly acceptable, but we have to provide empty functions */
+#define HYDRO_GRADIENT_IMPLEMENTATION "No gradients (first order scheme)"
+
+/**
+ * @brief Initialize gradient variables (empty stub: no gradients are used)
+ *
+ * @param p Particle (not touched).
+ */
+__attribute__((always_inline)) INLINE static void hydro_gradients_init(
+    struct part* p) {}
+
+/**
+ * @brief Gradient calculations done during the neighbour loop (empty stub)
+ *
+ * @param r2 Squared distance between the two particles (unused).
+ * @param dx Distance vector (pi->x - pj->x) (unused).
+ * @param hi Smoothing length of particle i (unused).
+ * @param hj Smoothing length of particle j (unused).
+ * @param pi Particle i (not touched).
+ * @param pj Particle j (not touched).
+ */
+__attribute__((always_inline)) INLINE static void hydro_gradients_collect(
+    float r2, float* dx, float hi, float hj, struct part* pi, struct part* pj) {
+}
+
+/**
+ * @brief Gradient calculations done during the neighbour loop: non-symmetric
+ * version (empty stub)
+ *
+ * @param r2 Squared distance between the two particles (unused).
+ * @param dx Distance vector (pi->x - pj->x) (unused).
+ * @param hi Smoothing length of particle i (unused).
+ * @param hj Smoothing length of particle j (unused).
+ * @param pi Particle i (not touched).
+ * @param pj Particle j (not touched).
+ */
+__attribute__((always_inline)) INLINE static void
+hydro_gradients_nonsym_collect(float r2, float* dx, float hi, float hj,
+                               struct part* pi, struct part* pj) {}
+
+/**
+ * @brief Finalize the gradient variables after the neighbour loop (empty stub)
+ *
+ * @param p Particle (not touched).
+ */
+__attribute__((always_inline)) INLINE static void hydro_gradients_finalize(
+    struct part* p) {}
+
+#endif
+
+/**
+ * @brief Extrapolate the primitive variables of two particles to their common
+ * interface in space and, over 0.5 * mindt, in time. Shared by all gradient
+ * implementations. The time step is skipped for a particle whose density is
+ * not positive, since that step divides by the density.
+ *
+ * @param pi,pj Particles i and j; only their primitives.gradients are read.
+ * @param hi,hj Smoothing lengths, used to place the interface between i and j.
+ * @param dx,r Separation vector and distance; r goes to the face limiter.
+ * @param xij_i Interface position relative to particle i.
+ * @param Wi,Wj Primitive variables of i and j, updated in place.
+ * @param mindt Time step used for the time extrapolation.
+ */
+__attribute__((always_inline)) INLINE static void hydro_gradients_predict(
+    struct part* pi, struct part* pj, float hi, float hj, float* dx, float r,
+    float* xij_i, float* Wi, float* Wj, float mindt) {
+
+  float dWi[5], dWj[5], xij_j[3];
+
+  /* Space: interface position relative to pj (the absolute position is not
+   * needed), eqn. (8) */
+  const float xfac = hj / (hi + hj);
+  for (int k = 0; k < 3; k++) xij_j[k] = xfac * dx[k];
+
+  dWi[0] = pi->primitives.gradients.rho[0] * xij_i[0] +
+           pi->primitives.gradients.rho[1] * xij_i[1] +
+           pi->primitives.gradients.rho[2] * xij_i[2];
+  dWi[1] = pi->primitives.gradients.v[0][0] * xij_i[0] +
+           pi->primitives.gradients.v[0][1] * xij_i[1] +
+           pi->primitives.gradients.v[0][2] * xij_i[2];
+  dWi[2] = pi->primitives.gradients.v[1][0] * xij_i[0] +
+           pi->primitives.gradients.v[1][1] * xij_i[1] +
+           pi->primitives.gradients.v[1][2] * xij_i[2];
+  dWi[3] = pi->primitives.gradients.v[2][0] * xij_i[0] +
+           pi->primitives.gradients.v[2][1] * xij_i[1] +
+           pi->primitives.gradients.v[2][2] * xij_i[2];
+  dWi[4] = pi->primitives.gradients.P[0] * xij_i[0] +
+           pi->primitives.gradients.P[1] * xij_i[1] +
+           pi->primitives.gradients.P[2] * xij_i[2];
+
+  dWj[0] = pj->primitives.gradients.rho[0] * xij_j[0] +
+           pj->primitives.gradients.rho[1] * xij_j[1] +
+           pj->primitives.gradients.rho[2] * xij_j[2];
+  dWj[1] = pj->primitives.gradients.v[0][0] * xij_j[0] +
+           pj->primitives.gradients.v[0][1] * xij_j[1] +
+           pj->primitives.gradients.v[0][2] * xij_j[2];
+  dWj[2] = pj->primitives.gradients.v[1][0] * xij_j[0] +
+           pj->primitives.gradients.v[1][1] * xij_j[1] +
+           pj->primitives.gradients.v[1][2] * xij_j[2];
+  dWj[3] = pj->primitives.gradients.v[2][0] * xij_j[0] +
+           pj->primitives.gradients.v[2][1] * xij_j[1] +
+           pj->primitives.gradients.v[2][2] * xij_j[2];
+  dWj[4] = pj->primitives.gradients.P[0] * xij_j[0] +
+           pj->primitives.gradients.P[1] * xij_j[1] +
+           pj->primitives.gradients.P[2] * xij_j[2];
+
+  hydro_slope_limit_face(Wi, Wj, dWi, dWj, xij_i, xij_j, r);
+
+  /* Time: skipped for rho <= 0 to avoid dividing by a vanishing density */
+  if (Wi[0] > 0.0f) {
+    dWi[0] -= 0.5 * mindt * (Wi[1] * pi->primitives.gradients.rho[0] +
+                             Wi[2] * pi->primitives.gradients.rho[1] +
+                             Wi[3] * pi->primitives.gradients.rho[2] +
+                             Wi[0] * (pi->primitives.gradients.v[0][0] +
+                                      pi->primitives.gradients.v[1][1] +
+                                      pi->primitives.gradients.v[2][2]));
+    dWi[1] -= 0.5 * mindt * (Wi[1] * pi->primitives.gradients.v[0][0] +
+                             Wi[2] * pi->primitives.gradients.v[0][1] +
+                             Wi[3] * pi->primitives.gradients.v[0][2] +
+                             pi->primitives.gradients.P[0] / Wi[0]);
+    dWi[2] -= 0.5 * mindt * (Wi[1] * pi->primitives.gradients.v[1][0] +
+                             Wi[2] * pi->primitives.gradients.v[1][1] +
+                             Wi[3] * pi->primitives.gradients.v[1][2] +
+                             pi->primitives.gradients.P[1] / Wi[0]);
+    dWi[3] -= 0.5 * mindt * (Wi[1] * pi->primitives.gradients.v[2][0] +
+                             Wi[2] * pi->primitives.gradients.v[2][1] +
+                             Wi[3] * pi->primitives.gradients.v[2][2] +
+                             pi->primitives.gradients.P[2] / Wi[0]);
+    dWi[4] -=
+        0.5 * mindt * (Wi[1] * pi->primitives.gradients.P[0] +
+                       Wi[2] * pi->primitives.gradients.P[1] +
+                       Wi[3] * pi->primitives.gradients.P[2] +
+                       hydro_gamma * Wi[4] * (pi->primitives.gradients.v[0][0] +
+                                              pi->primitives.gradients.v[1][1] +
+                                              pi->primitives.gradients.v[2][2]));
+  }
+
+  if (Wj[0] > 0.0f) {
+    dWj[0] -= 0.5 * mindt * (Wj[1] * pj->primitives.gradients.rho[0] +
+                             Wj[2] * pj->primitives.gradients.rho[1] +
+                             Wj[3] * pj->primitives.gradients.rho[2] +
+                             Wj[0] * (pj->primitives.gradients.v[0][0] +
+                                      pj->primitives.gradients.v[1][1] +
+                                      pj->primitives.gradients.v[2][2]));
+    dWj[1] -= 0.5 * mindt * (Wj[1] * pj->primitives.gradients.v[0][0] +
+                             Wj[2] * pj->primitives.gradients.v[0][1] +
+                             Wj[3] * pj->primitives.gradients.v[0][2] +
+                             pj->primitives.gradients.P[0] / Wj[0]);
+    dWj[2] -= 0.5 * mindt * (Wj[1] * pj->primitives.gradients.v[1][0] +
+                             Wj[2] * pj->primitives.gradients.v[1][1] +
+                             Wj[3] * pj->primitives.gradients.v[1][2] +
+                             pj->primitives.gradients.P[1] / Wj[0]);
+    dWj[3] -= 0.5 * mindt * (Wj[1] * pj->primitives.gradients.v[2][0] +
+                             Wj[2] * pj->primitives.gradients.v[2][1] +
+                             Wj[3] * pj->primitives.gradients.v[2][2] +
+                             pj->primitives.gradients.P[2] / Wj[0]);
+    dWj[4] -=
+        0.5 * mindt * (Wj[1] * pj->primitives.gradients.P[0] +
+                       Wj[2] * pj->primitives.gradients.P[1] +
+                       Wj[3] * pj->primitives.gradients.P[2] +
+                       hydro_gamma * Wj[4] * (pj->primitives.gradients.v[0][0] +
+                                              pj->primitives.gradients.v[1][1] +
+                                              pj->primitives.gradients.v[2][2]));
+  }
+
+  for (int k = 0; k < 5; k++) {
+    Wi[k] += dWi[k];
+    Wj[k] += dWj[k];
+  }
+}
+
+#endif  // SWIFT_HYDRO_GRADIENTS_H
diff --git a/src/hydro/Gizmo/hydro_gradients_gizmo.h b/src/hydro/Gizmo/hydro_gradients_gizmo.h
new file mode 100644
index 0000000000000000000000000000000000000000..aa6e4406b94e7a5cafcd0ca556162476003477de
--- /dev/null
+++ b/src/hydro/Gizmo/hydro_gradients_gizmo.h
@@ -0,0 +1,341 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2016 Bert Vandenbroucke (bert.vandenbroucke@gmail.com)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+/**
+ * @brief Reset the rho, v and P gradients and initialize the cell limiter
+ *
+ * @param p Particle whose primitives.gradients are set to zero.
+ */
+__attribute__((always_inline)) INLINE static void hydro_gradients_init(
+    struct part *p) {
+
+  p->primitives.gradients.rho[0] = 0.0f;
+  p->primitives.gradients.rho[1] = 0.0f;
+  p->primitives.gradients.rho[2] = 0.0f;
+
+  p->primitives.gradients.v[0][0] = 0.0f;
+  p->primitives.gradients.v[0][1] = 0.0f;
+  p->primitives.gradients.v[0][2] = 0.0f;
+
+  p->primitives.gradients.v[1][0] = 0.0f;
+  p->primitives.gradients.v[1][1] = 0.0f;
+  p->primitives.gradients.v[1][2] = 0.0f;
+
+  p->primitives.gradients.v[2][0] = 0.0f;
+  p->primitives.gradients.v[2][1] = 0.0f;
+  p->primitives.gradients.v[2][2] = 0.0f;
+
+  p->primitives.gradients.P[0] = 0.0f;
+  p->primitives.gradients.P[1] = 0.0f;
+  p->primitives.gradients.P[2] = 0.0f;
+
+  hydro_slope_limit_cell_init(p);
+}
+
+/**
+ * @brief Gradient calculations done during the neighbour loop: pi and pj
+ *
+ * @param r2 Squared distance between the two particles.
+ * @param dx Distance vector (pi->x - pj->x).
+ * @param hi Smoothing length of particle i.
+ * @param hj Smoothing length of particle j.
+ * @param pi Particle i; its gradient sums are updated.
+ * @param pj Particle j; its gradient sums are updated.
+ */
+__attribute__((always_inline)) INLINE static void hydro_gradients_collect(
+    float r2, float *dx, float hi, float hj, struct part *pi, struct part *pj) {
+
+  float r = sqrtf(r2);
+  float xi, xj;
+  float hi_inv, hj_inv;
+  float wi, wj, wi_dx, wj_dx;
+  int k, l;
+  float Bi[3][3];
+  float Bj[3][3];
+  float Wi[5], Wj[5];
+
+  /* Local copies of the matrix_E entries and of the primitive variables */
+  for (k = 0; k < 3; k++) {
+    for (l = 0; l < 3; l++) {
+      Bi[k][l] = pi->geometry.matrix_E[k][l];
+      Bj[k][l] = pj->geometry.matrix_E[k][l];
+    }
+  }
+  Wi[0] = pi->primitives.rho;
+  Wi[1] = pi->primitives.v[0];
+  Wi[2] = pi->primitives.v[1];
+  Wi[3] = pi->primitives.v[2];
+  Wi[4] = pi->primitives.P;
+  Wj[0] = pj->primitives.rho;
+  Wj[1] = pj->primitives.v[0];
+  Wj[2] = pj->primitives.v[1];
+  Wj[3] = pj->primitives.v[2];
+  Wj[4] = pj->primitives.P;
+
+  /* Compute kernel of pi (the derivative wi_dx is not used here). */
+  hi_inv = 1.0 / hi;
+  xi = r * hi_inv;
+  kernel_deval(xi, &wi, &wi_dx);
+
+  /* Compute gradients for pi */
+  /* there is a sign difference w.r.t. eqn. (6) because of the inverse
+   * definition of dx */
+  pi->primitives.gradients.rho[0] +=
+      (Wi[0] - Wj[0]) * wi *
+      (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]);
+  pi->primitives.gradients.rho[1] +=
+      (Wi[0] - Wj[0]) * wi *
+      (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]);
+  pi->primitives.gradients.rho[2] +=
+      (Wi[0] - Wj[0]) * wi *
+      (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]);
+
+  pi->primitives.gradients.v[0][0] +=
+      (Wi[1] - Wj[1]) * wi *
+      (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]);
+  pi->primitives.gradients.v[0][1] +=
+      (Wi[1] - Wj[1]) * wi *
+      (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]);
+  pi->primitives.gradients.v[0][2] +=
+      (Wi[1] - Wj[1]) * wi *
+      (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]);
+  pi->primitives.gradients.v[1][0] +=
+      (Wi[2] - Wj[2]) * wi *
+      (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]);
+  pi->primitives.gradients.v[1][1] +=
+      (Wi[2] - Wj[2]) * wi *
+      (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]);
+  pi->primitives.gradients.v[1][2] +=
+      (Wi[2] - Wj[2]) * wi *
+      (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]);
+  pi->primitives.gradients.v[2][0] +=
+      (Wi[3] - Wj[3]) * wi *
+      (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]);
+  pi->primitives.gradients.v[2][1] +=
+      (Wi[3] - Wj[3]) * wi *
+      (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]);
+  pi->primitives.gradients.v[2][2] +=
+      (Wi[3] - Wj[3]) * wi *
+      (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]);
+
+  pi->primitives.gradients.P[0] +=
+      (Wi[4] - Wj[4]) * wi *
+      (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]);
+  pi->primitives.gradients.P[1] +=
+      (Wi[4] - Wj[4]) * wi *
+      (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]);
+  pi->primitives.gradients.P[2] +=
+      (Wi[4] - Wj[4]) * wi *
+      (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]);
+
+  hydro_slope_limit_cell_collect(pi, pj, r);
+
+  /* Compute kernel of pj (the derivative wj_dx is not used here). */
+  hj_inv = 1.0 / hj;
+  xj = r * hj_inv;
+  kernel_deval(xj, &wj, &wj_dx);
+
+  /* Compute gradients for pj */
+  /* there is no sign difference w.r.t. eqn. (6) because dx is now what we
+   * want it to be: dx = pi->x - pj->x points from pj, the particle being
+   * updated, towards its neighbour pi */
+  pj->primitives.gradients.rho[0] +=
+      (Wi[0] - Wj[0]) * wj *
+      (Bj[0][0] * dx[0] + Bj[0][1] * dx[1] + Bj[0][2] * dx[2]);
+  pj->primitives.gradients.rho[1] +=
+      (Wi[0] - Wj[0]) * wj *
+      (Bj[1][0] * dx[0] + Bj[1][1] * dx[1] + Bj[1][2] * dx[2]);
+  pj->primitives.gradients.rho[2] +=
+      (Wi[0] - Wj[0]) * wj *
+      (Bj[2][0] * dx[0] + Bj[2][1] * dx[1] + Bj[2][2] * dx[2]);
+
+  pj->primitives.gradients.v[0][0] +=
+      (Wi[1] - Wj[1]) * wj *
+      (Bj[0][0] * dx[0] + Bj[0][1] * dx[1] + Bj[0][2] * dx[2]);
+  pj->primitives.gradients.v[0][1] +=
+      (Wi[1] - Wj[1]) * wj *
+      (Bj[1][0] * dx[0] + Bj[1][1] * dx[1] + Bj[1][2] * dx[2]);
+  pj->primitives.gradients.v[0][2] +=
+      (Wi[1] - Wj[1]) * wj *
+      (Bj[2][0] * dx[0] + Bj[2][1] * dx[1] + Bj[2][2] * dx[2]);
+  pj->primitives.gradients.v[1][0] +=
+      (Wi[2] - Wj[2]) * wj *
+      (Bj[0][0] * dx[0] + Bj[0][1] * dx[1] + Bj[0][2] * dx[2]);
+  pj->primitives.gradients.v[1][1] +=
+      (Wi[2] - Wj[2]) * wj *
+      (Bj[1][0] * dx[0] + Bj[1][1] * dx[1] + Bj[1][2] * dx[2]);
+  pj->primitives.gradients.v[1][2] +=
+      (Wi[2] - Wj[2]) * wj *
+      (Bj[2][0] * dx[0] + Bj[2][1] * dx[1] + Bj[2][2] * dx[2]);
+  pj->primitives.gradients.v[2][0] +=
+      (Wi[3] - Wj[3]) * wj *
+      (Bj[0][0] * dx[0] + Bj[0][1] * dx[1] + Bj[0][2] * dx[2]);
+  pj->primitives.gradients.v[2][1] +=
+      (Wi[3] - Wj[3]) * wj *
+      (Bj[1][0] * dx[0] + Bj[1][1] * dx[1] + Bj[1][2] * dx[2]);
+  pj->primitives.gradients.v[2][2] +=
+      (Wi[3] - Wj[3]) * wj *
+      (Bj[2][0] * dx[0] + Bj[2][1] * dx[1] + Bj[2][2] * dx[2]);
+
+  pj->primitives.gradients.P[0] +=
+      (Wi[4] - Wj[4]) * wj *
+      (Bj[0][0] * dx[0] + Bj[0][1] * dx[1] + Bj[0][2] * dx[2]);
+  pj->primitives.gradients.P[1] +=
+      (Wi[4] - Wj[4]) * wj *
+      (Bj[1][0] * dx[0] + Bj[1][1] * dx[1] + Bj[1][2] * dx[2]);
+  pj->primitives.gradients.P[2] +=
+      (Wi[4] - Wj[4]) * wj *
+      (Bj[2][0] * dx[0] + Bj[2][1] * dx[1] + Bj[2][2] * dx[2]);
+
+  hydro_slope_limit_cell_collect(pj, pi, r);
+}
+
+/**
+ * @brief Gradient calculations done during the neighbour loop: only pi
+ *
+ * @param r2 Squared distance between the two particles.
+ * @param dx Distance vector (pi->x - pj->x).
+ * @param hi Smoothing length of particle i.
+ * @param hj Smoothing length of particle j (not used in this version).
+ * @param pi Particle i; its gradient sums are updated.
+ * @param pj Particle j; its gradient sums are not updated here.
+ */
+__attribute__((always_inline)) INLINE static void
+hydro_gradients_nonsym_collect(float r2, float *dx, float hi, float hj,
+                               struct part *pi, struct part *pj) {
+
+  float r = sqrtf(r2);
+  float xi;
+  float hi_inv;
+  float wi, wi_dx;
+  int k, l;
+  float Bi[3][3];
+  float Wi[5], Wj[5];
+
+  /* Local copies of pi's matrix_E and of both sets of primitive variables */
+  for (k = 0; k < 3; k++) {
+    for (l = 0; l < 3; l++) {
+      Bi[k][l] = pi->geometry.matrix_E[k][l];
+    }
+  }
+  Wi[0] = pi->primitives.rho;
+  Wi[1] = pi->primitives.v[0];
+  Wi[2] = pi->primitives.v[1];
+  Wi[3] = pi->primitives.v[2];
+  Wi[4] = pi->primitives.P;
+  Wj[0] = pj->primitives.rho;
+  Wj[1] = pj->primitives.v[0];
+  Wj[2] = pj->primitives.v[1];
+  Wj[3] = pj->primitives.v[2];
+  Wj[4] = pj->primitives.P;
+
+  /* Compute kernel of pi (the derivative wi_dx is not used here). */
+  hi_inv = 1.0 / hi;
+  xi = r * hi_inv;
+  kernel_deval(xi, &wi, &wi_dx);
+
+  /* Compute gradients for pi */
+  /* there is a sign difference w.r.t. eqn. (6) because of the inverse
+   * definition of dx */
+  pi->primitives.gradients.rho[0] +=
+      (Wi[0] - Wj[0]) * wi *
+      (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]);
+  pi->primitives.gradients.rho[1] +=
+      (Wi[0] - Wj[0]) * wi *
+      (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]);
+  pi->primitives.gradients.rho[2] +=
+      (Wi[0] - Wj[0]) * wi *
+      (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]);
+
+  pi->primitives.gradients.v[0][0] +=
+      (Wi[1] - Wj[1]) * wi *
+      (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]);
+  pi->primitives.gradients.v[0][1] +=
+      (Wi[1] - Wj[1]) * wi *
+      (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]);
+  pi->primitives.gradients.v[0][2] +=
+      (Wi[1] - Wj[1]) * wi *
+      (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]);
+  pi->primitives.gradients.v[1][0] +=
+      (Wi[2] - Wj[2]) * wi *
+      (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]);
+  pi->primitives.gradients.v[1][1] +=
+      (Wi[2] - Wj[2]) * wi *
+      (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]);
+  pi->primitives.gradients.v[1][2] +=
+      (Wi[2] - Wj[2]) * wi *
+      (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]);
+  pi->primitives.gradients.v[2][0] +=
+      (Wi[3] - Wj[3]) * wi *
+      (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]);
+  pi->primitives.gradients.v[2][1] +=
+      (Wi[3] - Wj[3]) * wi *
+      (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]);
+  pi->primitives.gradients.v[2][2] +=
+      (Wi[3] - Wj[3]) * wi *
+      (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]);
+
+  pi->primitives.gradients.P[0] +=
+      (Wi[4] - Wj[4]) * wi *
+      (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]);
+  pi->primitives.gradients.P[1] +=
+      (Wi[4] - Wj[4]) * wi *
+      (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]);
+  pi->primitives.gradients.P[2] +=
+      (Wi[4] - Wj[4]) * wi *
+      (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]);
+
+  hydro_slope_limit_cell_collect(pi, pj, r);
+}
+
+/**
+ * @brief Normalize the collected gradients by pow_dimension(1/h) and limit them
+ *
+ * @param p Particle; p->h is read and primitives.gradients is rescaled.
+ */
+__attribute__((always_inline)) INLINE static void hydro_gradients_finalize(
+    struct part *p) {
+
+  float h, ih;
+
+  /* add kernel normalization to gradients: every component gets ihdim */
+  h = p->h;
+  ih = 1.0f / h;
+  const float ihdim = pow_dimension(ih);
+
+  p->primitives.gradients.rho[0] *= ihdim;
+  p->primitives.gradients.rho[1] *= ihdim;
+  p->primitives.gradients.rho[2] *= ihdim;
+
+  p->primitives.gradients.v[0][0] *= ihdim;
+  p->primitives.gradients.v[0][1] *= ihdim;
+  p->primitives.gradients.v[0][2] *= ihdim;
+  p->primitives.gradients.v[1][0] *= ihdim;
+  p->primitives.gradients.v[1][1] *= ihdim;
+  p->primitives.gradients.v[1][2] *= ihdim;
+  p->primitives.gradients.v[2][0] *= ihdim;
+  p->primitives.gradients.v[2][1] *= ihdim;
+  p->primitives.gradients.v[2][2] *= ihdim;
+
+  p->primitives.gradients.P[0] *= ihdim;
+  p->primitives.gradients.P[1] *= ihdim;
+  p->primitives.gradients.P[2] *= ihdim;
+
+  hydro_slope_limit_cell(p);
+}
diff --git a/src/hydro/Gizmo/hydro_gradients_sph.h b/src/hydro/Gizmo/hydro_gradients_sph.h
new file mode 100644
index 0000000000000000000000000000000000000000..f635faecea549f7da280ade9b944021a5e4aeb4c
--- /dev/null
+++ b/src/hydro/Gizmo/hydro_gradients_sph.h
@@ -0,0 +1,188 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2016 Bert Vandenbroucke (bert.vandenbroucke@gmail.com)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+#ifndef SWIFT_HYDRO_GRADIENTS_SPH_H
+#define SWIFT_HYDRO_GRADIENTS_SPH_H
+
+/**
+ * @brief Initialize gradient variables
+ *
+ * Sets every component of the density, velocity and pressure gradients to
+ * zero and calls hydro_slope_limit_cell_init() for the particle.
+ *
+ * @param p Particle.
+ */
+__attribute__((always_inline)) INLINE static void hydro_gradients_init(
+    struct part *p) {
+
+  int i, k;
+
+  for (k = 0; k < 3; k++) p->primitives.gradients.rho[k] = 0.0f;
+
+  for (i = 0; i < 3; i++) {
+    for (k = 0; k < 3; k++) p->primitives.gradients.v[i][k] = 0.0f;
+  }
+
+  for (k = 0; k < 3; k++) p->primitives.gradients.P[k] = 0.0f;
+
+  hydro_slope_limit_cell_init(p);
+}
+
+/**
+ * @brief Add the contribution of one particle pair to the gradient sums of a
+ * single particle
+ *
+ * For every primitive variable W (density, the three velocity components and
+ * pressure) and every coordinate k, the term
+ *   w_dx * dx[k] * (W_i - W_j) / r
+ * is subtracted from the matching gradient component of p. This is the "very
+ * basic gradient estimate" shared by the symmetric and non-symmetric loops.
+ *
+ * @param p Particle whose gradient sums are updated (pi or pj).
+ * @param w_dx Second output of kernel_deval(), evaluated at r / h of p.
+ * @param r Distance between the two particles.
+ * @param dx Distance vector (pi->x - pj->x).
+ * @param pi Particle i.
+ * @param pj Particle j.
+ */
+__attribute__((always_inline)) INLINE static void hydro_gradients_sph_add_pair(
+    struct part *p, float w_dx, float r, const float *dx,
+    const struct part *pi, const struct part *pj) {
+
+  int i, k;
+
+  for (k = 0; k < 3; k++) {
+    p->primitives.gradients.rho[k] -=
+        w_dx * dx[k] * (pi->primitives.rho - pj->primitives.rho) / r;
+  }
+
+  for (i = 0; i < 3; i++) {
+    for (k = 0; k < 3; k++) {
+      p->primitives.gradients.v[i][k] -=
+          w_dx * dx[k] * (pi->primitives.v[i] - pj->primitives.v[i]) / r;
+    }
+  }
+
+  for (k = 0; k < 3; k++) {
+    p->primitives.gradients.P[k] -=
+        w_dx * dx[k] * (pi->primitives.P - pj->primitives.P) / r;
+  }
+}
+
+/**
+ * @brief Gradient calculations done during the neighbour loop
+ *
+ * Symmetric version: the gradient sums of both pi and pj are updated, each
+ * with the kernel derivative for its own smoothing length, and both particles
+ * are passed to hydro_slope_limit_cell_collect().
+ *
+ * @param r2 Squared distance between the two particles.
+ * @param dx Distance vector (pi->x - pj->x).
+ * @param hi Smoothing length of particle i.
+ * @param hj Smoothing length of particle j.
+ * @param pi Particle i.
+ * @param pj Particle j.
+ */
+__attribute__((always_inline)) INLINE static void hydro_gradients_collect(
+    float r2, float *dx, float hi, float hj, struct part *pi, struct part *pj) {
+
+  float wi, wi_dx;
+  float wj, wj_dx;
+  const float r = sqrtf(r2);
+
+  /* contribution of the pair to pi */
+  const float hi_inv = 1.0f / hi;
+  const float xi = r * hi_inv;
+  kernel_deval(xi, &wi, &wi_dx);
+  hydro_gradients_sph_add_pair(pi, wi_dx, r, dx, pi, pj);
+  hydro_slope_limit_cell_collect(pi, pj, r);
+
+  /* contribution of the pair to pj: the signs are the same as for pi, since
+   * swapping i and j flips both dx and the difference of the primitives */
+  const float hj_inv = 1.0f / hj;
+  const float xj = r * hj_inv;
+  kernel_deval(xj, &wj, &wj_dx);
+  hydro_gradients_sph_add_pair(pj, wj_dx, r, dx, pi, pj);
+  hydro_slope_limit_cell_collect(pj, pi, r);
+}
+
+/**
+ * @brief Gradient calculations done during the neighbour loop: non-symmetric
+ * version
+ *
+ * Only the gradient sums of pi are updated, after which the pair is passed to
+ * hydro_slope_limit_cell_collect(). hj is not used by this version.
+ *
+ * @param r2 Squared distance between the two particles.
+ * @param dx Distance vector (pi->x - pj->x).
+ * @param hi Smoothing length of particle i.
+ * @param hj Smoothing length of particle j.
+ * @param pi Particle i.
+ * @param pj Particle j.
+ */
+__attribute__((always_inline)) INLINE static void
+hydro_gradients_nonsym_collect(float r2, float *dx, float hi, float hj,
+                               struct part *pi, struct part *pj) {
+
+  float wi, wi_dx;
+  const float r = sqrtf(r2);
+
+  const float hi_inv = 1.0f / hi;
+  const float xi = r * hi_inv;
+  kernel_deval(xi, &wi, &wi_dx);
+
+  hydro_gradients_sph_add_pair(pi, wi_dx, r, dx, pi, pj);
+
+  hydro_slope_limit_cell_collect(pi, pj, r);
+}
+
+/**
+ * @brief Finalize the gradient variables after all data have been collected
+ *
+ * Every gradient component is multiplied by pow_dimension_plus_one(1 / h)
+ * times the volume stored in p->geometry.volume, after which the particle is
+ * passed to hydro_slope_limit_cell().
+ *
+ * @param p Particle.
+ */
+__attribute__((always_inline)) INLINE static void hydro_gradients_finalize(
+    struct part *p) {
+
+  int i, k;
+
+  const float h = p->h;
+  const float ih = 1.0f / h;
+  const float ihdimp1 = pow_dimension_plus_one(ih);
+  const float volume = p->geometry.volume;
+
+  /* common factor applied to every gradient component */
+  const float norm = ihdimp1 * volume;
+
+  for (k = 0; k < 3; k++) p->primitives.gradients.rho[k] *= norm;
+
+  for (i = 0; i < 3; i++) {
+    for (k = 0; k < 3; k++) p->primitives.gradients.v[i][k] *= norm;
+  }
+
+  for (k = 0; k < 3; k++) p->primitives.gradients.P[k] *= norm;
+
+  hydro_slope_limit_cell(p);
+}
+
+#endif /* SWIFT_HYDRO_GRADIENTS_SPH_H */
diff --git a/src/hydro/Gizmo/hydro_iact.h b/src/hydro/Gizmo/hydro_iact.h
index 30a8d6cbebc851b44a5ee2339950aec9e15057c0..cf2b9a223b49c3ce2fbd6874b83c523e8213a5ce 100644
--- a/src/hydro/Gizmo/hydro_iact.h
+++ b/src/hydro/Gizmo/hydro_iact.h
@@ -19,14 +19,28 @@
  *
  ******************************************************************************/
 
+#include "adiabatic_index.h"
+#include "hydro_gradients.h"
 #include "riemann.h"
 
-#define USE_GRADIENTS
-#define PER_FACE_LIMITER
-/* #define PRINT_ID 0 */
-
-/* this corresponds to task_subtype_hydro_loop1 */
-__attribute__((always_inline)) INLINE static void runner_iact_hydro_loop1(
+/**
+ * @brief Calculate the volume interaction between particle i and particle j
+ *
+ * The volume is in essence the same as the weighted number of neighbours in a
+ * classical SPH density calculation.
+ *
+ * We also calculate the components of the matrix E, which is used for second
+ * order accurate gradient calculations and for the calculation of the interface
+ * surface areas.
+ *
+ * @param r2 Squared distance between particle i and particle j.
+ * @param dx Distance vector between the particles (dx = pi->x - pj->x).
+ * @param hi Smoothing length of particle i.
+ * @param hj Smoothing length of particle j.
+ * @param pi Particle i.
+ * @param pj Particle j.
+ */
+__attribute__((always_inline)) INLINE static void runner_iact_density(
     float r2, float *dx, float hi, float hj, struct part *pi, struct part *pj) {
 
   float r = sqrtf(r2);
@@ -48,71 +62,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_hydro_loop1(
   for (k = 0; k < 3; k++)
     for (l = 0; l < 3; l++) pi->geometry.matrix_E[k][l] += dx[k] * dx[l] * wi;
 
-#ifdef SPH_GRADIENTS
-  /* very basic gradient estimate */
-  pi->primitives.gradients.rho[0] -=
-      wi_dx * dx[0] * (pi->primitives.rho - pj->primitives.rho) / r;
-  pi->primitives.gradients.rho[1] -=
-      wi_dx * dx[1] * (pi->primitives.rho - pj->primitives.rho) / r;
-  pi->primitives.gradients.rho[2] -=
-      wi_dx * dx[2] * (pi->primitives.rho - pj->primitives.rho) / r;
-
-  pi->primitives.gradients.v[0][0] -=
-      wi_dx * dx[0] * (pi->primitives.v[0] - pj->primitives.v[0]) / r;
-  pi->primitives.gradients.v[0][1] -=
-      wi_dx * dx[1] * (pi->primitives.v[0] - pj->primitives.v[0]) / r;
-  pi->primitives.gradients.v[0][2] -=
-      wi_dx * dx[2] * (pi->primitives.v[0] - pj->primitives.v[0]) / r;
-
-  pi->primitives.gradients.v[1][0] -=
-      wi_dx * dx[0] * (pi->primitives.v[1] - pj->primitives.v[1]) / r;
-  pi->primitives.gradients.v[1][1] -=
-      wi_dx * dx[1] * (pi->primitives.v[1] - pj->primitives.v[1]) / r;
-  pi->primitives.gradients.v[1][2] -=
-      wi_dx * dx[2] * (pi->primitives.v[1] - pj->primitives.v[1]) / r;
-
-  pi->primitives.gradients.v[2][0] -=
-      wi_dx * dx[0] * (pi->primitives.v[2] - pj->primitives.v[2]) / r;
-  pi->primitives.gradients.v[2][1] -=
-      wi_dx * dx[1] * (pi->primitives.v[2] - pj->primitives.v[2]) / r;
-  pi->primitives.gradients.v[2][2] -=
-      wi_dx * dx[2] * (pi->primitives.v[2] - pj->primitives.v[2]) / r;
-
-  pi->primitives.gradients.P[0] -=
-      wi_dx * dx[0] * (pi->primitives.P - pj->primitives.P) / r;
-  pi->primitives.gradients.P[1] -=
-      wi_dx * dx[1] * (pi->primitives.P - pj->primitives.P) / r;
-  pi->primitives.gradients.P[2] -=
-      wi_dx * dx[2] * (pi->primitives.P - pj->primitives.P) / r;
-
-  /* basic slope limiter: collect the maximal and the minimal value for the
-   * primitive variables among the ngbs */
-  pi->primitives.limiter.rho[0] =
-      fmin(pj->primitives.rho, pi->primitives.limiter.rho[0]);
-  pi->primitives.limiter.rho[1] =
-      fmax(pj->primitives.rho, pi->primitives.limiter.rho[1]);
-
-  pi->primitives.limiter.v[0][0] =
-      fmin(pj->primitives.v[0], pi->primitives.limiter.v[0][0]);
-  pi->primitives.limiter.v[0][1] =
-      fmax(pj->primitives.v[0], pi->primitives.limiter.v[0][1]);
-  pi->primitives.limiter.v[1][0] =
-      fmin(pj->primitives.v[1], pi->primitives.limiter.v[1][0]);
-  pi->primitives.limiter.v[1][1] =
-      fmax(pj->primitives.v[1], pi->primitives.limiter.v[1][1]);
-  pi->primitives.limiter.v[2][0] =
-      fmin(pj->primitives.v[2], pi->primitives.limiter.v[2][0]);
-  pi->primitives.limiter.v[2][1] =
-      fmax(pj->primitives.v[2], pi->primitives.limiter.v[2][1]);
-
-  pi->primitives.limiter.P[0] =
-      fmin(pj->primitives.P, pi->primitives.limiter.P[0]);
-  pi->primitives.limiter.P[1] =
-      fmax(pj->primitives.P, pi->primitives.limiter.P[1]);
-
-  pi->primitives.limiter.maxr = fmax(r, pi->primitives.limiter.maxr);
-#endif
-
   /* Compute density of pj. */
   h_inv = 1.0 / hj;
   xj = r * h_inv;
@@ -125,78 +74,28 @@ __attribute__((always_inline)) INLINE static void runner_iact_hydro_loop1(
   pj->geometry.volume += wj;
   for (k = 0; k < 3; k++)
     for (l = 0; l < 3; l++) pj->geometry.matrix_E[k][l] += dx[k] * dx[l] * wj;
-
-#ifdef SPH_GRADIENTS
-  /* very basic gradient estimate */
-  /* signs are the same as before, since we swap i and j twice */
-  pj->primitives.gradients.rho[0] -=
-      wj_dx * dx[0] * (pi->primitives.rho - pj->primitives.rho) / r;
-  pj->primitives.gradients.rho[1] -=
-      wj_dx * dx[1] * (pi->primitives.rho - pj->primitives.rho) / r;
-  pj->primitives.gradients.rho[2] -=
-      wj_dx * dx[2] * (pi->primitives.rho - pj->primitives.rho) / r;
-
-  pj->primitives.gradients.v[0][0] -=
-      wj_dx * dx[0] * (pi->primitives.v[0] - pj->primitives.v[0]) / r;
-  pj->primitives.gradients.v[0][1] -=
-      wj_dx * dx[1] * (pi->primitives.v[0] - pj->primitives.v[0]) / r;
-  pj->primitives.gradients.v[0][2] -=
-      wj_dx * dx[2] * (pi->primitives.v[0] - pj->primitives.v[0]) / r;
-
-  pj->primitives.gradients.v[1][0] -=
-      wj_dx * dx[0] * (pi->primitives.v[1] - pj->primitives.v[1]) / r;
-  pj->primitives.gradients.v[1][1] -=
-      wj_dx * dx[1] * (pi->primitives.v[1] - pj->primitives.v[1]) / r;
-  pj->primitives.gradients.v[1][2] -=
-      wj_dx * dx[2] * (pi->primitives.v[1] - pj->primitives.v[1]) / r;
-
-  pj->primitives.gradients.v[2][0] -=
-      wj_dx * dx[0] * (pi->primitives.v[2] - pj->primitives.v[2]) / r;
-  pj->primitives.gradients.v[2][1] -=
-      wj_dx * dx[1] * (pi->primitives.v[2] - pj->primitives.v[2]) / r;
-  pj->primitives.gradients.v[2][2] -=
-      wj_dx * dx[2] * (pi->primitives.v[2] - pj->primitives.v[2]) / r;
-
-  pj->primitives.gradients.P[0] -=
-      wj_dx * dx[0] * (pi->primitives.P - pj->primitives.P) / r;
-  pj->primitives.gradients.P[1] -=
-      wj_dx * dx[1] * (pi->primitives.P - pj->primitives.P) / r;
-  pj->primitives.gradients.P[2] -=
-      wj_dx * dx[2] * (pi->primitives.P - pj->primitives.P) / r;
-
-  /* basic slope limiter: collect the maximal and the minimal value for the
-   * primitive variables among the ngbs */
-  pj->primitives.limiter.rho[0] =
-      fmin(pi->primitives.rho, pj->primitives.limiter.rho[0]);
-  pj->primitives.limiter.rho[1] =
-      fmax(pi->primitives.rho, pj->primitives.limiter.rho[1]);
-
-  pj->primitives.limiter.v[0][0] =
-      fmin(pi->primitives.v[0], pj->primitives.limiter.v[0][0]);
-  pj->primitives.limiter.v[0][1] =
-      fmax(pi->primitives.v[0], pj->primitives.limiter.v[0][1]);
-  pj->primitives.limiter.v[1][0] =
-      fmin(pi->primitives.v[1], pj->primitives.limiter.v[1][0]);
-  pj->primitives.limiter.v[1][1] =
-      fmax(pi->primitives.v[1], pj->primitives.limiter.v[1][1]);
-  pj->primitives.limiter.v[2][0] =
-      fmin(pi->primitives.v[2], pj->primitives.limiter.v[2][0]);
-  pj->primitives.limiter.v[2][1] =
-      fmax(pi->primitives.v[2], pj->primitives.limiter.v[2][1]);
-
-  pj->primitives.limiter.P[0] =
-      fmin(pi->primitives.P, pj->primitives.limiter.P[0]);
-  pj->primitives.limiter.P[1] =
-      fmax(pi->primitives.P, pj->primitives.limiter.P[1]);
-
-  pj->primitives.limiter.maxr = fmax(r, pj->primitives.limiter.maxr);
-#endif
 }
 
-/* this corresponds to task_subtype_hydro_loop1 */
-__attribute__((always_inline)) INLINE static void
-runner_iact_nonsym_hydro_loop1(float r2, float *dx, float hi, float hj,
-                               struct part *pi, struct part *pj) {
+/**
+ * @brief Calculate the volume interaction between particle i and particle j:
+ * non-symmetric version
+ *
+ * The volume is in essence the same as the weighted number of neighbours in a
+ * classical SPH density calculation.
+ *
+ * We also calculate the components of the matrix E, which is used for second
+ * order accurate gradient calculations and for the calculation of the interface
+ * surface areas.
+ *
+ * @param r2 Squared distance between particle i and particle j.
+ * @param dx Distance vector between the particles (dx = pi->x - pj->x).
+ * @param hi Smoothing length of particle i.
+ * @param hj Smoothing length of particle j.
+ * @param pi Particle i.
+ * @param pj Particle j.
+ */
+__attribute__((always_inline)) INLINE static void runner_iact_nonsym_density(
+    float r2, float *dx, float hi, float hj, struct part *pi, struct part *pj) {
 
   float r;
   float xi;
@@ -218,397 +117,80 @@ runner_iact_nonsym_hydro_loop1(float r2, float *dx, float hi, float hj,
   pi->geometry.volume += wi;
   for (k = 0; k < 3; k++)
     for (l = 0; l < 3; l++) pi->geometry.matrix_E[k][l] += dx[k] * dx[l] * wi;
-
-#ifdef SPH_GRADIENTS
-  /* very basic gradient estimate */
-  pi->primitives.gradients.rho[0] -=
-      wi_dx * dx[0] * (pi->primitives.rho - pj->primitives.rho) / r;
-  pi->primitives.gradients.rho[1] -=
-      wi_dx * dx[1] * (pi->primitives.rho - pj->primitives.rho) / r;
-  pi->primitives.gradients.rho[2] -=
-      wi_dx * dx[2] * (pi->primitives.rho - pj->primitives.rho) / r;
-
-  pi->primitives.gradients.v[0][0] -=
-      wi_dx * dx[0] * (pi->primitives.v[0] - pj->primitives.v[0]) / r;
-  pi->primitives.gradients.v[0][1] -=
-      wi_dx * dx[1] * (pi->primitives.v[0] - pj->primitives.v[0]) / r;
-  pi->primitives.gradients.v[0][2] -=
-      wi_dx * dx[2] * (pi->primitives.v[0] - pj->primitives.v[0]) / r;
-
-  pi->primitives.gradients.v[1][0] -=
-      wi_dx * dx[0] * (pi->primitives.v[1] - pj->primitives.v[1]) / r;
-  pi->primitives.gradients.v[1][1] -=
-      wi_dx * dx[1] * (pi->primitives.v[1] - pj->primitives.v[1]) / r;
-  pi->primitives.gradients.v[1][2] -=
-      wi_dx * dx[2] * (pi->primitives.v[1] - pj->primitives.v[1]) / r;
-
-  pi->primitives.gradients.v[2][0] -=
-      wi_dx * dx[0] * (pi->primitives.v[2] - pj->primitives.v[2]) / r;
-  pi->primitives.gradients.v[2][1] -=
-      wi_dx * dx[1] * (pi->primitives.v[2] - pj->primitives.v[2]) / r;
-  pi->primitives.gradients.v[2][2] -=
-      wi_dx * dx[2] * (pi->primitives.v[2] - pj->primitives.v[2]) / r;
-
-  pi->primitives.gradients.P[0] -=
-      wi_dx * dx[0] * (pi->primitives.P - pj->primitives.P) / r;
-  pi->primitives.gradients.P[1] -=
-      wi_dx * dx[1] * (pi->primitives.P - pj->primitives.P) / r;
-  pi->primitives.gradients.P[2] -=
-      wi_dx * dx[2] * (pi->primitives.P - pj->primitives.P) / r;
-
-  /* slope limiter */
-  pi->primitives.limiter.rho[0] =
-      fmin(pj->primitives.rho, pi->primitives.limiter.rho[0]);
-  pi->primitives.limiter.rho[1] =
-      fmax(pj->primitives.rho, pi->primitives.limiter.rho[1]);
-
-  pi->primitives.limiter.v[0][0] =
-      fmin(pj->primitives.v[0], pi->primitives.limiter.v[0][0]);
-  pi->primitives.limiter.v[0][1] =
-      fmax(pj->primitives.v[0], pi->primitives.limiter.v[0][1]);
-  pi->primitives.limiter.v[1][0] =
-      fmin(pj->primitives.v[1], pi->primitives.limiter.v[1][0]);
-  pi->primitives.limiter.v[1][1] =
-      fmax(pj->primitives.v[1], pi->primitives.limiter.v[1][1]);
-  pi->primitives.limiter.v[2][0] =
-      fmin(pj->primitives.v[2], pi->primitives.limiter.v[2][0]);
-  pi->primitives.limiter.v[2][1] =
-      fmax(pj->primitives.v[2], pi->primitives.limiter.v[2][1]);
-
-  pi->primitives.limiter.P[0] =
-      fmin(pj->primitives.P, pi->primitives.limiter.P[0]);
-  pi->primitives.limiter.P[1] =
-      fmax(pj->primitives.P, pi->primitives.limiter.P[1]);
-
-  pi->primitives.limiter.maxr = fmax(r, pi->primitives.limiter.maxr);
-#endif
 }
 
-__attribute__((always_inline)) INLINE static void runner_iact_hydro_loop2(
+/**
+ * @brief Calculate the gradient interaction between particle i and particle j
+ *
+ * This method wraps around hydro_gradients_collect, which can be an empty
+ * method, in which case no gradients are used.
+ *
+ * @param r2 Squared distance between particle i and particle j.
+ * @param dx Distance vector between the particles (dx = pi->x - pj->x).
+ * @param hi Smoothing length of particle i.
+ * @param hj Smoothing length of particle j.
+ * @param pi Particle i.
+ * @param pj Particle j.
+ */
+__attribute__((always_inline)) INLINE static void runner_iact_gradient(
     float r2, float *dx, float hi, float hj, struct part *pi, struct part *pj) {
 
-#ifndef SPH_GRADIENTS
-
-  float r = sqrtf(r2);
-  float xi, xj;
-  float hi_inv, hj_inv;
-  float wi, wj, wi_dx, wj_dx;
-  int k, l;
-  float Bi[3][3];
-  float Bj[3][3];
-  GFLOAT Wi[5], Wj[5];
-
-  /* Initialize local variables */
-  for (k = 0; k < 3; k++) {
-    for (l = 0; l < 3; l++) {
-      Bi[k][l] = pi->geometry.matrix_E[k][l];
-      Bj[k][l] = pj->geometry.matrix_E[k][l];
-    }
-  }
-  Wi[0] = pi->primitives.rho;
-  Wi[1] = pi->primitives.v[0];
-  Wi[2] = pi->primitives.v[1];
-  Wi[3] = pi->primitives.v[2];
-  Wi[4] = pi->primitives.P;
-  Wj[0] = pj->primitives.rho;
-  Wj[1] = pj->primitives.v[0];
-  Wj[2] = pj->primitives.v[1];
-  Wj[3] = pj->primitives.v[2];
-  Wj[4] = pj->primitives.P;
-
-  /* Compute kernel of pi. */
-  hi_inv = 1.0 / hi;
-  xi = r * hi_inv;
-  kernel_deval(xi, &wi, &wi_dx);
-
-  /* Compute gradients for pi */
-  /* there is a sign difference w.r.t. eqn. (6) because of the inverse
-   * definition of dx */
-  pi->primitives.gradients.rho[0] +=
-      (Wi[0] - Wj[0]) * wi *
-      (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]);
-  pi->primitives.gradients.rho[1] +=
-      (Wi[0] - Wj[0]) * wi *
-      (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]);
-  pi->primitives.gradients.rho[2] +=
-      (Wi[0] - Wj[0]) * wi *
-      (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]);
-
-  pi->primitives.gradients.v[0][0] +=
-      (Wi[1] - Wj[1]) * wi *
-      (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]);
-  pi->primitives.gradients.v[0][1] +=
-      (Wi[1] - Wj[1]) * wi *
-      (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]);
-  pi->primitives.gradients.v[0][2] +=
-      (Wi[1] - Wj[1]) * wi *
-      (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]);
-  pi->primitives.gradients.v[1][0] +=
-      (Wi[2] - Wj[2]) * wi *
-      (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]);
-  pi->primitives.gradients.v[1][1] +=
-      (Wi[2] - Wj[2]) * wi *
-      (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]);
-  pi->primitives.gradients.v[1][2] +=
-      (Wi[2] - Wj[2]) * wi *
-      (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]);
-  pi->primitives.gradients.v[2][0] +=
-      (Wi[3] - Wj[3]) * wi *
-      (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]);
-  pi->primitives.gradients.v[2][1] +=
-      (Wi[3] - Wj[3]) * wi *
-      (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]);
-  pi->primitives.gradients.v[2][2] +=
-      (Wi[3] - Wj[3]) * wi *
-      (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]);
-
-  pi->primitives.gradients.P[0] +=
-      (Wi[4] - Wj[4]) * wi *
-      (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]);
-  pi->primitives.gradients.P[1] +=
-      (Wi[4] - Wj[4]) * wi *
-      (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]);
-  pi->primitives.gradients.P[2] +=
-      (Wi[4] - Wj[4]) * wi *
-      (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]);
-
-  /* basic slope limiter: collect the maximal and the minimal value for the
-   * primitive variables among the ngbs */
-  pi->primitives.limiter.rho[0] =
-      fmin(pj->primitives.rho, pi->primitives.limiter.rho[0]);
-  pi->primitives.limiter.rho[1] =
-      fmax(pj->primitives.rho, pi->primitives.limiter.rho[1]);
-
-  pi->primitives.limiter.v[0][0] =
-      fmin(pj->primitives.v[0], pi->primitives.limiter.v[0][0]);
-  pi->primitives.limiter.v[0][1] =
-      fmax(pj->primitives.v[0], pi->primitives.limiter.v[0][1]);
-  pi->primitives.limiter.v[1][0] =
-      fmin(pj->primitives.v[1], pi->primitives.limiter.v[1][0]);
-  pi->primitives.limiter.v[1][1] =
-      fmax(pj->primitives.v[1], pi->primitives.limiter.v[1][1]);
-  pi->primitives.limiter.v[2][0] =
-      fmin(pj->primitives.v[2], pi->primitives.limiter.v[2][0]);
-  pi->primitives.limiter.v[2][1] =
-      fmax(pj->primitives.v[2], pi->primitives.limiter.v[2][1]);
-
-  pi->primitives.limiter.P[0] =
-      fmin(pj->primitives.P, pi->primitives.limiter.P[0]);
-  pi->primitives.limiter.P[1] =
-      fmax(pj->primitives.P, pi->primitives.limiter.P[1]);
-
-  pi->primitives.limiter.maxr = fmax(r, pi->primitives.limiter.maxr);
-
-  /* Compute kernel of pj. */
-  hj_inv = 1.0 / hj;
-  xj = r * hj_inv;
-  kernel_deval(xj, &wj, &wj_dx);
-
-  /* Compute gradients for pj */
-  /* there is no sign difference w.r.t. eqn. (6) because dx is now what we want
-   * it to be */
-  pj->primitives.gradients.rho[0] +=
-      (Wi[0] - Wj[0]) * wj *
-      (Bj[0][0] * dx[0] + Bj[0][1] * dx[1] + Bj[0][2] * dx[2]);
-  pj->primitives.gradients.rho[1] +=
-      (Wi[0] - Wj[0]) * wj *
-      (Bj[1][0] * dx[0] + Bj[1][1] * dx[1] + Bj[1][2] * dx[2]);
-  pj->primitives.gradients.rho[2] +=
-      (Wi[0] - Wj[0]) * wj *
-      (Bj[2][0] * dx[0] + Bj[2][1] * dx[1] + Bj[2][2] * dx[2]);
-
-  pj->primitives.gradients.v[0][0] +=
-      (Wi[1] - Wj[1]) * wj *
-      (Bj[0][0] * dx[0] + Bj[0][1] * dx[1] + Bj[0][2] * dx[2]);
-  pj->primitives.gradients.v[0][1] +=
-      (Wi[1] - Wj[1]) * wj *
-      (Bj[1][0] * dx[0] + Bj[1][1] * dx[1] + Bj[1][2] * dx[2]);
-  pj->primitives.gradients.v[0][2] +=
-      (Wi[1] - Wj[1]) * wj *
-      (Bj[2][0] * dx[0] + Bj[2][1] * dx[1] + Bj[2][2] * dx[2]);
-  pj->primitives.gradients.v[1][0] +=
-      (Wi[2] - Wj[2]) * wj *
-      (Bj[0][0] * dx[0] + Bj[0][1] * dx[1] + Bj[0][2] * dx[2]);
-  pj->primitives.gradients.v[1][1] +=
-      (Wi[2] - Wj[2]) * wj *
-      (Bj[1][0] * dx[0] + Bj[1][1] * dx[1] + Bj[1][2] * dx[2]);
-  pj->primitives.gradients.v[1][2] +=
-      (Wi[2] - Wj[2]) * wj *
-      (Bj[2][0] * dx[0] + Bj[2][1] * dx[1] + Bj[2][2] * dx[2]);
-  pj->primitives.gradients.v[2][0] +=
-      (Wi[3] - Wj[3]) * wj *
-      (Bj[0][0] * dx[0] + Bj[0][1] * dx[1] + Bj[0][2] * dx[2]);
-  pj->primitives.gradients.v[2][1] +=
-      (Wi[3] - Wj[3]) * wj *
-      (Bj[1][0] * dx[0] + Bj[1][1] * dx[1] + Bj[1][2] * dx[2]);
-  pj->primitives.gradients.v[2][2] +=
-      (Wi[3] - Wj[3]) * wj *
-      (Bj[2][0] * dx[0] + Bj[2][1] * dx[1] + Bj[2][2] * dx[2]);
-
-  pj->primitives.gradients.P[0] +=
-      (Wi[4] - Wj[4]) * wj *
-      (Bj[0][0] * dx[0] + Bj[0][1] * dx[1] + Bj[0][2] * dx[2]);
-  pj->primitives.gradients.P[1] +=
-      (Wi[4] - Wj[4]) * wj *
-      (Bj[1][0] * dx[0] + Bj[1][1] * dx[1] + Bj[1][2] * dx[2]);
-  pj->primitives.gradients.P[2] +=
-      (Wi[4] - Wj[4]) * wj *
-      (Bj[2][0] * dx[0] + Bj[2][1] * dx[1] + Bj[2][2] * dx[2]);
-
-  /* basic slope limiter: collect the maximal and the minimal value for the
-   * primitive variables among the ngbs */
-  pj->primitives.limiter.rho[0] =
-      fmin(pi->primitives.rho, pj->primitives.limiter.rho[0]);
-  pj->primitives.limiter.rho[1] =
-      fmax(pi->primitives.rho, pj->primitives.limiter.rho[1]);
-
-  pj->primitives.limiter.v[0][0] =
-      fmin(pi->primitives.v[0], pj->primitives.limiter.v[0][0]);
-  pj->primitives.limiter.v[0][1] =
-      fmax(pi->primitives.v[0], pj->primitives.limiter.v[0][1]);
-  pj->primitives.limiter.v[1][0] =
-      fmin(pi->primitives.v[1], pj->primitives.limiter.v[1][0]);
-  pj->primitives.limiter.v[1][1] =
-      fmax(pi->primitives.v[1], pj->primitives.limiter.v[1][1]);
-  pj->primitives.limiter.v[2][0] =
-      fmin(pi->primitives.v[2], pj->primitives.limiter.v[2][0]);
-  pj->primitives.limiter.v[2][1] =
-      fmax(pi->primitives.v[2], pj->primitives.limiter.v[2][1]);
-
-  pj->primitives.limiter.P[0] =
-      fmin(pi->primitives.P, pj->primitives.limiter.P[0]);
-  pj->primitives.limiter.P[1] =
-      fmax(pi->primitives.P, pj->primitives.limiter.P[1]);
-
-  pj->primitives.limiter.maxr = fmax(r, pj->primitives.limiter.maxr);
-
-#endif
+  hydro_gradients_collect(r2, dx, hi, hj, pi, pj);
 }
 
-__attribute__((always_inline)) INLINE static void
-runner_iact_nonsym_hydro_loop2(float r2, float *dx, float hi, float hj,
-                               struct part *pi, struct part *pj) {
-
-#ifndef SPH_GRADIENTS
-
-  float r = sqrtf(r2);
-  float xi;
-  float hi_inv;
-  float wi, wi_dx;
-  int k, l;
-  float Bi[3][3];
-  GFLOAT Wi[5], Wj[5];
-
-  /* Initialize local variables */
-  for (k = 0; k < 3; k++) {
-    for (l = 0; l < 3; l++) {
-      Bi[k][l] = pi->geometry.matrix_E[k][l];
-    }
-  }
-  Wi[0] = pi->primitives.rho;
-  Wi[1] = pi->primitives.v[0];
-  Wi[2] = pi->primitives.v[1];
-  Wi[3] = pi->primitives.v[2];
-  Wi[4] = pi->primitives.P;
-  Wj[0] = pj->primitives.rho;
-  Wj[1] = pj->primitives.v[0];
-  Wj[2] = pj->primitives.v[1];
-  Wj[3] = pj->primitives.v[2];
-  Wj[4] = pj->primitives.P;
-
-  /* Compute kernel of pi. */
-  hi_inv = 1.0 / hi;
-  xi = r * hi_inv;
-  kernel_deval(xi, &wi, &wi_dx);
-
-  /* Compute gradients for pi */
-  /* there is a sign difference w.r.t. eqn. (6) because of the inverse
-   * definition of dx */
-  pi->primitives.gradients.rho[0] +=
-      (Wi[0] - Wj[0]) * wi *
-      (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]);
-  pi->primitives.gradients.rho[1] +=
-      (Wi[0] - Wj[0]) * wi *
-      (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]);
-  pi->primitives.gradients.rho[2] +=
-      (Wi[0] - Wj[0]) * wi *
-      (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]);
-
-  pi->primitives.gradients.v[0][0] +=
-      (Wi[1] - Wj[1]) * wi *
-      (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]);
-  pi->primitives.gradients.v[0][1] +=
-      (Wi[1] - Wj[1]) * wi *
-      (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]);
-  pi->primitives.gradients.v[0][2] +=
-      (Wi[1] - Wj[1]) * wi *
-      (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]);
-  pi->primitives.gradients.v[1][0] +=
-      (Wi[2] - Wj[2]) * wi *
-      (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]);
-  pi->primitives.gradients.v[1][1] +=
-      (Wi[2] - Wj[2]) * wi *
-      (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]);
-  pi->primitives.gradients.v[1][2] +=
-      (Wi[2] - Wj[2]) * wi *
-      (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]);
-  pi->primitives.gradients.v[2][0] +=
-      (Wi[3] - Wj[3]) * wi *
-      (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]);
-  pi->primitives.gradients.v[2][1] +=
-      (Wi[3] - Wj[3]) * wi *
-      (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]);
-  pi->primitives.gradients.v[2][2] +=
-      (Wi[3] - Wj[3]) * wi *
-      (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]);
-
-  pi->primitives.gradients.P[0] +=
-      (Wi[4] - Wj[4]) * wi *
-      (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]);
-  pi->primitives.gradients.P[1] +=
-      (Wi[4] - Wj[4]) * wi *
-      (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]);
-  pi->primitives.gradients.P[2] +=
-      (Wi[4] - Wj[4]) * wi *
-      (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]);
-
-  /* slope limiter */
-  pi->primitives.limiter.rho[0] =
-      fmin(pj->primitives.rho, pi->primitives.limiter.rho[0]);
-  pi->primitives.limiter.rho[1] =
-      fmax(pj->primitives.rho, pi->primitives.limiter.rho[1]);
-
-  pi->primitives.limiter.v[0][0] =
-      fmin(pj->primitives.v[0], pi->primitives.limiter.v[0][0]);
-  pi->primitives.limiter.v[0][1] =
-      fmax(pj->primitives.v[0], pi->primitives.limiter.v[0][1]);
-  pi->primitives.limiter.v[1][0] =
-      fmin(pj->primitives.v[1], pi->primitives.limiter.v[1][0]);
-  pi->primitives.limiter.v[1][1] =
-      fmax(pj->primitives.v[1], pi->primitives.limiter.v[1][1]);
-  pi->primitives.limiter.v[2][0] =
-      fmin(pj->primitives.v[2], pi->primitives.limiter.v[2][0]);
-  pi->primitives.limiter.v[2][1] =
-      fmax(pj->primitives.v[2], pi->primitives.limiter.v[2][1]);
-
-  pi->primitives.limiter.P[0] =
-      fmin(pj->primitives.P, pi->primitives.limiter.P[0]);
-  pi->primitives.limiter.P[1] =
-      fmax(pj->primitives.P, pi->primitives.limiter.P[1]);
-
-  pi->primitives.limiter.maxr = fmax(r, pi->primitives.limiter.maxr);
+/**
+ * @brief Calculate the gradient interaction between particle i and particle j:
+ * non-symmetric version
+ *
+ * This method wraps around hydro_gradients_nonsym_collect, which can be an
+ * empty method, in which case no gradients are used.
+ *
+ * @param r2 Squared distance between particle i and particle j.
+ * @param dx Distance vector between the particles (dx = pi->x - pj->x).
+ * @param hi Smoothing length of particle i.
+ * @param hj Smoothing length of particle j.
+ * @param pi Particle i.
+ * @param pj Particle j.
+ */
+__attribute__((always_inline)) INLINE static void runner_iact_nonsym_gradient(
+    float r2, float *dx, float hi, float hj, struct part *pi, struct part *pj) {
 
-#endif
+  hydro_gradients_nonsym_collect(r2, dx, hi, hj, pi, pj);
 }
 
+/**
+ * @brief Common part of the flux calculation between particle i and j
+ *
+ * Since the only difference between the symmetric and non-symmetric version
+ * of the flux calculation is in the update of the conserved variables at the
+ * very end (which is not done for particle j if mode is 0 and particle j is
+ * active), both runner_iact_force and runner_iact_nonsym_force call this
+ * method, with an appropriate mode.
+ *
+ * This method calculates the surface area of the interface between particle i
+ * and particle j, as well as the interface position and velocity. These are
+ * then used to reconstruct and predict the primitive variables, which are then
+ * fed to a Riemann solver that calculates a flux. This flux is used to update
+ * the conserved variables of particle i or both particles.
+ *
+ * This method also calculates the maximal velocity used for the time step.
+ *
+ * @param r2 Squared distance between particle i and particle j.
+ * @param dx Distance vector between the particles (dx = pi->x - pj->x).
+ * @param hi Smoothing length of particle i.
+ * @param hj Smoothing length of particle j.
+ * @param pi Particle i.
+ * @param pj Particle j.
+ * @param mode 1 for the symmetric call, 0 for the non-symmetric call.
+ */
 __attribute__((always_inline)) INLINE static void runner_iact_fluxes_common(
     float r2, float *dx, float hi, float hj, struct part *pi, struct part *pj,
     int mode) {
 
   float r = sqrtf(r2);
   float xi, xj;
-  float hi_inv, hi_inv2;
-  float hj_inv, hj_inv2;
+  float hi_inv, hi_inv_dim;
+  float hj_inv, hj_inv_dim;
   float wi, wj, wi_dx, wj_dx;
   int k, l;
   float A[3];
@@ -619,13 +201,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_fluxes_common(
   float xij_i[3], xfac, xijdotdx;
   float vmax, dvdotdx;
   float vi[3], vj[3], vij[3];
-  GFLOAT Wi[5], Wj[5];  //, Whalf[5];
-#ifdef USE_GRADIENTS
-  GFLOAT dWi[5], dWj[5];
-  float xij_j[3];
-#endif
-  //    GFLOAT rhoe;
-  //    GFLOAT flux[5][3];
+  float Wi[5], Wj[5];
   float dti, dtj, mindt;
   float n_unit[3];
 
@@ -635,8 +211,8 @@ __attribute__((always_inline)) INLINE static void runner_iact_fluxes_common(
       Bi[k][l] = pi->geometry.matrix_E[k][l];
       Bj[k][l] = pj->geometry.matrix_E[k][l];
     }
-    vi[k] = pi->v[k]; /* particle velocities */
-    vj[k] = pj->v[k];
+    vi[k] = pi->force.v_full[k]; /* particle velocities */
+    vj[k] = pj->force.v_full[k];
   }
   Vi = pi->geometry.volume;
   Vj = pj->geometry.volume;
@@ -650,51 +226,68 @@ __attribute__((always_inline)) INLINE static void runner_iact_fluxes_common(
   Wj[2] = pj->primitives.v[1];
   Wj[3] = pj->primitives.v[2];
   Wj[4] = pj->primitives.P;
-  dti = pi->ti_end - pi->ti_begin;  // MATTHIEU
-  dtj = pj->ti_end - pj->ti_begin;
 
-  //    if(dti > 1.e-7 || dtj > 1.e-7){
-  //        message("Timestep too large: %g %g!", dti, dtj);
-  //    }
+  dti = pi->force.dt;
+  dtj = pj->force.dt;
 
   /* calculate the maximal signal velocity */
-  vmax = sqrtf(const_hydro_gamma * Wi[4] / Wi[0]) +
-         sqrtf(const_hydro_gamma * Wj[4] / Wj[0]);
+  if (Wi[0] && Wj[0]) {
+    vmax =
+        sqrtf(hydro_gamma * Wi[4] / Wi[0]) + sqrtf(hydro_gamma * Wj[4] / Wj[0]);
+  } else {
+    vmax = 0.0f;
+  }
   dvdotdx = (Wi[1] - Wj[1]) * dx[0] + (Wi[2] - Wj[2]) * dx[1] +
             (Wi[3] - Wj[3]) * dx[2];
   if (dvdotdx > 0.) {
     vmax -= dvdotdx / r;
   }
-  pi->timestepvars.vmax = fmaxf(pi->timestepvars.vmax, vmax);
+  pi->timestepvars.vmax = max(pi->timestepvars.vmax, vmax);
   if (mode == 1) {
-    pj->timestepvars.vmax = fmaxf(pj->timestepvars.vmax, vmax);
+    pj->timestepvars.vmax = max(pj->timestepvars.vmax, vmax);
   }
 
   /* The flux will be exchanged using the smallest time step of the two
    * particles */
-  mindt = fminf(dti, dtj);
+  mindt = min(dti, dtj);
+  dti = mindt;
+  dtj = mindt;
 
   /* Compute kernel of pi. */
   hi_inv = 1.0 / hi;
-  hi_inv2 = hi_inv * hi_inv;
+  hi_inv_dim = pow_dimension(hi_inv);
   xi = r * hi_inv;
   kernel_deval(xi, &wi, &wi_dx);
 
   /* Compute kernel of pj. */
   hj_inv = 1.0 / hj;
-  hj_inv2 = hj_inv * hj_inv;
+  hj_inv_dim = pow_dimension(hj_inv);
   xj = r * hj_inv;
   kernel_deval(xj, &wj, &wj_dx);
 
+  /* Compute h_dt. We are going to use an SPH-like estimate of div_v for that */
+  float dvdr = (pi->v[0] - pj->v[0]) * dx[0] + (pi->v[1] - pj->v[1]) * dx[1] +
+               (pi->v[2] - pj->v[2]) * dx[2];
+  float ri = 1.0f / r;
+  float hidp1 = pow_dimension_plus_one(hi_inv);
+  float hjdp1 = pow_dimension_plus_one(hj_inv);
+  float wi_dr = hidp1 * wi_dx;
+  float wj_dr = hjdp1 * wj_dx;
+  dvdr *= ri;
+  pi->force.h_dt -= pj->conserved.mass * dvdr / pj->primitives.rho * wi_dr;
+  if (mode == 1) {
+    pj->force.h_dt -= pi->conserved.mass * dvdr / pi->primitives.rho * wj_dr;
+  }
+
   /* Compute area */
   /* eqn. (7) */
   Anorm = 0.0f;
   for (k = 0; k < 3; k++) {
     /* we add a minus sign since dx is pi->x - pj->x */
     A[k] = -Vi * (Bi[k][0] * dx[0] + Bi[k][1] * dx[1] + Bi[k][2] * dx[2]) * wi *
-               hi_inv * hi_inv2 -
+               hi_inv_dim -
            Vj * (Bj[k][0] * dx[0] + Bj[k][1] * dx[1] + Bj[k][2] * dx[2]) * wj *
-               hj_inv * hj_inv2;
+               hj_inv_dim;
     Anorm += A[k] * A[k];
   }
 
@@ -708,13 +301,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_fluxes_common(
   Anorm = sqrtf(Anorm);
   for (k = 0; k < 3; k++) n_unit[k] = A[k] / Anorm;
 
-#ifdef PRINT_ID
-  if (pi->id == PRINT_ID || pj->id == PRINT_ID) {
-    printf("pi: %g %g %g\npj: %g %g %g\nA = %g %g %g\n", pi->x[0], pi->x[1],
-           pi->x[2], pj->x[0], pj->x[1], pj->x[2], A[0], A[1], A[2]);
-  }
-#endif
-
   /* Compute interface position (relative to pi, since we don't need the actual
    * position) */
   /* eqn. (8) */
@@ -746,192 +332,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_fluxes_common(
   Wj[2] -= vij[1];
   Wj[3] -= vij[2];
 
-#ifdef USE_GRADIENTS
-  /* perform gradient reconstruction in space and time */
-  /* space */
-  /* Compute interface position (relative to pj, since we don't need the actual
-   * position) */
-  /* eqn. (8) */
-  xfac = hj / (hi + hj);
-  for (k = 0; k < 3; k++) xij_j[k] = xfac * dx[k];
-
-  dWi[0] = pi->primitives.gradients.rho[0] * xij_i[0] +
-           pi->primitives.gradients.rho[1] * xij_i[1] +
-           pi->primitives.gradients.rho[2] * xij_i[2];
-  dWi[1] = pi->primitives.gradients.v[0][0] * xij_i[0] +
-           pi->primitives.gradients.v[0][1] * xij_i[1] +
-           pi->primitives.gradients.v[0][2] * xij_i[2];
-  dWi[2] = pi->primitives.gradients.v[1][0] * xij_i[0] +
-           pi->primitives.gradients.v[1][1] * xij_i[1] +
-           pi->primitives.gradients.v[1][2] * xij_i[2];
-  dWi[3] = pi->primitives.gradients.v[2][0] * xij_i[0] +
-           pi->primitives.gradients.v[2][1] * xij_i[1] +
-           pi->primitives.gradients.v[2][2] * xij_i[2];
-  dWi[4] = pi->primitives.gradients.P[0] * xij_i[0] +
-           pi->primitives.gradients.P[1] * xij_i[1] +
-           pi->primitives.gradients.P[2] * xij_i[2];
-
-  dWj[0] = pj->primitives.gradients.rho[0] * xij_j[0] +
-           pj->primitives.gradients.rho[1] * xij_j[1] +
-           pj->primitives.gradients.rho[2] * xij_j[2];
-  dWj[1] = pj->primitives.gradients.v[0][0] * xij_j[0] +
-           pj->primitives.gradients.v[0][1] * xij_j[1] +
-           pj->primitives.gradients.v[0][2] * xij_j[2];
-  dWj[2] = pj->primitives.gradients.v[1][0] * xij_j[0] +
-           pj->primitives.gradients.v[1][1] * xij_j[1] +
-           pj->primitives.gradients.v[1][2] * xij_j[2];
-  dWj[3] = pj->primitives.gradients.v[2][0] * xij_j[0] +
-           pj->primitives.gradients.v[2][1] * xij_j[1] +
-           pj->primitives.gradients.v[2][2] * xij_j[2];
-  dWj[4] = pj->primitives.gradients.P[0] * xij_j[0] +
-           pj->primitives.gradients.P[1] * xij_j[1] +
-           pj->primitives.gradients.P[2] * xij_j[2];
-
-#ifdef PER_FACE_LIMITER
-
-  float xij_i_norm;
-  GFLOAT phi_i, phi_j;
-  GFLOAT delta1, delta2;
-  GFLOAT phiminus, phiplus;
-  GFLOAT phimin, phimax;
-  GFLOAT phibar;
-  /* free parameters, values from Hopkins */
-  GFLOAT psi1 = 0.5, psi2 = 0.25;
-  GFLOAT phi_mid0, phi_mid;
-
-  for (k = 0; k < 10; k++) {
-    if (k < 5) {
-      phi_i = Wi[k];
-      phi_j = Wj[k];
-      phi_mid0 = Wi[k] + dWi[k];
-      xij_i_norm = sqrtf(xij_i[0] * xij_i[0] + xij_i[1] * xij_i[1] +
-                         xij_i[2] * xij_i[2]);
-    } else {
-      phi_i = Wj[k - 5];
-      phi_j = Wi[k - 5];
-      phi_mid0 = Wj[k - 5] + dWj[k - 5];
-      xij_i_norm = sqrtf(xij_j[0] * xij_j[0] + xij_j[1] * xij_j[1] +
-                         xij_j[2] * xij_j[2]);
-    }
-
-    delta1 = psi1 * fabs(phi_i - phi_j);
-    delta2 = psi2 * fabs(phi_i - phi_j);
-
-    phimin = fmin(phi_i, phi_j);
-    phimax = fmax(phi_i, phi_j);
-
-    phibar = phi_i + xij_i_norm / r * (phi_j - phi_i);
-
-    /* if sign(phimax+delta1) == sign(phimax) */
-    if ((phimax + delta1) * phimax > 0.0f) {
-      phiplus = phimax + delta1;
-    } else {
-      phiplus = phimax / (1.0f + delta1 / fabs(phimax));
-    }
-
-    /* if sign(phimin-delta1) == sign(phimin) */
-    if ((phimin - delta1) * phimin > 0.0f) {
-      phiminus = phimin - delta1;
-    } else {
-      phiminus = phimin / (1.0f + delta1 / fabs(phimin));
-    }
-
-    if (phi_i == phi_j) {
-      phi_mid = phi_i;
-    } else {
-      if (phi_i < phi_j) {
-        phi_mid = fmax(phiminus, fmin(phibar + delta2, phi_mid0));
-      } else {
-        phi_mid = fmin(phiplus, fmax(phibar - delta2, phi_mid0));
-      }
-    }
-
-    if (k < 5) {
-      dWi[k] = phi_mid - phi_i;
-    } else {
-      dWj[k - 5] = phi_mid - phi_i;
-    }
-  }
-
-#endif
-
-  //    printf("dWL: %g %g %g %g %g\n", dWi[0], dWi[1], dWi[2], dWi[3], dWi[4]);
-  //    printf("dWR: %g %g %g %g %g\n", dWj[0], dWj[1], dWj[2], dWj[3], dWj[4]);
-
-  /* time */
-  dWi[0] -= 0.5 * mindt * (Wi[1] * pi->primitives.gradients.rho[0] +
-                           Wi[2] * pi->primitives.gradients.rho[1] +
-                           Wi[3] * pi->primitives.gradients.rho[2] +
-                           Wi[0] * (pi->primitives.gradients.v[0][0] +
-                                    pi->primitives.gradients.v[1][1] +
-                                    pi->primitives.gradients.v[2][2]));
-  dWi[1] -= 0.5 * mindt * (Wi[1] * pi->primitives.gradients.v[0][0] +
-                           Wi[2] * pi->primitives.gradients.v[0][1] +
-                           Wi[3] * pi->primitives.gradients.v[0][2] +
-                           pi->primitives.gradients.P[0] / Wi[0]);
-  dWi[2] -= 0.5 * mindt * (Wi[1] * pi->primitives.gradients.v[1][0] +
-                           Wi[2] * pi->primitives.gradients.v[1][1] +
-                           Wi[3] * pi->primitives.gradients.v[1][2] +
-                           pi->primitives.gradients.P[1] / Wi[0]);
-  dWi[3] -= 0.5 * mindt * (Wi[1] * pi->primitives.gradients.v[2][0] +
-                           Wi[2] * pi->primitives.gradients.v[2][1] +
-                           Wi[3] * pi->primitives.gradients.v[2][2] +
-                           pi->primitives.gradients.P[2] / Wi[0]);
-  dWi[4] -= 0.5 * mindt *
-            (Wi[1] * pi->primitives.gradients.P[0] +
-             Wi[2] * pi->primitives.gradients.P[1] +
-             Wi[3] * pi->primitives.gradients.P[2] +
-             const_hydro_gamma * Wi[4] * (pi->primitives.gradients.v[0][0] +
-                                          pi->primitives.gradients.v[1][1] +
-                                          pi->primitives.gradients.v[2][2]));
-
-  dWj[0] -= 0.5 * mindt * (Wj[1] * pj->primitives.gradients.rho[0] +
-                           Wj[2] * pj->primitives.gradients.rho[1] +
-                           Wj[3] * pj->primitives.gradients.rho[2] +
-                           Wj[0] * (pj->primitives.gradients.v[0][0] +
-                                    pj->primitives.gradients.v[1][1] +
-                                    pj->primitives.gradients.v[2][2]));
-  dWj[1] -= 0.5 * mindt * (Wj[1] * pj->primitives.gradients.v[0][0] +
-                           Wj[2] * pj->primitives.gradients.v[0][1] +
-                           Wj[3] * pj->primitives.gradients.v[0][2] +
-                           pj->primitives.gradients.P[0] / Wj[0]);
-  dWj[2] -= 0.5 * mindt * (Wj[1] * pj->primitives.gradients.v[1][0] +
-                           Wj[2] * pj->primitives.gradients.v[1][1] +
-                           Wj[3] * pj->primitives.gradients.v[1][2] +
-                           pj->primitives.gradients.P[1] / Wj[0]);
-  dWj[3] -= 0.5 * mindt * (Wj[1] * pj->primitives.gradients.v[2][0] +
-                           Wj[2] * pj->primitives.gradients.v[2][1] +
-                           Wj[3] * pj->primitives.gradients.v[2][2] +
-                           pj->primitives.gradients.P[2] / Wj[0]);
-  dWj[4] -= 0.5 * mindt *
-            (Wj[1] * pj->primitives.gradients.P[0] +
-             Wj[2] * pj->primitives.gradients.P[1] +
-             Wj[3] * pj->primitives.gradients.P[2] +
-             const_hydro_gamma * Wj[4] * (pj->primitives.gradients.v[0][0] +
-                                          pj->primitives.gradients.v[1][1] +
-                                          pj->primitives.gradients.v[2][2]));
-
-  //    printf("WL: %g %g %g %g %g\n", Wi[0], Wi[1], Wi[2], Wi[3], Wi[4]);
-  //    printf("WR: %g %g %g %g %g\n", Wj[0], Wj[1], Wj[2], Wj[3], Wj[4]);
-
-  //    printf("dWL: %g %g %g %g %g\n", dWi[0], dWi[1], dWi[2], dWi[3], dWi[4]);
-  //    printf("dWR: %g %g %g %g %g\n", dWj[0], dWj[1], dWj[2], dWj[3], dWj[4]);
-
-  Wi[0] += dWi[0];
-  Wi[1] += dWi[1];
-  Wi[2] += dWi[2];
-  Wi[3] += dWi[3];
-  Wi[4] += dWi[4];
-
-  Wj[0] += dWj[0];
-  Wj[1] += dWj[1];
-  Wj[2] += dWj[2];
-  Wj[3] += dWj[3];
-  Wj[4] += dWj[4];
-#endif
-
-  /* apply slope limiter interface by interface */
-  /* ... to be done ... */
+  hydro_gradients_predict(pi, pj, hi, hj, dx, r, xij_i, Wi, Wj, mindt);
 
   /* we don't need to rotate, we can use the unit vector in the Riemann problem
    * itself (see GIZMO) */
@@ -940,7 +341,9 @@ __attribute__((always_inline)) INLINE static void runner_iact_fluxes_common(
     printf("mindt: %g\n", mindt);
     printf("WL: %g %g %g %g %g\n", pi->primitives.rho, pi->primitives.v[0],
            pi->primitives.v[1], pi->primitives.v[2], pi->primitives.P);
+#ifdef USE_GRADIENTS
     printf("dWL: %g %g %g %g %g\n", dWi[0], dWi[1], dWi[2], dWi[3], dWi[4]);
+#endif
     printf("gradWL[0]: %g %g %g\n", pi->primitives.gradients.rho[0],
            pi->primitives.gradients.rho[1], pi->primitives.gradients.rho[2]);
     printf("gradWL[1]: %g %g %g\n", pi->primitives.gradients.v[0][0],
@@ -954,7 +357,9 @@ __attribute__((always_inline)) INLINE static void runner_iact_fluxes_common(
     printf("WL': %g %g %g %g %g\n", Wi[0], Wi[1], Wi[2], Wi[3], Wi[4]);
     printf("WR: %g %g %g %g %g\n", pj->primitives.rho, pj->primitives.v[0],
            pj->primitives.v[1], pj->primitives.v[2], pj->primitives.P);
+#ifdef USE_GRADIENTS
     printf("dWR: %g %g %g %g %g\n", dWj[0], dWj[1], dWj[2], dWj[3], dWj[4]);
+#endif
     printf("gradWR[0]: %g %g %g\n", pj->primitives.gradients.rho[0],
            pj->primitives.gradients.rho[1], pj->primitives.gradients.rho[2]);
     printf("gradWR[1]: %g %g %g\n", pj->primitives.gradients.v[0][0],
@@ -969,61 +374,133 @@ __attribute__((always_inline)) INLINE static void runner_iact_fluxes_common(
     error("Negative density or pressure!\n");
   }
 
-  GFLOAT totflux[5];
+  float totflux[5];
   riemann_solve_for_flux(Wi, Wj, n_unit, vij, totflux);
 
+  /* Store mass flux */
+  float mflux = dti * Anorm * totflux[0];
+  pi->gravity.mflux[0] += mflux * dx[0];
+  pi->gravity.mflux[1] += mflux * dx[1];
+  pi->gravity.mflux[2] += mflux * dx[2];
+
   /* Update conserved variables */
   /* eqn. (16) */
-  pi->conserved.mass -= mindt * Anorm * totflux[0];
-  pi->conserved.momentum[0] -= mindt * Anorm * totflux[1];
-  pi->conserved.momentum[1] -= mindt * Anorm * totflux[2];
-  pi->conserved.momentum[2] -= mindt * Anorm * totflux[3];
-  pi->conserved.energy -= mindt * Anorm * totflux[4];
-
-#ifdef THERMAL_ENERGY
-  float ekin = 0.5 * (pi->primitives.v[0] * pi->primitives.v[0] +
-                      pi->primitives.v[1] * pi->primitives.v[1] +
-                      pi->primitives.v[2] * pi->primitives.v[2]);
-  pi->conserved.energy += mindt * Anorm * totflux[1] * pi->primitives.v[0];
-  pi->conserved.energy += mindt * Anorm * totflux[2] * pi->primitives.v[1];
-  pi->conserved.energy += mindt * Anorm * totflux[3] * pi->primitives.v[2];
-  pi->conserved.energy -= mindt * Anorm * totflux[0] * ekin;
-#endif
-
-  /* the non symmetric version is never called when using mindt, whether this
-   * piece of code
-   * should always be executed or only in the symmetric case is currently
-   * unclear */
-  if (mode == 1) {
-    pj->conserved.mass += mindt * Anorm * totflux[0];
-    pj->conserved.momentum[0] += mindt * Anorm * totflux[1];
-    pj->conserved.momentum[1] += mindt * Anorm * totflux[2];
-    pj->conserved.momentum[2] += mindt * Anorm * totflux[3];
-    pj->conserved.energy += mindt * Anorm * totflux[4];
-
-#ifdef THERMAL_ENERGY
-    ekin = 0.5 * (pj->primitives.v[0] * pj->primitives.v[0] +
-                  pj->primitives.v[1] * pj->primitives.v[1] +
-                  pj->primitives.v[2] * pj->primitives.v[2]);
-    pj->conserved.energy -= mindt * Anorm * totflux[1] * pj->primitives.v[0];
-    pj->conserved.energy -= mindt * Anorm * totflux[2] * pj->primitives.v[1];
-    pj->conserved.energy -= mindt * Anorm * totflux[3] * pj->primitives.v[2];
-    pj->conserved.energy += mindt * Anorm * totflux[0] * ekin;
-#endif
+  pi->conserved.flux.mass -= dti * Anorm * totflux[0];
+  pi->conserved.flux.momentum[0] -= dti * Anorm * totflux[1];
+  pi->conserved.flux.momentum[1] -= dti * Anorm * totflux[2];
+  pi->conserved.flux.momentum[2] -= dti * Anorm * totflux[3];
+  pi->conserved.flux.energy -= dti * Anorm * totflux[4];
+
+  float ekin = 0.5f * (pi->primitives.v[0] * pi->primitives.v[0] +
+                       pi->primitives.v[1] * pi->primitives.v[1] +
+                       pi->primitives.v[2] * pi->primitives.v[2]);
+  pi->conserved.flux.energy += dti * Anorm * totflux[1] * pi->primitives.v[0];
+  pi->conserved.flux.energy += dti * Anorm * totflux[2] * pi->primitives.v[1];
+  pi->conserved.flux.energy += dti * Anorm * totflux[3] * pi->primitives.v[2];
+  pi->conserved.flux.energy -= dti * Anorm * totflux[0] * ekin;
+
+  /* here is how it works:
+     Mode will only be 1 if both particles are ACTIVE and they are in the same
+     cell. In this case, this method IS the flux calculation for particle j, and
+     we HAVE TO UPDATE it.
+     Mode 0 can mean several things: it can mean that particle j is INACTIVE, in
+     which case we NEED TO UPDATE it, since otherwise the flux is lost from the
+     system and the conserved variable is not conserved.
+     It can also mean that particle j sits in another cell and is ACTIVE. In
+     this case, the flux exchange for particle j is done TWICE and we SHOULD NOT
+     UPDATE particle j.
+     ==> we update particle j if (MODE IS 1) OR (j IS INACTIVE)
+  */
+  if (mode == 1 || pj->ti_end > pi->ti_end) {
+    /* Store mass flux */
+    mflux = dtj * Anorm * totflux[0];
+    pj->gravity.mflux[0] -= mflux * dx[0];
+    pj->gravity.mflux[1] -= mflux * dx[1];
+    pj->gravity.mflux[2] -= mflux * dx[2];
+
+    pj->conserved.flux.mass += dtj * Anorm * totflux[0];
+    pj->conserved.flux.momentum[0] += dtj * Anorm * totflux[1];
+    pj->conserved.flux.momentum[1] += dtj * Anorm * totflux[2];
+    pj->conserved.flux.momentum[2] += dtj * Anorm * totflux[3];
+    pj->conserved.flux.energy += dtj * Anorm * totflux[4];
+
+    ekin = 0.5f * (pj->primitives.v[0] * pj->primitives.v[0] +
+                   pj->primitives.v[1] * pj->primitives.v[1] +
+                   pj->primitives.v[2] * pj->primitives.v[2]);
+    pj->conserved.flux.energy -= dtj * Anorm * totflux[1] * pj->primitives.v[0];
+    pj->conserved.flux.energy -= dtj * Anorm * totflux[2] * pj->primitives.v[1];
+    pj->conserved.flux.energy -= dtj * Anorm * totflux[3] * pj->primitives.v[2];
+    pj->conserved.flux.energy += dtj * Anorm * totflux[0] * ekin;
   }
 }
 
-/* this corresponds to task_subtype_fluxes */
-__attribute__((always_inline)) INLINE static void runner_iact_hydro_loop3(
+/**
+ * @brief Flux calculation between particle i and particle j
+ *
+ * This method calls runner_iact_fluxes_common with mode 1.
+ *
+ * @param r2 Squared distance between particle i and particle j.
+ * @param dx Distance vector between the particles (dx = pi->x - pj->x).
+ * @param hi Smoothing length of particle i.
+ * @param hj Smoothing length of particle j.
+ * @param pi Particle i.
+ * @param pj Particle j.
+ */
+__attribute__((always_inline)) INLINE static void runner_iact_force(
     float r2, float *dx, float hi, float hj, struct part *pi, struct part *pj) {
 
   runner_iact_fluxes_common(r2, dx, hi, hj, pi, pj, 1);
 }
 
-/* this corresponds to task_subtype_fluxes */
-__attribute__((always_inline)) INLINE static void
-runner_iact_nonsym_hydro_loop3(float r2, float *dx, float hi, float hj,
-                               struct part *pi, struct part *pj) {
+/**
+ * @brief Flux calculation between particle i and particle j: non-symmetric
+ * version
+ *
+ * This method calls runner_iact_fluxes_common with mode 0.
+ *
+ * @param r2 Squared distance between particle i and particle j.
+ * @param dx Distance vector between the particles (dx = pi->x - pj->x).
+ * @param hi Smoothing length of particle i.
+ * @param hj Smoothing length of particle j.
+ * @param pi Particle i.
+ * @param pj Particle j.
+ */
+__attribute__((always_inline)) INLINE static void runner_iact_nonsym_force(
+    float r2, float *dx, float hi, float hj, struct part *pi, struct part *pj) {
 
   runner_iact_fluxes_common(r2, dx, hi, hj, pi, pj, 0);
 }
+
+/* Empty vectorised versions: stubs that only raise an error (no gradients) */
+
+__attribute__((always_inline)) INLINE static void runner_iact_vec_density(
+    float *R2, float *Dx, float *Hi, float *Hj, struct part **pi,
+    struct part **pj) {
+  error(
+      "Vectorised versions of the Gizmo interaction functions do not exist "
+      "yet!");
+}
+
+__attribute__((always_inline)) INLINE static void
+runner_iact_nonsym_vec_density(float *R2, float *Dx, float *Hi, float *Hj,
+                               struct part **pi, struct part **pj) {
+  error(
+      "Vectorised versions of the Gizmo interaction functions do not exist "
+      "yet!");
+}
+
+__attribute__((always_inline)) INLINE static void runner_iact_vec_force(
+    float *R2, float *Dx, float *Hi, float *Hj, struct part **pi,
+    struct part **pj) {
+  error(
+      "Vectorised versions of the Gizmo interaction functions do not exist "
+      "yet!");
+}
+
+__attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force(
+    float *R2, float *Dx, float *Hi, float *Hj, struct part **pi,
+    struct part **pj) {
+  error(
+      "Vectorised versions of the Gizmo interaction functions do not exist "
+      "yet!");
+}
diff --git a/src/hydro/Gizmo/hydro_io.h b/src/hydro/Gizmo/hydro_io.h
index 3c51653d994bd9f01864bcc24c6886eba25d1d05..e5f221ae4345dc519a50d332131ecf296f318338 100644
--- a/src/hydro/Gizmo/hydro_io.h
+++ b/src/hydro/Gizmo/hydro_io.h
@@ -1,6 +1,6 @@
 /*******************************************************************************
  * This file is part of SWIFT.
- * Coypright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ * Copyright (c) 2016 Bert Vandenbroucke (bert.vandenbroucke@gmail.com)
  *
  * This program is free software: you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published
@@ -17,77 +17,124 @@
  *
  ******************************************************************************/
 
+#include "adiabatic_index.h"
+#include "hydro_gradients.h"
+#include "hydro_slope_limiters.h"
+#include "io_properties.h"
+#include "riemann.h"
+
 /**
- * @brief Reads the different particles to the HDF5 file
+ * @brief Specifies which particle fields to read from a dataset
  *
- * @param h_grp The HDF5 group in which to read the arrays.
- * @param N The number of particles on that MPI rank.
- * @param N_total The total number of particles (only used in MPI mode)
- * @param offset The offset of the particles for this MPI rank (only used in MPI
- *mode)
- * @param parts The particle array
+ * @param parts The particle array.
+ * @param list The list of i/o properties to read.
+ * @param num_fields The number of i/o fields to read.
+ */
+void hydro_read_particles(struct part* parts, struct io_props* list,
+                          int* num_fields) {
+
+  *num_fields = 8;
+
+  /* List what we want to read */
+  list[0] = io_make_input_field("Coordinates", DOUBLE, 3, COMPULSORY,
+                                UNIT_CONV_LENGTH, parts, x);
+  list[1] = io_make_input_field("Velocities", FLOAT, 3, COMPULSORY,
+                                UNIT_CONV_SPEED, parts, v);
+  list[2] = io_make_input_field("Masses", FLOAT, 1, COMPULSORY, UNIT_CONV_MASS,
+                                parts, conserved.mass);
+  list[3] = io_make_input_field("SmoothingLength", FLOAT, 1, COMPULSORY,
+                                UNIT_CONV_LENGTH, parts, h);
+  list[4] = io_make_input_field("InternalEnergy", FLOAT, 1, COMPULSORY,
+                                UNIT_CONV_ENERGY_PER_UNIT_MASS, parts,
+                                conserved.energy);
+  list[5] = io_make_input_field("ParticleIDs", ULONGLONG, 1, COMPULSORY,
+                                UNIT_CONV_NO_UNITS, parts, id);
+  list[6] = io_make_input_field("Accelerations", FLOAT, 3, OPTIONAL,
+                                UNIT_CONV_ACCELERATION, parts, a_hydro);
+  list[7] = io_make_input_field("Density", FLOAT, 1, OPTIONAL,
+                                UNIT_CONV_DENSITY, parts, primitives.rho);
+}
+
+/**
+ * @brief Get the internal energy of a particle
  *
+ * @param e #engine.
+ * @param p Particle.
+ * @return Internal energy of the particle
  */
-__attribute__((always_inline)) INLINE static void hydro_read_particles(
-    hid_t h_grp, int N, long long N_total, long long offset,
-    struct part* parts) {
-
-  /* Read arrays */
-  readArray(h_grp, "Coordinates", DOUBLE, N, 3, parts, N_total, offset, x,
-            COMPULSORY);
-  readArray(h_grp, "Velocities", FLOAT, N, 3, parts, N_total, offset, v,
-            COMPULSORY);
-  readArray(h_grp, "Masses", FLOAT, N, 1, parts, N_total, offset,
-            conserved.mass, COMPULSORY);
-  readArray(h_grp, "SmoothingLength", FLOAT, N, 1, parts, N_total, offset, h,
-            COMPULSORY);
-  readArray(h_grp, "InternalEnergy", FLOAT, N, 1, parts, N_total, offset,
-            primitives.P, COMPULSORY);
-  readArray(h_grp, "ParticleIDs", ULONGLONG, N, 1, parts, N_total, offset, id,
-            COMPULSORY);
-  readArray(h_grp, "Acceleration", FLOAT, N, 3, parts, N_total, offset, a_hydro,
-            OPTIONAL);
-  readArray(h_grp, "Density", FLOAT, N, 1, parts, N_total, offset,
-            primitives.rho, OPTIONAL);
+float convert_u(struct engine* e, struct part* p) {
+  return p->primitives.P / hydro_gamma_minus_one / p->primitives.rho;
 }
 
 /**
- * @brief Writes the different particles to the HDF5 file
+ * @brief Get the entropic function of a particle
  *
- * @param h_grp The HDF5 group in which to write the arrays.
- * @param fileName The name of the file (unsued in MPI mode).
- * @param xmfFile The XMF file to write to (unused in MPI mode).
- * @param N The number of particles on that MPI rank.
- * @param N_total The total number of particles (only used in MPI mode)
- * @param mpi_rank The MPI rank of this node (only used in MPI mode)
- * @param offset The offset of the particles for this MPI rank (only used in MPI
- *mode)
- * @param parts The particle array
- * @param us The unit system to use
+ * @param e #engine.
+ * @param p Particle.
+ * @return Entropic function of the particle
+ */
+float convert_A(struct engine* e, struct part* p) {
+  return p->primitives.P / pow_gamma(p->primitives.rho);
+}
+
+/**
+ * @brief Get the total energy of a particle
  *
+ * @param e #engine.
+ * @param p Particle.
+ * @return Total energy of the particle
  */
-__attribute__((always_inline)) INLINE static void hydro_write_particles(
-    hid_t h_grp, char* fileName, FILE* xmfFile, int N, long long N_total,
-    int mpi_rank, long long offset, struct part* parts, struct UnitSystem* us) {
-
-  /* Write arrays */
-  writeArray(h_grp, fileName, xmfFile, "Coordinates", DOUBLE, N, 3, parts,
-             N_total, mpi_rank, offset, x, us, UNIT_CONV_LENGTH);
-  writeArray(h_grp, fileName, xmfFile, "Velocities", FLOAT, N, 3, parts,
-             N_total, mpi_rank, offset, v, us, UNIT_CONV_SPEED);
-  writeArray(h_grp, fileName, xmfFile, "Masses", FLOAT, N, 1, parts, N_total,
-             mpi_rank, offset, conserved.mass, us, UNIT_CONV_MASS);
-  writeArray(h_grp, fileName, xmfFile, "SmoothingLength", FLOAT, N, 1, parts,
-             N_total, mpi_rank, offset, h, us, UNIT_CONV_LENGTH);
-  writeArray(h_grp, fileName, xmfFile, "InternalEnergy", FLOAT, N, 1, parts,
-             N_total, mpi_rank, offset, primitives.P, us,
-             UNIT_CONV_ENTROPY_PER_UNIT_MASS);
-  writeArray(h_grp, fileName, xmfFile, "ParticleIDs", ULONGLONG, N, 1, parts,
-             N_total, mpi_rank, offset, id, us, UNIT_CONV_NO_UNITS);
-  writeArray(h_grp, fileName, xmfFile, "Acceleration", FLOAT, N, 3, parts,
-             N_total, mpi_rank, offset, a_hydro, us, UNIT_CONV_ACCELERATION);
-  writeArray(h_grp, fileName, xmfFile, "Density", FLOAT, N, 1, parts, N_total,
-             mpi_rank, offset, primitives.rho, us, UNIT_CONV_DENSITY);
+float convert_Etot(struct engine* e, struct part* p) {
+  float momentum2;
+
+  momentum2 = p->conserved.momentum[0] * p->conserved.momentum[0] +
+              p->conserved.momentum[1] * p->conserved.momentum[1] +
+              p->conserved.momentum[2] * p->conserved.momentum[2];
+
+  return p->conserved.energy + 0.5f * momentum2 / p->conserved.mass;
+}
+
+/**
+ * @brief Specifies which particle fields to write to a dataset
+ *
+ * @param parts The particle array.
+ * @param list The list of i/o properties to write.
+ * @param num_fields The number of i/o fields to write.
+ */
+void hydro_write_particles(struct part* parts, struct io_props* list,
+                           int* num_fields) {
+
+  *num_fields = 13;
+
+  /* List what we want to write */
+  list[0] = io_make_output_field("Coordinates", DOUBLE, 3, UNIT_CONV_LENGTH,
+                                 parts, x);
+  list[1] = io_make_output_field("Velocities", FLOAT, 3, UNIT_CONV_SPEED, parts,
+                                 primitives.v);
+  list[2] = io_make_output_field("Masses", FLOAT, 1, UNIT_CONV_MASS, parts,
+                                 conserved.mass);
+  list[3] = io_make_output_field("SmoothingLength", FLOAT, 1, UNIT_CONV_LENGTH,
+                                 parts, h);
+  list[4] = io_make_output_field_convert_part("InternalEnergy", FLOAT, 1,
+                                              UNIT_CONV_ENERGY_PER_UNIT_MASS,
+                                              parts, primitives.P, convert_u);
+  list[5] = io_make_output_field("ParticleIDs", ULONGLONG, 1,
+                                 UNIT_CONV_NO_UNITS, parts, id);
+  list[6] = io_make_output_field("Acceleration", FLOAT, 3,
+                                 UNIT_CONV_ACCELERATION, parts, a_hydro);
+  list[7] = io_make_output_field("Density", FLOAT, 1, UNIT_CONV_DENSITY, parts,
+                                 primitives.rho);
+  list[8] = io_make_output_field("Volume", FLOAT, 1, UNIT_CONV_VOLUME, parts,
+                                 geometry.volume);
+  list[9] = io_make_output_field("GradDensity", FLOAT, 3, UNIT_CONV_DENSITY,
+                                 parts, primitives.gradients.rho);
+  list[10] = io_make_output_field_convert_part(
+      "Entropy", FLOAT, 1, UNIT_CONV_ENTROPY, parts, primitives.P, convert_A);
+  list[11] = io_make_output_field("Pressure", FLOAT, 1, UNIT_CONV_PRESSURE,
+                                  parts, primitives.P);
+  list[12] =
+      io_make_output_field_convert_part("TotEnergy", FLOAT, 1, UNIT_CONV_ENERGY,
+                                        parts, conserved.energy, convert_Etot);
 }
 
 /**
@@ -95,26 +142,24 @@ __attribute__((always_inline)) INLINE static void hydro_write_particles(
  * @param h_grpsph The HDF5 group in which to write
  */
 void writeSPHflavour(hid_t h_grpsph) {
+  /* Descriptive name of the gradient reconstruction implementation */
+  writeAttribute_s(h_grpsph, "Gradient reconstruction model",
+                   HYDRO_GRADIENT_IMPLEMENTATION);
+
+  /* Descriptive names of the cell wide and piecewise slope limiters */
+  writeAttribute_s(h_grpsph, "Cell wide slope limiter model",
+                   HYDRO_SLOPE_LIMITER_CELL_IMPLEMENTATION);
+  writeAttribute_s(h_grpsph, "Piecewise slope limiter model",
+                   HYDRO_SLOPE_LIMITER_FACE_IMPLEMENTATION);
 
-  /* Kernel function description */
-  writeAttribute_s(h_grpsph, "Kernel", kernel_name);
-  writeAttribute_f(h_grpsph, "Kernel eta", const_eta_kernel);
-  writeAttribute_f(h_grpsph, "Weighted N_ngb", kernel_nwneigh);
-  writeAttribute_f(h_grpsph, "Delta N_ngb", const_delta_nwneigh);
-  writeAttribute_f(h_grpsph, "Hydro gamma", const_hydro_gamma);
-
-  /* Viscosity and thermal conduction */
-  writeAttribute_s(h_grpsph, "Thermal Conductivity Model",
-                   "(No treatment) Legacy Gadget-2 as in Springel (2005)");
-  writeAttribute_s(h_grpsph, "Viscosity Model",
-                   "Legacy Gadget-2 as in Springel (2005)");
-  writeAttribute_f(h_grpsph, "Viscosity alpha", const_viscosity_alpha);
-  writeAttribute_f(h_grpsph, "Viscosity beta", 3.f);
-
-  /* Time integration properties */
-  writeAttribute_f(h_grpsph, "CFL parameter", const_cfl);
-  writeAttribute_f(h_grpsph, "Maximal ln(Delta h) change over dt",
-                   const_ln_max_h_change);
-  writeAttribute_f(h_grpsph, "Maximal Delta h change over dt",
-                   exp(const_ln_max_h_change));
+  /* Descriptive name of the Riemann solver implementation */
+  writeAttribute_s(h_grpsph, "Riemann solver type",
+                   RIEMANN_SOLVER_IMPLEMENTATION);
 }
+
+/**
+ * @brief Tells whether the 'internal energy' output field holds entropy.
+ *
+ * @return 1 if entropy is in 'internal energy', 0 otherwise; always 0 here.
+ */
+int writeEntropyFlag() { return 0; }
diff --git a/src/hydro/Gizmo/hydro_part.h b/src/hydro/Gizmo/hydro_part.h
index 9e5f32f758248d1d1616f4556c81fc8e0b52e83b..c4919ff173c64a4a83a5d1bf61ab82697cc03096 100644
--- a/src/hydro/Gizmo/hydro_part.h
+++ b/src/hydro/Gizmo/hydro_part.h
@@ -1,8 +1,6 @@
 /*******************************************************************************
  * This file is part of SWIFT.
- * Coypright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
- *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
- *                    Bert Vandenbroucke (bert.vandenbroucke@ugent.be)
+ * Copyright (c) 2014 Bert Vandenbroucke (bert.vandenbroucke@ugent.be)
  *
  * This program is free software: you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published
@@ -18,28 +16,24 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  *
  ******************************************************************************/
+#ifndef SWIFT_GIZMO_HYDRO_PART_H
+#define SWIFT_GIZMO_HYDRO_PART_H
 
-/* Some standard headers. */
-#include <stdlib.h>
-
-#define GFLOAT float
+#include "cooling_struct.h"
 
 /* Extra particle data not needed during the computation. */
 struct xpart {
 
-  /* Old position, at last tree rebuild. */
-  double x_old[3];
+  /* Offset between current position and position at last tree rebuild. */
+  float x_diff[3];
 
   /* Velocity at the last full step. */
   float v_full[3];
 
-  /* Entropy at the half-step. */
-  float u_hdt;
-
-  /* Old density. */
-  float omega;
+  /* Additional per-particle data used to record cooling information. */
+  struct cooling_xpart_data cooling_data;
 
-} __attribute__((aligned(xpart_align)));
+} SWIFT_STRUCT_ALIGN;
 
 /* Data of a single particle. */
 struct part {
@@ -47,18 +41,13 @@ struct part {
   /* Particle position. */
   double x[3];
 
-  /* Particle velocity. */
+  /* Particle predicted velocity. */
   float v[3];
 
   /* Particle acceleration. */
   float a_hydro[3];
 
-  float mass;  // MATTHIEU
-  float h_dt;
-  float rho;
-  float rho_dh;
-
-  /* Particle cutoff radius. */
+  /* Particle smoothing length. */
   float h;
 
   /* Particle time of beginning of time-step. */
@@ -67,76 +56,103 @@ struct part {
   /* Particle time of end of time-step. */
   int ti_end;
 
-  /* The primitive hydrodynamical variables */
+  /* Old internal energy flux */
+  float du_dt;
+
+  /* The primitive hydrodynamical variables. */
   struct {
 
-    /* fluid velocity */
-    GFLOAT v[3];
+    /* Fluid velocity. */
+    float v[3];
 
-    /* density */
-    GFLOAT rho;
+    /* Density. */
+    float rho;
 
-    /* pressure */
-    GFLOAT P;
+    /* Pressure. */
+    float P;
 
+    /* Gradients of the primitive variables. */
     struct {
 
-      GFLOAT rho[3];
+      /* Density gradients. */
+      float rho[3];
 
-      GFLOAT v[3][3];
+      /* Fluid velocity gradients. */
+      float v[3][3];
 
-      GFLOAT P[3];
+      /* Pressure gradients. */
+      float P[3];
 
     } gradients;
 
+    /* Quantities needed by the slope limiter. */
     struct {
 
-      /* extreme values among the neighbours */
-      GFLOAT rho[2];
+      /* Extreme values of the density among the neighbours. */
+      float rho[2];
 
-      GFLOAT v[3][2];
+      /* Extreme values of the fluid velocity among the neighbours. */
+      float v[3][2];
 
-      GFLOAT P[2];
+      /* Extreme values of the pressure among the neighbours. */
+      float P[2];
 
-      /* maximal distance to all neighbouring faces */
+      /* Maximal distance to all neighbouring faces. */
       float maxr;
 
     } limiter;
 
   } primitives;
 
-  /* The conserved hydrodynamical variables */
+  /* The conserved hydrodynamical variables. */
   struct {
 
-    /* fluid momentum */
-    GFLOAT momentum[3];
+    /* Fluid momentum. */
+    float momentum[3];
+
+    /* Fluid mass */
+    float mass;
+
+    /* Fluid thermal energy (not per unit mass!). */
+    float energy;
 
-    /* fluid mass */
-    GFLOAT mass;
+    /* Fluxes. */
+    struct {
+
+      /* Mass flux. */
+      float mass;
+
+      /* Momentum flux. */
+      float momentum[3];
 
-    /* fluid energy */
-    GFLOAT energy;
+      /* Energy flux. */
+      float energy;
+
+    } flux;
 
   } conserved;
 
-  /* Geometrical quantities used for hydro */
+  /* Geometrical quantities used for hydro. */
   struct {
 
-    /* volume of the particle */
+    /* Volume of the particle. */
     float volume;
 
-    /* gradient matrix */
+    /* Geometrical shear matrix used to calculate second order accurate
+       gradients */
     float matrix_E[3][3];
 
   } geometry;
 
+  /* Variables used for timestep calculation (currently not used). */
   struct {
 
+    /* Maximum fluid velocity among all neighbours. */
     float vmax;
 
   } timestepvars;
 
-  /* Quantities used during the density loop */
+  /* Quantities used during the volume (=density) loop. */
   struct {
 
     /* Particle velocity divergence. */
@@ -153,10 +169,40 @@ struct part {
 
   } density;
 
+  /* Quantities used during the force loop. */
+  struct {
+
+    /* Needed to drift the primitive variables. */
+    float h_dt;
+
+    /* Physical time step of the particle. */
+    float dt;
+
+    /* Actual velocity of the particle. */
+    float v_full[3];
+
+  } force;
+
+  /* Specific stuff for the gravity-hydro coupling. */
+  struct {
+
+    /* Previous value of the gravitational acceleration. */
+    float old_a[3];
+
+    /* Previous value of the mass flux vector. */
+    float old_mflux[3];
+
+    /* Current value of the mass flux vector. */
+    float mflux[3];
+
+  } gravity;
+
   /* Particle ID. */
-  unsigned long long id;
+  long long id;
 
   /* Associated gravitas. */
   struct gpart *gpart;
 
-} __attribute__((aligned(part_align)));
+} SWIFT_STRUCT_ALIGN;
+
+#endif /* SWIFT_GIZMO_HYDRO_PART_H */
diff --git a/src/hydro/Gizmo/hydro_slope_limiters.h b/src/hydro/Gizmo/hydro_slope_limiters.h
new file mode 100644
index 0000000000000000000000000000000000000000..cd66f05ac9eb9d51744723d93f899b0c8c668e2e
--- /dev/null
+++ b/src/hydro/Gizmo/hydro_slope_limiters.h
@@ -0,0 +1,94 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2016 Bert Vandenbroucke (bert.vandenbroucke@gmail.com)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+#ifndef SWIFT_HYDRO_SLOPE_LIMITERS_H
+#define SWIFT_HYDRO_SLOPE_LIMITERS_H
+
+#include "dimension.h"
+#include "kernel_hydro.h"
+
+#ifdef SLOPE_LIMITER_PER_FACE
+
+#define HYDRO_SLOPE_LIMITER_FACE_IMPLEMENTATION \
+  "GIZMO piecewise slope limiter (Hopkins 2015)"
+#include "hydro_slope_limiters_face.h"
+
+#else
+
+#define HYDRO_SLOPE_LIMITER_FACE_IMPLEMENTATION "No piecewise slope limiter"
+
+/**
+ * @brief No-op face slope limiter, used if SLOPE_LIMITER_PER_FACE is undefined
+ *
+ * @param Wi Hydrodynamic variables of particle i.
+ * @param Wj Hydrodynamic variables of particle j.
+ * @param dWi Difference between the hydrodynamic variables of particle i at the
+ * position of particle i and at the interface position (left unchanged).
+ * @param dWj Difference between the hydrodynamic variables of particle j at the
+ * position of particle j and at the interface position (left unchanged).
+ * @param xij_i Relative position vector of the interface w.r.t. particle i.
+ * @param xij_j Relative position vector of the interface w.r.t. particle j.
+ * @param r Distance between particle i and particle j.
+ */
+__attribute__((always_inline)) INLINE static void hydro_slope_limit_face(
+    float *Wi, float *Wj, float *dWi, float *dWj, float *xij_i, float *xij_j,
+    float r) {}
+
+#endif
+
+#ifdef SLOPE_LIMITER_CELL_WIDE
+
+#define HYDRO_SLOPE_LIMITER_CELL_IMPLEMENTATION \
+  "Cell wide slope limiter (Springel 2010)"
+#include "hydro_slope_limiters_cell.h"
+
+#else
+
+#define HYDRO_SLOPE_LIMITER_CELL_IMPLEMENTATION "No cell wide slope limiter"
+
+/**
+ * @brief No-op initialization, used if SLOPE_LIMITER_CELL_WIDE is undefined
+ *
+ * @param p Particle (not accessed by this empty version).
+ */
+__attribute__((always_inline)) INLINE static void hydro_slope_limit_cell_init(
+    struct part *p) {}
+
+/**
+ * @brief No-op neighbour loop collection step, used if
+ * SLOPE_LIMITER_CELL_WIDE is undefined
+ *
+ * @param pi Particle i (not accessed by this empty version).
+ * @param pj Particle j (not accessed by this empty version).
+ * @param r Distance between particle i and particle j.
+ */
+__attribute__((always_inline)) INLINE static void
+hydro_slope_limit_cell_collect(struct part *pi, struct part *pj, float r) {}
+
+/**
+ * @brief No-op cell slope limiter, used if SLOPE_LIMITER_CELL_WIDE is undefined
+ *
+ * @param p Particle; its gradients are left unchanged by this empty version.
+ */
+__attribute__((always_inline)) INLINE static void hydro_slope_limit_cell(
+    struct part *p) {}
+
+#endif
+
+#endif  // SWIFT_HYDRO_SLOPE_LIMITERS_H
diff --git a/src/hydro/Gizmo/hydro_slope_limiters_cell.h b/src/hydro/Gizmo/hydro_slope_limiters_cell.h
new file mode 100644
index 0000000000000000000000000000000000000000..aa99b43721f669f47a7888a5da0b1933ca1ebd62
--- /dev/null
+++ b/src/hydro/Gizmo/hydro_slope_limiters_cell.h
@@ -0,0 +1,173 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2016 Bert Vandenbroucke (bert.vandenbroucke@gmail.com)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+#include <float.h>
+
+/**
+ * @brief Initialize variables for the cell wide slope limiter
+ *
+ * @param p Particle whose limiter extrema and maxr are reset.
+ */
+__attribute__((always_inline)) INLINE static void hydro_slope_limit_cell_init(
+    struct part* p) {
+  /* [0] = FLT_MAX, [1] = -FLT_MAX: the first collected value replaces them */
+  p->primitives.limiter.rho[0] = FLT_MAX;
+  p->primitives.limiter.rho[1] = -FLT_MAX;
+  p->primitives.limiter.v[0][0] = FLT_MAX;
+  p->primitives.limiter.v[0][1] = -FLT_MAX;
+  p->primitives.limiter.v[1][0] = FLT_MAX;
+  p->primitives.limiter.v[1][1] = -FLT_MAX;
+  p->primitives.limiter.v[2][0] = FLT_MAX;
+  p->primitives.limiter.v[2][1] = -FLT_MAX;
+  p->primitives.limiter.P[0] = FLT_MAX;
+  p->primitives.limiter.P[1] = -FLT_MAX;
+
+  p->primitives.limiter.maxr = -FLT_MAX;
+}
+
+/**
+ * @brief Collect information for the cell wide slope limiter during the
+ * neighbour loop
+ *
+ * @param pi Particle i; its limiter fields are updated.
+ * @param pj Particle j; only its primitive variables are read.
+ * @param r Distance between particle i and particle j.
+ */
+__attribute__((always_inline)) INLINE static void
+hydro_slope_limit_cell_collect(struct part* pi, struct part* pj, float r) {
+
+  /* Update the running minimum ([0]) and maximum ([1]) of each primitive
+   * variable stored in pi with the value found in neighbour pj */
+  pi->primitives.limiter.rho[0] =
+      fmin(pj->primitives.rho, pi->primitives.limiter.rho[0]);
+  pi->primitives.limiter.rho[1] =
+      fmax(pj->primitives.rho, pi->primitives.limiter.rho[1]);
+
+  pi->primitives.limiter.v[0][0] =
+      fmin(pj->primitives.v[0], pi->primitives.limiter.v[0][0]);
+  pi->primitives.limiter.v[0][1] =
+      fmax(pj->primitives.v[0], pi->primitives.limiter.v[0][1]);
+  pi->primitives.limiter.v[1][0] =
+      fmin(pj->primitives.v[1], pi->primitives.limiter.v[1][0]);
+  pi->primitives.limiter.v[1][1] =
+      fmax(pj->primitives.v[1], pi->primitives.limiter.v[1][1]);
+  pi->primitives.limiter.v[2][0] =
+      fmin(pj->primitives.v[2], pi->primitives.limiter.v[2][0]);
+  pi->primitives.limiter.v[2][1] =
+      fmax(pj->primitives.v[2], pi->primitives.limiter.v[2][1]);
+
+  pi->primitives.limiter.P[0] =
+      fmin(pj->primitives.P, pi->primitives.limiter.P[0]);
+  pi->primitives.limiter.P[1] =
+      fmax(pj->primitives.P, pi->primitives.limiter.P[1]);
+  /* Keep the largest distance r passed in so far */
+  pi->primitives.limiter.maxr = fmax(r, pi->primitives.limiter.maxr);
+}
+
+/**
+ * @brief Slope limit cell gradients
+ *
+ * @param p Particle; its primitives.gradients are rescaled in place.
+ */
+__attribute__((always_inline)) INLINE static void hydro_slope_limit_cell(
+    struct part* p) {
+
+  float gradrho[3], gradv[3][3], gradP[3];
+  float gradtrue, gradmax, gradmin, alpha;
+  /* Local copies of the gradients, used below to compute their norms */
+  gradrho[0] = p->primitives.gradients.rho[0];
+  gradrho[1] = p->primitives.gradients.rho[1];
+  gradrho[2] = p->primitives.gradients.rho[2];
+
+  gradv[0][0] = p->primitives.gradients.v[0][0];
+  gradv[0][1] = p->primitives.gradients.v[0][1];
+  gradv[0][2] = p->primitives.gradients.v[0][2];
+
+  gradv[1][0] = p->primitives.gradients.v[1][0];
+  gradv[1][1] = p->primitives.gradients.v[1][1];
+  gradv[1][2] = p->primitives.gradients.v[1][2];
+
+  gradv[2][0] = p->primitives.gradients.v[2][0];
+  gradv[2][1] = p->primitives.gradients.v[2][1];
+  gradv[2][2] = p->primitives.gradients.v[2][2];
+
+  gradP[0] = p->primitives.gradients.P[0];
+  gradP[1] = p->primitives.gradients.P[1];
+  gradP[2] = p->primitives.gradients.P[2];
+  /* Density gradient: scale by alpha (at most 1) if its norm is non-zero */
+  gradtrue = sqrtf(gradrho[0] * gradrho[0] + gradrho[1] * gradrho[1] +
+                   gradrho[2] * gradrho[2]);
+  if (gradtrue) {
+    gradtrue *= p->primitives.limiter.maxr;
+    gradmax = p->primitives.limiter.rho[1] - p->primitives.rho;
+    gradmin = p->primitives.rho - p->primitives.limiter.rho[0];
+    alpha = fmin(1.0f, fmin(gradmax / gradtrue, gradmin / gradtrue));
+    p->primitives.gradients.rho[0] *= alpha;
+    p->primitives.gradients.rho[1] *= alpha;
+    p->primitives.gradients.rho[2] *= alpha;
+  }
+  /* Same limiting for the gradient of the velocity component v[0] */
+  gradtrue = sqrtf(gradv[0][0] * gradv[0][0] + gradv[0][1] * gradv[0][1] +
+                   gradv[0][2] * gradv[0][2]);
+  if (gradtrue) {
+    gradtrue *= p->primitives.limiter.maxr;
+    gradmax = p->primitives.limiter.v[0][1] - p->primitives.v[0];
+    gradmin = p->primitives.v[0] - p->primitives.limiter.v[0][0];
+    alpha = fmin(1.0f, fmin(gradmax / gradtrue, gradmin / gradtrue));
+    p->primitives.gradients.v[0][0] *= alpha;
+    p->primitives.gradients.v[0][1] *= alpha;
+    p->primitives.gradients.v[0][2] *= alpha;
+  }
+  /* Same limiting for the gradient of the velocity component v[1] */
+  gradtrue = sqrtf(gradv[1][0] * gradv[1][0] + gradv[1][1] * gradv[1][1] +
+                   gradv[1][2] * gradv[1][2]);
+  if (gradtrue) {
+    gradtrue *= p->primitives.limiter.maxr;
+    gradmax = p->primitives.limiter.v[1][1] - p->primitives.v[1];
+    gradmin = p->primitives.v[1] - p->primitives.limiter.v[1][0];
+    alpha = fmin(1.0f, fmin(gradmax / gradtrue, gradmin / gradtrue));
+    p->primitives.gradients.v[1][0] *= alpha;
+    p->primitives.gradients.v[1][1] *= alpha;
+    p->primitives.gradients.v[1][2] *= alpha;
+  }
+  /* Same limiting for the gradient of the velocity component v[2] */
+  gradtrue = sqrtf(gradv[2][0] * gradv[2][0] + gradv[2][1] * gradv[2][1] +
+                   gradv[2][2] * gradv[2][2]);
+  if (gradtrue) {
+    gradtrue *= p->primitives.limiter.maxr;
+    gradmax = p->primitives.limiter.v[2][1] - p->primitives.v[2];
+    gradmin = p->primitives.v[2] - p->primitives.limiter.v[2][0];
+    alpha = fmin(1.0f, fmin(gradmax / gradtrue, gradmin / gradtrue));
+    p->primitives.gradients.v[2][0] *= alpha;
+    p->primitives.gradients.v[2][1] *= alpha;
+    p->primitives.gradients.v[2][2] *= alpha;
+  }
+  /* Same limiting for the pressure gradient */
+  gradtrue =
+      sqrtf(gradP[0] * gradP[0] + gradP[1] * gradP[1] + gradP[2] * gradP[2]);
+  if (gradtrue) {
+    gradtrue *= p->primitives.limiter.maxr;
+    gradmax = p->primitives.limiter.P[1] - p->primitives.P;
+    gradmin = p->primitives.P - p->primitives.limiter.P[0];
+    alpha = fmin(1.0f, fmin(gradmax / gradtrue, gradmin / gradtrue));
+    p->primitives.gradients.P[0] *= alpha;
+    p->primitives.gradients.P[1] *= alpha;
+    p->primitives.gradients.P[2] *= alpha;
+  }
+}
diff --git a/src/hydro/Gizmo/hydro_slope_limiters_face.h b/src/hydro/Gizmo/hydro_slope_limiters_face.h
new file mode 100644
index 0000000000000000000000000000000000000000..7ae5dd2eb073d9aae8ab6f2efffdf8df15b4bb4a
--- /dev/null
+++ b/src/hydro/Gizmo/hydro_slope_limiters_face.h
@@ -0,0 +1,121 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2016 Bert Vandenbroucke (bert.vandenbroucke@gmail.com)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+/**
+ * @brief Slope limit a single quantity at the interface
+ *
+ * @param phi_i Value of the quantity at the particle position.
+ * @param phi_j Value of the quantity at the neighbouring particle position.
+ * @param phi_mid0 Extrapolated value of the quantity at the interface position.
+ * @param xij_norm Distance between the particle position and the interface
+ * position.
+ * @param r Distance between the particle and its neighbour.
+ * @return The slope limited difference between the quantity at the particle
+ * position and the quantity at the interface position (0 if phi_i == phi_j).
+ */
+__attribute__((always_inline)) INLINE static float
+hydro_slope_limit_face_quantity(float phi_i, float phi_j, float phi_mid0,
+                                float xij_norm, float r) {
+
+  float delta1, delta2, phimin, phimax, phibar, phiplus, phiminus, phi_mid;
+  const float psi1 = 0.5f;
+  const float psi2 = 0.25f;
+  /* No jump between the two particles: the limited difference is zero */
+  if (phi_i == phi_j) {
+    return 0.0f;
+  }
+  /* Tolerances, set as fixed fractions (psi1, psi2) of the jump */
+  delta1 = psi1 * fabs(phi_i - phi_j);
+  delta2 = psi2 * fabs(phi_i - phi_j);
+
+  phimin = fmin(phi_i, phi_j);
+  phimax = fmax(phi_i, phi_j);
+  /* Value linearly interpolated between phi_i and phi_j at the interface */
+  phibar = phi_i + xij_norm / r * (phi_j - phi_i);
+
+  /* if sign(phimax+delta1) == sign(phimax) */
+  if ((phimax + delta1) * phimax > 0.0f) {
+    phiplus = phimax + delta1;
+  } else {
+    phiplus = phimax / (1.0f + delta1 / fabs(phimax));
+  }
+
+  /* if sign(phimin-delta1) == sign(phimin) */
+  if ((phimin - delta1) * phimin > 0.0f) {
+    phiminus = phimin - delta1;
+  } else {
+    phiminus = phimin / (1.0f + delta1 / fabs(phimin));
+  }
+  /* Clamp the extrapolated value phi_mid0 using the bounds computed above */
+  if (phi_i < phi_j) {
+    phi_mid = fmax(phiminus, fmin(phibar + delta2, phi_mid0));
+  } else {
+    phi_mid = fmin(phiplus, fmax(phibar - delta2, phi_mid0));
+  }
+
+  return phi_mid - phi_i;
+}
+
+/**
+ * @brief Slope limit the slopes at the interface between two particles
+ *
+ * @param Wi Hydrodynamic variables of particle i.
+ * @param Wj Hydrodynamic variables of particle j.
+ * @param dWi Difference between the hydrodynamic variables of particle i at the
+ * position of particle i and at the interface position (limited in place).
+ * @param dWj Difference between the hydrodynamic variables of particle j at the
+ * position of particle j and at the interface position (limited in place).
+ * @param xij_i Relative position vector of the interface w.r.t. particle i.
+ * @param xij_j Relative position vector of the interface w.r.t. particle j.
+ * @param r Distance between particle i and particle j.
+ */
+__attribute__((always_inline)) INLINE static void hydro_slope_limit_face(
+    float *Wi, float *Wj, float *dWi, float *dWj, float *xij_i, float *xij_j,
+    float r) {
+
+  float xij_i_norm, xij_j_norm;
+
+  xij_i_norm =
+      sqrtf(xij_i[0] * xij_i[0] + xij_i[1] * xij_i[1] + xij_i[2] * xij_i[2]);
+
+  xij_j_norm =
+      sqrtf(xij_j[0] * xij_j[0] + xij_j[1] * xij_j[1] + xij_j[2] * xij_j[2]);
+  /* Limit the 5 components of dWi, one quantity at a time */
+  dWi[0] = hydro_slope_limit_face_quantity(Wi[0], Wj[0], Wi[0] + dWi[0],
+                                           xij_i_norm, r);
+  dWi[1] = hydro_slope_limit_face_quantity(Wi[1], Wj[1], Wi[1] + dWi[1],
+                                           xij_i_norm, r);
+  dWi[2] = hydro_slope_limit_face_quantity(Wi[2], Wj[2], Wi[2] + dWi[2],
+                                           xij_i_norm, r);
+  dWi[3] = hydro_slope_limit_face_quantity(Wi[3], Wj[3], Wi[3] + dWi[3],
+                                           xij_i_norm, r);
+  dWi[4] = hydro_slope_limit_face_quantity(Wi[4], Wj[4], Wi[4] + dWi[4],
+                                           xij_i_norm, r);
+  /* Same for dWj, with the roles of particles i and j swapped */
+  dWj[0] = hydro_slope_limit_face_quantity(Wj[0], Wi[0], Wj[0] + dWj[0],
+                                           xij_j_norm, r);
+  dWj[1] = hydro_slope_limit_face_quantity(Wj[1], Wi[1], Wj[1] + dWj[1],
+                                           xij_j_norm, r);
+  dWj[2] = hydro_slope_limit_face_quantity(Wj[2], Wi[2], Wj[2] + dWj[2],
+                                           xij_j_norm, r);
+  dWj[3] = hydro_slope_limit_face_quantity(Wj[3], Wi[3], Wj[3] + dWj[3],
+                                           xij_j_norm, r);
+  dWj[4] = hydro_slope_limit_face_quantity(Wj[4], Wi[4], Wj[4] + dWj[4],
+                                           xij_j_norm, r);
+}
diff --git a/src/hydro/Minimal/hydro.h b/src/hydro/Minimal/hydro.h
index a5d73aad02372aa22b840c2cb0d3100cd439e75d..eeb389537c56876126c60b2d29a728029c72844b 100644
--- a/src/hydro/Minimal/hydro.h
+++ b/src/hydro/Minimal/hydro.h
@@ -29,12 +29,12 @@
  * term is implemented.
  *
  * This corresponds to equations (43), (44), (45), (101), (103)  and (104) with
- * \f$\beta=3\f$ and \f$\alpha_u=0\f$ of
- * Price, D., Journal of Computational Physics, 2012, Volume 231, Issue 3,
- * pp. 759-794.
+ * \f$\beta=3\f$ and \f$\alpha_u=0\f$ of Price, D., Journal of Computational
+ * Physics, 2012, Volume 231, Issue 3, pp. 759-794.
  */
 
 #include "adiabatic_index.h"
+#include "approx_math.h"
 #include "dimension.h"
 #include "equation_of_state.h"
 #include "hydro_properties.h"
@@ -102,6 +102,28 @@ __attribute__((always_inline)) INLINE static float hydro_get_soundspeed(
   return gas_soundspeed_from_internal_energy(p->rho, u);
 }
 
+/**
+ * @brief Returns the density of a particle (the value of its rho field)
+ *
+ * @param p The particle of interest (read only)
+ */
+__attribute__((always_inline)) INLINE static float hydro_get_density(
+    const struct part *restrict p) {
+
+  return p->rho;
+}
+
+/**
+ * @brief Returns the mass of a particle (the value of its mass field)
+ *
+ * @param p The particle of interest (read only)
+ */
+__attribute__((always_inline)) INLINE static float hydro_get_mass(
+    const struct part *restrict p) {
+
+  return p->mass;
+}
+
 /**
  * @brief Modifies the thermal state of a particle to the imposed internal
  * energy
@@ -286,15 +308,32 @@ __attribute__((always_inline)) INLINE static void hydro_reset_acceleration(
  * Additional hydrodynamic quantites are drifted forward in time here. These
  * include thermal quantities (thermal energy or total energy or entropy, ...).
  *
- * @param p The particle
- * @param xp The extended data of the particle
- * @param t0 The time at the start of the drift (on the timeline)
- * @param t1 The time at the end of the drift (on the timeline)
- * @param timeBase The minimal time-step size
+ * @param p The particle.
+ * @param xp The extended data of the particle.
+ * @param dt The drift time-step.
+ * @param t0 The time at the start of the drift (on the timeline).
+ * @param t1 The time at the end of the drift (on the timeline).
+ * @param timeBase The minimal time-step size.
  */
 __attribute__((always_inline)) INLINE static void hydro_predict_extra(
-    struct part *restrict p, const struct xpart *restrict xp, int t0, int t1,
-    double timeBase) {
+    struct part *restrict p, const struct xpart *restrict xp, float dt, int t0,
+    int t1, double timeBase) {
+
+  const float h_inv = 1.f / p->h;
+
+  /* Predict smoothing length */
+  const float w1 = p->force.h_dt * h_inv * dt;
+  if (fabsf(w1) < 0.2f)
+    p->h *= approx_expf(w1); /* 4th order expansion of exp(w) */
+  else
+    p->h *= expf(w1);
+
+  /* Predict density */
+  const float w2 = -hydro_dimension * w1;
+  if (fabsf(w2) < 0.2f)
+    p->rho *= approx_expf(w2); /* 4th order expansion of exp(w) */
+  else
+    p->rho *= expf(w2);
 
   /* Drift the pressure */
   const float dt_entr = (t1 - (p->ti_begin + p->ti_end) / 2) * timeBase;
diff --git a/src/hydro/Minimal/hydro_iact.h b/src/hydro/Minimal/hydro_iact.h
index a8a855d9db81f6927c1d8b45410a57d50a8366de..edb060e4fd71fcc136e1bedf6e8a752d1d50d54f 100644
--- a/src/hydro/Minimal/hydro_iact.h
+++ b/src/hydro/Minimal/hydro_iact.h
@@ -28,12 +28,12 @@
  * term is implemented.
  *
  * This corresponds to equations (43), (44), (45), (101), (103)  and (104) with
- * \f$\beta=3\f$ and \f$\alpha_u=0\f$ of
- * Price, D., Journal of Computational Physics, 2012, Volume 231, Issue 3,
- * pp. 759-794.
+ * \f$\beta=3\f$ and \f$\alpha_u=0\f$ of Price, D., Journal of Computational
+ * Physics, 2012, Volume 231, Issue 3, pp. 759-794.
  */
 
 #include "adiabatic_index.h"
+#include "minmax.h"
 
 /**
  * @brief Density loop
@@ -161,7 +161,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_force(
                      (pi->v[2] - pj->v[2]) * dx[2];
 
   /* Are the particles moving towards each others ? */
-  const float omega_ij = fminf(dvdr, 0.f);
+  const float omega_ij = min(dvdr, 0.f);
   const float mu_ij = fac_mu * r_inv * omega_ij; /* This is 0 or negative */
 
   /* Compute sound speeds and signal velocity */
@@ -212,8 +212,8 @@ __attribute__((always_inline)) INLINE static void runner_iact_force(
   pj->force.h_dt -= mi * dvdr * r_inv / rhoi * wj_dr;
 
   /* Update the signal velocity. */
-  pi->force.v_sig = fmaxf(pi->force.v_sig, v_sig);
-  pj->force.v_sig = fmaxf(pj->force.v_sig, v_sig);
+  pi->force.v_sig = max(pi->force.v_sig, v_sig);
+  pj->force.v_sig = max(pj->force.v_sig, v_sig);
 }
 
 /**
@@ -272,7 +272,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force(
                      (pi->v[2] - pj->v[2]) * dx[2];
 
   /* Are the particles moving towards each others ? */
-  const float omega_ij = fminf(dvdr, 0.f);
+  const float omega_ij = min(dvdr, 0.f);
   const float mu_ij = fac_mu * r_inv * omega_ij; /* This is 0 or negative */
 
   /* Compute sound speeds and signal velocity */
@@ -315,7 +315,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force(
   pi->force.h_dt -= mj * dvdr * r_inv / rhoj * wi_dr;
 
   /* Update the signal velocity. */
-  pi->force.v_sig = fmaxf(pi->force.v_sig, v_sig);
+  pi->force.v_sig = max(pi->force.v_sig, v_sig);
 }
 
 /**
diff --git a/src/hydro/Minimal/hydro_part.h b/src/hydro/Minimal/hydro_part.h
index ad65f8b44fc67f4aae6470246cbab91bc3710007..8e23bddf5153043421319810683266b27d297f93 100644
--- a/src/hydro/Minimal/hydro_part.h
+++ b/src/hydro/Minimal/hydro_part.h
@@ -28,11 +28,12 @@
  * term is implemented.
  *
  * This corresponds to equations (43), (44), (45), (101), (103)  and (104) with
- * \f$\beta=3\f$ and \f$\alpha_u=0\f$ of
- * Price, D., Journal of Computational Physics, 2012, Volume 231, Issue 3,
- * pp. 759-794.
+ * \f$\beta=3\f$ and \f$\alpha_u=0\f$ of Price, D., Journal of Computational
+ * Physics, 2012, Volume 231, Issue 3, pp. 759-794.
  */
 
+#include "cooling_struct.h"
+
 /**
  * @brief Particle fields not needed during the SPH loops over neighbours.
  *
@@ -42,12 +43,16 @@
  */
 struct xpart {
 
-  float x_diff[3]; /*!< Offset between current position and position at last
-                      tree rebuild. */
+  /*! Offset between current position and position at last tree rebuild. */
+  float x_diff[3];
+
+  /*! Velocity at the last full step. */
+  float v_full[3];
 
-  float v_full[3]; /*!< Velocity at the last full step. */
+  /*! Additional per-particle data used to record cooling information. */
+  struct cooling_xpart_data cooling_data;
 
-} __attribute__((aligned(xpart_align)));
+} SWIFT_STRUCT_ALIGN;
 
 /**
  * @brief Particle fields for the SPH particles
@@ -58,27 +63,38 @@ struct xpart {
  */
 struct part {
 
-  double x[3]; /*!< Particle position. */
+  /*! Particle position. */
+  double x[3];
 
-  float v[3]; /*!< Particle predicted velocity. */
+  /*! Particle predicted velocity. */
+  float v[3];
 
-  float a_hydro[3]; /*!< Particle acceleration. */
+  /*! Particle acceleration. */
+  float a_hydro[3];
 
-  float mass; /*!< Particle mass. */
+  /*! Particle mass. */
+  float mass;
 
-  float h; /*!< Particle smoothing length. */
+  /*! Particle smoothing length. */
+  float h;
 
-  int ti_begin; /*!< Time at the beginning of time-step. */
+  /*! Time at the beginning of time-step. */
+  int ti_begin;
 
-  int ti_end; /*!< Time at the end of time-step. */
+  /*! Time at the end of time-step. */
+  int ti_end;
 
-  float u; /*!< Particle internal energy. */
+  /*! Particle internal energy. */
+  float u;
 
-  float u_dt; /*!< Time derivative of the internal energy. */
+  /*! Time derivative of the internal energy. */
+  float u_dt;
 
-  float rho; /*!< Particle density. */
+  /*! Particle density. */
+  float rho;
 
-  float rho_dh; /*!< Derivative of density with respect to h */
+  /*! Derivative of density with respect to h */
+  float rho_dh;
 
   /* Store density/force specific stuff. */
   union {
@@ -92,10 +108,12 @@ struct part {
      */
     struct {
 
-      float wcount; /*!< Neighbour number count. */
+      /*! Neighbour number count. */
+      float wcount;
+
+      /*! Derivative of the neighbour number with respect to h. */
+      float wcount_dh;
 
-      float wcount_dh; /*!< Derivative of the neighbour number with respect to
-                          h. */
     } density;
 
     /**
@@ -107,19 +125,24 @@ struct part {
      */
     struct {
 
-      float pressure; /*!< Particle pressure. */
+      /*! Particle pressure. */
+      float pressure;
 
-      float v_sig; /*!< Particle signal velocity */
+      /*! Particle signal velocity */
+      float v_sig;
 
-      float h_dt; /*!< Time derivative of smoothing length  */
+      /*! Time derivative of smoothing length  */
+      float h_dt;
 
     } force;
   };
 
-  long long id; /*!< Particle unique ID. */
+  /*! Particle unique ID. */
+  long long id;
 
-  struct gpart* gpart; /*!< Pointer to corresponding gravity part. */
+  /*! Pointer to corresponding gravity part. */
+  struct gpart* gpart;
 
-} __attribute__((aligned(part_align)));
+} SWIFT_STRUCT_ALIGN;
 
 #endif /* SWIFT_MINIMAL_HYDRO_PART_H */
diff --git a/src/hydro_io.h b/src/hydro_io.h
index 5ca09635e20cbd20e677ae5d7a390bfb38792af4..05ae94ade7b103ff1b584dc2447cbab40479d1fc 100644
--- a/src/hydro_io.h
+++ b/src/hydro_io.h
@@ -30,6 +30,8 @@
 #include "./hydro/PressureEntropy/hydro_io.h"
 #elif defined(DEFAULT_SPH)
 #include "./hydro/Default/hydro_io.h"
+#elif defined(GIZMO_SPH)
+#include "./hydro/Gizmo/hydro_io.h"
 #else
 #error "Invalid choice of SPH variant"
 #endif
diff --git a/src/hydro_properties.c b/src/hydro_properties.c
index 0e4eaf150c764e27156747bb01db20a71f03c7b5..815969975b4f5e6b39099e71bbbec4e43c875ddc 100644
--- a/src/hydro_properties.c
+++ b/src/hydro_properties.c
@@ -56,6 +56,15 @@ void hydro_props_init(struct hydro_props *p,
 
 void hydro_props_print(const struct hydro_props *p) {
 
+#if defined(EOS_IDEAL_GAS)
+  message("Equation of state: Ideal gas.");
+#elif defined(EOS_ISOTHERMAL_GAS)
+  message(
+      "Equation of state: Isothermal with internal energy "
+      "per unit mass set to %f.",
+      const_isothermal_internal_energy);
+#endif
+
   message("Adiabatic index gamma: %f.", hydro_gamma);
 
   message("Hydrodynamic scheme: %s in %dD.", SPH_IMPLEMENTATION,
diff --git a/src/kernel_gravity.h b/src/kernel_gravity.h
index b38feb5758debf87add2007ee3684d869f393f7e..a1e382a21d04b7354aaf215069e999627e56ee07 100644
--- a/src/kernel_gravity.h
+++ b/src/kernel_gravity.h
@@ -50,7 +50,7 @@ static const float
                                     -10.66666667f,
                                     38.4f,
                                     -48.f,
-                                    21.3333333,
+                                    21.3333333f,
                                     0.f,
                                     0.f,
                                     -0.066666667f, /* 0.5 < u < 1 */
diff --git a/src/kernel_hydro.h b/src/kernel_hydro.h
index a53465571dbea73c1e2460491500bf6561066e85..a2eb065d60beb1ea624f0e2387bc0b0f3f01c3f5 100644
--- a/src/kernel_hydro.h
+++ b/src/kernel_hydro.h
@@ -46,8 +46,8 @@
 
 /* Coefficients for the kernel. */
 #define kernel_name "Cubic spline (M4)"
-#define kernel_degree 3 /* Degree of the polynomial */
-#define kernel_ivals 2  /* Number of branches */
+#define kernel_degree 3 /*!< Degree of the polynomial */
+#define kernel_ivals 2  /*!< Number of branches */
 #if defined(HYDRO_DIMENSION_3D)
 #define kernel_gamma ((float)(1.825742))
 #define kernel_constant ((float)(16. * M_1_PI))
@@ -238,11 +238,14 @@ static const float kernel_coeffs[(kernel_degree + 1) * (kernel_ivals + 1)]
 /**
  * @brief Computes the kernel function and its derivative.
  *
- * Returns garbage if $u > \\gamma = H/h$
+ * The kernel function needs to be multiplied by \f$h^{-d}\f$ and the gradient
+ * by \f$h^{-(d+1)}\f$, where \f$d\f$ is the dimensionality of the problem.
  *
- * @param u The ratio of the distance to the smoothing length $u = x/h$.
- * @param W (return) The value of the kernel function $W(x,h)$.
- * @param dW_dx (return) The norm of the gradient of $|\\nabla W(x,h)|$.
+ * Returns 0 if \f$u > \gamma = H/h\f$.
+ *
+ * @param u The ratio of the distance to the smoothing length \f$u = x/h\f$.
+ * @param W (return) The value of the kernel function \f$W(x,h)\f$.
+ * @param dW_dx (return) The norm of the gradient of \f$|\nabla W(x,h)|\f$.
  */
 __attribute__((always_inline)) INLINE static void kernel_deval(
     float u, float *restrict W, float *restrict dW_dx) {
@@ -250,15 +253,10 @@ __attribute__((always_inline)) INLINE static void kernel_deval(
   /* Go to the range [0,1[ from [0,H[ */
   const float x = u * kernel_gamma_inv;
 
-  //#if kernel_ivals == 1
-  ///* Only one branch in this case */
-  // const float *const coeffs = &kernel_coeffs[0];
-  //#else
   /* Pick the correct branch of the kernel */
   const int temp = (int)(x * kernel_ivals_f);
   const int ind = temp > kernel_ivals ? kernel_ivals : temp;
   const float *const coeffs = &kernel_coeffs[ind * (kernel_degree + 1)];
-  //#endif
 
   /* First two terms of the polynomial ... */
   float w = coeffs[0] * x + coeffs[1];
@@ -278,25 +276,24 @@ __attribute__((always_inline)) INLINE static void kernel_deval(
 /**
  * @brief Computes the kernel function.
  *
- * Returns garbage if $u > \\gamma = H/h$
+ * The kernel function needs to be multiplied by \f$h^{-d}\f$,
+ * where \f$d\f$ is the dimensionality of the problem.
  *
- * @param u The ratio of the distance to the smoothing length $u = x/h$.
- * @param W (return) The value of the kernel function $W(x,h)$.
+ * Returns 0 if \f$u > \gamma = H/h\f$
+ *
+ * @param u The ratio of the distance to the smoothing length \f$u = x/h\f$.
+ * @param W (return) The value of the kernel function \f$W(x,h)\f$.
  */
 __attribute__((always_inline)) INLINE static void kernel_eval(
     float u, float *restrict W) {
+
   /* Go to the range [0,1[ from [0,H[ */
   const float x = u * kernel_gamma_inv;
 
-  //#if kernel_ivals == 1
-  ///* Only one branch in this case */
-  // const float *const coeffs = &kernel_coeffs[0];
-  //#else
   /* Pick the correct branch of the kernel */
   const int temp = (int)(x * kernel_ivals_f);
   const int ind = temp > kernel_ivals ? kernel_ivals : temp;
   const float *const coeffs = &kernel_coeffs[ind * (kernel_degree + 1)];
-  //#endif
 
   /* First two terms of the polynomial ... */
   float w = coeffs[0] * x + coeffs[1];
diff --git a/src/kernel_long_gravity.h b/src/kernel_long_gravity.h
index d247c7a461d4bd116f30ab106143f6c75e1b941e..6952681999f833bce7755a72aaee742a7fa0ed22 100644
--- a/src/kernel_long_gravity.h
+++ b/src/kernel_long_gravity.h
@@ -41,7 +41,7 @@ __attribute__((always_inline)) INLINE static void kernel_long_grav_eval(
   const float arg2 = u * one_over_sqrt_pi;
   const float arg3 = -arg1 * arg1;
 
-  const float term1 = erfc(arg1);
+  const float term1 = erfcf(arg1);
   const float term2 = arg2 * expf(arg3);
 
   *W = term1 + term2;
diff --git a/src/kick.h b/src/kick.h
index b57e13d4ebf27d3a366d571e7fd4cd819653f726..e3fa3bf78c7da514abacf697a9d94212020e5a7b 100644
--- a/src/kick.h
+++ b/src/kick.h
@@ -39,8 +39,8 @@ __attribute__((always_inline)) INLINE static void kick_gpart(
   /* Compute the time step for this kick */
   const int ti_start = (gp->ti_begin + gp->ti_end) / 2;
   const int ti_end = gp->ti_end + new_dti / 2;
-  const double dt = (ti_end - ti_start) * timeBase;
-  const double half_dt = (ti_end - gp->ti_end) * timeBase;
+  const float dt = (ti_end - ti_start) * timeBase;
+  const float half_dt = (ti_end - gp->ti_end) * timeBase;
 
   /* Move particle forward in time */
   gp->ti_begin = gp->ti_end;
@@ -70,8 +70,8 @@ __attribute__((always_inline)) INLINE static void kick_part(
   /* Compute the time step for this kick */
   const int ti_start = (p->ti_begin + p->ti_end) / 2;
   const int ti_end = p->ti_end + new_dti / 2;
-  const double dt = (ti_end - ti_start) * timeBase;
-  const double half_dt = (ti_end - p->ti_end) * timeBase;
+  const float dt = (ti_end - ti_start) * timeBase;
+  const float half_dt = (ti_end - p->ti_end) * timeBase;
 
   /* Move particle forward in time */
   p->ti_begin = p->ti_end;
diff --git a/src/lock.h b/src/lock.h
index ca7f01ee029cd1c57ed8fd0f3237ea54cb43e9a7..b2dd2eac9d0ca5d7807907e31cf3fa31894f9aed 100644
--- a/src/lock.h
+++ b/src/lock.h
@@ -34,6 +34,7 @@
 #define lock_trylock(l) (pthread_spin_lock(l) != 0)
 #define lock_unlock(l) (pthread_spin_unlock(l) != 0)
 #define lock_unlock_blind(l) pthread_spin_unlock(l)
+
 #elif defined(PTHREAD_LOCK)
 #include <pthread.h>
 #define swift_lock_type pthread_mutex_t
@@ -43,6 +44,7 @@
 #define lock_trylock(l) (pthread_mutex_trylock(l) != 0)
 #define lock_unlock(l) (pthread_mutex_unlock(l) != 0)
 #define lock_unlock_blind(l) pthread_mutex_unlock(l)
+
 #else
 #define swift_lock_type volatile int
 #define lock_init(l) (*(l) = 0)
diff --git a/src/minmax.h b/src/minmax.h
index 8000df6edf93a30b964c578c6f25d324fed3f4cf..9d92cd71d849dba615fdb05bc342014e0593d989 100644
--- a/src/minmax.h
+++ b/src/minmax.h
@@ -24,11 +24,11 @@
  *
  * This macro evaluates its arguments exactly once.
  */
-#define min(a, b)           \
-  ({                        \
-    __typeof__(a) _a = (a); \
-    __typeof__(b) _b = (b); \
-    _a < _b ? _a : _b;      \
+#define min(a, b)                 \
+  ({                              \
+    const __typeof__(a) _a = (a); \
+    const __typeof__(b) _b = (b); \
+    _a < _b ? _a : _b;            \
   })
 
 /**
@@ -36,11 +36,11 @@
  *
  * This macro evaluates its arguments exactly once.
  */
-#define max(a, b)           \
-  ({                        \
-    __typeof__(a) _a = (a); \
-    __typeof__(b) _b = (b); \
-    _a > _b ? _a : _b;      \
+#define max(a, b)                 \
+  ({                              \
+    const __typeof__(a) _a = (a); \
+    const __typeof__(b) _b = (b); \
+    _a > _b ? _a : _b;            \
   })
 
 #endif /* SWIFT_MINMAX_H */
diff --git a/src/parallel_io.c b/src/parallel_io.c
index 262ab7d9e4405b54538e5b687c0aadfccf1da2f0..0da34d4dad114db0920c8e8f3bb617295ff3da96 100644
--- a/src/parallel_io.c
+++ b/src/parallel_io.c
@@ -37,6 +37,7 @@
 
 /* Local includes. */
 #include "common_io.h"
+#include "dimension.h"
 #include "engine.h"
 #include "error.h"
 #include "gravity_io.h"
@@ -383,6 +384,7 @@ void read_ic_parallel(char* fileName, const struct UnitSystem* internal_units,
   size_t N[NUM_PARTICLE_TYPES] = {0};
   long long N_total[NUM_PARTICLE_TYPES] = {0};
   long long offset[NUM_PARTICLE_TYPES] = {0};
+  int dimension = 3; /* Assume 3D if nothing is specified */
 
   /* Open file */
   /* message("Opening file '%s' as IC.", fileName); */
@@ -409,6 +411,15 @@ void read_ic_parallel(char* fileName, const struct UnitSystem* internal_units,
   h_grp = H5Gopen(h_file, "/Header", H5P_DEFAULT);
   if (h_grp < 0) error("Error while opening file header\n");
 
+  /* Check the dimensionality of the ICs (if the info exists) */
+  const hid_t hid_dim = H5Aexists(h_grp, "Dimension");
+  if (hid_dim < 0)
+    error("Error while testing existance of 'Dimension' attribute");
+  if (hid_dim > 0) readAttribute(h_grp, "Dimension", INT, &dimension);
+  if (dimension != hydro_dimension)
+    error("ICs dimensionality (%dD) does not match code dimensionality (%dD)",
+          dimension, (int)hydro_dimension);
+
   /* Read the relevant information and print status */
   int flag_entropy_temp[6];
   readAttribute(h_grp, "Flag_Entropy_ICs", INT, flag_entropy_temp);
@@ -517,27 +528,18 @@ void read_ic_parallel(char* fileName, const struct UnitSystem* internal_units,
 
     int num_fields = 0;
     struct io_props list[100];
-    size_t N = 0;
+    size_t Nparticles = 0;
 
     /* Read particle fields into the particle structure */
     switch (ptype) {
 
       case GAS:
-        /* if (!dry_run) */
-        /*   hydro_read_particles(h_grp, N[ptype], N_total[ptype],
-         * offset[ptype], */
-        /*                        *parts); */
-        /* break; */
-        N = *Ngas;
+        Nparticles = *Ngas;
         hydro_read_particles(*parts, list, &num_fields);
         break;
 
       case DM:
-        /* if (!dry_run) */
-        /*   darkmatter_read_particles(h_grp, N[ptype], N_total[ptype], */
-        /*                             offset[ptype], *gparts); */
-        /* break; */
-        N = Ndm;
+        Nparticles = Ndm;
         darkmatter_read_particles(*gparts, list, &num_fields);
         break;
 
@@ -548,7 +550,7 @@ void read_ic_parallel(char* fileName, const struct UnitSystem* internal_units,
     /* Read everything */
     if (!dry_run)
       for (int i = 0; i < num_fields; ++i)
-        readArray(h_grp, list[i], N, N_total[ptype], offset[ptype],
+        readArray(h_grp, list[i], Nparticles, N_total[ptype], offset[ptype],
                   internal_units, ic_units);
 
     /* Close particle group */
@@ -675,6 +677,8 @@ void write_output_parallel(struct engine* e, const char* baseName,
   writeAttribute(h_grp, "BoxSize", DOUBLE, e->s->dim, 3);
   double dblTime = e->time;
   writeAttribute(h_grp, "Time", DOUBLE, &dblTime, 1);
+  int dimension = (int)hydro_dimension;
+  writeAttribute(h_grp, "Dimension", INT, &dimension, 1);
 
   /* GADGET-2 legacy values */
   /* Number of particles of each type */
@@ -782,13 +786,13 @@ void write_output_parallel(struct engine* e, const char* baseName,
 
     int num_fields = 0;
     struct io_props list[100];
-    size_t N = 0;
+    size_t Nparticles = 0;
 
     /* Write particle fields from the particle structure */
     switch (ptype) {
 
       case GAS:
-        N = Ngas;
+        Nparticles = Ngas;
         hydro_write_particles(parts, list, &num_fields);
         break;
 
@@ -805,7 +809,7 @@ void write_output_parallel(struct engine* e, const char* baseName,
         collect_dm_gparts(gparts, Ntot, dmparts, Ndm);
 
         /* Write DM particles */
-        N = Ndm;
+        Nparticles = Ndm;
         darkmatter_write_particles(dmparts, list, &num_fields);
 
         /* Free temporary array */
@@ -818,9 +822,9 @@ void write_output_parallel(struct engine* e, const char* baseName,
 
     /* Write everything */
     for (int i = 0; i < num_fields; ++i)
-      writeArray(e, h_grp, fileName, xmfFile, partTypeGroupName, list[i], N,
-                 N_total[ptype], mpi_rank, offset[ptype], internal_units,
-                 snapshot_units);
+      writeArray(e, h_grp, fileName, xmfFile, partTypeGroupName, list[i],
+                 Nparticles, N_total[ptype], mpi_rank, offset[ptype],
+                 internal_units, snapshot_units);
 
     /* Free temporary array */
     free(dmparts);
diff --git a/src/part.h b/src/part.h
index 188330ebae83299ceb69f2ea755b0289304a248f..ca5eacc7e2c8c0df08a91047f617b5c61a2f0a1e 100644
--- a/src/part.h
+++ b/src/part.h
@@ -31,12 +31,13 @@
 #endif
 
 /* Local headers. */
+#include "align.h"
 #include "const.h"
 
 /* Some constants. */
-#define part_align 64
-#define xpart_align 32
-#define gpart_align 32
+#define part_align 128
+#define xpart_align 128
+#define gpart_align 128
 
 /* Import the right hydro particle definition */
 #if defined(MINIMAL_SPH)
@@ -47,6 +48,9 @@
 #include "./hydro/PressureEntropy/hydro_part.h"
 #elif defined(DEFAULT_SPH)
 #include "./hydro/Default/hydro_part.h"
+#elif defined(GIZMO_SPH)
+#include "./hydro/Gizmo/hydro_part.h"
+#define EXTRA_HYDRO_LOOP
 #else
 #error "Invalid choice of SPH variant"
 #endif
diff --git a/src/partition.c b/src/partition.c
index 11de3a62be65179bb128ff89d9fbf48861a2175b..8d17bedf0aaeadc64044b12ffe1bb8887b02d83e 100644
--- a/src/partition.c
+++ b/src/partition.c
@@ -143,7 +143,7 @@ static void split_vector(struct space *s, int nregions, int *samplecells) {
             select = l;
           }
         }
-        s->cells[n++].nodeID = select;
+        s->cells_top[n++].nodeID = select;
       }
     }
   }
@@ -274,7 +274,7 @@ static void accumulate_counts(struct space *s, int *counts) {
  */
 static void split_metis(struct space *s, int nregions, int *celllist) {
 
-  for (int i = 0; i < s->nr_cells; i++) s->cells[i].nodeID = celllist[i];
+  for (int i = 0; i < s->nr_cells; i++) s->cells_top[i].nodeID = celllist[i];
 }
 #endif
 
@@ -419,7 +419,7 @@ static void repart_edge_metis(int partweights, int bothweights, int nodeID,
   /* Create weight arrays using task ticks for vertices and edges (edges
    * assume the same graph structure as used in the part_ calls). */
   int nr_cells = s->nr_cells;
-  struct cell *cells = s->cells;
+  struct cell *cells = s->cells_top;
   float wscale = 1e-3, vscale = 1e-3, wscale_buff = 0.0;
   int wtot = 0;
   int wmax = 1e9 / nr_nodes;
@@ -455,8 +455,8 @@ static void repart_edge_metis(int partweights, int bothweights, int nodeID,
     /* Skip un-interesting tasks. */
     if (t->type != task_type_self && t->type != task_type_pair &&
         t->type != task_type_sub_self && t->type != task_type_sub_self &&
-        t->type != task_type_ghost && t->type != task_type_drift &&
-        t->type != task_type_kick && t->type != task_type_init)
+        t->type != task_type_ghost && t->type != task_type_kick &&
+        t->type != task_type_init)
       continue;
 
     /* Get the task weight. This can be slightly negative on multiple board
@@ -494,8 +494,7 @@ static void repart_edge_metis(int partweights, int bothweights, int nodeID,
     int cid = ci - cells;
 
     /* Different weights for different tasks. */
-    if (t->type == task_type_ghost || t->type == task_type_drift ||
-        t->type == task_type_kick) {
+    if (t->type == task_type_ghost || t->type == task_type_kick) {
       /* Particle updates add only to vertex weight. */
       if (taskvweights) weights_v[cid] += w;
 
@@ -796,7 +795,7 @@ void partition_initial_partition(struct partition *initial_partition,
     /* Run through the cells and set their nodeID. */
     // message("s->dim = [%e,%e,%e]", s->dim[0], s->dim[1], s->dim[2]);
     for (k = 0; k < s->nr_cells; k++) {
-      c = &s->cells[k];
+      c = &s->cells_top[k];
       for (j = 0; j < 3; j++)
         ind[j] = c->loc[j] / s->dim[j] * initial_partition->grid[j];
       c->nodeID = ind[0] +
@@ -1038,10 +1037,10 @@ static int check_complete(struct space *s, int verbose, int nregions) {
   int failed = 0;
   for (int i = 0; i < nregions; i++) present[i] = 0;
   for (int i = 0; i < s->nr_cells; i++) {
-    if (s->cells[i].nodeID <= nregions)
-      present[s->cells[i].nodeID]++;
+    if (s->cells_top[i].nodeID <= nregions)
+      present[s->cells_top[i].nodeID]++;
     else
-      message("Bad nodeID: %d", s->cells[i].nodeID);
+      message("Bad nodeID: %d", s->cells_top[i].nodeID);
   }
   for (int i = 0; i < nregions; i++) {
     if (!present[i]) {
@@ -1086,13 +1085,13 @@ int partition_space_to_space(double *oldh, double *oldcdim, int *oldnodeIDs,
       for (int k = 0; k < s->cdim[2]; k++) {
 
         /* Scale indices to old cell space. */
-        int ii = rint(i * s->iwidth[0] * oldh[0]);
-        int jj = rint(j * s->iwidth[1] * oldh[1]);
-        int kk = rint(k * s->iwidth[2] * oldh[2]);
+        const int ii = rint(i * s->iwidth[0] * oldh[0]);
+        const int jj = rint(j * s->iwidth[1] * oldh[1]);
+        const int kk = rint(k * s->iwidth[2] * oldh[2]);
 
-        int cid = cell_getid(s->cdim, i, j, k);
-        int oldcid = cell_getid(oldcdim, ii, jj, kk);
-        s->cells[cid].nodeID = oldnodeIDs[oldcid];
+        const int cid = cell_getid(s->cdim, i, j, k);
+        const int oldcid = cell_getid(oldcdim, ii, jj, kk);
+        s->cells_top[cid].nodeID = oldnodeIDs[oldcid];
 
         if (oldnodeIDs[oldcid] > nr_nodes) nr_nodes = oldnodeIDs[oldcid];
       }
diff --git a/src/potential.c b/src/potential.c
new file mode 100644
index 0000000000000000000000000000000000000000..5433a05e3e7886ad88021d3916cae26adfe8b954
--- /dev/null
+++ b/src/potential.c
@@ -0,0 +1,52 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2016 Tom Theuns (tom.theuns@durham.ac.uk)
+ *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+/* Config parameters. */
+#include "../config.h"
+
+/* This object's header. */
+#include "potential.h"
+
+/**
+ * @brief Initialises the external potential properties in the internal system
+ * of units.
+ *
+ * @param parameter_file The parsed parameter file
+ * @param phys_const Physical constants in internal units
+ * @param us The current internal system of units
+ * @param potential The external potential properties to initialize
+ */
+void potential_init(const struct swift_params* parameter_file,
+                    const struct phys_const* phys_const,
+                    const struct UnitSystem* us,
+                    struct external_potential* potential) {
+
+  potential_init_backend(parameter_file, phys_const, us, potential);
+}
+
+/**
+ * @brief Prints the properties of the external potential to stdout.
+ *
+ * @param  potential The external potential properties.
+ */
+void potential_print(const struct external_potential* potential) {
+
+  potential_print_backend(potential);
+}
diff --git a/src/potential.h b/src/potential.h
new file mode 100644
index 0000000000000000000000000000000000000000..77bd41794a3a8cd244405493898d63b3f80ff3a6
--- /dev/null
+++ b/src/potential.h
@@ -0,0 +1,54 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2016  Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+#ifndef SWIFT_POTENTIAL_H
+#define SWIFT_POTENTIAL_H
+
+/**
+ * @file src/potential.h
+ * @brief Branches between the different external gravitational potentials.
+ */
+
+/* Config parameters. */
+#include "../config.h"
+
+/* Local includes. */
+#include "const.h"
+
+/* Import the right external potential definition */
+#if defined(EXTERNAL_POTENTIAL_NONE)
+#include "./potential/none/potential.h"
+#elif defined(EXTERNAL_POTENTIAL_POINTMASS)
+#include "./potential/point_mass/potential.h"
+#elif defined(EXTERNAL_POTENTIAL_ISOTHERMALPOTENTIAL)
+#include "./potential/isothermal/potential.h"
+#elif defined(EXTERNAL_POTENTIAL_DISC_PATCH)
+#include "./potential/disc_patch/potential.h"
+#else
+#error "Invalid choice of external potential"
+#endif
+
+/* Now, some generic functions, defined in the source file */
+void potential_init(const struct swift_params* parameter_file,
+                    const struct phys_const* phys_const,
+                    const struct UnitSystem* us,
+                    struct external_potential* potential);
+
+void potential_print(const struct external_potential* potential);
+
+#endif /* SWIFT_POTENTIAL_H */
diff --git a/src/potential/disc_patch/potential.h b/src/potential/disc_patch/potential.h
new file mode 100644
index 0000000000000000000000000000000000000000..21d168818e164ad3b3e18076ba824285e40956aa
--- /dev/null
+++ b/src/potential/disc_patch/potential.h
@@ -0,0 +1,199 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2016 Tom Theuns (tom.theuns@durham.ac.uk)
+ *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+#ifndef SWIFT_DISC_PATCH_H
+#define SWIFT_DISC_PATCH_H
+
+/* Config parameters. */
+#include "../config.h"
+
+/* Some standard headers. */
+#include <float.h>
+#include <math.h>
+
+/* Local includes. */
+#include "const.h"
+#include "error.h"
+#include "parser.h"
+#include "part.h"
+#include "physical_constants.h"
+#include "units.h"
+
+/**
+ * @brief External Potential Properties - Disc patch case
+ *
+ * See Creasey, Theuns & Bower, 2013, MNRAS, Volume 429, Issue 3, p.1922-1948
+ */
+struct external_potential {
+
+  /*! Surface density of the disc */
+  double surface_density;
+
+  /*! Disc scale-height */
+  double scale_height;
+
+  /*! Position of the disc along the z-axis */
+  double z_disc;
+
+  /*! Dynamical time of the system */
+  double dynamical_time;
+
+  /*! Time over which to grow the disk in units of the dynamical time */
+  double growth_time;
+
+  /*! Time-step condition pre-factor */
+  double timestep_mult;
+};
+
+/**
+ * @brief Computes the time-step condition imposed by a hydrostatic disc.
+ *
+ * See Creasey, Theuns & Bower, 2013, MNRAS, Volume 429, Issue 3, p.1922-1948
+ *
+ * @param time The current time (not used by this potential).
+ * @param potential The properties of the potential.
+ * @param phys_const The physical constants in internal units.
+ * @param g Pointer to the g-particle data.
+ * @return timestep_mult times the tightest of the conditions checked below.
+ */
+__attribute__((always_inline)) INLINE static float external_gravity_timestep(
+    double time, const struct external_potential* restrict potential,
+    const struct phys_const* restrict phys_const,
+    const struct gpart* restrict g) {
+
+  /* initialise the time-step to the disc dynamical time */
+  const float dt_dyn = potential->dynamical_time;
+  float dt = dt_dyn;
+
+  /* absolute value of height above disc */
+  const float dz = fabs(g->x[2] - potential->z_disc);
+
+  /* vertical acceleration (including the factor G) */
+  const float z_accel = 2.f * M_PI * phys_const->const_newton_G *
+                        potential->surface_density *
+                        tanh(dz / potential->scale_height);
+
+  /* demand that dt * velocity <  fraction of scale height of disc */
+  float dt1 = FLT_MAX;
+  if (fabs(g->v_full[2]) > 0) {
+    dt1 = potential->scale_height / fabs(g->v_full[2]);
+    if (dt1 < dt) dt = dt1;
+  }
+
+  /* demand that dt^2 * acceleration < fraction of scale height of disc */
+  float dt2 = FLT_MAX;
+  if (fabs(z_accel) > 0) {
+    dt2 = potential->scale_height / fabs(z_accel);
+    if (dt2 < dt * dt) dt = sqrt(dt2);
+  }
+
+  /* demand that dt^3 * jerk < fraction of scale height of disc */
+  float dt3 = FLT_MAX;
+  if (fabs(g->v_full[2]) > 0) {
+    const float dz_accel_over_dt =
+        2.f * M_PI * phys_const->const_newton_G * potential->surface_density /
+        potential->scale_height / cosh(dz / potential->scale_height) /
+        cosh(dz / potential->scale_height) * fabs(g->v_full[2]);
+
+    dt3 = potential->scale_height / fabs(dz_accel_over_dt);
+    if (dt3 < dt * dt * dt) dt = pow(dt3, 1. / 3.);
+  }
+
+  return potential->timestep_mult * dt;
+}
+
+/**
+ * @brief Computes the gravitational acceleration along z due to a hydrostatic
+ * disc
+ *
+ * See Creasey, Theuns & Bower, 2013, MNRAS, Volume 429, Issue 3, p.1922-1948
+ *
+ * @param time The current time in internal units.
+ * @param potential The properties of the potential.
+ * @param phys_const The physical constants in internal units.
+ * @param g Pointer to the g-particle data.
+ */
+__attribute__((always_inline)) INLINE static void external_gravity_acceleration(
+    double time, const struct external_potential* restrict potential,
+    const struct phys_const* restrict phys_const, struct gpart* restrict g) {
+
+  const float dz = g->x[2] - potential->z_disc;
+  const float t_dyn = potential->dynamical_time;
+
+  float reduction_factor = 1.f;
+  if (time < potential->growth_time * t_dyn)
+    reduction_factor = time / (potential->growth_time * t_dyn);
+
+  /* Accelerations. Note that they are multiplied by G later on */
+  const float z_accel = reduction_factor * 2.f * M_PI *
+                        potential->surface_density *
+                        tanh(fabs(dz) / potential->scale_height);
+
+  if (dz > 0) g->a_grav[2] -= z_accel;
+  if (dz < 0) g->a_grav[2] += z_accel;
+}
+
+/**
+ * @brief Initialises the external potential properties in the internal system
+ * of units.
+ *
+ * @param parameter_file The parsed parameter file
+ * @param phys_const Physical constants in internal units
+ * @param us The current internal system of units
+ * @param potential The external potential properties to initialize
+ */
+static INLINE void potential_init_backend(
+    const struct swift_params* parameter_file,
+    const struct phys_const* phys_const, const struct UnitSystem* us,
+    struct external_potential* potential) {
+
+  potential->surface_density = parser_get_param_double(
+      parameter_file, "DiscPatchPotential:surface_density");
+  potential->scale_height = parser_get_param_double(
+      parameter_file, "DiscPatchPotential:scale_height");
+  potential->z_disc =
+      parser_get_param_double(parameter_file, "DiscPatchPotential:z_disc");
+  potential->timestep_mult = parser_get_param_double(
+      parameter_file, "DiscPatchPotential:timestep_mult");
+  potential->growth_time = parser_get_opt_param_double(
+      parameter_file, "DiscPatchPotential:growth_time", 0.);
+  potential->dynamical_time =
+      sqrt(potential->scale_height /
+           (phys_const->const_newton_G * potential->surface_density));
+}
+
+/**
+ * @brief Prints the properties of the external potential to stdout.
+ *
+ * @param  potential The external potential properties.
+ */
+static INLINE void potential_print_backend(
+    const struct external_potential* potential) {
+
+  message(
+      "External potential is 'Disk-patch' with properties surface_density = %e "
+      "disc height= %e scale height = %e timestep multiplier = %e.",
+      potential->surface_density, potential->z_disc, potential->scale_height,
+      potential->timestep_mult);
+
+  if (potential->growth_time > 0.)
+    message("Disc will grow for %f dynamical times.", potential->growth_time);
+}
+
+#endif /* SWIFT_DISC_PATCH_H */
diff --git a/src/potential/isothermal/potential.h b/src/potential/isothermal/potential.h
new file mode 100644
index 0000000000000000000000000000000000000000..a993c09a978ca3692ec3359f7633df14760f263d
--- /dev/null
+++ b/src/potential/isothermal/potential.h
@@ -0,0 +1,162 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2016 Tom Theuns (tom.theuns@durham.ac.uk)
+ *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+#ifndef SWIFT_POTENTIAL_ISOTHERMAL_H
+#define SWIFT_POTENTIAL_ISOTHERMAL_H
+
+/* Config parameters. */
+#include "../config.h"
+
+/* Some standard headers. */
+#include <math.h>
+
+/* Local includes. */
+#include "error.h"
+#include "parser.h"
+#include "part.h"
+#include "physical_constants.h"
+#include "units.h"
+
+/**
+ * @brief External Potential Properties - Isothermal sphere case
+ */
+struct external_potential {
+
+  /*! Position of the centre of the potential */
+  double x, y, z;
+
+  /*! Rotation velocity, as read from IsothermalPotential:vrot */
+  double vrot;
+
+  /*! Square of vrot divided by G \f$ \frac{v_{rot}^2}{G} \f$ (pre-computed) */
+  double vrot2_over_G;
+
+  /*! Time-step condition pre-factor (read as a float, stored as double) */
+  double timestep_mult;
+};
+
+/**
+ * @brief Computes the time-step due to the acceleration from an isothermal
+ * potential, i.e. timestep_mult * sqrt(|a_grav|^2 / |da/dt|^2).
+ *
+ * @param time The current time (unused by this potential).
+ * @param potential The #external_potential used in the run.
+ * @param phys_const The physical constants in internal units (unused here).
+ * @param g Pointer to the g-particle data.
+ */
+__attribute__((always_inline)) INLINE static float external_gravity_timestep(
+    double time, const struct external_potential* restrict potential,
+    const struct phys_const* restrict phys_const,
+    const struct gpart* restrict g) {
+
+  const float dx = g->x[0] - potential->x;
+  const float dy = g->x[1] - potential->y;
+  const float dz = g->x[2] - potential->z;
+
+  const float rinv2 = 1.f / (dx * dx + dy * dy + dz * dz); /* 1 / r^2 */
+  const float drdv = /* (x - x_centre) . v */
+      dx * (g->v_full[0]) + dy * (g->v_full[1]) + dz * (g->v_full[2]);
+  const double vrot = potential->vrot;
+
+  const float dota_x = /* da/dt up to a sign; only its square is used. */
+      vrot * vrot * rinv2 * (g->v_full[0] - 2.f * drdv * dx * rinv2);
+  const float dota_y =
+      vrot * vrot * rinv2 * (g->v_full[1] - 2.f * drdv * dy * rinv2);
+  const float dota_z =
+      vrot * vrot * rinv2 * (g->v_full[2] - 2.f * drdv * dz * rinv2);
+  const float dota_2 = dota_x * dota_x + dota_y * dota_y + dota_z * dota_z;
+  const float a_2 = g->a_grav[0] * g->a_grav[0] + g->a_grav[1] * g->a_grav[1] +
+                    g->a_grav[2] * g->a_grav[2];
+
+  return potential->timestep_mult * sqrtf(a_2 / dota_2);
+}
+
+/**
+ * @brief Computes the gravitational acceleration from an isothermal potential.
+ *
+ * Note that the accelerations are multiplied by Newton's G constant
+ * later on, which is why the pre-divided vrot2_over_G is used here.
+ *
+ * @param time The current time (unused by this potential).
+ * @param potential The #external_potential used in the run.
+ * @param phys_const The physical constants in internal units (unused here).
+ * @param g Pointer to the g-particle data; its a_grav is incremented.
+ */
+__attribute__((always_inline)) INLINE static void external_gravity_acceleration(
+    double time, const struct external_potential* potential,
+    const struct phys_const* const phys_const, struct gpart* g) {
+
+  const float dx = g->x[0] - potential->x;
+  const float dy = g->x[1] - potential->y;
+  const float dz = g->x[2] - potential->z;
+  const float rinv2 = 1.f / (dx * dx + dy * dy + dz * dz); /* 1 / r^2 */
+
+  const double term = -potential->vrot2_over_G * rinv2; /* -v^2 / (G r^2) */
+
+  g->a_grav[0] += term * dx;
+  g->a_grav[1] += term * dy;
+  g->a_grav[2] += term * dz;
+}
+
+/**
+ * @brief Initialises the isothermal potential properties from the
+ * "IsothermalPotential" section of the parameter file.
+ *
+ * @param parameter_file The parsed parameter file
+ * @param phys_const Physical constants in internal units (provides G)
+ * @param us The current internal system of units (unused here)
+ * @param potential The external potential properties to initialize
+ */
+static INLINE void potential_init_backend(
+    const struct swift_params* parameter_file,
+    const struct phys_const* phys_const, const struct UnitSystem* us,
+    struct external_potential* potential) {
+
+  potential->x =
+      parser_get_param_double(parameter_file, "IsothermalPotential:position_x");
+  potential->y =
+      parser_get_param_double(parameter_file, "IsothermalPotential:position_y");
+  potential->z =
+      parser_get_param_double(parameter_file, "IsothermalPotential:position_z");
+  potential->vrot =
+      parser_get_param_double(parameter_file, "IsothermalPotential:vrot");
+  potential->timestep_mult = parser_get_param_float(
+      parameter_file, "IsothermalPotential:timestep_mult");
+
+  potential->vrot2_over_G = /* Pre-computed for the acceleration. */
+      potential->vrot * potential->vrot / phys_const->const_newton_G;
+}
+
+/**
+ * @brief Reports the properties of the isothermal potential via message().
+ *
+ * @param potential The external potential properties.
+ */
+static INLINE void potential_print_backend(
+    const struct external_potential* potential) {
+
+  message(
+      "External potential is 'Isothermal' with properties (x,y,z) = (%e, "
+      "%e, %e), vrot = %e "
+      "timestep multiplier = %e.",
+      potential->x, potential->y, potential->z, potential->vrot,
+      potential->timestep_mult);
+}
+
+#endif /* SWIFT_POTENTIAL_ISOTHERMAL_H */
diff --git a/src/potential/none/potential.h b/src/potential/none/potential.h
new file mode 100644
index 0000000000000000000000000000000000000000..8b1c3e841521f3fb42fbdf5c8922cead2ea7cbcb
--- /dev/null
+++ b/src/potential/none/potential.h
@@ -0,0 +1,97 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+#ifndef SWIFT_POTENTIAL_NONE_H
+#define SWIFT_POTENTIAL_NONE_H
+
+/* Config parameters. */
+#include "../config.h"
+
+/* Some standard headers. */
+#include <float.h>
+
+/* Local includes. */
+#include "error.h"
+#include "parser.h"
+#include "part.h"
+#include "physical_constants.h"
+#include "units.h"
+
+/**
+ * @brief External Potential Properties - no external potential (no fields)
+ */
+struct external_potential {};
+
+/**
+ * @brief Computes the time-step due to the acceleration from nothing (FLT_MAX)
+ *
+ * @param time The current time (unused).
+ * @param potential The properties of the external potential (unused).
+ * @param phys_const The physical constants in internal units (unused).
+ * @param g Pointer to the g-particle data (unused).
+ */
+__attribute__((always_inline)) INLINE static float external_gravity_timestep(
+    double time, const struct external_potential* restrict potential,
+    const struct phys_const* restrict phys_const,
+    const struct gpart* restrict g) {
+
+  return FLT_MAX;
+}
+
+/**
+ * @brief Computes the gravitational acceleration due to nothing
+ *
+ * We do nothing: the body is empty and the particle is left unchanged.
+ *
+ * @param time The current time (unused).
+ * @param potential The properties of the external potential (unused).
+ * @param phys_const The physical constants in internal units (unused).
+ * @param g Pointer to the g-particle data (unused).
+ */
+__attribute__((always_inline)) INLINE static void external_gravity_acceleration(
+    double time, const struct external_potential* restrict potential,
+    const struct phys_const* restrict phys_const, struct gpart* restrict g) {}
+
+/**
+ * @brief Initialises the external potential properties in the internal system
+ * of units.
+ *
+ * Nothing to do here: the body is empty and no parameter is read.
+ *
+ * @param parameter_file The parsed parameter file (unused)
+ * @param phys_const Physical constants in internal units (unused)
+ * @param us The current internal system of units (unused)
+ * @param potential The external potential properties to initialize (unused)
+ */
+static INLINE void potential_init_backend(
+    const struct swift_params* parameter_file,
+    const struct phys_const* phys_const, const struct UnitSystem* us,
+    struct external_potential* potential) {}
+
+/**
+ * @brief Reports via message() that no external potential is in use.
+ *
+ * @param potential The external potential properties (unused).
+ */
+static INLINE void potential_print_backend(
+    const struct external_potential* potential) {
+
+  message("External potential is 'No external potential'.");
+}
+
+#endif /* SWIFT_POTENTIAL_NONE_H */
diff --git a/src/potential/point_mass/potential.h b/src/potential/point_mass/potential.h
new file mode 100644
index 0000000000000000000000000000000000000000..f718af2e2c4ff91540e1834cb2072d321ce38705
--- /dev/null
+++ b/src/potential/point_mass/potential.h
@@ -0,0 +1,159 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2016 Tom Theuns (tom.theuns@durham.ac.uk)
+ *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+#ifndef SWIFT_POTENTIAL_POINT_MASS_H
+#define SWIFT_POTENTIAL_POINT_MASS_H
+
+/* Config parameters. */
+#include "../config.h"
+
+/* Some standard headers. */
+#include <math.h>
+
+/* Local includes. */
+#include "error.h"
+#include "parser.h"
+#include "part.h"
+#include "physical_constants.h"
+#include "units.h"
+
+/**
+ * @brief External Potential Properties - Point mass case
+ */
+struct external_potential {
+
+  /*! Position of the point mass */
+  double x, y, z;
+
+  /*! Mass of the point mass, as read from PointMassPotential:mass */
+  double mass;
+
+  /*! Time-step condition pre-factor (multiplies sqrt(a^2 / (da/dt)^2)) */
+  float timestep_mult;
+};
+
+/**
+ * @brief Computes the time-step due to the acceleration from a point mass
+ *
+ * We pass in the time for simulations where the potential evolves with time.
+ *
+ * @param time The current time (unused by this potential).
+ * @param potential The properties of the external potential.
+ * @param phys_const The physical constants in internal units.
+ * @param g Pointer to the g-particle data.
+ */
+__attribute__((always_inline)) INLINE static float external_gravity_timestep(
+    double time, const struct external_potential* restrict potential,
+    const struct phys_const* restrict phys_const,
+    const struct gpart* restrict g) {
+
+  const float G_newton = phys_const->const_newton_G;
+  const float dx = g->x[0] - potential->x;
+  const float dy = g->x[1] - potential->y;
+  const float dz = g->x[2] - potential->z;
+  const float rinv = 1.f / sqrtf(dx * dx + dy * dy + dz * dz); /* 1 / r */
+  const float rinv2 = rinv * rinv;
+  const float rinv3 = rinv2 * rinv;
+  const float drdv = (g->x[0] - potential->x) * (g->v_full[0]) + /* dx . v */
+                     (g->x[1] - potential->y) * (g->v_full[1]) +
+                     (g->x[2] - potential->z) * (g->v_full[2]);
+  const float dota_x = G_newton * potential->mass * rinv3 * /* d(a_x)/dt */
+                       (-g->v_full[0] + 3.f * rinv2 * drdv * dx);
+  const float dota_y = G_newton * potential->mass * rinv3 * /* d(a_y)/dt */
+                       (-g->v_full[1] + 3.f * rinv2 * drdv * dy);
+  const float dota_z = G_newton * potential->mass * rinv3 * /* d(a_z)/dt */
+                       (-g->v_full[2] + 3.f * rinv2 * drdv * dz);
+  const float dota_2 = dota_x * dota_x + dota_y * dota_y + dota_z * dota_z;
+  const float a_2 = g->a_grav[0] * g->a_grav[0] + g->a_grav[1] * g->a_grav[1] +
+                    g->a_grav[2] * g->a_grav[2];
+
+  return potential->timestep_mult * sqrtf(a_2 / dota_2);
+}
+
+/**
+ * @brief Computes the gravitational acceleration of a particle due to a
+ * point mass
+ *
+ * Note that the accelerations are multiplied by Newton's G constant later
+ * on, so G does not appear in the terms added here.
+ *
+ * We pass in the time for simulations where the potential evolves with time.
+ *
+ * @param time The current time (unused by this potential).
+ * @param potential The properties of the external potential.
+ * @param phys_const The physical constants in internal units (unused here).
+ * @param g Pointer to the g-particle data; its a_grav is incremented.
+ */
+__attribute__((always_inline)) INLINE static void external_gravity_acceleration(
+    double time, const struct external_potential* restrict potential,
+    const struct phys_const* restrict phys_const, struct gpart* restrict g) {
+
+  const float dx = g->x[0] - potential->x;
+  const float dy = g->x[1] - potential->y;
+  const float dz = g->x[2] - potential->z;
+  const float rinv = 1.f / sqrtf(dx * dx + dy * dy + dz * dz); /* 1 / r */
+  const float rinv3 = rinv * rinv * rinv;
+
+  g->a_grav[0] += -potential->mass * dx * rinv3;
+  g->a_grav[1] += -potential->mass * dy * rinv3;
+  g->a_grav[2] += -potential->mass * dz * rinv3;
+}
+
+/**
+ * @brief Initialises the point-mass potential properties from the
+ * "PointMassPotential" section of the parameter file.
+ *
+ * @param parameter_file The parsed parameter file
+ * @param phys_const Physical constants in internal units (unused here)
+ * @param us The current internal system of units (unused here)
+ * @param potential The external potential properties to initialize
+ */
+static INLINE void potential_init_backend(
+    const struct swift_params* parameter_file,
+    const struct phys_const* phys_const, const struct UnitSystem* us,
+    struct external_potential* potential) {
+
+  potential->x =
+      parser_get_param_double(parameter_file, "PointMassPotential:position_x");
+  potential->y =
+      parser_get_param_double(parameter_file, "PointMassPotential:position_y");
+  potential->z =
+      parser_get_param_double(parameter_file, "PointMassPotential:position_z");
+  potential->mass =
+      parser_get_param_double(parameter_file, "PointMassPotential:mass");
+  potential->timestep_mult = parser_get_param_float(
+      parameter_file, "PointMassPotential:timestep_mult");
+}
+
+/**
+ * @brief Reports the properties of the point-mass potential via message().
+ *
+ * @param potential The external potential properties.
+ */
+static INLINE void potential_print_backend(
+    const struct external_potential* potential) {
+
+  message(
+      "External potential is 'Point mass' with properties (x,y,z) = (%e, %e, "
+      "%e), M = %e timestep multiplier = %e.",
+      potential->x, potential->y, potential->z, potential->mass,
+      potential->timestep_mult);
+}
+
+#endif /* SWIFT_POTENTIAL_POINT_MASS_H */
diff --git a/src/potentials.c b/src/potentials.c
deleted file mode 100644
index 5cbe05e008b7de17310b1e738e032af90684c25e..0000000000000000000000000000000000000000
--- a/src/potentials.c
+++ /dev/null
@@ -1,97 +0,0 @@
-/*******************************************************************************
- * This file is part of SWIFT.
- * Copyright (c) 2016 Tom Theuns (tom.theuns@durham.ac.uk)
- *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published
- * by the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- ******************************************************************************/
-
-/* Config parameters. */
-#include "../config.h"
-
-/* This object's header. */
-#include "potentials.h"
-
-/**
- * @brief Initialises the external potential properties in the internal system
- * of units.
- *
- * @param parameter_file The parsed parameter file
- * @param us The current internal system of units
- * @param potential The external potential properties to initialize
- */
-void potential_init(const struct swift_params* parameter_file,
-                    struct UnitSystem* us,
-                    struct external_potential* potential) {
-
-#ifdef EXTERNAL_POTENTIAL_POINTMASS
-
-  potential->point_mass.x =
-      parser_get_param_double(parameter_file, "PointMass:position_x");
-  potential->point_mass.y =
-      parser_get_param_double(parameter_file, "PointMass:position_y");
-  potential->point_mass.z =
-      parser_get_param_double(parameter_file, "PointMass:position_z");
-  potential->point_mass.mass =
-      parser_get_param_double(parameter_file, "PointMass:mass");
-  potential->point_mass.timestep_mult =
-      parser_get_param_float(parameter_file, "PointMass:timestep_mult");
-
-#endif /* EXTERNAL_POTENTIAL_POINTMASS */
-
-#ifdef EXTERNAL_POTENTIAL_ISOTHERMALPOTENTIAL
-
-  potential->isothermal_potential.x =
-      parser_get_param_double(parameter_file, "IsothermalPotential:position_x");
-  potential->isothermal_potential.y =
-      parser_get_param_double(parameter_file, "IsothermalPotential:position_y");
-  potential->isothermal_potential.z =
-      parser_get_param_double(parameter_file, "IsothermalPotential:position_z");
-  potential->isothermal_potential.vrot =
-      parser_get_param_double(parameter_file, "IsothermalPotential:vrot");
-  potential->isothermal_potential.timestep_mult = parser_get_param_float(
-      parameter_file, "IsothermalPotential:timestep_mult");
-
-#endif /* EXTERNAL_POTENTIAL_ISOTHERMALPOTENTIAL */
-}
-
-/**
- * @brief Prints the properties of the external potential to stdout.
- *
- * @param  potential The external potential properties.
- */
-void potential_print(const struct external_potential* potential) {
-
-#ifdef EXTERNAL_POTENTIAL_POINTMASS
-
-  message(
-      "Point mass properties are (x,y,z) = (%e, %e, %e), M = %e timestep "
-      "multiplier = %e",
-      potential->point_mass.x, potential->point_mass.y, potential->point_mass.z,
-      potential->point_mass.mass, potential->point_mass.timestep_mult);
-
-#endif /* EXTERNAL_POTENTIAL_POINTMASS */
-
-#ifdef EXTERNAL_POTENTIAL_ISOTHERMALPOTENTIAL
-
-  message(
-      "Isothermal potential properties are (x,y,z) = (%e, %e, %e), vrot = %e "
-      "timestep multiplier= %e",
-      potential->isothermal_potential.x, potential->isothermal_potential.y,
-      potential->isothermal_potential.z, potential->isothermal_potential.vrot,
-      potential->isothermal_potential.timestep_mult);
-
-#endif /* EXTERNAL_POTENTIAL_ISOTHERMALPOTENTIAL */
-}
diff --git a/src/potentials.h b/src/potentials.h
deleted file mode 100644
index 5373cc1f3f55b0c6d9876cbe6348fdcefbe242aa..0000000000000000000000000000000000000000
--- a/src/potentials.h
+++ /dev/null
@@ -1,197 +0,0 @@
-/*******************************************************************************
- * This file is part of SWIFT.
- * Copyright (c) 2016 Tom Theuns (tom.theuns@durham.ac.uk)
- *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published
- * by the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- ******************************************************************************/
-
-#ifndef SWIFT_POTENTIALS_H
-#define SWIFT_POTENTIALS_H
-
-/* Config parameters. */
-#include "../config.h"
-
-/* Some standard headers. */
-#include <math.h>
-
-/* Local includes. */
-#include "const.h"
-#include "error.h"
-#include "parser.h"
-#include "part.h"
-#include "physical_constants.h"
-#include "units.h"
-
-/* External Potential Properties */
-struct external_potential {
-
-#ifdef EXTERNAL_POTENTIAL_POINTMASS
-  struct {
-    double x, y, z;
-    double mass;
-    float timestep_mult;
-  } point_mass;
-#endif
-
-#ifdef EXTERNAL_POTENTIAL_ISOTHERMALPOTENTIAL
-  struct {
-    double x, y, z;
-    double vrot;
-    float timestep_mult;
-  } isothermal_potential;
-#endif
-};
-
-/* Include external isothermal potential */
-#ifdef EXTERNAL_POTENTIAL_ISOTHERMALPOTENTIAL
-
-/**
- * @brief Computes the time-step due to the acceleration from a point mass
- *
- * @param potential The #external_potential used in the run.
- * @param phys_const The physical constants in internal units.
- * @param g Pointer to the g-particle data.
- */
-__attribute__((always_inline)) INLINE static float
-external_gravity_isothermalpotential_timestep(
-    const struct external_potential* potential,
-    const struct phys_const* const phys_const, const struct gpart* const g) {
-
-  const float dx = g->x[0] - potential->isothermal_potential.x;
-  const float dy = g->x[1] - potential->isothermal_potential.y;
-  const float dz = g->x[2] - potential->isothermal_potential.z;
-  const float rinv2 = 1.f / (dx * dx + dy * dy + dz * dz);
-  const float drdv =
-      dx * (g->v_full[0]) + dy * (g->v_full[1]) + dz * (g->v_full[2]);
-  const double vrot = potential->isothermal_potential.vrot;
-
-  const float dota_x =
-      vrot * vrot * rinv2 * (g->v_full[0] - 2 * drdv * dx * rinv2);
-  const float dota_y =
-      vrot * vrot * rinv2 * (g->v_full[1] - 2 * drdv * dy * rinv2);
-  const float dota_z =
-      vrot * vrot * rinv2 * (g->v_full[2] - 2 * drdv * dz * rinv2);
-  const float dota_2 = dota_x * dota_x + dota_y * dota_y + dota_z * dota_z;
-  const float a_2 = g->a_grav[0] * g->a_grav[0] + g->a_grav[1] * g->a_grav[1] +
-                    g->a_grav[2] * g->a_grav[2];
-
-  return potential->isothermal_potential.timestep_mult * sqrtf(a_2 / dota_2);
-}
-
-/**
- * @brief Computes the gravitational acceleration of a particle due to a point
- * mass
- *
- * Note that the accelerations are multiplied by Newton's G constant later on.
- *
- * @param potential The #external_potential used in the run.
- * @param phys_const The physical constants in internal units.
- * @param g Pointer to the g-particle data.
- */
-__attribute__((always_inline)) INLINE static void
-external_gravity_isothermalpotential(const struct external_potential* potential,
-                                     const struct phys_const* const phys_const,
-                                     struct gpart* g) {
-
-  const float G_newton = phys_const->const_newton_G;
-
-  const float dx = g->x[0] - potential->isothermal_potential.x;
-  const float dy = g->x[1] - potential->isothermal_potential.y;
-  const float dz = g->x[2] - potential->isothermal_potential.z;
-  const float rinv2 = 1.f / (dx * dx + dy * dy + dz * dz);
-
-  const double vrot = potential->isothermal_potential.vrot;
-  const double term = -vrot * vrot * rinv2 / G_newton;
-
-  g->a_grav[0] += term * dx;
-  g->a_grav[1] += term * dy;
-  g->a_grav[2] += term * dz;
-  // error(" %f %f %f %f", vrot, rinv2, dx, g->a_grav[0]);
-}
-
-#endif /* EXTERNAL_POTENTIAL_ISOTHERMALPOTENTIAL */
-
-/* Include exteral pointmass potential */
-#ifdef EXTERNAL_POTENTIAL_POINTMASS
-
-/**
- * @brief Computes the time-step due to the acceleration from a point mass
- *
- * @param potential The properties of the externa potential.
- * @param phys_const The physical constants in internal units.
- * @param g Pointer to the g-particle data.
- */
-__attribute__((always_inline)) INLINE static float
-external_gravity_pointmass_timestep(const struct external_potential* potential,
-                                    const struct phys_const* const phys_const,
-                                    const struct gpart* const g) {
-
-  const float G_newton = phys_const->const_newton_G;
-  const float dx = g->x[0] - potential->point_mass.x;
-  const float dy = g->x[1] - potential->point_mass.y;
-  const float dz = g->x[2] - potential->point_mass.z;
-  const float rinv = 1.f / sqrtf(dx * dx + dy * dy + dz * dz);
-  const float drdv = (g->x[0] - potential->point_mass.x) * (g->v_full[0]) +
-                     (g->x[1] - potential->point_mass.y) * (g->v_full[1]) +
-                     (g->x[2] - potential->point_mass.z) * (g->v_full[2]);
-  const float dota_x = G_newton * potential->point_mass.mass * rinv * rinv *
-                       rinv * (-g->v_full[0] + 3.f * rinv * rinv * drdv * dx);
-  const float dota_y = G_newton * potential->point_mass.mass * rinv * rinv *
-                       rinv * (-g->v_full[1] + 3.f * rinv * rinv * drdv * dy);
-  const float dota_z = G_newton * potential->point_mass.mass * rinv * rinv *
-                       rinv * (-g->v_full[2] + 3.f * rinv * rinv * drdv * dz);
-  const float dota_2 = dota_x * dota_x + dota_y * dota_y + dota_z * dota_z;
-  const float a_2 = g->a_grav[0] * g->a_grav[0] + g->a_grav[1] * g->a_grav[1] +
-                    g->a_grav[2] * g->a_grav[2];
-
-  return potential->point_mass.timestep_mult * sqrtf(a_2 / dota_2);
-}
-
-/**
- * @brief Computes the gravitational acceleration of a particle due to a point
- * mass
- *
- * Note that the accelerations are multiplied by Newton's G constant later on.
- *
- * @param potential The proerties of the external potential.
- * @param phys_const The physical constants in internal units.
- * @param g Pointer to the g-particle data.
- */
-__attribute__((always_inline)) INLINE static void external_gravity_pointmass(
-    const struct external_potential* potential,
-    const struct phys_const* const phys_const, struct gpart* g) {
-
-  const float dx = g->x[0] - potential->point_mass.x;
-  const float dy = g->x[1] - potential->point_mass.y;
-  const float dz = g->x[2] - potential->point_mass.z;
-  const float rinv = 1.f / sqrtf(dx * dx + dy * dy + dz * dz);
-  const float rinv3 = rinv * rinv * rinv;
-
-  g->a_grav[0] += -potential->point_mass.mass * dx * rinv3;
-  g->a_grav[1] += -potential->point_mass.mass * dy * rinv3;
-  g->a_grav[2] += -potential->point_mass.mass * dz * rinv3;
-}
-
-#endif /* EXTERNAL_POTENTIAL_POINTMASS */
-
-/* Now, some generic functions, defined in the source file */
-void potential_init(const struct swift_params* parameter_file,
-                    struct UnitSystem* us,
-                    struct external_potential* potential);
-
-void potential_print(const struct external_potential* potential);
-
-#endif /* SWIFT_POTENTIALS_H */
diff --git a/src/queue.c b/src/queue.c
index 38f8620fdc75d744df31513792e96323dbf83647..af4dfa3c94470814d4f6e7f53687a2fcba69d6dd 100644
--- a/src/queue.c
+++ b/src/queue.c
@@ -188,7 +188,7 @@ struct task *queue_gettask(struct queue *q, const struct task *prev,
   /* Set some pointers we will use often. */
   int *qtid = q->tid;
   struct task *qtasks = q->tasks;
-  const int qcount = q->count;
+  const int old_qcount = q->count;
 
   /* Data for the sliding window in which to try the task with the
      best overlap with the previous task. */
@@ -201,7 +201,7 @@ struct task *queue_gettask(struct queue *q, const struct task *prev,
   int ind = -1;
 
   /* Loop over the queue entries. */
-  for (int k = 0; k < qcount; k++) {
+  for (int k = 0; k < old_qcount; k++) {
     if (k < queue_search_window) {
       window[window_count].ind = k;
       window[window_count].tid = qtid[k];
diff --git a/src/riemann.h b/src/riemann.h
index d647b021167317d14f4cd7316d09c247794f3d23..d0ae57a640e13c2098708735d6c34de70ebea5b0 100644
--- a/src/riemann.h
+++ b/src/riemann.h
@@ -27,18 +27,35 @@
 #include "stdio.h"
 #include "stdlib.h"
 
-#define HLLC_SOLVER
+/* Check that we use an ideal equation of state, since other equations of state
+   are not compatible with these Riemann solvers. */
+#ifndef EOS_IDEAL_GAS
+#error Currently there are no Riemann solvers that can handle the requested \
+       equation of state. Select an ideal gas equation of state if you want to \
+       use this hydro scheme!
+#endif
+
+#if defined(RIEMANN_SOLVER_EXACT)
 
-#ifdef EXACT_SOLVER
+#define RIEMANN_SOLVER_IMPLEMENTATION "Exact Riemann solver (Toro 2009)"
 #include "riemann/riemann_exact.h"
-#endif
 
-#ifdef TRRS_SOLVER
+#elif defined(RIEMANN_SOLVER_TRRS)
+
+#define RIEMANN_SOLVER_IMPLEMENTATION \
+  "Two Rarefaction Riemann Solver (Toro 2009)"
 #include "riemann/riemann_trrs.h"
-#endif
 
-#ifdef HLLC_SOLVER
+#elif defined(RIEMANN_SOLVER_HLLC)
+
+#define RIEMANN_SOLVER_IMPLEMENTATION \
+  "Harten-Lax-van Leer-Contact Riemann solver (Toro 2009)"
 #include "riemann/riemann_hllc.h"
+
+#else
+
+#error "Error: no Riemann solver selected!"
+
 #endif
 
 #endif /* SWIFT_RIEMANN_H */
diff --git a/src/riemann/riemann_exact.h b/src/riemann/riemann_exact.h
index a2f3c30fb1daf5d53bf35abe4ca7e73eafba6018..10dfe56ef35a82e721a715bbb8c7a71979b8e6ce 100644
--- a/src/riemann/riemann_exact.h
+++ b/src/riemann/riemann_exact.h
@@ -29,19 +29,10 @@
 #ifndef SWIFT_RIEMANN_EXACT_H
 #define SWIFT_RIEMANN_EXACT_H
 
-/* frequently used combinations of const_hydro_gamma */
-#define const_riemann_gp1d2g \
-  (0.5f * (const_hydro_gamma + 1.0f) / const_hydro_gamma)
-#define const_riemann_gm1d2g \
-  (0.5f * (const_hydro_gamma - 1.0f) / const_hydro_gamma)
-#define const_riemann_gm1dgp1 \
-  ((const_hydro_gamma - 1.0f) / (const_hydro_gamma + 1.0f))
-#define const_riemann_tdgp1 (2.0f / (const_hydro_gamma + 1.0f))
-#define const_riemann_tdgm1 (2.0f / (const_hydro_gamma - 1.0f))
-#define const_riemann_gm1d2 (0.5f * (const_hydro_gamma - 1.0f))
-#define const_riemann_tgdgm1 \
-  (2.0f * const_hydro_gamma / (const_hydro_gamma - 1.0f))
-#define const_riemann_ginv (1.0f / const_hydro_gamma)
+#include <float.h>
+#include "adiabatic_index.h"
+#include "minmax.h"
+#include "riemann_vacuum.h"
 
 /**
  * @brief Functions (4.6) and (4.7) in Toro.
@@ -50,19 +41,18 @@
  * @param W The left or right state vector
  * @param a The left or right sound speed
  */
-__attribute__((always_inline)) INLINE static GFLOAT riemann_fb(GFLOAT p,
-                                                               GFLOAT* W,
-                                                               GFLOAT a) {
+__attribute__((always_inline)) INLINE static float riemann_fb(float p, float* W,
+                                                              float a) {
 
-  GFLOAT fval = 0.;
-  GFLOAT A, B;
+  float fval = 0.;
+  float A, B;
   if (p > W[4]) {
-    A = const_riemann_tdgp1 / W[0];
-    B = const_riemann_gm1dgp1 * W[4];
+    A = hydro_two_over_gamma_plus_one / W[0];
+    B = hydro_gamma_minus_one_over_gamma_plus_one * W[4];
     fval = (p - W[4]) * sqrtf(A / (p + B));
   } else {
-    fval =
-        const_riemann_tdgm1 * a * (powf(p / W[4], const_riemann_gm1d2g) - 1.0f);
+    fval = hydro_two_over_gamma_minus_one * a *
+           (pow_gamma_minus_one_over_two_gamma(p / W[4]) - 1.0f);
   }
   return fval;
 }
@@ -78,9 +68,8 @@ __attribute__((always_inline)) INLINE static GFLOAT riemann_fb(GFLOAT p,
  * @param aL The left sound speed
  * @param aR The right sound speed
  */
-__attribute__((always_inline)) INLINE static GFLOAT riemann_f(
-    GFLOAT p, GFLOAT* WL, GFLOAT* WR, GFLOAT vL, GFLOAT vR, GFLOAT aL,
-    GFLOAT aR) {
+__attribute__((always_inline)) INLINE static float riemann_f(
+    float p, float* WL, float* WR, float vL, float vR, float aL, float aR) {
 
   return riemann_fb(p, WL, aL) + riemann_fb(p, WR, aR) + (vR - vL);
 }
@@ -92,18 +81,18 @@ __attribute__((always_inline)) INLINE static GFLOAT riemann_f(
  * @param W The left or right state vector
  * @param a The left or right sound speed
  */
-__attribute__((always_inline)) INLINE static GFLOAT riemann_fprimeb(GFLOAT p,
-                                                                    GFLOAT* W,
-                                                                    GFLOAT a) {
+__attribute__((always_inline)) INLINE static float riemann_fprimeb(float p,
+                                                                   float* W,
+                                                                   float a) {
 
-  GFLOAT fval = 0.;
-  GFLOAT A, B;
+  float fval = 0.;
+  float A, B;
   if (p > W[4]) {
-    A = const_riemann_tdgp1 / W[0];
-    B = const_riemann_gm1dgp1 * W[4];
+    A = hydro_two_over_gamma_plus_one / W[0];
+    B = hydro_gamma_minus_one_over_gamma_plus_one * W[4];
     fval = (1.0f - 0.5f * (p - W[4]) / (B + p)) * sqrtf(A / (p + B));
   } else {
-    fval = 1.0f / (W[0] * a) * powf(p / W[4], -const_riemann_gp1d2g);
+    fval = 1.0f / W[0] / a * pow_minus_gamma_plus_one_over_two_gamma(p / W[4]);
   }
   return fval;
 }
@@ -117,8 +106,8 @@ __attribute__((always_inline)) INLINE static GFLOAT riemann_fprimeb(GFLOAT p,
  * @param aL The left sound speed
  * @param aR The right sound speed
  */
-__attribute__((always_inline)) INLINE static GFLOAT riemann_fprime(
-    GFLOAT p, GFLOAT* WL, GFLOAT* WR, GFLOAT aL, GFLOAT aR) {
+__attribute__((always_inline)) INLINE static float riemann_fprime(
+    float p, float* WL, float* WR, float aL, float aR) {
 
   return riemann_fprimeb(p, WL, aL) + riemann_fprimeb(p, WR, aR);
 }
@@ -129,12 +118,12 @@ __attribute__((always_inline)) INLINE static GFLOAT riemann_fprime(
  * @param p The current guess for the pressure
  * @param W The left or right state vector
  */
-__attribute__((always_inline)) INLINE static GFLOAT riemann_gb(GFLOAT p,
-                                                               GFLOAT* W) {
+__attribute__((always_inline)) INLINE static float riemann_gb(float p,
+                                                              float* W) {
 
-  GFLOAT A, B;
-  A = const_riemann_tdgp1 / W[0];
-  B = const_riemann_gm1dgp1 * W[4];
+  float A, B;
+  A = hydro_two_over_gamma_plus_one / W[0];
+  B = hydro_gamma_minus_one_over_gamma_plus_one * W[4];
   return sqrtf(A / (p + B));
 }
 
@@ -151,27 +140,27 @@ __attribute__((always_inline)) INLINE static GFLOAT riemann_gb(GFLOAT p,
  * @param aL The left sound speed
  * @param aR The right sound speed
  */
-__attribute__((always_inline)) INLINE static GFLOAT riemann_guess_p(
-    GFLOAT* WL, GFLOAT* WR, GFLOAT vL, GFLOAT vR, GFLOAT aL, GFLOAT aR) {
+__attribute__((always_inline)) INLINE static float riemann_guess_p(
+    float* WL, float* WR, float vL, float vR, float aL, float aR) {
 
-  GFLOAT pguess, pmin, pmax, qmax;
-  GFLOAT ppv;
+  float pguess, pmin, pmax, qmax;
+  float ppv;
 
-  pmin = fminf(WL[4], WR[4]);
-  pmax = fmaxf(WL[4], WR[4]);
+  pmin = min(WL[4], WR[4]);
+  pmax = max(WL[4], WR[4]);
   qmax = pmax / pmin;
   ppv =
       0.5f * (WL[4] + WR[4]) - 0.125f * (vR - vL) * (WL[0] + WR[0]) * (aL + aR);
-  ppv = fmaxf(1.e-8f, ppv);
+  ppv = max(1.e-8f, ppv);
   if (qmax <= 2.0f && pmin <= ppv && ppv <= pmax) {
     pguess = ppv;
   } else {
     if (ppv < pmin) {
       /* two rarefactions */
-      pguess = powf((aL + aR - const_riemann_gm1d2 * (vR - vL)) /
-                        (aL / powf(WL[4], const_riemann_gm1d2g) +
-                         aR / powf(WR[4], const_riemann_gm1d2g)),
-                    const_riemann_tgdgm1);
+      pguess = pow_two_gamma_over_gamma_minus_one(
+          (aL + aR - hydro_gamma_minus_one_over_two * (vR - vL)) /
+          (aL / pow_gamma_minus_one_over_two_gamma(WL[4]) +
+           aR / pow_gamma_minus_one_over_two_gamma(WR[4])));
     } else {
       /* two shocks */
       pguess = (riemann_gb(ppv, WL) * WL[4] + riemann_gb(ppv, WR) * WR[4] - vR +
@@ -183,7 +172,7 @@ __attribute__((always_inline)) INLINE static GFLOAT riemann_guess_p(
      value for pressure (...).
      Thus in order to avoid negative guess values we introduce the small
      positive constant _tolerance" */
-  pguess = fmaxf(1.e-8f, pguess);
+  pguess = max(1.e-8f, pguess);
   return pguess;
 }
 
@@ -202,14 +191,14 @@ __attribute__((always_inline)) INLINE static GFLOAT riemann_guess_p(
  * @param aL The left sound speed
  * @param aR The right sound speed
  */
-__attribute__((always_inline)) INLINE static GFLOAT riemann_solve_brent(
-    GFLOAT lower_limit, GFLOAT upper_limit, GFLOAT lowf, GFLOAT upf,
-    GFLOAT error_tol, GFLOAT* WL, GFLOAT* WR, GFLOAT vL, GFLOAT vR, GFLOAT aL,
-    GFLOAT aR) {
-
-  GFLOAT a, b, c, d, s;
-  GFLOAT fa, fb, fc, fs;
-  GFLOAT tmp, tmp2;
+__attribute__((always_inline)) INLINE static float riemann_solve_brent(
+    float lower_limit, float upper_limit, float lowf, float upf,
+    float error_tol, float* WL, float* WR, float vL, float vR, float aL,
+    float aR) {
+
+  float a, b, c, d, s;
+  float fa, fb, fc, fs;
+  float tmp, tmp2;
   int mflag;
   int i;
 
@@ -296,148 +285,6 @@ __attribute__((always_inline)) INLINE static GFLOAT riemann_solve_brent(
   return b;
 }
 
-/**
- * @brief Vacuum Riemann solver, based on section 4.6 in Toro
- *
- * @param WL The left state vector
- * @param WR The right state vector
- * @param vL The left velocity along the interface normal
- * @param vR The right velocity along the interface normal
- * @param aL The left sound speed
- * @param aR The right sound speed
- * @param Whalf Empty state vector to store the solution in
- * @param n_unit Normal vector of the interface
- */
-__attribute__((always_inline)) INLINE static void riemann_solve_vacuum(
-    GFLOAT* WL, GFLOAT* WR, GFLOAT vL, GFLOAT vR, GFLOAT aL, GFLOAT aR,
-    GFLOAT* Whalf, float* n_unit) {
-
-  GFLOAT SL, SR;
-  GFLOAT vhalf;
-
-  if (!WR[0] && !WL[0]) {
-    /* if both states are vacuum, the solution is also vacuum */
-    Whalf[0] = 0.0f;
-    Whalf[1] = 0.0f;
-    Whalf[2] = 0.0f;
-    Whalf[3] = 0.0f;
-    Whalf[4] = 0.0f;
-    return;
-  }
-  if (!WR[0]) {
-    Whalf[1] = WL[1];
-    Whalf[2] = WL[2];
-    Whalf[3] = WL[3];
-    /* vacuum right state */
-    if (vL < aL) {
-      SL = vL + const_riemann_tdgm1 * aL;
-      if (SL > 0.0f) {
-        Whalf[0] =
-            WL[0] * powf(const_riemann_tdgp1 + const_riemann_gm1dgp1 / aL * vL,
-                         const_riemann_tdgm1);
-        vhalf = const_riemann_tdgp1 * (aL + const_riemann_gm1d2 * vL) - vL;
-        Whalf[4] =
-            WL[4] * powf(const_riemann_tdgp1 + const_riemann_gm1dgp1 / aL * vL,
-                         const_riemann_tgdgm1);
-      } else {
-        Whalf[0] = 0.0f;
-        Whalf[1] = 0.0f;
-        Whalf[2] = 0.0f;
-        Whalf[3] = 0.0f;
-        Whalf[4] = 0.0f;
-        return;
-      }
-    } else {
-      Whalf[0] = WL[0];
-      vhalf = 0.0f;
-      Whalf[4] = WL[4];
-    }
-  } else {
-    if (!WL[0]) {
-      Whalf[1] = WR[1];
-      Whalf[2] = WR[2];
-      Whalf[3] = WR[3];
-      /* vacuum left state */
-      if (-vR < aR) {
-        SR = vR - const_riemann_tdgm1 * aR;
-        if (SR >= 0.0f) {
-          Whalf[0] = 0.0f;
-          Whalf[1] = 0.0f;
-          Whalf[2] = 0.0f;
-          Whalf[3] = 0.0f;
-          Whalf[4] = 0.0f;
-          return;
-        } else {
-          Whalf[0] = WR[0] *
-                     powf(const_riemann_tdgp1 - const_riemann_gm1dgp1 / aR * vR,
-                          const_riemann_tdgm1);
-          vhalf = const_riemann_tdgp1 * (-aR + const_riemann_gm1d2 * vR) - vR;
-          Whalf[4] = WR[4] *
-                     powf(const_riemann_tdgp1 - const_riemann_gm1dgp1 / aR * vR,
-                          const_riemann_tgdgm1);
-        }
-      } else {
-        Whalf[0] = WR[0];
-        vhalf = 0.0f;
-        Whalf[4] = WR[4];
-      }
-    } else {
-      /* vacuum generation */
-      SR = vR - const_riemann_tdgm1 * aR;
-      SL = vL + const_riemann_tdgm1 * aL;
-      if (SR > 0.0f && SL < 0.0f) {
-        Whalf[0] = 0.0f;
-        Whalf[1] = 0.0f;
-        Whalf[2] = 0.0f;
-        Whalf[3] = 0.0f;
-        Whalf[4] = 0.0f;
-        return;
-      } else {
-        if (SL >= 0.0f) {
-          Whalf[1] = WL[1];
-          Whalf[2] = WL[2];
-          Whalf[3] = WL[3];
-          if (aL > vL) {
-            Whalf[0] = WL[0] * powf(const_riemann_tdgp1 +
-                                        const_riemann_gm1dgp1 / aL * vL,
-                                    const_riemann_tdgm1);
-            vhalf = const_riemann_tdgp1 * (aL + const_riemann_gm1d2 * vL) - vL;
-            Whalf[4] = WL[4] * powf(const_riemann_tdgp1 +
-                                        const_riemann_gm1dgp1 / aL * vL,
-                                    const_riemann_tgdgm1);
-          } else {
-            Whalf[0] = WL[0];
-            vhalf = 0.0f;
-            Whalf[4] = WL[4];
-          }
-        } else {
-          Whalf[1] = WR[1];
-          Whalf[2] = WR[2];
-          Whalf[3] = WR[3];
-          if (-vR < aR) {
-            Whalf[0] = WR[0] * powf(const_riemann_tdgp1 -
-                                        const_riemann_gm1dgp1 / aR * vR,
-                                    const_riemann_tdgm1);
-            vhalf = const_riemann_tdgp1 * (-aR + const_riemann_gm1d2 * vR) - vR;
-            Whalf[4] = WR[4] * powf(const_riemann_tdgp1 -
-                                        const_riemann_gm1dgp1 / aR * vR,
-                                    const_riemann_tgdgm1);
-          } else {
-            Whalf[0] = WR[0];
-            vhalf = 0.0f;
-            Whalf[4] = WR[4];
-          }
-        }
-      }
-    }
-  }
-
-  /* Add the velocity solution along the interface normal to the velocities */
-  Whalf[1] += vhalf * n_unit[0];
-  Whalf[2] += vhalf * n_unit[1];
-  Whalf[3] += vhalf * n_unit[2];
-}
-
 /* Solve the Riemann problem between the states WL and WR and store the result
  * in Whalf
  * The Riemann problem is solved in the x-direction; the velocities in the y-
@@ -456,20 +303,20 @@ __attribute__((always_inline)) INLINE static void riemann_solve_vacuum(
  * @param n_unit Normal vector of the interface
  */
 __attribute__((always_inline)) INLINE static void riemann_solver_solve(
-    GFLOAT* WL, GFLOAT* WR, GFLOAT* Whalf, float* n_unit) {
+    float* WL, float* WR, float* Whalf, float* n_unit) {
 
   /* velocity of the left and right state in a frame aligned with n_unit */
-  GFLOAT vL, vR, vhalf;
+  float vL, vR, vhalf;
   /* sound speeds */
-  GFLOAT aL, aR;
+  float aL, aR;
   /* variables used for finding pstar */
-  GFLOAT p, pguess, fp, fpguess;
+  float p, pguess, fp, fpguess;
   /* variables used for sampling the solution */
-  GFLOAT u;
-  GFLOAT pdpR, SR;
-  GFLOAT SHR, STR;
-  GFLOAT pdpL, SL;
-  GFLOAT SHL, STL;
+  float u;
+  float pdpR, SR;
+  float SHR, STR;
+  float pdpL, SL;
+  float SHL, STL;
   int errorFlag = 0;
 
   /* sanity checks */
@@ -500,156 +347,155 @@ __attribute__((always_inline)) INLINE static void riemann_solver_solve(
   vR = WR[1] * n_unit[0] + WR[2] * n_unit[1] + WR[3] * n_unit[2];
 
   /* calculate sound speeds */
-  aL = sqrtf(const_hydro_gamma * WL[4] / WL[0]);
-  aR = sqrtf(const_hydro_gamma * WR[4] / WR[0]);
+  aL = sqrtf(hydro_gamma * WL[4] / WL[0]);
+  aR = sqrtf(hydro_gamma * WR[4] / WR[0]);
 
-  if (!WL[0] || !WR[0]) {
-    /* vacuum: we need a vacuum riemann solver */
+  /* check vacuum (generation) condition */
+  if (riemann_is_vacuum(WL, WR, vL, vR, aL, aR)) {
     riemann_solve_vacuum(WL, WR, vL, vR, aL, aR, Whalf, n_unit);
     return;
   }
 
-  /* check vacuum generation condition */
-  if (2.0f * aL / (const_hydro_gamma - 1.0f) +
-          2.0f * aR / (const_hydro_gamma - 1.0f) <
-      fabs(vL - vR)) {
-    /* vacuum generation: need a vacuum riemann solver */
-    riemann_solve_vacuum(WL, WR, vL, vR, aL, aR, Whalf, n_unit);
-    return;
-  } else {
-    /* values are ok: let's find pstar (riemann_f(pstar) = 0)! */
-    /* We normally use a Newton-Raphson iteration to find the zeropoint
-       of riemann_f(p), but if pstar is close to 0, we risk negative p values.
-       Since riemann_f(p) is undefined for negative pressures, we don't
-       want this to happen.
-       We therefore use Brent's method if riemann_f(0) is larger than some
-       value. -5 makes the iteration fail safe while almost never invoking
-       the expensive Brent solver. */
-    p = 0.;
-    /* obtain a first guess for p */
-    pguess = riemann_guess_p(WL, WR, vL, vR, aL, aR);
-    fp = riemann_f(p, WL, WR, vL, vR, aL, aR);
-    fpguess = riemann_f(pguess, WL, WR, vL, vR, aL, aR);
-    /* ok, pstar is close to 0, better use Brent's method... */
-    /* we use Newton-Raphson until we find a suitable interval */
-    if (fp * fpguess >= 0.0f) {
-      /* Newton-Raphson until convergence or until suitable interval is found
-         to use Brent's method */
-      unsigned int counter = 0;
-      while (fabs(p - pguess) > 1.e-6f * 0.5f * (p + pguess) &&
-             fpguess < 0.0f) {
-        p = pguess;
-        pguess = pguess - fpguess / riemann_fprime(pguess, WL, WR, aL, aR);
-        fpguess = riemann_f(pguess, WL, WR, vL, vR, aL, aR);
-        counter++;
-        if (counter > 1000) {
-          error("Stuck in Newton-Raphson!\n");
-        }
-      }
-    }
-    /* As soon as there is a suitable interval: use Brent's method */
-    if (1.e6 * fabs(p - pguess) > 0.5f * (p + pguess) && fpguess > 0.0f) {
-      p = 0.0f;
-      fp = riemann_f(p, WL, WR, vL, vR, aL, aR);
-      /* use Brent's method to find the zeropoint */
-      p = riemann_solve_brent(p, pguess, fp, fpguess, 1.e-6, WL, WR, vL, vR, aL,
-                              aR);
-    } else {
+  /* values are ok: let's find pstar (riemann_f(pstar) = 0)! */
+  /* We normally use a Newton-Raphson iteration to find the zero point
+     of riemann_f(p), but if pstar is close to 0, we risk negative p values.
+     Since riemann_f(p) is undefined for negative pressures, we don't
+     want this to happen.
+     We therefore only iterate with Newton-Raphson while riemann_f(pguess)
+     is negative. Once it is positive and the iteration has not converged,
+     we use the expensive Brent solver on the interval [0, pguess]. */
+  p = 0.;
+  /* obtain a first guess for p */
+  pguess = riemann_guess_p(WL, WR, vL, vR, aL, aR);
+  fp = riemann_f(p, WL, WR, vL, vR, aL, aR);
+  fpguess = riemann_f(pguess, WL, WR, vL, vR, aL, aR);
+  /* [0, pguess] only brackets pstar if riemann_f changes sign across it; */
+  /* if it does not, we use Newton-Raphson until we find a suitable interval */
+  if (fp * fpguess >= 0.0f) {
+    /* Newton-Raphson until convergence or until suitable interval is found
+       to use Brent's method */
+    unsigned int counter = 0;
+    while (fabs(p - pguess) > 1.e-6f * 0.5f * (p + pguess) && fpguess < 0.0f) {
       p = pguess;
+      pguess = pguess - fpguess / riemann_fprime(pguess, WL, WR, aL, aR);
+      fpguess = riemann_f(pguess, WL, WR, vL, vR, aL, aR);
+      counter++;
+      if (counter > 1000) {
+        error("Stuck in Newton-Raphson!\n");
+      }
     }
+  }
+  /* As soon as there is a suitable interval: use Brent's method */
+  if (1.e6 * fabs(p - pguess) > 0.5f * (p + pguess) && fpguess > 0.0f) {
+    p = 0.0f;
+    fp = riemann_f(p, WL, WR, vL, vR, aL, aR);
+    /* use Brent's method to find the zeropoint */
+    p = riemann_solve_brent(p, pguess, fp, fpguess, 1.e-6, WL, WR, vL, vR, aL,
+                            aR);
+  } else {
+    p = pguess;
+  }
 
-    /* calculate the velocity in the intermediate state */
-    u = 0.5f * (vL + vR) +
-        0.5f * (riemann_fb(p, WR, aR) - riemann_fb(p, WL, aL));
-
-    /* sample the solution */
-    /* This corresponds to the flow chart in Fig. 4.14 in Toro */
-    if (u < 0.0f) {
-      /* advect velocity components */
-      Whalf[1] = WR[1];
-      Whalf[2] = WR[2];
-      Whalf[3] = WR[3];
-      pdpR = p / WR[4];
-      if (p > WR[4]) {
-        /* shockwave */
-        SR =
-            vR + aR * sqrtf(const_riemann_gp1d2g * pdpR + const_riemann_gm1d2g);
-        if (SR > 0.0f) {
-          Whalf[0] = WR[0] * (pdpR + const_riemann_gm1dgp1) /
-                     (const_riemann_gm1dgp1 * pdpR + 1.0f);
+  /* calculate the velocity in the intermediate state */
+  u = 0.5f * (vL + vR) + 0.5f * (riemann_fb(p, WR, aR) - riemann_fb(p, WL, aL));
+
+  /* sample the solution */
+  /* This corresponds to the flow chart in Fig. 4.14 in Toro */
+  if (u < 0.0f) {
+    /* advect velocity components */
+    Whalf[1] = WR[1];
+    Whalf[2] = WR[2];
+    Whalf[3] = WR[3];
+    pdpR = p / WR[4];
+    if (p > WR[4]) {
+      /* shockwave */
+      SR = vR +
+           aR * sqrtf(hydro_gamma_plus_one_over_two_gamma * pdpR +
+                      hydro_gamma_minus_one_over_two_gamma);
+      if (SR > 0.0f) {
+        Whalf[0] = WR[0] * (pdpR + hydro_gamma_minus_one_over_gamma_plus_one) /
+                   (hydro_gamma_minus_one_over_gamma_plus_one * pdpR + 1.0f);
+        vhalf = u - vR;
+        Whalf[4] = p;
+      } else {
+        Whalf[0] = WR[0];
+        vhalf = 0.0f;
+        Whalf[4] = WR[4];
+      }
+    } else {
+      /* rarefaction wave */
+      SHR = vR + aR;
+      if (SHR > 0.0f) {
+        STR = u + aR * pow_gamma_minus_one_over_two_gamma(pdpR);
+        if (STR <= 0.0f) {
+          Whalf[0] =
+              WR[0] * pow_two_over_gamma_minus_one(
+                          hydro_two_over_gamma_plus_one -
+                          hydro_gamma_minus_one_over_gamma_plus_one / aR * vR);
+          vhalf = hydro_two_over_gamma_plus_one *
+                      (-aR + hydro_gamma_minus_one_over_two * vR) -
+                  vR;
+          Whalf[4] =
+              WR[4] * pow_two_gamma_over_gamma_minus_one(
+                          hydro_two_over_gamma_plus_one -
+                          hydro_gamma_minus_one_over_gamma_plus_one / aR * vR);
+        } else {
+          Whalf[0] = WR[0] * pow_one_over_gamma(pdpR);
           vhalf = u - vR;
           Whalf[4] = p;
-        } else {
-          Whalf[0] = WR[0];
-          vhalf = 0.0f;
-          Whalf[4] = WR[4];
         }
       } else {
-        /* rarefaction wave */
-        SHR = vR + aR;
-        if (SHR > 0.0f) {
-          STR = u + aR * powf(pdpR, const_riemann_gm1d2g);
-          if (STR <= 0.0f) {
-            Whalf[0] = WR[0] * powf(const_riemann_tdgp1 -
-                                        const_riemann_gm1dgp1 / aR * vR,
-                                    const_riemann_tdgm1);
-            vhalf = const_riemann_tdgp1 * (-aR + const_riemann_gm1d2 * vR) - vR;
-            Whalf[4] = WR[4] * powf(const_riemann_tdgp1 -
-                                        const_riemann_gm1dgp1 / aR * vR,
-                                    const_riemann_tgdgm1);
-          } else {
-            Whalf[0] = WR[0] * powf(pdpR, const_riemann_ginv);
-            vhalf = u - vR;
-            Whalf[4] = p;
-          }
-        } else {
-          Whalf[0] = WR[0];
-          vhalf = 0.0f;
-          Whalf[4] = WR[4];
-        }
+        Whalf[0] = WR[0];
+        vhalf = 0.0f;
+        Whalf[4] = WR[4];
+      }
+    }
+  } else {
+    Whalf[1] = WL[1];
+    Whalf[2] = WL[2];
+    Whalf[3] = WL[3];
+    pdpL = p / WL[4];
+    if (p > WL[4]) {
+      /* shockwave */
+      SL = vL -
+           aL * sqrtf(hydro_gamma_plus_one_over_two_gamma * pdpL +
+                      hydro_gamma_minus_one_over_two_gamma);
+      if (SL < 0.0f) {
+        Whalf[0] = WL[0] * (pdpL + hydro_gamma_minus_one_over_gamma_plus_one) /
+                   (hydro_gamma_minus_one_over_gamma_plus_one * pdpL + 1.0f);
+        vhalf = u - vL;
+        Whalf[4] = p;
+      } else {
+        Whalf[0] = WL[0];
+        vhalf = 0.0f;
+        Whalf[4] = WL[4];
       }
     } else {
-      Whalf[1] = WL[1];
-      Whalf[2] = WL[2];
-      Whalf[3] = WL[3];
-      pdpL = p / WL[4];
-      if (p > WL[4]) {
-        /* shockwave */
-        SL =
-            vL - aL * sqrtf(const_riemann_gp1d2g * pdpL + const_riemann_gm1d2g);
-        if (SL < 0.0f) {
-          Whalf[0] = WL[0] * (pdpL + const_riemann_gm1dgp1) /
-                     (const_riemann_gm1dgp1 * pdpL + 1.0f);
+      /* rarefaction wave */
+      SHL = vL - aL;
+      if (SHL < 0.0f) {
+        STL = u - aL * pow_gamma_minus_one_over_two_gamma(pdpL);
+        if (STL > 0.0f) {
+          Whalf[0] =
+              WL[0] * pow_two_over_gamma_minus_one(
+                          hydro_two_over_gamma_plus_one +
+                          hydro_gamma_minus_one_over_gamma_plus_one / aL * vL);
+          vhalf = hydro_two_over_gamma_plus_one *
+                      (aL + hydro_gamma_minus_one_over_two * vL) -
+                  vL;
+          Whalf[4] =
+              WL[4] * pow_two_gamma_over_gamma_minus_one(
+                          hydro_two_over_gamma_plus_one +
+                          hydro_gamma_minus_one_over_gamma_plus_one / aL * vL);
+        } else {
+          Whalf[0] = WL[0] * pow_one_over_gamma(pdpL);
           vhalf = u - vL;
           Whalf[4] = p;
-        } else {
-          Whalf[0] = WL[0];
-          vhalf = 0.0f;
-          Whalf[4] = WL[4];
         }
       } else {
-        /* rarefaction wave */
-        SHL = vL - aL;
-        if (SHL < 0.0f) {
-          STL = u - aL * powf(pdpL, const_riemann_gm1d2g);
-          if (STL > 0.0f) {
-            Whalf[0] = WL[0] * powf(const_riemann_tdgp1 +
-                                        const_riemann_gm1dgp1 / aL * vL,
-                                    const_riemann_tdgm1);
-            vhalf = const_riemann_tdgp1 * (aL + const_riemann_gm1d2 * vL) - vL;
-            Whalf[4] = WL[4] * powf(const_riemann_tdgp1 +
-                                        const_riemann_gm1dgp1 / aL * vL,
-                                    const_riemann_tgdgm1);
-          } else {
-            Whalf[0] = WL[0] * powf(pdpL, const_riemann_ginv);
-            vhalf = u - vL;
-            Whalf[4] = p;
-          }
-        } else {
-          Whalf[0] = WL[0];
-          vhalf = 0.0f;
-          Whalf[4] = WL[4];
-        }
+        Whalf[0] = WL[0];
+        vhalf = 0.0f;
+        Whalf[4] = WL[4];
       }
     }
   }
@@ -661,10 +507,10 @@ __attribute__((always_inline)) INLINE static void riemann_solver_solve(
 }
 
 __attribute__((always_inline)) INLINE static void riemann_solve_for_flux(
-    GFLOAT* Wi, GFLOAT* Wj, float* n_unit, float* vij, GFLOAT* totflux) {
+    float* Wi, float* Wj, float* n_unit, float* vij, float* totflux) {
 
-  GFLOAT Whalf[5];
-  GFLOAT flux[5][3];
+  float Whalf[5];
+  float flux[5][3];
   float vtot[3];
   float rhoe;
 
@@ -690,7 +536,7 @@ __attribute__((always_inline)) INLINE static void riemann_solve_for_flux(
   /* eqn. (15) */
   /* F_P = \rho e ( \vec{v} - \vec{v_{ij}} ) + P \vec{v} */
   /* \rho e = P / (\gamma-1) + 1/2 \rho \vec{v}^2 */
-  rhoe = Whalf[4] / (const_hydro_gamma - 1.0f) +
+  rhoe = Whalf[4] / hydro_gamma_minus_one +
          0.5f * Whalf[0] *
              (vtot[0] * vtot[0] + vtot[1] * vtot[1] + vtot[2] * vtot[2]);
   flux[4][0] = rhoe * Whalf[1] + Whalf[4] * vtot[0];
diff --git a/src/riemann/riemann_hllc.h b/src/riemann/riemann_hllc.h
index 6c583f6410f53ed64d630082926d816129768fab..b8b1239d7799221c98522c06631aba5cabe69183 100644
--- a/src/riemann/riemann_hllc.h
+++ b/src/riemann/riemann_hllc.h
@@ -20,13 +20,17 @@
 #ifndef SWIFT_RIEMANN_HLLC_H
 #define SWIFT_RIEMANN_HLLC_H
 
+#include "adiabatic_index.h"
+#include "minmax.h"
+#include "riemann_vacuum.h"
+
 __attribute__((always_inline)) INLINE static void riemann_solve_for_flux(
-    GFLOAT *WL, GFLOAT *WR, float *n, float *vij, GFLOAT *totflux) {
+    float *WL, float *WR, float *n, float *vij, float *totflux) {
 
-  GFLOAT uL, uR, aL, aR;
-  GFLOAT rhobar, abar, pPVRS, pstar, qL, qR, SL, SR, Sstar;
-  GFLOAT v2, eL, eR;
-  GFLOAT UstarL[5], UstarR[5];
+  float uL, uR, aL, aR;
+  float rhobar, abar, pPVRS, pstar, qL, qR, SL, SR, Sstar;
+  float v2, eL, eR;
+  float UstarL[5], UstarR[5];
 
   /* Handle pure vacuum */
   if (!WL[0] && !WR[0]) {
@@ -41,37 +45,32 @@ __attribute__((always_inline)) INLINE static void riemann_solve_for_flux(
   /* STEP 0: obtain velocity in interface frame */
   uL = WL[1] * n[0] + WL[2] * n[1] + WL[3] * n[2];
   uR = WR[1] * n[0] + WR[2] * n[1] + WR[3] * n[2];
-  aL = sqrtf(const_hydro_gamma * WL[4] / WL[0]);
-  aR = sqrtf(const_hydro_gamma * WR[4] / WR[0]);
+  aL = sqrtf(hydro_gamma * WL[4] / WL[0]);
+  aR = sqrtf(hydro_gamma * WR[4] / WR[0]);
 
   /* Handle vacuum: vacuum does not require iteration and is always exact */
-  if (!WL[0] || !WR[0]) {
-    error("Vacuum not yet supported");
-  }
-  if (2. * aL / (const_hydro_gamma - 1.) + 2. * aR / (const_hydro_gamma - 1.) <
-      fabs(uL - uR)) {
-    error("Vacuum not yet supported");
+  if (riemann_is_vacuum(WL, WR, uL, uR, aL, aR)) {
+    riemann_solve_vacuum_flux(WL, WR, uL, uR, aL, aR, n, vij, totflux);
+    return;
   }
 
   /* STEP 1: pressure estimate */
   rhobar = 0.5 * (WL[0] + WR[0]);
   abar = 0.5 * (aL + aR);
   pPVRS = 0.5 * (WL[4] + WR[4]) - 0.5 * (uR - uL) * rhobar * abar;
-  pstar = fmaxf(0., pPVRS);
+  pstar = max(0., pPVRS);
 
   /* STEP 2: wave speed estimates
      all these speeds are along the interface normal, since uL and uR are */
   qL = 1.;
   if (pstar > WL[4]) {
     qL = sqrtf(1. +
-               0.5 * (const_hydro_gamma + 1.) / const_hydro_gamma *
-                   (pstar / WL[4] - 1.));
+               0.5 * (hydro_gamma + 1.) / hydro_gamma * (pstar / WL[4] - 1.));
   }
   qR = 1.;
   if (pstar > WR[4]) {
     qR = sqrtf(1. +
-               0.5 * (const_hydro_gamma + 1.) / const_hydro_gamma *
-                   (pstar / WR[4] - 1.));
+               0.5 * (hydro_gamma + 1.) / hydro_gamma * (pstar / WR[4] - 1.));
   }
   SL = uL - aL * qL;
   SR = uR + aR * qR;
@@ -86,9 +85,9 @@ __attribute__((always_inline)) INLINE static void riemann_solve_for_flux(
        (not rotated to interface frame) */
     totflux[1] = WL[0] * WL[1] * uL + WL[4] * n[0];
     totflux[2] = WL[0] * WL[2] * uL + WL[4] * n[1];
-    totflux[3] = WL[0] * WL[2] * uL + WL[4] * n[2];
+    totflux[3] = WL[0] * WL[3] * uL + WL[4] * n[2];
     v2 = WL[1] * WL[1] + WL[2] * WL[2] + WL[3] * WL[3];
-    eL = WL[4] / (const_hydro_gamma - 1.) / WL[0] + 0.5 * v2;
+    eL = WL[4] / hydro_gamma_minus_one / WL[0] + 0.5 * v2;
     totflux[4] = WL[0] * eL * uL + WL[4] * uL;
     if (SL < 0.) {
       /* add flux FstarL */
@@ -118,7 +117,7 @@ __attribute__((always_inline)) INLINE static void riemann_solve_for_flux(
     totflux[2] = WR[0] * WR[2] * uR + WR[4] * n[1];
     totflux[3] = WR[0] * WR[3] * uR + WR[4] * n[2];
     v2 = WR[1] * WR[1] + WR[2] * WR[2] + WR[3] * WR[3];
-    eR = WR[4] / (const_hydro_gamma - 1.) / WR[0] + 0.5 * v2;
+    eR = WR[4] / hydro_gamma_minus_one / WR[0] + 0.5 * v2;
     totflux[4] = WR[0] * eR * uR + WR[4] * uR;
     if (SR > 0.) {
       /* add flux FstarR */
diff --git a/src/riemann/riemann_trrs.h b/src/riemann/riemann_trrs.h
index efdbfb59877c09a59d535a4785ad74620c0f3651..b13a76b4c57af548497780e974e5c9ee3a721fac 100644
--- a/src/riemann/riemann_trrs.h
+++ b/src/riemann/riemann_trrs.h
@@ -20,19 +20,8 @@
 #ifndef SWIFT_RIEMANN_TRRS_H
 #define SWIFT_RIEMANN_TRRS_H
 
-/* frequently used combinations of const_hydro_gamma */
-#define const_riemann_gp1d2g \
-  (0.5f * (const_hydro_gamma + 1.0f) / const_hydro_gamma)
-#define const_riemann_gm1d2g \
-  (0.5f * (const_hydro_gamma - 1.0f) / const_hydro_gamma)
-#define const_riemann_gm1dgp1 \
-  ((const_hydro_gamma - 1.0f) / (const_hydro_gamma + 1.0f))
-#define const_riemann_tdgp1 (2.0f / (const_hydro_gamma + 1.0f))
-#define const_riemann_tdgm1 (2.0f / (const_hydro_gamma - 1.0f))
-#define const_riemann_gm1d2 (0.5f * (const_hydro_gamma - 1.0f))
-#define const_riemann_tgdgm1 \
-  (2.0f * const_hydro_gamma / (const_hydro_gamma - 1.0f))
-#define const_riemann_ginv (1.0f / const_hydro_gamma)
+#include "adiabatic_index.h"
+#include "riemann_vacuum.h"
 
 /**
  * @brief Solve the Riemann problem using the Two Rarefaction Riemann Solver
@@ -50,31 +39,39 @@
  * @param n_unit Normal vector of the interface
  */
 __attribute__((always_inline)) INLINE static void riemann_solver_solve(
-    GFLOAT* WL, GFLOAT* WR, GFLOAT* Whalf, float* n_unit) {
-  GFLOAT aL, aR;
-  GFLOAT PLR;
-  GFLOAT vL, vR;
-  GFLOAT ustar, pstar;
-  GFLOAT vhalf;
-  GFLOAT pdpR, SHR, STR;
-  GFLOAT pdpL, SHL, STL;
+    float* WL, float* WR, float* Whalf, float* n_unit) {
+  float aL, aR;
+  float PLR;
+  float vL, vR;
+  float ustar, pstar;
+  float vhalf;
+  float pdpR, SHR, STR;
+  float pdpL, SHL, STL;
 
   /* calculate the velocities along the interface normal */
   vL = WL[1] * n_unit[0] + WL[2] * n_unit[1] + WL[3] * n_unit[2];
   vR = WR[1] * n_unit[0] + WR[2] * n_unit[1] + WR[3] * n_unit[2];
 
   /* calculate the sound speeds */
-  aL = sqrtf(const_hydro_gamma * WL[4] / WL[0]);
-  aR = sqrtf(const_hydro_gamma * WR[4] / WR[0]);
+  aL = sqrtf(hydro_gamma * WL[4] / WL[0]);
+  aR = sqrtf(hydro_gamma * WR[4] / WR[0]);
+
+  if (riemann_is_vacuum(WL, WR, vL, vR, aL, aR)) {
+    riemann_solve_vacuum(WL, WR, vL, vR, aL, aR, Whalf, n_unit);
+    return;
+  }
 
   /* calculate the velocity and pressure in the intermediate state */
-  PLR = pow(WL[4] / WR[4], const_riemann_gm1d2g);
-  ustar = (PLR * vL / aL + vR / aR + const_riemann_tdgm1 * (PLR - 1.0f)) /
+  PLR = pow_gamma_minus_one_over_two_gamma(WL[4] / WR[4]);
+  ustar = (PLR * vL / aL + vR / aR +
+           hydro_two_over_gamma_minus_one * (PLR - 1.0f)) /
           (PLR / aL + 1.0f / aR);
-  pstar = 0.5f * (WL[4] * pow(1.0f + const_riemann_gm1d2 / aL * (vL - ustar),
-                              const_riemann_tgdgm1) +
-                  WR[4] * pow(1.0f + const_riemann_gm1d2 / aR * (ustar - vR),
-                              const_riemann_tgdgm1));
+  pstar =
+      0.5f *
+      (WL[4] * pow_two_gamma_over_gamma_minus_one(
+                   1.0f + hydro_gamma_minus_one_over_two / aL * (vL - ustar)) +
+       WR[4] * pow_two_gamma_over_gamma_minus_one(
+                   1.0f + hydro_gamma_minus_one_over_two / aR * (ustar - vR)));
 
   /* sample the solution */
   if (ustar < 0.0f) {
@@ -86,17 +83,21 @@ __attribute__((always_inline)) INLINE static void riemann_solver_solve(
     /* always a rarefaction wave, that's the approximation */
     SHR = vR + aR;
     if (SHR > 0.0f) {
-      STR = ustar + aR * pow(pdpR, const_riemann_gm1d2g);
+      STR = ustar + aR * pow_gamma_minus_one_over_two_gamma(pdpR);
       if (STR <= 0.0f) {
         Whalf[0] =
-            WR[0] * pow(const_riemann_tdgp1 - const_riemann_gm1dgp1 / aR * vR,
-                        const_riemann_tdgm1);
-        vhalf = const_riemann_tdgp1 * (-aR + const_riemann_gm1d2 * vR) - vR;
+            WR[0] * pow_two_over_gamma_minus_one(
+                        hydro_two_over_gamma_plus_one -
+                        hydro_gamma_minus_one_over_gamma_plus_one / aR * vR);
+        vhalf = hydro_two_over_gamma_plus_one *
+                    (-aR + hydro_gamma_minus_one_over_two * vR) -
+                vR;
         Whalf[4] =
-            WR[4] * pow(const_riemann_tdgp1 - const_riemann_gm1dgp1 / aR * vR,
-                        const_riemann_tgdgm1);
+            WR[4] * pow_two_gamma_over_gamma_minus_one(
+                        hydro_two_over_gamma_plus_one -
+                        hydro_gamma_minus_one_over_gamma_plus_one / aR * vR);
       } else {
-        Whalf[0] = WR[0] * pow(pdpR, const_riemann_ginv);
+        Whalf[0] = WR[0] * pow_one_over_gamma(pdpR);
         vhalf = ustar - vR;
         Whalf[4] = pstar;
       }
@@ -114,17 +115,21 @@ __attribute__((always_inline)) INLINE static void riemann_solver_solve(
     /* rarefaction wave */
     SHL = vL - aL;
     if (SHL < 0.0f) {
-      STL = ustar - aL * pow(pdpL, const_riemann_gm1d2g);
+      STL = ustar - aL * pow_gamma_minus_one_over_two_gamma(pdpL);
       if (STL > 0.0f) {
         Whalf[0] =
-            WL[0] * pow(const_riemann_tdgp1 + const_riemann_gm1dgp1 / aL * vL,
-                        const_riemann_tdgm1);
-        vhalf = const_riemann_tdgp1 * (aL + const_riemann_gm1d2 * vL) - vL;
+            WL[0] * pow_two_over_gamma_minus_one(
+                        hydro_two_over_gamma_plus_one +
+                        hydro_gamma_minus_one_over_gamma_plus_one / aL * vL);
+        vhalf = hydro_two_over_gamma_plus_one *
+                    (aL + hydro_gamma_minus_one_over_two * vL) -
+                vL;
         Whalf[4] =
-            WL[4] * pow(const_riemann_tdgp1 + const_riemann_gm1dgp1 / aL * vL,
-                        const_riemann_tgdgm1);
+            WL[4] * pow_two_gamma_over_gamma_minus_one(
+                        hydro_two_over_gamma_plus_one +
+                        hydro_gamma_minus_one_over_gamma_plus_one / aL * vL);
       } else {
-        Whalf[0] = WL[0] * pow(pdpL, const_riemann_ginv);
+        Whalf[0] = WL[0] * pow_one_over_gamma(pdpL);
         vhalf = ustar - vL;
         Whalf[4] = pstar;
       }
@@ -141,4 +146,53 @@ __attribute__((always_inline)) INLINE static void riemann_solver_solve(
   Whalf[3] += vhalf * n_unit[2];
 }
 
+__attribute__((always_inline)) INLINE static void riemann_solve_for_flux(
+    float* Wi, float* Wj, float* n_unit, float* vij, float* totflux) {
+  /* Solve the Riemann problem, then project the fluxes of Whalf on n_unit. */
+  float Whalf[5];
+  float flux[5][3];
+  float vtot[3];
+  float rhoe;
+
+  riemann_solver_solve(Wi, Wj, Whalf, n_unit);
+  /* Row 0: density Whalf[0] times the velocity components Whalf[1..3]. */
+  flux[0][0] = Whalf[0] * Whalf[1];
+  flux[0][1] = Whalf[0] * Whalf[2];
+  flux[0][2] = Whalf[0] * Whalf[3];
+  /* Rows 1-3: vtot is Whalf[1..3] + vij; pressure Whalf[4] on the diagonal. */
+  vtot[0] = Whalf[1] + vij[0];
+  vtot[1] = Whalf[2] + vij[1];
+  vtot[2] = Whalf[3] + vij[2];
+  flux[1][0] = Whalf[0] * vtot[0] * Whalf[1] + Whalf[4];
+  flux[1][1] = Whalf[0] * vtot[0] * Whalf[2];
+  flux[1][2] = Whalf[0] * vtot[0] * Whalf[3];
+  flux[2][0] = Whalf[0] * vtot[1] * Whalf[1];
+  flux[2][1] = Whalf[0] * vtot[1] * Whalf[2] + Whalf[4];
+  flux[2][2] = Whalf[0] * vtot[1] * Whalf[3];
+  flux[3][0] = Whalf[0] * vtot[2] * Whalf[1];
+  flux[3][1] = Whalf[0] * vtot[2] * Whalf[2];
+  flux[3][2] = Whalf[0] * vtot[2] * Whalf[3] + Whalf[4];
+
+  /* eqn. (15) */
+  /* F_P = \rho e ( \vec{v} - \vec{v_{ij}} ) + P \vec{v} */
+  /* \rho e = P / (\gamma-1) + 1/2 \rho \vec{v}^2, with \vec{v} = vtot */
+  rhoe = Whalf[4] / hydro_gamma_minus_one +
+         0.5f * Whalf[0] *
+             (vtot[0] * vtot[0] + vtot[1] * vtot[1] + vtot[2] * vtot[2]);
+  flux[4][0] = rhoe * Whalf[1] + Whalf[4] * vtot[0];
+  flux[4][1] = rhoe * Whalf[2] + Whalf[4] * vtot[1];
+  flux[4][2] = rhoe * Whalf[3] + Whalf[4] * vtot[2];
+  /* Dot each row of flux with n_unit to get the five entries of totflux. */
+  totflux[0] =
+      flux[0][0] * n_unit[0] + flux[0][1] * n_unit[1] + flux[0][2] * n_unit[2];
+  totflux[1] =
+      flux[1][0] * n_unit[0] + flux[1][1] * n_unit[1] + flux[1][2] * n_unit[2];
+  totflux[2] =
+      flux[2][0] * n_unit[0] + flux[2][1] * n_unit[1] + flux[2][2] * n_unit[2];
+  totflux[3] =
+      flux[3][0] * n_unit[0] + flux[3][1] * n_unit[1] + flux[3][2] * n_unit[2];
+  totflux[4] =
+      flux[4][0] * n_unit[0] + flux[4][1] * n_unit[1] + flux[4][2] * n_unit[2];
+}
+
 #endif /* SWIFT_RIEMANN_TRRS_H */
diff --git a/src/riemann/riemann_vacuum.h b/src/riemann/riemann_vacuum.h
new file mode 100644
index 0000000000000000000000000000000000000000..743abb910193380793ccdf3d7eddbcedc4968691
--- /dev/null
+++ b/src/riemann/riemann_vacuum.h
@@ -0,0 +1,254 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2016 Bert Vandenbroucke (bert.vandenbroucke@gmail.com)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+#ifndef SWIFT_RIEMANN_VACUUM_H
+#define SWIFT_RIEMANN_VACUUM_H
+
+/**
+ * @brief Check if the given input states are vacuum or will generate vacuum
+ */
+__attribute__((always_inline)) INLINE static int riemann_is_vacuum(
+    float* WL, float* WR, float vL, float vR, float aL, float aR) {
+  /* Returns 1 if either test below detects vacuum, 0 otherwise. */
+  /* vacuum: WL[0] or WR[0] is exactly zero */
+  if (!WL[0] || !WR[0]) {
+    return 1;
+  }
+  /* vacuum generation: vR - vL is at least 2 (aL + aR) / (gamma - 1) */
+  if (2.0f * aL / hydro_gamma_minus_one + 2.0f * aR / hydro_gamma_minus_one <=
+      vR - vL) {
+    return 1;
+  }
+
+  /* no vacuum */
+  return 0;
+}
+
+/**
+ * @brief Vacuum Riemann solver, based on section 4.6 in Toro
+ *
+ * @param WL The left state vector
+ * @param WR The right state vector
+ * @param vL The left velocity along the interface normal
+ * @param vR The right velocity along the interface normal
+ * @param aL The left sound speed
+ * @param aR The right sound speed
+ * @param Whalf Empty state vector to store the solution in
+ * @param n_unit Normal vector of the interface
+ */
+__attribute__((always_inline)) INLINE static void riemann_solve_vacuum(
+    float* WL, float* WR, float vL, float vR, float aL, float aR, float* Whalf,
+    float* n_unit) {
+
+  float SL, SR;
+  float vhalf;
+  /* vhalf is added to Whalf[1..3] along n_unit at the end of the function. */
+  if (!WR[0] && !WL[0]) {
+    /* if both states are vacuum, the solution is also vacuum */
+    Whalf[0] = 0.0f;
+    Whalf[1] = 0.0f;
+    Whalf[2] = 0.0f;
+    Whalf[3] = 0.0f;
+    Whalf[4] = 0.0f;
+    return;
+  }
+  if (!WR[0]) {
+    Whalf[1] = WL[1];
+    Whalf[2] = WL[2];
+    Whalf[3] = WL[3];
+    /* vacuum right state: the solution is built from WL only */
+    if (vL < aL) {
+      SL = vL + hydro_two_over_gamma_minus_one * aL;
+      if (SL > 0.0f) {
+        Whalf[0] =
+            WL[0] * pow_two_over_gamma_minus_one(
+                        hydro_two_over_gamma_plus_one +
+                        hydro_gamma_minus_one_over_gamma_plus_one / aL * vL);
+        vhalf = hydro_two_over_gamma_plus_one *
+                    (aL + hydro_gamma_minus_one_over_two * vL) -
+                vL;
+        Whalf[4] =
+            WL[4] * pow_two_gamma_over_gamma_minus_one(
+                        hydro_two_over_gamma_plus_one +
+                        hydro_gamma_minus_one_over_gamma_plus_one / aL * vL);
+      } else {
+        Whalf[0] = 0.0f;
+        Whalf[1] = 0.0f;
+        Whalf[2] = 0.0f;
+        Whalf[3] = 0.0f;
+        Whalf[4] = 0.0f;
+        return;
+      }
+    } else {
+      Whalf[0] = WL[0];
+      vhalf = 0.0f;
+      Whalf[4] = WL[4];
+    }
+  } else {
+    if (!WL[0]) {
+      Whalf[1] = WR[1];
+      Whalf[2] = WR[2];
+      Whalf[3] = WR[3];
+      /* vacuum left state: the solution is built from WR only */
+      if (-aR < vR) {
+        SR = vR - hydro_two_over_gamma_minus_one * aR;
+        if (SR >= 0.0f) {
+          Whalf[0] = 0.0f;
+          Whalf[1] = 0.0f;
+          Whalf[2] = 0.0f;
+          Whalf[3] = 0.0f;
+          Whalf[4] = 0.0f;
+          return;
+        } else {
+          Whalf[0] =
+              WR[0] * pow_two_over_gamma_minus_one(
+                          hydro_two_over_gamma_plus_one -
+                          hydro_gamma_minus_one_over_gamma_plus_one / aR * vR);
+          vhalf = hydro_two_over_gamma_plus_one *
+                      (-aR + hydro_gamma_minus_one_over_two * vR) -
+                  vR;
+          Whalf[4] =
+              WR[4] * pow_two_gamma_over_gamma_minus_one(
+                          hydro_two_over_gamma_plus_one -
+                          hydro_gamma_minus_one_over_gamma_plus_one / aR * vR);
+        }
+      } else {
+        Whalf[0] = WR[0];
+        vhalf = 0.0f;
+        Whalf[4] = WR[4];
+      }
+    } else {
+      /* vacuum generation: neither WL[0] nor WR[0] is zero here */
+      SR = vR - hydro_two_over_gamma_minus_one * aR;
+      SL = vL + hydro_two_over_gamma_minus_one * aL;
+      if (SR > 0.0f && SL < 0.0f) {
+        Whalf[0] = 0.0f;
+        Whalf[1] = 0.0f;
+        Whalf[2] = 0.0f;
+        Whalf[3] = 0.0f;
+        Whalf[4] = 0.0f;
+        return;
+      } else {
+        if (SL >= 0.0f) {
+          Whalf[1] = WL[1];
+          Whalf[2] = WL[2];
+          Whalf[3] = WL[3];
+          if (aL > vL) {
+            Whalf[0] = WL[0] *
+                       pow_two_over_gamma_minus_one(
+                           hydro_two_over_gamma_plus_one +
+                           hydro_gamma_minus_one_over_gamma_plus_one / aL * vL);
+            vhalf = hydro_two_over_gamma_plus_one *
+                        (aL + hydro_gamma_minus_one_over_two * vL) -
+                    vL;
+            Whalf[4] = WL[4] *
+                       pow_two_gamma_over_gamma_minus_one(
+                           hydro_two_over_gamma_plus_one +
+                           hydro_gamma_minus_one_over_gamma_plus_one / aL * vL);
+          } else {
+            Whalf[0] = WL[0];
+            vhalf = 0.0f;
+            Whalf[4] = WL[4];
+          }
+        } else {
+          Whalf[1] = WR[1];
+          Whalf[2] = WR[2];
+          Whalf[3] = WR[3];
+          if (-aR < vR) {
+            Whalf[0] = WR[0] *
+                       pow_two_over_gamma_minus_one(
+                           hydro_two_over_gamma_plus_one -
+                           hydro_gamma_minus_one_over_gamma_plus_one / aR * vR);
+            vhalf = hydro_two_over_gamma_plus_one *
+                        (-aR + hydro_gamma_minus_one_over_two * vR) -
+                    vR;
+            Whalf[4] = WR[4] *
+                       pow_two_gamma_over_gamma_minus_one(
+                           hydro_two_over_gamma_plus_one -
+                           hydro_gamma_minus_one_over_gamma_plus_one / aR * vR);
+          } else {
+            Whalf[0] = WR[0];
+            vhalf = 0.0f;
+            Whalf[4] = WR[4];
+          }
+        }
+      }
+    }
+  }
+
+  /* Add the velocity solution along the interface normal to the velocities */
+  Whalf[1] += vhalf * n_unit[0];
+  Whalf[2] += vhalf * n_unit[1];
+  Whalf[3] += vhalf * n_unit[2];
+}
+
+/**
+ * @brief Solve the vacuum Riemann problem and return the fluxes
+ */
+__attribute__((always_inline)) INLINE static void riemann_solve_vacuum_flux(
+    float* WL, float* WR, float vL, float vR, float aL, float aR, float* n_unit,
+    float* vij, float* totflux) {
+  /* Solve the vacuum problem, then project the fluxes of Whalf on n_unit. */
+  float Whalf[5];
+  float flux[5][3];
+  float vtot[3];
+  float rhoe;
+
+  riemann_solve_vacuum(WL, WR, vL, vR, aL, aR, Whalf, n_unit);
+  /* Row 0: density Whalf[0] times the velocity components Whalf[1..3]. */
+  flux[0][0] = Whalf[0] * Whalf[1];
+  flux[0][1] = Whalf[0] * Whalf[2];
+  flux[0][2] = Whalf[0] * Whalf[3];
+  /* Rows 1-3: vtot is Whalf[1..3] + vij; pressure Whalf[4] on the diagonal. */
+  vtot[0] = Whalf[1] + vij[0];
+  vtot[1] = Whalf[2] + vij[1];
+  vtot[2] = Whalf[3] + vij[2];
+  flux[1][0] = Whalf[0] * vtot[0] * Whalf[1] + Whalf[4];
+  flux[1][1] = Whalf[0] * vtot[0] * Whalf[2];
+  flux[1][2] = Whalf[0] * vtot[0] * Whalf[3];
+  flux[2][0] = Whalf[0] * vtot[1] * Whalf[1];
+  flux[2][1] = Whalf[0] * vtot[1] * Whalf[2] + Whalf[4];
+  flux[2][2] = Whalf[0] * vtot[1] * Whalf[3];
+  flux[3][0] = Whalf[0] * vtot[2] * Whalf[1];
+  flux[3][1] = Whalf[0] * vtot[2] * Whalf[2];
+  flux[3][2] = Whalf[0] * vtot[2] * Whalf[3] + Whalf[4];
+
+  /* eqn. (15) */
+  /* F_P = \rho e ( \vec{v} - \vec{v_{ij}} ) + P \vec{v} */
+  /* \rho e = P / (\gamma-1) + 1/2 \rho \vec{v}^2, with \vec{v} = vtot */
+  rhoe = Whalf[4] / hydro_gamma_minus_one +
+         0.5f * Whalf[0] *
+             (vtot[0] * vtot[0] + vtot[1] * vtot[1] + vtot[2] * vtot[2]);
+  flux[4][0] = rhoe * Whalf[1] + Whalf[4] * vtot[0];
+  flux[4][1] = rhoe * Whalf[2] + Whalf[4] * vtot[1];
+  flux[4][2] = rhoe * Whalf[3] + Whalf[4] * vtot[2];
+  /* Dot each row of flux with n_unit to get the five entries of totflux. */
+  totflux[0] =
+      flux[0][0] * n_unit[0] + flux[0][1] * n_unit[1] + flux[0][2] * n_unit[2];
+  totflux[1] =
+      flux[1][0] * n_unit[0] + flux[1][1] * n_unit[1] + flux[1][2] * n_unit[2];
+  totflux[2] =
+      flux[2][0] * n_unit[0] + flux[2][1] * n_unit[1] + flux[2][2] * n_unit[2];
+  totflux[3] =
+      flux[3][0] * n_unit[0] + flux[3][1] * n_unit[1] + flux[3][2] * n_unit[2];
+  totflux[4] =
+      flux[4][0] * n_unit[0] + flux[4][1] * n_unit[1] + flux[4][2] * n_unit[2];
+}
+
+#endif /* SWIFT_RIEMANN_VACUUM_H */
diff --git a/src/runner.c b/src/runner.c
index 6e99d8cece2cf086b1209ac2f8314301b9779e20..0a08bb5abf9af25aea889e3c9279eacb0271f388 100644
--- a/src/runner.c
+++ b/src/runner.c
@@ -42,6 +42,7 @@
 #include "atomic.h"
 #include "cell.h"
 #include "const.h"
+#include "cooling.h"
 #include "debug.h"
 #include "drift.h"
 #include "engine.h"
@@ -57,6 +58,18 @@
 #include "timers.h"
 #include "timestep.h"
 
+/**
+ * @brief  Entry in a list of sorted indices.
+ */
+struct entry {
+
+  /*! Distance on the axis */
+  float d;
+
+  /*! Particle index */
+  int i;
+};
+
 /* Orientation of the cell pairs */
 const double runner_shift[13][3] = {
     {5.773502691896258e-01, 5.773502691896258e-01, 5.773502691896258e-01},
@@ -82,6 +95,13 @@ const char runner_flip[27] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
 #define FUNCTION density
 #include "runner_doiact.h"
 
+/* Import the gradient loop functions (if required). */
+#ifdef EXTRA_HYDRO_LOOP
+#undef FUNCTION
+#define FUNCTION gradient
+#include "runner_doiact.h"
+#endif
+
 /* Import the force loop functions. */
 #undef FUNCTION
 #define FUNCTION force
@@ -105,9 +125,13 @@ void runner_do_grav_external(struct runner *r, struct cell *c, int timer) {
   const int ti_current = r->e->ti_current;
   const struct external_potential *potential = r->e->external_potential;
   const struct phys_const *constants = r->e->physical_constants;
+  const double time = r->e->time;
 
   TIMER_TIC;
 
+  /* Anything to do here? */
+  if (c->ti_end_min > ti_current) return;
+
   /* Recurse? */
   if (c->split) {
     for (int k = 0; k < 8; k++)
@@ -128,13 +152,64 @@ void runner_do_grav_external(struct runner *r, struct cell *c, int timer) {
     /* Is this part within the time step? */
     if (g->ti_end <= ti_current) {
 
-      external_gravity(potential, constants, g);
+      external_gravity_acceleration(time, potential, constants, g);
     }
   }
 
   if (timer) TIMER_TOC(timer_dograv_external);
 }
 
+/**
+ * @brief Calculate change in thermal state of particles induced
+ * by radiative cooling and heating.
+ *
+ * @param r runner task
+ * @param c cell
+ * @param timer 1 if the time is to be recorded.
+ */
+void runner_do_cooling(struct runner *r, struct cell *c, int timer) {
+
+  struct part *restrict parts = c->parts;
+  struct xpart *restrict xparts = c->xparts;
+  const int count = c->count;
+  const int ti_current = r->e->ti_current;
+  const struct cooling_function_data *cooling_func = r->e->cooling_func;
+  const struct phys_const *constants = r->e->physical_constants;
+  const struct UnitSystem *us = r->e->internalUnits;
+  const double timeBase = r->e->timeBase;
+
+  TIMER_TIC;
+  /* NOTE(review): no c->ti_end_min early exit here, unlike runner_do_init. */
+  /* Recurse? Progeny get timer = 0; this path returns before TIMER_TOC. */
+  if (c->split) {
+    for (int k = 0; k < 8; k++)
+      if (c->progeny[k] != NULL) runner_do_cooling(r, c->progeny[k], 0);
+    return;
+  }
+
+#ifdef TASK_VERBOSE
+  OUT;
+#endif
+
+  /* Loop over the parts in this cell. */
+  for (int i = 0; i < count; i++) {
+
+    /* Get a direct pointer on the part and on its xpart at the same index. */
+    struct part *restrict p = &parts[i];
+    struct xpart *restrict xp = &xparts[i];
+
+    /* Kick has already updated ti_end, so need to check ti_begin */
+    if (p->ti_begin == ti_current) {
+      /* Step length: (ti_end - ti_begin) scaled by the engine's timeBase. */
+      const double dt = (p->ti_end - p->ti_begin) * timeBase;
+
+      cooling_cool_part(constants, us, cooling_func, p, xp, dt);
+    }
+  }
+
+  if (timer) TIMER_TOC(timer_do_cooling);
+}
+
 /**
  * @brief Sort the entries in ascending order using QuickSort.
  *
@@ -384,6 +459,9 @@ void runner_do_init(struct runner *r, struct cell *c, int timer) {
 
   TIMER_TIC;
 
+  /* Anything to do here? */
+  if (c->ti_end_min > ti_current) return;
+
   /* Recurse? */
   if (c->split) {
     for (int k = 0; k < 8; k++)
@@ -422,7 +500,52 @@ void runner_do_init(struct runner *r, struct cell *c, int timer) {
 }
 
 /**
- * @brief Intermediate task between density and force
+ * @brief Intermediate task after the gradient loop that does final operations
+ * on the gradient quantities and optionally slope limits the gradients
+ *
+ * @param r The runner thread.
+ * @param c The cell.
+ */
+void runner_do_extra_ghost(struct runner *r, struct cell *c) {
+  /* Without EXTRA_HYDRO_LOOP this function only raises an error. */
+#ifdef EXTRA_HYDRO_LOOP
+
+  struct part *restrict parts = c->parts;
+  const int count = c->count;
+  const int ti_current = r->e->ti_current;
+
+  /* Anything to do here? Skip cells whose ti_end_min is after ti_current. */
+  if (c->ti_end_min > ti_current) return;
+
+  /* Recurse? Split cells only forward the call to their non-NULL progeny. */
+  if (c->split) {
+    for (int k = 0; k < 8; k++)
+      if (c->progeny[k] != NULL) runner_do_extra_ghost(r, c->progeny[k]);
+    return;
+  } else {
+
+    /* Loop over the parts in this cell. */
+    for (int i = 0; i < count; i++) {
+
+      /* Get a direct pointer on the part. */
+      struct part *restrict p = &parts[i];
+      /* Only parts with ti_end <= ti_current are finalised. */
+      if (p->ti_end <= ti_current) {
+
+        /* Get ready for a force calculation */
+        hydro_end_gradient(p);
+      }
+    }
+  }
+
+#else
+  error("SWIFT was not compiled with the extra hydro loop activated.");
+#endif
+}
+
+/**
+ * @brief Intermediate task after the density to check that the smoothing
+ * lengths are correct.
  *
  * @param r The runner thread.
  * @param c The cell.
@@ -444,6 +567,9 @@ void runner_do_ghost(struct runner *r, struct cell *c) {
 
   TIMER_TIC;
 
+  /* Anything to do here? */
+  if (c->ti_end_min > ti_current) return;
+
   /* Recurse? */
   if (c->split) {
     for (int k = 0; k < 8; k++)
@@ -487,8 +613,8 @@ void runner_do_ghost(struct runner *r, struct cell *c) {
           h_corr = (target_wcount - p->density.wcount) / p->density.wcount_dh;
 
           /* Truncate to the range [ -p->h/2 , p->h ]. */
-          h_corr = fminf(h_corr, p->h);
-          h_corr = fmaxf(h_corr, -p->h * 0.5f);
+          h_corr = (h_corr < p->h) ? h_corr : p->h;
+          h_corr = (h_corr > -0.5f * p->h) ? h_corr : -0.5f * p->h;
         }
 
         /* Did we get the right number density? */
@@ -584,42 +710,44 @@ void runner_do_ghost(struct runner *r, struct cell *c) {
 }
 
 /**
- * @brief Drift particles and g-particles forward in time
+ * @brief Drift particles and g-particles in a cell forward in time
  *
- * @param r The runner thread.
  * @param c The cell.
- * @param timer Are we timing this ?
+ * @param e The engine.
  */
-void runner_do_drift(struct runner *r, struct cell *c, int timer) {
+static void runner_do_drift(struct cell *c, struct engine *e) {
 
-  const double timeBase = r->e->timeBase;
-  const double dt = (r->e->ti_current - r->e->ti_old) * timeBase;
-  const int ti_old = r->e->ti_old;
-  const int ti_current = r->e->ti_current;
-  struct part *restrict parts = c->parts;
-  struct xpart *restrict xparts = c->xparts;
-  struct gpart *restrict gparts = c->gparts;
-  float dx_max = 0.f, dx2_max = 0.f, h_max = 0.f;
+  const double timeBase = e->timeBase;
+  const int ti_old = c->ti_old;
+  const int ti_current = e->ti_current;
+  struct part *const parts = c->parts;
+  struct xpart *const xparts = c->xparts;
+  struct gpart *const gparts = c->gparts;
 
-  double e_kin = 0.0, e_int = 0.0, e_pot = 0.0, entropy = 0.0, mass = 0.0;
-  double mom[3] = {0.0, 0.0, 0.0};
-  double ang_mom[3] = {0.0, 0.0, 0.0};
+  /* Do we need to drift ? */
+  if (!e->drift_all && !cell_is_drift_needed(c, ti_current)) return;
 
-  TIMER_TIC
+  /* Check that we are actually going to move forward. */
+  if (ti_current == ti_old) return;
 
-#ifdef TASK_VERBOSE
-  OUT;
-#endif
+  /* Drift from the last time the cell was drifted to the current time */
+  const double dt = (ti_current - ti_old) * timeBase;
+
+  float dx_max = 0.f, dx2_max = 0.f, h_max = 0.f;
+  double e_kin = 0.0, e_int = 0.0, e_pot = 0.0, e_rad = 0.0;
+  double entropy = 0.0, mass = 0.0;
+  double mom[3] = {0.0, 0.0, 0.0};
+  double ang_mom[3] = {0.0, 0.0, 0.0};
 
   /* No children? */
   if (!c->split) {
 
     /* Loop over all the g-particles in the cell */
-    const int nr_gparts = c->gcount;
+    const size_t nr_gparts = c->gcount;
     for (size_t k = 0; k < nr_gparts; k++) {
 
       /* Get a handle on the gpart. */
-      struct gpart *restrict gp = &gparts[k];
+      struct gpart *const gp = &gparts[k];
 
       /* Drift... */
       drift_gpart(gp, dt, timeBase, ti_old, ti_current);
@@ -628,7 +756,7 @@ void runner_do_drift(struct runner *r, struct cell *c, int timer) {
       const float dx2 = gp->x_diff[0] * gp->x_diff[0] +
                         gp->x_diff[1] * gp->x_diff[1] +
                         gp->x_diff[2] * gp->x_diff[2];
-      dx2_max = fmaxf(dx2_max, dx2);
+      dx2_max = (dx2_max > dx2) ? dx2_max : dx2;
     }
 
     /* Loop over all the particles in the cell (more work for these !) */
@@ -636,8 +764,8 @@ void runner_do_drift(struct runner *r, struct cell *c, int timer) {
     for (size_t k = 0; k < nr_parts; k++) {
 
       /* Get a handle on the part. */
-      struct part *restrict p = &parts[k];
-      struct xpart *restrict xp = &xparts[k];
+      struct part *const p = &parts[k];
+      struct xpart *const xp = &xparts[k];
 
       /* Drift... */
       drift_part(p, xp, dt, timeBase, ti_old, ti_current);
@@ -646,10 +774,10 @@ void runner_do_drift(struct runner *r, struct cell *c, int timer) {
       const float dx2 = xp->x_diff[0] * xp->x_diff[0] +
                         xp->x_diff[1] * xp->x_diff[1] +
                         xp->x_diff[2] * xp->x_diff[2];
-      dx2_max = fmaxf(dx2_max, dx2);
+      dx2_max = (dx2_max > dx2) ? dx2_max : dx2;
 
       /* Maximal smoothing length */
-      h_max = fmaxf(p->h, h_max);
+      h_max = (h_max > p->h) ? h_max : p->h;
 
       /* Now collect quantities for statistics */
 
@@ -659,7 +787,8 @@ void runner_do_drift(struct runner *r, struct cell *c, int timer) {
       const float v[3] = {xp->v_full[0] + p->a_hydro[0] * half_dt,
                           xp->v_full[1] + p->a_hydro[1] * half_dt,
                           xp->v_full[2] + p->a_hydro[2] * half_dt};
-      const float m = p->mass;
+
+      const float m = hydro_get_mass(p);
 
       /* Collect mass */
       mass += m;
@@ -678,6 +807,7 @@ void runner_do_drift(struct runner *r, struct cell *c, int timer) {
       e_kin += 0.5 * m * (v[0] * v[0] + v[1] * v[1] + v[2] * v[2]);
       e_pot += 0.;
       e_int += m * hydro_get_internal_energy(p, half_dt);
+      e_rad += cooling_get_radiated_energy(xp);
 
       /* Collect entropy */
       entropy += m * hydro_get_entropy(p, half_dt);
@@ -690,21 +820,21 @@ void runner_do_drift(struct runner *r, struct cell *c, int timer) {
   /* Otherwise, aggregate data from children. */
   else {
 
-    /* Loop over the progeny. */
+    /* Loop over the progeny and collect their data. */
     for (int k = 0; k < 8; k++)
       if (c->progeny[k] != NULL) {
+        struct cell *cp = c->progeny[k];
 
-        /* Recurse */
-        struct cell *restrict cp = c->progeny[k];
-        runner_do_drift(r, cp, 0);
+        /* Recurse. */
+        runner_do_drift(cp, e);
 
-        /* Collect */
-        dx_max = fmaxf(dx_max, cp->dx_max);
-        h_max = fmaxf(h_max, cp->h_max);
+        dx_max = max(dx_max, cp->dx_max);
+        h_max = max(h_max, cp->h_max);
         mass += cp->mass;
         e_kin += cp->e_kin;
         e_int += cp->e_int;
         e_pot += cp->e_pot;
+        e_rad += cp->e_rad;
         entropy += cp->entropy;
         mom[0] += cp->mom[0];
         mom[1] += cp->mom[1];
@@ -722,6 +852,7 @@ void runner_do_drift(struct runner *r, struct cell *c, int timer) {
   c->e_kin = e_kin;
   c->e_int = e_int;
   c->e_pot = e_pot;
+  c->e_rad = e_rad;
   c->entropy = entropy;
   c->mom[0] = mom[0];
   c->mom[1] = mom[1];
@@ -730,7 +861,30 @@ void runner_do_drift(struct runner *r, struct cell *c, int timer) {
   c->ang_mom[1] = ang_mom[1];
   c->ang_mom[2] = ang_mom[2];
 
-  if (timer) TIMER_TOC(timer_drift);
+  /* Update the time of the last drift */
+  c->ti_old = ti_current;
+}
+
+/**
+ * @brief Mapper function to drift particles and g-particles forward in time.
+ *
+ * @param map_data An array of #cell%s.
+ * @param num_elements Chunk size.
+ * @param extra_data Pointer to an #engine.
+ */
+
+void runner_do_drift_mapper(void *map_data, int num_elements,
+                            void *extra_data) {
+  /* The untyped arguments are cast to an engine and to an array of cells. */
+  struct engine *e = (struct engine *)extra_data;
+  struct cell *cells = (struct cell *)map_data;
+
+  for (int ind = 0; ind < num_elements; ind++) {
+    struct cell *c = &cells[ind];
+    /* NOTE(review): c is &cells[ind], so the NULL test looks redundant. */
+    /* Only drift local particles, i.e. cells whose nodeID matches e->nodeID. */
+    if (c != NULL && c->nodeID == e->nodeID) runner_do_drift(c, e);
+  }
 }
 
 /**
@@ -865,15 +1019,22 @@ void runner_do_kick(struct runner *r, struct cell *c, int timer) {
   struct gpart *restrict gparts = c->gparts;
   const double const_G = r->e->physical_constants->const_newton_G;
 
-  int updated = 0, g_updated = 0;
-  int ti_end_min = max_nr_timesteps, ti_end_max = 0;
+  TIMER_TIC;
 
-  TIMER_TIC
+  /* Anything to do here? */
+  if (c->ti_end_min > ti_current) {
+    c->updated = 0;
+    c->g_updated = 0;
+    return;
+  }
 
 #ifdef TASK_VERBOSE
   OUT;
 #endif
 
+  int updated = 0, g_updated = 0;
+  int ti_end_min = max_nr_timesteps, ti_end_max = 0;
+
   /* No children? */
   if (!c->split) {
 
@@ -998,7 +1159,7 @@ void runner_do_recv_cell(struct runner *r, struct cell *c, int timer) {
       // if(ti_end < ti_current) error("Received invalid particle !");
       ti_end_min = min(ti_end_min, ti_end);
       ti_end_max = max(ti_end_max, ti_end);
-      h_max = fmaxf(h_max, parts[k].h);
+      h_max = max(h_max, parts[k].h);
     }
     for (size_t k = 0; k < nr_gparts; k++) {
       const int ti_end = gparts[k].ti_end;
@@ -1016,7 +1177,7 @@ void runner_do_recv_cell(struct runner *r, struct cell *c, int timer) {
         runner_do_recv_cell(r, c->progeny[k], 0);
         ti_end_min = min(ti_end_min, c->progeny[k]->ti_end_min);
         ti_end_max = max(ti_end_max, c->progeny[k]->ti_end_max);
-        h_max = fmaxf(h_max, c->progeny[k]->h_max);
+        h_max = max(h_max, c->progeny[k]->h_max);
       }
     }
   }
@@ -1069,13 +1230,15 @@ void *runner_main(void *data) {
       struct cell *ci = t->ci;
       struct cell *cj = t->cj;
       t->rid = r->cpuid;
-      t->last_rid = r->cpuid;
 
       /* Different types of tasks... */
       switch (t->type) {
         case task_type_self:
-          if (t->subtype == task_subtype_density)
-            runner_doself1_density(r, ci);
+          if (t->subtype == task_subtype_density) runner_doself1_density(r, ci);
+#ifdef EXTRA_HYDRO_LOOP
+          else if (t->subtype == task_subtype_gradient)
+            runner_doself1_gradient(r, ci);
+#endif
           else if (t->subtype == task_subtype_force)
             runner_doself2_force(r, ci);
           else if (t->subtype == task_subtype_grav)
@@ -1086,6 +1249,10 @@ void *runner_main(void *data) {
         case task_type_pair:
           if (t->subtype == task_subtype_density)
             runner_dopair1_density(r, ci, cj);
+#ifdef EXTRA_HYDRO_LOOP
+          else if (t->subtype == task_subtype_gradient)
+            runner_dopair1_gradient(r, ci, cj);
+#endif
           else if (t->subtype == task_subtype_force)
             runner_dopair2_force(r, ci, cj);
           else if (t->subtype == task_subtype_grav)
@@ -1099,6 +1266,10 @@ void *runner_main(void *data) {
         case task_type_sub_self:
           if (t->subtype == task_subtype_density)
             runner_dosub_self1_density(r, ci, 1);
+#ifdef EXTRA_HYDRO_LOOP
+          else if (t->subtype == task_subtype_gradient)
+            runner_dosub_self1_gradient(r, ci, 1);
+#endif
           else if (t->subtype == task_subtype_force)
             runner_dosub_self2_force(r, ci, 1);
           else if (t->subtype == task_subtype_grav)
@@ -1109,6 +1280,10 @@ void *runner_main(void *data) {
         case task_type_sub_pair:
           if (t->subtype == task_subtype_density)
             runner_dosub_pair1_density(r, ci, cj, t->flags, 1);
+#ifdef EXTRA_HYDRO_LOOP
+          else if (t->subtype == task_subtype_gradient)
+            runner_dosub_pair1_gradient(r, ci, cj, t->flags, 1);
+#endif
           else if (t->subtype == task_subtype_force)
             runner_dosub_pair2_force(r, ci, cj, t->flags, 1);
           else if (t->subtype == task_subtype_grav)
@@ -1122,15 +1297,18 @@ void *runner_main(void *data) {
         case task_type_ghost:
           runner_do_ghost(r, ci);
           break;
-        case task_type_drift:
-          runner_do_drift(r, ci, 1);
+#ifdef EXTRA_HYDRO_LOOP
+        case task_type_extra_ghost:
+          runner_do_extra_ghost(r, ci);
           break;
+#endif
         case task_type_kick:
           runner_do_kick(r, ci, 1);
           break;
         case task_type_kick_fixdt:
           runner_do_kick_fixdt(r, ci, 1);
           break;
+#ifdef WITH_MPI
         case task_type_send:
           if (t->subtype == task_subtype_tend) {
             free(t->buff);
@@ -1144,6 +1322,7 @@ void *runner_main(void *data) {
             runner_do_recv_cell(r, ci, 1);
           }
           break;
+#endif
         case task_type_grav_mm:
           runner_do_grav_mm(r, t->ci, 1);
           break;
@@ -1158,18 +1337,8 @@ void *runner_main(void *data) {
         case task_type_grav_external:
           runner_do_grav_external(r, t->ci, 1);
           break;
-        case task_type_part_sort:
-          space_do_parts_sort();
-          break;
-        case task_type_gpart_sort:
-          space_do_gparts_sort();
-          break;
-        case task_type_split_cell:
-          space_do_split(e->s, t->ci);
-          break;
-        case task_type_rewait:
-          scheduler_do_rewait((struct task *)t->ci, (struct task *)t->cj,
-                              t->flags, t->rank);
+        case task_type_cooling:
+          runner_do_cooling(r, t->ci, 1);
           break;
         default:
           error("Unknown task type.");
diff --git a/src/runner.h b/src/runner.h
index 6838b959955c4e54e208b8d2d16339e7fdb1740f..be19ab61b997f5730a04fa8c01c0787e8b99a8b2 100644
--- a/src/runner.h
+++ b/src/runner.h
@@ -29,30 +29,32 @@ extern const char runner_flip[27];
 struct cell;
 struct engine;
 
-/* A struct representing a runner's thread and its data. */
+/**
+ * @brief A struct representing a runner's thread and its data.
+ */
 struct runner {
 
-  /* The id of this thread. */
+  /*! The id of this thread. */
   int id;
 
-  /* The thread which it is running. */
+  /*! The actual thread which it is running. */
   pthread_t thread;
 
-  /* The queue to use to get tasks. */
+  /*! The queue to use to get tasks. */
   int cpuid, qid;
 
-  /* The underlying runner. */
+  /*! The engine owning this runner. */
   struct engine *e;
 };
 
 /* Function prototypes. */
 void runner_do_ghost(struct runner *r, struct cell *c);
 void runner_do_sort(struct runner *r, struct cell *c, int flag, int clock);
-void runner_do_gsort(struct runner *r, struct cell *c, int flag, int clock);
 void runner_do_kick(struct runner *r, struct cell *c, int timer);
 void runner_do_kick_fixdt(struct runner *r, struct cell *c, int timer);
-void runner_do_drift(struct runner *r, struct cell *c, int timer);
 void runner_do_init(struct runner *r, struct cell *c, int timer);
+void runner_do_cooling(struct runner *r, struct cell *c, int timer);
 void *runner_main(void *data);
+void runner_do_drift_mapper(void *map_data, int num_elements, void *extra_data);
 
 #endif /* SWIFT_RUNNER_H */
diff --git a/src/runner_doiact.h b/src/runner_doiact.h
index db439671ff5fee56c086444ddaa8268571c80a15..3c968cbf7d955198ad6bb44ab70e93af17735e99 100644
--- a/src/runner_doiact.h
+++ b/src/runner_doiact.h
@@ -1739,7 +1739,7 @@ void DOSUB_PAIR1(struct runner *r, struct cell *ci, struct cell *cj, int sid,
   if (ci->ti_end_min > ti_current && cj->ti_end_min > ti_current) return;
 
   /* Get the cell dimensions. */
-  const float h = fmin(ci->width[0], fmin(ci->width[1], ci->width[2]));
+  const float h = min(ci->width[0], min(ci->width[1], ci->width[2]));
 
   /* Get the type of pair if not specified explicitly. */
   // if ( sid < 0 )
@@ -1748,7 +1748,7 @@ void DOSUB_PAIR1(struct runner *r, struct cell *ci, struct cell *cj, int sid,
 
   /* Recurse? */
   if (ci->split && cj->split &&
-      fmaxf(ci->h_max, cj->h_max) * kernel_gamma + ci->dx_max + cj->dx_max <
+      max(ci->h_max, cj->h_max) * kernel_gamma + ci->dx_max + cj->dx_max <
           h / 2) {
 
     /* Different types of flags. */
@@ -2023,7 +2023,7 @@ void DOSUB_PAIR2(struct runner *r, struct cell *ci, struct cell *cj, int sid,
   if (ci->ti_end_min > ti_current && cj->ti_end_min > ti_current) return;
 
   /* Get the cell dimensions. */
-  const float h = fmin(ci->width[0], fmin(ci->width[1], ci->width[2]));
+  const float h = min(ci->width[0], min(ci->width[1], ci->width[2]));
 
   /* Get the type of pair if not specified explicitly. */
   // if ( sid < 0 )
@@ -2032,7 +2032,7 @@ void DOSUB_PAIR2(struct runner *r, struct cell *ci, struct cell *cj, int sid,
 
   /* Recurse? */
   if (ci->split && cj->split &&
-      fmaxf(ci->h_max, cj->h_max) * kernel_gamma + ci->dx_max + cj->dx_max <
+      max(ci->h_max, cj->h_max) * kernel_gamma + ci->dx_max + cj->dx_max <
           h / 2) {
 
     /* Different types of flags. */
@@ -2336,11 +2336,11 @@ void DOSUB_SUBSET(struct runner *r, struct cell *ci, struct part *parts,
   else {
 
     /* Get the cell dimensions. */
-    const float h = fmin(ci->width[0], fmin(ci->width[1], ci->width[2]));
+    const float h = min(ci->width[0], min(ci->width[1], ci->width[2]));
 
     /* Recurse? */
     if (ci->split && cj->split &&
-        fmaxf(ci->h_max, cj->h_max) * kernel_gamma + ci->dx_max + cj->dx_max <
+        max(ci->h_max, cj->h_max) * kernel_gamma + ci->dx_max + cj->dx_max <
             h / 2) {
 
       /* Get the type of pair if not specified explicitly. */
@@ -2862,16 +2862,17 @@ void DOSUB_SUBSET(struct runner *r, struct cell *ci, struct part *parts,
       }
 
       /* Get the sorting index. */
-      int sid = 0;
+      int new_sid = 0;
       for (int k = 0; k < 3; k++)
-        sid =
-            3 * sid + ((cj->loc[k] - ci->loc[k] + shift[k] < 0)
-                           ? 0
-                           : (cj->loc[k] - ci->loc[k] + shift[k] > 0) ? 2 : 1);
-      sid = sortlistID[sid];
+        new_sid = 3 * new_sid +
+                  ((cj->loc[k] - ci->loc[k] + shift[k] < 0)
+                       ? 0
+                       : (cj->loc[k] - ci->loc[k] + shift[k] > 0) ? 2 : 1);
+      new_sid = sortlistID[new_sid];
 
       /* Do any of the cells need to be sorted first? */
-      if (!(cj->sorted & (1 << sid))) runner_do_sort(r, cj, (1 << sid), 1);
+      if (!(cj->sorted & (1 << new_sid)))
+        runner_do_sort(r, cj, (1 << new_sid), 1);
 
       /* Compute the interactions. */
       DOPAIR_SUBSET(r, ci, parts, ind, count, cj);
diff --git a/src/runner_doiact_grav.h b/src/runner_doiact_grav.h
index a220ad1794d23999ff16752e797a499071fa2e65..0fcd2d2e80a72b92588acd5b8275b9dafc68df45 100644
--- a/src/runner_doiact_grav.h
+++ b/src/runner_doiact_grav.h
@@ -488,7 +488,7 @@ static void runner_do_grav_mm(struct runner *r, struct cell *ci, int timer) {
 
   /* Recover the list of top-level cells */
   const struct engine *e = r->e;
-  struct cell *cells = e->s->cells;
+  struct cell *cells = e->s->cells_top;
   const int nr_cells = e->s->nr_cells;
   const int ti_current = e->ti_current;
   const double max_d =
diff --git a/src/scheduler.c b/src/scheduler.c
index 6a0d886bd5458028c5c05812f10c204bc8946a1a..2d1c59f28f74552f859e227278257b4caeb4d59a 100644
--- a/src/scheduler.c
+++ b/src/scheduler.c
@@ -2,6 +2,7 @@
  * This file is part of SWIFT.
  * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
  *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *               2016 Peter W. Draper (p.w.draper@durham.ac.uk)
  *
  * This program is free software: you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published
@@ -59,102 +60,80 @@
 
 void scheduler_addunlock(struct scheduler *s, struct task *ta,
                          struct task *tb) {
-
-  /* Lock the scheduler since re-allocating the unlocks is not
-     thread-safe. */
-  if (lock_lock(&s->lock) != 0) error("Unable to lock scheduler.");
+  /* Get an index at which to store this unlock. */
+  const int ind = atomic_inc(&s->nr_unlocks);
 
   /* Does the buffer need to be grown? */
-  if (s->nr_unlocks == s->size_unlocks) {
+  if (ind == s->size_unlocks) {
+    /* Allocate the new buffer. */
     struct task **unlocks_new;
     int *unlock_ind_new;
-    s->size_unlocks *= 2;
+    const int size_unlocks_new = s->size_unlocks * 2;
     if ((unlocks_new = (struct task **)malloc(sizeof(struct task *) *
-                                              s->size_unlocks)) == NULL ||
-        (unlock_ind_new = (int *)malloc(sizeof(int) * s->size_unlocks)) == NULL)
+                                              size_unlocks_new)) == NULL ||
+        (unlock_ind_new = (int *)malloc(sizeof(int) * size_unlocks_new)) ==
+            NULL)
       error("Failed to re-allocate unlocks.");
-    memcpy(unlocks_new, s->unlocks, sizeof(struct task *) * s->nr_unlocks);
-    memcpy(unlock_ind_new, s->unlock_ind, sizeof(int) * s->nr_unlocks);
+
+    /* Wait for all writes to the old buffer to complete. */
+    while (s->completed_unlock_writes < ind)
+      ;
+
+    /* Copy the buffers. */
+    memcpy(unlocks_new, s->unlocks, sizeof(struct task *) * ind);
+    memcpy(unlock_ind_new, s->unlock_ind, sizeof(int) * ind);
     free(s->unlocks);
     free(s->unlock_ind);
     s->unlocks = unlocks_new;
     s->unlock_ind = unlock_ind_new;
+
+    /* Publish the new buffer size. */
+    s->size_unlocks = size_unlocks_new;
   }
 
+  /* Wait for there to actually be space at my index. */
+  while (ind > s->size_unlocks)
+    ;
+
   /* Write the unlock to the scheduler. */
-  const int ind = atomic_inc(&s->nr_unlocks);
   s->unlocks[ind] = tb;
   s->unlock_ind[ind] = ta - s->tasks;
-
-  /* Release the scheduler. */
-  if (lock_unlock(&s->lock) != 0) error("Unable to unlock scheduler.");
+  atomic_inc(&s->completed_unlock_writes);
 }
 
 /**
- * @brief Split tasks that may be too large.
+ * @brief Split a task if too large.
  *
+ * @param t The #task to split.
  * @param s The #scheduler we are working in.
  */
 
-void scheduler_splittasks(struct scheduler *s) {
+static void scheduler_splittask(struct task *t, struct scheduler *s) {
 
-  const int pts[7][8] = {
+  /* Static constants. */
+  static const int pts[7][8] = {
       {-1, 12, 10, 9, 4, 3, 1, 0},     {-1, -1, 11, 10, 5, 4, 2, 1},
       {-1, -1, -1, 12, 7, 6, 4, 3},    {-1, -1, -1, -1, 8, 7, 5, 4},
       {-1, -1, -1, -1, -1, 12, 10, 9}, {-1, -1, -1, -1, -1, -1, 11, 10},
       {-1, -1, -1, -1, -1, -1, -1, 12}};
-  const float sid_scale[13] = {0.1897, 0.4025, 0.1897, 0.4025, 0.5788,
-                               0.4025, 0.1897, 0.4025, 0.1897, 0.4025,
-                               0.5788, 0.4025, 0.5788};
-
-  /* Loop through the tasks... */
-  int tid = 0, redo = 0;
-  struct task *t_old = NULL;
-  while (1) {
-
-    /* Get a pointer on the task. */
-    struct task *t = t_old;
-    if (redo) {
-      redo = 0;
-    } else {
-      const int ind = atomic_inc(&tid);
-      if (ind < s->nr_tasks)
-        t_old = t = &s->tasks[s->tasks_ind[ind]];
-      else
-        break;
-    }
-
-    /* Skip sorting tasks. */
-    if (t->type == task_type_part_sort) continue;
-
-    if (t->type == task_type_gpart_sort) continue;
-
-    /* Empty task? */
-    if (t->ci == NULL || (t->type == task_type_pair && t->cj == NULL)) {
-      t->type = task_type_none;
-      t->skip = 1;
-      continue;
-    }
+  static const float sid_scale[13] = {
+      0.1897f, 0.4025f, 0.1897f, 0.4025f, 0.5788f, 0.4025f, 0.1897f,
+      0.4025f, 0.1897f, 0.4025f, 0.5788f, 0.4025f, 0.5788f};
 
-    /* Non-local kick task? */
-    if ((t->type == task_type_kick) && t->ci->nodeID != s->nodeID) {
-      t->type = task_type_none;
-      t->skip = 1;
-      continue;
-    }
+  /* Iterate on this task until we're done with it. */
+  int redo = 1;
+  while (redo) {
 
-    /* Non-local drift task? */
-    if ((t->type == task_type_drift) && t->ci->nodeID != s->nodeID) {
-      t->type = task_type_none;
-      t->skip = 1;
-      continue;
-    }
+    /* Reset the redo flag. */
+    redo = 0;
 
-    /* Non-local init task? */
-    if ((t->type == task_type_init) && t->ci->nodeID != s->nodeID) {
+    /* Non-splittable task? */
+    if ((t->ci == NULL || (t->type == task_type_pair && t->cj == NULL)) ||
+        ((t->type == task_type_kick) && t->ci->nodeID != s->nodeID) ||
+        ((t->type == task_type_init) && t->ci->nodeID != s->nodeID)) {
       t->type = task_type_none;
       t->skip = 1;
-      continue;
+      break;
     }
 
     /* Self-interaction? */
@@ -166,49 +145,52 @@ void scheduler_splittasks(struct scheduler *s) {
       /* Foreign task? */
       if (ci->nodeID != s->nodeID) {
         t->skip = 1;
-        continue;
+        break;
       }
 
       /* Is this cell even split? */
       if (ci->split) {
 
         /* Make a sub? */
-        if (scheduler_dosub && (ci->count * ci->count < space_subsize ||
-                                ci->gcount * ci->gcount < space_subsize)) {
+        if (scheduler_dosub &&
+            ((ci->count > 0 && ci->count < space_subsize / ci->count) ||
+             (ci->gcount > 0 && ci->gcount < space_subsize / ci->gcount))) {
 
           /* convert to a self-subtask. */
           t->type = task_type_sub_self;
 
-        }
-
-        /* Otherwise, make tasks explicitly. */
-        else {
+          /* Otherwise, make tasks explicitly. */
+        } else {
 
           /* Take a step back (we're going to recycle the current task)... */
           redo = 1;
 
-          /* Add the self task. */
+          /* Add the self tasks. */
           int first_child = 0;
           while (ci->progeny[first_child] == NULL) first_child++;
           t->ci = ci->progeny[first_child];
           for (int k = first_child + 1; k < 8; k++)
             if (ci->progeny[k] != NULL)
-              scheduler_addtask(s, task_type_self, t->subtype, 0, 0,
-                                ci->progeny[k], NULL, 0);
+              scheduler_splittask(
+                  scheduler_addtask(s, task_type_self, t->subtype, 0, 0,
+                                    ci->progeny[k], NULL, 0),
+                  s);
 
           /* Make a task for each pair of progeny. */
           for (int j = 0; j < 8; j++)
             if (ci->progeny[j] != NULL)
               for (int k = j + 1; k < 8; k++)
                 if (ci->progeny[k] != NULL)
-                  scheduler_addtask(s, task_type_pair, t->subtype, pts[j][k], 0,
-                                    ci->progeny[j], ci->progeny[k], 0);
+                  scheduler_splittask(
+                      scheduler_addtask(s, task_type_pair, t->subtype,
+                                        pts[j][k], 0, ci->progeny[j],
+                                        ci->progeny[k], 0),
+                      s);
         }
       }
-    }
 
-    /* Hydro Pair interaction? */
-    else if (t->type == task_type_pair && t->subtype != task_subtype_grav) {
+      /* Pair interaction? */
+    } else if (t->type == task_type_pair && t->subtype != task_subtype_grav) {
 
       /* Get a handle on the cells involved. */
       struct cell *ci = t->ci;
@@ -219,7 +201,7 @@ void scheduler_splittasks(struct scheduler *s) {
       /* Foreign task? */
       if (ci->nodeID != s->nodeID && cj->nodeID != s->nodeID) {
         t->skip = 1;
-        continue;
+        break;
       }
 
       /* Get the sort ID, use space_getsid and not t->flags
@@ -234,16 +216,14 @@ void scheduler_splittasks(struct scheduler *s) {
 
         /* Replace by a single sub-task? */
         if (scheduler_dosub &&
-            ci->count * cj->count * sid_scale[sid] < space_subsize &&
+            ci->count * sid_scale[sid] < space_subsize / cj->count &&
             sid != 0 && sid != 2 && sid != 6 && sid != 8) {
 
           /* Make this task a sub task. */
           t->type = task_type_sub_pair;
 
-        }
-
-        /* Otherwise, split it. */
-        else {
+          /* Otherwise, split it. */
+        } else {
 
           /* Take a step back (we're going to recycle the current task)... */
           redo = 1;
@@ -262,12 +242,18 @@ void scheduler_splittasks(struct scheduler *s) {
               t->cj = cj->progeny[0];
               t->flags = 1;
               t->tight = 1;
-              t = scheduler_addtask(s, task_type_pair, t->subtype, 1, 0,
-                                    ci->progeny[7], cj->progeny[1], 1);
-              t = scheduler_addtask(s, task_type_pair, t->subtype, 0, 0,
-                                    ci->progeny[6], cj->progeny[1], 1);
-              t = scheduler_addtask(s, task_type_pair, t->subtype, 2, 0,
-                                    ci->progeny[7], cj->progeny[0], 1);
+              scheduler_splittask(
+                  scheduler_addtask(s, task_type_pair, t->subtype, 1, 0,
+                                    ci->progeny[7], cj->progeny[1], 1),
+                  s);
+              scheduler_splittask(
+                  scheduler_addtask(s, task_type_pair, t->subtype, 0, 0,
+                                    ci->progeny[6], cj->progeny[1], 1),
+                  s);
+              scheduler_splittask(
+                  scheduler_addtask(s, task_type_pair, t->subtype, 2, 0,
+                                    ci->progeny[7], cj->progeny[0], 1),
+                  s);
               break;
 
             case 2: /* (  1 ,  1 , -1 ) */
@@ -282,12 +268,18 @@ void scheduler_splittasks(struct scheduler *s) {
               t->cj = cj->progeny[0];
               t->flags = 3;
               t->tight = 1;
-              t = scheduler_addtask(s, task_type_pair, t->subtype, 3, 0,
-                                    ci->progeny[7], cj->progeny[2], 1);
-              t = scheduler_addtask(s, task_type_pair, t->subtype, 0, 0,
-                                    ci->progeny[5], cj->progeny[2], 1);
-              t = scheduler_addtask(s, task_type_pair, t->subtype, 6, 0,
-                                    ci->progeny[7], cj->progeny[0], 1);
+              scheduler_splittask(
+                  scheduler_addtask(s, task_type_pair, t->subtype, 3, 0,
+                                    ci->progeny[7], cj->progeny[2], 1),
+                  s);
+              scheduler_splittask(
+                  scheduler_addtask(s, task_type_pair, t->subtype, 0, 0,
+                                    ci->progeny[5], cj->progeny[2], 1),
+                  s);
+              scheduler_splittask(
+                  scheduler_addtask(s, task_type_pair, t->subtype, 6, 0,
+                                    ci->progeny[7], cj->progeny[0], 1),
+                  s);
               break;
 
             case 4: /* (  1 ,  0 ,  0 ) */
@@ -295,36 +287,66 @@ void scheduler_splittasks(struct scheduler *s) {
               t->cj = cj->progeny[0];
               t->flags = 4;
               t->tight = 1;
-              t = scheduler_addtask(s, task_type_pair, t->subtype, 5, 0,
-                                    ci->progeny[5], cj->progeny[0], 1);
-              t = scheduler_addtask(s, task_type_pair, t->subtype, 7, 0,
-                                    ci->progeny[6], cj->progeny[0], 1);
-              t = scheduler_addtask(s, task_type_pair, t->subtype, 8, 0,
-                                    ci->progeny[7], cj->progeny[0], 1);
-              t = scheduler_addtask(s, task_type_pair, t->subtype, 3, 0,
-                                    ci->progeny[4], cj->progeny[1], 1);
-              t = scheduler_addtask(s, task_type_pair, t->subtype, 4, 0,
-                                    ci->progeny[5], cj->progeny[1], 1);
-              t = scheduler_addtask(s, task_type_pair, t->subtype, 6, 0,
-                                    ci->progeny[6], cj->progeny[1], 1);
-              t = scheduler_addtask(s, task_type_pair, t->subtype, 7, 0,
-                                    ci->progeny[7], cj->progeny[1], 1);
-              t = scheduler_addtask(s, task_type_pair, t->subtype, 1, 0,
-                                    ci->progeny[4], cj->progeny[2], 1);
-              t = scheduler_addtask(s, task_type_pair, t->subtype, 2, 0,
-                                    ci->progeny[5], cj->progeny[2], 1);
-              t = scheduler_addtask(s, task_type_pair, t->subtype, 4, 0,
-                                    ci->progeny[6], cj->progeny[2], 1);
-              t = scheduler_addtask(s, task_type_pair, t->subtype, 5, 0,
-                                    ci->progeny[7], cj->progeny[2], 1);
-              t = scheduler_addtask(s, task_type_pair, t->subtype, 0, 0,
-                                    ci->progeny[4], cj->progeny[3], 1);
-              t = scheduler_addtask(s, task_type_pair, t->subtype, 1, 0,
-                                    ci->progeny[5], cj->progeny[3], 1);
-              t = scheduler_addtask(s, task_type_pair, t->subtype, 3, 0,
-                                    ci->progeny[6], cj->progeny[3], 1);
-              t = scheduler_addtask(s, task_type_pair, t->subtype, 4, 0,
-                                    ci->progeny[7], cj->progeny[3], 1);
+              scheduler_splittask(
+                  scheduler_addtask(s, task_type_pair, t->subtype, 5, 0,
+                                    ci->progeny[5], cj->progeny[0], 1),
+                  s);
+              scheduler_splittask(
+                  scheduler_addtask(s, task_type_pair, t->subtype, 7, 0,
+                                    ci->progeny[6], cj->progeny[0], 1),
+                  s);
+              scheduler_splittask(
+                  scheduler_addtask(s, task_type_pair, t->subtype, 8, 0,
+                                    ci->progeny[7], cj->progeny[0], 1),
+                  s);
+              scheduler_splittask(
+                  scheduler_addtask(s, task_type_pair, t->subtype, 3, 0,
+                                    ci->progeny[4], cj->progeny[1], 1),
+                  s);
+              scheduler_splittask(
+                  scheduler_addtask(s, task_type_pair, t->subtype, 4, 0,
+                                    ci->progeny[5], cj->progeny[1], 1),
+                  s);
+              scheduler_splittask(
+                  scheduler_addtask(s, task_type_pair, t->subtype, 6, 0,
+                                    ci->progeny[6], cj->progeny[1], 1),
+                  s);
+              scheduler_splittask(
+                  scheduler_addtask(s, task_type_pair, t->subtype, 7, 0,
+                                    ci->progeny[7], cj->progeny[1], 1),
+                  s);
+              scheduler_splittask(
+                  scheduler_addtask(s, task_type_pair, t->subtype, 1, 0,
+                                    ci->progeny[4], cj->progeny[2], 1),
+                  s);
+              scheduler_splittask(
+                  scheduler_addtask(s, task_type_pair, t->subtype, 2, 0,
+                                    ci->progeny[5], cj->progeny[2], 1),
+                  s);
+              scheduler_splittask(
+                  scheduler_addtask(s, task_type_pair, t->subtype, 4, 0,
+                                    ci->progeny[6], cj->progeny[2], 1),
+                  s);
+              scheduler_splittask(
+                  scheduler_addtask(s, task_type_pair, t->subtype, 5, 0,
+                                    ci->progeny[7], cj->progeny[2], 1),
+                  s);
+              scheduler_splittask(
+                  scheduler_addtask(s, task_type_pair, t->subtype, 0, 0,
+                                    ci->progeny[4], cj->progeny[3], 1),
+                  s);
+              scheduler_splittask(
+                  scheduler_addtask(s, task_type_pair, t->subtype, 1, 0,
+                                    ci->progeny[5], cj->progeny[3], 1),
+                  s);
+              scheduler_splittask(
+                  scheduler_addtask(s, task_type_pair, t->subtype, 3, 0,
+                                    ci->progeny[6], cj->progeny[3], 1),
+                  s);
+              scheduler_splittask(
+                  scheduler_addtask(s, task_type_pair, t->subtype, 4, 0,
+                                    ci->progeny[7], cj->progeny[3], 1),
+                  s);
               break;
 
             case 5: /* (  1 ,  0 , -1 ) */
@@ -332,12 +354,18 @@ void scheduler_splittasks(struct scheduler *s) {
               t->cj = cj->progeny[1];
               t->flags = 5;
               t->tight = 1;
-              t = scheduler_addtask(s, task_type_pair, t->subtype, 5, 0,
-                                    ci->progeny[6], cj->progeny[3], 1);
-              t = scheduler_addtask(s, task_type_pair, t->subtype, 2, 0,
-                                    ci->progeny[4], cj->progeny[3], 1);
-              t = scheduler_addtask(s, task_type_pair, t->subtype, 8, 0,
-                                    ci->progeny[6], cj->progeny[1], 1);
+              scheduler_splittask(
+                  scheduler_addtask(s, task_type_pair, t->subtype, 5, 0,
+                                    ci->progeny[6], cj->progeny[3], 1),
+                  s);
+              scheduler_splittask(
+                  scheduler_addtask(s, task_type_pair, t->subtype, 2, 0,
+                                    ci->progeny[4], cj->progeny[3], 1),
+                  s);
+              scheduler_splittask(
+                  scheduler_addtask(s, task_type_pair, t->subtype, 8, 0,
+                                    ci->progeny[6], cj->progeny[1], 1),
+                  s);
               break;
 
             case 6: /* (  1 , -1 ,  1 ) */
@@ -352,12 +380,18 @@ void scheduler_splittasks(struct scheduler *s) {
               t->cj = cj->progeny[3];
               t->flags = 6;
               t->tight = 1;
-              t = scheduler_addtask(s, task_type_pair, t->subtype, 8, 0,
-                                    ci->progeny[5], cj->progeny[2], 1);
-              t = scheduler_addtask(s, task_type_pair, t->subtype, 7, 0,
-                                    ci->progeny[4], cj->progeny[2], 1);
-              t = scheduler_addtask(s, task_type_pair, t->subtype, 7, 0,
-                                    ci->progeny[5], cj->progeny[3], 1);
+              scheduler_splittask(
+                  scheduler_addtask(s, task_type_pair, t->subtype, 8, 0,
+                                    ci->progeny[5], cj->progeny[2], 1),
+                  s);
+              scheduler_splittask(
+                  scheduler_addtask(s, task_type_pair, t->subtype, 7, 0,
+                                    ci->progeny[4], cj->progeny[2], 1),
+                  s);
+              scheduler_splittask(
+                  scheduler_addtask(s, task_type_pair, t->subtype, 7, 0,
+                                    ci->progeny[5], cj->progeny[3], 1),
+                  s);
               break;
 
             case 8: /* (  1 , -1 , -1 ) */
@@ -372,12 +406,18 @@ void scheduler_splittasks(struct scheduler *s) {
               t->cj = cj->progeny[0];
               t->flags = 9;
               t->tight = 1;
-              t = scheduler_addtask(s, task_type_pair, t->subtype, 9, 0,
-                                    ci->progeny[7], cj->progeny[4], 1);
-              t = scheduler_addtask(s, task_type_pair, t->subtype, 0, 0,
-                                    ci->progeny[3], cj->progeny[4], 1);
-              t = scheduler_addtask(s, task_type_pair, t->subtype, 8, 0,
-                                    ci->progeny[7], cj->progeny[0], 1);
+              scheduler_splittask(
+                  scheduler_addtask(s, task_type_pair, t->subtype, 9, 0,
+                                    ci->progeny[7], cj->progeny[4], 1),
+                  s);
+              scheduler_splittask(
+                  scheduler_addtask(s, task_type_pair, t->subtype, 0, 0,
+                                    ci->progeny[3], cj->progeny[4], 1),
+                  s);
+              scheduler_splittask(
+                  scheduler_addtask(s, task_type_pair, t->subtype, 8, 0,
+                                    ci->progeny[7], cj->progeny[0], 1),
+                  s);
               break;
 
             case 10: /* (  0 ,  1 ,  0 ) */
@@ -385,36 +425,66 @@ void scheduler_splittasks(struct scheduler *s) {
               t->cj = cj->progeny[0];
               t->flags = 10;
               t->tight = 1;
-              t = scheduler_addtask(s, task_type_pair, t->subtype, 11, 0,
-                                    ci->progeny[3], cj->progeny[0], 1);
-              t = scheduler_addtask(s, task_type_pair, t->subtype, 7, 0,
-                                    ci->progeny[6], cj->progeny[0], 1);
-              t = scheduler_addtask(s, task_type_pair, t->subtype, 6, 0,
-                                    ci->progeny[7], cj->progeny[0], 1);
-              t = scheduler_addtask(s, task_type_pair, t->subtype, 9, 0,
-                                    ci->progeny[2], cj->progeny[1], 1);
-              t = scheduler_addtask(s, task_type_pair, t->subtype, 10, 0,
-                                    ci->progeny[3], cj->progeny[1], 1);
-              t = scheduler_addtask(s, task_type_pair, t->subtype, 8, 0,
-                                    ci->progeny[6], cj->progeny[1], 1);
-              t = scheduler_addtask(s, task_type_pair, t->subtype, 7, 0,
-                                    ci->progeny[7], cj->progeny[1], 1);
-              t = scheduler_addtask(s, task_type_pair, t->subtype, 1, 0,
-                                    ci->progeny[2], cj->progeny[4], 1);
-              t = scheduler_addtask(s, task_type_pair, t->subtype, 2, 0,
-                                    ci->progeny[3], cj->progeny[4], 1);
-              t = scheduler_addtask(s, task_type_pair, t->subtype, 10, 0,
-                                    ci->progeny[6], cj->progeny[4], 1);
-              t = scheduler_addtask(s, task_type_pair, t->subtype, 11, 0,
-                                    ci->progeny[7], cj->progeny[4], 1);
-              t = scheduler_addtask(s, task_type_pair, t->subtype, 0, 0,
-                                    ci->progeny[2], cj->progeny[5], 1);
-              t = scheduler_addtask(s, task_type_pair, t->subtype, 1, 0,
-                                    ci->progeny[3], cj->progeny[5], 1);
-              t = scheduler_addtask(s, task_type_pair, t->subtype, 9, 0,
-                                    ci->progeny[6], cj->progeny[5], 1);
-              t = scheduler_addtask(s, task_type_pair, t->subtype, 10, 0,
-                                    ci->progeny[7], cj->progeny[5], 1);
+              scheduler_splittask(
+                  scheduler_addtask(s, task_type_pair, t->subtype, 11, 0,
+                                    ci->progeny[3], cj->progeny[0], 1),
+                  s);
+              scheduler_splittask(
+                  scheduler_addtask(s, task_type_pair, t->subtype, 7, 0,
+                                    ci->progeny[6], cj->progeny[0], 1),
+                  s);
+              scheduler_splittask(
+                  scheduler_addtask(s, task_type_pair, t->subtype, 6, 0,
+                                    ci->progeny[7], cj->progeny[0], 1),
+                  s);
+              scheduler_splittask(
+                  scheduler_addtask(s, task_type_pair, t->subtype, 9, 0,
+                                    ci->progeny[2], cj->progeny[1], 1),
+                  s);
+              scheduler_splittask(
+                  scheduler_addtask(s, task_type_pair, t->subtype, 10, 0,
+                                    ci->progeny[3], cj->progeny[1], 1),
+                  s);
+              scheduler_splittask(
+                  scheduler_addtask(s, task_type_pair, t->subtype, 8, 0,
+                                    ci->progeny[6], cj->progeny[1], 1),
+                  s);
+              scheduler_splittask(
+                  scheduler_addtask(s, task_type_pair, t->subtype, 7, 0,
+                                    ci->progeny[7], cj->progeny[1], 1),
+                  s);
+              scheduler_splittask(
+                  scheduler_addtask(s, task_type_pair, t->subtype, 1, 0,
+                                    ci->progeny[2], cj->progeny[4], 1),
+                  s);
+              scheduler_splittask(
+                  scheduler_addtask(s, task_type_pair, t->subtype, 2, 0,
+                                    ci->progeny[3], cj->progeny[4], 1),
+                  s);
+              scheduler_splittask(
+                  scheduler_addtask(s, task_type_pair, t->subtype, 10, 0,
+                                    ci->progeny[6], cj->progeny[4], 1),
+                  s);
+              scheduler_splittask(
+                  scheduler_addtask(s, task_type_pair, t->subtype, 11, 0,
+                                    ci->progeny[7], cj->progeny[4], 1),
+                  s);
+              scheduler_splittask(
+                  scheduler_addtask(s, task_type_pair, t->subtype, 0, 0,
+                                    ci->progeny[2], cj->progeny[5], 1),
+                  s);
+              scheduler_splittask(
+                  scheduler_addtask(s, task_type_pair, t->subtype, 1, 0,
+                                    ci->progeny[3], cj->progeny[5], 1),
+                  s);
+              scheduler_splittask(
+                  scheduler_addtask(s, task_type_pair, t->subtype, 9, 0,
+                                    ci->progeny[6], cj->progeny[5], 1),
+                  s);
+              scheduler_splittask(
+                  scheduler_addtask(s, task_type_pair, t->subtype, 10, 0,
+                                    ci->progeny[7], cj->progeny[5], 1),
+                  s);
               break;
 
             case 11: /* (  0 ,  1 , -1 ) */
@@ -422,12 +492,18 @@ void scheduler_splittasks(struct scheduler *s) {
               t->cj = cj->progeny[1];
               t->flags = 11;
               t->tight = 1;
-              t = scheduler_addtask(s, task_type_pair, t->subtype, 11, 0,
-                                    ci->progeny[6], cj->progeny[5], 1);
-              t = scheduler_addtask(s, task_type_pair, t->subtype, 2, 0,
-                                    ci->progeny[2], cj->progeny[5], 1);
-              t = scheduler_addtask(s, task_type_pair, t->subtype, 6, 0,
-                                    ci->progeny[6], cj->progeny[1], 1);
+              scheduler_splittask(
+                  scheduler_addtask(s, task_type_pair, t->subtype, 11, 0,
+                                    ci->progeny[6], cj->progeny[5], 1),
+                  s);
+              scheduler_splittask(
+                  scheduler_addtask(s, task_type_pair, t->subtype, 2, 0,
+                                    ci->progeny[2], cj->progeny[5], 1),
+                  s);
+              scheduler_splittask(
+                  scheduler_addtask(s, task_type_pair, t->subtype, 6, 0,
+                                    ci->progeny[6], cj->progeny[1], 1),
+                  s);
               break;
 
             case 12: /* (  0 ,  0 ,  1 ) */
@@ -435,45 +511,73 @@ void scheduler_splittasks(struct scheduler *s) {
               t->cj = cj->progeny[0];
               t->flags = 12;
               t->tight = 1;
-              t = scheduler_addtask(s, task_type_pair, t->subtype, 11, 0,
-                                    ci->progeny[3], cj->progeny[0], 1);
-              t = scheduler_addtask(s, task_type_pair, t->subtype, 5, 0,
-                                    ci->progeny[5], cj->progeny[0], 1);
-              t = scheduler_addtask(s, task_type_pair, t->subtype, 2, 0,
-                                    ci->progeny[7], cj->progeny[0], 1);
-              t = scheduler_addtask(s, task_type_pair, t->subtype, 9, 0,
-                                    ci->progeny[1], cj->progeny[2], 1);
-              t = scheduler_addtask(s, task_type_pair, t->subtype, 12, 0,
-                                    ci->progeny[3], cj->progeny[2], 1);
-              t = scheduler_addtask(s, task_type_pair, t->subtype, 8, 0,
-                                    ci->progeny[5], cj->progeny[2], 1);
-              t = scheduler_addtask(s, task_type_pair, t->subtype, 5, 0,
-                                    ci->progeny[7], cj->progeny[2], 1);
-              t = scheduler_addtask(s, task_type_pair, t->subtype, 3, 0,
-                                    ci->progeny[1], cj->progeny[4], 1);
-              t = scheduler_addtask(s, task_type_pair, t->subtype, 6, 0,
-                                    ci->progeny[3], cj->progeny[4], 1);
-              t = scheduler_addtask(s, task_type_pair, t->subtype, 12, 0,
-                                    ci->progeny[5], cj->progeny[4], 1);
-              t = scheduler_addtask(s, task_type_pair, t->subtype, 11, 0,
-                                    ci->progeny[7], cj->progeny[4], 1);
-              t = scheduler_addtask(s, task_type_pair, t->subtype, 0, 0,
-                                    ci->progeny[1], cj->progeny[6], 1);
-              t = scheduler_addtask(s, task_type_pair, t->subtype, 3, 0,
-                                    ci->progeny[3], cj->progeny[6], 1);
-              t = scheduler_addtask(s, task_type_pair, t->subtype, 9, 0,
-                                    ci->progeny[5], cj->progeny[6], 1);
-              t = scheduler_addtask(s, task_type_pair, t->subtype, 12, 0,
-                                    ci->progeny[7], cj->progeny[6], 1);
+              scheduler_splittask(
+                  scheduler_addtask(s, task_type_pair, t->subtype, 11, 0,
+                                    ci->progeny[3], cj->progeny[0], 1),
+                  s);
+              scheduler_splittask(
+                  scheduler_addtask(s, task_type_pair, t->subtype, 5, 0,
+                                    ci->progeny[5], cj->progeny[0], 1),
+                  s);
+              scheduler_splittask(
+                  scheduler_addtask(s, task_type_pair, t->subtype, 2, 0,
+                                    ci->progeny[7], cj->progeny[0], 1),
+                  s);
+              scheduler_splittask(
+                  scheduler_addtask(s, task_type_pair, t->subtype, 9, 0,
+                                    ci->progeny[1], cj->progeny[2], 1),
+                  s);
+              scheduler_splittask(
+                  scheduler_addtask(s, task_type_pair, t->subtype, 12, 0,
+                                    ci->progeny[3], cj->progeny[2], 1),
+                  s);
+              scheduler_splittask(
+                  scheduler_addtask(s, task_type_pair, t->subtype, 8, 0,
+                                    ci->progeny[5], cj->progeny[2], 1),
+                  s);
+              scheduler_splittask(
+                  scheduler_addtask(s, task_type_pair, t->subtype, 5, 0,
+                                    ci->progeny[7], cj->progeny[2], 1),
+                  s);
+              scheduler_splittask(
+                  scheduler_addtask(s, task_type_pair, t->subtype, 3, 0,
+                                    ci->progeny[1], cj->progeny[4], 1),
+                  s);
+              scheduler_splittask(
+                  scheduler_addtask(s, task_type_pair, t->subtype, 6, 0,
+                                    ci->progeny[3], cj->progeny[4], 1),
+                  s);
+              scheduler_splittask(
+                  scheduler_addtask(s, task_type_pair, t->subtype, 12, 0,
+                                    ci->progeny[5], cj->progeny[4], 1),
+                  s);
+              scheduler_splittask(
+                  scheduler_addtask(s, task_type_pair, t->subtype, 11, 0,
+                                    ci->progeny[7], cj->progeny[4], 1),
+                  s);
+              scheduler_splittask(
+                  scheduler_addtask(s, task_type_pair, t->subtype, 0, 0,
+                                    ci->progeny[1], cj->progeny[6], 1),
+                  s);
+              scheduler_splittask(
+                  scheduler_addtask(s, task_type_pair, t->subtype, 3, 0,
+                                    ci->progeny[3], cj->progeny[6], 1),
+                  s);
+              scheduler_splittask(
+                  scheduler_addtask(s, task_type_pair, t->subtype, 9, 0,
+                                    ci->progeny[5], cj->progeny[6], 1),
+                  s);
+              scheduler_splittask(
+                  scheduler_addtask(s, task_type_pair, t->subtype, 12, 0,
+                                    ci->progeny[7], cj->progeny[6], 1),
+                  s);
               break;
-          }
+          } /* switch(sid) */
         }
 
-      } /* split this task? */
-
-      /* Otherwise, break it up if it is too large? */
-      else if (scheduler_doforcesplit && ci->split && cj->split &&
-               (ci->count > space_maxsize / cj->count)) {
+        /* Otherwise, break it up if it is too large? */
+      } else if (scheduler_doforcesplit && ci->split && cj->split &&
+                 (ci->count > space_maxsize / cj->count)) {
 
         // message( "force splitting pair with %i and %i parts." , ci->count ,
         // cj->count );
@@ -485,34 +589,34 @@ void scheduler_splittasks(struct scheduler *s) {
           if (ci->progeny[j] != NULL)
             for (int k = 0; k < 8; k++)
               if (cj->progeny[k] != NULL) {
-                t = scheduler_addtask(s, task_type_pair, t->subtype, 0, 0,
+                struct task *tl =
+                    scheduler_addtask(s, task_type_pair, t->subtype, 0, 0,
                                       ci->progeny[j], cj->progeny[k], 0);
-                t->flags = space_getsid(s->space, &t->ci, &t->cj, shift);
+                tl->flags = space_getsid(s->space, &tl->ci, &tl->cj, shift);
+                scheduler_splittask(tl, s); /* may rewrite tl, so go last */
               }
 
-      }
-
-      /* Otherwise, if not spilt, stitch-up the sorting. */
-      else {
+        /* Otherwise, if not split, stitch-up the sorting. */
+      } else {
 
         /* Create the sort for ci. */
-        // lock_lock( &ci->lock );
+        lock_lock(&ci->lock);
         if (ci->sorts == NULL)
           ci->sorts = scheduler_addtask(s, task_type_sort, task_subtype_none,
                                         1 << sid, 0, ci, NULL, 0);
         else
           ci->sorts->flags |= (1 << sid);
-        // lock_unlock_blind( &ci->lock );
+        lock_unlock_blind(&ci->lock);
         scheduler_addunlock(s, ci->sorts, t);
 
         /* Create the sort for cj. */
-        // lock_lock( &cj->lock );
+        lock_lock(&cj->lock);
         if (cj->sorts == NULL)
           cj->sorts = scheduler_addtask(s, task_type_sort, task_subtype_none,
                                         1 << sid, 0, cj, NULL, 0);
         else
           cj->sorts->flags |= (1 << sid);
-        // lock_unlock_blind( &cj->lock );
+        lock_unlock_blind(&cj->lock);
         scheduler_addunlock(s, cj->sorts, t);
       }
 
@@ -528,8 +632,35 @@ void scheduler_splittasks(struct scheduler *s) {
       if (ci->gcount == 0) t->type = task_type_none;
 
     } /* gravity interaction? */
+  }   /* iterate over the current task. */
+}
+
+/**
+ * @brief Mapper function to split tasks that may be too large.
+ *
+ * @param map_data the tasks to process
+ * @param num_elements the number of tasks.
+ * @param extra_data The #scheduler we are working in.
+ */
 
-  } /* loop over all tasks. */
+void scheduler_splittasks_mapper(void *map_data, int num_elements,
+                                 void *extra_data) {
+
+  /* Recover the typed arguments: the scheduler travels in extra_data and
+     the run of tasks handed to this call in map_data. */
+  struct scheduler *const sched = (struct scheduler *)extra_data;
+  struct task *const chunk = (struct task *)map_data;
+
+  /* Hand every task of the run to the splitter, in order. */
+  for (int k = 0; k < num_elements; k++)
+    scheduler_splittask(&chunk[k], sched);
+}
+
+void scheduler_splittasks(struct scheduler *s) {
+  /* Run scheduler_splittasks_mapper over the s->nr_tasks entries of s->tasks
+     via s->threadpool. NOTE(review): 1000 looks like a chunk size; confirm. */
+  threadpool_map(s->threadpool, scheduler_splittasks_mapper, s->tasks,
+                 s->nr_tasks, sizeof(struct task), 1000, s);
 }
 
 /**
@@ -574,7 +705,6 @@ struct task *scheduler_addtask(struct scheduler *s, enum task_types type,
   t->toc = 0;
   t->nr_unlock_tasks = 0;
   t->rid = -1;
-  t->last_rid = -1;
 
   /* Add an index for it. */
   // lock_lock( &s->lock );
@@ -626,17 +756,27 @@ void scheduler_set_unlocks(struct scheduler *s) {
   offsets[0] = 0;
   for (int k = 1; k < s->nr_tasks; k++)
     offsets[k] = offsets[k - 1] + counts[k - 1];
-  for (int k = 0; k < s->nr_tasks; k++)
-    for (int j = offsets[k]; j < offsets[k + 1]; j++) s->unlock_ind[j] = k;
 
   /* Set the unlocks in the tasks. */
   for (int k = 0; k < s->nr_tasks; k++) {
     struct task *t = &s->tasks[k];
     t->nr_unlock_tasks = counts[k];
     t->unlock_tasks = &s->unlocks[offsets[k]];
-    for (int j = offsets[k]; j < offsets[k + 1]; j++) s->unlock_ind[j] = k;
   }
 
+#ifdef SWIFT_DEBUG_CHECKS
+  /* Verify that there are no duplicate unlocks. */
+  for (int k = 0; k < s->nr_tasks; k++) {
+    struct task *t = &s->tasks[k];
+    for (int i = 0; i < t->nr_unlock_tasks; i++) {
+      for (int j = i + 1; j < t->nr_unlock_tasks; j++) {
+        if (t->unlock_tasks[i] == t->unlock_tasks[j])
+          error("duplicate unlock!");
+      }
+    }
+  }
+#endif
+
   /* Clean up. */
   free(counts);
   free(offsets);
@@ -655,42 +795,48 @@ void scheduler_ranktasks(struct scheduler *s) {
   const int nr_tasks = s->nr_tasks;
 
   /* Run through the tasks and get all the waits right. */
-  for (int k = 0; k < nr_tasks; k++) {
-    tid[k] = k;
-    for (int j = 0; j < tasks[k].nr_unlock_tasks; j++)
-      tasks[k].unlock_tasks[j]->wait += 1;
+  for (int i = 0; i < nr_tasks; i++) {
+    struct task *t = &tasks[i];
+
+    // Increment the waits of the dependents
+    for (int k = 0; k < t->nr_unlock_tasks; k++) {
+      t->unlock_tasks[k]->wait++;
+    }
   }
 
+  /* Load the tids of tasks with no waits. */
+  int left = 0;
+  for (int k = 0; k < nr_tasks; k++)
+    if (tasks[k].wait == 0) {
+      tid[left] = k;
+      left += 1;
+    }
+
   /* Main loop. */
-  for (int j = 0, rank = 0, left = 0; left < nr_tasks; rank++) {
-
-    /* Load the tids of tasks with no waits. */
-    for (int k = left; k < nr_tasks; k++)
-      if (tasks[tid[k]].wait == 0) {
-        int temp = tid[j];
-        tid[j] = tid[k];
-        tid[k] = temp;
-        j += 1;
-      }
+  for (int j = 0, rank = 0; j < nr_tasks; rank++) { /* until all ranked */
 
     /* Did we get anything? */
     if (j == left) error("Unsatisfiable task dependencies detected.");
+    const int left_old = left;
 
     /* Unlock the next layer of tasks. */
-    for (int i = left; i < j; i++) {
-      struct task *t = &tasks[tid[i]];
+    for (; j < left_old; j++) {
+      struct task *t = &tasks[tid[j]];
       t->rank = rank;
-      tid[i] = t - tasks;
-      if (tid[i] >= nr_tasks) error("Task index overshoot.");
       /* message( "task %i of type %s has rank %i." , i ,
           (t->type == task_type_self) ? "self" : (t->type == task_type_pair) ?
          "pair" : "sort" , rank ); */
-      for (int k = 0; k < t->nr_unlock_tasks; k++)
-        t->unlock_tasks[k]->wait -= 1;
+      for (int k = 0; k < t->nr_unlock_tasks; k++) {
+        struct task *u = t->unlock_tasks[k];
+        if (--u->wait == 0) {
+          tid[left] = u - tasks;
+          left += 1;
+        }
+      }
     }
 
-    /* The new left (no, not tony). */
-    left = j;
+    /* Move back to the old left (like Sanders). */
+    j = left_old;
   }
 
 #ifdef SWIFT_DEBUG_CHECKS
@@ -718,15 +864,14 @@ void scheduler_reset(struct scheduler *s, int size) {
     if (s->tasks_ind != NULL) free(s->tasks_ind);
 
     /* Allocate the new lists. */
-    if ((s->tasks = (struct task *)malloc(sizeof(struct task) * size)) ==
-            NULL ||
-        (s->tasks_ind = (int *)malloc(sizeof(int) * size)) == NULL)
+    if (posix_memalign((void *)&s->tasks, task_align,
+                       size * sizeof(struct task)) != 0)
+      error("Failed to allocate task array.");
+
+    if ((s->tasks_ind = (int *)malloc(sizeof(int) * size)) == NULL)
       error("Failed to allocate task lists.");
   }
 
-  /* Reset the task data. */
-  bzero(s->tasks, sizeof(struct task) * size);
-
   /* Reset the counters. */
   s->size = size;
   s->nr_tasks = 0;
@@ -735,6 +880,7 @@ void scheduler_reset(struct scheduler *s, int size) {
   s->mask = 0;
   s->submask = 0;
   s->nr_unlocks = 0;
+  s->completed_unlock_writes = 0;
 
   /* Set the task pointers in the queues. */
   for (int k = 0; k < s->nr_queues; k++) s->queues[k].tasks = s->tasks;
@@ -744,10 +890,11 @@ void scheduler_reset(struct scheduler *s, int size) {
  * @brief Compute the task weights
  *
  * @param s The #scheduler.
+ * @param verbose Are we talkative ?
  */
+void scheduler_reweight(struct scheduler *s, int verbose) {
 
-void scheduler_reweight(struct scheduler *s) {
-
+  const ticks tic = getticks();
   const int nr_tasks = s->nr_tasks;
   int *tid = s->tasks_ind;
   struct task *tasks = s->tasks;
@@ -756,11 +903,8 @@ void scheduler_reweight(struct scheduler *s) {
                                0.4025, 0.1897, 0.4025, 0.1897, 0.4025,
                                0.5788, 0.4025, 0.5788};
   const float wscale = 0.001;
-  // ticks tic;
 
-  /* Run through the tasks backwards and set their waits and
-     weights. */
-  // tic = getticks();
+  /* Run through the tasks backwards and set their weights. */
   for (int k = nr_tasks - 1; k >= 0; k--) {
     struct task *t = &tasks[tid[k]];
     t->weight = 0;
@@ -776,7 +920,7 @@ void scheduler_reweight(struct scheduler *s) {
                        (sizeof(int) * 8 - intrinsics_clz(t->ci->count));
           break;
         case task_type_self:
-          t->weight += 1 * t->ci->count * t->ci->count;
+          t->weight += 1 * wscale * t->ci->count * t->ci->count;
           break;
         case task_type_pair:
           if (t->ci->nodeID != nodeID || t->cj->nodeID != nodeID)
@@ -810,9 +954,6 @@ void scheduler_reweight(struct scheduler *s) {
         case task_type_kick:
           t->weight += wscale * t->ci->count;
           break;
-        case task_type_drift:
-          t->weight += wscale * t->ci->count;
-          break;
         case task_type_init:
           t->weight += wscale * t->ci->count;
           break;
@@ -820,11 +961,13 @@ void scheduler_reweight(struct scheduler *s) {
           break;
       }
   }
-  // message( "weighting tasks took %.3f %s." ,
-  // clocks_from_ticks( getticks() - tic ), clocks_getunit());
+
+  if (verbose)
+    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
+            clocks_getunit());
 
   /* int min = tasks[0].weight, max = tasks[0].weight;
-  for ( k = 1 ; k < nr_tasks ; k++ )
+  for ( int k = 1 ; k < nr_tasks ; k++ )
       if ( tasks[k].weight < min )
           min = tasks[k].weight;
       else if ( tasks[k].weight > max )
@@ -832,6 +975,52 @@ void scheduler_reweight(struct scheduler *s) {
   message( "task weights are in [ %i , %i ]." , min , max ); */
 }
 
+/**
+ * @brief #threadpool_map function which runs through the task
+ *        graph and re-computes the task wait counters.
+ */
+
+void scheduler_rewait_mapper(void *map_data, int num_elements,
+                             void *extra_data) {
+
+  /* map_data is the run of tasks to process, extra_data the #scheduler. */
+  const struct scheduler *const sched = (struct scheduler *)extra_data;
+  struct task *const chunk = (struct task *)map_data;
+
+  for (int i = 0; i < num_elements; i++) {
+    struct task *const t = &chunk[i];
+
+    /* Ignore tasks that are skipped or not selected by the two masks. */
+    if (t->skip) continue;
+    if (!((1 << t->type) & sched->mask)) continue;
+    if (!((1 << t->subtype) & sched->submask)) continue;
+
+    /* A selected sort task with no flags set is not skipped here: it is
+       reported through error(). */
+    if (t->type == task_type_sort && t->flags == 0)
+      error("Empty sort task encountered.");
+
+    /* Every task unlocked by this one gets one more wait to clear. */
+    for (int k = 0; k < t->nr_unlock_tasks; k++)
+      atomic_inc(&t->unlock_tasks[k]->wait);
+  }
+}
+
+void scheduler_enqueue_mapper(void *map_data, int num_elements,
+                              void *extra_data) {
+  /* map_data: run of task indices; extra_data: the owning #scheduler. */
+  struct scheduler *const sched = (struct scheduler *)extra_data;
+  struct task *const all_tasks = sched->tasks;
+  const int *const indices = (int *)map_data;
+  for (int i = 0; i < num_elements; i++) {
+    struct task *const t = &all_tasks[indices[i]];
+    if (atomic_dec(&t->wait) != 1 || t->skip) continue;
+    if (!((1 << t->type) & sched->mask)) continue;
+    if ((1 << t->subtype) & sched->submask) scheduler_enqueue(sched, t);
+  }
+  pthread_cond_broadcast(&sched->sleep_cond);
+}
+
 /**
  * @brief Start the scheduler, i.e. fill the queues with ready tasks.
  *
@@ -843,88 +1032,33 @@ void scheduler_reweight(struct scheduler *s) {
 void scheduler_start(struct scheduler *s, unsigned int mask,
                      unsigned int submask) {
 
-  const int nr_tasks = s->nr_tasks;
-  int *tid = s->tasks_ind;
-  struct task *tasks = s->tasks;
-  // ticks tic;
+  // ticks tic = getticks();
 
   /* Store the masks */
-  s->mask = mask | (1 << task_type_rewait);
+  s->mask = mask;
   s->submask = submask | (1 << task_subtype_none);
 
   /* Clear all the waits and rids. */
-  // ticks tic = getticks();
   for (int k = 0; k < s->nr_tasks; k++) {
     s->tasks[k].wait = 1;
     s->tasks[k].rid = -1;
   }
-  // message( "waiting tasks took %.3f %s." ,
-  // clocks_from_ticks(getticks() - tic), clocks_getunit() );
-
-  /* Enqueue a set of extraenous tasks to set the task waits. */
-  struct task *rewait_tasks = &s->tasks[s->nr_tasks];
-  const int num_rewait_tasks = s->nr_queues > s->size - s->nr_tasks
-                                   ? s->size - s->nr_tasks
-                                   : s->nr_queues;
-
-  /* Remember that engine_launch may fiddle with this value. */
-  const int waiting_old = s->waiting;
-
-  /* We are going to use the task structure in a modified way to pass
-     information to the task. Don't do this at home !
-     - ci and cj will give the range of tasks to which the waits will be applied
-     - the flags will be used to transfer the mask
-     - the rank will be used to transfer the submask
-     - the rest is unused.
-  */
-  for (int k = 0; k < num_rewait_tasks; k++) {
-    rewait_tasks[k].type = task_type_rewait;
-    rewait_tasks[k].ci = (struct cell *)&s->tasks[k * nr_tasks / s->nr_queues];
-    rewait_tasks[k].cj =
-        (struct cell *)&s->tasks[(k + 1) * nr_tasks / s->nr_queues];
-    rewait_tasks[k].flags = s->mask;
-    rewait_tasks[k].rank = s->submask;
-    rewait_tasks[k].skip = 0;
-    rewait_tasks[k].wait = 0;
-    rewait_tasks[k].rid = -1;
-    rewait_tasks[k].weight = 1;
-    rewait_tasks[k].implicit = 0;
-    rewait_tasks[k].nr_unlock_tasks = 0;
-    scheduler_enqueue(s, &rewait_tasks[k]);
-    pthread_cond_broadcast(&s->sleep_cond);
-  }
 
-  /* Wait for the rewait tasks to have executed. */
-  pthread_mutex_lock(&s->sleep_mutex);
-  pthread_cond_broadcast(&s->sleep_cond);
-  while (s->waiting > waiting_old) {
-    pthread_cond_wait(&s->sleep_cond, &s->sleep_mutex);
-  }
-  pthread_mutex_unlock(&s->sleep_mutex);
-  /* message("waiting tasks took %.3f %s.",
-     clocks_from_ticks(getticks() - tic), clocks_getunit());*/
-
-  s->mask = mask;
-  s->submask = submask | (1 << task_subtype_none);
+  /* Re-wait the tasks. */
+  threadpool_map(s->threadpool, scheduler_rewait_mapper, s->tasks, s->nr_tasks,
+                 sizeof(struct task), 1000, s);
 
   /* Loop over the tasks and enqueue whoever is ready. */
-  // tic = getticks();
-  for (int k = 0; k < s->nr_tasks; k++) {
-    struct task *t = &tasks[tid[k]];
-    if (atomic_dec(&t->wait) == 1 && ((1 << t->type) & s->mask) &&
-        ((1 << t->subtype) & s->submask) && !t->skip) {
-      scheduler_enqueue(s, t);
-      pthread_cond_broadcast(&s->sleep_cond);
-    }
-  }
+  threadpool_map(s->threadpool, scheduler_enqueue_mapper, s->tasks_ind,
+                 s->nr_tasks, sizeof(int), 1000, s);
 
   /* To be safe, fire of one last sleep_cond in a safe way. */
   pthread_mutex_lock(&s->sleep_mutex);
   pthread_cond_broadcast(&s->sleep_cond);
   pthread_mutex_unlock(&s->sleep_mutex);
 
-  // message( "enqueueing tasks took %.3f %s." ,
-  // clocks_from_ticks( getticks() - tic ), clocks_getunit());
+  /* message("enqueueing tasks took %.3f %s." ,
+          clocks_from_ticks( getticks() - tic ), clocks_getunit()); */
 }
 
 /**
@@ -952,7 +1086,6 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) {
   if (t->implicit) {
     for (int j = 0; j < t->nr_unlock_tasks; j++) {
       struct task *t2 = t->unlock_tasks[j];
-
       if (atomic_dec(&t2->wait) == 1) scheduler_enqueue(s, t2);
     }
   }
@@ -971,16 +1104,25 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) {
       case task_type_sort:
       case task_type_ghost:
       case task_type_kick:
-      case task_type_drift:
       case task_type_init:
         qid = t->ci->super->owner;
         break;
       case task_type_pair:
       case task_type_sub_pair:
-        qid = t->ci->super->owner;
-        if (qid < 0 ||
-            s->queues[qid].count > s->queues[t->cj->super->owner].count)
-          qid = t->cj->super->owner;
+        if (t->subtype == task_subtype_grav) {
+
+          qid = t->ci->gsuper->owner;
+          if (qid < 0 ||
+              s->queues[qid].count > s->queues[t->cj->gsuper->owner].count)
+            qid = t->cj->gsuper->owner;
+
+        } else {
+
+          qid = t->ci->super->owner;
+          if (qid < 0 ||
+              s->queues[qid].count > s->queues[t->cj->super->owner].count)
+            qid = t->cj->super->owner;
+        }
         break;
       case task_type_recv:
 #ifdef WITH_MPI
@@ -1214,10 +1356,12 @@ struct task *scheduler_gettask(struct scheduler *s, int qid,
  * @param nr_queues The number of queues in this scheduler.
  * @param flags The #scheduler flags.
  * @param nodeID The MPI rank
+ * @param tp Parallel processing threadpool.
  */
 
 void scheduler_init(struct scheduler *s, struct space *space, int nr_tasks,
-                    int nr_queues, unsigned int flags, int nodeID) {
+                    int nr_queues, unsigned int flags, int nodeID,
+                    struct threadpool *tp) {
 
   /* Init the lock. */
   lock_init(&s->lock);
@@ -1249,6 +1393,7 @@ void scheduler_init(struct scheduler *s, struct space *space, int nr_tasks,
   s->flags = flags;
   s->space = space;
   s->nodeID = nodeID;
+  s->threadpool = tp;
 
   /* Init the tasks array. */
   s->size = 0;
@@ -1282,34 +1427,6 @@ void scheduler_print_tasks(const struct scheduler *s, const char *fileName) {
   fclose(file);
 }
 
-/**
- * @brief Sets the waits of the dependants of a range of task
- *
- * @param t_begin Beginning of the #task range
- * @param t_end End of the #task range
- * @param mask The scheduler task mask
- * @param submask The scheduler subtask mask
- */
-void scheduler_do_rewait(struct task *t_begin, struct task *t_end,
-                         unsigned int mask, unsigned int submask) {
-  for (struct task *t2 = t_begin; t2 != t_end; t2++) {
-
-    if (t2->skip) continue;
-
-    /* Skip tasks not in the mask */
-    if (!((1 << t2->type) & mask) || !((1 << t2->subtype) & submask)) continue;
-
-    /* Skip sort tasks that have already been performed */
-    if (t2->type == task_type_sort && t2->flags == 0) continue;
-
-    /* Sets the waits of the dependances */
-    for (int k = 0; k < t2->nr_unlock_tasks; k++) {
-      struct task *t3 = t2->unlock_tasks[k];
-      atomic_inc(&t3->wait);
-    }
-  }
-}
-
 /**
  * @brief Frees up the memory allocated for this #scheduler
  */
diff --git a/src/scheduler.h b/src/scheduler.h
index fcff27abfe7eaddead3e7c0f67ae544907ce6ce6..c4eb5e99447d623e5fb8e442efc1c254c00bfadd 100644
--- a/src/scheduler.h
+++ b/src/scheduler.h
@@ -36,6 +36,7 @@
 #include "lock.h"
 #include "queue.h"
 #include "task.h"
+#include "threadpool.h"
 
 /* Some constants. */
 #define scheduler_maxwait 3
@@ -83,9 +84,9 @@ struct scheduler {
   int *tasks_ind;
 
   /* The task unlocks. */
-  struct task **unlocks;
-  int *unlock_ind;
-  int nr_unlocks, size_unlocks;
+  struct task **volatile unlocks;
+  int *volatile unlock_ind;
+  volatile int nr_unlocks, size_unlocks, completed_unlock_writes;
 
   /* Lock for this scheduler. */
   swift_lock_type lock;
@@ -97,13 +98,17 @@ struct scheduler {
   /* The space associated with this scheduler. */
   struct space *space;
 
+  /* Threadpool to use internally for mundane parallel work. */
+  struct threadpool *threadpool;
+
   /* The node we are working on. */
   int nodeID;
 };
 
 /* Function prototypes. */
 void scheduler_init(struct scheduler *s, struct space *space, int nr_tasks,
-                    int nr_queues, unsigned int flags, int nodeID);
+                    int nr_queues, unsigned int flags, int nodeID,
+                    struct threadpool *tp);
 struct task *scheduler_gettask(struct scheduler *s, int qid,
                                const struct task *prev);
 void scheduler_enqueue(struct scheduler *s, struct task *t);
@@ -111,7 +116,7 @@ void scheduler_start(struct scheduler *s, unsigned int mask,
                      unsigned int submask);
 void scheduler_reset(struct scheduler *s, int nr_tasks);
 void scheduler_ranktasks(struct scheduler *s);
-void scheduler_reweight(struct scheduler *s);
+void scheduler_reweight(struct scheduler *s, int verbose);
 struct task *scheduler_addtask(struct scheduler *s, enum task_types type,
                                enum task_subtypes subtype, int flags, int wait,
                                struct cell *ci, struct cell *cj, int tight);
@@ -122,8 +127,6 @@ void scheduler_addunlock(struct scheduler *s, struct task *ta, struct task *tb);
 void scheduler_set_unlocks(struct scheduler *s);
 void scheduler_dump_queue(struct scheduler *s);
 void scheduler_print_tasks(const struct scheduler *s, const char *fileName);
-void scheduler_do_rewait(struct task *t_begin, struct task *t_end,
-                         unsigned int mask, unsigned int submask);
 void scheduler_clean(struct scheduler *s);
 
 #endif /* SWIFT_SCHEDULER_H */
diff --git a/src/serial_io.c b/src/serial_io.c
index c981e5e31db1868e6cd2590c6bb36d51282f94c7..6e26be1a33fbc2c74ae1b8f7af2b83db285c962e 100644
--- a/src/serial_io.c
+++ b/src/serial_io.c
@@ -37,6 +37,7 @@
 
 /* Local includes. */
 #include "common_io.h"
+#include "dimension.h"
 #include "engine.h"
 #include "error.h"
 #include "gravity_io.h"
@@ -176,9 +177,10 @@ void readArray(hid_t grp, const struct io_props props, size_t N,
  * Routines writing an output file
  *-----------------------------------------------------------------------------*/
 
-void prepareArray(hid_t grp, char* fileName, FILE* xmfFile,
+void prepareArray(struct engine* e, hid_t grp, char* fileName, FILE* xmfFile,
                   char* partTypeGroupName, const struct io_props props,
-                  long long N_total, const struct UnitSystem* internal_units,
+                  unsigned long long N_total,
+                  const struct UnitSystem* internal_units,
                   const struct UnitSystem* snapshot_units) {
 
   /* Create data space */
@@ -219,15 +221,17 @@ void prepareArray(hid_t grp, char* fileName, FILE* xmfFile,
   /* Set chunk size */
   h_err = H5Pset_chunk(h_prop, rank, chunk_shape);
   if (h_err < 0) {
-    error("Error while setting chunk size (%lld, %lld) for field '%s'.",
+    error("Error while setting chunk size (%llu, %llu) for field '%s'.",
           chunk_shape[0], chunk_shape[1], props.name);
   }
 
   /* Impose data compression */
-  h_err = H5Pset_deflate(h_prop, 4);
-  if (h_err < 0) {
-    error("Error while setting compression options for field '%s'.",
-          props.name);
+  if (e->snapshotCompression > 0) {
+    h_err = H5Pset_deflate(h_prop, e->snapshotCompression);
+    if (h_err < 0) {
+      error("Error while setting compression options for field '%s'.",
+            props.name);
+    }
   }
 
   /* Create dataset */
@@ -291,7 +295,7 @@ void writeArray(struct engine* e, hid_t grp, char* fileName, FILE* xmfFile,
 
   /* Prepare the arrays in the file */
   if (mpi_rank == 0)
-    prepareArray(grp, fileName, xmfFile, partTypeGroupName, props, N_total,
+    prepareArray(e, grp, fileName, xmfFile, partTypeGroupName, props, N_total,
                  internal_units, snapshot_units);
 
   /* Allocate temporary buffer */
@@ -426,6 +430,7 @@ void read_ic_serial(char* fileName, const struct UnitSystem* internal_units,
   size_t N[NUM_PARTICLE_TYPES] = {0};
   long long N_total[NUM_PARTICLE_TYPES] = {0};
   long long offset[NUM_PARTICLE_TYPES] = {0};
+  int dimension = 3; /* Assume 3D if nothing is specified */
   struct UnitSystem* ic_units = malloc(sizeof(struct UnitSystem));
 
   /* First read some information about the content */
@@ -453,6 +458,15 @@ void read_ic_serial(char* fileName, const struct UnitSystem* internal_units,
     h_grp = H5Gopen(h_file, "/Header", H5P_DEFAULT);
     if (h_grp < 0) error("Error while opening file header\n");
 
+    /* Check the dimensionality of the ICs; H5Aexists returns an htri_t */
+    const htri_t has_dim = H5Aexists(h_grp, "Dimension");
+    if (has_dim < 0)
+      error("Error while testing existance of 'Dimension' attribute");
+    if (has_dim > 0) readAttribute(h_grp, "Dimension", INT, &dimension);
+    if (dimension != hydro_dimension)
+      error("ICs dimensionality (%dD) does not match code dimensionality (%dD)",
+            dimension, (int)hydro_dimension);
+
     /* Read the relevant information and print status */
     int flag_entropy_temp[6];
     readAttribute(h_grp, "Flag_Entropy_ICs", INT, flag_entropy_temp);
@@ -577,26 +591,18 @@ void read_ic_serial(char* fileName, const struct UnitSystem* internal_units,
 
         int num_fields = 0;
         struct io_props list[100];
-        size_t N = 0;
+        size_t Nparticles = 0;
 
         /* Read particle fields into the particle structure */
         switch (ptype) {
 
           case GAS:
-            /* if (!dry_run) */
-            /*   hydro_read_particles(h_grp, N[ptype], N_total[ptype], */
-            /*                        offset[ptype], *parts); */
-            /* break; */
-            N = *Ngas;
+            Nparticles = *Ngas;
             hydro_read_particles(*parts, list, &num_fields);
             break;
 
           case DM:
-            /* if (!dry_run) */
-            /*   darkmatter_read_particles(h_grp, N[ptype], N_total[ptype], */
-            /*                             offset[ptype], *gparts); */
-            /* break; */
-            N = Ndm;
+            Nparticles = Ndm;
             darkmatter_read_particles(*gparts, list, &num_fields);
             break;
 
@@ -608,7 +614,7 @@ void read_ic_serial(char* fileName, const struct UnitSystem* internal_units,
         /* Read everything */
         if (!dry_run)
           for (int i = 0; i < num_fields; ++i)
-            readArray(h_grp, list[i], N, N_total[ptype], offset[ptype],
+            readArray(h_grp, list[i], Nparticles, N_total[ptype], offset[ptype],
                       internal_units, ic_units);
 
         /* Close particle group */
@@ -733,6 +739,8 @@ void write_output_serial(struct engine* e, const char* baseName,
     writeAttribute(h_grp, "BoxSize", DOUBLE, e->s->dim, 3);
     double dblTime = e->time;
     writeAttribute(h_grp, "Time", DOUBLE, &dblTime, 1);
+    int dimension = (int)hydro_dimension;
+    writeAttribute(h_grp, "Dimension", INT, &dimension, 1);
 
     /* GADGET-2 legacy values */
     /* Number of particles of each type */
@@ -873,13 +881,13 @@ void write_output_serial(struct engine* e, const char* baseName,
 
         int num_fields = 0;
         struct io_props list[100];
-        size_t N = 0;
+        size_t Nparticles = 0;
 
         /* Write particle fields from the particle structure */
         switch (ptype) {
 
           case GAS:
-            N = Ngas;
+            Nparticles = Ngas;
             hydro_write_particles(parts, list, &num_fields);
             break;
 
@@ -894,7 +902,7 @@ void write_output_serial(struct engine* e, const char* baseName,
             collect_dm_gparts(gparts, Ntot, dmparts, Ndm);
 
             /* Write DM particles */
-            N = Ndm;
+            Nparticles = Ndm;
             darkmatter_write_particles(dmparts, list, &num_fields);
 
             break;
@@ -905,9 +913,9 @@ void write_output_serial(struct engine* e, const char* baseName,
 
         /* Write everything */
         for (int i = 0; i < num_fields; ++i)
-          writeArray(e, h_grp, fileName, xmfFile, partTypeGroupName, list[i], N,
-                     N_total[ptype], mpi_rank, offset[ptype], internal_units,
-                     snapshot_units);
+          writeArray(e, h_grp, fileName, xmfFile, partTypeGroupName, list[i],
+                     Nparticles, N_total[ptype], mpi_rank, offset[ptype],
+                     internal_units, snapshot_units);
 
         /* Free temporary array */
         free(dmparts);
diff --git a/src/single_io.c b/src/single_io.c
index 93faab6717fb8c136559511ada2c928f185f9f42..6cb7e830209b0d58919fe6f529f675b4c611a51d 100644
--- a/src/single_io.c
+++ b/src/single_io.c
@@ -36,6 +36,7 @@
 
 /* Local includes. */
 #include "common_io.h"
+#include "dimension.h"
 #include "engine.h"
 #include "error.h"
 #include "gravity_io.h"
@@ -256,15 +257,17 @@ void writeArray(struct engine* e, hid_t grp, char* fileName, FILE* xmfFile,
   /* Set chunk size */
   h_err = H5Pset_chunk(h_prop, rank, chunk_shape);
   if (h_err < 0) {
-    error("Error while setting chunk size (%lld, %lld) for field '%s'.",
+    error("Error while setting chunk size (%llu, %llu) for field '%s'.",
           chunk_shape[0], chunk_shape[1], props.name);
   }
 
   /* Impose data compression */
-  h_err = H5Pset_deflate(h_prop, 4);
-  if (h_err < 0) {
-    error("Error while setting compression options for field '%s'.",
-          props.name);
+  if (e->snapshotCompression > 0) {
+    h_err = H5Pset_deflate(h_prop, e->snapshotCompression);
+    if (h_err < 0) {
+      error("Error while setting compression options for field '%s'.",
+            props.name);
+    }
   }
 
   /* Create dataset */
@@ -339,6 +342,7 @@ void read_ic_single(char* fileName, const struct UnitSystem* internal_units,
   int numParticles[NUM_PARTICLE_TYPES] = {0};
   int numParticles_highWord[NUM_PARTICLE_TYPES] = {0};
   size_t N[NUM_PARTICLE_TYPES] = {0};
+  int dimension = 3; /* Assume 3D if nothing is specified */
   size_t Ndm;
 
   /* Open file */
@@ -364,6 +368,15 @@ void read_ic_single(char* fileName, const struct UnitSystem* internal_units,
   h_grp = H5Gopen(h_file, "/Header", H5P_DEFAULT);
   if (h_grp < 0) error("Error while opening file header\n");
 
+  /* Check the dimensionality of the ICs (if the info exists) */
+  const htri_t hid_dim = H5Aexists(h_grp, "Dimension");
+  if (hid_dim < 0)
+    error("Error while testing existence of 'Dimension' attribute");
+  if (hid_dim > 0) readAttribute(h_grp, "Dimension", INT, &dimension);
+  if (dimension != hydro_dimension)
+    error("ICs dimensionality (%dD) does not match code dimensionality (%dD)",
+          dimension, (int)hydro_dimension);
+
   /* Read the relevant information and print status */
   int flag_entropy_temp[6];
   readAttribute(h_grp, "Flag_Entropy_ICs", INT, flag_entropy_temp);
@@ -458,18 +471,18 @@ void read_ic_single(char* fileName, const struct UnitSystem* internal_units,
 
     int num_fields = 0;
     struct io_props list[100];
-    size_t N = 0;
+    size_t Nparticles = 0;
 
     /* Read particle fields into the structure */
     switch (ptype) {
 
       case GAS:
-        N = *Ngas;
+        Nparticles = *Ngas;
         hydro_read_particles(*parts, list, &num_fields);
         break;
 
       case DM:
-        N = Ndm;
+        Nparticles = Ndm;
         darkmatter_read_particles(*gparts, list, &num_fields);
         break;
 
@@ -480,7 +493,7 @@ void read_ic_single(char* fileName, const struct UnitSystem* internal_units,
     /* Read everything */
     if (!dry_run)
       for (int i = 0; i < num_fields; ++i)
-        readArray(h_grp, list[i], N, internal_units, ic_units);
+        readArray(h_grp, list[i], Nparticles, internal_units, ic_units);
 
     /* Close particle group */
     H5Gclose(h_grp);
@@ -579,6 +592,8 @@ void write_output_single(struct engine* e, const char* baseName,
   writeAttribute(h_grp, "BoxSize", DOUBLE, e->s->dim, 3);
   double dblTime = e->time;
   writeAttribute(h_grp, "Time", DOUBLE, &dblTime, 1);
+  int dimension = (int)hydro_dimension;
+  writeAttribute(h_grp, "Dimension", INT, &dimension, 1);
 
   /* GADGET-2 legacy values */
   /* Number of particles of each type */
diff --git a/src/space.c b/src/space.c
index 2a6250e1c19eef77b48e9e54dc813312e64aa32d..a9958f6fbd7d85060db99a9682b0de10f507085d 100644
--- a/src/space.c
+++ b/src/space.c
@@ -42,6 +42,7 @@
 /* Local headers. */
 #include "atomic.h"
 #include "const.h"
+#include "cooling.h"
 #include "engine.h"
 #include "error.h"
 #include "gravity.h"
@@ -50,11 +51,9 @@
 #include "lock.h"
 #include "minmax.h"
 #include "runner.h"
+#include "threadpool.h"
 #include "tools.h"
 
-/* Shared sort structure. */
-struct parallel_sort space_sort_struct;
-
 /* Split size. */
 int space_splitsize = space_splitsize_default;
 int space_subsize = space_subsize_default;
@@ -90,6 +89,28 @@ const int sortlistID[27] = {
     /* (  1 ,  1 ,  0 ) */ 1,
     /* (  1 ,  1 ,  1 ) */ 0};
 
+/**
+ * @brief Interval stack necessary for parallel particle sorting.
+ */
+struct qstack {
+  volatile ptrdiff_t i, j; /* First and last position of the interval. */
+  volatile int min, max;   /* Lowest and highest index in the interval. */
+  volatile int ready;      /* Non-zero once the entry can be popped. */
+};
+
+/**
+ * @brief Parallel particle-sorting stack
+ */
+struct parallel_sort {
+  struct part *parts;                         /* Used by the parts sort. */
+  struct gpart *gparts;                       /* Used by the gparts sort. */
+  struct xpart *xparts;                       /* Used by the parts sort. */
+  int *ind;                                   /* Sort key of each particle. */
+  struct qstack *stack;                       /* Queue of intervals. */
+  unsigned int stack_size;                    /* Allocated entries in stack. */
+  volatile unsigned int first, last, waiting; /* Pop, push, pending counters. */
+};
+
 /**
  * @brief Get the shift-id of the given pair of cells, swapping them
  *      if need be.
@@ -101,7 +122,6 @@ const int sortlistID[27] = {
  *
  * @return The shift ID and set shift, may or may not swap ci and cj.
  */
-
 int space_getsid(struct space *s, struct cell **ci, struct cell **cj,
                  double *shift) {
 
@@ -140,8 +160,9 @@ int space_getsid(struct space *s, struct cell **ci, struct cell **cj,
 /**
  * @brief Recursively dismantle a cell tree.
  *
+ * @param s The #space.
+ * @param c The #cell to recycle.
  */
-
 void space_rebuild_recycle(struct space *s, struct cell *c) {
 
   if (c->split)
@@ -154,32 +175,33 @@ void space_rebuild_recycle(struct space *s, struct cell *c) {
 }
 
 /**
- * @brief Re-build the cell grid.
+ * @brief Re-build the top-level cell grid.
  *
  * @param s The #space.
  * @param cell_max Maximum cell edge length.
  * @param verbose Print messages to stdout or not.
  */
-
 void space_regrid(struct space *s, double cell_max, int verbose) {
 
   const size_t nr_parts = s->nr_parts;
-  struct cell *restrict c;
-  ticks tic = getticks();
+  const ticks tic = getticks();
+  const int ti_current = (s->e != NULL) ? s->e->ti_current : 0;
 
-  /* Run through the parts and get the current h_max. */
+  /* Run through the cells and get the current h_max. */
   // tic = getticks();
   float h_max = s->cell_min / kernel_gamma / space_stretch;
   if (nr_parts > 0) {
-    if (s->cells != NULL) {
+    if (s->cells_top != NULL) {
       for (int k = 0; k < s->nr_cells; k++) {
-        if (s->cells[k].h_max > h_max) h_max = s->cells[k].h_max;
+        if (s->cells_top[k].nodeID == engine_rank &&
+            s->cells_top[k].h_max > h_max) {
+          h_max = s->cells_top[k].h_max;
+        }
       }
     } else {
       for (size_t k = 0; k < nr_parts; k++) {
         if (s->parts[k].h > h_max) h_max = s->parts[k].h;
       }
-      s->h_max = h_max;
     }
   }
 
@@ -197,10 +219,10 @@ void space_regrid(struct space *s, double cell_max, int verbose) {
   if (verbose) message("h_max is %.3e (cell_max=%.3e).", h_max, cell_max);
 
   /* Get the new putative cell dimensions. */
-  int cdim[3];
-  for (int k = 0; k < 3; k++)
-    cdim[k] =
-        floor(s->dim[k] / fmax(h_max * kernel_gamma * space_stretch, cell_max));
+  const int cdim[3] = {
+      floor(s->dim[0] / fmax(h_max * kernel_gamma * space_stretch, cell_max)),
+      floor(s->dim[1] / fmax(h_max * kernel_gamma * space_stretch, cell_max)),
+      floor(s->dim[2] / fmax(h_max * kernel_gamma * space_stretch, cell_max))};
 
   /* Check if we have enough cells for periodicity. */
   if (s->periodic && (cdim[0] < 3 || cdim[1] < 3 || cdim[2] < 3))
@@ -239,7 +261,7 @@ void space_regrid(struct space *s, double cell_max, int verbose) {
       for (int j = 0; j < s->cdim[1]; j++) {
         for (int k = 0; k < s->cdim[2]; k++) {
           cid = cell_getid(oldcdim, i, j, k);
-          oldnodeIDs[cid] = s->cells[cid].nodeID;
+          oldnodeIDs[cid] = s->cells_top[cid].nodeID;
         }
       }
     }
@@ -249,16 +271,16 @@ void space_regrid(struct space *s, double cell_max, int verbose) {
 
   /* Do we need to re-build the upper-level cells? */
   // tic = getticks();
-  if (s->cells == NULL || cdim[0] < s->cdim[0] || cdim[1] < s->cdim[1] ||
+  if (s->cells_top == NULL || cdim[0] < s->cdim[0] || cdim[1] < s->cdim[1] ||
       cdim[2] < s->cdim[2]) {
 
     /* Free the old cells, if they were allocated. */
-    if (s->cells != NULL) {
+    if (s->cells_top != NULL) {
       for (int k = 0; k < s->nr_cells; k++) {
-        space_rebuild_recycle(s, &s->cells[k]);
-        if (s->cells[k].sort != NULL) free(s->cells[k].sort);
+        space_rebuild_recycle(s, &s->cells_top[k]);
+        if (s->cells_top[k].sort != NULL) free(s->cells_top[k].sort);
       }
-      free(s->cells);
+      free(s->cells_top);
       s->maxdepth = 0;
     }
 
@@ -268,22 +290,23 @@ void space_regrid(struct space *s, double cell_max, int verbose) {
       s->width[k] = s->dim[k] / cdim[k];
       s->iwidth[k] = 1.0 / s->width[k];
     }
-    const float dmin = fminf(s->width[0], fminf(s->width[1], s->width[2]));
+    const float dmin = min(s->width[0], min(s->width[1], s->width[2]));
 
     /* Allocate the highest level of cells. */
     s->tot_cells = s->nr_cells = cdim[0] * cdim[1] * cdim[2];
-    if (posix_memalign((void *)&s->cells, 64,
+    if (posix_memalign((void *)&s->cells_top, cell_align,
                        s->nr_cells * sizeof(struct cell)) != 0)
       error("Failed to allocate cells.");
-    bzero(s->cells, s->nr_cells * sizeof(struct cell));
+    bzero(s->cells_top, s->nr_cells * sizeof(struct cell));
     for (int k = 0; k < s->nr_cells; k++)
-      if (lock_init(&s->cells[k].lock) != 0) error("Failed to init spinlock.");
+      if (lock_init(&s->cells_top[k].lock) != 0)
+        error("Failed to init spinlock.");
 
     /* Set the cell location and sizes. */
     for (int i = 0; i < cdim[0]; i++)
       for (int j = 0; j < cdim[1]; j++)
         for (int k = 0; k < cdim[2]; k++) {
-          c = &s->cells[cell_getid(cdim, i, j, k)];
+          struct cell *restrict c = &s->cells_top[cell_getid(cdim, i, j, k)];
           c->loc[0] = i * s->width[0];
           c->loc[1] = j * s->width[1];
           c->loc[2] = k * s->width[2];
@@ -295,6 +318,8 @@ void space_regrid(struct space *s, double cell_max, int verbose) {
           c->count = 0;
           c->gcount = 0;
           c->super = c;
+          c->gsuper = c;
+          c->ti_old = ti_current;
           lock_init(&c->lock);
         }
 
@@ -338,31 +363,35 @@ void space_regrid(struct space *s, double cell_max, int verbose) {
       free(oldnodeIDs);
     }
 #endif
+
+    // message( "rebuilding upper-level cells took %.3f %s." ,
+    // clocks_from_ticks(double)(getticks() - tic), clocks_getunit());
+
   } /* re-build upper-level cells? */
-  // message( "rebuilding upper-level cells took %.3f %s." ,
-  // clocks_from_ticks(double)(getticks() - tic), clocks_getunit());
 
-  /* Otherwise, just clean up the cells. */
-  else {
+  else { /* Otherwise, just clean up the cells. */
 
     /* Free the old cells, if they were allocated. */
     for (int k = 0; k < s->nr_cells; k++) {
-      space_rebuild_recycle(s, &s->cells[k]);
-      s->cells[k].sorts = NULL;
-      s->cells[k].nr_tasks = 0;
-      s->cells[k].nr_density = 0;
-      s->cells[k].nr_force = 0;
-      s->cells[k].density = NULL;
-      s->cells[k].force = NULL;
-      s->cells[k].dx_max = 0.0f;
-      s->cells[k].sorted = 0;
-      s->cells[k].count = 0;
-      s->cells[k].gcount = 0;
-      s->cells[k].init = NULL;
-      s->cells[k].ghost = NULL;
-      s->cells[k].drift = NULL;
-      s->cells[k].kick = NULL;
-      s->cells[k].super = &s->cells[k];
+      space_rebuild_recycle(s, &s->cells_top[k]);
+      s->cells_top[k].sorts = NULL;
+      s->cells_top[k].nr_tasks = 0;
+      s->cells_top[k].nr_density = 0;
+      s->cells_top[k].nr_gradient = 0;
+      s->cells_top[k].nr_force = 0;
+      s->cells_top[k].density = NULL;
+      s->cells_top[k].gradient = NULL;
+      s->cells_top[k].force = NULL;
+      s->cells_top[k].dx_max = 0.0f;
+      s->cells_top[k].sorted = 0;
+      s->cells_top[k].count = 0;
+      s->cells_top[k].gcount = 0;
+      s->cells_top[k].init = NULL;
+      s->cells_top[k].extra_ghost = NULL;
+      s->cells_top[k].ghost = NULL;
+      s->cells_top[k].kick = NULL;
+      s->cells_top[k].super = &s->cells_top[k];
+      s->cells_top[k].gsuper = &s->cells_top[k];
     }
     s->maxdepth = 0;
   }
@@ -380,7 +409,6 @@ void space_regrid(struct space *s, double cell_max, int verbose) {
  * @param verbose Print messages to stdout or not
  *
  */
-
 void space_rebuild(struct space *s, double cell_max, int verbose) {
 
   const ticks tic = getticks();
@@ -393,7 +421,8 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
 
   size_t nr_parts = s->nr_parts;
   size_t nr_gparts = s->nr_gparts;
-  struct cell *restrict cells = s->cells;
+  struct cell *restrict cells_top = s->cells_top;
+  const int ti_current = (s->e != NULL) ? s->e->ti_current : 0;
 
   const double ih[3] = {s->iwidth[0], s->iwidth[1], s->iwidth[2]};
   const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]};
@@ -414,7 +443,7 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
         p->x[j] -= dim[j];
     ind[k] =
         cell_getid(cdim, p->x[0] * ih[0], p->x[1] * ih[1], p->x[2] * ih[2]);
-    cells[ind[k]].count++;
+    cells_top[ind[k]].count++;
   }
   // message( "getting particle indices took %.3f %s." ,
   // clocks_from_ticks(getticks() - tic), clocks_getunit()):
@@ -425,7 +454,7 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
   int *gind;
   if ((gind = (int *)malloc(sizeof(int) * gind_size)) == NULL)
     error("Failed to allocate temporary g-particle indices.");
-  for (int k = 0; k < nr_gparts; k++) {
+  for (size_t k = 0; k < nr_gparts; k++) {
     struct gpart *restrict gp = &s->gparts[k];
     for (int j = 0; j < 3; j++)
       if (gp->x[j] < 0.0)
@@ -434,9 +463,9 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
         gp->x[j] -= dim[j];
     gind[k] =
         cell_getid(cdim, gp->x[0] * ih[0], gp->x[1] * ih[1], gp->x[2] * ih[2]);
-    cells[gind[k]].gcount++;
+    cells_top[gind[k]].gcount++;
   }
-// message( "getting particle indices took %.3f %s." ,
+// message( "getting g-particle indices took %.3f %s." ,
 // clocks_from_ticks(getticks() - tic), clocks_getunit());
 
 #ifdef WITH_MPI
@@ -444,8 +473,8 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
   /* Move non-local parts to the end of the list. */
   const int local_nodeID = s->e->nodeID;
   for (size_t k = 0; k < nr_parts;) {
-    if (cells[ind[k]].nodeID != local_nodeID) {
-      cells[ind[k]].count -= 1;
+    if (cells_top[ind[k]].nodeID != local_nodeID) {
+      cells_top[ind[k]].count -= 1;
       nr_parts -= 1;
       const struct part tp = s->parts[k];
       s->parts[k] = s->parts[nr_parts];
@@ -471,21 +500,21 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
 #ifdef SWIFT_DEBUG_CHECKS
   /* Check that all parts are in the correct places. */
   for (size_t k = 0; k < nr_parts; k++) {
-    if (cells[ind[k]].nodeID != local_nodeID) {
+    if (cells_top[ind[k]].nodeID != local_nodeID) {
       error("Failed to move all non-local parts to send list");
     }
   }
   for (size_t k = nr_parts; k < s->nr_parts; k++) {
-    if (cells[ind[k]].nodeID == local_nodeID) {
+    if (cells_top[ind[k]].nodeID == local_nodeID) {
       error("Failed to remove local parts from send list");
     }
   }
 #endif
 
   /* Move non-local gparts to the end of the list. */
-  for (int k = 0; k < nr_gparts;) {
-    if (cells[gind[k]].nodeID != local_nodeID) {
-      cells[gind[k]].gcount -= 1;
+  for (size_t k = 0; k < nr_gparts;) {
+    if (cells_top[gind[k]].nodeID != local_nodeID) {
+      cells_top[gind[k]].gcount -= 1;
       nr_gparts -= 1;
       const struct gpart tp = s->gparts[k];
       s->gparts[k] = s->gparts[nr_gparts];
@@ -509,12 +538,12 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
 #ifdef SWIFT_DEBUG_CHECKS
   /* Check that all gparts are in the correct place (untested). */
   for (size_t k = 0; k < nr_gparts; k++) {
-    if (cells[gind[k]].nodeID != local_nodeID) {
+    if (cells_top[gind[k]].nodeID != local_nodeID) {
       error("Failed to move all non-local gparts to send list");
     }
   }
   for (size_t k = nr_gparts; k < s->nr_gparts; k++) {
-    if (cells[gind[k]].nodeID == local_nodeID) {
+    if (cells_top[gind[k]].nodeID == local_nodeID) {
       error("Failed to remove local gparts from send list");
     }
   }
@@ -546,11 +575,11 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
     const struct part *const p = &s->parts[k];
     ind[k] =
         cell_getid(cdim, p->x[0] * ih[0], p->x[1] * ih[1], p->x[2] * ih[2]);
-    cells[ind[k]].count += 1;
+    cells_top[ind[k]].count += 1;
 #ifdef SWIFT_DEBUG_CHECKS
-    if (cells[ind[k]].nodeID != local_nodeID)
+    if (cells_top[ind[k]].nodeID != local_nodeID)
       error("Received part that does not belong to me (nodeID=%i).",
-            cells[ind[k]].nodeID);
+            cells_top[ind[k]].nodeID);
 #endif
   }
   nr_parts = s->nr_parts;
@@ -561,7 +590,7 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
   space_parts_sort(s, ind, nr_parts, 0, s->nr_cells - 1, verbose);
 
   /* Re-link the gparts. */
-  part_relink_gparts(s->parts, nr_parts, 0);
+  if (nr_parts > 0 && nr_gparts > 0) part_relink_gparts(s->parts, nr_parts, 0);
 
 #ifdef SWIFT_DEBUG_CHECKS
   /* Verify space_sort_struct. */
@@ -592,24 +621,28 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
   }
 
   /* Assign each particle to its cell. */
-  for (int k = nr_gparts; k < s->nr_gparts; k++) {
+  for (size_t k = nr_gparts; k < s->nr_gparts; k++) {
     const struct gpart *const p = &s->gparts[k];
     gind[k] =
         cell_getid(cdim, p->x[0] * ih[0], p->x[1] * ih[1], p->x[2] * ih[2]);
-    cells[gind[k]].gcount += 1;
-    /* if ( cells[ ind[k] ].nodeID != nodeID )
-        error( "Received part that does not belong to me (nodeID=%i)." , cells[
-       ind[k] ].nodeID ); */
+    cells_top[gind[k]].gcount += 1;
+    /* Debug check: the cell of a received gpart must carry this node's ID. */
+#ifdef SWIFT_DEBUG_CHECKS
+    if (cells_top[gind[k]].nodeID != s->e->nodeID)
+      error("Received part that does not belong to me (nodeID=%i).",
+            cells_top[gind[k]].nodeID);
+#endif
   }
   nr_gparts = s->nr_gparts;
 
 #endif
 
-  /* Sort the parts according to their cells. */
+  /* Sort the gparts according to their cells. */
   space_gparts_sort(s, gind, nr_gparts, 0, s->nr_cells - 1, verbose);
 
   /* Re-link the parts. */
-  part_relink_parts(s->gparts, nr_gparts, s->parts);
+  if (nr_parts > 0 && nr_gparts > 0)
+    part_relink_parts(s->gparts, nr_gparts, s->parts);
 
   /* We no longer need the indices as of here. */
   free(gind);
@@ -632,7 +665,7 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
   for (size_t k = 0; k < nr_parts; ++k) {
 
     if (s->parts[k].gpart != NULL &&
-        s->parts[k].gpart->id_or_neg_offset != -k) {
+        s->parts[k].gpart->id_or_neg_offset != -(ptrdiff_t)k) {
       error("Linking problem !");
     }
   }
@@ -644,7 +677,8 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
   struct xpart *xfinger = s->xparts;
   struct gpart *gfinger = s->gparts;
   for (int k = 0; k < s->nr_cells; k++) {
-    struct cell *restrict c = &cells[k];
+    struct cell *restrict c = &cells_top[k];
+    c->ti_old = ti_current;
     c->parts = finger;
     c->xparts = xfinger;
     c->gparts = gfinger;
@@ -657,7 +691,7 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
 
   /* At this point, we have the upper-level cells, old or new. Now make
      sure that the parts in each cell are ok. */
-  space_split(s, cells, verbose);
+  space_split(s, cells_top, verbose);
 
   if (verbose)
     message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
@@ -667,6 +701,8 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
 /**
  * @brief Split particles between cells of a hierarchy
  *
+ * This is done in parallel using threads in the #threadpool.
+ *
  * @param s The #space.
  * @param cells The cell hierarchy
  * @param verbose Are we talkative ?
@@ -675,10 +711,8 @@ void space_split(struct space *s, struct cell *cells, int verbose) {
 
   const ticks tic = getticks();
 
-  for (int k = 0; k < s->nr_cells; k++)
-    scheduler_addtask(&s->e->sched, task_type_split_cell, task_subtype_none, k,
-                      0, &cells[k], NULL, 0);
-  engine_launch(s->e, s->e->nr_threads, 1 << task_type_split_cell, 0);
+  threadpool_map(&s->e->threadpool, space_split_mapper, cells, s->nr_cells,
+                 sizeof(struct cell), 1, s);
 
   if (verbose)
     message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
@@ -687,7 +721,7 @@ void space_split(struct space *s, struct cell *cells, int verbose) {
 
 /**
  * @brief Sort the particles and condensed particles according to the given
- *indices.
+ * indices.
  *
  * @param s The #space.
  * @param ind The indices with respect to which the parts are sorted.
@@ -696,77 +730,82 @@ void space_split(struct space *s, struct cell *cells, int verbose) {
  * @param max highest index.
  * @param verbose Are we talkative ?
  */
-
 void space_parts_sort(struct space *s, int *ind, size_t N, int min, int max,
                       int verbose) {
 
   const ticks tic = getticks();
 
-  /*Populate the global parallel_sort structure with the input data */
-  space_sort_struct.parts = s->parts;
-  space_sort_struct.xparts = s->xparts;
-  space_sort_struct.ind = ind;
-  space_sort_struct.stack_size = 2 * (max - min + 1) + 10 + s->e->nr_threads;
-  if ((space_sort_struct.stack = malloc(sizeof(struct qstack) *
-                                        space_sort_struct.stack_size)) == NULL)
+  /* Populate a parallel_sort structure with the input data */
+  struct parallel_sort sort_struct;
+  sort_struct.parts = s->parts;
+  sort_struct.xparts = s->xparts;
+  sort_struct.ind = ind;
+  sort_struct.stack_size = 2 * (max - min + 1) + 10 + s->e->nr_threads;
+  if ((sort_struct.stack =
+           malloc(sizeof(struct qstack) * sort_struct.stack_size)) == NULL)
     error("Failed to allocate sorting stack.");
-  for (int i = 0; i < space_sort_struct.stack_size; i++)
-    space_sort_struct.stack[i].ready = 0;
+  for (unsigned int i = 0; i < sort_struct.stack_size; i++)
+    sort_struct.stack[i].ready = 0;
 
   /* Add the first interval. */
-  space_sort_struct.stack[0].i = 0;
-  space_sort_struct.stack[0].j = N - 1;
-  space_sort_struct.stack[0].min = min;
-  space_sort_struct.stack[0].max = max;
-  space_sort_struct.stack[0].ready = 1;
-  space_sort_struct.first = 0;
-  space_sort_struct.last = 1;
-  space_sort_struct.waiting = 1;
-
-  /* Launch the sorting tasks. */
-  engine_launch(s->e, s->e->nr_threads, (1 << task_type_part_sort), 0);
+  sort_struct.stack[0].i = 0;
+  sort_struct.stack[0].j = N - 1;
+  sort_struct.stack[0].min = min;
+  sort_struct.stack[0].max = max;
+  sort_struct.stack[0].ready = 1;
+  sort_struct.first = 0;
+  sort_struct.last = 1;
+  sort_struct.waiting = 1;
+
+  /* Launch the sorting tasks with a stride of zero such that the same
+     map data is passed to each thread. */
+  threadpool_map(&s->e->threadpool, space_parts_sort_mapper, &sort_struct,
+                 s->e->threadpool.num_threads, 0, 1, NULL);
 
 #ifdef SWIFT_DEBUG_CHECKS
   /* Verify space_sort_struct. */
-  for (int i = 1; i < N; i++)
+  for (size_t i = 1; i < N; i++)
     if (ind[i - 1] > ind[i])
-      error("Sorting failed (ind[%i]=%i,ind[%i]=%i), min=%i, max=%i.", i - 1,
+      error("Sorting failed (ind[%zu]=%i,ind[%zu]=%i), min=%i, max=%i.", i - 1,
             ind[i - 1], i, ind[i], min, max);
   message("Sorting succeeded.");
 #endif
 
   /* Clean up. */
-  free(space_sort_struct.stack);
+  free(sort_struct.stack);
 
   if (verbose)
     message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
             clocks_getunit());
 }
 
-void space_do_parts_sort() {
+void space_parts_sort_mapper(void *map_data, int num_elements,
+                             void *extra_data) {
+
+  /* Unpack the mapping data. */
+  struct parallel_sort *sort_struct = (struct parallel_sort *)map_data;
 
   /* Pointers to the sorting data. */
-  int *ind = space_sort_struct.ind;
-  struct part *parts = space_sort_struct.parts;
-  struct xpart *xparts = space_sort_struct.xparts;
+  int *ind = sort_struct->ind;
+  struct part *parts = sort_struct->parts;
+  struct xpart *xparts = sort_struct->xparts;
 
   /* Main loop. */
-  while (space_sort_struct.waiting) {
+  while (sort_struct->waiting) {
 
     /* Grab an interval off the queue. */
-    int qid =
-        atomic_inc(&space_sort_struct.first) % space_sort_struct.stack_size;
+    int qid = atomic_inc(&sort_struct->first) % sort_struct->stack_size;
 
     /* Wait for the entry to be ready, or for the sorting do be done. */
-    while (!space_sort_struct.stack[qid].ready)
-      if (!space_sort_struct.waiting) return;
+    while (!sort_struct->stack[qid].ready)
+      if (!sort_struct->waiting) return;
 
     /* Get the stack entry. */
-    ptrdiff_t i = space_sort_struct.stack[qid].i;
-    ptrdiff_t j = space_sort_struct.stack[qid].j;
-    int min = space_sort_struct.stack[qid].min;
-    int max = space_sort_struct.stack[qid].max;
-    space_sort_struct.stack[qid].ready = 0;
+    ptrdiff_t i = sort_struct->stack[qid].i;
+    ptrdiff_t j = sort_struct->stack[qid].j;
+    int min = sort_struct->stack[qid].min;
+    int max = sort_struct->stack[qid].max;
+    sort_struct->stack[qid].ready = 0;
 
     /* Loop over sub-intervals. */
     while (1) {
@@ -816,18 +855,16 @@ void space_do_parts_sort() {
 
         /* Recurse on the left? */
         if (jj > i && pivot > min) {
-          qid = atomic_inc(&space_sort_struct.last) %
-                space_sort_struct.stack_size;
-          while (space_sort_struct.stack[qid].ready)
+          qid = atomic_inc(&sort_struct->last) % sort_struct->stack_size;
+          while (sort_struct->stack[qid].ready)
             ;
-          space_sort_struct.stack[qid].i = i;
-          space_sort_struct.stack[qid].j = jj;
-          space_sort_struct.stack[qid].min = min;
-          space_sort_struct.stack[qid].max = pivot;
-          if (atomic_inc(&space_sort_struct.waiting) >=
-              space_sort_struct.stack_size)
+          sort_struct->stack[qid].i = i;
+          sort_struct->stack[qid].j = jj;
+          sort_struct->stack[qid].min = min;
+          sort_struct->stack[qid].max = pivot;
+          if (atomic_inc(&sort_struct->waiting) >= sort_struct->stack_size)
             error("Qstack overflow.");
-          space_sort_struct.stack[qid].ready = 1;
+          sort_struct->stack[qid].ready = 1;
         }
 
         /* Recurse on the right? */
@@ -841,18 +878,16 @@ void space_do_parts_sort() {
 
         /* Recurse on the right? */
         if (pivot + 1 < max) {
-          qid = atomic_inc(&space_sort_struct.last) %
-                space_sort_struct.stack_size;
-          while (space_sort_struct.stack[qid].ready)
+          qid = atomic_inc(&sort_struct->last) % sort_struct->stack_size;
+          while (sort_struct->stack[qid].ready)
             ;
-          space_sort_struct.stack[qid].i = jj + 1;
-          space_sort_struct.stack[qid].j = j;
-          space_sort_struct.stack[qid].min = pivot + 1;
-          space_sort_struct.stack[qid].max = max;
-          if (atomic_inc(&space_sort_struct.waiting) >=
-              space_sort_struct.stack_size)
+          sort_struct->stack[qid].i = jj + 1;
+          sort_struct->stack[qid].j = j;
+          sort_struct->stack[qid].min = pivot + 1;
+          sort_struct->stack[qid].max = max;
+          if (atomic_inc(&sort_struct->waiting) >= sort_struct->stack_size)
             error("Qstack overflow.");
-          space_sort_struct.stack[qid].ready = 1;
+          sort_struct->stack[qid].ready = 1;
         }
 
         /* Recurse on the left? */
@@ -865,14 +900,13 @@ void space_do_parts_sort() {
 
     } /* loop over sub-intervals. */
 
-    atomic_dec(&space_sort_struct.waiting);
+    atomic_dec(&sort_struct->waiting);
 
   } /* main loop. */
 }
 
 /**
- * @brief Sort the g-particles and condensed particles according to the given
- *indices.
+ * @brief Sort the g-particles according to the given indices.
  *
  * @param s The #space.
  * @param ind The indices with respect to which the gparts are sorted.
@@ -886,69 +920,75 @@ void space_gparts_sort(struct space *s, int *ind, size_t N, int min, int max,
 
   const ticks tic = getticks();
 
-  /*Populate the global parallel_sort structure with the input data */
-  space_sort_struct.gparts = s->gparts;
-  space_sort_struct.ind = ind;
-  space_sort_struct.stack_size = 2 * (max - min + 1) + 10 + s->e->nr_threads;
-  if ((space_sort_struct.stack = malloc(sizeof(struct qstack) *
-                                        space_sort_struct.stack_size)) == NULL)
+  /* Populate a local parallel_sort structure with the input data. */
+  struct parallel_sort sort_struct;
+  sort_struct.gparts = s->gparts;
+  sort_struct.ind = ind;
+  sort_struct.stack_size = 2 * (max - min + 1) + 10 + s->e->nr_threads;
+  if ((sort_struct.stack =
+           malloc(sizeof(struct qstack) * sort_struct.stack_size)) == NULL)
     error("Failed to allocate sorting stack.");
-  for (int i = 0; i < space_sort_struct.stack_size; i++)
-    space_sort_struct.stack[i].ready = 0;
+  for (unsigned int i = 0; i < sort_struct.stack_size; i++)
+    sort_struct.stack[i].ready = 0;
 
   /* Add the first interval. */
-  space_sort_struct.stack[0].i = 0;
-  space_sort_struct.stack[0].j = N - 1;
-  space_sort_struct.stack[0].min = min;
-  space_sort_struct.stack[0].max = max;
-  space_sort_struct.stack[0].ready = 1;
-  space_sort_struct.first = 0;
-  space_sort_struct.last = 1;
-  space_sort_struct.waiting = 1;
-
-  /* Launch the sorting tasks. */
-  engine_launch(s->e, s->e->nr_threads, (1 << task_type_gpart_sort), 0);
+  sort_struct.stack[0].i = 0;
+  sort_struct.stack[0].j = N - 1;
+  sort_struct.stack[0].min = min;
+  sort_struct.stack[0].max = max;
+  sort_struct.stack[0].ready = 1;
+  sort_struct.first = 0;
+  sort_struct.last = 1;
+  sort_struct.waiting = 1;
+
+  /* Launch the sorting tasks with a stride of zero such that the same
+     map data is passed to each thread. */
+  threadpool_map(&s->e->threadpool, space_gparts_sort_mapper, &sort_struct,
+                 s->e->threadpool.num_threads, 0, 1, NULL);
 
 #ifdef SWIFT_DEBUG_CHECKS
   /* Verify space_sort_struct. */
-  for (int i = 1; i < N; i++)
+  for (size_t i = 1; i < N; i++)
     if (ind[i - 1] > ind[i])
-      error("Sorting failed (ind[%i]=%i,ind[%i]=%i), min=%i, max=%i.", i - 1,
+      error("Sorting failed (ind[%zu]=%i,ind[%zu]=%i), min=%i, max=%i.", i - 1,
             ind[i - 1], i, ind[i], min, max);
   message("Sorting succeeded.");
 #endif
 
   /* Clean up. */
-  free(space_sort_struct.stack);
+  free(sort_struct.stack);
 
   if (verbose)
     message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
             clocks_getunit());
 }
 
-void space_do_gparts_sort() {
+void space_gparts_sort_mapper(void *map_data, int num_elements,
+                              void *extra_data) {
+
+  /* Unpack the mapping data. */
+  struct parallel_sort *sort_struct = (struct parallel_sort *)map_data;
 
   /* Pointers to the sorting data. */
-  int *ind = space_sort_struct.ind;
-  struct gpart *gparts = space_sort_struct.gparts;
+  int *ind = sort_struct->ind;
+  struct gpart *gparts = sort_struct->gparts;
 
   /* Main loop. */
-  while (space_sort_struct.waiting) {
+  while (sort_struct->waiting) {
 
     /* Grab an interval off the queue. */
-    int qid =
-        atomic_inc(&space_sort_struct.first) % space_sort_struct.stack_size;
+    int qid = atomic_inc(&sort_struct->first) % sort_struct->stack_size;
 
     /* Wait for the entry to be ready, or for the sorting do be done. */
-    while (!space_sort_struct.stack[qid].ready)
-      if (!space_sort_struct.waiting) return;
+    while (!sort_struct->stack[qid].ready)
+      if (!sort_struct->waiting) return;
 
     /* Get the stack entry. */
-    ptrdiff_t i = space_sort_struct.stack[qid].i;
-    ptrdiff_t j = space_sort_struct.stack[qid].j;
-    int min = space_sort_struct.stack[qid].min;
-    int max = space_sort_struct.stack[qid].max;
-    space_sort_struct.stack[qid].ready = 0;
+    ptrdiff_t i = sort_struct->stack[qid].i;
+    ptrdiff_t j = sort_struct->stack[qid].j;
+    int min = sort_struct->stack[qid].min;
+    int max = sort_struct->stack[qid].max;
+    sort_struct->stack[qid].ready = 0;
 
     /* Loop over sub-intervals. */
     while (1) {
@@ -995,18 +1035,16 @@ void space_do_gparts_sort() {
 
         /* Recurse on the left? */
         if (jj > i && pivot > min) {
-          qid = atomic_inc(&space_sort_struct.last) %
-                space_sort_struct.stack_size;
-          while (space_sort_struct.stack[qid].ready)
+          qid = atomic_inc(&sort_struct->last) % sort_struct->stack_size;
+          while (sort_struct->stack[qid].ready)
             ;
-          space_sort_struct.stack[qid].i = i;
-          space_sort_struct.stack[qid].j = jj;
-          space_sort_struct.stack[qid].min = min;
-          space_sort_struct.stack[qid].max = pivot;
-          if (atomic_inc(&space_sort_struct.waiting) >=
-              space_sort_struct.stack_size)
+          sort_struct->stack[qid].i = i;
+          sort_struct->stack[qid].j = jj;
+          sort_struct->stack[qid].min = min;
+          sort_struct->stack[qid].max = pivot;
+          if (atomic_inc(&sort_struct->waiting) >= sort_struct->stack_size)
             error("Qstack overflow.");
-          space_sort_struct.stack[qid].ready = 1;
+          sort_struct->stack[qid].ready = 1;
         }
 
         /* Recurse on the right? */
@@ -1020,18 +1058,16 @@ void space_do_gparts_sort() {
 
         /* Recurse on the right? */
         if (pivot + 1 < max) {
-          qid = atomic_inc(&space_sort_struct.last) %
-                space_sort_struct.stack_size;
-          while (space_sort_struct.stack[qid].ready)
+          qid = atomic_inc(&sort_struct->last) % sort_struct->stack_size;
+          while (sort_struct->stack[qid].ready)
             ;
-          space_sort_struct.stack[qid].i = jj + 1;
-          space_sort_struct.stack[qid].j = j;
-          space_sort_struct.stack[qid].min = pivot + 1;
-          space_sort_struct.stack[qid].max = max;
-          if (atomic_inc(&space_sort_struct.waiting) >=
-              space_sort_struct.stack_size)
+          sort_struct->stack[qid].i = jj + 1;
+          sort_struct->stack[qid].j = j;
+          sort_struct->stack[qid].min = pivot + 1;
+          sort_struct->stack[qid].max = max;
+          if (atomic_inc(&sort_struct->waiting) >= sort_struct->stack_size)
             error("Qstack overflow.");
-          space_sort_struct.stack[qid].ready = 1;
+          sort_struct->stack[qid].ready = 1;
         }
 
         /* Recurse on the left? */
@@ -1044,7 +1080,7 @@ void space_do_gparts_sort() {
 
     } /* loop over sub-intervals. */
 
-    atomic_dec(&space_sort_struct.waiting);
+    atomic_dec(&sort_struct->waiting);
 
   } /* main loop. */
 }
@@ -1052,7 +1088,6 @@ void space_do_gparts_sort() {
 /**
  * @brief Mapping function to free the sorted indices buffers.
  */
-
 void space_map_clearsort(struct cell *c, void *data) {
 
   if (c->sort != NULL) {
@@ -1068,21 +1103,17 @@ void space_map_clearsort(struct cell *c, void *data) {
  * @param fun Function pointer to apply on the cells.
  * @param data Data passed to the function fun.
  */
-
 static void rec_map_parts(struct cell *c,
                           void (*fun)(struct part *p, struct cell *c,
                                       void *data),
                           void *data) {
-
-  int k;
-
   /* No progeny? */
   if (!c->split)
-    for (k = 0; k < c->count; k++) fun(&c->parts[k], c, data);
+    for (int k = 0; k < c->count; k++) fun(&c->parts[k], c, data);
 
   /* Otherwise, recurse. */
   else
-    for (k = 0; k < 8; k++)
+    for (int k = 0; k < 8; k++)
       if (c->progeny[k] != NULL) rec_map_parts(c->progeny[k], fun, data);
 }
 
@@ -1093,16 +1124,13 @@ static void rec_map_parts(struct cell *c,
  * @param fun Function pointer to apply on the cells.
  * @param data Data passed to the function fun.
  */
-
 void space_map_parts(struct space *s,
                      void (*fun)(struct part *p, struct cell *c, void *data),
                      void *data) {
 
-  int cid = 0;
-
   /* Call the recursive function on all higher-level cells. */
-  for (cid = 0; cid < s->nr_cells; cid++)
-    rec_map_parts(&s->cells[cid], fun, data);
+  for (int cid = 0; cid < s->nr_cells; cid++)
+    rec_map_parts(&s->cells_top[cid], fun, data);
 }
 
 /**
@@ -1111,20 +1139,17 @@ void space_map_parts(struct space *s,
  * @param c The #cell we are working in.
  * @param fun Function pointer to apply on the cells.
  */
-
 static void rec_map_parts_xparts(struct cell *c,
                                  void (*fun)(struct part *p, struct xpart *xp,
                                              struct cell *c)) {
 
-  int k;
-
   /* No progeny? */
   if (!c->split)
-    for (k = 0; k < c->count; k++) fun(&c->parts[k], &c->xparts[k], c);
+    for (int k = 0; k < c->count; k++) fun(&c->parts[k], &c->xparts[k], c);
 
   /* Otherwise, recurse. */
   else
-    for (k = 0; k < 8; k++)
+    for (int k = 0; k < 8; k++)
       if (c->progeny[k] != NULL) rec_map_parts_xparts(c->progeny[k], fun);
 }
 
@@ -1134,16 +1159,13 @@ static void rec_map_parts_xparts(struct cell *c,
  * @param s The #space we are working in.
  * @param fun Function pointer to apply on the particles in the cells.
  */
-
 void space_map_parts_xparts(struct space *s,
                             void (*fun)(struct part *p, struct xpart *xp,
                                         struct cell *c)) {
 
-  int cid = 0;
-
   /* Call the recursive function on all higher-level cells. */
-  for (cid = 0; cid < s->nr_cells; cid++)
-    rec_map_parts_xparts(&s->cells[cid], fun);
+  for (int cid = 0; cid < s->nr_cells; cid++)
+    rec_map_parts_xparts(&s->cells_top[cid], fun);
 }
 
 /**
@@ -1154,16 +1176,12 @@ void space_map_parts_xparts(struct space *s,
  * @param fun Function pointer to apply on the cells.
  * @param data Data passed to the function fun.
  */
-
 static void rec_map_cells_post(struct cell *c, int full,
                                void (*fun)(struct cell *c, void *data),
                                void *data) {
-
-  int k;
-
   /* Recurse. */
   if (c->split)
-    for (k = 0; k < 8; k++)
+    for (int k = 0; k < 8; k++)
       if (c->progeny[k] != NULL)
         rec_map_cells_post(c->progeny[k], full, fun, data);
 
@@ -1179,29 +1197,24 @@ static void rec_map_cells_post(struct cell *c, int full,
  * @param fun Function pointer to apply on the cells.
  * @param data Data passed to the function fun.
  */
-
 void space_map_cells_post(struct space *s, int full,
                           void (*fun)(struct cell *c, void *data), void *data) {
 
-  int cid = 0;
-
   /* Call the recursive function on all higher-level cells. */
-  for (cid = 0; cid < s->nr_cells; cid++)
-    rec_map_cells_post(&s->cells[cid], full, fun, data);
+  for (int cid = 0; cid < s->nr_cells; cid++)
+    rec_map_cells_post(&s->cells_top[cid], full, fun, data);
 }
 
 static void rec_map_cells_pre(struct cell *c, int full,
                               void (*fun)(struct cell *c, void *data),
                               void *data) {
 
-  int k;
-
   /* No progeny? */
   if (full || !c->split) fun(c, data);
 
   /* Recurse. */
   if (c->split)
-    for (k = 0; k < 8; k++)
+    for (int k = 0; k < 8; k++)
       if (c->progeny[k] != NULL)
         rec_map_cells_pre(c->progeny[k], full, fun, data);
 }
@@ -1217,81 +1230,128 @@ static void rec_map_cells_pre(struct cell *c, int full,
 void space_map_cells_pre(struct space *s, int full,
                          void (*fun)(struct cell *c, void *data), void *data) {
 
-  int cid = 0;
-
   /* Call the recursive function on all higher-level cells. */
-  for (cid = 0; cid < s->nr_cells; cid++)
-    rec_map_cells_pre(&s->cells[cid], full, fun, data);
+  for (int cid = 0; cid < s->nr_cells; cid++)
+    rec_map_cells_pre(&s->cells_top[cid], full, fun, data);
 }
 
 /**
- * @brief Split cells that contain too many particles.
+ * @brief #threadpool mapper function to split cells if they contain
+ *        too many particles.
  *
- * @param s The #space we are working in.
- * @param c The #cell under consideration.
+ * @param map_data Pointer towards the top-cells.
+ * @param num_elements The number of cells to treat.
+ * @param extra_data Pointer to the #space.
  */
-
-void space_do_split(struct space *s, struct cell *c) {
-
-  const int count = c->count;
-  const int gcount = c->gcount;
-  int maxdepth = 0;
-  float h_max = 0.0f;
-  int ti_end_min = max_nr_timesteps, ti_end_max = 0;
-  struct cell *temp;
-  struct part *parts = c->parts;
-  struct gpart *gparts = c->gparts;
-  struct xpart *xparts = c->xparts;
-
-  /* Check the depth. */
-  if (c->depth > s->maxdepth) s->maxdepth = c->depth;
-
-  /* Split or let it be? */
-  if (count > space_splitsize || gcount > space_splitsize) {
-
-    /* No longer just a leaf. */
-    c->split = 1;
-
-    /* Create the cell's progeny. */
-    for (int k = 0; k < 8; k++) {
-      temp = space_getcell(s);
-      temp->count = 0;
-      temp->gcount = 0;
-      temp->loc[0] = c->loc[0];
-      temp->loc[1] = c->loc[1];
-      temp->loc[2] = c->loc[2];
-      temp->width[0] = c->width[0] / 2;
-      temp->width[1] = c->width[1] / 2;
-      temp->width[2] = c->width[2] / 2;
-      temp->dmin = c->dmin / 2;
-      if (k & 4) temp->loc[0] += temp->width[0];
-      if (k & 2) temp->loc[1] += temp->width[1];
-      if (k & 1) temp->loc[2] += temp->width[2];
-      temp->depth = c->depth + 1;
-      temp->split = 0;
-      temp->h_max = 0.0;
-      temp->dx_max = 0.f;
-      temp->nodeID = c->nodeID;
-      temp->parent = c;
-      c->progeny[k] = temp;
+void space_split_mapper(void *map_data, int num_elements, void *extra_data) {
+
+  /* Unpack the inputs. */
+  struct space *s = (struct space *)extra_data;
+  struct cell *restrict cells_top = (struct cell *)map_data;
+  struct engine *e = s->e;
+
+  for (int ind = 0; ind < num_elements; ind++) {
+
+    struct cell *c = &cells_top[ind];
+
+    const int count = c->count;
+    const int gcount = c->gcount;
+    int maxdepth = 0;
+    float h_max = 0.0f;
+    int ti_end_min = max_nr_timesteps, ti_end_max = 0;
+    struct cell *temp;
+    struct part *parts = c->parts;
+    struct gpart *gparts = c->gparts;
+    struct xpart *xparts = c->xparts;
+
+    /* Check the depth. */
+    while (c->depth > (maxdepth = s->maxdepth)) {
+      atomic_cas(&s->maxdepth, maxdepth, c->depth);
     }
 
-    /* Split the cell data. */
-    cell_split(c, c->parts - s->parts);
+    /* Split or let it be? */
+    if (count > space_splitsize || gcount > space_splitsize) {
+
+      /* No longer just a leaf. */
+      c->split = 1;
+
+      /* Create the cell's progeny. */
+      for (int k = 0; k < 8; k++) {
+        temp = space_getcell(s);
+        temp->count = 0;
+        temp->gcount = 0;
+        temp->ti_old = e->ti_current;
+        temp->loc[0] = c->loc[0];
+        temp->loc[1] = c->loc[1];
+        temp->loc[2] = c->loc[2];
+        temp->width[0] = c->width[0] / 2;
+        temp->width[1] = c->width[1] / 2;
+        temp->width[2] = c->width[2] / 2;
+        temp->dmin = c->dmin / 2;
+        if (k & 4) temp->loc[0] += temp->width[0];
+        if (k & 2) temp->loc[1] += temp->width[1];
+        if (k & 1) temp->loc[2] += temp->width[2];
+        temp->depth = c->depth + 1;
+        temp->split = 0;
+        temp->h_max = 0.0;
+        temp->dx_max = 0.f;
+        temp->nodeID = c->nodeID;
+        temp->parent = c;
+        temp->super = NULL;
+        temp->gsuper = NULL;
+        c->progeny[k] = temp;
+      }
+
+      /* Split the cell data. */
+      cell_split(c, c->parts - s->parts);
+
+      /* Remove any progeny with zero parts. */
+      for (int k = 0; k < 8; k++)
+        if (c->progeny[k]->count == 0 && c->progeny[k]->gcount == 0) {
+          space_recycle(s, c->progeny[k]);
+          c->progeny[k] = NULL;
+        } else {
+          space_split_mapper(c->progeny[k], 1, s);
+          h_max = max(h_max, c->progeny[k]->h_max);
+          ti_end_min = min(ti_end_min, c->progeny[k]->ti_end_min);
+          ti_end_max = max(ti_end_max, c->progeny[k]->ti_end_max);
+          if (c->progeny[k]->maxdepth > maxdepth)
+            maxdepth = c->progeny[k]->maxdepth;
+        }
 
-    /* Remove any progeny with zero parts. */
-    for (int k = 0; k < 8; k++)
-      if (c->progeny[k]->count == 0 && c->progeny[k]->gcount == 0) {
-        space_recycle(s, c->progeny[k]);
-        c->progeny[k] = NULL;
-      } else {
-        space_do_split(s, c->progeny[k]);
-        h_max = fmaxf(h_max, c->progeny[k]->h_max);
-        ti_end_min = min(ti_end_min, c->progeny[k]->ti_end_min);
-        ti_end_max = max(ti_end_max, c->progeny[k]->ti_end_max);
-        if (c->progeny[k]->maxdepth > maxdepth)
-          maxdepth = c->progeny[k]->maxdepth;
+    }
+
+    /* Otherwise, collect the data for this cell. */
+    else {
+
+      /* Clear the progeny. */
+      bzero(c->progeny, sizeof(struct cell *) * 8);
+      c->split = 0;
+      maxdepth = c->depth;
+
+      /* Get dt_min/dt_max. */
+      for (int k = 0; k < count; k++) {
+        struct part *p = &parts[k];
+        struct xpart *xp = &xparts[k];
+        const float h = p->h;
+        const int ti_end = p->ti_end;
+        xp->x_diff[0] = 0.f;
+        xp->x_diff[1] = 0.f;
+        xp->x_diff[2] = 0.f;
+        if (h > h_max) h_max = h;
+        if (ti_end < ti_end_min) ti_end_min = ti_end;
+        if (ti_end > ti_end_max) ti_end_max = ti_end;
+      }
+      for (int k = 0; k < gcount; k++) {
+        struct gpart *gp = &gparts[k];
+        const int ti_end = gp->ti_end;
+        gp->x_diff[0] = 0.f;
+        gp->x_diff[1] = 0.f;
+        gp->x_diff[2] = 0.f;
+        if (ti_end < ti_end_min) ti_end_min = ti_end;
+        if (ti_end > ti_end_max) ti_end_max = ti_end;
       }
+    }
 
     /* Set the values for this cell. */
     c->h_max = h_max;
@@ -1299,61 +1359,24 @@ void space_do_split(struct space *s, struct cell *c) {
     c->ti_end_max = ti_end_max;
     c->maxdepth = maxdepth;
 
+    /* Set ownership according to the start of the parts array. */
+    if (s->nr_parts > 0)
+      c->owner =
+          ((c->parts - s->parts) % s->nr_parts) * s->nr_queues / s->nr_parts;
+    else if (s->nr_gparts > 0)
+      c->owner = ((c->gparts - s->gparts) % s->nr_gparts) * s->nr_queues /
+                 s->nr_gparts;
+    else
+      c->owner = 0; /* Ok, there is really nothing on this rank... */
   }
-
-  /* Otherwise, collect the data for this cell. */
-  else {
-
-    /* Clear the progeny. */
-    bzero(c->progeny, sizeof(struct cell *) * 8);
-    c->split = 0;
-    c->maxdepth = c->depth;
-
-    /* Get dt_min/dt_max. */
-    for (int k = 0; k < count; k++) {
-      struct part *p = &parts[k];
-      struct xpart *xp = &xparts[k];
-      const float h = p->h;
-      const int ti_end = p->ti_end;
-      xp->x_diff[0] = 0.f;
-      xp->x_diff[1] = 0.f;
-      xp->x_diff[2] = 0.f;
-      if (h > h_max) h_max = h;
-      if (ti_end < ti_end_min) ti_end_min = ti_end;
-      if (ti_end > ti_end_max) ti_end_max = ti_end;
-    }
-    for (int k = 0; k < gcount; k++) {
-      struct gpart *gp = &gparts[k];
-      const int ti_end = gp->ti_end;
-      gp->x_diff[0] = 0.f;
-      gp->x_diff[1] = 0.f;
-      gp->x_diff[2] = 0.f;
-      if (ti_end < ti_end_min) ti_end_min = ti_end;
-      if (ti_end > ti_end_max) ti_end_max = ti_end;
-    }
-    c->h_max = h_max;
-    c->ti_end_min = ti_end_min;
-    c->ti_end_max = ti_end_max;
-  }
-
-  /* Set ownership according to the start of the parts array. */
-  if (s->nr_parts > 0)
-    c->owner =
-        ((c->parts - s->parts) % s->nr_parts) * s->nr_queues / s->nr_parts;
-  else if (s->nr_gparts > 0)
-    c->owner =
-        ((c->gparts - s->gparts) % s->nr_gparts) * s->nr_queues / s->nr_gparts;
-  else
-    c->owner = 0; /* Ok, there is really nothing on this rank... */
 }
 
 /**
- * @brief Return a used cell to the cell buffer.
+ * @brief Return a used cell to the buffer of unused sub-cells.
  *
  * @param s The #space.
  * @param c The #cell.
  */
-
 void space_recycle(struct space *s, struct cell *c) {
 
   /* Lock the space. */
@@ -1369,8 +1392,8 @@ void space_recycle(struct space *s, struct cell *c) {
   bzero(c, sizeof(struct cell));
 
   /* Hook this cell into the buffer. */
-  c->next = s->cells_new;
-  s->cells_new = c;
+  c->next = s->cells_sub;
+  s->cells_sub = c;
   s->tot_cells -= 1;
 
   /* Unlock the space. */
@@ -1378,39 +1401,42 @@ void space_recycle(struct space *s, struct cell *c) {
 }
 
 /**
- * @brief Get a new empty cell.
+ * @brief Get a new empty (sub-)#cell.
+ *
+ * If there are cells in the buffer, use the one at the head of the linked list.
+ * If we have no cells, allocate a new chunk of memory and pick one from there.
  *
  * @param s The #space.
  */
-
 struct cell *space_getcell(struct space *s) {
 
-  struct cell *c;
-  int k;
-
   /* Lock the space. */
   lock_lock(&s->lock);
 
   /* Is the buffer empty? */
-  if (s->cells_new == NULL) {
-    if (posix_memalign((void *)&s->cells_new, 64,
+  if (s->cells_sub == NULL) {
+    if (posix_memalign((void *)&s->cells_sub, cell_align,
                        space_cellallocchunk * sizeof(struct cell)) != 0)
       error("Failed to allocate more cells.");
-    bzero(s->cells_new, space_cellallocchunk * sizeof(struct cell));
-    for (k = 0; k < space_cellallocchunk - 1; k++)
-      s->cells_new[k].next = &s->cells_new[k + 1];
-    s->cells_new[space_cellallocchunk - 1].next = NULL;
+
+    /* Zero everything for good measure */
+    bzero(s->cells_sub, space_cellallocchunk * sizeof(struct cell));
+
+    /* Construct a linked list */
+    for (int k = 0; k < space_cellallocchunk - 1; k++)
+      s->cells_sub[k].next = &s->cells_sub[k + 1];
+    s->cells_sub[space_cellallocchunk - 1].next = NULL;
   }
 
   /* Pick off the next cell. */
-  c = s->cells_new;
-  s->cells_new = c->next;
+  struct cell *c = s->cells_sub;
+  s->cells_sub = c->next;
   s->tot_cells += 1;
 
   /* Unlock the space. */
   lock_unlock_blind(&s->lock);
 
-  /* Init some things in the cell. */
+  /* Init some things in the cell we just got. */
   bzero(c, sizeof(struct cell));
   c->nodeID = -1;
   if (lock_init(&c->lock) != 0 || lock_init(&c->glock) != 0)
@@ -1446,6 +1472,23 @@ void space_init_parts(struct space *s) {
   }
 }
 
+/**
+ * @brief Initialises all the extra particle data
+ *
+ * Calls cooling_init_part() on all the particles
+ */
+void space_init_xparts(struct space *s) {
+
+  const size_t nr_parts = s->nr_parts;
+  struct part *restrict p = s->parts;
+  struct xpart *restrict xp = s->xparts;
+
+  for (size_t i = 0; i < nr_parts; ++i) {
+
+    cooling_init_part(&p[i], &xp[i]);
+  }
+}
+
 /**
  * @brief Initialises all the g-particles by setting them into a valid state
  *
@@ -1492,7 +1535,6 @@ void space_init_gparts(struct space *s) {
  * parts with a cutoff below half the cell width are then split
  * recursively.
  */
-
 void space_init(struct space *s, const struct swift_params *params,
                 double dim[3], struct part *parts, struct gpart *gparts,
                 size_t Npart, size_t Ngpart, int periodic, int gravity,
@@ -1515,7 +1557,6 @@ void space_init(struct space *s, const struct swift_params *params,
   s->gparts = gparts;
   s->cell_min = parser_get_param_double(params, "SPH:max_smoothing_length");
   s->nr_queues = 1; /* Temporary value until engine construction */
-  s->size_parts_foreign = 0;
 
   /* Get the constants for the scheduler */
   space_maxsize = parser_get_opt_param_int(params, "Scheduler:cell_max_size",
@@ -1570,13 +1611,13 @@ void space_init(struct space *s, const struct swift_params *params,
 
     /* Check that all the part positions are reasonable, wrap if periodic. */
     if (periodic) {
-      for (int k = 0; k < Npart; k++)
+      for (size_t k = 0; k < Npart; k++)
         for (int j = 0; j < 3; j++) {
           while (parts[k].x[j] < 0) parts[k].x[j] += dim[j];
           while (parts[k].x[j] >= dim[j]) parts[k].x[j] -= dim[j];
         }
     } else {
-      for (int k = 0; k < Npart; k++)
+      for (size_t k = 0; k < Npart; k++)
         for (int j = 0; j < 3; j++)
           if (parts[k].x[j] < 0 || parts[k].x[j] >= dim[j])
             error("Not all particles are within the specified domain.");
@@ -1584,13 +1625,13 @@ void space_init(struct space *s, const struct swift_params *params,
 
     /* Same for the gparts */
     if (periodic) {
-      for (int k = 0; k < Ngpart; k++)
+      for (size_t k = 0; k < Ngpart; k++)
         for (int j = 0; j < 3; j++) {
           while (gparts[k].x[j] < 0) gparts[k].x[j] += dim[j];
           while (gparts[k].x[j] >= dim[j]) gparts[k].x[j] -= dim[j];
         }
     } else {
-      for (int k = 0; k < Ngpart; k++)
+      for (size_t k = 0; k < Ngpart; k++)
         for (int j = 0; j < 3; j++)
           if (gparts[k].x[j] < 0 || gparts[k].x[j] >= dim[j])
             error("Not all g-particles are within the specified domain.");
@@ -1607,6 +1648,7 @@ void space_init(struct space *s, const struct swift_params *params,
 
   /* Set the particles in a state where they are ready for a run */
   space_init_parts(s);
+  space_init_xparts(s);
   space_init_gparts(s);
 
   /* Init the space lock. */
@@ -1632,8 +1674,8 @@ void space_link_cleanup(struct space *s) {
  */
 void space_clean(struct space *s) {
 
-  for (int i = 0; i < s->nr_cells; ++i) cell_clean(&s->cells[i]);
-  free(s->cells);
+  for (int i = 0; i < s->nr_cells; ++i) cell_clean(&s->cells_top[i]);
+  free(s->cells_top);
   free(s->parts);
   free(s->xparts);
   free(s->gparts);
diff --git a/src/space.h b/src/space.h
index 6fe3681c85068979a555ff1d78e32ba7577cf3f0..72b17405f13766ad2ccc9d53712068f28172067b 100644
--- a/src/space.h
+++ b/src/space.h
@@ -37,11 +37,10 @@
 #include "space.h"
 
 /* Some constants. */
-#define space_maxdepth 10
 #define space_cellallocchunk 1000
 #define space_splitsize_default 400
 #define space_maxsize_default 8000000
-#define space_subsize_default 8000000
+#define space_subsize_default 64000000
 #define space_stretch 1.10f
 #define space_maxreldx 0.25f
 
@@ -53,86 +52,86 @@ extern int space_subsize;
 /* Map shift vector to sortlist. */
 extern const int sortlistID[27];
 
-/* Entry in a list of sorted indices. */
-struct entry {
-  float d;
-  int i;
-};
-
-/* The space in which the cells reside. */
+/**
+ * @brief The space in which the cells and particles reside.
+ */
 struct space {
 
-  /* Spatial extent. */
+  /*! Spatial extent. */
   double dim[3];
 
-  /* Cell widths. */
-  double width[3], iwidth[3];
+  /*! Is the space periodic? */
+  int periodic;
+
+  /*! Are we doing gravity? */
+  int gravity;
+
+  /*! Width of the top-level cells. */
+  double width[3];
+
+  /*! Inverse of the top-level cell width */
+  double iwidth[3];
 
-  /* The minimum and maximum cutoff radii. */
-  double h_max, cell_min;
+  /*! The minimum top-level cell width allowed. */
+  double cell_min;
 
-  /* Current maximum displacement for particles. */
+  /*! Current maximum displacement for particles. */
   float dx_max;
 
-  /* Number of cells. */
-  int nr_cells, tot_cells;
+  /*! Space dimensions in number of top-cells. */
+  int cdim[3];
 
-  /* Space dimensions in number of cells. */
-  int maxdepth, cdim[3];
+  /*! Maximal depth reached by the tree */
+  int maxdepth;
 
-  /* The (level 0) cells themselves. */
-  struct cell *cells;
+  /*! Number of top-level cells. */
+  int nr_cells;
 
-  /* Buffer of unused cells. */
-  struct cell *cells_new;
+  /*! Total number of cells (top- and sub-) */
+  int tot_cells;
 
-  /* The particle data (cells have pointers to this). */
-  struct part *parts;
-  struct xpart *xparts;
-  struct gpart *gparts;
+  /*! The (level 0) cells themselves. */
+  struct cell *cells_top;
 
-  /* The total number of parts in the space. */
+  /*! Buffer of unused cells for the sub-cells. */
+  struct cell *cells_sub;
+
+  /*! The total number of parts in the space. */
   size_t nr_parts, size_parts;
+
+  /*! The total number of g-parts in the space. */
   size_t nr_gparts, size_gparts;
 
-  /* Is the space periodic? */
-  int periodic;
+  /*! The particle data (cells have pointers to this). */
+  struct part *parts;
 
-  /* Are we doing gravity? */
-  int gravity;
+  /*! The extended particle data (cells have pointers to this). */
+  struct xpart *xparts;
 
-  /* General-purpose lock for this space. */
+  /*! The g-particle data (cells have pointers to this). */
+  struct gpart *gparts;
+
+  /*! General-purpose lock for this space. */
   swift_lock_type lock;
 
-  /* Number of queues in the system. */
+  /*! Number of queues in the system. */
   int nr_queues;
 
-  /* The associated engine. */
+  /*! The associated engine. */
   struct engine *e;
 
-  /* Buffers for parts that we will receive from foreign cells. */
+#ifdef WITH_MPI
+
+  /*! Buffers for parts that we will receive from foreign cells. */
   struct part *parts_foreign;
   size_t nr_parts_foreign, size_parts_foreign;
+
+  /*! Buffers for g-parts that we will receive from foreign cells. */
   struct gpart *gparts_foreign;
   size_t nr_gparts_foreign, size_gparts_foreign;
-};
 
-/* Interval stack necessary for parallel particle sorting. */
-struct qstack {
-  volatile ptrdiff_t i, j;
-  volatile int min, max;
-  volatile int ready;
-};
-struct parallel_sort {
-  struct part *parts;
-  struct gpart *gparts;
-  struct xpart *xparts;
-  int *ind;
-  struct qstack *stack;
-  unsigned int stack_size;
-  volatile unsigned int first, last, waiting;
+#endif
 };
-extern struct parallel_sort space_sort_struct;
 
 /* function prototypes. */
 void space_parts_sort(struct space *s, int *ind, size_t N, int min, int max,
@@ -156,10 +155,14 @@ void space_map_parts_xparts(struct space *s,
                                         struct cell *c));
 void space_map_cells_post(struct space *s, int full,
                           void (*fun)(struct cell *c, void *data), void *data);
+void space_parts_sort_mapper(void *map_data, int num_elements,
+                             void *extra_data);
+void space_gparts_sort_mapper(void *map_data, int num_elements,
+                              void *extra_data);
 void space_rebuild(struct space *s, double h_max, int verbose);
 void space_recycle(struct space *s, struct cell *c);
 void space_split(struct space *s, struct cell *cells, int verbose);
-void space_do_split(struct space *s, struct cell *c);
+void space_split_mapper(void *map_data, int num_elements, void *extra_data);
 void space_do_parts_sort();
 void space_do_gparts_sort();
 void space_init_parts(struct space *s);
diff --git a/src/swift.h b/src/swift.h
index 7e3116c1de8bc8e6cc2f89d0d5cbe9771ffbf33a..3119adfd4b95639a84137e243c6e971d80ef3825 100644
--- a/src/swift.h
+++ b/src/swift.h
@@ -27,6 +27,7 @@
 #include "cell.h"
 #include "clocks.h"
 #include "const.h"
+#include "cooling.h"
 #include "cycle.h"
 #include "debug.h"
 #include "engine.h"
@@ -42,7 +43,7 @@
 #include "part.h"
 #include "partition.h"
 #include "physical_constants.h"
-#include "potentials.h"
+#include "potential.h"
 #include "queue.h"
 #include "runner.h"
 #include "scheduler.h"
diff --git a/src/task.c b/src/task.c
index 13dd47e6cbf68de4ea6cd8ba6b898ee41a06618d..8da526f57886fa68121061c8eae2316912e73a30 100644
--- a/src/task.c
+++ b/src/task.c
@@ -49,19 +49,21 @@
 /* Task type names. */
 const char *taskID_names[task_type_count] = {
     "none",       "sort",    "self",          "pair",          "sub_self",
-    "sub_pair",   "init",    "ghost",         "drift",         "kick",
+    "sub_pair",   "init",    "ghost",         "extra_ghost",   "kick",
     "kick_fixdt", "send",    "recv",          "grav_gather_m", "grav_fft",
-    "grav_mm",    "grav_up", "grav_external", "part_sort",     "gpart_sort",
-    "split_cell", "rewait"};
+    "grav_mm",    "grav_up", "grav_external", "cooling"};
 
-const char *subtaskID_names[task_subtype_count] = {"none", "density", "force",
-                                                   "grav", "tend"};
+const char *subtaskID_names[task_subtype_count] = {
+    "none", "density", "gradient", "force", "grav", "tend"};
 
 /**
  * @brief Computes the overlap between the parts array of two given cells.
+ *
+ * @param ci The first #cell.
+ * @param cj The second #cell.
  */
 __attribute__((always_inline)) INLINE static size_t task_cell_overlap_part(
-    const struct cell *ci, const struct cell *cj) {
+    const struct cell *restrict ci, const struct cell *restrict cj) {
 
   if (ci == NULL || cj == NULL) return 0;
 
@@ -78,9 +80,12 @@ __attribute__((always_inline)) INLINE static size_t task_cell_overlap_part(
 
 /**
  * @brief Computes the overlap between the gparts array of two given cells.
+ *
+ * @param ci The first #cell.
+ * @param cj The second #cell.
  */
 __attribute__((always_inline)) INLINE static size_t task_cell_overlap_gpart(
-    const struct cell *ci, const struct cell *cj) {
+    const struct cell *restrict ci, const struct cell *restrict cj) {
 
   if (ci == NULL || cj == NULL) return 0;
 
@@ -111,6 +116,8 @@ __attribute__((always_inline)) INLINE static enum task_actions task_acts_on(
 
     case task_type_sort:
     case task_type_ghost:
+    case task_type_extra_ghost:
+    case task_type_cooling:
       return task_action_part;
       break;
 
@@ -121,6 +128,7 @@ __attribute__((always_inline)) INLINE static enum task_actions task_acts_on(
       switch (t->subtype) {
 
         case task_subtype_density:
+        case task_subtype_gradient:
         case task_subtype_force:
           return task_action_part;
           break;
@@ -137,7 +145,6 @@ __attribute__((always_inline)) INLINE static enum task_actions task_acts_on(
       break;
 
     case task_type_init:
-    case task_type_drift:
     case task_type_kick:
     case task_type_kick_fixdt:
     case task_type_send:
@@ -156,18 +163,15 @@ __attribute__((always_inline)) INLINE static enum task_actions task_acts_on(
       return task_action_gpart;
       break;
 
-    case task_type_part_sort:
-    case task_type_gpart_sort:
-    case task_type_split_cell:
-    case task_type_rewait:
-      return task_action_none;
-      break;
-
     default:
-      error("Unknow task_action for task");
+      error("Unknown task_action for task");
       return task_action_none;
       break;
   }
+
+  /* Silence compile warnings */
+  error("Unknown task_action for task");
+  return task_action_none;
 }
 
 /**
@@ -177,7 +181,8 @@ __attribute__((always_inline)) INLINE static enum task_actions task_acts_on(
  * @param ta The first #task.
  * @param tb The second #task.
  */
-float task_overlap(const struct task *ta, const struct task *tb) {
+float task_overlap(const struct task *restrict ta,
+                   const struct task *restrict tb) {
 
   if (ta == NULL || tb == NULL) return 0.f;
 
diff --git a/src/task.h b/src/task.h
index a7cbf28c3d1c7bde45102e5ce85e18c5f736e343..4928dc00bcd7958efdd6987b5aec90ab4b3e92fa 100644
--- a/src/task.h
+++ b/src/task.h
@@ -23,15 +23,18 @@
 #ifndef SWIFT_TASK_H
 #define SWIFT_TASK_H
 
+#include "../config.h"
+
 /* Includes. */
+#include "align.h"
 #include "cell.h"
 #include "cycle.h"
 
-/* Some constants. */
-#define task_maxwait 3
-#define task_maxunlock 15
+#define task_align 128
 
-/* The different task types. */
+/**
+ * @brief The different task types.
+ */
 enum task_types {
   task_type_none = 0,
   task_type_sort,
@@ -41,7 +44,7 @@ enum task_types {
   task_type_sub_pair,
   task_type_init,
   task_type_ghost,
-  task_type_drift,
+  task_type_extra_ghost,
   task_type_kick,
   task_type_kick_fixdt,
   task_type_send,
@@ -51,26 +54,26 @@ enum task_types {
   task_type_grav_mm,
   task_type_grav_up,
   task_type_grav_external,
-  task_type_part_sort,
-  task_type_gpart_sort,
-  task_type_split_cell,
-  task_type_rewait,
+  task_type_cooling,
   task_type_count
-};
+} __attribute__((packed));
 
-extern const char *taskID_names[];
-
-/* The different task sub-types. */
+/**
+ * @brief The different task sub-types (for pairs, selfs and sub-tasks).
+ */
 enum task_subtypes {
   task_subtype_none = 0,
   task_subtype_density,
+  task_subtype_gradient,
   task_subtype_force,
   task_subtype_grav,
   task_subtype_tend,
   task_subtype_count
-};
+} __attribute__((packed));
 
-/* The kind of action the task perform */
+/**
+ * @brief The type of particles/objects this task acts upon in a given cell.
+ */
 enum task_actions {
   task_action_none,
   task_action_part,
@@ -80,30 +83,74 @@ enum task_actions {
   task_action_count
 };
 
+/**
+ * @brief Names of the task types.
+ */
+extern const char *taskID_names[];
+
+/**
+ * @brief Names of the task sub-types.
+ */
 extern const char *subtaskID_names[];
 
-/* Data of a task. */
+/**
+ * @brief A task to be run by the #scheduler.
+ */
 struct task {
 
-  enum task_types type;
-  enum task_subtypes subtype;
-  char skip, tight, implicit;
-  int flags, wait, rank, weight;
-
+  /*! Pointers to the cells this task acts upon */
   struct cell *ci, *cj;
 
-  void *buff;
+  /*! List of tasks unlocked by this one */
+  struct task **unlock_tasks;
+
+  /*! Start and end time of this task */
+  ticks tic, toc;
 
 #ifdef WITH_MPI
+
+  /*! Buffer for this task's communications */
+  void *buff;
+
+  /*! MPI request corresponding to this task */
   MPI_Request req;
+
 #endif
 
-  int rid, last_rid;
-  ticks tic, toc;
+  /*! Flags used to carry additional information (e.g. sort directions) */
+  int flags;
 
-  int nr_unlock_tasks;
-  struct task **unlock_tasks;
-};
+  /*! Rank of a task in the order */
+  int rank;
+
+  /*! Weight of the task */
+  int weight;
+
+  /*! ID of the queue or runner owning this task */
+  short int rid;
+
+  /*! Number of tasks unlocked by this one */
+  short int nr_unlock_tasks;
+
+  /*! Number of unsatisfied dependencies */
+  short int wait;
+
+  /*! Type of the task */
+  enum task_types type;
+
+  /*! Sub-type of the task (for the tasks that have one) */
+  enum task_subtypes subtype;
+
+  /*! Should the scheduler skip this task ? */
+  char skip;
+
+  /*! Does this task require the particles to be tightly in the cell ? */
+  char tight;
+
+  /*! Is this task implicit (i.e. does not do anything) ? */
+  char implicit;
+
+} SWIFT_STRUCT_ALIGN;
 
 /* Function prototypes. */
 void task_unlock(struct task *t);
diff --git a/src/threadpool.c b/src/threadpool.c
new file mode 100644
index 0000000000000000000000000000000000000000..4ef75954b39603db0d442acc9be2bd95b39614d3
--- /dev/null
+++ b/src/threadpool.c
@@ -0,0 +1,187 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2016 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+/* Config parameters. */
+#include "../config.h"
+
+/* Some standard headers. */
+#include <float.h>
+#include <limits.h>
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* This object's header. */
+#include "threadpool.h"
+
+/* Local headers. */
+#include "atomic.h"
+#include "error.h"
+
+/**
+ * @brief Entry function of the worker threads of a #threadpool.
+ *
+ * Each worker registers itself as waiting, sleeps on the pool's @c thread_cond
+ * until it is woken, and then claims chunks of the current map data through
+ * an atomic counter until none are left. The loop never terminates.
+ *
+ * NOTE(review): the condition wait is not guarded by a predicate, so a
+ * spurious wake-up would be taken for a start signal -- to be confirmed.
+ *
+ * @param data The #threadpool this thread belongs to.
+ */
+void *threadpool_runner(void *data) {
+
+  /* Our threadpool. */
+  struct threadpool *tp = (struct threadpool *)data;
+
+  /* Main loop. */
+  while (1) {
+
+    /* Let the controller know that this thread is waiting. */
+    pthread_mutex_lock(&tp->thread_mutex);
+    tp->num_threads_waiting += 1;
+    if (tp->num_threads_waiting == tp->num_threads) {
+      pthread_cond_signal(&tp->control_cond);
+    }
+
+    /* Wait for the controller. */
+    pthread_cond_wait(&tp->thread_cond, &tp->thread_mutex);
+    tp->num_threads_waiting -= 1;
+    tp->num_threads_running += 1;
+    if (tp->num_threads_running == tp->num_threads) {
+      pthread_cond_signal(&tp->control_cond);
+    }
+    pthread_mutex_unlock(&tp->thread_mutex);
+
+    /* The index of the mapping task we will work on next. */
+    size_t task_ind;
+    while ((task_ind = atomic_add(&tp->map_data_count, tp->map_data_chunk)) <
+           tp->map_data_size) {
+      /* Clip the last chunk so that it does not run past the data. */
+      const int num_elements = task_ind + tp->map_data_chunk > tp->map_data_size
+                                   ? tp->map_data_size - task_ind
+                                   : tp->map_data_chunk;
+      tp->map_function((char *)tp->map_data + (tp->map_data_stride * task_ind),
+                       num_elements, tp->map_extra_data);
+    }
+  }
+}
+
+/**
+ * @brief Initialises the #threadpool with a given number of threads.
+ *
+ * @param tp The #threadpool.
+ * @param num_threads The number of threads.
+ */
+void threadpool_init(struct threadpool *tp, int num_threads) {
+
+  /* Initialize the thread counters. */
+  tp->num_threads = num_threads;
+  tp->num_threads_waiting = 0;
+  tp->num_threads_running = 0;
+
+  /* Init the threadpool mutexes. */
+  if (pthread_mutex_init(&tp->thread_mutex, NULL) != 0)
+    error("Failed to initialize mutexes.");
+  if (pthread_cond_init(&tp->control_cond, NULL) != 0 ||
+      pthread_cond_init(&tp->thread_cond, NULL) != 0)
+    error("Failed to initialize condition variables.");
+
+  /* Set the task counter to zero. */
+  tp->map_data_size = 0;
+  tp->map_data_count = 0;
+  tp->map_data_stride = 0;
+  tp->map_data_chunk = 0;
+  tp->map_function = NULL;
+
+  /* Allocate the threads. */
+  if ((tp->threads = (pthread_t *)malloc(sizeof(pthread_t) * num_threads)) ==
+      NULL) {
+    error("Failed to allocate thread array.");
+  }
+
+  /* Create and start the threads. */
+  pthread_mutex_lock(&tp->thread_mutex);
+  for (int k = 0; k < num_threads; k++) {
+    if (pthread_create(&tp->threads[k], NULL, &threadpool_runner, tp) != 0)
+      error("Failed to create threadpool runner thread.");
+  }
+
+  /* Wait for all the threads to be up and running. */
+  while (tp->num_threads_waiting < tp->num_threads) {
+    pthread_cond_wait(&tp->control_cond, &tp->thread_mutex);
+  }
+  pthread_mutex_unlock(&tp->thread_mutex);
+}
+
+/**
+ * @brief Map a function to an array of data in parallel using a #threadpool.
+ *
+ * The function @c map_function is called on each element of @c map_data
+ * in parallel.
+ *
+ * @param tp The #threadpool on which to run.
+ * @param map_function The function that will be applied to the map data.
+ * @param map_data The data on which the mapping function will be called.
+ * @param N Number of elements in @c map_data.
+ * @param stride Size, in bytes, of each element of @c map_data.
+ * @param chunk Number of map data elements to pass to the function at a time.
+ *        Values smaller than one are treated as one.
+ * @param extra_data Additional pointer that will be passed to the mapping
+ *        function, may contain additional data.
+ */
+void threadpool_map(struct threadpool *tp, threadpool_map_function map_function,
+                    void *map_data, size_t N, int stride, int chunk,
+                    void *extra_data) {
+
+  /* Set the map data and signal the threads. */
+  pthread_mutex_lock(&tp->thread_mutex);
+  tp->map_data_stride = stride;
+  tp->map_data_size = N;
+  tp->map_data_count = 0;
+  /* A chunk of zero would never advance the claim counter: clamp to one. */
+  tp->map_data_chunk = (chunk > 0) ? chunk : 1;
+  tp->map_function = map_function;
+  tp->map_data = map_data;
+  tp->map_extra_data = extra_data;
+  tp->num_threads_running = 0;
+  pthread_cond_broadcast(&tp->thread_cond);
+
+  /* Wait for all the threads to be up and running. */
+  while (tp->num_threads_running < tp->num_threads) {
+    pthread_cond_wait(&tp->control_cond, &tp->thread_mutex);
+  }
+
+  /* Wait for all threads to be done. */
+  while (tp->num_threads_waiting < tp->num_threads) {
+    pthread_cond_wait(&tp->control_cond, &tp->thread_mutex);
+  }
+  pthread_mutex_unlock(&tp->thread_mutex);
+}
+
+/**
+ * @brief Frees up the memory allocated for this #threadpool.
+ *
+ * Only the thread array is released; the worker threads, the mutex and the
+ * condition variables are left untouched.
+ *
+ * @param tp The #threadpool.
+ */
+void threadpool_clean(struct threadpool *tp) { free(tp->threads); }
diff --git a/src/threadpool.h b/src/threadpool.h
new file mode 100644
index 0000000000000000000000000000000000000000..76aa0c119610c4d540e117f046b286095a9c676d
--- /dev/null
+++ b/src/threadpool.h
@@ -0,0 +1,62 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2016 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+#ifndef SWIFT_THREADPOOL_H
+#define SWIFT_THREADPOOL_H
+
+/* Config parameters. */
+#include "../config.h"
+
+/* Some standard headers. */
+#include <pthread.h>
+
+/* Function type for mappings. */
+typedef void (*threadpool_map_function)(void *map_data, int num_elements,
+                                        void *extra_data);
+
+/* Data of a threadpool: worker threads plus the state of the current map. */
+struct threadpool {
+
+  /* Number of threads in this pool. */
+  int num_threads;
+
+  /* Array of num_threads thread handles, allocated in threadpool_init(). */
+  pthread_t *threads;
+
+  /* Workers sleep on thread_cond, the caller on control_cond (one mutex). */
+  pthread_mutex_t thread_mutex;
+  pthread_cond_t control_cond, thread_cond;
+
+  /* Current map: data, extra argument, claim counter, size, stride, chunk. */
+  void *map_data, *map_extra_data;
+  volatile size_t map_data_count, map_data_size, map_data_stride,
+      map_data_chunk;
+  volatile threadpool_map_function map_function;
+
+  /* Threads asleep on thread_cond / threads woken up for the current map. */
+  volatile int num_threads_waiting, num_threads_running;
+};
+
+/* Function prototypes. */
+void threadpool_init(struct threadpool *tp, int num_threads);
+void threadpool_map(struct threadpool *tp, threadpool_map_function map_function,
+                    void *map_data, size_t N, int stride, int chunk,
+                    void *extra_data);
+void threadpool_clean(struct threadpool *tp);
+
+#endif /* SWIFT_THREADPOOL_H */
diff --git a/src/timers.c b/src/timers.c
index b621d27c90902f06c3760cbef6a88237a2b3b95b..c2f3b35d75ea340082acf1a5ec334bb3543bab12 100644
--- a/src/timers.c
+++ b/src/timers.c
@@ -37,11 +37,11 @@ ticks timers[timer_count];
  * To reset all timers, use the mask #timers_mask_all.
  */
 
-void timers_reset(unsigned int mask) {
+void timers_reset(unsigned long long mask) {
 
   int k;
 
   /* Loop over the timers and set the masked ones to zero. */
   for (k = 0; k < timer_count; k++)
-    if (mask & (1 << k)) timers[k] = 0;
+    if (mask & (1ull << k)) timers[k] = 0;
 }
diff --git a/src/timers.h b/src/timers.h
index aa8455397daf1e88b709a8332e3ae63694991e94..b93a34df4d90251d4686afaef0be51b36dc9a25e 100644
--- a/src/timers.h
+++ b/src/timers.h
@@ -36,17 +36,21 @@ enum {
   timer_kick,
   timer_dosort,
   timer_doself_density,
+  timer_doself_gradient,
   timer_doself_force,
   timer_doself_grav_pp,
   timer_dopair_density,
+  timer_dopair_gradient,
   timer_dopair_force,
   timer_dopair_grav_pm,
   timer_dopair_grav_pp,
   timer_dograv_external,
   timer_dosub_self_density,
+  timer_dosub_self_gradient,
   timer_dosub_self_force,
   timer_dosub_self_grav,
   timer_dosub_pair_density,
+  timer_dosub_pair_gradient,
   timer_dosub_pair_force,
   timer_dosub_pair_grav,
   timer_dopair_subset,
@@ -57,6 +61,7 @@ enum {
   timer_qsteal,
   timer_runners,
   timer_step,
+  timer_do_cooling,
   timer_count,
 };
 
@@ -64,7 +69,7 @@ enum {
 extern ticks timers[timer_count];
 
 /* Mask for all timers. */
-#define timers_mask_all ((1 << timer_count) - 1)
+#define timers_mask_all ((1ull << timer_count) - 1)
 
 /* Define the timer macros. */
 #ifdef TIMER
@@ -74,7 +79,7 @@ extern ticks timers[timer_count];
 #define TIMER_TOC(t) timers_toc(t, tic)
 #define TIMER_TIC2 ticks tic2 = getticks();
 #define TIMER_TOC2(t) timers_toc(t, tic2)
-INLINE static ticks timers_toc(int t, ticks tic) {
+INLINE static ticks timers_toc(unsigned int t, ticks tic) {
   ticks d = (getticks() - tic);
   atomic_add(&timers[t], d);
   return d;
@@ -87,6 +92,6 @@ INLINE static ticks timers_toc(int t, ticks tic) {
 #endif
 
 /* Function prototypes. */
-void timers_reset(unsigned int mask);
+void timers_reset(unsigned long long mask);
 
 #endif /* SWIFT_TIMERS_H */
diff --git a/src/timestep.h b/src/timestep.h
index 569120cf9cf989b633da35529f7693c8f62a1910..599fb4762e11b08fc942fb02acbbf1970f477de4 100644
--- a/src/timestep.h
+++ b/src/timestep.h
@@ -68,16 +68,18 @@ __attribute__((always_inline)) INLINE static int get_integer_timestep(
 __attribute__((always_inline)) INLINE static int get_gpart_timestep(
     const struct gpart *restrict gp, const struct engine *restrict e) {
 
-  const float new_dt_external = gravity_compute_timestep_external(
-      e->external_potential, e->physical_constants, gp);
-  const float new_dt_self =
-      gravity_compute_timestep_self(e->physical_constants, gp);
+  const float new_dt_external = external_gravity_timestep(
+      e->time, e->external_potential, e->physical_constants, gp);
 
-  float new_dt = fminf(new_dt_external, new_dt_self);
+  /* const float new_dt_self = */
+  /*     gravity_compute_timestep_self(e->physical_constants, gp); */
+  const float new_dt_self = FLT_MAX;  // MATTHIEU
+
+  float new_dt = min(new_dt_external, new_dt_self);
 
   /* Limit timestep within the allowed range */
-  new_dt = fminf(new_dt, e->dt_max);
-  new_dt = fmaxf(new_dt, e->dt_min);
+  new_dt = min(new_dt, e->dt_max);
+  new_dt = max(new_dt, e->dt_min);
 
   /* Convert to integer time */
   const int new_dti =
@@ -100,21 +102,28 @@ __attribute__((always_inline)) INLINE static int get_part_timestep(
   /* Compute the next timestep (hydro condition) */
   const float new_dt_hydro = hydro_compute_timestep(p, xp, e->hydro_properties);
 
+  /* Compute the next timestep (cooling condition) */
+  float new_dt_cooling = FLT_MAX;
+  if (e->policy & engine_policy_cooling)
+    new_dt_cooling = cooling_timestep(e->cooling_func, e->physical_constants,
+                                      e->internalUnits, p);
+
   /* Compute the next timestep (gravity condition) */
   float new_dt_grav = FLT_MAX;
   if (p->gpart != NULL) {
 
-    const float new_dt_external = gravity_compute_timestep_external(
-        e->external_potential, e->physical_constants, p->gpart);
+    const float new_dt_external = external_gravity_timestep(
+        e->time, e->external_potential, e->physical_constants, p->gpart);
+
     /* const float new_dt_self = */
     /*     gravity_compute_timestep_self(e->physical_constants, p->gpart); */
     const float new_dt_self = FLT_MAX;  // MATTHIEU
 
-    new_dt_grav = fminf(new_dt_external, new_dt_self);
+    new_dt_grav = min(new_dt_external, new_dt_self);
   }
 
   /* Final time-step is minimum of hydro and gravity */
-  float new_dt = fminf(new_dt_hydro, new_dt_grav);
+  float new_dt = min(min(new_dt_hydro, new_dt_cooling), new_dt_grav);
 
   /* Limit change in h */
   const float dt_h_change =
@@ -122,11 +131,11 @@ __attribute__((always_inline)) INLINE static int get_part_timestep(
           ? fabsf(e->hydro_properties->log_max_h_change * p->h / p->force.h_dt)
           : FLT_MAX;
 
-  new_dt = fminf(new_dt, dt_h_change);
+  new_dt = min(new_dt, dt_h_change);
 
   /* Limit timestep within the allowed range */
-  new_dt = fminf(new_dt, e->dt_max);
-  new_dt = fmaxf(new_dt, e->dt_min);
+  new_dt = min(new_dt, e->dt_max);
+  new_dt = max(new_dt, e->dt_min);
 
   /* Convert to integer time */
   const int new_dti =
diff --git a/src/tools.c b/src/tools.c
index b64e17849081994e1969d5f8de0636768dcb729a..060bf1439f30dc6237938c060bc4ddc8d9be822b 100644
--- a/src/tools.c
+++ b/src/tools.c
@@ -442,43 +442,6 @@ void pairs_single_grav(double *dim, long long int pid,
       aabs[2]);
 }
 
-/**
- * @brief Test the density function by dumping it for two random parts.
- *
- * @param N number of intervals in [0,1].
- */
-void density_dump(int N) {
-
-  int k;
-  float r2[4] = {0.0f, 0.0f, 0.0f, 0.0f}, hi[4], hj[4];
-  struct part /**pi[4],  *pj[4],*/ Pi[4], Pj[4];
-
-  /* Init the interaction parameters. */
-  for (k = 0; k < 4; k++) {
-    Pi[k].mass = 1.0f;
-    Pi[k].rho = 0.0f;
-    Pi[k].density.wcount = 0.0f;
-    Pi[k].id = k;
-    Pj[k].mass = 1.0f;
-    Pj[k].rho = 0.0f;
-    Pj[k].density.wcount = 0.0f;
-    Pj[k].id = k + 4;
-    hi[k] = 1.0;
-    hj[k] = 1.0;
-  }
-
-  for (k = 0; k <= N; k++) {
-    r2[3] = r2[2];
-    r2[2] = r2[1];
-    r2[1] = r2[0];
-    r2[0] = ((float)k) / N;
-    Pi[0].density.wcount = 0;
-    Pj[0].density.wcount = 0;
-    runner_iact_density(r2[0], NULL, hi[0], hj[0], &Pi[0], &Pj[0]);
-    printf(" %e %e %e", r2[0], Pi[0].density.wcount, Pj[0].density.wcount);
-  }
-}
-
 /**
  * @brief Compute the force on a single particle brute-force.
  */
@@ -499,7 +462,7 @@ void engine_single_density(double *dim, long long int pid,
   hydro_init_part(&p);
 
   /* Loop over all particle pairs (force). */
-  for (int k = 0; k < N; k++) {
+  for (k = 0; k < N; k++) {
     if (parts[k].id == p.id) continue;
     for (int i = 0; i < 3; i++) {
       dx[i] = p.x[i] - parts[k].x[i];
@@ -520,7 +483,7 @@ void engine_single_density(double *dim, long long int pid,
   /* Dump the result. */
   hydro_end_density(&p, 0);
   message("part %lli (h=%e) has wcount=%e, rho=%e.", p.id, p.h,
-          p.density.wcount, p.rho);
+          p.density.wcount, hydro_get_density(&p));
   fflush(stdout);
 }
 
diff --git a/src/units.c b/src/units.c
index 5c262ae03639262dfa126b101402b8fbfe41259a..2241d441ec9af9b6d5083191e8f61010ebaccb20 100644
--- a/src/units.c
+++ b/src/units.c
@@ -316,6 +316,15 @@ void units_get_base_unit_exponants_array(float baseUnitsExp[5],
 
     case UNIT_CONV_TEMPERATURE:
       baseUnitsExp[UNIT_TEMPERATURE] = 1.f;
+      break;
+
+    case UNIT_CONV_VOLUME:
+      baseUnitsExp[UNIT_LENGTH] = 3.f;
+      break;
+
+    case UNIT_CONV_INV_VOLUME:
+      baseUnitsExp[UNIT_LENGTH] = -3.f;
+      break;
   }
 }
 
diff --git a/src/units.h b/src/units.h
index c67f6ebbab324e3c90ae86eb1ea11dcf013c5dc7..78fdf1c23c3c276607d5353ee3437d8eb1e96537 100644
--- a/src/units.h
+++ b/src/units.h
@@ -29,25 +29,25 @@
  * @brief The unit system used internally.
  *
  * This structure contains the conversion factors to the 7 cgs base units to the
- *internal units.
- * It is used everytime a conversion is performed or an i/o function is called.
- *
+ * internal units. It is used everytime a conversion is performed or an i/o
+ * function is called.
  **/
 struct UnitSystem {
-  double UnitMass_in_cgs; /*< Conversion factor from grams to internal mass
-                             units */
 
-  double UnitLength_in_cgs; /*< Conversion factor from centimeters to internal
-                               length units. */
+  /*! Conversion factor from grams to internal mass units */
+  double UnitMass_in_cgs;
+
+  /*! Conversion factor from centimeters to internal length unit */
+  double UnitLength_in_cgs;
 
-  double UnitTime_in_cgs; /*< Conversion factor from seconds to internal time
-                             units. */
+  /*! Conversion factor from seconds to internal time units */
+  double UnitTime_in_cgs;
 
-  double UnitCurrent_in_cgs; /*< Conversion factor from Ampere to internal
-                                current units. */
+  /*! Conversion factor from Ampere to internal current units */
+  double UnitCurrent_in_cgs;
 
-  double UnitTemperature_in_cgs; /*< Conversion factor from Kelvins to internal
-                                    temperature units. */
+  /*! Conversion factor from Kelvins to internal temperature units. */
+  double UnitTemperature_in_cgs;
 };
 
 /**
@@ -89,7 +89,9 @@ enum UnitConversionFactor {
   UNIT_CONV_MAGNETIC_FLUX,
   UNIT_CONV_MAGNETIC_FIELD,
   UNIT_CONV_MAGNETIC_INDUCTANCE,
-  UNIT_CONV_TEMPERATURE
+  UNIT_CONV_TEMPERATURE,
+  UNIT_CONV_VOLUME,
+  UNIT_CONV_INV_VOLUME
 };
 
 void units_init_cgs(struct UnitSystem*);
diff --git a/src/version.c b/src/version.c
index 68a051fa08c53c68319c1e785c0c6503afe18f4d..8bd94e5651dbc597fcd80bc585a47c6633ee3993 100644
--- a/src/version.c
+++ b/src/version.c
@@ -218,7 +218,7 @@ const char *hdf5_version(void) {
 #ifdef HAVE_HDF5
   unsigned int majnum, minnum, relnum;
   H5get_libversion(&majnum, &minnum, &relnum);
-  sprintf(version, "%i.%i.%i", majnum, minnum, relnum);
+  sprintf(version, "%u.%u.%u", majnum, minnum, relnum);
 #else
   sprintf(version, "Unknown version");
 #endif
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 2adce2c90985b537a668adfccec0d5241113314e..136b7ad231947574a5459298e7fb85902028a3f4 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -23,12 +23,17 @@ AM_LDFLAGS = ../src/.libs/libswiftsim.a $(HDF5_LDFLAGS) $(HDF5_LIBS) $(FFTW_LIBS
 # List of programs and scripts to run in the test suite
 TESTS = testGreetings testMaths testReading.sh testSingle testKernel testSymmetry \
         testPair.sh testPairPerturbed.sh test27cells.sh test27cellsPerturbed.sh  \
-        testParser.sh testSPHStep test125cells.sh testKernelGrav testFFT
+        testParser.sh testSPHStep test125cells.sh testKernelGrav testFFT \
+        testAdiabaticIndex testRiemannExact testRiemannTRRS testRiemannHLLC \
+        testMatrixInversion testThreadpool
 
 # List of test programs to compile
 check_PROGRAMS = testGreetings testReading testSingle testTimeIntegration \
 		 testSPHStep testPair test27cells test125cells testParser \
-                 testKernel testKernelGrav testFFT testInteractions testMaths testSymmetry
+                 testKernel testKernelGrav testFFT testInteractions testMaths \
+                 testSymmetry testThreadpool \
+                 testAdiabaticIndex testRiemannExact testRiemannTRRS \
+                 testRiemannHLLC testMatrixInversion
 
 # Sources for the individual programs
 testGreetings_SOURCES = testGreetings.c
@@ -61,7 +66,21 @@ testFFT_SOURCES = testFFT.c
 
 testInteractions_SOURCES = testInteractions.c
 
+testAdiabaticIndex_SOURCES = testAdiabaticIndex.c
+
+testRiemannExact_SOURCES = testRiemannExact.c
+
+testRiemannTRRS_SOURCES = testRiemannTRRS.c
+
+testRiemannHLLC_SOURCES = testRiemannHLLC.c
+
+testMatrixInversion_SOURCES = testMatrixInversion.c
+
+testThreadpool_SOURCES = testThreadpool.c
+
 # Files necessary for distribution
 EXTRA_DIST = testReading.sh makeInput.py testPair.sh testPairPerturbed.sh \
-	     test27cells.sh test27cellsPerturbed.sh tolerance.dat testParser.sh \
-	     test125cells.sh testParserInput.yaml
+	     test27cells.sh test27cellsPerturbed.sh testParser.sh \
+	     test125cells.sh testParserInput.yaml difffloat.py \
+	     tolerance_125.dat tolerance_27_normal.dat tolerance_27_perturbed.dat \
+	     tolerance_pair_normal.dat tolerance_pair_perturbed.dat
diff --git a/tests/test125cells.c b/tests/test125cells.c
index 37d530fa7b3750a3304cb11868ac43e6df62781d..e666658f43de135e3e72521b52f2a688c596a6f6 100644
--- a/tests/test125cells.c
+++ b/tests/test125cells.c
@@ -99,6 +99,8 @@ void set_energy_state(struct part *part, enum pressure_field press, float size,
   part->u = pressure / (hydro_gamma_minus_one * density);
 #elif defined(MINIMAL_SPH)
   part->u = pressure / (hydro_gamma_minus_one * density);
+#elif defined(GIZMO_SPH)
+  part->primitives.P = pressure;
 #else
   error("Need to define pressure here !");
 #endif
@@ -127,7 +129,7 @@ void get_solution(const struct cell *main_cell, struct solution_part *solution,
                   float density, enum velocity_field vel,
                   enum pressure_field press, float size) {
 
-  for (size_t i = 0; i < main_cell->count; ++i) {
+  for (int i = 0; i < main_cell->count; ++i) {
 
     solution[i].id = main_cell->parts[i].id;
 
@@ -187,12 +189,29 @@ void get_solution(const struct cell *main_cell, struct solution_part *solution,
 void reset_particles(struct cell *c, enum velocity_field vel,
                      enum pressure_field press, float size, float density) {
 
-  for (size_t i = 0; i < c->count; ++i) {
+  for (int i = 0; i < c->count; ++i) {
 
     struct part *p = &c->parts[i];
 
     set_velocity(p, vel, size);
     set_energy_state(p, press, size, density);
+
+#if defined(GIZMO_SPH)
+    p->geometry.volume = p->conserved.mass / density;
+    p->primitives.rho = density;
+    p->primitives.v[0] = p->v[0];
+    p->primitives.v[1] = p->v[1];
+    p->primitives.v[2] = p->v[2];
+    p->conserved.momentum[0] = p->conserved.mass * p->v[0];
+    p->conserved.momentum[1] = p->conserved.mass * p->v[1];
+    p->conserved.momentum[2] = p->conserved.mass * p->v[2];
+    p->conserved.energy =
+        p->primitives.P / hydro_gamma_minus_one * p->geometry.volume +
+        0.5f * (p->conserved.momentum[0] * p->conserved.momentum[0] +
+                p->conserved.momentum[1] * p->conserved.momentum[1] +
+                p->conserved.momentum[2] * p->conserved.momentum[2]) /
+            p->conserved.mass;
+#endif
   }
 }
 
@@ -238,7 +257,12 @@ struct cell *make_cell(size_t n, const double offset[3], double size, double h,
         part->x[1] = offset[1] + size * (y + 0.5) / (float)n;
         part->x[2] = offset[2] + size * (z + 0.5) / (float)n;
         part->h = size * h / (float)n;
+
+#ifdef GIZMO_SPH
+        part->conserved.mass = density * volume / count;
+#else
         part->mass = density * volume / count;
+#endif
 
         set_velocity(part, vel, size);
         set_energy_state(part, press, size, density);
@@ -248,6 +272,24 @@ struct cell *make_cell(size_t n, const double offset[3], double size, double h,
         part->ti_end = 1;
 
         hydro_first_init_part(part, xpart);
+
+#if defined(GIZMO_SPH)
+        part->geometry.volume = part->conserved.mass / density;
+        part->primitives.rho = density;
+        part->primitives.v[0] = part->v[0];
+        part->primitives.v[1] = part->v[1];
+        part->primitives.v[2] = part->v[2];
+        part->conserved.momentum[0] = part->conserved.mass * part->v[0];
+        part->conserved.momentum[1] = part->conserved.mass * part->v[1];
+        part->conserved.momentum[2] = part->conserved.mass * part->v[2];
+        part->conserved.energy =
+            part->primitives.P / hydro_gamma_minus_one * part->geometry.volume +
+            0.5f * (part->conserved.momentum[0] * part->conserved.momentum[0] +
+                    part->conserved.momentum[1] * part->conserved.momentum[1] +
+                    part->conserved.momentum[2] * part->conserved.momentum[2]) /
+                part->conserved.mass;
+#endif
+
         ++part;
         ++xpart;
       }
@@ -304,7 +346,7 @@ void dump_particle_fields(char *fileName, struct cell *main_cell,
   fprintf(file, "# Main cell --------------------------------------------\n");
 
   /* Write main cell */
-  for (size_t pid = 0; pid < main_cell->count; pid++) {
+  for (int pid = 0; pid < main_cell->count; pid++) {
     fprintf(file,
             "%6llu %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f "
             "%8.5f "
@@ -313,7 +355,7 @@ void dump_particle_fields(char *fileName, struct cell *main_cell,
             main_cell->parts[pid].x[1], main_cell->parts[pid].x[2],
             main_cell->parts[pid].v[0], main_cell->parts[pid].v[1],
             main_cell->parts[pid].v[2], main_cell->parts[pid].h,
-            main_cell->parts[pid].rho,
+            hydro_get_density(&main_cell->parts[pid]),
 #ifdef MINIMAL_SPH
             0.f,
 #else
@@ -343,7 +385,7 @@ void dump_particle_fields(char *fileName, struct cell *main_cell,
 
     fprintf(file, "# Solution ---------------------------------------------\n");
 
-    for (size_t pid = 0; pid < main_cell->count; pid++) {
+    for (int pid = 0; pid < main_cell->count; pid++) {
       fprintf(file,
               "%6llu %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f "
               "%8.5f %8.5f "
@@ -466,7 +508,6 @@ int main(int argc, char *argv[]) {
   /* Build the infrastructure */
   struct space space;
   space.periodic = 0;
-  space.h_max = h;
 
   struct phys_const prog_const;
   prog_const.const_newton_G = 1.f;
@@ -478,6 +519,7 @@ int main(int argc, char *argv[]) {
   hp.CFL_condition = 0.1;
 
   struct engine engine;
+  bzero(&engine, sizeof(struct engine));
   engine.hydro_properties = &hp;
   engine.physical_constants = &prog_const;
   engine.s = &space;
@@ -523,7 +565,7 @@ int main(int argc, char *argv[]) {
 
   /* Start the test */
   ticks time = 0;
-  for (size_t i = 0; i < runs; ++i) {
+  for (size_t n = 0; n < runs; ++n) {
 
     const ticks tic = getticks();
 
@@ -601,7 +643,7 @@ int main(int argc, char *argv[]) {
     time += toc - tic;
 
     /* Dump if necessary */
-    if (i == 0) {
+    if (n == 0) {
       sprintf(outputFileName, "swift_dopair_125_%s.dat",
               outputFileNameExtension);
       dump_particle_fields(outputFileName, main_cell, solution, 0);
diff --git a/tests/test27cells.c b/tests/test27cells.c
index 3d1d56d9245f9a9aee9e705754c46bb22ba0ef0a..1a1ab88748d922b3e7fbb30a73a10809dca10863 100644
--- a/tests/test27cells.c
+++ b/tests/test27cells.c
@@ -104,7 +104,11 @@ struct cell *make_cell(size_t n, double *offset, double size, double h,
         }
         part->h = size * h / (float)n;
         part->id = ++(*partId);
+#ifdef GIZMO_SPH
+        part->conserved.mass = density * volume / count;
+#else
         part->mass = density * volume / count;
+#endif
         part->ti_begin = 0;
         part->ti_end = 1;
         ++part;
@@ -146,7 +150,7 @@ void clean_up(struct cell *ci) {
  * @brief Initializes all particles field to be ready for a density calculation
  */
 void zero_particle_fields(struct cell *c) {
-  for (size_t pid = 0; pid < c->count; pid++) {
+  for (int pid = 0; pid < c->count; pid++) {
     hydro_init_part(&c->parts[pid]);
   }
 }
@@ -155,7 +159,7 @@ void zero_particle_fields(struct cell *c) {
  * @brief Ends the loop by adding the appropriate coefficients
  */
 void end_calculation(struct cell *c) {
-  for (size_t pid = 0; pid < c->count; pid++) {
+  for (int pid = 0; pid < c->count; pid++) {
     hydro_end_density(&c->parts[pid], 1);
   }
 }
@@ -177,14 +181,15 @@ void dump_particle_fields(char *fileName, struct cell *main_cell,
   fprintf(file, "# Main cell --------------------------------------------\n");
 
   /* Write main cell */
-  for (size_t pid = 0; pid < main_cell->count; pid++) {
+  for (int pid = 0; pid < main_cell->count; pid++) {
     fprintf(file,
             "%6llu %10f %10f %10f %10f %10f %10f %13e %13e %13e %13e %13e "
             "%13e %13e %13e\n",
             main_cell->parts[pid].id, main_cell->parts[pid].x[0],
             main_cell->parts[pid].x[1], main_cell->parts[pid].x[2],
             main_cell->parts[pid].v[0], main_cell->parts[pid].v[1],
-            main_cell->parts[pid].v[2], main_cell->parts[pid].rho,
+            main_cell->parts[pid].v[2],
+            hydro_get_density(&main_cell->parts[pid]),
 #if defined(GIZMO_SPH)
             0.f,
 #else
@@ -219,14 +224,14 @@ void dump_particle_fields(char *fileName, struct cell *main_cell,
                 "# Offset: [%2d %2d %2d] -----------------------------------\n",
                 i - 1, j - 1, k - 1);
 
-        for (size_t pjd = 0; pjd < cj->count; pjd++) {
+        for (int pjd = 0; pjd < cj->count; pjd++) {
           fprintf(
               file,
               "%6llu %10f %10f %10f %10f %10f %10f %13e %13e %13e %13e %13e "
               "%13e %13e %13e\n",
               cj->parts[pjd].id, cj->parts[pjd].x[0], cj->parts[pjd].x[1],
               cj->parts[pjd].x[2], cj->parts[pjd].v[0], cj->parts[pjd].v[1],
-              cj->parts[pjd].v[2], cj->parts[pjd].rho,
+              cj->parts[pjd].v[2], hydro_get_density(&cj->parts[pjd]),
 #if defined(GIZMO_SPH)
               0.f,
 #else
@@ -339,7 +344,6 @@ int main(int argc, char *argv[]) {
   /* Build the infrastructure */
   struct space space;
   space.periodic = 0;
-  space.h_max = h;
 
   struct engine engine;
   engine.s = &space;
diff --git a/tests/testAdiabaticIndex.c b/tests/testAdiabaticIndex.c
new file mode 100644
index 0000000000000000000000000000000000000000..e0c8c4f54bd2d6e5ddadb25bc44b96f1ca19aad2
--- /dev/null
+++ b/tests/testAdiabaticIndex.c
@@ -0,0 +1,116 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (C) 2016 Bert Vandenbroucke (bert.vandenbroucke@gmail.com).
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+#include "adiabatic_index.h"
+#include "error.h"
+
+/**
+ * @brief Check that a and b agree to within an absolute difference of 1e-5
+ *
+ * @param a First value
+ * @param b Second value
+ * @param s Label for this check; included in the error()/message() output
+ */
+void check_value(float a, float b, const char* s) {
+  if (fabsf(a - b) > 1.e-5f) {
+    error("Values are inconsistent: %g %g (%s)!", a, b, s);
+  } else {
+    message("Values are consistent: %g %g (%s).", a, b, s);
+  }
+}
+
+/**
+ * @brief Check that the pre-defined adiabatic index constants contain correct
+ * values
+ */
+void check_constants() {
+  float val;
+
+  val = 0.5 * (hydro_gamma + 1.0f) / hydro_gamma;
+  check_value(val, hydro_gamma_plus_one_over_two_gamma, "(gamma+1)/(2 gamma)");
+
+  val = 0.5 * (hydro_gamma - 1.0f) / hydro_gamma;
+  check_value(val, hydro_gamma_minus_one_over_two_gamma, "(gamma-1)/(2 gamma)");
+
+  val = (hydro_gamma - 1.0f) / (hydro_gamma + 1.0f);
+  check_value(val, hydro_gamma_minus_one_over_gamma_plus_one,
+              "(gamma-1)/(gamma+1)");
+
+  val = 2.0f / (hydro_gamma + 1.0f);
+  check_value(val, hydro_two_over_gamma_plus_one, "2/(gamma+1)");
+
+  val = 2.0f / (hydro_gamma - 1.0f);
+  check_value(val, hydro_two_over_gamma_minus_one, "2/(gamma-1)");
+
+  val = 0.5f * (hydro_gamma - 1.0f);
+  check_value(val, hydro_gamma_minus_one_over_two, "(gamma-1)/2");
+
+  val = 2.0f * hydro_gamma / (hydro_gamma - 1.0f);
+  check_value(val, hydro_two_gamma_over_gamma_minus_one, "(2 gamma)/(gamma-1)");
+
+  val = 1.0f / hydro_gamma;
+  check_value(val, hydro_one_over_gamma, "1/gamma");
+}
+
+/**
+ * @brief Check that the adiabatic index power functions return the correct
+ * values
+ */
+void check_functions() {
+  float val_a, val_b;
+  const float x = 0.4;
+
+  val_a = pow(x, -hydro_gamma);
+  val_b = pow_minus_gamma(x);
+  check_value(val_a, val_b, "x^(-gamma)");
+
+  val_a = pow(x, 2.0f / (hydro_gamma - 1.0f));
+  val_b = pow_two_over_gamma_minus_one(x);
+  check_value(val_a, val_b, "x^(2/(gamma-1))");
+
+  val_a = pow(x, 2.0f * hydro_gamma / (hydro_gamma - 1.0f));
+  val_b = pow_two_gamma_over_gamma_minus_one(x);
+  check_value(val_a, val_b, "x^((2 gamma)/(gamma-1))");
+
+  val_a = pow(x, 0.5f * (hydro_gamma - 1.0f) / hydro_gamma);
+  val_b = pow_gamma_minus_one_over_two_gamma(x);
+  check_value(val_a, val_b, "x^((gamma-1)/(2 gamma))");
+
+  val_a = pow(x, -0.5f * (hydro_gamma + 1.0f) / hydro_gamma);
+  val_b = pow_minus_gamma_plus_one_over_two_gamma(x);
+  check_value(val_a, val_b, "x^(-(gamma+1)/(2 gamma))");
+
+  val_a = pow(x, 1.0f / hydro_gamma);
+  val_b = pow_one_over_gamma(x);
+  check_value(val_a, val_b, "x^(1/gamma)");
+}
+
+/**
+ * @brief Check adiabatic index constants and power functions
+ */
+int main() {
+
+  /* check the values of the adiabatic index constants */
+  check_constants();
+
+  /* check the adiabatic index power functions */
+  check_functions();
+
+  return 0;
+}
diff --git a/tests/testInteractions.c b/tests/testInteractions.c
index 9d8dca79f3f84ea06e42d61390e68467a5f1b415..52ad0c54258848883a9025bbcd9d68133eddc4b9 100644
--- a/tests/testInteractions.c
+++ b/tests/testInteractions.c
@@ -173,7 +173,7 @@ void test_interactions(struct part *parts, int count,
 
   /* Dump state of particles before serial interaction. */
   dump_indv_particle_fields(serial_filename, &pi);
-  for (size_t i = 1; i < count; i++)
+  for (int i = 1; i < count; i++)
     dump_indv_particle_fields(serial_filename, &parts[i]);
 
   /* Make copy of pi to be used in vectorised version. */
@@ -206,7 +206,7 @@ void test_interactions(struct part *parts, int count,
 
   /* Dump result of serial interaction. */
   dump_indv_particle_fields(serial_filename, &pi);
-  for (size_t i = 1; i < count; i++)
+  for (int i = 1; i < count; i++)
     dump_indv_particle_fields(serial_filename, &parts[i]);
 
   /* Setup arrays for vector interaction. */
diff --git a/tests/testMatrixInversion.c b/tests/testMatrixInversion.c
new file mode 100644
index 0000000000000000000000000000000000000000..9a45cd52d6f5d3ec96cc6d3f34fd683971f4cf19
--- /dev/null
+++ b/tests/testMatrixInversion.c
@@ -0,0 +1,125 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (C) 2016 Bert Vandenbroucke (bert.vandenbroucke@gmail.com).
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+#include <stdlib.h>
+#include <string.h>
+#include "const.h"
+#include "dimension.h"
+#include "error.h"
+#include "tools.h"
+
+/**
+ * @brief Fill all nine entries of A with random_uniform(-1.0, 1.0) draws
+ */
+void setup_matrix(float A[3][3]) {
+  for (int row = 0; row < 3; ++row) {
+    for (int col = 0; col < 3; ++col) {
+      /* Row-major fill order: A[0][0], A[0][1], ..., A[2][2]. */
+      A[row][col] = random_uniform(-1.0, 1.0);
+    }
+  }
+}
+
+/** @brief Return 1 if A is the unit matrix (within 1e-6) in the active dim */
+int is_unit_matrix(float A[3][3]) {
+  /* The [0][0] entry is checked for every dimensionality. */
+  int check = (fabsf(A[0][0] - 1.0f) < 1.e-6f);
+  /* The upper-left 2x2 block exists in 2D and in 3D, hence the "||". */
+#if defined(HYDRO_DIMENSION_2D) || defined(HYDRO_DIMENSION_3D)
+  check &= (fabsf(A[0][1]) < 1.e-6f);
+  check &= (fabsf(A[1][0]) < 1.e-6f);
+  check &= (fabsf(A[1][1] - 1.0f) < 1.e-6f);
+#if defined(HYDRO_DIMENSION_3D)
+  check &= (fabsf(A[0][2]) < 1.e-6f);
+  check &= (fabsf(A[1][2]) < 1.e-6f);
+  check &= (fabsf(A[2][0]) < 1.e-6f);
+  check &= (fabsf(A[2][1]) < 1.e-6f);
+  check &= (fabsf(A[2][2] - 1.0f) < 1.e-6f);
+#endif  // 3D
+#endif  // 2D or 3D
+
+  return check;
+}
+
+void print_matrix(float A[3][3], const char* s) {
+  message("Matrix %s:", s);
+#if defined(HYDRO_DIMENSION_1D)
+  message("[%.3e]", A[0][0]);
+#elif defined(HYDRO_DIMENSION_2D)
+  message("[%.3e, %.3e]", A[0][0], A[0][1]);
+  message("[%.3e, %.3e]", A[1][0], A[1][1]);
+#elif defined(HYDRO_DIMENSION_3D)
+  message("[%.8e, %.8e, %.8e]", A[0][0], A[0][1], A[0][2]);
+  message("[%.8e, %.8e, %.8e]", A[1][0], A[1][1], A[1][2]);
+  message("[%.8e, %.8e, %.8e]", A[2][0], A[2][1], A[2][2]);
+#endif
+}
+
+void multiply_matrices(float A[3][3], float B[3][3], float C[3][3]) {
+#if defined(HYDRO_DIMENSION_1D)
+  C[0][0] = A[0][0] * B[0][0];
+#elif defined(HYDRO_DIMENSION_2D)
+  for (int i = 0; i < 2; ++i) {
+    for (int j = 0; j < 2; ++j) {
+      C[i][j] = 0.0f;
+      for (int k = 0; k < 2; ++k) {
+        C[i][j] += A[i][k] * B[k][j];
+      }
+    }
+  }
+#elif defined(HYDRO_DIMENSION_3D)
+  for (int i = 0; i < 3; ++i) {
+    for (int j = 0; j < 3; ++j) {
+      C[i][j] = 0.0f;
+      for (int k = 0; k < 3; ++k) {
+        C[i][j] += A[i][k] * B[k][j];
+      }
+    }
+  }
+#endif
+}
+
+int main() {
+
+  float A[3][3], B[3][3], C[3][3];
+  setup_matrix(A);
+
+  memcpy(B, A, 9 * sizeof(float));
+
+  for (int i = 0; i < 3; ++i) {
+    for (int j = 0; j < 3; ++j) {
+      if (A[i][j] != B[i][j]) {
+        error("Matrices not equal after copy!");
+      }
+    }
+  }
+
+  invert_dimension_by_dimension_matrix(A);
+
+  multiply_matrices(A, B, C);
+
+  if (!is_unit_matrix(C)) {
+    print_matrix(A, "A");
+    print_matrix(B, "B");
+    print_matrix(C, "C");
+    error("Inverted matrix is wrong!");
+  }
+
+  return 0;
+}
diff --git a/tests/testPair.c b/tests/testPair.c
index 8afc2250434de7fdfeaa19a36a213dd9ea79d861..efa1e628c2d57bf7922be8affdd5436ebca2f9cf 100644
--- a/tests/testPair.c
+++ b/tests/testPair.c
@@ -63,7 +63,11 @@ struct cell *make_cell(size_t n, double *offset, double size, double h,
         part->v[2] = random_uniform(-0.05, 0.05);
         part->h = size * h / (float)n;
         part->id = ++(*partId);
+#ifdef GIZMO_SPH
+        part->conserved.mass = density * volume / count;
+#else
         part->mass = density * volume / count;
+#endif
         part->ti_begin = 0;
         part->ti_end = 1;
         ++part;
@@ -105,7 +109,7 @@ void clean_up(struct cell *ci) {
  * @brief Initializes all particles field to be ready for a density calculation
  */
 void zero_particle_fields(struct cell *c) {
-  for (size_t pid = 0; pid < c->count; pid++) {
+  for (int pid = 0; pid < c->count; pid++) {
     hydro_init_part(&c->parts[pid]);
   }
 }
@@ -125,13 +129,13 @@ void dump_particle_fields(char *fileName, struct cell *ci, struct cell *cj) {
 
   fprintf(file, "# ci --------------------------------------------\n");
 
-  for (size_t pid = 0; pid < ci->count; pid++) {
+  for (int pid = 0; pid < ci->count; pid++) {
     fprintf(file,
             "%6llu %10f %10f %10f %10f %10f %10f %13e %13e %13e %13e %13e "
             "%13e %13e %13e\n",
             ci->parts[pid].id, ci->parts[pid].x[0], ci->parts[pid].x[1],
             ci->parts[pid].x[2], ci->parts[pid].v[0], ci->parts[pid].v[1],
-            ci->parts[pid].v[2], ci->parts[pid].rho,
+            ci->parts[pid].v[2], hydro_get_density(&ci->parts[pid]),
 #if defined(GIZMO_SPH)
             0.f,
 #else
@@ -149,13 +153,13 @@ void dump_particle_fields(char *fileName, struct cell *ci, struct cell *cj) {
 
   fprintf(file, "# cj --------------------------------------------\n");
 
-  for (size_t pjd = 0; pjd < cj->count; pjd++) {
+  for (int pjd = 0; pjd < cj->count; pjd++) {
     fprintf(file,
             "%6llu %10f %10f %10f %10f %10f %10f %13e %13e %13e %13e %13e "
             "%13e %13e %13e\n",
             cj->parts[pjd].id, cj->parts[pjd].x[0], cj->parts[pjd].x[1],
             cj->parts[pjd].x[2], cj->parts[pjd].v[0], cj->parts[pjd].v[1],
-            cj->parts[pjd].v[2], cj->parts[pjd].rho,
+            cj->parts[pjd].v[2], hydro_get_density(&cj->parts[pjd]),
 #if defined(GIZMO_SPH)
             0.f,
 #else
@@ -238,7 +242,6 @@ int main(int argc, char *argv[]) {
   }
 
   space.periodic = 0;
-  space.h_max = h;
 
   engine.s = &space;
   engine.time = 0.1f;
diff --git a/tests/testReading.c b/tests/testReading.c
index 5a9707d2705ed021996859f324310c4a4926730c..2ef32a5ef11c7e24a379ce5131df9cbea153fa7c 100644
--- a/tests/testReading.c
+++ b/tests/testReading.c
@@ -28,7 +28,7 @@ int main() {
   size_t Ngas = 0, Ngpart = 0;
   int periodic = -1;
   int flag_entropy_ICs = -1;
-  int i, j, k, n;
+  int i, j, k;
   double dim[3];
   struct part *parts = NULL;
   struct gpart *gparts = NULL;
@@ -55,14 +55,14 @@ int main() {
   assert(periodic == 1);
 
   /* Check particles */
-  for (n = 0; n < Ngas; ++n) {
+  for (size_t n = 0; n < Ngas; ++n) {
 
     /* Check that indices are in a reasonable range */
     unsigned long long index = parts[n].id;
     assert(index < Ngas);
 
     /* Check masses */
-    float mass = parts[n].mass;
+    float mass = hydro_get_mass(&parts[n]);
     float correct_mass = boxSize * boxSize * boxSize * rho / Ngas;
     assert(mass == correct_mass);
 
diff --git a/tests/testRiemannExact.c b/tests/testRiemannExact.c
new file mode 100644
index 0000000000000000000000000000000000000000..1943820339ba2ac06d194a17d2d450157ded1a31
--- /dev/null
+++ b/tests/testRiemannExact.c
@@ -0,0 +1,338 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (C) 2016 Bert Vandenbroucke (bert.vandenbroucke@gmail.com).
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+#include <string.h>
+#include "error.h"
+#include "riemann/riemann_exact.h"
+#include "tools.h"
+
+int opposite(float a, float b) {
+  if ((a - b)) {
+    return fabs((a + b) / (a - b)) < 1.e-4;
+  } else {
+    return a == 0.0f;
+  }
+}
+
+int equal(float a, float b) {
+  if ((a + b)) {
+    return fabs((a - b) / (a + b)) < 1.e-4;
+  } else {
+    return a == 0.0f;
+  }
+}
+
+/**
+ * @brief Call error() only if |a-b|/|a+b| and |a-b| both exceed 1e-5
+ *
+ * @param a First value
+ * @param b Second value
+ * @param s String used to identify this check in messages
+ */
+void check_value(float a, float b, const char* s) {
+  if (fabsf(a - b) / fabsf(a + b) > 1.e-5f && fabsf(a - b) > 1.e-5f) {
+    error("Values are inconsistent: %g %g (%s)!", a, b, s);
+  } else {
+    message("Values are consistent: %g %g (%s).", a, b, s);
+  }
+}
+
+struct riemann_statevector {
+  /*! @brief Density */
+  float rho;
+
+  /*! @brief Fluid velocity */
+  float v;
+
+  /*! @brief Pressure */
+  float P;
+};
+
+/**
+ * @brief Check that the solution to the Riemann problem with given left and
+ * right state is consistent with the given expected solution
+ *
+ * @param WL Left state
+ * @param WR Right state
+ * @param Whalf Expected solution
+ * @param s String used to identify this check in messages
+ */
+void check_riemann_solution(struct riemann_statevector* WL,
+                            struct riemann_statevector* WR,
+                            struct riemann_statevector* Whalf, const char* s) {
+  float WLarr[5], WRarr[5], Whalfarr[5], n_unit[3];
+
+  n_unit[0] = 1.0f;
+  n_unit[1] = 0.0f;
+  n_unit[2] = 0.0f;
+
+  WLarr[0] = WL->rho;
+  WLarr[1] = WL->v;
+  WLarr[2] = 0.0f;
+  WLarr[3] = 0.0f;
+  WLarr[4] = WL->P;
+
+  WRarr[0] = WR->rho;
+  WRarr[1] = WR->v;
+  WRarr[2] = 0.0f;
+  WRarr[3] = 0.0f;
+  WRarr[4] = WR->P;
+
+  riemann_solver_solve(WLarr, WRarr, Whalfarr, n_unit);
+
+  message("Checking %s...", s);
+  check_value(Whalfarr[0], Whalf->rho, "rho");
+  check_value(Whalfarr[1], Whalf->v, "v");
+  check_value(Whalfarr[4], Whalf->P, "P");
+}
+
+/**
+ * @brief Check the exact Riemann solver on the Toro test problems
+ */
+void check_riemann_exact() {
+  struct riemann_statevector WL, WR, Whalf;
+
+  /* Test 1 */
+  WL.rho = 1.0f;
+  WL.v = 0.0f;
+  WL.P = 1.0f;
+  WR.rho = 0.125f;
+  WR.v = 0.0f;
+  WR.P = 0.1f;
+#if defined(HYDRO_GAMMA_5_3)
+  Whalf.rho = 0.47969f;
+  Whalf.v = 0.841194f;
+  Whalf.P = 0.293945f;
+#elif defined(HYDRO_GAMMA_4_3)
+  Whalf.rho = 0.411437f;
+  Whalf.v = 0.953205f;
+  Whalf.P = 0.306011f;
+#elif defined(HYDRO_GAMMA_2_1)
+  Whalf.rho = 0.534767f;
+  Whalf.v = 0.760062f;
+  Whalf.P = 0.285975f;
+#else
+#error "Unsupported adiabatic index!"
+#endif
+  check_riemann_solution(&WL, &WR, &Whalf, "Test 1");
+
+  /* Test 2 */
+  WL.rho = 1.0f;
+  WL.v = -2.0f;
+  WL.P = 0.4f;
+  WR.rho = 1.0f;
+  WR.v = 2.0f;
+  WR.P = 0.4f;
+#if defined(HYDRO_GAMMA_5_3)
+  Whalf.rho = 0.00617903f;
+  Whalf.v = 0.0f;
+  Whalf.P = 8.32249e-5f;
+#elif defined(HYDRO_GAMMA_4_3)
+  Whalf.rho = 0.0257933f;
+  Whalf.v = 0.0f;
+  Whalf.P = 0.00304838f;
+#elif defined(HYDRO_GAMMA_2_1)
+  Whalf.rho = 0.0f;
+  Whalf.v = 0.0f;
+  Whalf.P = 0.0f;
+#else
+#error "Unsupported adiabatic index!"
+#endif
+  check_riemann_solution(&WL, &WR, &Whalf, "Test 2");
+
+  /* Test 3 */
+  WL.rho = 1.0f;
+  WL.v = 0.0f;
+  WL.P = 1000.0f;
+  WR.rho = 1.0f;
+  WR.v = 0.0f;
+  WR.P = 0.01f;
+#if defined(HYDRO_GAMMA_5_3)
+  Whalf.rho = 0.615719f;
+  Whalf.v = 18.2812f;
+  Whalf.P = 445.626f;
+#elif defined(HYDRO_GAMMA_4_3)
+  Whalf.rho = 0.563517f;
+  Whalf.v = 19.9735f;
+  Whalf.P = 465.453f;
+#elif defined(HYDRO_GAMMA_2_1)
+  Whalf.rho = 0.656768f;
+  Whalf.v = 16.9572f;
+  Whalf.P = 431.345f;
+#else
+#error "Unsupported adiabatic index!"
+#endif
+  check_riemann_solution(&WL, &WR, &Whalf, "Test 3");
+
+  /* Test 4 */
+  WL.rho = 1.0f;
+  WL.v = 0.0f;
+  WL.P = 0.01f;
+  WR.rho = 1.0f;
+  WR.v = 0.0f;
+  WR.P = 100.0f;
+#if defined(HYDRO_GAMMA_5_3)
+  Whalf.rho = 0.61577f;
+  Whalf.v = -5.78022f;
+  Whalf.P = 44.5687f;
+#elif defined(HYDRO_GAMMA_4_3)
+  Whalf.rho = 0.563567f;
+  Whalf.v = -6.31525f;
+  Whalf.P = 46.5508f;
+#elif defined(HYDRO_GAMMA_2_1)
+  Whalf.rho = 0.656819f;
+  Whalf.v = -5.36146f;
+  Whalf.P = 43.1412f;
+#else
+#error "Unsupported adiabatic index!"
+#endif
+  check_riemann_solution(&WL, &WR, &Whalf, "Test 4");
+
+  /* Test 5 */
+  WL.rho = 5.99924f;
+  WL.v = 19.5975f;
+  WL.P = 460.894f;
+  WR.rho = 5.99242f;
+  WR.v = -6.19633f;
+  WR.P = 46.0950f;
+#if defined(HYDRO_GAMMA_5_3)
+  Whalf.rho = 12.743f;
+  Whalf.v = 8.56045f;
+  Whalf.P = 1841.82f;
+#elif defined(HYDRO_GAMMA_4_3)
+  Whalf.rho = 5.99924f;
+  Whalf.v = 19.5975f;
+  Whalf.P = 460.894f;
+#elif defined(HYDRO_GAMMA_2_1)
+  Whalf.rho = 11.5089f;
+  Whalf.v = 8.42099f;
+  Whalf.P = 2026.27f;
+#else
+#error "Unsupported adiabatic index!"
+#endif
+  check_riemann_solution(&WL, &WR, &Whalf, "Test 5");
+}
+
+/**
+ * @brief Check the symmetry of the exact Riemann solver
+ */
+void check_riemann_symmetry() {
+  float WL[5], WR[5], Whalf1[5], Whalf2[5], n_unit1[3], n_unit2[3], n_norm,
+      vij[3], totflux1[5], totflux2[5];
+
+  WL[0] = random_uniform(0.1f, 1.0f);
+  WL[1] = random_uniform(-10.0f, 10.0f);
+  WL[2] = random_uniform(-10.0f, 10.0f);
+  WL[3] = random_uniform(-10.0f, 10.0f);
+  WL[4] = random_uniform(0.1f, 1.0f);
+  WR[0] = random_uniform(0.1f, 1.0f);
+  WR[1] = random_uniform(-10.0f, 10.0f);
+  WR[2] = random_uniform(-10.0f, 10.0f);
+  WR[3] = random_uniform(-10.0f, 10.0f);
+  WR[4] = random_uniform(0.1f, 1.0f);
+
+  n_unit1[0] = random_uniform(-1.0f, 1.0f);
+  n_unit1[1] = random_uniform(-1.0f, 1.0f);
+  n_unit1[2] = random_uniform(-1.0f, 1.0f);
+
+  n_norm = sqrtf(n_unit1[0] * n_unit1[0] + n_unit1[1] * n_unit1[1] +
+                 n_unit1[2] * n_unit1[2]);
+  n_unit1[0] /= n_norm;
+  n_unit1[1] /= n_norm;
+  n_unit1[2] /= n_norm;
+
+  n_unit2[0] = -n_unit1[0];
+  n_unit2[1] = -n_unit1[1];
+  n_unit2[2] = -n_unit1[2];
+
+  riemann_solver_solve(WL, WR, Whalf1, n_unit1);
+  riemann_solver_solve(WR, WL, Whalf2, n_unit2);
+
+  if (!equal(Whalf1[0], Whalf2[0]) || !equal(Whalf1[1], Whalf2[1]) ||
+      !equal(Whalf1[2], Whalf2[2]) || !equal(Whalf1[3], Whalf2[3]) ||
+      !equal(Whalf1[4], Whalf2[4])) {
+    message(
+        "Solver asymmetric: [%.3e,%.3e,%.3e,%.3e,%.3e] == "
+        "[%.3e,%.3e,%.3e,%.3e,%.3e]\n",
+        Whalf1[0], Whalf1[1], Whalf1[2], Whalf1[3], Whalf1[4], Whalf2[0],
+        Whalf2[1], Whalf2[2], Whalf2[3], Whalf2[4]);
+    message("Asymmetry in solution!\n");
+    /* This asymmetry is to be expected, since we do an iteration. Are the
+       results at least consistent? */
+    check_value(Whalf1[0], Whalf2[0], "Rho solution");
+    check_value(Whalf1[1], Whalf2[1], "V[0] solution");
+    check_value(Whalf1[2], Whalf2[2], "V[1] solution");
+    check_value(Whalf1[3], Whalf2[3], "V[2] solution");
+    check_value(Whalf1[4], Whalf2[4], "Pressure solution");
+  } else {
+    message(
+        "Solver symmetric: [%.3e,%.3e,%.3e,%.3e,%.3e] == "
+        "[%.3e,%.3e,%.3e,%.3e,%.3e]\n",
+        Whalf1[0], Whalf1[1], Whalf1[2], Whalf1[3], Whalf1[4], Whalf2[0],
+        Whalf2[1], Whalf2[2], Whalf2[3], Whalf2[4]);
+  }
+
+  vij[0] = random_uniform(-10.0f, 10.0f);
+  vij[1] = random_uniform(-10.0f, 10.0f);
+  vij[2] = random_uniform(-10.0f, 10.0f);
+
+  riemann_solve_for_flux(WL, WR, n_unit1, vij, totflux1);
+  riemann_solve_for_flux(WR, WL, n_unit2, vij, totflux2);
+
+  if (!opposite(totflux1[0], totflux2[0]) ||
+      !opposite(totflux1[1], totflux2[1]) ||
+      !opposite(totflux1[2], totflux2[2]) ||
+      !opposite(totflux1[3], totflux2[3]) ||
+      !opposite(totflux1[4], totflux2[4])) {
+    message(
+        "Flux solver asymmetric: [%.3e,%.3e,%.3e,%.3e,%.3e] == "
+        "[%.3e,%.3e,%.3e,%.3e,%.3e]\n",
+        totflux1[0], totflux1[1], totflux1[2], totflux1[3], totflux1[4],
+        totflux2[0], totflux2[1], totflux2[2], totflux2[3], totflux2[4]);
+    message("Asymmetry in flux solution!");
+    /* This asymmetry is to be expected, since we do an iteration. The two
+       fluxes should be opposite: compare one against minus the other. */
+    check_value(totflux1[0], -totflux2[0], "Mass flux");
+    check_value(totflux1[1], -totflux2[1], "Momentum[0] flux");
+    check_value(totflux1[2], -totflux2[2], "Momentum[1] flux");
+    check_value(totflux1[3], -totflux2[3], "Momentum[2] flux");
+    check_value(totflux1[4], -totflux2[4], "Energy flux");
+  } else {
+    message(
+        "Flux solver symmetric: [%.3e,%.3e,%.3e,%.3e,%.3e] == "
+        "[%.3e,%.3e,%.3e,%.3e,%.3e]\n",
+        totflux1[0], totflux1[1], totflux1[2], totflux1[3], totflux1[4],
+        totflux2[0], totflux2[1], totflux2[2], totflux2[3], totflux2[4]);
+  }
+}
+
+/**
+ * @brief Check the exact Riemann solver
+ */
+int main() {
+
+  /* check the exact Riemann solver */
+  check_riemann_exact();
+
+  /* symmetry test */
+  int i;
+  for (i = 0; i < 100; ++i) check_riemann_symmetry();
+
+  return 0;
+}
diff --git a/tests/testRiemannHLLC.c b/tests/testRiemannHLLC.c
new file mode 100644
index 0000000000000000000000000000000000000000..4cf883b68efbcfd795d0b7894adb9e7265b14d14
--- /dev/null
+++ b/tests/testRiemannHLLC.c
@@ -0,0 +1,96 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (C) 2016 Bert Vandenbroucke (bert.vandenbroucke@gmail.com).
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+#include <string.h>
+#include "error.h"
+#include "riemann/riemann_hllc.h"
+#include "tools.h"
+
+int consistent_with_zero(float val) { return fabs(val) < 1.e-4; }
+
+/**
+ * @brief Check the symmetry of the HLLC Riemann solver
+ */
+void check_riemann_symmetry() {
+  float WL[5], WR[5], n_unit1[3], n_unit2[3], n_norm, vij[3], totflux1[5],
+      totflux2[5];
+
+  WL[0] = random_uniform(0.1f, 1.0f);
+  WL[1] = random_uniform(-10.0f, 10.0f);
+  WL[2] = random_uniform(-10.0f, 10.0f);
+  WL[3] = random_uniform(-10.0f, 10.0f);
+  WL[4] = random_uniform(0.1f, 1.0f);
+  WR[0] = random_uniform(0.1f, 1.0f);
+  WR[1] = random_uniform(-10.0f, 10.0f);
+  WR[2] = random_uniform(-10.0f, 10.0f);
+  WR[3] = random_uniform(-10.0f, 10.0f);
+  WR[4] = random_uniform(0.1f, 1.0f);
+
+  n_unit1[0] = random_uniform(-1.0f, 1.0f);
+  n_unit1[1] = random_uniform(-1.0f, 1.0f);
+  n_unit1[2] = random_uniform(-1.0f, 1.0f);
+
+  n_norm = sqrtf(n_unit1[0] * n_unit1[0] + n_unit1[1] * n_unit1[1] +
+                 n_unit1[2] * n_unit1[2]);
+  n_unit1[0] /= n_norm;
+  n_unit1[1] /= n_norm;
+  n_unit1[2] /= n_norm;
+
+  n_unit2[0] = -n_unit1[0];
+  n_unit2[1] = -n_unit1[1];
+  n_unit2[2] = -n_unit1[2];
+
+  vij[0] = random_uniform(-10.0f, 10.0f);
+  vij[1] = random_uniform(-10.0f, 10.0f);
+  vij[2] = random_uniform(-10.0f, 10.0f);
+
+  riemann_solve_for_flux(WL, WR, n_unit1, vij, totflux1);
+  riemann_solve_for_flux(WR, WL, n_unit2, vij, totflux2);
+
+  if (!consistent_with_zero(totflux1[0] + totflux2[0]) ||
+      !consistent_with_zero(totflux1[1] + totflux2[1]) ||
+      !consistent_with_zero(totflux1[2] + totflux2[2]) ||
+      !consistent_with_zero(totflux1[3] + totflux2[3]) ||
+      !consistent_with_zero(totflux1[4] + totflux2[4])) {
+    message(
+        "Flux solver asymmetric: [%.3e,%.3e,%.3e,%.3e,%.3e] == "
+        "[%.3e,%.3e,%.3e,%.3e,%.3e]\n",
+        totflux1[0], totflux1[1], totflux1[2], totflux1[3], totflux1[4],
+        totflux2[0], totflux2[1], totflux2[2], totflux2[3], totflux2[4]);
+    error("Asymmetry in flux solution!");
+  } else {
+    message(
+        "Flux solver symmetric: [%.3e,%.3e,%.3e,%.3e,%.3e] == "
+        "[%.3e,%.3e,%.3e,%.3e,%.3e]\n",
+        totflux1[0], totflux1[1], totflux1[2], totflux1[3], totflux1[4],
+        totflux2[0], totflux2[1], totflux2[2], totflux2[3], totflux2[4]);
+  }
+}
+
+/**
+ * @brief Check the HLLC Riemann solver
+ */
+int main() {
+
+  int i;
+  /* symmetry test */
+  for (i = 0; i < 100; i++) check_riemann_symmetry();
+
+  return 0;
+}
diff --git a/tests/testRiemannTRRS.c b/tests/testRiemannTRRS.c
new file mode 100644
index 0000000000000000000000000000000000000000..18ecbdce9173f43674a63b21231322cb01620d29
--- /dev/null
+++ b/tests/testRiemannTRRS.c
@@ -0,0 +1,324 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (C) 2016 Bert Vandenbroucke (bert.vandenbroucke@gmail.com).
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+#include <string.h>
+#include "error.h"
+#include "riemann/riemann_trrs.h"
+#include "tools.h"
+
+/** @brief Check whether a == -b holds up to a relative error of 1.e-4,
+ *  measured against the difference of the two values */
+int opposite(float a, float b) {
+  /* a == b: the values can only be opposite if they both vanish */
+  if (!(a - b)) return a == 0.0f;
+  return fabs((a + b) / (a - b)) < 1.e-4;
+}
+
+/** @brief Check whether a == b holds up to a relative error of 1.e-4,
+ *  measured against the sum of the two values */
+int equal(float a, float b) {
+  /* a == -b: the values can only be equal if they both vanish */
+  if (!(a + b)) return a == 0.0f;
+  return fabs((a - b) / (a + b)) < 1.e-4;
+}
+
+/**
+ * @brief Report whether a and b agree to 1.e-5 (relative or absolute)
+ * @param a First value
+ * @param b Second value
+ * @param s String used to identify this check in messages
+ */
+void check_value(float a, float b, const char* s) {
+  /* consistent unless BOTH the relative and the absolute difference are big */
+  if (!(fabsf(a - b) / fabsf(a + b) > 1.e-5f) || !(fabsf(a - b) > 1.e-5f)) {
+    message("Values are consistent: %g %g (%s).", a, b, s);
+  } else {
+    error("Values are inconsistent: %g %g (%s)!", a, b, s);
+  }
+}
+
+/*! @brief Minimal 1D fluid state (density, velocity, pressure) used to
+ *  describe the inputs and expected outputs of the Toro test problems */
+struct riemann_statevector {
+  /*! @brief Density; copied to element 0 of the solver state array */
+  float rho;
+  /*! @brief Fluid velocity; copied to element 1 of the solver state array */
+  float v;
+  /*! @brief Pressure; copied to element 4 of the solver state array */
+  float P;
+};
+
+/**
+ * @brief Solve the Riemann problem for the given left and right state and
+ * compare the result with the given expected solution
+ *
+ * The 1D states are copied into the 5-element arrays passed to the solver:
+ * element 0 is the density, element 1 the velocity and element 4 the
+ * pressure. Elements 2 and 3 (presumably the transverse velocity
+ * components) are set to zero and the normal points along the x-axis.
+ * @param WL Left state
+ * @param WR Right state
+ * @param Whalf Expected solution
+ * @param s String used to identify this check in messages
+ */
+void check_riemann_solution(struct riemann_statevector* WL,
+                            struct riemann_statevector* WR,
+                            struct riemann_statevector* Whalf, const char* s) {
+
+  /* unit vector along x, passed to the solver as the normal */
+  float n_unit[3] = {1.0f, 0.0f, 0.0f};
+
+  /* left and right input states in array form */
+  float WLarr[5] = {WL->rho, WL->v, 0.0f, 0.0f, WL->P};
+  float WRarr[5] = {WR->rho, WR->v, 0.0f, 0.0f, WR->P};
+
+  /* output state, written by the solver */
+  float Whalfarr[5];
+
+  /* run the solver under test */
+  riemann_solver_solve(WLarr, WRarr, Whalfarr, n_unit);
+
+  message("Checking %s...", s);
+
+  /* only density, velocity and pressure are compared with the reference */
+  check_value(Whalfarr[0], Whalf->rho, "rho");
+  check_value(Whalfarr[1], Whalf->v, "v");
+  check_value(Whalfarr[4], Whalf->P, "P");
+}
+
+/**
+ * @brief Check the TRRS Riemann solver on five Toro test problems
+ */
+void check_riemann_trrs() {
+  struct riemann_statevector WL, WR, Whalf;
+  /* Whalf: expected result for whichever HYDRO_GAMMA_* macro is defined */
+  /* Test 1: both sides at rest, left side denser and at higher pressure */
+  WL.rho = 1.0f;
+  WL.v = 0.0f;
+  WL.P = 1.0f;
+  WR.rho = 0.125f;
+  WR.v = 0.0f;
+  WR.P = 0.1f;
+#if defined(HYDRO_GAMMA_5_3)
+  Whalf.rho = 0.481167f;
+  Whalf.v = 0.838085f;
+  Whalf.P = 0.295456f;
+#elif defined(HYDRO_GAMMA_4_3)
+  Whalf.rho = 0.41586f;
+  Whalf.v = 0.942546f;
+  Whalf.P = 0.310406f;
+#elif defined(HYDRO_GAMMA_2_1)
+  Whalf.rho = 0.53478f;
+  Whalf.v = 0.760037f;
+  Whalf.P = 0.285989f;
+#else
+#error "Unsupported adiabatic index!"
+#endif
+  check_riemann_solution(&WL, &WR, &Whalf, "Test 1");
+
+  /* Test 2: equal density and pressure, opposite velocities -2 and +2 */
+  WL.rho = 1.0f;
+  WL.v = -2.0f;
+  WL.P = 0.4f;
+  WR.rho = 1.0f;
+  WR.v = 2.0f;
+  WR.P = 0.4f;
+#if defined(HYDRO_GAMMA_5_3)
+  Whalf.rho = 0.00617903f;
+  Whalf.v = 0.0f;
+  Whalf.P = 8.32249e-5f;
+#elif defined(HYDRO_GAMMA_4_3)
+  Whalf.rho = 0.0257933f;
+  Whalf.v = 0.0f;
+  Whalf.P = 0.00304838f;
+#elif defined(HYDRO_GAMMA_2_1)
+  Whalf.rho = 0.013932f;
+  Whalf.v = 0.0f;
+  Whalf.P = 7.76405e-5f;
+#else
+#error "Unsupported adiabatic index!"
+#endif
+  check_riemann_solution(&WL, &WR, &Whalf, "Test 2");
+
+  /* Test 3: at rest with equal density, left pressure 1e5 times the right */
+  WL.rho = 1.0f;
+  WL.v = 0.0f;
+  WL.P = 1000.0f;
+  WR.rho = 1.0f;
+  WR.v = 0.0f;
+  WR.P = 0.01f;
+#if defined(HYDRO_GAMMA_5_3)
+  Whalf.rho = 0.919498f;
+  Whalf.v = 3.37884f;
+  Whalf.P = 869.464f;
+#elif defined(HYDRO_GAMMA_4_3)
+  Whalf.rho = 0.941258f;
+  Whalf.v = 2.19945f;
+  Whalf.P = 922.454f;
+#elif defined(HYDRO_GAMMA_2_1)
+  Whalf.rho = 0.902032f;
+  Whalf.v = 4.49417f;
+  Whalf.P = 813.662f;
+#else
+#error "Unsupported adiabatic index!"
+#endif
+  check_riemann_solution(&WL, &WR, &Whalf, "Test 3");
+
+  /* Test 4: at rest with equal density, right pressure 1e4 times the left */
+  WL.rho = 1.0f;
+  WL.v = 0.0f;
+  WL.P = 0.01f;
+  WR.rho = 1.0f;
+  WR.v = 0.0f;
+  WR.P = 100.0f;
+#if defined(HYDRO_GAMMA_5_3)
+  Whalf.rho = 0.857525f;
+  Whalf.v = -1.93434f;
+  Whalf.P = 77.4007f;
+#elif defined(HYDRO_GAMMA_4_3)
+  Whalf.rho = 0.880649f;
+  Whalf.v = -1.45215f;
+  Whalf.P = 84.4119f;
+#elif defined(HYDRO_GAMMA_2_1)
+  Whalf.rho = 0.843058f;
+  Whalf.v = -2.31417f;
+  Whalf.P = 71.0747f;
+#else
+#error "Unsupported adiabatic index!"
+#endif
+  check_riemann_solution(&WL, &WR, &Whalf, "Test 4");
+
+  /* Test 5: the expected solution is the left state for every gamma macro */
+  WL.rho = 5.99924f;
+  WL.v = 19.5975f;
+  WL.P = 460.894f;
+  WR.rho = 5.99242f;
+  WR.v = -6.19633f;
+  WR.P = 46.0950f;
+#if defined(HYDRO_GAMMA_5_3)
+  Whalf.rho = 5.99924f;
+  Whalf.v = 19.5975f;
+  Whalf.P = 460.894f;
+#elif defined(HYDRO_GAMMA_4_3)
+  Whalf.rho = 5.99924f;
+  Whalf.v = 19.5975f;
+  Whalf.P = 460.894f;
+#elif defined(HYDRO_GAMMA_2_1)
+  Whalf.rho = 5.99924f;
+  Whalf.v = 19.5975f;
+  Whalf.P = 460.894f;
+#else
+#error "Unsupported adiabatic index!"
+#endif
+  check_riemann_solution(&WL, &WR, &Whalf, "Test 5");
+}
+
+/**
+ * @brief Check the TRRS solver and flux under exchange of left and right state
+ */
+void check_riemann_symmetry() {
+  float WL[5], WR[5], Whalf1[5], Whalf2[5], n_unit1[3], n_unit2[3], n_norm,
+      vij[3], totflux1[5], totflux2[5];
+  /* random left and right state; elements 0 and 4 use the bounds 0.1 and 1 */
+  WL[0] = random_uniform(0.1f, 1.0f);
+  WL[1] = random_uniform(-10.0f, 10.0f);
+  WL[2] = random_uniform(-10.0f, 10.0f);
+  WL[3] = random_uniform(-10.0f, 10.0f);
+  WL[4] = random_uniform(0.1f, 1.0f);
+  WR[0] = random_uniform(0.1f, 1.0f);
+  WR[1] = random_uniform(-10.0f, 10.0f);
+  WR[2] = random_uniform(-10.0f, 10.0f);
+  WR[3] = random_uniform(-10.0f, 10.0f);
+  WR[4] = random_uniform(0.1f, 1.0f);
+  /* three random components for the first normal vector */
+  n_unit1[0] = random_uniform(-1.0f, 1.0f);
+  n_unit1[1] = random_uniform(-1.0f, 1.0f);
+  n_unit1[2] = random_uniform(-1.0f, 1.0f);
+  /* rescale the first normal to unit length */
+  n_norm = sqrtf(n_unit1[0] * n_unit1[0] + n_unit1[1] * n_unit1[1] +
+                 n_unit1[2] * n_unit1[2]);
+  n_unit1[0] /= n_norm;
+  n_unit1[1] /= n_norm;
+  n_unit1[2] /= n_norm;
+  /* the second normal is the exact opposite of the first */
+  n_unit2[0] = -n_unit1[0];
+  n_unit2[1] = -n_unit1[1];
+  n_unit2[2] = -n_unit1[2];
+  /* solve once as given and once with states swapped and normal reversed */
+  riemann_solver_solve(WL, WR, Whalf1, n_unit1);
+  riemann_solver_solve(WR, WL, Whalf2, n_unit2);
+  /* all five elements of both solutions must agree, see equal() */
+  if (!equal(Whalf1[0], Whalf2[0]) || !equal(Whalf1[1], Whalf2[1]) ||
+      !equal(Whalf1[2], Whalf2[2]) || !equal(Whalf1[3], Whalf2[3]) ||
+      !equal(Whalf1[4], Whalf2[4])) {
+    message(
+        "Solver asymmetric: [%.3e,%.3e,%.3e,%.3e,%.3e] == "
+        "[%.3e,%.3e,%.3e,%.3e,%.3e]\n",
+        Whalf1[0], Whalf1[1], Whalf1[2], Whalf1[3], Whalf1[4], Whalf2[0],
+        Whalf2[1], Whalf2[2], Whalf2[3], Whalf2[4]);
+    error("Asymmetry in solution!");
+  } else {
+    message(
+        "Solver symmetric: [%.3e,%.3e,%.3e,%.3e,%.3e] == "
+        "[%.3e,%.3e,%.3e,%.3e,%.3e]\n",
+        Whalf1[0], Whalf1[1], Whalf1[2], Whalf1[3], Whalf1[4], Whalf2[0],
+        Whalf2[1], Whalf2[2], Whalf2[3], Whalf2[4]);
+  }
+  /* random vij, passed unchanged to both flux evaluations */
+  vij[0] = random_uniform(-10.0f, 10.0f);
+  vij[1] = random_uniform(-10.0f, 10.0f);
+  vij[2] = random_uniform(-10.0f, 10.0f);
+  /* fluxes for the original and for the swapped, reversed problem */
+  riemann_solve_for_flux(WL, WR, n_unit1, vij, totflux1);
+  riemann_solve_for_flux(WR, WL, n_unit2, vij, totflux2);
+  /* the fluxes must cancel element by element, see opposite() */
+  if (!opposite(totflux1[0], totflux2[0]) ||
+      !opposite(totflux1[1], totflux2[1]) ||
+      !opposite(totflux1[2], totflux2[2]) ||
+      !opposite(totflux1[3], totflux2[3]) ||
+      !opposite(totflux1[4], totflux2[4])) {
+    message(
+        "Solver asymmetric: [%.3e,%.3e,%.3e,%.3e,%.3e] == "
+        "[%.3e,%.3e,%.3e,%.3e,%.3e]\n",
+        totflux1[0], totflux1[1], totflux1[2], totflux1[3], totflux1[4],
+        totflux2[0], totflux2[1], totflux2[2], totflux2[3], totflux2[4]);
+    error("Asymmetry in solution!");
+  } else {
+    message(
+        "Solver symmetric: [%.3e,%.3e,%.3e,%.3e,%.3e] == "
+        "[%.3e,%.3e,%.3e,%.3e,%.3e]\n",
+        totflux1[0], totflux1[1], totflux1[2], totflux1[3], totflux1[4],
+        totflux2[0], totflux2[1], totflux2[2], totflux2[3], totflux2[4]);
+  }
+}
+
+/**
+ * @brief Test driver: reference-solution checks followed by symmetry checks
+ */
+int main() {
+
+  /* compare the solver output with the tabulated Toro test solutions */
+  check_riemann_trrs();
+
+  /* repeat the left/right exchange test for 100 random state pairs */
+  for (int trial = 0; trial < 100; ++trial) {
+    check_riemann_symmetry();
+  }
+  return 0;
+}
diff --git a/tests/testSPHStep.c b/tests/testSPHStep.c
index fa49ed9d00c37393abd2f7e17ae628d79b4125f6..ff2ec841b27bd5ca6190517bc39f4da0c28fbc0c 100644
--- a/tests/testSPHStep.c
+++ b/tests/testSPHStep.c
@@ -102,10 +102,6 @@ int main() {
 
   int i, j, k, offset[3];
   struct part *p;
-  struct hydro_props hp;
-  hp.target_neighbours = 48.;
-  hp.delta_neighbours = 1.;
-  hp.max_smoothing_iterations = 30;
 
   int N = 10;
   float dim = 1.;
@@ -146,11 +142,24 @@ int main() {
   /* Create the infrastructure */
   struct space space;
   space.periodic = 0;
-  space.h_max = 1.;
+  space.cell_min = 1.;
+
+  struct phys_const prog_const;
+  prog_const.const_newton_G = 1.f;
+
+  struct hydro_props hp;
+  hp.target_neighbours = 48.f;
+  hp.delta_neighbours = 2.;
+  hp.max_smoothing_iterations = 1;
+  hp.CFL_condition = 0.1;
 
   struct engine e;
-  e.s = &space;
+  bzero(&e, sizeof(struct engine));
   e.hydro_properties = &hp;
+  e.physical_constants = &prog_const;
+  e.s = &space;
+  e.time = 0.1f;
+  e.ti_current = 1;
 
   struct runner r;
   r.e = &e;
diff --git a/tests/testSymmetry.c b/tests/testSymmetry.c
index eb3fab6becca08e9ef87e7c60cc8c04bd2a0290c..6469d314fb8b1438cc2c9737669c1a13a97bd803 100644
--- a/tests/testSymmetry.c
+++ b/tests/testSymmetry.c
@@ -46,6 +46,55 @@ int main(int argc, char *argv[]) {
   pi.id = 1;
   pj.id = 2;
 
+#if defined(GIZMO_SPH)
+  /* Give the primitive variables sensible values, since the Riemann solver does
+     not like negative densities and pressures */
+  pi.primitives.rho = random_uniform(0.1f, 1.0f);
+  pi.primitives.v[0] = random_uniform(-10.0f, 10.0f);
+  pi.primitives.v[1] = random_uniform(-10.0f, 10.0f);
+  pi.primitives.v[2] = random_uniform(-10.0f, 10.0f);
+  pi.primitives.P = random_uniform(0.1f, 1.0f);
+  pj.primitives.rho = random_uniform(0.1f, 1.0f);
+  pj.primitives.v[0] = random_uniform(-10.0f, 10.0f);
+  pj.primitives.v[1] = random_uniform(-10.0f, 10.0f);
+  pj.primitives.v[2] = random_uniform(-10.0f, 10.0f);
+  pj.primitives.P = random_uniform(0.1f, 1.0f);
+  /* make gradients zero */
+  pi.primitives.gradients.rho[0] = 0.0f;
+  pi.primitives.gradients.rho[1] = 0.0f;
+  pi.primitives.gradients.rho[2] = 0.0f;
+  pi.primitives.gradients.v[0][0] = 0.0f;
+  pi.primitives.gradients.v[0][1] = 0.0f;
+  pi.primitives.gradients.v[0][2] = 0.0f;
+  pi.primitives.gradients.v[1][0] = 0.0f;
+  pi.primitives.gradients.v[1][1] = 0.0f;
+  pi.primitives.gradients.v[1][2] = 0.0f;
+  pi.primitives.gradients.v[2][0] = 0.0f;
+  pi.primitives.gradients.v[2][1] = 0.0f;
+  pi.primitives.gradients.v[2][2] = 0.0f;
+  pi.primitives.gradients.P[0] = 0.0f;
+  pi.primitives.gradients.P[1] = 0.0f;
+  pi.primitives.gradients.P[2] = 0.0f;
+  pj.primitives.gradients.rho[0] = 0.0f;
+  pj.primitives.gradients.rho[1] = 0.0f;
+  pj.primitives.gradients.rho[2] = 0.0f;
+  pj.primitives.gradients.v[0][0] = 0.0f;
+  pj.primitives.gradients.v[0][1] = 0.0f;
+  pj.primitives.gradients.v[0][2] = 0.0f;
+  pj.primitives.gradients.v[1][0] = 0.0f;
+  pj.primitives.gradients.v[1][1] = 0.0f;
+  pj.primitives.gradients.v[1][2] = 0.0f;
+  pj.primitives.gradients.v[2][0] = 0.0f;
+  pj.primitives.gradients.v[2][1] = 0.0f;
+  pj.primitives.gradients.v[2][2] = 0.0f;
+  pj.primitives.gradients.P[0] = 0.0f;
+  pj.primitives.gradients.P[1] = 0.0f;
+  pj.primitives.gradients.P[2] = 0.0f;
+  /* set time step to reasonable value */
+  pi.force.dt = 0.001;
+  pj.force.dt = 0.001;
+#endif
+
   /* Make an xpart companion */
   struct xpart xpi, xpj;
   bzero(&xpi, sizeof(struct xpart));
@@ -100,12 +149,54 @@ int main(int argc, char *argv[]) {
   dx[2] = -dx[2];
   runner_iact_nonsym_force(r2, dx, pj2.h, pi2.h, &pj2, &pi2);
 
-  /* Check that the particles are the same */
+/* Check that the particles are the same */
+#if defined(GIZMO_SPH)
+  i_ok = 0;
+  j_ok = 0;
+  for (size_t i = 0; i < sizeof(struct part) / sizeof(float); ++i) {
+    float a = *(((float *)&pi) + i);
+    float b = *(((float *)&pi2) + i);
+    float c = *(((float *)&pj) + i);
+    float d = *(((float *)&pj2) + i);
+    /* flags are set when the values differ by more than 1.e-4 (relative) */
+    int a_differs_from_b;
+    if ((a + b)) {
+      a_differs_from_b = (fabs((a - b) / (a + b)) > 1.e-4);
+    } else {
+      a_differs_from_b = !(a == 0.0f);
+    }
+    int c_differs_from_d;
+    if ((c + d)) {
+      c_differs_from_d = (fabs((c - d) / (c + d)) > 1.e-4);
+    } else {
+      c_differs_from_d = !(c == 0.0f);
+    }
+    /* report mismatches; the index is a size_t and needs the %zu conversion */
+    if (a_differs_from_b) {
+      message("%.8e, %.8e, %zu", a, b, i);
+    }
+    if (c_differs_from_d) {
+      message("%.8e, %.8e, %zu", c, d, i);
+    }
+
+    i_ok |= a_differs_from_b;
+    j_ok |= c_differs_from_d;
+  }
+#else
   i_ok = memcmp(&pi, &pi2, sizeof(struct part));
   j_ok = memcmp(&pj, &pj2, sizeof(struct part));
+#endif
 
-  if (i_ok) error("Particles 'pi' do not match after force");
-  if (j_ok) error("Particles 'pj' do not match after force");
+  if (i_ok) {
+    printParticle_single(&pi, &xpi);
+    printParticle_single(&pi2, &xpi);
+    error("Particles 'pi' do not match after force");
+  }
+  if (j_ok) {
+    printParticle_single(&pj, &xpj);
+    printParticle_single(&pj2, &xpj);
+    error("Particles 'pj' do not match after force");
+  }
 
   return 0;
 }
diff --git a/tests/testThreadpool.c b/tests/testThreadpool.c
new file mode 100644
index 0000000000000000000000000000000000000000..aa65d533a29afbe4e7e8384fb887281822a31e58
--- /dev/null
+++ b/tests/testThreadpool.c
@@ -0,0 +1,85 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (C) 2016 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+// Standard includes.
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+// Local includes.
+#include "../src/atomic.h"
+#include "../src/threadpool.h"
+
+void map_function_first(void *map_data, int num_elements, void *extra_data) {
+  /* each element is read, then printed after a random sleep of under 1s */
+  for (int k = 0; k < num_elements; k++) {
+    const int value = ((const int *)map_data)[k];
+    usleep(rand() % 1000000);
+    printf("map_function_first: got input %i.\n", value);
+    fflush(stdout);
+  }
+}
+
+void map_function_second(void *map_data, int num_elements, void *extra_data) {
+  /* each element is read, then printed after a random sleep of under 1s */
+  for (int k = 0; k < num_elements; k++) {
+    const int value = ((const int *)map_data)[k];
+    usleep(rand() % 1000000);
+    printf("map_function_second: got input %i.\n", value);
+    fflush(stdout);
+  }
+}
+
+int main(int argc, char *argv[]) {
+
+  // Test parameters: pool size, input length and number of repetitions.
+  const int num_threads = 16;
+  const int N = 20;
+  const int num_runs = 2;
+
+  // Create a threadpool with num_threads threads.
+  struct threadpool tp;
+  threadpool_init(&tp, num_threads);
+
+  // Repeat the same sequence of three maps on the one pool.
+  for (int pass = 0; pass < num_runs; ++pass) {
+    // Input for this pass: data[j] = j.
+    int data[N];
+    for (int j = 0; j < N; ++j) data[j] = j;
+    // First map: all N integers, with a chunk size of one.
+    printf("processing integers from 0..%i.\n", N);
+    fflush(stdout);
+    threadpool_map(&tp, map_function_first, data, N, sizeof(int), 1, NULL);
+
+    // Second map: half of the integers, i.e. fewer jobs than threads.
+    printf("processing integers from 0..%i.\n", N / 2);
+    fflush(stdout);
+    threadpool_map(&tp, map_function_second, data, N / 2, sizeof(int), 1, NULL);
+
+    // Third map: all N integers again, with a chunk size of two.
+    printf("processing integers from 0..%i.\n", N);
+    fflush(stdout);
+    threadpool_map(&tp, map_function_first, data, N, sizeof(int), 2, NULL);
+  }
+
+  /* Release the pool before exiting */
+  threadpool_clean(&tp);
+
+  return 0;
+}
diff --git a/tests/testTimeIntegration.c b/tests/testTimeIntegration.c
index 03893daf3530df040e5a5630bc6dc1d930ddcd1b..f39adaee902ac3460b01857c002659b8bb2101f4 100644
--- a/tests/testTimeIntegration.c
+++ b/tests/testTimeIntegration.c
@@ -116,9 +116,6 @@ int main() {
 
     /* Kick... */
     runner_do_kick(&run, &c, 0);
-
-    /* Drift... */
-    runner_do_drift(&run, &c, 0);
   }
 
   /* Clean-up */