diff --git a/.gitignore b/.gitignore
index 7d6d9021f12ebfcb837d19c443362f1ecbc4077f..28a830818af36faad3f4278c6adcba5562b59ee7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -47,6 +47,8 @@ tests/brute_force_27_perturbed.dat
 tests/swift_dopair_27_perturbed.dat
 tests/brute_force_125_standard.dat
 tests/swift_dopair_125_standard.dat
+tests/brute_force_125_perturbed.dat
+tests/swift_dopair_125_perturbed.dat
 tests/testGreetings
 tests/testReading
 tests/input.hdf5
@@ -65,6 +67,7 @@ tests/parser_output.yml
 tests/test27cells.sh
 tests/test27cellsPerturbed.sh
 tests/test125cells.sh
+tests/test125cellsPerturbed.sh
 tests/testPair.sh
 tests/testPairPerturbed.sh
 tests/testParser.sh
diff --git a/configure.ac b/configure.ac
index 8a2d0f30ae297993b34153bc9a4c04085f4748f5..788bb57eed801c1a1dff2204b57b34c4fadf3b58 100644
--- a/configure.ac
+++ b/configure.ac
@@ -853,6 +853,7 @@ AC_CONFIG_FILES([tests/testPairPerturbed.sh], [chmod +x tests/testPairPerturbed.
 AC_CONFIG_FILES([tests/test27cells.sh], [chmod +x tests/test27cells.sh])
 AC_CONFIG_FILES([tests/test27cellsPerturbed.sh], [chmod +x tests/test27cellsPerturbed.sh])
 AC_CONFIG_FILES([tests/test125cells.sh], [chmod +x tests/test125cells.sh])
+AC_CONFIG_FILES([tests/test125cellsPerturbed.sh], [chmod +x tests/test125cellsPerturbed.sh])
 AC_CONFIG_FILES([tests/testParser.sh], [chmod +x tests/testParser.sh])
 
 # Save the compilation options
diff --git a/examples/DiscPatch/HydroStatic/plot.py b/examples/DiscPatch/HydroStatic/plot.py
new file mode 100644
index 0000000000000000000000000000000000000000..2de749f9e3b3c287390218e09ea347d660f9ce8a
--- /dev/null
+++ b/examples/DiscPatch/HydroStatic/plot.py
@@ -0,0 +1,103 @@
+################################################################################
+# This file is part of SWIFT.
+# Copyright (c) 2017 Bert Vandenbroucke (bert.vandenbroucke@gmail.com)
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published
+# by the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+################################################################################
+
+##
+# This script plots the Disc-Patch_*.hdf5 snapshots.
+# It takes two (optional) parameters: the counter value of the first and last
+# snapshot to plot (default: 0 81).
+##
+
+import numpy as np
+import h5py
+import matplotlib
+matplotlib.use("Agg")
+import pylab as pl
+import glob
+import sys
+
+# Parameters
+surface_density = 10.
+scale_height = 100.
+z_disc = 200.
+utherm = 20.2615290634
+gamma = 5. / 3.
+
+start = 0
+stop = 81
+if len(sys.argv) > 1:
+  start = int(sys.argv[1])
+if len(sys.argv) > 2:
+  stop = int(sys.argv[2])
+
+# Analytic density profile at height x: a sech^2 slab centred on z_disc
+def get_analytic_density(x):
+  cosh2 = np.cosh( (x - z_disc) / scale_height )**2
+  return 0.5 * surface_density / scale_height / cosh2
+
+def get_analytic_pressure(x):  # analytic (isothermal) pressure at height x
+  prefactor = (gamma - 1.) * utherm
+  return prefactor * get_analytic_density(x)
+
+# Get the data fields to plot from the snapshot file with the given name:
+#  snapshot time, z-coord, density, pressure, velocity norm
+def get_data(name):
+  # Read everything inside a "with" block so the HDF5 handle is closed again
+  # (the caller loops over many snapshots); np.array() copies into memory.
+  with h5py.File(name, "r") as snap:
+    time = float(snap["/Header"].attrs["Time"])
+    coords = np.array(snap["/PartType0/Coordinates"])
+    rho = np.array(snap["/PartType0/Density"])
+    u = np.array(snap["/PartType0/InternalEnergy"])
+    v = np.array(snap["/PartType0/Velocities"])
+  P = (gamma - 1.) * rho * u  # pressure from density and internal energy
+  vtot = np.sqrt( v[:,0]**2 + v[:,1]**2 + v[:,2]**2 )
+  return time, coords[:,2], rho, P, vtot
+
+# scan the folder for snapshot files and plot all of them (within the requested
+# range)
+for f in sorted(glob.glob("Disc-Patch_*.hdf5")):
+  num = int(f[-8:-5])
+  if num < start or num > stop:
+    continue
+
+  print "processing", f, "..."
+
+  zrange = np.linspace(0., 400., 1000)
+  time, z, rho, P, v = get_data(f)
+
+  fig, ax = pl.subplots(3, 1, sharex = True)
+
+  ax[0].plot(z, rho, "r.")
+  ax[0].plot(zrange, get_analytic_density(zrange), "k-")
+  ax[0].set_ylabel("density")
+
+  ax[1].plot(z, v, "r.")
+  ax[1].plot(zrange, np.zeros(len(zrange)), "k-")
+  ax[1].set_ylabel("velocity norm")
+
+  ax[2].plot(z, P, "r.")
+  ax[2].plot(zrange, get_analytic_pressure(zrange), "k-")
+  ax[2].set_xlim(0., 400.)
+  ax[2].set_xlabel("z")
+  ax[2].set_ylabel("pressure")
+
+  pl.suptitle("t = {0:.2f}".format(time))
+
+  pl.savefig("{name}.png".format(name = f[:-5]))
+  pl.close()
diff --git a/examples/EAGLE_12/eagle_12.yml b/examples/EAGLE_12/eagle_12.yml
index 69010d06c6b2c02dd982c8f22c58778691d8bdca..7d07b2cef22f2a23b7d66af79b3ef1306df2de01 100644
--- a/examples/EAGLE_12/eagle_12.yml
+++ b/examples/EAGLE_12/eagle_12.yml
@@ -31,8 +31,6 @@ Gravity:
   eta:                   0.025    # Constant dimensionless multiplier for time integration.
   theta:                 0.7      # Opening angle (Multipole acceptance criterion)
   epsilon:               0.0001   # Softening length (in internal units).
-  a_smooth:              1000.
-  r_cut:                 4.
   
 # Parameters for the hydrodynamics scheme
 SPH:
diff --git a/examples/HydrostaticHalo/density_profile.py b/examples/HydrostaticHalo/density_profile.py
index d0afd399f951cf3b727e869ca8571a3a802c2e8d..5248587ec343d3c0ffe2cef0cbd8716b9a1e055c 100644
--- a/examples/HydrostaticHalo/density_profile.py
+++ b/examples/HydrostaticHalo/density_profile.py
@@ -1,6 +1,27 @@
+###############################################################################
+ # This file is part of SWIFT.
+ # Copyright (c) 2016 Stefan Arridge (stefan.arridge@durham.ac.uk)
+ # 
+ # This program is free software: you can redistribute it and/or modify
+ # it under the terms of the GNU Lesser General Public License as published
+ # by the Free Software Foundation, either version 3 of the License, or
+ # (at your option) any later version.
+ # 
+ # This program is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ # GNU General Public License for more details.
+ # 
+ # You should have received a copy of the GNU Lesser General Public License
+ # along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ # 
+ ##############################################################################
+
 import numpy as np
 import h5py as h5
-import matplotlib.pyplot as plt
+import matplotlib
+matplotlib.use("Agg")
+from pylab import *
 import sys
 
 #for the plotting
@@ -46,7 +67,8 @@ for i in range(n_snaps):
     f = h5.File(filename,'r')
     coords_dset = f["PartType0/Coordinates"]
     coords = np.array(coords_dset)
-#translate coords by centre of box
+
+    #translate coords by centre of box
     header = f["Header"]
     snap_time = header.attrs["Time"]
     snap_time_cgs = snap_time * unit_time_cgs
@@ -63,58 +85,46 @@ for i in range(n_snaps):
     bin_width = bin_edges[1] - bin_edges[0]
     hist = np.histogram(r,bins = bin_edges)[0] # number of particles in each bin
 
-#find the mass in each radial bin
 
+    #find the mass in each radial bin
     mass_dset = f["PartType0/Masses"]
-#mass of each particles should be equal
+
+    #mass of each particle should be equal
     part_mass = np.array(mass_dset)[0]
     part_mass_cgs = part_mass * unit_mass_cgs
     part_mass_over_virial_mass = part_mass_cgs / M_vir_cgs 
 
     mass_hist = hist * part_mass_over_virial_mass
     radial_bin_mids = np.linspace(bin_width/2.,max_r - bin_width/2.,n_radial_bins)
-#volume in each radial bin
+
+    #volume in each radial bin
     volume = 4.*np.pi * radial_bin_mids**2 * bin_width
 
-#now divide hist by the volume so we have a density in each bin
 
+    #now divide hist by the volume so we have a density in each bin
     density = mass_hist / volume
 
-    ##read the densities
-
-    # density_dset = f["PartType0/Density"]
-    # density = np.array(density_dset)
-    # density_cgs = density * unit_mass_cgs / unit_length_cgs**3
-    # rho = density_cgs * r_vir_cgs**3 / M_vir_cgs
-
     t = np.linspace(10./n_radial_bins,10.0,1000)
     rho_analytic = t**(-2)/(4.*np.pi)
 
-    #calculate cooling radius
-
-    #r_cool_over_r_vir = np.sqrt((2.*(gamma - 1.)*lambda_cgs*M_vir_cgs*X_H**2)/(4.*np.pi*CONST_m_H_CGS**2*v_c_cgs**2*r_vir_cgs**3))*np.sqrt(snap_time_cgs)
 
-    #initial analytic density profile
-    
+    #initial analytic density profile    
     if (i == 0):
         r_0 = radial_bin_mids[0]
         rho_0 = density[0]
-
         rho_analytic_init = rho_0 * (radial_bin_mids/r_0)**(-2)
-    plt.plot(radial_bin_mids,density/rho_analytic_init,'ko',label = "Average density of shell")
-    #plt.plot(t,rho_analytic,label = "Initial analytic density profile"
-    plt.xlabel(r"$r / r_{vir}$")
-    plt.ylabel(r"$\rho / \rho_{init})$")
-    plt.title(r"$\mathrm{Time}= %.3g \, s \, , \, %d \, \, \mathrm{particles} \,,\, v_c = %.1f \, \mathrm{km / s}$" %(snap_time_cgs,N,v_c))
-    #plt.ylim((1.e-2,1.e1))
-    #plt.plot((r_cool_over_r_vir,r_cool_over_r_vir),(0,20),'r',label = "Cooling radius")
-    plt.xlim((radial_bin_mids[0],max_r))
-    plt.ylim((0,20))
-    plt.plot((0,max_r),(1,1))
-    #plt.xscale('log')
-    #plt.yscale('log')
-    plt.legend(loc = "upper right")
+
+    figure()
+    plot(radial_bin_mids,density/rho_analytic_init,'ko',label = "Average density of shell")
+    #plot(t,rho_analytic,label = "Initial analytic density profile")
+    xlabel(r"$r / r_{vir}$")
+    ylabel(r"$\rho / \rho_{init}$")
+    title(r"$\mathrm{Time}= %.3g \, s \, , \, %d \, \, \mathrm{particles} \,,\, v_c = %.1f \, \mathrm{km / s}$" %(snap_time_cgs,N,v_c))
+    xlim((radial_bin_mids[0],max_r))
+    ylim((0,2))
+    plot((0,max_r),(1,1))
+    legend(loc = "upper right")
     plot_filename = "./plots/density_profile/density_profile_%03d.png" %i
-    plt.savefig(plot_filename,format = "png")
-    plt.close()
+    savefig(plot_filename,format = "png")
+    close()
 
diff --git a/examples/HydrostaticHalo/internal_energy_profile.py b/examples/HydrostaticHalo/internal_energy_profile.py
index ea52cf8fc5fd098a46f05eaa58494529a868000c..f1be049adb8e972f89fd9ffe86106b1b9f3b19dc 100644
--- a/examples/HydrostaticHalo/internal_energy_profile.py
+++ b/examples/HydrostaticHalo/internal_energy_profile.py
@@ -1,6 +1,27 @@
+###############################################################################
+ # This file is part of SWIFT.
+ # Copyright (c) 2016 Stefan Arridge (stefan.arridge@durham.ac.uk)
+ # 
+ # This program is free software: you can redistribute it and/or modify
+ # it under the terms of the GNU Lesser General Public License as published
+ # by the Free Software Foundation, either version 3 of the License, or
+ # (at your option) any later version.
+ # 
+ # This program is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ # GNU General Public License for more details.
+ # 
+ # You should have received a copy of the GNU Lesser General Public License
+ # along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ # 
+ ##############################################################################
+
 import numpy as np
 import h5py as h5
-import matplotlib.pyplot as plt
+import matplotlib
+matplotlib.use("Agg")
+from pylab import *
 import sys
 
 def do_binning(x,y,x_bin_edges):
@@ -48,8 +69,6 @@ unit_velocity_cgs = float(params.attrs["InternalUnitSystem:UnitVelocity_in_cgs"]
 unit_time_cgs = unit_length_cgs / unit_velocity_cgs
 v_c = float(params.attrs["IsothermalPotential:vrot"])
 v_c_cgs = v_c * unit_velocity_cgs
-#lambda_cgs = float(params.attrs["LambdaCooling:lambda_cgs"])
-#X_H = float(params.attrs["LambdaCooling:hydrogen_mass_abundance"])
 header = f["Header"]
 N = header.attrs["NumPart_Total"][0]
 box_centre = np.array(header.attrs["BoxSize"])
@@ -64,7 +83,8 @@ for i in range(n_snaps):
     f = h5.File(filename,'r')
     coords_dset = f["PartType0/Coordinates"]
     coords = np.array(coords_dset)
-#translate coords by centre of box
+
+    #translate coords by centre of box
     header = f["Header"]
     snap_time = header.attrs["Time"]
     snap_time_cgs = snap_time * unit_time_cgs
@@ -75,11 +95,11 @@ for i in range(n_snaps):
     radius_cgs = radius*unit_length_cgs
     radius_over_virial_radius = radius_cgs / r_vir_cgs
 
-#get the internal energies
+    #get the internal energies
     u_dset = f["PartType0/InternalEnergy"]
     u = np.array(u_dset)
 
-#make dimensionless
+    #make dimensionless
     u /= v_c**2/(2. * (gamma - 1.))
     r = radius_over_virial_radius
 
@@ -90,21 +110,16 @@ for i in range(n_snaps):
     radial_bin_mids = np.linspace(bin_widths / 2. , max_r - bin_widths / 2. , n_radial_bins) 
     binned_u = u_totals / hist
 
-    #calculate cooling radius
-
-    #r_cool_over_r_vir = np.sqrt((2.*(gamma - 1.)*lambda_cgs*M_vir_cgs*X_H**2)/(4.*np.pi*CONST_m_H_CGS**2*v_c_cgs**2*r_vir_cgs**3))*np.sqrt(snap_time_cgs)
-
-    plt.plot(radial_bin_mids,binned_u,'ko',label = "Numerical solution")
-    #plt.plot((0,1),(1,1),label = "Analytic Solution")
-    #plt.plot((r_cool_over_r_vir,r_cool_over_r_vir),(0,2),'r',label = "Cooling radius")
-    plt.legend(loc = "lower right")
-    plt.xlabel(r"$r / r_{vir}$")
-    plt.ylabel(r"$u / (v_c^2 / (2(\gamma - 1)) $")
-    plt.title(r"$\mathrm{Time}= %.3g \, s \, , \, %d \, \, \mathrm{particles} \,,\, v_c = %.1f \, \mathrm{km / s}$" %(snap_time_cgs,N,v_c))
-    plt.ylim((0,2))
+    figure()
+    plot(radial_bin_mids,binned_u,'ko',label = "Numerical solution")
+    legend(loc = "lower right")
+    xlabel(r"$r / r_{vir}$")
+    ylabel(r"$u / (v_c^2 / (2(\gamma - 1)) $")
+    title(r"$\mathrm{Time}= %.3g \, s \, , \, %d \, \, \mathrm{particles} \,,\, v_c = %.1f \, \mathrm{km / s}$" %(snap_time_cgs,N,v_c))
+    ylim((0,2))
     plot_filename = "./plots/internal_energy/internal_energy_profile_%03d.png" %i
-    plt.savefig(plot_filename,format = "png")
-    plt.close()
+    savefig(plot_filename,format = "png")
+    close()
 
 
         
diff --git a/examples/HydrostaticHalo/run.sh b/examples/HydrostaticHalo/run.sh
index d23ead6a67f43c9d19d76a797e72d050a3978d61..82584282559c1fceb0492aada671ff83fb74c924 100755
--- a/examples/HydrostaticHalo/run.sh
+++ b/examples/HydrostaticHalo/run.sh
@@ -1,11 +1,14 @@
 #!/bin/bash
 
 # Generate the initial conditions if they are not present.
-echo "Generating initial conditions for the isothermal potential box example..."
-python makeIC.py 100000
+if [ ! -e Hydrostatic.hdf5 ]
+then
+    echo "Generating initial conditions for the isothermal potential box example..."
+    python makeIC.py 100000
+fi
 
 # Run for 10 dynamical times
-../swift -g -s -t 2 hydrostatic.yml 2>&1 | tee output.log
+../swift -g -s -t 1 hydrostatic.yml 2>&1 | tee output.log
 
 echo "Plotting density profiles"
 mkdir plots
diff --git a/examples/HydrostaticHalo/test_energy_conservation.py b/examples/HydrostaticHalo/test_energy_conservation.py
index ca091050c4127d11a37a2cc7504e42d244031e25..8368d475813d248ca93c12e46737b062752ab779 100644
--- a/examples/HydrostaticHalo/test_energy_conservation.py
+++ b/examples/HydrostaticHalo/test_energy_conservation.py
@@ -1,6 +1,27 @@
+###############################################################################
+ # This file is part of SWIFT.
+ # Copyright (c) 2016 Stefan Arridge (stefan.arridge@durham.ac.uk)
+ # 
+ # This program is free software: you can redistribute it and/or modify
+ # it under the terms of the GNU Lesser General Public License as published
+ # by the Free Software Foundation, either version 3 of the License, or
+ # (at your option) any later version.
+ # 
+ # This program is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ # GNU General Public License for more details.
+ # 
+ # You should have received a copy of the GNU Lesser General Public License
+ # along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ # 
+ ##############################################################################
+
 import numpy as np
 import h5py as h5
-import matplotlib.pyplot as plt
+import matplotlib
+matplotlib.use("Agg")
+from pylab import *
 import sys
 
 n_snaps = int(sys.argv[1])
@@ -24,7 +45,7 @@ unit_mass_cgs = float(params.attrs["InternalUnitSystem:UnitMass_in_cgs"])
 unit_length_cgs = float(params.attrs["InternalUnitSystem:UnitLength_in_cgs"])
 unit_velocity_cgs = float(params.attrs["InternalUnitSystem:UnitVelocity_in_cgs"])
 unit_time_cgs = unit_length_cgs / unit_velocity_cgs
-v_c = float(params.attrs["SoftenedIsothermalPotential:vrot"])
+v_c = float(params.attrs["IsothermalPotential:vrot"])
 v_c_cgs = v_c * unit_velocity_cgs
 header = f["Header"]
 N = header.attrs["NumPart_Total"][0]
@@ -45,7 +66,8 @@ for i in range(n_snaps):
     f = h5.File(filename,'r')
     coords_dset = f["PartType0/Coordinates"]
     coords = np.array(coords_dset)
-#translate coords by centre of box
+
+    #translate coords by centre of box
     header = f["Header"]
     snap_time = header.attrs["Time"]
     snap_time_cgs = snap_time * unit_time_cgs
@@ -73,7 +95,6 @@ for i in range(n_snaps):
     internal_energy_array = np.append(internal_energy_array,total_internal_energy)
 
 #put energies in units of v_c^2 and rescale by number of particles
-
 pe = potential_energy_array / (N*v_c**2)
 ke = kinetic_energy_array / (N*v_c**2)
 ie = internal_energy_array / (N*v_c**2)
@@ -82,14 +103,15 @@ te = pe + ke + ie
 dyn_time_cgs = r_vir_cgs / v_c_cgs
 time_array = time_array_cgs / dyn_time_cgs
 
-plt.plot(time_array,ke,label = "Kinetic Energy")
-plt.plot(time_array,pe,label = "Potential Energy")
-plt.plot(time_array,ie,label = "Internal Energy")
-plt.plot(time_array,te,label = "Total Energy")
-plt.legend(loc = "lower right")
-plt.xlabel(r"$t / t_{dyn}$")
-plt.ylabel(r"$E / v_c^2$")
-plt.title(r"$%d \, \, \mathrm{particles} \,,\, v_c = %.1f \, \mathrm{km / s}$" %(N,v_c))
-plt.ylim((-2,2))
-plt.savefig("energy_conservation.png",format = 'png')
+figure()
+plot(time_array,ke,label = "Kinetic Energy")
+plot(time_array,pe,label = "Potential Energy")
+plot(time_array,ie,label = "Internal Energy")
+plot(time_array,te,label = "Total Energy")
+legend(loc = "lower right")
+xlabel(r"$t / t_{dyn}$")
+ylabel(r"$E / v_c^2$")
+title(r"$%d \, \, \mathrm{particles} \,,\, v_c = %.1f \, \mathrm{km / s}$" %(N,v_c))
+ylim((-2,2))
+savefig("energy_conservation.png",format = 'png')
 
diff --git a/examples/HydrostaticHalo/velocity_profile.py b/examples/HydrostaticHalo/velocity_profile.py
index 9133195d942233514148aa419003ee0ab7923494..f8f607362846a323937a9203dab8bc228f52a149 100644
--- a/examples/HydrostaticHalo/velocity_profile.py
+++ b/examples/HydrostaticHalo/velocity_profile.py
@@ -1,6 +1,27 @@
+###############################################################################
+ # This file is part of SWIFT.
+ # Copyright (c) 2016 Stefan Arridge (stefan.arridge@durham.ac.uk)
+ # 
+ # This program is free software: you can redistribute it and/or modify
+ # it under the terms of the GNU Lesser General Public License as published
+ # by the Free Software Foundation, either version 3 of the License, or
+ # (at your option) any later version.
+ # 
+ # This program is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ # GNU General Public License for more details.
+ # 
+ # You should have received a copy of the GNU Lesser General Public License
+ # along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ # 
+ ##############################################################################
+
 import numpy as np
 import h5py as h5
-import matplotlib.pyplot as plt
+import matplotlib
+matplotlib.use("Agg")
+from pylab import *
 import sys
 
 def do_binning(x,y,x_bin_edges):
@@ -62,7 +83,8 @@ for i in range(n_snaps):
     f = h5.File(filename,'r')
     coords_dset = f["PartType0/Coordinates"]
     coords = np.array(coords_dset)
-#translate coords by centre of box
+
+    #translate coords by centre of box
     header = f["Header"]
     snap_time = header.attrs["Time"]
     snap_time_cgs = snap_time * unit_time_cgs
@@ -73,16 +95,15 @@ for i in range(n_snaps):
     radius_cgs = radius*unit_length_cgs
     radius_over_virial_radius = radius_cgs / r_vir_cgs
 
-#get the internal energies
+    #get the velocities
     vel_dset = f["PartType0/Velocities"]
     vel = np.array(vel_dset)
 
-#make dimensionless
+    #make dimensionless
     vel /= v_c
     r = radius_over_virial_radius
 
     #find radial component of velocity
-
     v_r = np.zeros(r.size)
     for j in range(r.size):
         v_r[j] = -np.dot(coords[j,:],vel[j,:])/radius[j]
@@ -94,18 +115,13 @@ for i in range(n_snaps):
     radial_bin_mids = np.linspace(bin_widths / 2. , max_r - bin_widths / 2. , n_radial_bins) 
     binned_v_r = v_r_totals / hist
 
-    #calculate cooling radius
-
-    #r_cool_over_r_vir = np.sqrt((2.*(gamma - 1.)*lambda_cgs*M_vir_cgs*X_H**2)/(4.*np.pi*CONST_m_H_CGS**2*v_c_cgs**2*r_vir_cgs**3))*np.sqrt(snap_time_cgs)
-
-    plt.plot(radial_bin_mids,binned_v_r,'ko',label = "Average radial velocity in shell")
-    #plt.plot((0,1),(1,1),label = "Analytic Solution")
-    #plt.plot((r_cool_over_r_vir,r_cool_over_r_vir),(0,2),'r',label = "Cooling radius")
-    plt.legend(loc = "upper right")
-    plt.xlabel(r"$r / r_{vir}$")
-    plt.ylabel(r"$v_r / v_c$")
-    plt.title(r"$\mathrm{Time}= %.3g \, s \, , \, %d \, \, \mathrm{particles} \,,\, v_c = %.1f \, \mathrm{km / s}$" %(snap_time_cgs,N,v_c))
-    plt.ylim((0,2))
+    figure()
+    plot(radial_bin_mids,binned_v_r,'ko',label = "Average radial velocity in shell")
+    legend(loc = "upper right")
+    xlabel(r"$r / r_{vir}$")
+    ylabel(r"$v_r / v_c$")
+    title(r"$\mathrm{Time}= %.3g \, s \, , \, %d \, \, \mathrm{particles} \,,\, v_c = %.1f \, \mathrm{km / s}$" %(snap_time_cgs,N,v_c))
+    ylim((-1,1))
     plot_filename = "./plots/radial_velocity_profile/velocity_profile_%03d.png" %i
-    plt.savefig(plot_filename,format = "png")
-    plt.close()
+    savefig(plot_filename,format = "png")
+    close()
diff --git a/examples/UniformDMBox/makeIC.py b/examples/UniformDMBox/makeIC.py
index 8e032500016eb6cc8e0decc54968bb5b841d7f93..8f3cd943b3cf19c4ae231d125c5ef97d076e0e8e 100644
--- a/examples/UniformDMBox/makeIC.py
+++ b/examples/UniformDMBox/makeIC.py
@@ -26,7 +26,7 @@ from numpy import *
 # with a density of 1
 
 # Parameters
-periodic= 0           # 1 For periodic box
+periodic= 1           # 1 For periodic box
 boxSize = 1.
 rho = 1.
 L = int(sys.argv[1])  # Number of particles along one axis
diff --git a/examples/UniformDMBox/uniformBox.yml b/examples/UniformDMBox/uniformBox.yml
index 8d9ec300164a7bf8f3df257c34ee44d4f77fe94e..cffd442a9a5b16d8e042e41caf9991fcf0e1202e 100644
--- a/examples/UniformDMBox/uniformBox.yml
+++ b/examples/UniformDMBox/uniformBox.yml
@@ -35,4 +35,4 @@ Statistics:
 
 # Parameters related to the initial conditions
 InitialConditions:
-  file_name:  ./uniformDMBox_100.hdf5     # The file to read
+  file_name:  ./uniformDMBox_50.hdf5     # The file to read
diff --git a/examples/analyse_tasks.py b/examples/analyse_tasks.py
new file mode 100755
index 0000000000000000000000000000000000000000..04cd59feedba7ee41621ac0891d544c4aa294543
--- /dev/null
+++ b/examples/analyse_tasks.py
@@ -0,0 +1,183 @@
+#!/usr/bin/env python
+"""
+Usage:
+    analyse_tasks.py [options] input.dat
+
+where input.dat is a thread info file for a step.  Use the '-y interval' flag
+of the swift command to create these.
+
+The output is an analysis of the task timings, including deadtime per thread
+and step, total amount of time spent for each task type, for the whole step
+and per thread and the minimum and maximum times spent per task type.
+
+This file is part of SWIFT.
+Copyright (c) 2017 Peter W. Draper (p.w.draper@durham.ac.uk)
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published
+by the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>.
+"""
+
+import matplotlib
+matplotlib.use("Agg")
+import matplotlib.collections as collections
+import matplotlib.ticker as plticker
+import pylab as pl
+import sys
+import argparse
+
+#  Handle the command line.
+parser = argparse.ArgumentParser(description="Analyse task dumps")
+
+parser.add_argument("input", help="Thread data file (-y output)")
+parser.add_argument("-v", "--verbose", dest="verbose",
+                    help="Verbose output (default: False)",
+                    default=False, action="store_true")
+
+args = parser.parse_args()
+infile = args.input
+
+#  Tasks and subtypes. Indexed as in tasks.h.
+TASKTYPES = ["none", "sort", "self", "pair", "sub_self", "sub_pair",
+             "init_grav", "ghost", "extra_ghost", "drift_part",
+             "drift_gpart", "kick1", "kick2", "timestep", "send", "recv",
+             "grav_top_level", "grav_long_range", "grav_mm", "grav_down",
+             "cooling", "sourceterms", "count"]
+
+SUBTYPES = ["none", "density", "gradient", "force", "grav", "external_grav",
+            "tend", "xv", "rho", "gpart", "multipole", "spart", "count"]
+
+#  Read input.
+data = pl.loadtxt( infile )
+
+maxthread = int(max(data[:,0])) + 1
+print "# Maximum thread id:", maxthread
+
+#  Recover the start and end time
+full_step = data[0,:]
+tic_step = int(full_step[4])
+toc_step = int(full_step[5])
+CPU_CLOCK = float(full_step[-1]) / 1000.0
+data = data[1:,:]
+if args.verbose:
+    print "CPU frequency:", CPU_CLOCK * 1000.0
+
+#  Avoid start and end times of zero.
+data = data[data[:,4] != 0]
+data = data[data[:,5] != 0]
+
+#  Calculate the time range.
+total_t = (toc_step - tic_step)/ CPU_CLOCK
+print "# Data range: ", total_t, "ms"
+
+#  Correct times to relative values.
+start_t = float(tic_step)
+data[:,4] -= start_t
+data[:,5] -= start_t
+
+tasks = {}
+tasks[-1] = []
+for i in range(maxthread):
+    tasks[i] = []
+
+#  Gather the data by thread. Each remaining row is one task, so walk the
+#  rows directly instead of deriving a row count from a fixed column count.
+for row in data:
+    thread = int(row[0])
+    tic = int(row[4]) / CPU_CLOCK
+    toc = int(row[5]) / CPU_CLOCK
+    tasktype = int(row[1])
+    subtype = int(row[2])
+
+    tasks[thread].append([tic,toc,tasktype,subtype])
+
+#  Sort by tic and gather used thread ids.
+threadids = []
+for i in range(maxthread):
+    if len(tasks[i]) > 0:
+        tasks[i] = sorted(tasks[i], key=lambda task: task[0])
+        threadids.append(i)
+
+#  Times per task.
+print "# Task times:"
+print "# {0:<16s}: {1:>7s} {2:>9s} {3:>9s} {4:>9s} {5:>9s} {6:>9s}"\
+      .format("type/subtype", "count","minimum", "maximum",
+              "sum", "mean", "percent")
+alltasktimes = {}
+for i in threadids:
+    tasktimes = {}
+    for task in tasks[i]:
+        key = TASKTYPES[task[2]] + "/" + SUBTYPES[task[3]]
+        dt = task[1] - task[0]
+        if not key in tasktimes:
+            tasktimes[key] = []
+        tasktimes[key].append(dt)
+
+        if not key in alltasktimes:
+            alltasktimes[key] = []
+        alltasktimes[key].append(dt)
+
+    print "# Thread : ", i
+    for key in sorted(tasktimes.keys()):
+        taskmin = min(tasktimes[key])
+        taskmax = max(tasktimes[key])
+        tasksum = sum(tasktimes[key])
+        print "{0:18s}: {1:7d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}"\
+              .format(key, len(tasktimes[key]), taskmin, taskmax, tasksum,
+                      tasksum / len(tasktimes[key]), tasksum / total_t * 100.0)
+    print
+
+print "# All threads : "
+for key in sorted(alltasktimes.keys()):
+    taskmin = min(alltasktimes[key])
+    taskmax = max(alltasktimes[key])
+    tasksum = sum(alltasktimes[key])
+    print "{0:18s}: {1:7d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}"\
+          .format(key, len(alltasktimes[key]), taskmin, taskmax, tasksum,
+                  tasksum / len(alltasktimes[key]),
+                  tasksum / (len(threadids) * total_t) * 100.0)
+print
+
+#  Dead times.
+print "# Deadtimes:"
+print "# no.    : {0:>9s} {1:>9s} {2:>9s} {3:>9s} {4:>9s} {5:>9s}"\
+      .format("count", "minimum", "maximum", "sum", "mean", "percent")
+alldeadtimes = []
+for i in threadids:
+    deadtimes = []
+    last = 0
+    for task in tasks[i]:
+        dt = task[0] - last
+        deadtimes.append(dt)
+        last = task[1]
+    dt = total_t - last
+    deadtimes.append(dt)
+
+    deadmin = min(deadtimes)
+    deadmax = max(deadtimes)
+    deadsum = sum(deadtimes)
+    print "thread {0:2d}: {1:9d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}"\
+          .format(i, len(deadtimes), deadmin, deadmax, deadsum,
+                  deadsum / len(deadtimes), deadsum / total_t * 100.0)
+    alldeadtimes.extend(deadtimes)
+
+deadmin = min(alldeadtimes)
+deadmax = max(alldeadtimes)
+deadsum = sum(alldeadtimes)
+print "all      : {0:9d} {1:9.4f} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.2f}"\
+      .format(len(alldeadtimes), deadmin, deadmax, deadsum,
+              deadsum / len(alldeadtimes),
+              deadsum / (len(threadids) * total_t ) * 100.0)
+print
+
+
+sys.exit(0)
diff --git a/examples/analyse_tasks_MPI.py b/examples/analyse_tasks_MPI.py
new file mode 100755
index 0000000000000000000000000000000000000000..9feffaf67ec393257d75428e310a2e8b807df39a
--- /dev/null
+++ b/examples/analyse_tasks_MPI.py
@@ -0,0 +1,197 @@
+#!/usr/bin/env python
+"""
+Usage:
+    analyse_tasks_MPI.py [options] input.dat
+
+where input.dat is a thread info file for an MPI step.  Use the '-y interval'
+flag of the swift command to create these.
+
+The output is an analysis of the task timings, including deadtime per thread
+and step, total amount of time spent for each task type, for the whole step
+and per thread and the minimum and maximum times spent per task type.
+
+This file is part of SWIFT.
+Copyright (c) 2017 Peter W. Draper (p.w.draper@durham.ac.uk)
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published
+by the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>.
+"""
+
+import matplotlib
+matplotlib.use("Agg")
+import matplotlib.collections as collections
+import matplotlib.ticker as plticker
+import pylab as pl
+import sys
+import argparse
+
+#  Handle the command line.
+parser = argparse.ArgumentParser(description="Analyse task dumps")
+
+parser.add_argument("input", help="Thread data file (-y output)")
+parser.add_argument("-v", "--verbose", dest="verbose",
+                    help="Verbose output (default: False)",
+                    default=False, action="store_true")
+
+args = parser.parse_args()
+infile = args.input
+
+#  Tasks and subtypes. Indexed as in tasks.h.
+TASKTYPES = ["none", "sort", "self", "pair", "sub_self", "sub_pair",
+             "init_grav", "ghost", "extra_ghost", "drift_part",
+             "drift_gpart", "kick1", "kick2", "timestep", "send", "recv",
+             "grav_top_level", "grav_long_range", "grav_mm", "grav_down",
+             "cooling", "sourceterms", "count"]
+
+SUBTYPES = ["none", "density", "gradient", "force", "grav", "external_grav",
+            "tend", "xv", "rho", "gpart", "multipole", "spart", "count"]
+
+#  Read input.
+data = pl.loadtxt( infile )
+
+#  Get the CPU clock to convert ticks into milliseconds.
+full_step = data[0,:]
+CPU_CLOCK = float(full_step[-1]) / 1000.0
+if args.verbose:
+    print "# CPU frequency:", CPU_CLOCK * 1000.0
+
+nranks = int(max(data[:,0])) + 1
+print "# Number of ranks:", nranks
+maxthread = int(max(data[:,1])) + 1
+print "# Number of threads:", maxthread
+
+#  Avoid start and end times of zero (chain the filters so both apply).
+sdata = data[data[:,5] != 0]
+sdata = sdata[sdata[:,6] != 0]
+
+#  Now we process all the ranks.
+for rank in range(nranks):
+    print "# Rank", rank
+    data = sdata[sdata[:,0] == rank]
+
+    #  Recover the start and end time
+    full_step = data[0,:]
+    tic_step = int(full_step[5])
+    toc_step = int(full_step[6])
+    data = data[1:,:]
+
+    #  Avoid start and end times of zero.
+    data = data[data[:,5] != 0]
+    data = data[data[:,6] != 0]
+
+    #  Calculate the time range.
+    total_t = (toc_step - tic_step)/ CPU_CLOCK
+    print "# Data range: ", total_t, "ms"
+
+    #  Correct times to relative values.
+    start_t = float(tic_step)
+    data[:,5] -= start_t
+    data[:,6] -= start_t
+    end_t = (toc_step - start_t) / CPU_CLOCK
+
+    tasks = {}
+    tasks[-1] = []
+    for i in range(maxthread):
+        tasks[i] = []
+
+    #  Gather the data by thread (one row of data per task).
+    num_lines = pl.shape(data)[0]
+    for line in range(num_lines):
+        thread = int(data[line,1])
+        tic = int(data[line,5]) / CPU_CLOCK
+        toc = int(data[line,6]) / CPU_CLOCK
+        tasktype = int(data[line,2])
+        subtype = int(data[line,3])
+
+        tasks[thread].append([tic,toc,tasktype,subtype])
+
+    #  Sort by tic and gather used threads.
+    threadids = []
+    for i in range(maxthread):
+        tasks[i] = sorted(tasks[i], key=lambda task: task[0])
+        threadids.append(i)
+
+    #  Times per task.
+    print "# Task times:"
+    print "# {0:<16s}: {1:>7s} {2:>9s} {3:>9s} {4:>9s} {5:>9s} {6:>9s}"\
+          .format("type/subtype", "count","minimum", "maximum",
+                  "sum", "mean", "percent")
+    alltasktimes = {}
+    for i in threadids:
+        tasktimes = {}
+        for task in tasks[i]:
+            key = TASKTYPES[task[2]] + "/" + SUBTYPES[task[3]]
+            dt = task[1] - task[0]
+            if not key in tasktimes:
+                tasktimes[key] = []
+            tasktimes[key].append(dt)
+
+            if not key in alltasktimes:
+                alltasktimes[key] = []
+            alltasktimes[key].append(dt)
+
+        print "# Thread : ", i
+        for key in sorted(tasktimes.keys()):
+            taskmin = min(tasktimes[key])
+            taskmax = max(tasktimes[key])
+            tasksum = sum(tasktimes[key])
+            print "{0:18s}: {1:7d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}"\
+                  .format(key, len(tasktimes[key]), taskmin, taskmax, tasksum,
+                          tasksum / len(tasktimes[key]), tasksum / total_t * 100.0)
+        print
+
+    print "# All threads : "
+    for key in sorted(alltasktimes.keys()):
+        taskmin = min(alltasktimes[key])
+        taskmax = max(alltasktimes[key])
+        tasksum = sum(alltasktimes[key])
+        print "{0:18s}: {1:7d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}"\
+              .format(key, len(alltasktimes[key]), taskmin, taskmax, tasksum,
+                      tasksum / len(alltasktimes[key]),
+                      tasksum / (len(threadids) * total_t) * 100.0)
+    print
+
+    #  Dead times.
+    print "# Deadtimes:"
+    print "# no.    : {0:>9s} {1:>9s} {2:>9s} {3:>9s} {4:>9s} {5:>9s}"\
+          .format("count", "minimum", "maximum", "sum", "mean", "percent")
+    alldeadtimes = []
+    for i in threadids:
+        deadtimes = []
+        last = 0
+        for task in tasks[i]:
+            dt = task[0] - last
+            deadtimes.append(dt)
+            last = task[1]
+        dt = total_t - last
+        deadtimes.append(dt)
+
+        deadmin = min(deadtimes)
+        deadmax = max(deadtimes)
+        deadsum = sum(deadtimes)
+        print "thread {0:2d}: {1:9d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}"\
+              .format(i, len(deadtimes), deadmin, deadmax, deadsum,
+                      deadsum / len(deadtimes), deadsum / total_t * 100.0)
+        alldeadtimes.extend(deadtimes)
+
+    deadmin = min(alldeadtimes)
+    deadmax = max(alldeadtimes)
+    deadsum = sum(alldeadtimes)
+    print "all      : {0:9d} {1:9.4f} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.2f}"\
+          .format(len(alldeadtimes), deadmin, deadmax, deadsum,
+                  deadsum / len(alldeadtimes),
+              deadsum / (len(threadids) * total_t ) * 100.0)
+    print
+
+
+sys.exit(0)
diff --git a/examples/parameter_example.yml b/examples/parameter_example.yml
index 14bc60bc1e1c05ecdc66fb7ac828102b1d5748bf..8006c1a325845d6e9fec655b809310a63daa9ddb 100644
--- a/examples/parameter_example.yml
+++ b/examples/parameter_example.yml
@@ -107,6 +107,12 @@ DiscPatchPotential:
   timestep_mult:   0.03     # Dimensionless pre-factor for the time-step condition
   growth_time:     5.       # (Optional) Time for the disc to grow to its final size (multiple of the dynamical time)
 
+# Sine Wave potential
+SineWavePotential:
+  amplitude:        10.     # Amplitude of the sine wave (internal units)
+  timestep_limit:   1.      # Time-step dimensionless pre-factor.
+  growth_time:      0.      # (Optional) Time for the potential to grow to its final size.
+ 
 # Parameters related to cooling function  ----------------------------------------------
 
 # Constant du/dt cooling function
diff --git a/examples/plot_tasks.py b/examples/plot_tasks.py
index 1be59d1c8449970321b8ef9053ddf24b4559dabd..88f176687db8116cfd4370970769164985e4d366 100755
--- a/examples/plot_tasks.py
+++ b/examples/plot_tasks.py
@@ -1,18 +1,20 @@
 #!/usr/bin/env python
 """
 Usage:
-    plot_tasks.py input.dat output.png [time-range-ms]
+    plot_tasks.py [options] input.dat output.png
 
-where input.dat is a thread info file for a step.  Use the '-y interval'
-flag of the swift MPI commands to create these. The output plot will be
-called 'output.png'. Use the time-range-ms in millisecs to produce
-plots with the same time span.
+where input.dat is a thread info file for a step.  Use the '-y interval' flag
+of the swift command to create these. The output plot will be called
+'output.png'. The --limit option can be used to produce plots with the same
+time span and the --expand option to expand each thread line into '*expand'
+lines, so that adjacent tasks of the same type can be distinguished. Other
+options can be seen using the --help flag.
 
 This file is part of SWIFT.
 Copyright (c) 2015 Pedro Gonnet (pedro.gonnet@durham.ac.uk),
                    Bert Vandenbroucke (bert.vandenbroucke@ugent.be)
                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
-          (c) 2016 Peter W. Draper (p.w.draper@durham.ac.uk)
+          (c) 2017 Peter W. Draper (p.w.draper@durham.ac.uk)
 
 This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU Lesser General Public License as published
@@ -29,11 +31,42 @@ along with this program.  If not, see <http://www.gnu.org/licenses/>.
 """
 
 import matplotlib
+matplotlib.use("Agg")
 import matplotlib.collections as collections
-matplotlib.use('Agg')
+import matplotlib.ticker as plticker
 import pylab as pl
-import numpy as np
 import sys
+import argparse
+
+#  Handle the command line.
+parser = argparse.ArgumentParser(description="Plot task graphs")
+
+parser.add_argument("input", help="Thread data file (-y output)")
+parser.add_argument("outpng", help="Name for output graphic file (PNG)")
+parser.add_argument("-l", "--limit", dest="limit",
+                    help="Upper time limit in millisecs (def: depends on data)",
+                    default=0, type=int)
+parser.add_argument("-e", "--expand", dest="expand",
+                    help="Thread expansion factor (def: 1)",
+                    default=1, type=int)
+parser.add_argument("--height", dest="height",
+                    help="Height of plot in inches (def: 4)",
+                    default=4., type=float)
+parser.add_argument("--width", dest="width",
+                    help="Width of plot in inches (def: 16)",
+                    default=16., type=float)
+parser.add_argument("--nolegend", dest="nolegend",
+                    help="Do not show the legend (def: legend is shown)",
+                    default=False, action="store_true")
+parser.add_argument("-v", "--verbose", dest="verbose",
+                    help="Show colour assignments and other details (def: False)",
+                    default=False, action="store_true")
+
+args = parser.parse_args()
+infile = args.input
+outpng = args.outpng
+delta_t = args.limit
+expand = args.expand
 
 #  Basic plot configuration.
 PLOT_PARAMS = {"axes.labelsize": 10,
@@ -42,7 +75,7 @@ PLOT_PARAMS = {"axes.labelsize": 10,
                "legend.fontsize": 12,
                "xtick.labelsize": 10,
                "ytick.labelsize": 10,
-               "figure.figsize" : (16., 4.),
+               "figure.figsize" : (args.width, args.height),
                "figure.subplot.left" : 0.03,
                "figure.subplot.right" : 0.995,
                "figure.subplot.bottom" : 0.1,
@@ -56,9 +89,11 @@ pl.rcParams.update(PLOT_PARAMS)
 
 #  Tasks and subtypes. Indexed as in tasks.h.
 TASKTYPES = ["none", "sort", "self", "pair", "sub_self", "sub_pair",
-             "init", "ghost", "extra_ghost", "drift", "kick1", "kick2",
-             "timestep", "send", "recv", "grav_top_level", "grav_long_range",
-             "grav_mm", "grav_down", "cooling", "sourceterms", "count"]
+             "init_grav", "ghost", "extra_ghost", "drift_part",
+             "drift_gpart", "kick1", "kick2", "timestep", "send", "recv",
+             "grav_top_level", "grav_long_range", "grav_mm", "grav_down",
+             "cooling", "sourceterms", "count"]
+
 SUBTYPES = ["none", "density", "gradient", "force", "grav", "external_grav",
             "tend", "xv", "rho", "gpart", "multipole", "spart", "count"]
 
@@ -69,14 +104,16 @@ FULLTYPES = ["self/force", "self/density", "self/grav", "sub_self/force",
              "sub_pair/density", "recv/xv", "send/xv", "recv/rho", "send/rho",
              "recv/tend", "send/tend"]
 
-#  Get a number of colours for the various types.
-colours = ["black", "gray", "rosybrown", "firebrick", "red", "darksalmon",
-           "sienna", "sandybrown", "bisque", "tan", "moccasin", "gold", "darkkhaki",
-           "lightgoldenrodyellow", "olivedrab", "chartreuse", "darksage", "lightgreen",
-           "green", "mediumseagreen", "mediumaquamarine", "mediumturquoise", "darkslategrey",
-           "cyan", "cadetblue", "skyblue", "dodgerblue", "slategray", "darkblue",
-           "slateblue", "blueviolet", "mediumorchid", "purple", "magenta", "hotpink",
-           "pink"]
+#  A number of colours for the various types. Recycled when there are
+#  more task types than colours...
+colours = ["cyan", "lightgray", "darkblue", "yellow", "tan", "dodgerblue",
+           "sienna", "aquamarine", "bisque", "blue", "green", "lightgreen",
+           "brown", "purple", "moccasin", "olivedrab", "chartreuse",
+           "darksage", "darkgreen", "green", "mediumseagreen",
+           "mediumaquamarine", "darkslategrey", "mediumturquoise",
+           "black", "cadetblue", "skyblue", "red", "slategray", "gold",
+           "slateblue", "blueviolet", "mediumorchid", "firebrick",
+           "magenta", "hotpink", "pink", "orange", "lightgreen"]
 maxcolours = len(colours)
 
 #  Set colours of task/subtype.
@@ -87,30 +124,21 @@ for task in TASKTYPES:
     ncolours = (ncolours + 1) % maxcolours
 
 SUBCOLOURS = {}
-for task in SUBTYPES:
+for task in FULLTYPES:
     SUBCOLOURS[task] = colours[ncolours]
     ncolours = (ncolours + 1) % maxcolours
 
-for task in FULLTYPES:
+for task in SUBTYPES:
     SUBCOLOURS[task] = colours[ncolours]
     ncolours = (ncolours + 1) % maxcolours
 
-#  Show docs if help is requested.
-if len( sys.argv ) == 2 and ( sys.argv[1][0:2] == "-h" or sys.argv[1][0:3] == "--h" ):
-    from pydoc import help
-    help( "__main__" )
-    sys.exit( 0 )
-
-#  Handle command-line.
-if len( sys.argv ) != 3 and len( sys.argv ) != 4:
-    print "Usage: ", sys.argv[0], "input.dat output.png [time-range-ms]"
-    sys.exit(1)
-
-infile = sys.argv[1]
-outpng = sys.argv[2]
-delta_t = 0
-if len( sys.argv ) == 4:
-    delta_t = int(sys.argv[3])
+#  For fiddling with colours...
+if args.verbose:
+    print "#Selected colours:"
+    for task in sorted(TASKCOLOURS.keys()):
+        print "# " + task + ": " + TASKCOLOURS[task]
+    for task in sorted(SUBCOLOURS.keys()):
+        print "# " + task + ": " + SUBCOLOURS[task]
 
 #  Read input.
 data = pl.loadtxt( infile )
@@ -118,51 +146,61 @@ data = pl.loadtxt( infile )
 nthread = int(max(data[:,0])) + 1
 print "Number of threads:", nthread
 
-# Recover the start and end time
+#  Recover the start and end time
 full_step = data[0,:]
 tic_step = int(full_step[4])
 toc_step = int(full_step[5])
-CPU_CLOCK = float(full_step[-1])
+CPU_CLOCK = float(full_step[-1]) / 1000.0
 data = data[1:,:]
+if args.verbose:
+    print "CPU frequency:", CPU_CLOCK * 1000.0
 
-print "CPU frequency:", CPU_CLOCK
-
-# Avoid start and end times of zero.
+#  Avoid start and end times of zero.
 data = data[data[:,4] != 0]
 data = data[data[:,5] != 0]
 
-# Calculate the time range, if not given.
-delta_t = delta_t * CPU_CLOCK / 1000
+#  Calculate the time range, if not given.
+delta_t = delta_t * CPU_CLOCK
 if delta_t == 0:
-    dt = max(data[:,5]) - min(data[:,4])
+    dt = toc_step - tic_step
     if dt > delta_t:
         delta_t = dt
-    print "Data range: ", delta_t / CPU_CLOCK * 1000, "ms"
+    print "Data range: ", delta_t / CPU_CLOCK, "ms"
 
-# Once more doing the real gather and plots this time.
-start_t = tic_step 
+#  Once more doing the real gather and plots this time.
+start_t = float(tic_step)
 data[:,4] -= start_t
 data[:,5] -= start_t
-end_t = (toc_step - start_t) / CPU_CLOCK * 1000
+end_t = (toc_step - start_t) / CPU_CLOCK
 
 tasks = {}
 tasks[-1] = []
-for i in range(nthread):
+for i in range(nthread*expand):
     tasks[i] = []
 
+#  Counters for each thread when expanding.
+ecounter = []
+for i in range(nthread):
+    ecounter.append(0)
+
 num_lines = pl.size(data) / 10
 for line in range(num_lines):
     thread = int(data[line,0])
+
+    # Expand to cover extra lines if expanding.
+    ethread = thread * expand + (ecounter[thread] % expand)
+    ecounter[thread] = ecounter[thread] + 1
+    thread = ethread
+
     tasks[thread].append({})
     tasktype = TASKTYPES[int(data[line,1])]
     subtype = SUBTYPES[int(data[line,2])]
     tasks[thread][-1]["type"] = tasktype
     tasks[thread][-1]["subtype"] = subtype
-    tic = int(data[line,4]) / CPU_CLOCK * 1000
-    toc = int(data[line,5]) / CPU_CLOCK * 1000
+    tic = int(data[line,4]) / CPU_CLOCK
+    toc = int(data[line,5]) / CPU_CLOCK
     tasks[thread][-1]["tic"] = tic
     tasks[thread][-1]["toc"] = toc
-    tasks[thread][-1]["t"] = (toc + tic)/ 2
     if "self" in tasktype or "pair" in tasktype:
         fulltype = tasktype + "/" + subtype
         if fulltype in SUBCOLOURS:
@@ -171,31 +209,24 @@ for line in range(num_lines):
             tasks[thread][-1]["colour"] = SUBCOLOURS[subtype]
     else:
         tasks[thread][-1]["colour"] = TASKCOLOURS[tasktype]
-    
-for thread in range(nthread):
-    tasks[thread] = sorted(tasks[thread], key=lambda l: l["t"])
-            
+
+# Use expanded threads from now on.
+nthread = nthread * expand
+
 typesseen = []
 fig = pl.figure()
 ax = fig.add_subplot(1,1,1)
-ax.set_xlim(-delta_t * 0.03 * 1000 / CPU_CLOCK, delta_t * 1.03 * 1000 / CPU_CLOCK)
+ax.set_xlim(-delta_t * 0.01 / CPU_CLOCK, delta_t * 1.01 / CPU_CLOCK)
 ax.set_ylim(0, nthread)
-tictoc = np.zeros(2)
 for i in range(nthread):
 
     #  Collect ranges and colours into arrays.
-    tictocs = np.zeros(len(tasks[i])*2)
-    colours = np.empty(len(tasks[i])*2, dtype='object')
-    coloursseen = []
+    tictocs = []
+    colours = []
     j = 0
     for task in tasks[i]:
-        tictocs[j] = task["tic"]
-        tictocs[j+1] = task["toc"]
-        colours[j] = task["colour"]
-        colours[j+1] = task["colour"]
-        j = j + 2
-        if task["colour"] not in coloursseen:
-            coloursseen.append(task["colour"])
+        tictocs.append((task["tic"], task["toc"] - task["tic"]))
+        colours.append(task["colour"])
 
         #  Legend support, collections don't add to this.
         if task["subtype"] != "none":
@@ -206,31 +237,33 @@ for i in range(nthread):
             pl.plot([], [], color=task["colour"], label=qtask)
             typesseen.append(qtask)
 
-    #  Now plot each colour, faster to use a mask to select colour ranges.
-    for colour in coloursseen:
-        collection = collections.BrokenBarHCollection.span_where(tictocs, ymin=i+0.05, ymax=i+0.95,
-                                                                 where=colours == colour,
-                                                                 facecolor=colour,
-                                                                 linewidths=0)
-        ax.add_collection(collection)
-
+    #  Now plot.
+    ax.broken_barh(tictocs, [i+0.05,0.90], facecolors = colours, linewidth=0)
 
 #  Legend and room for it.
 nrow = len(typesseen) / 5
-if len(typesseen) * 5 < nrow:
-    nrow = nrow + 1
-ax.fill_between([0, 0], nthread+0.5, nthread + nrow + 0.5, facecolor="white")
-ax.set_ylim(0, nthread + nrow + 1)
-ax.legend(loc=1, shadow=True, mode="expand", ncol=5)
+if not args.nolegend:
+    if nrow * 5 < len(typesseen):
+        nrow = nrow + 1
+    ax.fill_between([0, 0], nthread+0.5, nthread + nrow + 0.5, facecolor="white")
+    ax.set_ylim(0, nthread + nrow + 1)
+    ax.legend(loc=1, shadow=True, mode="expand", ncol=5)
 
 # Start and end of time-step
 ax.plot([0, 0], [0, nthread + nrow + 1], 'k--', linewidth=1)
 ax.plot([end_t, end_t], [0, nthread + nrow + 1], 'k--', linewidth=1)
 
 ax.set_xlabel("Wall clock time [ms]")
-ax.set_ylabel("Thread ID" )
+if expand == 1:
+    ax.set_ylabel("Thread ID" )
+else:
+    ax.set_ylabel("Thread ID * " + str(expand) )
 ax.set_yticks(pl.array(range(nthread)), True)
 
+loc = plticker.MultipleLocator(base=expand)
+ax.yaxis.set_major_locator(loc)
+ax.grid(True, which='major', axis="y", linestyle="-")
+
 pl.show()
 pl.savefig(outpng)
 print "Graphics done, output written to", outpng
diff --git a/examples/plot_tasks_MPI.py b/examples/plot_tasks_MPI.py
index c95bfa1fd2d087cc907b57201c1a1397cbeb1460..83465aee87e8b641775d760fa4db2f06b125dd8b 100755
--- a/examples/plot_tasks_MPI.py
+++ b/examples/plot_tasks_MPI.py
@@ -1,13 +1,15 @@
 #!/usr/bin/env python
 """
 Usage:
-    plot_tasks_MPI.py input.dat png-output-prefix [time-range-ms]
+    plot_tasks_MPI.py [options] input.dat png-output-prefix
 
-where input.dat is a thread info file for a step of an MPI run.  Use the '-y
-interval' flag of the swift MPI commands to create these. The output plots
-will be called 'png-output-prefix<mpi-rank>.png', i.e. one each for all the
-threads in each MPI rank. Use the time-range-ms in millisecs to produce
-plots with the same time span.
+where input.dat is a thread info file for a step.  Use the '-y interval' flag
+of the swift MPI command to create these. The output plots will be called
+'png-output-prefix<mpi-rank>.png', i.e. one each for all the threads in each
+MPI rank.  The --limit option can be used to produce plots with the same time
+span and the --expand option to expand each thread line into '*expand' lines,
+so that adjacent tasks of the same type can be distinguished. Other options
+can be seen using the --help flag.
 
 See the command 'process_plot_tasks_MPI' to efficiently wrap this command to
 process a number of thread info files and create an HTML file to view them.
@@ -17,7 +19,7 @@ This file is part of SWIFT.
 Copyright (C) 2015 Pedro Gonnet (pedro.gonnet@durham.ac.uk),
                    Bert Vandenbroucke (bert.vandenbroucke@ugent.be)
                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
-                   Peter W. Draper (p.w.draper@durham.ac.uk)
+          (C) 2017 Peter W. Draper (p.w.draper@durham.ac.uk)
 All Rights Reserved.
 
 This program is free software: you can redistribute it and/or modify
@@ -35,13 +37,42 @@ along with this program.  If not, see <http://www.gnu.org/licenses/>.
 """
 
 import matplotlib
-import matplotlib.collections as collections
 matplotlib.use("Agg")
+import matplotlib.collections as collections
+import matplotlib.ticker as plticker
 import pylab as pl
-import numpy as np
 import sys
-#import warnings
-#warnings.simplefilter("error")
+import argparse
+
+#  Handle the command line.
+parser = argparse.ArgumentParser(description="Plot task graphs")
+
+parser.add_argument("input", help="Thread data file (-y output)")
+parser.add_argument("outbase", help="Base name for output graphic files (PNG)")
+parser.add_argument("-l", "--limit", dest="limit",
+                    help="Upper time limit in millisecs (def: depends on data)",
+                    default=0, type=int)
+parser.add_argument("-e", "--expand", dest="expand",
+                    help="Thread expansion factor (def: 1)",
+                    default=1, type=int)
+parser.add_argument("--height", dest="height",
+                    help="Height of plot in inches (def: 4)",
+                    default=4., type=float)
+parser.add_argument("--width", dest="width",
+                    help="Width of plot in inches (def: 16)",
+                    default=16., type=float)
+parser.add_argument("--nolegend", dest="nolegend",
+                    help="Do not show the legend (def: legend is shown)",
+                    default=False, action="store_true")
+parser.add_argument("-v", "--verbose", dest="verbose",
+                    help="Show colour assignments and other details (def: False)",
+                    default=False, action="store_true")
+
+args = parser.parse_args()
+infile = args.input
+outbase = args.outbase
+delta_t = args.limit
+expand = args.expand
 
 #  Basic plot configuration.
 PLOT_PARAMS = {"axes.labelsize": 10,
@@ -50,7 +81,7 @@ PLOT_PARAMS = {"axes.labelsize": 10,
                "legend.fontsize": 12,
                "xtick.labelsize": 10,
                "ytick.labelsize": 10,
-               "figure.figsize" : (16., 4.),
+               "figure.figsize" : (args.width, args.height),
                "figure.subplot.left" : 0.03,
                "figure.subplot.right" : 0.995,
                "figure.subplot.bottom" : 0.1,
@@ -64,26 +95,31 @@ pl.rcParams.update(PLOT_PARAMS)
 
 #  Tasks and subtypes. Indexed as in tasks.h.
 TASKTYPES = ["none", "sort", "self", "pair", "sub_self", "sub_pair",
-             "init", "ghost", "extra_ghost", "drift", "kick1", "kick2",
-             "timestep", "send", "recv", "grav_gather_m", "grav_fft",
-             "grav_mm", "grav_up", "cooling", "sourceterms", "count"]
+             "init_grav", "ghost", "extra_ghost", "drift_part", "drift_gpart",
+             "kick1", "kick2", "timestep", "send", "recv", "grav_top_level",
+             "grav_long_range", "grav_mm", "grav_down", "cooling",
+             "sourceterms", "count"]
+
 SUBTYPES = ["none", "density", "gradient", "force", "grav", "external_grav",
-            "tend", "xv", "rho", "gpart", "count"]
+            "tend", "xv", "rho", "gpart", "multipole", "spart", "count"]
 
 #  Task/subtypes of interest.
-FULLTYPES = ["self/force", "self/density", "sub_self/force",
-             "sub_self/density", "pair/force", "pair/density", "sub_pair/force",
+FULLTYPES = ["self/force", "self/density", "self/grav", "sub_self/force",
+             "sub_self/density", "pair/force", "pair/density", "pair/grav",
+             "sub_pair/force",
              "sub_pair/density", "recv/xv", "send/xv", "recv/rho", "send/rho",
              "recv/tend", "send/tend"]
 
-#  Get a number of colours for the various types.
-colours = ["black", "gray", "rosybrown", "firebrick", "red", "darksalmon",
-           "sienna", "sandybrown", "bisque", "tan", "moccasin", "gold", "darkkhaki",
-           "lightgoldenrodyellow", "olivedrab", "chartreuse", "darksage", "lightgreen",
-           "green", "mediumseagreen", "mediumaquamarine", "mediumturquoise", "darkslategrey",
-           "cyan", "cadetblue", "skyblue", "dodgerblue", "slategray", "darkblue",
-           "slateblue", "blueviolet", "mediumorchid", "purple", "magenta", "hotpink",
-           "pink"]
+#  A number of colours for the various types. Recycled when there are
+#  more task types than colours...
+colours = ["cyan", "lightgray", "darkblue", "yellow", "tan", "dodgerblue",
+           "sienna", "aquamarine", "bisque", "blue", "green", "lightgreen",
+           "brown", "purple", "moccasin", "olivedrab", "chartreuse",
+           "darksage", "darkgreen", "green", "mediumseagreen",
+           "mediumaquamarine", "darkslategrey", "mediumturquoise",
+           "black", "cadetblue", "skyblue", "red", "slategray", "gold",
+           "slateblue", "blueviolet", "mediumorchid", "firebrick",
+           "magenta", "hotpink", "pink", "orange", "lightgreen"]
 maxcolours = len(colours)
 
 #  Set colours of task/subtype.
@@ -94,43 +130,30 @@ for task in TASKTYPES:
     ncolours = (ncolours + 1) % maxcolours
 
 SUBCOLOURS = {}
-for task in SUBTYPES:
+for task in FULLTYPES:
     SUBCOLOURS[task] = colours[ncolours]
     ncolours = (ncolours + 1) % maxcolours
 
-for task in FULLTYPES:
+for task in SUBTYPES:
     SUBCOLOURS[task] = colours[ncolours]
     ncolours = (ncolours + 1) % maxcolours
 
-#  Show docs if help is requested.
-if len( sys.argv ) == 2 and ( sys.argv[1][0:2] == "-h" or sys.argv[1][0:3] == "--h" ):
-    from pydoc import help
-    help( "__main__" )
-    sys.exit( 0 )
-
-#  Handle command-line.
-if len( sys.argv ) != 3 and len( sys.argv ) != 4:
-    print "Usage: ", sys.argv[0], "input.dat png-output-prefix [time-range-ms]"
-    sys.exit(1)
-
-
-infile = sys.argv[1]
-outbase = sys.argv[2]
-delta_t = 0
-if len( sys.argv ) == 4:
-    delta_t = int(sys.argv[3])
+#  For fiddling with colours...
+if args.verbose:
+    print "#Selected colours:"
+    for task in sorted(TASKCOLOURS.keys()):
+        print "# " + task + ": " + TASKCOLOURS[task]
+    for task in sorted(SUBCOLOURS.keys()):
+        print "# " + task + ": " + SUBCOLOURS[task]
 
 #  Read input.
 data = pl.loadtxt( infile )
 
-# Recover the start and end time
+#  Get CPU_CLOCK to convert ticks into milliseconds.
 full_step = data[0,:]
-tic_step = int(full_step[5])
-toc_step = int(full_step[6])
-CPU_CLOCK = float(full_step[-1])
-
-print "CPU frequency:", CPU_CLOCK
-
+CPU_CLOCK = float(full_step[-1]) / 1000.0
+if args.verbose:
+    print "CPU frequency:", CPU_CLOCK * 1000.0
 
 nranks = int(max(data[:,0])) + 1
 print "Number of ranks:", nranks
@@ -144,60 +167,74 @@ sdata = sdata[sdata[:,6] != 0]
 # Each rank can have different clock (compute node), but we want to use the
 # same delta times range for comparisons, so we suck it up and take the hit of
 # precalculating this, unless the user knows better.
-delta_t = delta_t * CPU_CLOCK / 1000
+delta_t = delta_t * CPU_CLOCK
 if delta_t == 0:
     for rank in range(nranks):
         data = sdata[sdata[:,0] == rank]
-        dt = max(data[:,6]) - min(data[:,5])
+        full_step = data[0,:]
+        tic_step = int(full_step[5])
+        toc_step = int(full_step[6])
+        dt = toc_step - tic_step
         if dt > delta_t:
             delta_t = dt
-    print "Data range: ", delta_t / CPU_CLOCK * 1000, "ms"
-
+    print "Data range: ", delta_t / CPU_CLOCK, "ms"
 
 # Once more doing the real gather and plots this time.
 for rank in range(nranks):
     data = sdata[sdata[:,0] == rank]
 
+    #  Start and end times for this rank.
     full_step = data[0,:]
     tic_step = int(full_step[5])
     toc_step = int(full_step[6])
     data = data[1:,:]
     typesseen = []
+    nethread = 0
 
     #  Dummy image for ranks that have no tasks.
     if data.size == 0:
         print "rank ", rank, " has no tasks"
         fig = pl.figure()
         ax = fig.add_subplot(1,1,1)
-        ax.set_xlim(-delta_t * 0.03 * 1000 / CPU_CLOCK, delta_t * 1.03 * 1000 / CPU_CLOCK)
-        ax.set_ylim(0, nthread)
+        ax.set_xlim(-delta_t * 0.01 / CPU_CLOCK, delta_t * 1.01 / CPU_CLOCK)
+        ax.set_ylim(0, nthread*expand)
         start_t = tic_step
-        end_t = (toc_step - start_t) / CPU_CLOCK * 1000
+        end_t = (toc_step - start_t) / CPU_CLOCK
     else:
 
-        start_t = tic_step
+        start_t = float(tic_step)
         data[:,5] -= start_t
         data[:,6] -= start_t
-        end_t = (toc_step - start_t) / CPU_CLOCK * 1000
+        end_t = (toc_step - start_t) / CPU_CLOCK
 
         tasks = {}
         tasks[-1] = []
-        for i in range(nthread):
+        for i in range(nthread*expand):
             tasks[i] = []
 
+        # Counters for each thread when expanding.
+        ecounter = []
+        for i in range(nthread):
+            ecounter.append(0)
+
         num_lines = pl.shape(data)[0]
         for line in range(num_lines):
             thread = int(data[line,1])
+
+            # Expand to cover extra lines if expanding.
+            ethread = thread * expand + (ecounter[thread] % expand)
+            ecounter[thread] = ecounter[thread] + 1
+            thread = ethread
+
             tasks[thread].append({})
             tasktype = TASKTYPES[int(data[line,2])]
             subtype = SUBTYPES[int(data[line,3])]
             tasks[thread][-1]["type"] = tasktype
             tasks[thread][-1]["subtype"] = subtype
-            tic = int(data[line,5]) / CPU_CLOCK * 1000
-            toc = int(data[line,6]) / CPU_CLOCK * 1000
+            tic = int(data[line,5]) / CPU_CLOCK
+            toc = int(data[line,6]) / CPU_CLOCK
             tasks[thread][-1]["tic"] = tic
             tasks[thread][-1]["toc"] = toc
-            tasks[thread][-1]["t"] = (toc + tic)/ 2
             if "self" in tasktype or "pair" in tasktype or "recv" in tasktype or "send" in tasktype:
                 fulltype = tasktype + "/" + subtype
                 if fulltype in SUBCOLOURS:
@@ -207,29 +244,23 @@ for rank in range(nranks):
             else:
                 tasks[thread][-1]["colour"] = TASKCOLOURS[tasktype]
 
-        for thread in range(nthread):
-            tasks[thread] = sorted(tasks[thread], key=lambda l: l["t"])
+        # Use expanded threads from now on.
+        nethread = nthread * expand
 
+        typesseen = []
         fig = pl.figure()
         ax = fig.add_subplot(1,1,1)
-        ax.set_xlim(-delta_t * 0.03 * 1000 / CPU_CLOCK, delta_t * 1.03 * 1000 / CPU_CLOCK)
-        ax.set_ylim(0, nthread)
-        tictoc = np.zeros(2)
-        for i in range(nthread):
+        ax.set_xlim(-delta_t * 0.01 / CPU_CLOCK, delta_t * 1.01 / CPU_CLOCK)
+        ax.set_ylim(0, nethread)
+        for i in range(nethread):
 
             #  Collect ranges and colours into arrays.
-            tictocs = np.zeros(len(tasks[i])*2)
-            colours = np.empty(len(tasks[i])*2, dtype='object')
-            coloursseen = []
+            tictocs = []
+            colours = []
             j = 0
             for task in tasks[i]:
-                tictocs[j] = task["tic"]
-                tictocs[j+1] = task["toc"]
-                colours[j] = task["colour"]
-                colours[j+1] = task["colour"]
-                j = j + 2
-                if task["colour"] not in coloursseen:
-                    coloursseen.append(task["colour"])
+                tictocs.append((task["tic"], task["toc"] - task["tic"]))
+                colours.append(task["colour"])
 
                 #  Legend support, collections don't add to this.
                 if task["subtype"] != "none":
@@ -241,33 +272,34 @@ for rank in range(nranks):
                     pl.plot([], [], color=task["colour"], label=qtask)
                     typesseen.append(qtask)
 
-            #  Now plot each colour, faster to use a mask to select colour ranges.
-            for colour in coloursseen:
-                collection = collections.BrokenBarHCollection.span_where(tictocs,
-                                                                         ymin=i+0.05,
-                                                                         ymax=i+0.95,
-                                                                         where=colours == colour,
-                                                                         facecolor=colour,
-                                                                         linewidths=0)
-                ax.add_collection(collection)
+            #  Now plot.
+            ax.broken_barh(tictocs, [i+0.05,0.90], facecolors = colours, linewidth=0)
 
 
     #  Legend and room for it.
     nrow = len(typesseen) / 5
     if len(typesseen) * 5 < nrow:
         nrow = nrow + 1
-    ax.fill_between([0, 0], nthread+0.5, nthread + nrow + 0.5, facecolor="white")
-    ax.set_ylim(0, nthread + nrow + 1)
+    ax.fill_between([0, 0], nethread+0.5, nethread + nrow + 0.5, facecolor="white")
+    ax.set_ylim(0, nethread + nrow + 1)
     if data.size > 0:
         ax.legend(loc=1, shadow=True, mode="expand", ncol=5)
 
     # Start and end of time-step
-    ax.plot([0, 0], [0, nthread + nrow + 1], 'k--', linewidth=1)
-    ax.plot([end_t, end_t], [0, nthread + nrow + 1], 'k--', linewidth=1)
+    ax.plot([0, 0], [0, nethread + nrow + 1], 'k--', linewidth=1)
+    ax.plot([end_t, end_t], [0, nethread + nrow + 1], 'k--', linewidth=1)
 
     ax.set_xlabel("Wall clock time [ms]")
-    ax.set_ylabel("Thread ID for MPI Rank " + str(rank) )
-    ax.set_yticks(pl.array(range(nthread)), True)
+
+    if expand == 1:
+        ax.set_ylabel("Thread ID" )
+    else:
+        ax.set_ylabel("Thread ID * " + str(expand) )
+    ax.set_yticks(pl.array(range(nethread)), True)
+
+    loc = plticker.MultipleLocator(base=expand)
+    ax.yaxis.set_major_locator(loc)
+    ax.grid(True, which='major', axis="y", linestyle="-")
 
     pl.show()
     outpng = outbase + str(rank) + ".png"
diff --git a/examples/process_plot_tasks b/examples/process_plot_tasks
index cf19401b582c29f7e35073be93569ea8039f958d..b46fce03d8c5f21046a0e4a95a304e006c7b2293 100755
--- a/examples/process_plot_tasks
+++ b/examples/process_plot_tasks
@@ -56,7 +56,8 @@ done
 
 #  And process them,
 echo "Processing thread info files..."
-echo $list | xargs -P $NPROCS -n 3 /bin/bash -c "./plot_tasks.py \$0 \$2 $TIMERANGE"
+echo $list | xargs -P $NPROCS -n 3 /bin/bash -c "./plot_tasks.py --expand 1 --limit $TIMERANGE --width 16 --height 4 \$0 \$2 "
+echo $list | xargs -P $NPROCS -n 3 /bin/bash -c "./analyse_tasks.py \$0 > \$2.stats"
 
 echo "Writing output index.html file"
 #  Construct document - serial.
@@ -75,8 +76,21 @@ echo $list | xargs -n 3 | while read f s g; do
 <h2>Step $s</h2>
 EOF
     cat <<EOF >> index.html
-<a href="step${s}r${i}.png"><img src="step${s}r${i}.png" width=400px/></a>
+<a href="step${s}r${i}.html"><img src="step${s}r${i}.png" width=400px/></a>
 EOF
+    cat <<EOF > step${s}r${i}.html
+ <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<html>
+<body>
+<img src="step${s}r${i}.png">
+<pre>
+EOF
+cat step${s}r${i}.stats >> step${s}r${i}.html
+cat <<EOF >> step${s}r${i}.html
+</pre>
+</body>
+</html>
+EOF
 done
 
 cat <<EOF >> index.html
diff --git a/examples/process_plot_tasks_MPI b/examples/process_plot_tasks_MPI
index d3eb5d4a5fc5918b287cd5d98efcc5881b6f910c..b2672b3711823eb87d0bede5b1ffd8945a735f98 100755
--- a/examples/process_plot_tasks_MPI
+++ b/examples/process_plot_tasks_MPI
@@ -61,7 +61,8 @@ nrank=$(($nrank-1))
 
 #  And process them,
 echo "Processing thread info files..."
-echo $list | xargs -P $NPROCS -n 3 /bin/bash -c "./plot_tasks_MPI.py \$0 \$2 $TIMERANGE"
+echo $list | xargs -P $NPROCS -n 3 /bin/bash -c "./plot_tasks_MPI.py --expand 1 --limit $TIMERANGE \$0 \$2 "
+echo $list | xargs -P $NPROCS -n 3 /bin/bash -c "./analyse_tasks_MPI.py \$0 > \$2.stats"
 
 echo "Writing output index.html file"
 #  Construct document - serial.
@@ -78,12 +79,31 @@ EOF
 echo $list | xargs -n 3 | while read f s g; do
     cat <<EOF >> index.html
 <h2>Step $s</h2>
+<ul style="list-style-type:none">
+<li>
 EOF
     for i in $(seq 0 $nrank); do
-        cat <<EOF >> index.html
-<a href="step${s}r${i}.png"><img src="step${s}r${i}.png" width=400px/></a>
-EOF
+        cat <<EOF2 >> index.html
+<a href="step${s}r${i}.html"><img src="step${s}r${i}.png" width=400px/></a>
+EOF2
+    cat <<EOF2 > step${s}r${i}.html
+ <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<html>
+<body>
+<img src="step${s}r${i}.png">
+<pre>
+EOF2
+cat step${s}r.stats >> step${s}r${i}.html
+cat <<EOF2 >> step${s}r${i}.html
+</pre>
+</body>
+</html>
+EOF2
     done
+cat <<EOF >> index.html
+</li>
+</ul>
+EOF
 done
 
 cat <<EOF >> index.html
diff --git a/m4/ax_gcc_archflag.m4 b/m4/ax_gcc_archflag.m4
index 0d0bf431138689487a5fb63a419dfc58ae70d5d0..bba53a4c8a8cb363a017c55c4e4ebbb4c6528dae 100644
--- a/m4/ax_gcc_archflag.m4
+++ b/m4/ax_gcc_archflag.m4
@@ -107,7 +107,7 @@ case $host_cpu in
 	    *2?6[[ad]]?:*:*:*) ax_gcc_arch="sandybridge corei7-avx corei7 core2 pentium-m pentium3 pentiumpro" ;;
 	    *3?6[[ae]]?:*:*:*) ax_gcc_arch="ivybridge core-avx-i corei7-avx corei7 core2 pentium-m pentium3 pentiumpro" ;;
 	    *3?6[[cf]]?:*:*:*|*4?6[[56]]?:*:*:*) ax_gcc_arch="haswell core-avx2 core-avx-i corei7-avx corei7 core2 pentium-m pentium3 pentiumpro" ;;
-	    *3?6d?:*:*:*) ax_gcc_arch="broadwell core-avx2 core-avx-i corei7-avx corei7 core2 pentium-m pentium3 pentiumpro" ;;
+	    *3?6d?:*:*:*|*4?6f?:*:*:*) ax_gcc_arch="broadwell core-avx2 core-avx-i corei7-avx corei7 core2 pentium-m pentium3 pentiumpro" ;;
 	    *1?6c?:*:*:*|*2?6[[67]]?:*:*:*|*3?6[[56]]?:*:*:*) ax_gcc_arch="bonnell atom core2 pentium-m pentium3 pentiumpro" ;;
 	    *3?67?:*:*:*|*[[45]]?6[[ad]]?:*:*:*) ax_gcc_arch="silvermont atom core2 pentium-m pentium3 pentiumpro" ;;
 	    *000?f[[012]]?:*:*:*|?f[[012]]?:*:*:*|f[[012]]?:*:*:*) ax_gcc_arch="pentium4 pentiumpro" ;;
diff --git a/src/Makefile.am b/src/Makefile.am
index 7bec5327f4759fcf7d3e1af9d041677ffbc7ab55..2ddcdb0908201c65053d7cc5380a4217277b5c13 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -63,7 +63,7 @@ AM_SOURCES = space.c runner.c queue.c task.c cell.c engine.c \
 nobase_noinst_HEADERS = align.h approx_math.h atomic.h cycle.h error.h inline.h kernel_hydro.h kernel_gravity.h \
 		 kernel_long_gravity.h vector.h cache.h runner_doiact.h runner_doiact_vec.h runner_doiact_grav.h runner_doiact_fft.h \
                  runner_doiact_nosort.h units.h intrinsics.h minmax.h kick.h timestep.h drift.h adiabatic_index.h io_properties.h \
-		 dimension.h equation_of_state.h part_type.h \
+		 dimension.h equation_of_state.h part_type.h periodic.h \
 		 gravity.h gravity_io.h \
 		 gravity/Default/gravity.h gravity/Default/gravity_iact.h gravity/Default/gravity_io.h \
 		 gravity/Default/gravity_debug.h gravity/Default/gravity_part.h  \
@@ -86,6 +86,8 @@ nobase_noinst_HEADERS = align.h approx_math.h atomic.h cycle.h error.h inline.h
                  hydro/Gizmo/hydro_slope_limiters_cell.h \
                  hydro/Gizmo/hydro_slope_limiters_face.h \
                  hydro/Gizmo/hydro_slope_limiters.h \
+                 hydro/Gizmo/hydro_unphysical.h \
+                 hydro/Gizmo/hydro_velocities.h \
                  hydro/Shadowswift/hydro_debug.h \
                  hydro/Shadowswift/hydro_gradients.h hydro/Shadowswift/hydro.h \
                  hydro/Shadowswift/hydro_iact.h \
diff --git a/src/active.h b/src/active.h
index 02e504f762735994e6c57f7e155071fede016713..58e88835b6f51ae15f9fd7270c0e1f89bbd6d61a 100644
--- a/src/active.h
+++ b/src/active.h
@@ -29,25 +29,48 @@
 #include "timeline.h"
 
 /**
- * @brief Check that a cell been drifted to the current time.
+ * @brief Check that the #part in a #cell have been drifted to the current time.
  *
  * @param c The #cell.
  * @param e The #engine containing information about the current time.
  * @return 1 if the #cell has been drifted to the current time, 0 otherwise.
  */
-__attribute__((always_inline)) INLINE static int cell_is_drifted(
+__attribute__((always_inline)) INLINE static int cell_are_part_drifted(
     const struct cell *c, const struct engine *e) {
 
 #ifdef SWIFT_DEBUG_CHECKS
-  if (c->ti_old > e->ti_current)
+  if (c->ti_old_part > e->ti_current)
     error(
         "Cell has been drifted too far forward in time! c->ti_old=%lld (t=%e) "
         "and e->ti_current=%lld (t=%e)",
-        c->ti_old, c->ti_old * e->timeBase, e->ti_current,
+        c->ti_old_part, c->ti_old_part * e->timeBase, e->ti_current,
         e->ti_current * e->timeBase);
 #endif
 
-  return (c->ti_old == e->ti_current);
+  return (c->ti_old_part == e->ti_current);
+}
+
+/**
+ * @brief Check that the #gpart in a #cell have been drifted to the current
+ * time.
+ *
+ * @param c The #cell.
+ * @param e The #engine containing information about the current time.
+ * @return 1 if the #gpart have been drifted to the current time, 0 otherwise.
+ */
+__attribute__((always_inline)) INLINE static int cell_are_gpart_drifted(
+    const struct cell *c, const struct engine *e) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (c->ti_old_gpart > e->ti_current)
+    error(
+        "Cell has been drifted too far forward in time! c->ti_old=%lld (t=%e) "
+        "and e->ti_current=%lld (t=%e)",
+        c->ti_old_gpart, c->ti_old_gpart * e->timeBase, e->ti_current,
+        e->ti_current * e->timeBase);
+#endif
+
+  return (c->ti_old_gpart == e->ti_current);
 }
 
 /* Are cells / particles active for regular tasks ? */
diff --git a/src/cell.c b/src/cell.c
index ccc101243ccdffbb25d8a71353e65c9d393b7148..78defcd660eca9a580f4fdb86eaf0fe7ff5ac1ec 100644
--- a/src/cell.c
+++ b/src/cell.c
@@ -99,7 +99,8 @@ int cell_unpack(struct pcell *pc, struct cell *c, struct space *s) {
   c->h_max = pc->h_max;
   c->ti_end_min = pc->ti_end_min;
   c->ti_end_max = pc->ti_end_max;
-  c->ti_old = pc->ti_old;
+  c->ti_old_part = pc->ti_old_part;
+  c->ti_old_gpart = pc->ti_old_gpart;
   c->count = pc->count;
   c->gcount = pc->gcount;
   c->scount = pc->scount;
@@ -128,7 +129,8 @@ int cell_unpack(struct pcell *pc, struct cell *c, struct space *s) {
       if (k & 1) temp->loc[2] += temp->width[2];
       temp->depth = c->depth + 1;
       temp->split = 0;
-      temp->dx_max = 0.f;
+      temp->dx_max_part = 0.f;
+      temp->dx_max_gpart = 0.f;
       temp->dx_max_sort = 0.f;
       temp->nodeID = c->nodeID;
       temp->parent = c;
@@ -239,7 +241,8 @@ int cell_pack(struct cell *c, struct pcell *pc) {
   pc->h_max = c->h_max;
   pc->ti_end_min = c->ti_end_min;
   pc->ti_end_max = c->ti_end_max;
-  pc->ti_old = c->ti_old;
+  pc->ti_old_part = c->ti_old_part;
+  pc->ti_old_gpart = c->ti_old_gpart;
   pc->count = c->count;
   pc->gcount = c->gcount;
   pc->scount = c->scount;
@@ -1018,7 +1021,7 @@ void cell_clean_links(struct cell *c, void *data) {
 }
 
 /**
- * @brief Checks that the particles in a cell are at the
+ * @brief Checks that the #part in a cell are at the
  * current point in time
  *
  * Calls error() if the cell is not at the current time.
@@ -1026,7 +1029,7 @@ void cell_clean_links(struct cell *c, void *data) {
  * @param c Cell to act upon
  * @param data The current time on the integer time-line
  */
-void cell_check_particle_drift_point(struct cell *c, void *data) {
+void cell_check_part_drift_point(struct cell *c, void *data) {
 
 #ifdef SWIFT_DEBUG_CHECKS
 
@@ -1035,14 +1038,40 @@ void cell_check_particle_drift_point(struct cell *c, void *data) {
   /* Only check local cells */
   if (c->nodeID != engine_rank) return;
 
-  if (c->ti_old != ti_drift)
-    error("Cell in an incorrect time-zone! c->ti_old=%lld ti_drift=%lld",
-          c->ti_old, ti_drift);
+  if (c->ti_old_part != ti_drift)
+    error("Cell in an incorrect time-zone! c->ti_old_part=%lld ti_drift=%lld",
+          c->ti_old_part, ti_drift);
 
   for (int i = 0; i < c->count; ++i)
     if (c->parts[i].ti_drift != ti_drift)
       error("part in an incorrect time-zone! p->ti_drift=%lld ti_drift=%lld",
             c->parts[i].ti_drift, ti_drift);
+#else
+  error("Calling debugging code without debugging flag activated.");
+#endif
+}
+
+/**
+ * @brief Checks that the #gpart and #spart in a cell are at the
+ * current point in time
+ *
+ * Calls error() if the cell is not at the current time.
+ *
+ * @param c Cell to act upon
+ * @param data The current time on the integer time-line
+ */
+void cell_check_gpart_drift_point(struct cell *c, void *data) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+
+  const integertime_t ti_drift = *(integertime_t *)data;
+
+  /* Only check local cells */
+  if (c->nodeID != engine_rank) return;
+
+  if (c->ti_old_gpart != ti_drift)
+    error("Cell in an incorrect time-zone! c->ti_old_gpart=%lld ti_drift=%lld",
+          c->ti_old_gpart, ti_drift);
 
   for (int i = 0; i < c->gcount; ++i)
     if (c->gparts[i].ti_drift != ti_drift)
@@ -1622,7 +1651,7 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) {
           error("bad flags in sort task.");
 #endif
         scheduler_activate(s, ci->sorts);
-        if (ci->nodeID == engine_rank) scheduler_activate(s, ci->drift);
+        if (ci->nodeID == engine_rank) scheduler_activate(s, ci->drift_part);
       }
       if (cj->dx_max_sort > space_maxreldx * cj->dmin) {
         for (struct cell *finger = cj; finger != NULL;
@@ -1638,7 +1667,7 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) {
           error("bad flags in sort task.");
 #endif
         scheduler_activate(s, cj->sorts);
-        if (cj->nodeID == engine_rank) scheduler_activate(s, cj->drift);
+        if (cj->nodeID == engine_rank) scheduler_activate(s, cj->drift_part);
       }
     }
     /* Store current values of dx_max and h_max. */
@@ -1651,7 +1680,8 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) {
 
       /* Check whether there was too much particle motion, i.e. the
          cell neighbour conditions were violated. */
-      if (max(ci->h_max, cj->h_max) + ci->dx_max + cj->dx_max > cj->dmin)
+      if (max(ci->h_max, cj->h_max) + ci->dx_max_part + cj->dx_max_part >
+          cj->dmin)
         rebuild = 1;
 
 #ifdef WITH_MPI
@@ -1662,6 +1692,9 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) {
         scheduler_activate(s, ci->recv_xv);
         if (cell_is_active(ci, e)) {
           scheduler_activate(s, ci->recv_rho);
+#ifdef EXTRA_HYDRO_LOOP
+          scheduler_activate(s, ci->recv_gradient);
+#endif
           scheduler_activate(s, ci->recv_ti);
         }
 
@@ -1674,19 +1707,28 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) {
         scheduler_activate(s, l->t);
 
         /* Drift both cells, the foreign one at the level which it is sent. */
-        if (l->t->ci->drift)
-          scheduler_activate(s, l->t->ci->drift);
+        if (l->t->ci->drift_part)
+          scheduler_activate(s, l->t->ci->drift_part);
         else
           error("Drift task missing !");
-        if (t->type == task_type_pair) scheduler_activate(s, cj->drift);
+        if (t->type == task_type_pair) scheduler_activate(s, cj->drift_part);
 
         if (cell_is_active(cj, e)) {
+
           for (l = cj->send_rho; l != NULL && l->t->cj->nodeID != ci->nodeID;
                l = l->next)
             ;
           if (l == NULL) error("Missing link to send_rho task.");
           scheduler_activate(s, l->t);
 
+#ifdef EXTRA_HYDRO_LOOP
+          for (l = cj->send_gradient;
+               l != NULL && l->t->cj->nodeID != ci->nodeID; l = l->next)
+            ;
+          if (l == NULL) error("Missing link to send_gradient task.");
+          scheduler_activate(s, l->t);
+#endif
+
           for (l = cj->send_ti; l != NULL && l->t->cj->nodeID != ci->nodeID;
                l = l->next)
             ;
@@ -1700,6 +1742,9 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) {
         scheduler_activate(s, cj->recv_xv);
         if (cell_is_active(cj, e)) {
           scheduler_activate(s, cj->recv_rho);
+#ifdef EXTRA_HYDRO_LOOP
+          scheduler_activate(s, cj->recv_gradient);
+#endif
           scheduler_activate(s, cj->recv_ti);
         }
 
@@ -1712,19 +1757,28 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) {
         scheduler_activate(s, l->t);
 
         /* Drift both cells, the foreign one at the level which it is sent. */
-        if (l->t->ci->drift)
-          scheduler_activate(s, l->t->ci->drift);
+        if (l->t->ci->drift_part)
+          scheduler_activate(s, l->t->ci->drift_part);
         else
           error("Drift task missing !");
-        if (t->type == task_type_pair) scheduler_activate(s, ci->drift);
+        if (t->type == task_type_pair) scheduler_activate(s, ci->drift_part);
 
         if (cell_is_active(ci, e)) {
+
           for (l = ci->send_rho; l != NULL && l->t->cj->nodeID != cj->nodeID;
                l = l->next)
             ;
           if (l == NULL) error("Missing link to send_rho task.");
           scheduler_activate(s, l->t);
 
+#ifdef EXTRA_HYDRO_LOOP
+          for (l = ci->send_gradient;
+               l != NULL && l->t->cj->nodeID != cj->nodeID; l = l->next)
+            ;
+          if (l == NULL) error("Missing link to send_gradient task.");
+          scheduler_activate(s, l->t);
+#endif
+
           for (l = ci->send_ti; l != NULL && l->t->cj->nodeID != cj->nodeID;
                l = l->next)
             ;
@@ -1732,13 +1786,13 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) {
           scheduler_activate(s, l->t);
         }
       } else if (t->type == task_type_pair) {
-        scheduler_activate(s, ci->drift);
-        scheduler_activate(s, cj->drift);
+        scheduler_activate(s, ci->drift_part);
+        scheduler_activate(s, cj->drift_part);
       }
 #else
       if (t->type == task_type_pair) {
-        scheduler_activate(s, ci->drift);
-        scheduler_activate(s, cj->drift);
+        scheduler_activate(s, ci->drift_part);
+        scheduler_activate(s, cj->drift_part);
       }
 #endif
     }
@@ -1756,13 +1810,15 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) {
   if (c->ghost_out != NULL) scheduler_activate(s, c->ghost_out);
   if (c->ghost != NULL) scheduler_activate(s, c->ghost);
   if (c->init_grav != NULL) scheduler_activate(s, c->init_grav);
-  if (c->drift != NULL) scheduler_activate(s, c->drift);
+  if (c->drift_part != NULL) scheduler_activate(s, c->drift_part);
+  if (c->drift_gpart != NULL) scheduler_activate(s, c->drift_gpart);
   if (c->kick1 != NULL) scheduler_activate(s, c->kick1);
   if (c->kick2 != NULL) scheduler_activate(s, c->kick2);
   if (c->timestep != NULL) scheduler_activate(s, c->timestep);
+  if (c->grav_ghost[0] != NULL) scheduler_activate(s, c->grav_ghost[0]);
+  if (c->grav_ghost[1] != NULL) scheduler_activate(s, c->grav_ghost[1]);
   if (c->grav_down != NULL) scheduler_activate(s, c->grav_down);
   if (c->grav_long_range != NULL) scheduler_activate(s, c->grav_long_range);
-  if (c->grav_top_level != NULL) scheduler_activate(s, c->grav_top_level);
   if (c->cooling != NULL) scheduler_activate(s, c->cooling);
   if (c->sourceterms != NULL) scheduler_activate(s, c->sourceterms);
 
@@ -1790,30 +1846,28 @@ void cell_set_super(struct cell *c, struct cell *super) {
 }
 
 /**
- * @brief Recursively drifts particles of all kinds in a cell hierarchy.
+ * @brief Recursively drifts the #part in a cell hierarchy.
  *
  * @param c The #cell.
  * @param e The #engine (to get ti_current).
  */
-void cell_drift_particles(struct cell *c, const struct engine *e) {
+void cell_drift_part(struct cell *c, const struct engine *e) {
 
   const float hydro_h_max = e->hydro_properties->h_max;
   const double timeBase = e->timeBase;
-  const integertime_t ti_old = c->ti_old;
+  const integertime_t ti_old_part = c->ti_old_part;
   const integertime_t ti_current = e->ti_current;
   struct part *const parts = c->parts;
   struct xpart *const xparts = c->xparts;
-  struct gpart *const gparts = c->gparts;
-  struct spart *const sparts = c->sparts;
 
   /* Drift from the last time the cell was drifted to the current time */
-  const double dt = (ti_current - ti_old) * timeBase;
+  const double dt = (ti_current - ti_old_part) * timeBase;
   float dx_max = 0.f, dx2_max = 0.f;
   float dx_max_sort = 0.0f, dx2_max_sort = 0.f;
   float cell_h_max = 0.f;
 
   /* Check that we are actually going to move forward. */
-  if (ti_current < ti_old) error("Attempt to drift to the past");
+  if (ti_current < ti_old_part) error("Attempt to drift to the past");
 
   /* Are we not in a leaf ? */
   if (c->split) {
@@ -1824,37 +1878,15 @@ void cell_drift_particles(struct cell *c, const struct engine *e) {
         struct cell *cp = c->progeny[k];
 
         /* Collect */
-        cell_drift_particles(cp, e);
+        cell_drift_part(cp, e);
 
         /* Update */
-        dx_max = max(dx_max, cp->dx_max);
+        dx_max = max(dx_max, cp->dx_max_part);
         dx_max_sort = max(dx_max_sort, cp->dx_max_sort);
         cell_h_max = max(cell_h_max, cp->h_max);
       }
 
-  } else if (ti_current > ti_old) {
-
-    /* Loop over all the g-particles in the cell */
-    const size_t nr_gparts = c->gcount;
-    for (size_t k = 0; k < nr_gparts; k++) {
-
-      /* Get a handle on the gpart. */
-      struct gpart *const gp = &gparts[k];
-
-      /* Drift... */
-      drift_gpart(gp, dt, timeBase, ti_old, ti_current);
-
-      /* Compute (square of) motion since last cell construction */
-      const float dx2 = gp->x_diff[0] * gp->x_diff[0] +
-                        gp->x_diff[1] * gp->x_diff[1] +
-                        gp->x_diff[2] * gp->x_diff[2];
-      dx2_max = max(dx2_max, dx2);
-
-      /* Init gravity force fields. */
-      if (gpart_is_active(gp, e)) {
-        gravity_init_gpart(gp);
-      }
-    }
+  } else if (ti_current > ti_old_part) {
 
     /* Loop over all the gas particles in the cell */
     const size_t nr_parts = c->count;
@@ -1865,7 +1897,7 @@ void cell_drift_particles(struct cell *c, const struct engine *e) {
       struct xpart *const xp = &xparts[k];
 
       /* Drift... */
-      drift_part(p, xp, dt, timeBase, ti_old, ti_current);
+      drift_part(p, xp, dt, timeBase, ti_old_part, ti_current);
 
       /* Limit h to within the allowed range */
       p->h = min(p->h, hydro_h_max);
@@ -1889,6 +1921,86 @@ void cell_drift_particles(struct cell *c, const struct engine *e) {
       }
     }
 
+    /* Now, get the maximal particle motion from its square */
+    dx_max = sqrtf(dx2_max);
+    dx_max_sort = sqrtf(dx2_max_sort);
+
+  } else {
+
+    cell_h_max = c->h_max;
+    dx_max = c->dx_max_part;
+    dx_max_sort = c->dx_max_sort;
+  }
+
+  /* Store the values */
+  c->h_max = cell_h_max;
+  c->dx_max_part = dx_max;
+  c->dx_max_sort = dx_max_sort;
+
+  /* Update the time of the last drift */
+  c->ti_old_part = ti_current;
+}
+
+/**
+ * @brief Recursively drifts the #gpart in a cell hierarchy.
+ *
+ * @param c The #cell.
+ * @param e The #engine (to get ti_current).
+ */
+void cell_drift_gpart(struct cell *c, const struct engine *e) {
+
+  const double timeBase = e->timeBase;
+  const integertime_t ti_old_gpart = c->ti_old_gpart;
+  const integertime_t ti_current = e->ti_current;
+  struct gpart *const gparts = c->gparts;
+  struct spart *const sparts = c->sparts;
+
+  /* Drift from the last time the cell was drifted to the current time */
+  const double dt = (ti_current - ti_old_gpart) * timeBase;
+  float dx_max = 0.f, dx2_max = 0.f;
+
+  /* Check that we are actually going to move forward. */
+  if (ti_current < ti_old_gpart) error("Attempt to drift to the past");
+
+  /* Are we not in a leaf ? */
+  if (c->split) {
+
+    /* Loop over the progeny and collect their data. */
+    for (int k = 0; k < 8; k++)
+      if (c->progeny[k] != NULL) {
+        struct cell *cp = c->progeny[k];
+
+        /* Recurse */
+        cell_drift_gpart(cp, e);
+
+        /* Update */
+        dx_max = max(dx_max, cp->dx_max_gpart);
+      }
+
+  } else if (ti_current > ti_old_gpart) {
+
+    /* Loop over all the g-particles in the cell */
+    const size_t nr_gparts = c->gcount;
+    for (size_t k = 0; k < nr_gparts; k++) {
+
+      /* Get a handle on the gpart. */
+      struct gpart *const gp = &gparts[k];
+
+      /* Drift... */
+      drift_gpart(gp, dt, timeBase, ti_old_gpart, ti_current);
+
+      /* Compute (square of) motion since last cell construction */
+      const float dx2 = gp->x_diff[0] * gp->x_diff[0] +
+                        gp->x_diff[1] * gp->x_diff[1] +
+                        gp->x_diff[2] * gp->x_diff[2];
+      dx2_max = max(dx2_max, dx2);
+
+      /* Init gravity force fields. */
+      if (gpart_is_active(gp, e)) {
+        gravity_init_gpart(gp);
+      }
+    }
+
     /* Loop over all the star particles in the cell */
     const size_t nr_sparts = c->scount;
     for (size_t k = 0; k < nr_sparts; k++) {
@@ -1897,29 +2009,24 @@ void cell_drift_particles(struct cell *c, const struct engine *e) {
       struct spart *const sp = &sparts[k];
 
       /* Drift... */
-      drift_spart(sp, dt, timeBase, ti_old, ti_current);
+      drift_spart(sp, dt, timeBase, ti_old_gpart, ti_current);
 
       /* Note: no need to compute dx_max as all spart have a gpart */
     }
 
     /* Now, get the maximal particle motion from its square */
     dx_max = sqrtf(dx2_max);
-    dx_max_sort = sqrtf(dx2_max_sort);
 
   } else {
 
-    cell_h_max = c->h_max;
-    dx_max = c->dx_max;
-    dx_max_sort = c->dx_max_sort;
+    dx_max = c->dx_max_gpart;
   }
 
   /* Store the values */
-  c->h_max = cell_h_max;
-  c->dx_max = dx_max;
-  c->dx_max_sort = dx_max_sort;
+  c->dx_max_gpart = dx_max;
 
   /* Update the time of the last drift */
-  c->ti_old = ti_current;
+  c->ti_old_gpart = ti_current;
 }
 
 /**
diff --git a/src/cell.h b/src/cell.h
index 05fed82d79b0c3c4f8e4343813a4d6938d402bf8..dfee0fe3fe2563449e000e473b7bc04575b04951 100644
--- a/src/cell.h
+++ b/src/cell.h
@@ -74,7 +74,7 @@ struct pcell {
 
   /* Stats on this cell's particles. */
   double h_max;
-  integertime_t ti_end_min, ti_end_max, ti_beg_max, ti_old;
+  integertime_t ti_end_min, ti_end_max, ti_beg_max, ti_old_part, ti_old_gpart;
 
   /* Number of particles in this cell. */
   int count, gcount, scount;
@@ -159,8 +159,11 @@ struct cell {
   /*! The extra ghost task for complex hydro schemes */
   struct task *extra_ghost;
 
-  /*! The drift task */
-  struct task *drift;
+  /*! The drift task for parts */
+  struct task *drift_part;
+
+  /*! The drift task for gparts */
+  struct task *drift_gpart;
 
   /*! The first kick task */
   struct task *kick1;
@@ -171,10 +174,10 @@ struct cell {
   /*! The task to compute time-steps */
   struct task *timestep;
 
-  /*! Task constructing the multipole from the particles */
-  struct task *grav_top_level;
+  /*! Task linking the FFT mesh to the rest of gravity tasks */
+  struct task *grav_ghost[2];
 
-  /*! Task constructing the multipole from the particles */
+  /*! Task computing long range non-periodic gravity interactions */
   struct task *grav_long_range;
 
   /*! Task propagating the multipole to the particles */
@@ -235,24 +238,30 @@ struct cell {
   /*! Maximum beginning of (integer) time step in this cell. */
   integertime_t ti_beg_max;
 
-  /*! Last (integer) time the cell's particle was drifted forward in time. */
-  integertime_t ti_old;
-
   /*! Last (integer) time the cell's sort arrays were updated. */
   integertime_t ti_sort;
 
+  /*! Last (integer) time the cell's part were drifted forward in time. */
+  integertime_t ti_old_part;
+
+  /*! Last (integer) time the cell's gpart were drifted forward in time. */
+  integertime_t ti_old_gpart;
+
   /*! Last (integer) time the cell's multipole was drifted forward in time. */
   integertime_t ti_old_multipole;
 
   /*! Minimum dimension, i.e. smallest edge of this cell (min(width)). */
   float dmin;
 
-  /*! Maximum particle movement in this cell since last construction. */
-  float dx_max;
-
   /*! Maximum particle movement in this cell since the last sort. */
   float dx_max_sort;
 
+  /*! Maximum part movement in this cell since last construction. */
+  float dx_max_part;
+
+  /*! Maximum gpart movement in this cell since last construction. */
+  float dx_max_gpart;
+
   /*! Nr of #part in this cell. */
   int count;
 
@@ -364,13 +373,15 @@ void cell_clean_links(struct cell *c, void *data);
 void cell_make_multipoles(struct cell *c, integertime_t ti_current);
 void cell_check_multipole(struct cell *c, void *data);
 void cell_clean(struct cell *c);
-void cell_check_particle_drift_point(struct cell *c, void *data);
+void cell_check_part_drift_point(struct cell *c, void *data);
+void cell_check_gpart_drift_point(struct cell *c, void *data);
 void cell_check_multipole_drift_point(struct cell *c, void *data);
 void cell_reset_task_counters(struct cell *c);
 int cell_is_drift_needed(struct cell *c, const struct engine *e);
 int cell_unskip_tasks(struct cell *c, struct scheduler *s);
 void cell_set_super(struct cell *c, struct cell *super);
-void cell_drift_particles(struct cell *c, const struct engine *e);
+void cell_drift_part(struct cell *c, const struct engine *e);
+void cell_drift_gpart(struct cell *c, const struct engine *e);
 void cell_drift_multipole(struct cell *c, const struct engine *e);
 void cell_drift_all_multipoles(struct cell *c, const struct engine *e);
 void cell_check_timesteps(struct cell *c);
diff --git a/src/common_io.c b/src/common_io.c
index df0bbdc29ec357da3ba14410c0f9c56e0d69160a..168fcf2c695014cf532e622c928414b875fc54d5 100644
--- a/src/common_io.c
+++ b/src/common_io.c
@@ -74,7 +74,7 @@ hid_t io_hdf5_type(enum IO_DATA_TYPE type) {
     case DOUBLE:
       return H5T_NATIVE_DOUBLE;
     case CHAR:
-      return H5T_C_S1;
+      return H5T_NATIVE_CHAR;
     default:
       error("Unknown type");
       return 0;
diff --git a/src/const.h b/src/const.h
index 6962ee8bca32e92664e3f20cdb23e7cb6fbc4abd..141eb48acc633542aa98655caa8debdd2dbce530 100644
--- a/src/const.h
+++ b/src/const.h
@@ -52,8 +52,43 @@
 /* Options to control the movement of particles for GIZMO_SPH. */
 /* This option disables particle movement */
 //#define GIZMO_FIX_PARTICLES
+/* Try to keep cells regular by adding a correction velocity. */
+#define GIZMO_STEER_MOTION
 //#define GIZMO_TOTAL_ENERGY
 
+/* Options to control handling of unphysical values (GIZMO_SPH only). */
+/* In GIZMO, mass and energy (and hence density and pressure) can in principle
+   become negative, which will cause unwanted behaviour that can make the code
+   crash.
+   If no options are selected below, we assume (and pray) that this will not
+   happen, and add no restrictions to how these variables are treated. */
+/* Check for unphysical values and crash if they occur. */
+//#define GIZMO_UNPHYSICAL_ERROR
+/* Check for unphysical values and reset them to safe values. */
+#define GIZMO_UNPHYSICAL_RESCUE
+/* Show a warning message if an unphysical value was reset (only works if
+   GIZMO_UNPHYSICAL_RESCUE is also selected). */
+//#define GIZMO_UNPHYSICAL_WARNING
+
+/* Parameters that control how GIZMO handles pathological particle
+   configurations. */
+/* Show a warning message if a pathological configuration has been detected. */
+//#define GIZMO_PATHOLOGICAL_WARNING
+/* Crash if a pathological configuration has been detected. */
+//#define GIZMO_PATHOLOGICAL_ERROR
+/* Maximum allowed gradient matrix condition number. If the condition number of
+   the gradient matrix (defined in equation C1 in Hopkins, 2015) is larger than
+   this value, we artificially increase the number of neighbours to get a more
+   homogeneous sampling. */
+#define const_gizmo_max_condition_number 100.0f
+/* Correction factor applied to the particle wcount to force more neighbours if
+   the condition number is too large. */
+#define const_gizmo_w_correction_factor 0.9f
+/* Lower limit on the wcount correction factor. If the condition number is still
+   too high after this wcount correction has been applied, we give up on the
+   gradient matrix and use SPH gradients instead. */
+#define const_gizmo_min_wcorr 0.5f
+
 /* Types of gradients to use for SHADOWFAX_SPH */
 /* If no option is chosen, no gradients are used (first order scheme) */
 #define SHADOWFAX_GRADIENTS
diff --git a/src/debug.c b/src/debug.c
index 3732ee5e769277deb393926ea2dc6f04fba93782..601f63d6e11bbbf95f62eaef1ec6ec7ec06d3ad9 100644
--- a/src/debug.c
+++ b/src/debug.c
@@ -259,8 +259,8 @@ int checkCellhdxmax(const struct cell *c, int *depth) {
     message("location: %f %f %f", c->loc[0], c->loc[1], c->loc[2]);
     result = 0;
   }
-  if (c->dx_max != dx_max) {
-    message("%d Inconsistent dx_max: %f != %f", *depth, c->dx_max, dx_max);
+  if (c->dx_max_part != dx_max) {
+    message("%d Inconsistent dx_max: %f != %f", *depth, c->dx_max_part, dx_max);
     message("location: %f %f %f", c->loc[0], c->loc[1], c->loc[2]);
     result = 0;
   }
diff --git a/src/drift.h b/src/drift.h
index d9b79f7f0549d85b6f05e8ce4a394aaa5b2a4d8d..e86d290cb796153d3c3fc43c21b25d2c7e435657 100644
--- a/src/drift.h
+++ b/src/drift.h
@@ -39,7 +39,7 @@
  * @param ti_current Integer end of time-step
  */
 __attribute__((always_inline)) INLINE static void drift_gpart(
-    struct gpart *restrict gp, float dt, double timeBase, integertime_t ti_old,
+    struct gpart *restrict gp, double dt, double timeBase, integertime_t ti_old,
     integertime_t ti_current) {
 
 #ifdef SWIFT_DEBUG_CHECKS
@@ -75,7 +75,7 @@ __attribute__((always_inline)) INLINE static void drift_gpart(
  * @param ti_current Integer end of time-step
  */
 __attribute__((always_inline)) INLINE static void drift_part(
-    struct part *restrict p, struct xpart *restrict xp, float dt,
+    struct part *restrict p, struct xpart *restrict xp, double dt,
     double timeBase, integertime_t ti_old, integertime_t ti_current) {
 
 #ifdef SWIFT_DEBUG_CHECKS
@@ -119,7 +119,7 @@ __attribute__((always_inline)) INLINE static void drift_part(
  * @param ti_current Integer end of time-step
  */
 __attribute__((always_inline)) INLINE static void drift_spart(
-    struct spart *restrict sp, float dt, double timeBase, integertime_t ti_old,
+    struct spart *restrict sp, double dt, double timeBase, integertime_t ti_old,
     integertime_t ti_current) {
 
 #ifdef SWIFT_DEBUG_CHECKS
diff --git a/src/engine.c b/src/engine.c
index 414b40f959ac4d3ecb449759823a3631b9a657a3..4618d6b8be1ced8742c6e97465a91df9b9bb5db2 100644
--- a/src/engine.c
+++ b/src/engine.c
@@ -151,6 +151,7 @@ void engine_add_ghosts(struct engine *e, struct cell *c, struct task *ghost_in,
 void engine_make_hierarchical_tasks(struct engine *e, struct cell *c) {
 
   struct scheduler *s = &e->sched;
+  const int periodic = e->s->periodic;
   const int is_hydro = (e->policy & engine_policy_hydro);
   const int is_self_gravity = (e->policy & engine_policy_self_gravity);
   const int is_with_cooling = (e->policy & engine_policy_cooling);
@@ -186,18 +187,13 @@ void engine_make_hierarchical_tasks(struct engine *e, struct cell *c) {
         c->grav_long_range = scheduler_addtask(
             s, task_type_grav_long_range, task_subtype_none, 0, 0, c, NULL);
 
-        /* Gravity top-level periodic calculation */
-        c->grav_top_level = scheduler_addtask(s, task_type_grav_top_level,
-                                              task_subtype_none, 0, 0, c, NULL);
-
         /* Gravity recursive down-pass */
         c->grav_down = scheduler_addtask(s, task_type_grav_down,
                                          task_subtype_none, 0, 0, c, NULL);
 
+        if (periodic) scheduler_addunlock(s, c->init_grav, c->grav_ghost[0]);
         scheduler_addunlock(s, c->init_grav, c->grav_long_range);
-        scheduler_addunlock(s, c->init_grav, c->grav_top_level);
         scheduler_addunlock(s, c->grav_long_range, c->grav_down);
-        scheduler_addunlock(s, c->grav_top_level, c->grav_down);
         scheduler_addunlock(s, c->grav_down, c->kick2);
       }
 
@@ -1073,10 +1069,10 @@ void engine_addtasks_send(struct engine *e, struct cell *ci, struct cell *cj,
 #endif
 
       /* Drift before you send */
-      if (ci->drift == NULL)
-        ci->drift = scheduler_addtask(s, task_type_drift, task_subtype_none, 0,
-                                      0, ci, NULL);
-      scheduler_addunlock(s, ci->drift, t_xv);
+      if (ci->drift_part == NULL)
+        ci->drift_part = scheduler_addtask(s, task_type_drift_part,
+                                           task_subtype_none, 0, 0, ci, NULL);
+      scheduler_addunlock(s, ci->drift_part, t_xv);
 
       /* The super-cell's timestep task should unlock the send_ti task. */
       scheduler_addunlock(s, ci->super->timestep, t_ti);
@@ -1675,41 +1671,98 @@ void engine_make_self_gravity_tasks(struct engine *e) {
   struct space *s = e->s;
   struct scheduler *sched = &e->sched;
   const int nodeID = e->nodeID;
+  const int periodic = s->periodic;
+  const int cdim[3] = {s->cdim[0], s->cdim[1], s->cdim[2]};
+  const int cdim_ghost[3] = {s->cdim[0] / 4 + 1, s->cdim[1] / 4 + 1,
+                             s->cdim[2] / 4 + 1};
   const double theta_crit_inv = e->gravity_properties->theta_crit_inv;
   struct cell *cells = s->cells_top;
-  const int nr_cells = s->nr_cells;
+  struct task **ghosts = NULL;
+  const int n_ghosts = cdim_ghost[0] * cdim_ghost[1] * cdim_ghost[2] * 2;
+
+  /* Create the top-level task if periodic */
+  if (periodic) {
+
+    /* Create the FFT task for this MPI rank */
+    s->grav_top_level = scheduler_addtask(sched, task_type_grav_top_level,
+                                          task_subtype_none, 0, 0, NULL, NULL);
+
+    /* Create a grid of ghosts to deal with the dependencies */
+    if ((ghosts = malloc(n_ghosts * sizeof(struct task *))) == 0)
+      error("Error allocating memory for gravity fft ghosts");
+
+    /* Make the ghosts implicit and add the dependencies */
+    for (int n = 0; n < n_ghosts / 2; ++n) {
+      ghosts[2 * n + 0] = scheduler_addtask(
+          sched, task_type_grav_ghost, task_subtype_none, 0, 0, NULL, NULL);
+      ghosts[2 * n + 1] = scheduler_addtask(
+          sched, task_type_grav_ghost, task_subtype_none, 0, 0, NULL, NULL);
+      ghosts[2 * n + 0]->implicit = 1;
+      ghosts[2 * n + 1]->implicit = 1;
+      scheduler_addunlock(sched, ghosts[2 * n + 0], s->grav_top_level);
+      scheduler_addunlock(sched, s->grav_top_level, ghosts[2 * n + 1]);
+    }
+  }
 
-  for (int cid = 0; cid < nr_cells; ++cid) {
+  /* Run through the higher level cells */
+  for (int i = 0; i < cdim[0]; i++) {
+    for (int j = 0; j < cdim[1]; j++) {
+      for (int k = 0; k < cdim[2]; k++) {
 
-    struct cell *ci = &cells[cid];
+        /* Get the cell */
+        const int cid = cell_getid(cdim, i, j, k);
+        struct cell *ci = &cells[cid];
 
-    /* Skip cells without gravity particles */
-    if (ci->gcount == 0) continue;
+        /* Skip cells without gravity particles */
+        if (ci->gcount == 0) continue;
 
-    /* Is that cell local ? */
-    if (ci->nodeID != nodeID) continue;
+        /* Is that cell local ? */
+        if (ci->nodeID != nodeID) continue;
 
-    /* If the cells is local build a self-interaction */
-    scheduler_addtask(sched, task_type_self, task_subtype_grav, 0, 0, ci, NULL);
+        /* If the cell is local, build a self-interaction */
+        scheduler_addtask(sched, task_type_self, task_subtype_grav, 0, 0, ci,
+                          NULL);
+
+        /* Periodicity dependencies: ghosts[] holds n_ghosts / 2 pairs */
+        const int ghost_id = cell_getid(cdim_ghost, i / 4, j / 4, k / 4);
+        if (ghost_id >= n_ghosts / 2) error("Invalid ghost_id");
+        if (periodic) {
+          ci->grav_ghost[0] = ghosts[2 * ghost_id + 0];
+          ci->grav_ghost[1] = ghosts[2 * ghost_id + 1];
+        }
 
-    /* Loop over every other cell */
-    for (int cjd = cid + 1; cjd < nr_cells; ++cjd) {
+        /* Loop over every other cell */
+        for (int ii = 0; ii < cdim[0]; ii++) {
+          for (int jj = 0; jj < cdim[1]; jj++) {
+            for (int kk = 0; kk < cdim[2]; kk++) {
+
+              /* Get the cell */
+              const int cjd = cell_getid(cdim, ii, jj, kk);
+              struct cell *cj = &cells[cjd];
 
-      struct cell *cj = &cells[cjd];
+              /* Avoid duplicates */
+              if (cid <= cjd) continue;
 
-      /* Skip cells without gravity particles */
-      if (cj->gcount == 0) continue;
+              /* Skip cells without gravity particles */
+              if (cj->gcount == 0) continue;
 
-      /* Is that neighbour local ? */
-      if (cj->nodeID != nodeID) continue;  // MATTHIEU
+              /* Is that neighbour local ? */
+              if (cj->nodeID != nodeID) continue;  // MATTHIEU
 
-      /* Are the cells to close for a MM interaction ? */
-      if (!gravity_multipole_accept(ci->multipole, cj->multipole,
-                                    theta_crit_inv, 1))
-        scheduler_addtask(sched, task_type_pair, task_subtype_grav, 0, 0, ci,
-                          cj);
+              /* Are the cells too close for a MM interaction ? */
+              if (!gravity_multipole_accept(ci->multipole, cj->multipole,
+                                            theta_crit_inv, 1)) {
+
+                scheduler_addtask(sched, task_type_pair, task_subtype_grav, 0,
+                                  0, ci, cj);
+              }
+            }
+          }
+        }
+      }
     }
   }
+  if (periodic) free(ghosts);
 }
 
 void engine_make_external_gravity_tasks(struct engine *e) {
@@ -1836,10 +1889,15 @@ void engine_count_and_link_tasks(struct engine *e) {
     }
 
     /* Link drift tasks to all the higher drift task. */
-    else if (t->type == task_type_drift) {
+    else if (t->type == task_type_drift_part) {
       for (struct cell *finger = t->ci->parent; finger != NULL;
            finger = finger->parent)
-        if (finger->drift != NULL) scheduler_addunlock(sched, t, finger->drift);
+        if (finger->drift_part != NULL) scheduler_addunlock(sched, t, finger->drift_part);
+    }
+    else if (t->type == task_type_drift_gpart) {
+      for (struct cell *finger = t->ci->parent; finger != NULL;
+           finger = finger->parent)
+        if (finger->drift_gpart != NULL) scheduler_addunlock(sched, t, finger->drift_gpart);
     }
 
     /* Link self tasks to cells. */
@@ -1930,7 +1988,7 @@ static inline void engine_make_external_gravity_dependencies(
     struct scheduler *sched, struct task *gravity, struct cell *c) {
 
   /* init --> external gravity --> kick */
-  scheduler_addunlock(sched, c->drift, gravity);
+  scheduler_addunlock(sched, c->drift_gpart, gravity);
   scheduler_addunlock(sched, gravity, c->super->kick2);
 }
 
@@ -1944,6 +2002,7 @@ void engine_link_gravity_tasks(struct engine *e) {
   struct scheduler *sched = &e->sched;
   const int nodeID = e->nodeID;
   const int nr_tasks = sched->nr_tasks;
+  const int periodic = e->s->periodic;
 
   for (int k = 0; k < nr_tasks; k++) {
 
@@ -1954,6 +2013,7 @@ void engine_link_gravity_tasks(struct engine *e) {
     if (t->type == task_type_self && t->subtype == task_subtype_grav) {
 
       engine_make_self_gravity_dependencies(sched, t, t->ci);
+      if (periodic) scheduler_addunlock(sched, t->ci->super->grav_ghost[1], t);
     }
 
     /* Self-interaction for external gravity ? */
@@ -1969,11 +2029,15 @@ void engine_link_gravity_tasks(struct engine *e) {
       if (t->ci->nodeID == nodeID) {
 
         engine_make_self_gravity_dependencies(sched, t, t->ci);
+        if (periodic && t->ci->super < t->cj->super)
+          scheduler_addunlock(sched, t->ci->super->grav_ghost[1], t);
       }
 
       if (t->cj->nodeID == nodeID && t->ci->super != t->cj->super) {
 
         engine_make_self_gravity_dependencies(sched, t, t->cj);
+        if (periodic && t->ci->super < t->cj->super)
+          scheduler_addunlock(sched, t->cj->super->grav_ghost[1], t);
       }
 
     }
@@ -2096,14 +2160,14 @@ void engine_make_extra_hydroloop_tasks(struct engine *e) {
 
     /* Sort tasks depend on the drift of the cell. */
     if (t->type == task_type_sort && t->ci->nodeID == engine_rank) {
-      scheduler_addunlock(sched, t->ci->drift, t);
+      scheduler_addunlock(sched, t->ci->drift_part, t);
     }
 
     /* Self-interaction? */
     else if (t->type == task_type_self && t->subtype == task_subtype_density) {
 
       /* Make all density tasks depend on the drift. */
-      scheduler_addunlock(sched, t->ci->drift, t);
+      scheduler_addunlock(sched, t->ci->drift_part, t);
 
 #ifdef EXTRA_HYDRO_LOOP
       /* Start by constructing the task for the second  and third hydro loop */
@@ -2139,9 +2203,9 @@ void engine_make_extra_hydroloop_tasks(struct engine *e) {
 
       /* Make all density tasks depend on the drift. */
       if (t->ci->nodeID == engine_rank)
-        scheduler_addunlock(sched, t->ci->drift, t);
+        scheduler_addunlock(sched, t->ci->drift_part, t);
       if (t->cj->nodeID == engine_rank)
-        scheduler_addunlock(sched, t->cj->drift, t);
+        scheduler_addunlock(sched, t->cj->drift_part, t);
 
 #ifdef EXTRA_HYDRO_LOOP
       /* Start by constructing the task for the second and third hydro loop */
@@ -2546,7 +2610,7 @@ void engine_marktasks_mapper(void *map_data, int num_elements,
             error("bad flags in sort task.");
 #endif
           scheduler_activate(s, cj->sorts);
-          if (cj->nodeID == engine_rank) scheduler_activate(s, cj->drift);
+          if (cj->nodeID == engine_rank) scheduler_activate(s, cj->drift_part);
         }
       }
       /* Store current values of dx_max and h_max. */
@@ -2562,6 +2626,9 @@ void engine_marktasks_mapper(void *map_data, int num_elements,
         scheduler_activate(s, ci->recv_xv);
         if (cell_is_active(ci, e)) {
           scheduler_activate(s, ci->recv_rho);
+#ifdef EXTRA_HYDRO_LOOP
+          scheduler_activate(s, ci->recv_gradient);
+#endif
           scheduler_activate(s, ci->recv_ti);
         }
 
@@ -2574,11 +2641,11 @@ void engine_marktasks_mapper(void *map_data, int num_elements,
         scheduler_activate(s, l->t);
 
         /* Drift both cells, the foreign one at the level which it is sent. */
-        if (l->t->ci->drift)
-          scheduler_activate(s, l->t->ci->drift);
+        if (l->t->ci->drift_part)
+          scheduler_activate(s, l->t->ci->drift_part);
         else
           error("Drift task missing !");
-        if (t->type == task_type_pair) scheduler_activate(s, cj->drift);
+        if (t->type == task_type_pair) scheduler_activate(s, cj->drift_part);
 
         if (cell_is_active(cj, e)) {
           for (l = cj->send_rho; l != NULL && l->t->cj->nodeID != ci->nodeID;
@@ -2587,6 +2654,14 @@ void engine_marktasks_mapper(void *map_data, int num_elements,
           if (l == NULL) error("Missing link to send_rho task.");
           scheduler_activate(s, l->t);
 
+#ifdef EXTRA_HYDRO_LOOP
+          for (l = cj->send_gradient;
+               l != NULL && l->t->cj->nodeID != ci->nodeID; l = l->next)
+            ;
+          if (l == NULL) error("Missing link to send_gradient task.");
+          scheduler_activate(s, l->t);
+#endif
+
           for (l = cj->send_ti; l != NULL && l->t->cj->nodeID != ci->nodeID;
                l = l->next)
             ;
@@ -2600,6 +2675,9 @@ void engine_marktasks_mapper(void *map_data, int num_elements,
         scheduler_activate(s, cj->recv_xv);
         if (cell_is_active(cj, e)) {
           scheduler_activate(s, cj->recv_rho);
+#ifdef EXTRA_HYDRO_LOOP
+          scheduler_activate(s, cj->recv_gradient);
+#endif
           scheduler_activate(s, cj->recv_ti);
         }
 
@@ -2612,11 +2690,11 @@ void engine_marktasks_mapper(void *map_data, int num_elements,
         scheduler_activate(s, l->t);
 
         /* Drift both cells, the foreign one at the level which it is sent. */
-        if (l->t->ci->drift)
-          scheduler_activate(s, l->t->ci->drift);
+        if (l->t->ci->drift_part)
+          scheduler_activate(s, l->t->ci->drift_part);
         else
           error("Drift task missing !");
-        if (t->type == task_type_pair) scheduler_activate(s, ci->drift);
+        if (t->type == task_type_pair) scheduler_activate(s, ci->drift_part);
 
         if (cell_is_active(ci, e)) {
           for (l = ci->send_rho; l != NULL && l->t->cj->nodeID != cj->nodeID;
@@ -2625,6 +2703,14 @@ void engine_marktasks_mapper(void *map_data, int num_elements,
           if (l == NULL) error("Missing link to send_rho task.");
           scheduler_activate(s, l->t);
 
+#ifdef EXTRA_HYDRO_LOOP
+          for (l = ci->send_gradient;
+               l != NULL && l->t->cj->nodeID != cj->nodeID; l = l->next)
+            ;
+          if (l == NULL) error("Missing link to send_gradient task.");
+          scheduler_activate(s, l->t);
+#endif
+
           for (l = ci->send_ti; l != NULL && l->t->cj->nodeID != cj->nodeID;
                l = l->next)
             ;
@@ -2633,30 +2719,37 @@ void engine_marktasks_mapper(void *map_data, int num_elements,
         }
 
       } else if (t->type == task_type_pair) {
-        scheduler_activate(s, ci->drift);
-        scheduler_activate(s, cj->drift);
+        scheduler_activate(s, ci->drift_part);
+        scheduler_activate(s, cj->drift_part);
       }
 #else
       if (t->type == task_type_pair) {
-        scheduler_activate(s, ci->drift);
-        scheduler_activate(s, cj->drift);
+        scheduler_activate(s, ci->drift_part);
+        scheduler_activate(s, cj->drift_part);
       }
 #endif
     }
 
-    /* Kick/Drift? */
+    /* Kick/Drift/init ? */
     else if (t->type == task_type_kick1 || t->type == task_type_kick2 ||
-             t->type == task_type_drift || t->type == task_type_init_grav) {
+             t->type == task_type_drift_part ||
+             t->type == task_type_drift_gpart ||
+             t->type == task_type_init_grav) {
       if (cell_is_active(t->ci, e)) scheduler_activate(s, t);
     }
 
     /* Gravity ? */
     else if (t->type == task_type_grav_down ||
-             t->type == task_type_grav_long_range ||
-             t->type == task_type_grav_top_level) {
+             t->type == task_type_grav_long_range) {
       if (cell_is_active(t->ci, e)) scheduler_activate(s, t);
     }
 
+    /* Periodic gravity ? */
+    else if (t->type == task_type_grav_top_level ||
+             t->type == task_type_grav_ghost) {
+      scheduler_activate(s, t);
+    }
+
     /* Time-step? */
     else if (t->type == task_type_timestep) {
       t->ci->updated = 0;
@@ -3034,7 +3127,7 @@ void engine_print_stats(struct engine *e) {
                           e->policy & engine_policy_self_gravity);
 
   /* Be verbose about this */
-  message("Saving statistics at t=%e.", e->time);
+  if (e->nodeID == 0) message("Saving statistics at t=%e.", e->time);
 #else
   if (e->verbose) message("Saving statistics at t=%e.", e->time);
 #endif
@@ -3083,10 +3176,12 @@ void engine_skip_force_and_kick(struct engine *e) {
     struct task *t = &tasks[i];
 
     /* Skip everything that updates the particles */
-    if (t->type == task_type_drift || t->type == task_type_kick1 ||
-        t->type == task_type_kick2 || t->type == task_type_timestep ||
-        t->subtype == task_subtype_force || t->subtype == task_subtype_grav ||
+    if (t->type == task_type_drift_part || t->type == task_type_drift_gpart ||
+        t->type == task_type_kick1 || t->type == task_type_kick2 ||
+        t->type == task_type_timestep || t->subtype == task_subtype_force ||
+        t->subtype == task_subtype_grav ||
         t->type == task_type_grav_long_range ||
+        t->type == task_type_grav_ghost ||
         t->type == task_type_grav_top_level || t->type == task_type_grav_down ||
         t->type == task_type_cooling || t->type == task_type_sourceterms)
       t->skip = 1;
@@ -3337,8 +3432,8 @@ void engine_step(struct engine *e) {
 
     if (e->policy & engine_policy_reconstruct_mpoles)
       engine_reconstruct_multipoles(e);
-    else
-      engine_drift_top_multipoles(e);
+    // else
+    //  engine_drift_top_multipoles(e);
   }
 
   /* Print the number of active tasks ? */
@@ -3450,9 +3545,15 @@ int engine_is_done(struct engine *e) {
 void engine_unskip(struct engine *e) {
 
   const ticks tic = getticks();
+
+  /* Activate all the regular tasks */
   threadpool_map(&e->threadpool, runner_do_unskip_mapper, e->s->cells_top,
                  e->s->nr_cells, sizeof(struct cell), 1, e);
 
+  /* And the top level gravity FFT one */
+  if (e->s->periodic && (e->policy & engine_policy_self_gravity))
+    scheduler_activate(&e->sched, e->s->grav_top_level);
+
   if (e->verbose)
     message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
             clocks_getunit());
@@ -3476,7 +3577,10 @@ void engine_do_drift_all_mapper(void *map_data, int num_elements,
     struct cell *c = &cells[ind];
     if (c != NULL && c->nodeID == e->nodeID) {
       /* Drift all the particles */
-      cell_drift_particles(c, e);
+      cell_drift_part(c, e);
+
+      /* Drift all the g-particles */
+      cell_drift_gpart(c, e);
 
       /* Drift the multipoles */
       if (e->policy & engine_policy_self_gravity)
@@ -3502,10 +3606,15 @@ void engine_drift_all(struct engine *e) {
   threadpool_map(&e->threadpool, engine_do_drift_all_mapper, e->s->cells_top,
                  e->s->nr_cells, sizeof(struct cell), 1, e);
 
+  /* Synchronize particle positions */
+  space_synchronize_particle_positions(e->s);
+
 #ifdef SWIFT_DEBUG_CHECKS
   /* Check that all cells have been drifted to the current time. */
   space_check_drift_point(e->s, e->ti_current,
                           e->policy & engine_policy_self_gravity);
+  part_verify_links(e->s->parts, e->s->gparts, e->s->sparts, e->s->nr_parts,
+                    e->s->nr_gparts, e->s->nr_sparts, e->verbose);
 #endif
 
   if (e->verbose)
@@ -3804,7 +3913,7 @@ void engine_dump_snapshot(struct engine *e) {
                           e->policy & engine_policy_self_gravity);
 
   /* Be verbose about this */
-  message("writing snapshot at t=%e.", e->time);
+  if (e->nodeID == 0) message("writing snapshot at t=%e.", e->time);
 #else
   if (e->verbose) message("writing snapshot at t=%e.", e->time);
 #endif
diff --git a/src/equation_of_state.h b/src/equation_of_state.h
index 28c97c7b96b778c7bbb7bcbfb6ffe682ce54ba22..e51ed99519dc9c418e34789fcce95b5f28d69a99 100644
--- a/src/equation_of_state.h
+++ b/src/equation_of_state.h
@@ -275,7 +275,7 @@ gas_pressure_from_internal_energy(float density, float u) {
  */
 __attribute__((always_inline)) INLINE static float
 gas_internal_energy_from_pressure(float density, float pressure) {
-  return const_isothermal_energy;
+  return const_isothermal_internal_energy;
 }
 
 /**
diff --git a/src/gravity_properties.c b/src/gravity_properties.c
index 7b9b8cd7c35f8fa9b21ff34ce2589b5d45ce8393..b1098888b96cdef2205ed513e60a3799c63e8b9f 100644
--- a/src/gravity_properties.c
+++ b/src/gravity_properties.c
@@ -69,11 +69,9 @@ void gravity_props_print(const struct gravity_props *p) {
   message("Self-gravity softening:    epsilon=%.4f (Plummer equivalent: %.4f)",
           p->epsilon, p->epsilon / 3.);
 
-  if (p->a_smooth != gravity_props_default_a_smooth)
-    message("Self-gravity MM smoothing-scale: a_smooth=%f", p->a_smooth);
+  message("Self-gravity MM smoothing-scale: a_smooth=%f", p->a_smooth);
 
-  if (p->r_cut != gravity_props_default_r_cut)
-    message("Self-gravity MM cut-off: r_cut=%f", p->r_cut);
+  message("Self-gravity MM cut-off: r_cut=%f", p->r_cut);
 }
 
 #if defined(HAVE_HDF5)
diff --git a/src/hydro/Gadget2/hydro.h b/src/hydro/Gadget2/hydro.h
index 747c81a8e64c18a06b04160cfab326a3521c5901..91626749a89ede387547b6351dce59fa3569307a 100644
--- a/src/hydro/Gadget2/hydro.h
+++ b/src/hydro/Gadget2/hydro.h
@@ -293,7 +293,7 @@ __attribute__((always_inline)) INLINE static void hydro_reset_acceleration(
   p->force.h_dt = 0.0f;
 
   /* Reset maximal signal velocity */
-  p->force.v_sig = 0.0f;
+  p->force.v_sig = p->force.soundspeed;
 }
 
 /**
diff --git a/src/hydro/Gizmo/hydro.h b/src/hydro/Gizmo/hydro.h
index 2e340a03b99ae51bc49a2e57456f4d6838d62f21..6d39c54d2ddc3571ac34c54fc9eede6f7dee6ac5 100644
--- a/src/hydro/Gizmo/hydro.h
+++ b/src/hydro/Gizmo/hydro.h
@@ -2,6 +2,7 @@
 /*******************************************************************************
  * This file is part of SWIFT.
  * Coypright (c) 2015 Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *               2016, 2017 Bert Vandenbroucke (bert.vandenbroucke@gmail.com)
  *
  * This program is free software: you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published
@@ -24,9 +25,13 @@
 #include "equation_of_state.h"
 #include "hydro_gradients.h"
 #include "hydro_space.h"
+#include "hydro_unphysical.h"
+#include "hydro_velocities.h"
 #include "minmax.h"
 #include "riemann.h"
 
+//#define GIZMO_LLOYD_ITERATION
+
 /**
  * @brief Computes the hydro time-step of a given particle
  *
@@ -40,6 +45,10 @@ __attribute__((always_inline)) INLINE static float hydro_compute_timestep(
 
   const float CFL_condition = hydro_properties->CFL_condition;
 
+#ifdef GIZMO_LLOYD_ITERATION
+  return CFL_condition;
+#endif
+
   if (p->timestepvars.vmax == 0.) {
     /* vmax can be zero in vacuum cells that only have vacuum neighbours */
     /* in this case, the time step should be limited by the maximally
@@ -47,7 +56,9 @@ __attribute__((always_inline)) INLINE static float hydro_compute_timestep(
        the time step to a very large value */
     return FLT_MAX;
   } else {
-    return CFL_condition * p->h / fabsf(p->timestepvars.vmax);
+    const float psize = powf(p->geometry.volume / hydro_dimension_unit_sphere,
+                             hydro_dimension_inv);
+    return 2. * CFL_condition * psize / fabsf(p->timestepvars.vmax);
   }
 }
 
@@ -128,16 +139,27 @@ __attribute__((always_inline)) INLINE static void hydro_first_init_part(
                                  p->conserved.momentum[2] * p->primitives.v[2]);
 #endif
 
-#if defined(GIZMO_FIX_PARTICLES)
-  /* make sure the particles are initially at rest */
+#ifdef GIZMO_LLOYD_ITERATION
+  /* overwrite all variables to make sure they have safe values */
+  p->primitives.rho = 1.;
+  p->primitives.v[0] = 0.;
+  p->primitives.v[1] = 0.;
+  p->primitives.v[2] = 0.;
+  p->primitives.P = 1.;
+
+  p->conserved.mass = 1.;
+  p->conserved.momentum[0] = 0.;
+  p->conserved.momentum[1] = 0.;
+  p->conserved.momentum[2] = 0.;
+  p->conserved.energy = 1.;
+
   p->v[0] = 0.;
   p->v[1] = 0.;
   p->v[2] = 0.;
 #endif
 
-  xp->v_full[0] = p->v[0];
-  xp->v_full[1] = p->v[1];
-  xp->v_full[2] = p->v[2];
+  /* initialize the particle velocity based on the primitive fluid velocity */
+  hydro_velocities_init(p, xp);
 
   /* we cannot initialize wcorr in init_part, as init_part gets called every
      time the density loop is repeated, and the whole point of storing wcorr
@@ -169,6 +191,9 @@ __attribute__((always_inline)) INLINE static void hydro_init_part(
   p->geometry.matrix_E[2][0] = 0.0f;
   p->geometry.matrix_E[2][1] = 0.0f;
   p->geometry.matrix_E[2][2] = 0.0f;
+  p->geometry.centroid[0] = 0.0f;
+  p->geometry.centroid[1] = 0.0f;
+  p->geometry.centroid[2] = 0.0f;
   p->geometry.Atot = 0.0f;
 
   /* Set the active flag to active. */
@@ -226,6 +251,14 @@ __attribute__((always_inline)) INLINE static void hydro_end_density(
   p->geometry.matrix_E[2][1] = ihdim * p->geometry.matrix_E[2][1];
   p->geometry.matrix_E[2][2] = ihdim * p->geometry.matrix_E[2][2];
 
+  p->geometry.centroid[0] *= kernel_norm;
+  p->geometry.centroid[1] *= kernel_norm;
+  p->geometry.centroid[2] *= kernel_norm;
+
+  p->geometry.centroid[0] /= p->density.wcount;
+  p->geometry.centroid[1] /= p->density.wcount;
+  p->geometry.centroid[2] /= p->density.wcount;
+
   /* Check the condition number to see if we have a stable geometry. */
   float condition_number_E = 0.0f;
   int i, j;
@@ -249,12 +282,18 @@ __attribute__((always_inline)) INLINE static void hydro_end_density(
   float condition_number =
       hydro_dimension_inv * sqrtf(condition_number_E * condition_number_Einv);
 
-  if (condition_number > 100.0f) {
-    //    error("Condition number larger than 100!");
-    //    message("Condition number too large: %g (p->id: %llu)!",
-    //    condition_number, p->id);
+  if (condition_number > const_gizmo_max_condition_number &&
+      p->density.wcorr > const_gizmo_min_wcorr) {
+#ifdef GIZMO_PATHOLOGICAL_ERROR
+    error("Condition number larger than %g (%g)!",
+          const_gizmo_max_condition_number, condition_number);
+#endif
+#ifdef GIZMO_PATHOLOGICAL_WARNING
+    message("Condition number too large: %g (> %g, p->id: %llu)!",
+            condition_number, const_gizmo_max_condition_number, p->id);
+#endif
     /* add a correction to the number of neighbours for this particle */
-    p->density.wcorr *= 0.75;
+    p->density.wcorr *= const_gizmo_w_correction_factor;
   }
 
   hydro_gradients_init(p);
@@ -264,8 +303,8 @@ __attribute__((always_inline)) INLINE static void hydro_end_density(
   const float m = p->conserved.mass;
 
 #ifdef SWIFT_DEBUG_CHECKS
-  if (m == 0.) {
-    error("Mass is 0!");
+  if (m < 0.) {
+    error("Mass is negative!");
   }
 
   if (volume == 0.) {
@@ -278,15 +317,20 @@ __attribute__((always_inline)) INLINE static void hydro_end_density(
   momentum[1] = p->conserved.momentum[1];
   momentum[2] = p->conserved.momentum[2];
   p->primitives.rho = m / volume;
-  p->primitives.v[0] = momentum[0] / m;
-  p->primitives.v[1] = momentum[1] / m;
-  p->primitives.v[2] = momentum[2] / m;
+  if (m == 0.) {
+    p->primitives.v[0] = 0.;
+    p->primitives.v[1] = 0.;
+    p->primitives.v[2] = 0.;
+  } else {
+    p->primitives.v[0] = momentum[0] / m;
+    p->primitives.v[1] = momentum[1] / m;
+    p->primitives.v[2] = momentum[2] / m;
+  }
 
 #ifdef EOS_ISOTHERMAL_GAS
   /* although the pressure is not formally used anywhere if an isothermal eos
      has been selected, we still make sure it is set to the correct value */
-  p->primitives.P = const_isothermal_soundspeed * const_isothermal_soundspeed *
-                    p->primitives.rho;
+  p->primitives.P = gas_pressure_from_internal_energy(p->primitives.rho, 0.);
 #else
 
   float energy = p->conserved.energy;
@@ -304,12 +348,17 @@ __attribute__((always_inline)) INLINE static void hydro_end_density(
 #endif
 
   /* sanity checks */
-  /* it would probably be safer to throw a warning if netive densities or
-     pressures occur */
-  if (p->primitives.rho < 0.0f || p->primitives.P < 0.0f) {
-    p->primitives.rho = 0.0f;
-    p->primitives.P = 0.0f;
-  }
+  gizmo_check_physical_quantity("density", p->primitives.rho);
+  gizmo_check_physical_quantity("pressure", p->primitives.P);
+
+#ifdef GIZMO_LLOYD_ITERATION
+  /* overwrite primitive variables to make sure they still have safe values */
+  p->primitives.rho = 1.;
+  p->primitives.v[0] = 0.;
+  p->primitives.v[1] = 0.;
+  p->primitives.v[2] = 0.;
+  p->primitives.P = 1.;
+#endif
 
   /* Add a correction factor to wcount (to force a neighbour number increase if
      the geometry matrix is close to singular) */
@@ -330,8 +379,6 @@ __attribute__((always_inline)) INLINE static void hydro_end_density(
  *
  * @param p The particle to act upon.
  * @param xp The extended particle data to act upon.
- * @param ti_current Current integer time.
- * @param timeBase Conversion factor between integer time and physical time.
  */
 __attribute__((always_inline)) INLINE static void hydro_prepare_force(
     struct part* restrict p, struct xpart* restrict xp) {
@@ -340,10 +387,7 @@ __attribute__((always_inline)) INLINE static void hydro_prepare_force(
   p->timestepvars.vmax = 0.0f;
 
   /* Set the actual velocity of the particle */
-  /* if GIZMO_FIX_PARTICLES has been selected, v_full will always be zero */
-  p->force.v_full[0] = xp->v_full[0];
-  p->force.v_full[1] = xp->v_full[1];
-  p->force.v_full[2] = xp->v_full[2];
+  hydro_velocities_prepare_force(p, xp);
 }
 
 /**
@@ -364,6 +408,11 @@ __attribute__((always_inline)) INLINE static void hydro_end_gradient(
   p->gravity.mflux[0] = 0.0f;
   p->gravity.mflux[1] = 0.0f;
   p->gravity.mflux[2] = 0.0f;
+
+#ifdef GIZMO_LLOYD_ITERATION
+  /* reset the gradients to zero, as we don't want them */
+  hydro_gradients_init(p);
+#endif
 }
 
 /**
@@ -422,6 +471,10 @@ __attribute__((always_inline)) INLINE static void hydro_convert_quantities(
 __attribute__((always_inline)) INLINE static void hydro_predict_extra(
     struct part* p, struct xpart* xp, float dt) {
 
+#ifdef GIZMO_LLOYD_ITERATION
+  return;
+#endif
+
   const float h_inv = 1.0f / p->h;
 
   /* Predict smoothing length */
@@ -432,8 +485,9 @@ __attribute__((always_inline)) INLINE static void hydro_predict_extra(
   else
     h_corr = expf(w1);
 
-  /* Limit the smoothing length correction. */
-  if (h_corr < 2.0f) {
+  /* Limit the smoothing length correction (and make sure it is always
+     positive). */
+  if (h_corr < 2.0f && h_corr > 0.) {
     p->h *= h_corr;
   }
 
@@ -483,22 +537,13 @@ __attribute__((always_inline)) INLINE static void hydro_end_force(
 
   /* set the variables that are used to drift the primitive variables */
 
-  /* Add normalization to h_dt. */
-  p->force.h_dt *= p->h * hydro_dimension_inv;
-
-  if (p->force.dt) {
+  if (p->force.dt > 0.) {
     p->du_dt = p->conserved.flux.energy / p->force.dt;
   } else {
     p->du_dt = 0.0f;
   }
 
-#if defined(GIZMO_FIX_PARTICLES)
-  p->du_dt = 0.0f;
-
-  /* disable the smoothing length update, since the smoothing lengths should
-     stay the same for all steps (particles don't move) */
-  p->force.h_dt = 0.0f;
-#endif
+  hydro_velocities_end_force(p);
 }
 
 /**
@@ -527,7 +572,12 @@ __attribute__((always_inline)) INLINE static void hydro_kick_extra(
   p->conserved.energy += p->conserved.flux.energy;
 #endif
 
+  gizmo_check_physical_quantity("mass", p->conserved.mass);
+  gizmo_check_physical_quantity("energy", p->conserved.energy);
+
 #ifdef SWIFT_DEBUG_CHECKS
+  /* Note that this check only has an effect if no GIZMO_UNPHYSICAL option
+     was selected. */
   if (p->conserved.mass < 0.) {
     error(
         "Negative mass after conserved variables update (mass: %g, dmass: %g)!",
@@ -535,7 +585,10 @@ __attribute__((always_inline)) INLINE static void hydro_kick_extra(
   }
 
   if (p->conserved.energy < 0.) {
-    error("Negative energy after conserved variables update!");
+    error(
+        "Negative energy after conserved variables update (energy: %g, "
+        "denergy: %g)!",
+        p->conserved.energy, p->conserved.flux.energy);
   }
 #endif
 
@@ -549,7 +602,7 @@ __attribute__((always_inline)) INLINE static void hydro_kick_extra(
     a_grav[2] = p->gpart->a_grav[2];
 
     /* Store the gravitational acceleration for later use. */
-    /* This is currently only used for output purposes. */
+    /* This is used for the prediction step. */
     p->gravity.old_a[0] = a_grav[0];
     p->gravity.old_a[1] = a_grav[1];
     p->gravity.old_a[2] = a_grav[2];
@@ -564,7 +617,7 @@ __attribute__((always_inline)) INLINE static void hydro_kick_extra(
     p->conserved.momentum[1] += dt * p->conserved.mass * a_grav[1];
     p->conserved.momentum[2] += dt * p->conserved.mass * a_grav[2];
 
-#if !defined(EOS_ISOTHERMAL_GAS) && defined(GIZMO_TOTAL_ENERGY)
+#if !defined(EOS_ISOTHERMAL_GAS)
     /* This part still needs to be tested! */
     p->conserved.energy += dt * (p->conserved.momentum[0] * a_grav[0] +
                                  p->conserved.momentum[1] * a_grav[1] +
@@ -585,45 +638,25 @@ __attribute__((always_inline)) INLINE static void hydro_kick_extra(
   p->conserved.flux.momentum[2] = 0.0f;
   p->conserved.flux.energy = 0.0f;
 
-#if defined(GIZMO_FIX_PARTICLES)
-  xp->v_full[0] = 0.;
-  xp->v_full[1] = 0.;
-  xp->v_full[2] = 0.;
-
-  p->v[0] = 0.;
-  p->v[1] = 0.;
-  p->v[2] = 0.;
-
-  if (p->gpart) {
-    p->gpart->v_full[0] = 0.;
-    p->gpart->v_full[1] = 0.;
-    p->gpart->v_full[2] = 0.;
-  }
-#else
-  /* Set particle movement */
-  if (p->conserved.mass > 0.) {
-    xp->v_full[0] = p->conserved.momentum[0] / p->conserved.mass;
-    xp->v_full[1] = p->conserved.momentum[1] / p->conserved.mass;
-    xp->v_full[2] = p->conserved.momentum[2] / p->conserved.mass;
-  } else {
-    /* vacuum particles don't move */
-    xp->v_full[0] = 0.;
-    xp->v_full[1] = 0.;
-    xp->v_full[2] = 0.;
-  }
+  hydro_velocities_set(p, xp);
+
+#ifdef GIZMO_LLOYD_ITERATION
+  /* reset conserved variables to safe values */
+  p->conserved.mass = 1.;
+  p->conserved.momentum[0] = 0.;
+  p->conserved.momentum[1] = 0.;
+  p->conserved.momentum[2] = 0.;
+  p->conserved.energy = 1.;
+
+  /* set the particle velocities to the Lloyd velocities */
+  /* note that centroid stores the particle position relative to the centroid
+     (position - centroid), hence the minus sign */
+  xp->v_full[0] = -p->geometry.centroid[0] / p->force.dt;
+  xp->v_full[1] = -p->geometry.centroid[1] / p->force.dt;
+  xp->v_full[2] = -p->geometry.centroid[2] / p->force.dt;
   p->v[0] = xp->v_full[0];
   p->v[1] = xp->v_full[1];
   p->v[2] = xp->v_full[2];
-
-  /* Update gpart! */
-  /* This is essential, as the gpart drift is done independently from the part
-     drift, and we don't want the gpart and the part to have different
-     positions! */
-  if (p->gpart) {
-    p->gpart->v_full[0] = xp->v_full[0];
-    p->gpart->v_full[1] = xp->v_full[1];
-    p->gpart->v_full[2] = xp->v_full[2];
-  }
 #endif
 
   /* reset wcorr */
diff --git a/src/hydro/Gizmo/hydro_gradients.h b/src/hydro/Gizmo/hydro_gradients.h
index a5c1e9038d0d3de6896afe773e3193a2304a6b6b..5ad6d87619a7629a703a8b9c03d089e69ffbdf7d 100644
--- a/src/hydro/Gizmo/hydro_gradients.h
+++ b/src/hydro/Gizmo/hydro_gradients.h
@@ -22,6 +22,7 @@
 #define SWIFT_HYDRO_GRADIENTS_H
 
 #include "hydro_slope_limiters.h"
+#include "hydro_unphysical.h"
 #include "riemann.h"
 
 #if defined(GRADIENTS_SPH)
@@ -98,6 +99,7 @@ __attribute__((always_inline)) INLINE static void hydro_gradients_predict(
   float xij_j[3];
   int k;
   float xfac;
+  float a_grav_i[3], a_grav_j[3];
 
   /* perform gradient reconstruction in space and time */
   /* space */
@@ -139,37 +141,38 @@ __attribute__((always_inline)) INLINE static void hydro_gradients_predict(
            pj->primitives.gradients.P[1] * xij_j[1] +
            pj->primitives.gradients.P[2] * xij_j[2];
 
+  a_grav_i[0] = pi->gravity.old_a[0];
+  a_grav_i[1] = pi->gravity.old_a[1];
+  a_grav_i[2] = pi->gravity.old_a[2];
+
+  a_grav_i[0] += pi->gravity.grad_a[0][0] * xij_i[0] +
+                 pi->gravity.grad_a[0][1] * xij_i[1] +
+                 pi->gravity.grad_a[0][2] * xij_i[2];
+  a_grav_i[1] += pi->gravity.grad_a[1][0] * xij_i[0] +
+                 pi->gravity.grad_a[1][1] * xij_i[1] +
+                 pi->gravity.grad_a[1][2] * xij_i[2];
+  a_grav_i[2] += pi->gravity.grad_a[2][0] * xij_i[0] +
+                 pi->gravity.grad_a[2][1] * xij_i[1] +
+                 pi->gravity.grad_a[2][2] * xij_i[2];
+
+  a_grav_j[0] = pj->gravity.old_a[0];
+  a_grav_j[1] = pj->gravity.old_a[1];
+  a_grav_j[2] = pj->gravity.old_a[2];
+
+  a_grav_j[0] += pj->gravity.grad_a[0][0] * xij_j[0] +
+                 pj->gravity.grad_a[0][1] * xij_j[1] +
+                 pj->gravity.grad_a[0][2] * xij_j[2];
+  a_grav_j[1] += pj->gravity.grad_a[1][0] * xij_j[0] +
+                 pj->gravity.grad_a[1][1] * xij_j[1] +
+                 pj->gravity.grad_a[1][2] * xij_j[2];
+  a_grav_j[2] += pj->gravity.grad_a[2][0] * xij_j[0] +
+                 pj->gravity.grad_a[2][1] * xij_j[1] +
+                 pj->gravity.grad_a[2][2] * xij_j[2];
+
   hydro_slope_limit_face(Wi, Wj, dWi, dWj, xij_i, xij_j, r);
 
   /* time */
   if (Wi[0] > 0.0f) {
-#ifdef EOS_ISOTHERMAL_GAS
-    dWi[0] -= 0.5 * mindt * (Wi[1] * pi->primitives.gradients.rho[0] +
-                             Wi[2] * pi->primitives.gradients.rho[1] +
-                             Wi[3] * pi->primitives.gradients.rho[2] +
-                             Wi[0] * (pi->primitives.gradients.v[0][0] +
-                                      pi->primitives.gradients.v[1][1] +
-                                      pi->primitives.gradients.v[2][2]));
-    dWi[1] -= 0.5 * mindt *
-              (Wi[1] * pi->primitives.gradients.v[0][0] +
-               Wi[2] * pi->primitives.gradients.v[0][1] +
-               Wi[3] * pi->primitives.gradients.v[0][2] +
-               const_isothermal_soundspeed * const_isothermal_soundspeed *
-                   pi->primitives.gradients.rho[0] / Wi[0]);
-    dWi[2] -= 0.5 * mindt *
-              (Wi[1] * pi->primitives.gradients.v[1][0] +
-               Wi[2] * pi->primitives.gradients.v[1][1] +
-               Wi[3] * pi->primitives.gradients.v[1][2] +
-               const_isothermal_soundspeed * const_isothermal_soundspeed *
-                   pi->primitives.gradients.rho[1] / Wi[0]);
-    dWi[3] -= 0.5 * mindt *
-              (Wi[1] * pi->primitives.gradients.v[2][0] +
-               Wi[2] * pi->primitives.gradients.v[2][1] +
-               Wi[3] * pi->primitives.gradients.v[2][2] +
-               const_isothermal_soundspeed * const_isothermal_soundspeed *
-                   pi->primitives.gradients.rho[2] / Wi[0]);
-/* we don't care about P in this case */
-#else
     dWi[0] -= 0.5 * mindt * (Wi[1] * pi->primitives.gradients.rho[0] +
                              Wi[2] * pi->primitives.gradients.rho[1] +
                              Wi[3] * pi->primitives.gradients.rho[2] +
@@ -195,36 +198,13 @@ __attribute__((always_inline)) INLINE static void hydro_gradients_predict(
                hydro_gamma * Wi[4] * (pi->primitives.gradients.v[0][0] +
                                       pi->primitives.gradients.v[1][1] +
                                       pi->primitives.gradients.v[2][2]));
-#endif
+
+    dWi[1] += 0.5 * mindt * a_grav_i[0];
+    dWi[2] += 0.5 * mindt * a_grav_i[1];
+    dWi[3] += 0.5 * mindt * a_grav_i[2];
   }
 
   if (Wj[0] > 0.0f) {
-#ifdef EOS_ISOTHERMAL_GAS
-    dWj[0] -= 0.5 * mindt * (Wj[1] * pj->primitives.gradients.rho[0] +
-                             Wj[2] * pj->primitives.gradients.rho[1] +
-                             Wj[3] * pj->primitives.gradients.rho[2] +
-                             Wj[0] * (pj->primitives.gradients.v[0][0] +
-                                      pj->primitives.gradients.v[1][1] +
-                                      pj->primitives.gradients.v[2][2]));
-    dWj[1] -= 0.5 * mindt *
-              (Wj[1] * pj->primitives.gradients.v[0][0] +
-               Wj[2] * pj->primitives.gradients.v[0][1] +
-               Wj[3] * pj->primitives.gradients.v[0][2] +
-               const_isothermal_soundspeed * const_isothermal_soundspeed *
-                   pj->primitives.gradients.rho[0] / Wj[0]);
-    dWj[2] -= 0.5 * mindt *
-              (Wj[1] * pj->primitives.gradients.v[1][0] +
-               Wj[2] * pj->primitives.gradients.v[1][1] +
-               Wj[3] * pj->primitives.gradients.v[1][2] +
-               const_isothermal_soundspeed * const_isothermal_soundspeed *
-                   pj->primitives.gradients.rho[1] / Wj[0]);
-    dWj[3] -= 0.5 * mindt *
-              (Wj[1] * pj->primitives.gradients.v[2][0] +
-               Wj[2] * pj->primitives.gradients.v[2][1] +
-               Wj[3] * pj->primitives.gradients.v[2][2] +
-               const_isothermal_soundspeed * const_isothermal_soundspeed *
-                   pj->primitives.gradients.rho[2] / Wj[0]);
-#else
     dWj[0] -= 0.5 * mindt * (Wj[1] * pj->primitives.gradients.rho[0] +
                              Wj[2] * pj->primitives.gradients.rho[1] +
                              Wj[3] * pj->primitives.gradients.rho[2] +
@@ -250,36 +230,28 @@ __attribute__((always_inline)) INLINE static void hydro_gradients_predict(
                hydro_gamma * Wj[4] * (pj->primitives.gradients.v[0][0] +
                                       pj->primitives.gradients.v[1][1] +
                                       pj->primitives.gradients.v[2][2]));
-#endif
-  }
 
-  if (-dWi[0] > Wi[0]) {
-    Wi[0] = 0.0f;
-  } else {
-    Wi[0] += dWi[0];
+    dWj[1] += 0.5 * mindt * a_grav_j[0];
+    dWj[2] += 0.5 * mindt * a_grav_j[1];
+    dWj[3] += 0.5 * mindt * a_grav_j[2];
   }
+
+  Wi[0] += dWi[0];
   Wi[1] += dWi[1];
   Wi[2] += dWi[2];
   Wi[3] += dWi[3];
-  if (-dWi[4] > Wi[4]) {
-    Wi[4] = 0.0f;
-  } else {
-    Wi[4] += dWi[4];
-  }
+  Wi[4] += dWi[4];
 
-  if (-dWj[0] > Wj[0]) {
-    Wj[0] = 0.0f;
-  } else {
-    Wj[0] += dWj[0];
-  }
+  Wj[0] += dWj[0];
   Wj[1] += dWj[1];
   Wj[2] += dWj[2];
   Wj[3] += dWj[3];
-  if (-dWj[4] > Wj[4]) {
-    Wj[4] = 0.0f;
-  } else {
-    Wj[4] += dWj[4];
-  }
+  Wj[4] += dWj[4];
+
+  gizmo_check_physical_quantity("density", Wi[0]);
+  gizmo_check_physical_quantity("pressure", Wi[4]);
+  gizmo_check_physical_quantity("density", Wj[0]);
+  gizmo_check_physical_quantity("pressure", Wj[4]);
 }
 
 #endif  // SWIFT_HYDRO_GRADIENTS_H
diff --git a/src/hydro/Gizmo/hydro_gradients_gizmo.h b/src/hydro/Gizmo/hydro_gradients_gizmo.h
index aa6e4406b94e7a5cafcd0ca556162476003477de..ee3ad6919f81f042ceacc5db8b4e818d63c90266 100644
--- a/src/hydro/Gizmo/hydro_gradients_gizmo.h
+++ b/src/hydro/Gizmo/hydro_gradients_gizmo.h
@@ -45,6 +45,18 @@ __attribute__((always_inline)) INLINE static void hydro_gradients_init(
   p->primitives.gradients.P[1] = 0.0f;
   p->primitives.gradients.P[2] = 0.0f;
 
+  p->gravity.grad_a[0][0] = 0.0f;
+  p->gravity.grad_a[0][1] = 0.0f;
+  p->gravity.grad_a[0][2] = 0.0f;
+
+  p->gravity.grad_a[1][0] = 0.0f;
+  p->gravity.grad_a[1][1] = 0.0f;
+  p->gravity.grad_a[1][2] = 0.0f;
+
+  p->gravity.grad_a[2][0] = 0.0f;
+  p->gravity.grad_a[2][1] = 0.0f;
+  p->gravity.grad_a[2][2] = 0.0f;
+
   hydro_slope_limit_cell_init(p);
 }
 
@@ -93,56 +105,146 @@ __attribute__((always_inline)) INLINE static void hydro_gradients_collect(
   xi = r * hi_inv;
   kernel_deval(xi, &wi, &wi_dx);
 
-  /* Compute gradients for pi */
-  /* there is a sign difference w.r.t. eqn. (6) because of the inverse
-   * definition of dx */
-  pi->primitives.gradients.rho[0] +=
-      (Wi[0] - Wj[0]) * wi *
-      (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]);
-  pi->primitives.gradients.rho[1] +=
-      (Wi[0] - Wj[0]) * wi *
-      (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]);
-  pi->primitives.gradients.rho[2] +=
-      (Wi[0] - Wj[0]) * wi *
-      (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]);
-
-  pi->primitives.gradients.v[0][0] +=
-      (Wi[1] - Wj[1]) * wi *
-      (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]);
-  pi->primitives.gradients.v[0][1] +=
-      (Wi[1] - Wj[1]) * wi *
-      (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]);
-  pi->primitives.gradients.v[0][2] +=
-      (Wi[1] - Wj[1]) * wi *
-      (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]);
-  pi->primitives.gradients.v[1][0] +=
-      (Wi[2] - Wj[2]) * wi *
-      (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]);
-  pi->primitives.gradients.v[1][1] +=
-      (Wi[2] - Wj[2]) * wi *
-      (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]);
-  pi->primitives.gradients.v[1][2] +=
-      (Wi[2] - Wj[2]) * wi *
-      (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]);
-  pi->primitives.gradients.v[2][0] +=
-      (Wi[3] - Wj[3]) * wi *
-      (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]);
-  pi->primitives.gradients.v[2][1] +=
-      (Wi[3] - Wj[3]) * wi *
-      (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]);
-  pi->primitives.gradients.v[2][2] +=
-      (Wi[3] - Wj[3]) * wi *
-      (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]);
-
-  pi->primitives.gradients.P[0] +=
-      (Wi[4] - Wj[4]) * wi *
-      (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]);
-  pi->primitives.gradients.P[1] +=
-      (Wi[4] - Wj[4]) * wi *
-      (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]);
-  pi->primitives.gradients.P[2] +=
-      (Wi[4] - Wj[4]) * wi *
-      (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]);
+  if (pi->density.wcorr > const_gizmo_min_wcorr) {
+    /* Compute gradients for pi */
+    /* there is a sign difference w.r.t. eqn. (6) because of the inverse
+     * definition of dx */
+    pi->primitives.gradients.rho[0] +=
+        (Wi[0] - Wj[0]) * wi *
+        (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]);
+    pi->primitives.gradients.rho[1] +=
+        (Wi[0] - Wj[0]) * wi *
+        (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]);
+    pi->primitives.gradients.rho[2] +=
+        (Wi[0] - Wj[0]) * wi *
+        (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]);
+
+    pi->primitives.gradients.v[0][0] +=
+        (Wi[1] - Wj[1]) * wi *
+        (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]);
+    pi->primitives.gradients.v[0][1] +=
+        (Wi[1] - Wj[1]) * wi *
+        (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]);
+    pi->primitives.gradients.v[0][2] +=
+        (Wi[1] - Wj[1]) * wi *
+        (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]);
+    pi->primitives.gradients.v[1][0] +=
+        (Wi[2] - Wj[2]) * wi *
+        (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]);
+    pi->primitives.gradients.v[1][1] +=
+        (Wi[2] - Wj[2]) * wi *
+        (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]);
+    pi->primitives.gradients.v[1][2] +=
+        (Wi[2] - Wj[2]) * wi *
+        (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]);
+    pi->primitives.gradients.v[2][0] +=
+        (Wi[3] - Wj[3]) * wi *
+        (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]);
+    pi->primitives.gradients.v[2][1] +=
+        (Wi[3] - Wj[3]) * wi *
+        (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]);
+    pi->primitives.gradients.v[2][2] +=
+        (Wi[3] - Wj[3]) * wi *
+        (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]);
+
+    pi->primitives.gradients.P[0] +=
+        (Wi[4] - Wj[4]) * wi *
+        (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]);
+    pi->primitives.gradients.P[1] +=
+        (Wi[4] - Wj[4]) * wi *
+        (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]);
+    pi->primitives.gradients.P[2] +=
+        (Wi[4] - Wj[4]) * wi *
+        (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]);
+
+    pi->gravity.grad_a[0][0] +=
+        (pi->gravity.old_a[0] - pj->gravity.old_a[0]) * wi *
+        (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]);
+    pi->gravity.grad_a[0][1] +=
+        (pi->gravity.old_a[0] - pj->gravity.old_a[0]) * wi *
+        (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]);
+    pi->gravity.grad_a[0][2] +=
+        (pi->gravity.old_a[0] - pj->gravity.old_a[0]) * wi *
+        (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]);
+
+    pi->gravity.grad_a[1][0] +=
+        (pi->gravity.old_a[1] - pj->gravity.old_a[1]) * wi *
+        (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]);
+    pi->gravity.grad_a[1][1] +=
+        (pi->gravity.old_a[1] - pj->gravity.old_a[1]) * wi *
+        (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]);
+    pi->gravity.grad_a[1][2] +=
+        (pi->gravity.old_a[1] - pj->gravity.old_a[1]) * wi *
+        (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]);
+
+    pi->gravity.grad_a[2][0] +=
+        (pi->gravity.old_a[2] - pj->gravity.old_a[2]) * wi *
+        (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]);
+    pi->gravity.grad_a[2][1] +=
+        (pi->gravity.old_a[2] - pj->gravity.old_a[2]) * wi *
+        (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]);
+    pi->gravity.grad_a[2][2] +=
+        (pi->gravity.old_a[2] - pj->gravity.old_a[2]) * wi *
+        (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]);
+  } else {
+    /* The gradient matrix was not well-behaved, so switch to SPH gradients */
+
+    pi->primitives.gradients.rho[0] -=
+        wi_dx * dx[0] * (pi->primitives.rho - pj->primitives.rho) / r;
+    pi->primitives.gradients.rho[1] -=
+        wi_dx * dx[1] * (pi->primitives.rho - pj->primitives.rho) / r;
+    pi->primitives.gradients.rho[2] -=
+        wi_dx * dx[2] * (pi->primitives.rho - pj->primitives.rho) / r;
+
+    pi->primitives.gradients.v[0][0] -=
+        wi_dx * dx[0] * (pi->primitives.v[0] - pj->primitives.v[0]) / r;
+    pi->primitives.gradients.v[0][1] -=
+        wi_dx * dx[1] * (pi->primitives.v[0] - pj->primitives.v[0]) / r;
+    pi->primitives.gradients.v[0][2] -=
+        wi_dx * dx[2] * (pi->primitives.v[0] - pj->primitives.v[0]) / r;
+
+    pi->primitives.gradients.v[1][0] -=
+        wi_dx * dx[0] * (pi->primitives.v[1] - pj->primitives.v[1]) / r;
+    pi->primitives.gradients.v[1][1] -=
+        wi_dx * dx[1] * (pi->primitives.v[1] - pj->primitives.v[1]) / r;
+    pi->primitives.gradients.v[1][2] -=
+        wi_dx * dx[2] * (pi->primitives.v[1] - pj->primitives.v[1]) / r;
+
+    pi->primitives.gradients.v[2][0] -=
+        wi_dx * dx[0] * (pi->primitives.v[2] - pj->primitives.v[2]) / r;
+    pi->primitives.gradients.v[2][1] -=
+        wi_dx * dx[1] * (pi->primitives.v[2] - pj->primitives.v[2]) / r;
+    pi->primitives.gradients.v[2][2] -=
+        wi_dx * dx[2] * (pi->primitives.v[2] - pj->primitives.v[2]) / r;
+
+    pi->primitives.gradients.P[0] -=
+        wi_dx * dx[0] * (pi->primitives.P - pj->primitives.P) / r;
+    pi->primitives.gradients.P[1] -=
+        wi_dx * dx[1] * (pi->primitives.P - pj->primitives.P) / r;
+    pi->primitives.gradients.P[2] -=
+        wi_dx * dx[2] * (pi->primitives.P - pj->primitives.P) / r;
+
+    pi->gravity.grad_a[0][0] -=
+        wi_dx * dx[0] * (pi->gravity.old_a[0] - pj->gravity.old_a[0]) / r;
+    pi->gravity.grad_a[0][1] -=
+        wi_dx * dx[1] * (pi->gravity.old_a[0] - pj->gravity.old_a[0]) / r;
+    pi->gravity.grad_a[0][2] -=
+        wi_dx * dx[2] * (pi->gravity.old_a[0] - pj->gravity.old_a[0]) / r;
+
+    pi->gravity.grad_a[1][0] -=
+        wi_dx * dx[0] * (pi->gravity.old_a[1] - pj->gravity.old_a[1]) / r;
+    pi->gravity.grad_a[1][1] -=
+        wi_dx * dx[1] * (pi->gravity.old_a[1] - pj->gravity.old_a[1]) / r;
+    pi->gravity.grad_a[1][2] -=
+        wi_dx * dx[2] * (pi->gravity.old_a[1] - pj->gravity.old_a[1]) / r;
+
+    pi->gravity.grad_a[2][0] -=
+        wi_dx * dx[0] * (pi->gravity.old_a[2] - pj->gravity.old_a[2]) / r;
+    pi->gravity.grad_a[2][1] -=
+        wi_dx * dx[1] * (pi->gravity.old_a[2] - pj->gravity.old_a[2]) / r;
+    pi->gravity.grad_a[2][2] -=
+        wi_dx * dx[2] * (pi->gravity.old_a[2] - pj->gravity.old_a[2]) / r;
+  }
 
   hydro_slope_limit_cell_collect(pi, pj, r);
 
@@ -151,57 +253,146 @@ __attribute__((always_inline)) INLINE static void hydro_gradients_collect(
   xj = r * hj_inv;
   kernel_deval(xj, &wj, &wj_dx);
 
-  /* Compute gradients for pj */
-  /* there is no sign difference w.r.t. eqn. (6) because dx is now what we
-   * want
-   * it to be */
-  pj->primitives.gradients.rho[0] +=
-      (Wi[0] - Wj[0]) * wj *
-      (Bj[0][0] * dx[0] + Bj[0][1] * dx[1] + Bj[0][2] * dx[2]);
-  pj->primitives.gradients.rho[1] +=
-      (Wi[0] - Wj[0]) * wj *
-      (Bj[1][0] * dx[0] + Bj[1][1] * dx[1] + Bj[1][2] * dx[2]);
-  pj->primitives.gradients.rho[2] +=
-      (Wi[0] - Wj[0]) * wj *
-      (Bj[2][0] * dx[0] + Bj[2][1] * dx[1] + Bj[2][2] * dx[2]);
-
-  pj->primitives.gradients.v[0][0] +=
-      (Wi[1] - Wj[1]) * wj *
-      (Bj[0][0] * dx[0] + Bj[0][1] * dx[1] + Bj[0][2] * dx[2]);
-  pj->primitives.gradients.v[0][1] +=
-      (Wi[1] - Wj[1]) * wj *
-      (Bj[1][0] * dx[0] + Bj[1][1] * dx[1] + Bj[1][2] * dx[2]);
-  pj->primitives.gradients.v[0][2] +=
-      (Wi[1] - Wj[1]) * wj *
-      (Bj[2][0] * dx[0] + Bj[2][1] * dx[1] + Bj[2][2] * dx[2]);
-  pj->primitives.gradients.v[1][0] +=
-      (Wi[2] - Wj[2]) * wj *
-      (Bj[0][0] * dx[0] + Bj[0][1] * dx[1] + Bj[0][2] * dx[2]);
-  pj->primitives.gradients.v[1][1] +=
-      (Wi[2] - Wj[2]) * wj *
-      (Bj[1][0] * dx[0] + Bj[1][1] * dx[1] + Bj[1][2] * dx[2]);
-  pj->primitives.gradients.v[1][2] +=
-      (Wi[2] - Wj[2]) * wj *
-      (Bj[2][0] * dx[0] + Bj[2][1] * dx[1] + Bj[2][2] * dx[2]);
-  pj->primitives.gradients.v[2][0] +=
-      (Wi[3] - Wj[3]) * wj *
-      (Bj[0][0] * dx[0] + Bj[0][1] * dx[1] + Bj[0][2] * dx[2]);
-  pj->primitives.gradients.v[2][1] +=
-      (Wi[3] - Wj[3]) * wj *
-      (Bj[1][0] * dx[0] + Bj[1][1] * dx[1] + Bj[1][2] * dx[2]);
-  pj->primitives.gradients.v[2][2] +=
-      (Wi[3] - Wj[3]) * wj *
-      (Bj[2][0] * dx[0] + Bj[2][1] * dx[1] + Bj[2][2] * dx[2]);
-
-  pj->primitives.gradients.P[0] +=
-      (Wi[4] - Wj[4]) * wj *
-      (Bj[0][0] * dx[0] + Bj[0][1] * dx[1] + Bj[0][2] * dx[2]);
-  pj->primitives.gradients.P[1] +=
-      (Wi[4] - Wj[4]) * wj *
-      (Bj[1][0] * dx[0] + Bj[1][1] * dx[1] + Bj[1][2] * dx[2]);
-  pj->primitives.gradients.P[2] +=
-      (Wi[4] - Wj[4]) * wj *
-      (Bj[2][0] * dx[0] + Bj[2][1] * dx[1] + Bj[2][2] * dx[2]);
+  if (pj->density.wcorr > const_gizmo_min_wcorr) {
+    /* Compute gradients for pj */
+    /* there is no sign difference w.r.t. eqn. (6) because dx is now
+     * what we want it to be
+     */
+    pj->primitives.gradients.rho[0] +=
+        (Wi[0] - Wj[0]) * wj *
+        (Bj[0][0] * dx[0] + Bj[0][1] * dx[1] + Bj[0][2] * dx[2]);
+    pj->primitives.gradients.rho[1] +=
+        (Wi[0] - Wj[0]) * wj *
+        (Bj[1][0] * dx[0] + Bj[1][1] * dx[1] + Bj[1][2] * dx[2]);
+    pj->primitives.gradients.rho[2] +=
+        (Wi[0] - Wj[0]) * wj *
+        (Bj[2][0] * dx[0] + Bj[2][1] * dx[1] + Bj[2][2] * dx[2]);
+
+    pj->primitives.gradients.v[0][0] +=
+        (Wi[1] - Wj[1]) * wj *
+        (Bj[0][0] * dx[0] + Bj[0][1] * dx[1] + Bj[0][2] * dx[2]);
+    pj->primitives.gradients.v[0][1] +=
+        (Wi[1] - Wj[1]) * wj *
+        (Bj[1][0] * dx[0] + Bj[1][1] * dx[1] + Bj[1][2] * dx[2]);
+    pj->primitives.gradients.v[0][2] +=
+        (Wi[1] - Wj[1]) * wj *
+        (Bj[2][0] * dx[0] + Bj[2][1] * dx[1] + Bj[2][2] * dx[2]);
+    pj->primitives.gradients.v[1][0] +=
+        (Wi[2] - Wj[2]) * wj *
+        (Bj[0][0] * dx[0] + Bj[0][1] * dx[1] + Bj[0][2] * dx[2]);
+    pj->primitives.gradients.v[1][1] +=
+        (Wi[2] - Wj[2]) * wj *
+        (Bj[1][0] * dx[0] + Bj[1][1] * dx[1] + Bj[1][2] * dx[2]);
+    pj->primitives.gradients.v[1][2] +=
+        (Wi[2] - Wj[2]) * wj *
+        (Bj[2][0] * dx[0] + Bj[2][1] * dx[1] + Bj[2][2] * dx[2]);
+    pj->primitives.gradients.v[2][0] +=
+        (Wi[3] - Wj[3]) * wj *
+        (Bj[0][0] * dx[0] + Bj[0][1] * dx[1] + Bj[0][2] * dx[2]);
+    pj->primitives.gradients.v[2][1] +=
+        (Wi[3] - Wj[3]) * wj *
+        (Bj[1][0] * dx[0] + Bj[1][1] * dx[1] + Bj[1][2] * dx[2]);
+    pj->primitives.gradients.v[2][2] +=
+        (Wi[3] - Wj[3]) * wj *
+        (Bj[2][0] * dx[0] + Bj[2][1] * dx[1] + Bj[2][2] * dx[2]);
+
+    pj->primitives.gradients.P[0] +=
+        (Wi[4] - Wj[4]) * wj *
+        (Bj[0][0] * dx[0] + Bj[0][1] * dx[1] + Bj[0][2] * dx[2]);
+    pj->primitives.gradients.P[1] +=
+        (Wi[4] - Wj[4]) * wj *
+        (Bj[1][0] * dx[0] + Bj[1][1] * dx[1] + Bj[1][2] * dx[2]);
+    pj->primitives.gradients.P[2] +=
+        (Wi[4] - Wj[4]) * wj *
+        (Bj[2][0] * dx[0] + Bj[2][1] * dx[1] + Bj[2][2] * dx[2]);
+
+    pj->gravity.grad_a[0][0] +=
+        (pi->gravity.old_a[0] - pj->gravity.old_a[0]) * wj *
+        (Bj[0][0] * dx[0] + Bj[0][1] * dx[1] + Bj[0][2] * dx[2]);
+    pj->gravity.grad_a[0][1] +=
+        (pi->gravity.old_a[0] - pj->gravity.old_a[0]) * wj *
+        (Bj[1][0] * dx[0] + Bj[1][1] * dx[1] + Bj[1][2] * dx[2]);
+    pj->gravity.grad_a[0][2] +=
+        (pi->gravity.old_a[0] - pj->gravity.old_a[0]) * wj *
+        (Bj[2][0] * dx[0] + Bj[2][1] * dx[1] + Bj[2][2] * dx[2]);
+
+    pj->gravity.grad_a[1][0] +=
+        (pi->gravity.old_a[1] - pj->gravity.old_a[1]) * wj *
+        (Bj[0][0] * dx[0] + Bj[0][1] * dx[1] + Bj[0][2] * dx[2]);
+    pj->gravity.grad_a[1][1] +=
+        (pi->gravity.old_a[1] - pj->gravity.old_a[1]) * wj *
+        (Bj[1][0] * dx[0] + Bj[1][1] * dx[1] + Bj[1][2] * dx[2]);
+    pj->gravity.grad_a[1][2] +=
+        (pi->gravity.old_a[1] - pj->gravity.old_a[1]) * wj *
+        (Bj[2][0] * dx[0] + Bj[2][1] * dx[1] + Bj[2][2] * dx[2]);
+
+    pj->gravity.grad_a[2][0] +=
+        (pi->gravity.old_a[2] - pj->gravity.old_a[2]) * wj *
+        (Bj[0][0] * dx[0] + Bj[0][1] * dx[1] + Bj[0][2] * dx[2]);
+    pj->gravity.grad_a[2][1] +=
+        (pi->gravity.old_a[2] - pj->gravity.old_a[2]) * wj *
+        (Bj[1][0] * dx[0] + Bj[1][1] * dx[1] + Bj[1][2] * dx[2]);
+    pj->gravity.grad_a[2][2] +=
+        (pi->gravity.old_a[2] - pj->gravity.old_a[2]) * wj *
+        (Bj[2][0] * dx[0] + Bj[2][1] * dx[1] + Bj[2][2] * dx[2]);
+  } else {
+    /* SPH gradients */
+
+    pj->primitives.gradients.rho[0] -=
+        wj_dx * dx[0] * (pi->primitives.rho - pj->primitives.rho) / r;
+    pj->primitives.gradients.rho[1] -=
+        wj_dx * dx[1] * (pi->primitives.rho - pj->primitives.rho) / r;
+    pj->primitives.gradients.rho[2] -=
+        wj_dx * dx[2] * (pi->primitives.rho - pj->primitives.rho) / r;
+
+    pj->primitives.gradients.v[0][0] -=
+        wj_dx * dx[0] * (pi->primitives.v[0] - pj->primitives.v[0]) / r;
+    pj->primitives.gradients.v[0][1] -=
+        wj_dx * dx[1] * (pi->primitives.v[0] - pj->primitives.v[0]) / r;
+    pj->primitives.gradients.v[0][2] -=
+        wj_dx * dx[2] * (pi->primitives.v[0] - pj->primitives.v[0]) / r;
+
+    pj->primitives.gradients.v[1][0] -=
+        wj_dx * dx[0] * (pi->primitives.v[1] - pj->primitives.v[1]) / r;
+    pj->primitives.gradients.v[1][1] -=
+        wj_dx * dx[1] * (pi->primitives.v[1] - pj->primitives.v[1]) / r;
+    pj->primitives.gradients.v[1][2] -=
+        wj_dx * dx[2] * (pi->primitives.v[1] - pj->primitives.v[1]) / r;
+    pj->primitives.gradients.v[2][0] -=
+        wj_dx * dx[0] * (pi->primitives.v[2] - pj->primitives.v[2]) / r;
+    pj->primitives.gradients.v[2][1] -=
+        wj_dx * dx[1] * (pi->primitives.v[2] - pj->primitives.v[2]) / r;
+    pj->primitives.gradients.v[2][2] -=
+        wj_dx * dx[2] * (pi->primitives.v[2] - pj->primitives.v[2]) / r;
+
+    pj->primitives.gradients.P[0] -=
+        wj_dx * dx[0] * (pi->primitives.P - pj->primitives.P) / r;
+    pj->primitives.gradients.P[1] -=
+        wj_dx * dx[1] * (pi->primitives.P - pj->primitives.P) / r;
+    pj->primitives.gradients.P[2] -=
+        wj_dx * dx[2] * (pi->primitives.P - pj->primitives.P) / r;
+
+    pj->gravity.grad_a[0][0] -=
+        wj_dx * dx[0] * (pi->gravity.old_a[0] - pj->gravity.old_a[0]) / r;
+    pj->gravity.grad_a[0][1] -=
+        wj_dx * dx[1] * (pi->gravity.old_a[0] - pj->gravity.old_a[0]) / r;
+    pj->gravity.grad_a[0][2] -=
+        wj_dx * dx[2] * (pi->gravity.old_a[0] - pj->gravity.old_a[0]) / r;
+
+    pj->gravity.grad_a[1][0] -=
+        wj_dx * dx[0] * (pi->gravity.old_a[1] - pj->gravity.old_a[1]) / r;
+    pj->gravity.grad_a[1][1] -=
+        wj_dx * dx[1] * (pi->gravity.old_a[1] - pj->gravity.old_a[1]) / r;
+    pj->gravity.grad_a[1][2] -=
+        wj_dx * dx[2] * (pi->gravity.old_a[1] - pj->gravity.old_a[1]) / r;
+
+    pj->gravity.grad_a[2][0] -=
+        wj_dx * dx[0] * (pi->gravity.old_a[2] - pj->gravity.old_a[2]) / r;
+    pj->gravity.grad_a[2][1] -=
+        wj_dx * dx[1] * (pi->gravity.old_a[2] - pj->gravity.old_a[2]) / r;
+    pj->gravity.grad_a[2][2] -=
+        wj_dx * dx[2] * (pi->gravity.old_a[2] - pj->gravity.old_a[2]) / r;
+  }
 
   hydro_slope_limit_cell_collect(pj, pi, r);
 }
@@ -250,56 +441,145 @@ hydro_gradients_nonsym_collect(float r2, float *dx, float hi, float hj,
   xi = r * hi_inv;
   kernel_deval(xi, &wi, &wi_dx);
 
-  /* Compute gradients for pi */
-  /* there is a sign difference w.r.t. eqn. (6) because of the inverse
-   * definition of dx */
-  pi->primitives.gradients.rho[0] +=
-      (Wi[0] - Wj[0]) * wi *
-      (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]);
-  pi->primitives.gradients.rho[1] +=
-      (Wi[0] - Wj[0]) * wi *
-      (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]);
-  pi->primitives.gradients.rho[2] +=
-      (Wi[0] - Wj[0]) * wi *
-      (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]);
-
-  pi->primitives.gradients.v[0][0] +=
-      (Wi[1] - Wj[1]) * wi *
-      (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]);
-  pi->primitives.gradients.v[0][1] +=
-      (Wi[1] - Wj[1]) * wi *
-      (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]);
-  pi->primitives.gradients.v[0][2] +=
-      (Wi[1] - Wj[1]) * wi *
-      (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]);
-  pi->primitives.gradients.v[1][0] +=
-      (Wi[2] - Wj[2]) * wi *
-      (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]);
-  pi->primitives.gradients.v[1][1] +=
-      (Wi[2] - Wj[2]) * wi *
-      (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]);
-  pi->primitives.gradients.v[1][2] +=
-      (Wi[2] - Wj[2]) * wi *
-      (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]);
-  pi->primitives.gradients.v[2][0] +=
-      (Wi[3] - Wj[3]) * wi *
-      (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]);
-  pi->primitives.gradients.v[2][1] +=
-      (Wi[3] - Wj[3]) * wi *
-      (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]);
-  pi->primitives.gradients.v[2][2] +=
-      (Wi[3] - Wj[3]) * wi *
-      (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]);
-
-  pi->primitives.gradients.P[0] +=
-      (Wi[4] - Wj[4]) * wi *
-      (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]);
-  pi->primitives.gradients.P[1] +=
-      (Wi[4] - Wj[4]) * wi *
-      (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]);
-  pi->primitives.gradients.P[2] +=
-      (Wi[4] - Wj[4]) * wi *
-      (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]);
+  if (pi->density.wcorr > const_gizmo_min_wcorr) {
+    /* Compute gradients for pi */
+    /* there is a sign difference w.r.t. eqn. (6) because of the inverse
+     * definition of dx */
+    pi->primitives.gradients.rho[0] +=
+        (Wi[0] - Wj[0]) * wi *
+        (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]);
+    pi->primitives.gradients.rho[1] +=
+        (Wi[0] - Wj[0]) * wi *
+        (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]);
+    pi->primitives.gradients.rho[2] +=
+        (Wi[0] - Wj[0]) * wi *
+        (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]);
+
+    pi->primitives.gradients.v[0][0] +=
+        (Wi[1] - Wj[1]) * wi *
+        (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]);
+    pi->primitives.gradients.v[0][1] +=
+        (Wi[1] - Wj[1]) * wi *
+        (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]);
+    pi->primitives.gradients.v[0][2] +=
+        (Wi[1] - Wj[1]) * wi *
+        (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]);
+    pi->primitives.gradients.v[1][0] +=
+        (Wi[2] - Wj[2]) * wi *
+        (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]);
+    pi->primitives.gradients.v[1][1] +=
+        (Wi[2] - Wj[2]) * wi *
+        (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]);
+    pi->primitives.gradients.v[1][2] +=
+        (Wi[2] - Wj[2]) * wi *
+        (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]);
+    pi->primitives.gradients.v[2][0] +=
+        (Wi[3] - Wj[3]) * wi *
+        (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]);
+    pi->primitives.gradients.v[2][1] +=
+        (Wi[3] - Wj[3]) * wi *
+        (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]);
+    pi->primitives.gradients.v[2][2] +=
+        (Wi[3] - Wj[3]) * wi *
+        (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]);
+
+    pi->primitives.gradients.P[0] +=
+        (Wi[4] - Wj[4]) * wi *
+        (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]);
+    pi->primitives.gradients.P[1] +=
+        (Wi[4] - Wj[4]) * wi *
+        (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]);
+    pi->primitives.gradients.P[2] +=
+        (Wi[4] - Wj[4]) * wi *
+        (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]);
+
+    pi->gravity.grad_a[0][0] +=
+        (pi->gravity.old_a[0] - pj->gravity.old_a[0]) * wi *
+        (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]);
+    pi->gravity.grad_a[0][1] +=
+        (pi->gravity.old_a[0] - pj->gravity.old_a[0]) * wi *
+        (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]);
+    pi->gravity.grad_a[0][2] +=
+        (pi->gravity.old_a[0] - pj->gravity.old_a[0]) * wi *
+        (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]);
+
+    pi->gravity.grad_a[1][0] +=
+        (pi->gravity.old_a[1] - pj->gravity.old_a[1]) * wi *
+        (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]);
+    pi->gravity.grad_a[1][1] +=
+        (pi->gravity.old_a[1] - pj->gravity.old_a[1]) * wi *
+        (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]);
+    pi->gravity.grad_a[1][2] +=
+        (pi->gravity.old_a[1] - pj->gravity.old_a[1]) * wi *
+        (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]);
+
+    pi->gravity.grad_a[2][0] +=
+        (pi->gravity.old_a[2] - pj->gravity.old_a[2]) * wi *
+        (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]);
+    pi->gravity.grad_a[2][1] +=
+        (pi->gravity.old_a[2] - pj->gravity.old_a[2]) * wi *
+        (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]);
+    pi->gravity.grad_a[2][2] +=
+        (pi->gravity.old_a[2] - pj->gravity.old_a[2]) * wi *
+        (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]);
+  } else {
+    /* Gradient matrix is not well-behaved, switch to SPH gradients */
+
+    pi->primitives.gradients.rho[0] -=
+        wi_dx * dx[0] * (pi->primitives.rho - pj->primitives.rho) / r;
+    pi->primitives.gradients.rho[1] -=
+        wi_dx * dx[1] * (pi->primitives.rho - pj->primitives.rho) / r;
+    pi->primitives.gradients.rho[2] -=
+        wi_dx * dx[2] * (pi->primitives.rho - pj->primitives.rho) / r;
+
+    pi->primitives.gradients.v[0][0] -=
+        wi_dx * dx[0] * (pi->primitives.v[0] - pj->primitives.v[0]) / r;
+    pi->primitives.gradients.v[0][1] -=
+        wi_dx * dx[1] * (pi->primitives.v[0] - pj->primitives.v[0]) / r;
+    pi->primitives.gradients.v[0][2] -=
+        wi_dx * dx[2] * (pi->primitives.v[0] - pj->primitives.v[0]) / r;
+    pi->primitives.gradients.v[1][0] -=
+        wi_dx * dx[0] * (pi->primitives.v[1] - pj->primitives.v[1]) / r;
+    pi->primitives.gradients.v[1][1] -=
+        wi_dx * dx[1] * (pi->primitives.v[1] - pj->primitives.v[1]) / r;
+    pi->primitives.gradients.v[1][2] -=
+        wi_dx * dx[2] * (pi->primitives.v[1] - pj->primitives.v[1]) / r;
+
+    pi->primitives.gradients.v[2][0] -=
+        wi_dx * dx[0] * (pi->primitives.v[2] - pj->primitives.v[2]) / r;
+    pi->primitives.gradients.v[2][1] -=
+        wi_dx * dx[1] * (pi->primitives.v[2] - pj->primitives.v[2]) / r;
+    pi->primitives.gradients.v[2][2] -=
+        wi_dx * dx[2] * (pi->primitives.v[2] - pj->primitives.v[2]) / r;
+
+    pi->primitives.gradients.P[0] -=
+        wi_dx * dx[0] * (pi->primitives.P - pj->primitives.P) / r;
+    pi->primitives.gradients.P[1] -=
+        wi_dx * dx[1] * (pi->primitives.P - pj->primitives.P) / r;
+    pi->primitives.gradients.P[2] -=
+        wi_dx * dx[2] * (pi->primitives.P - pj->primitives.P) / r;
+
+    pi->gravity.grad_a[0][0] -=
+        wi_dx * dx[0] * (pi->gravity.old_a[0] - pj->gravity.old_a[0]) / r;
+    pi->gravity.grad_a[0][1] -=
+        wi_dx * dx[1] * (pi->gravity.old_a[0] - pj->gravity.old_a[0]) / r;
+    pi->gravity.grad_a[0][2] -=
+        wi_dx * dx[2] * (pi->gravity.old_a[0] - pj->gravity.old_a[0]) / r;
+
+    pi->gravity.grad_a[1][0] -=
+        wi_dx * dx[0] * (pi->gravity.old_a[1] - pj->gravity.old_a[1]) / r;
+    pi->gravity.grad_a[1][1] -=
+        wi_dx * dx[1] * (pi->gravity.old_a[1] - pj->gravity.old_a[1]) / r;
+    pi->gravity.grad_a[1][2] -=
+        wi_dx * dx[2] * (pi->gravity.old_a[1] - pj->gravity.old_a[1]) / r;
+
+    pi->gravity.grad_a[2][0] -=
+        wi_dx * dx[0] * (pi->gravity.old_a[2] - pj->gravity.old_a[2]) / r;
+    pi->gravity.grad_a[2][1] -=
+        wi_dx * dx[1] * (pi->gravity.old_a[2] - pj->gravity.old_a[2]) / r;
+    pi->gravity.grad_a[2][2] -=
+        wi_dx * dx[2] * (pi->gravity.old_a[2] - pj->gravity.old_a[2]) / r;
+  }
 
   hydro_slope_limit_cell_collect(pi, pj, r);
 }
@@ -319,23 +599,73 @@ __attribute__((always_inline)) INLINE static void hydro_gradients_finalize(
   ih = 1.0f / h;
   const float ihdim = pow_dimension(ih);
 
-  p->primitives.gradients.rho[0] *= ihdim;
-  p->primitives.gradients.rho[1] *= ihdim;
-  p->primitives.gradients.rho[2] *= ihdim;
-
-  p->primitives.gradients.v[0][0] *= ihdim;
-  p->primitives.gradients.v[0][1] *= ihdim;
-  p->primitives.gradients.v[0][2] *= ihdim;
-  p->primitives.gradients.v[1][0] *= ihdim;
-  p->primitives.gradients.v[1][1] *= ihdim;
-  p->primitives.gradients.v[1][2] *= ihdim;
-  p->primitives.gradients.v[2][0] *= ihdim;
-  p->primitives.gradients.v[2][1] *= ihdim;
-  p->primitives.gradients.v[2][2] *= ihdim;
-
-  p->primitives.gradients.P[0] *= ihdim;
-  p->primitives.gradients.P[1] *= ihdim;
-  p->primitives.gradients.P[2] *= ihdim;
+  if (p->density.wcorr > const_gizmo_min_wcorr) {
+    p->primitives.gradients.rho[0] *= ihdim;
+    p->primitives.gradients.rho[1] *= ihdim;
+    p->primitives.gradients.rho[2] *= ihdim;
+
+    p->primitives.gradients.v[0][0] *= ihdim;
+    p->primitives.gradients.v[0][1] *= ihdim;
+    p->primitives.gradients.v[0][2] *= ihdim;
+    p->primitives.gradients.v[1][0] *= ihdim;
+    p->primitives.gradients.v[1][1] *= ihdim;
+    p->primitives.gradients.v[1][2] *= ihdim;
+    p->primitives.gradients.v[2][0] *= ihdim;
+    p->primitives.gradients.v[2][1] *= ihdim;
+    p->primitives.gradients.v[2][2] *= ihdim;
+
+    p->primitives.gradients.P[0] *= ihdim;
+    p->primitives.gradients.P[1] *= ihdim;
+    p->primitives.gradients.P[2] *= ihdim;
+
+    p->gravity.grad_a[0][0] *= ihdim;
+    p->gravity.grad_a[0][1] *= ihdim;
+    p->gravity.grad_a[0][2] *= ihdim;
+
+    p->gravity.grad_a[1][0] *= ihdim;
+    p->gravity.grad_a[1][1] *= ihdim;
+    p->gravity.grad_a[1][2] *= ihdim;
+
+    p->gravity.grad_a[2][0] *= ihdim;
+    p->gravity.grad_a[2][1] *= ihdim;
+    p->gravity.grad_a[2][2] *= ihdim;
+  } else {
+    const float ihdimp1 = pow_dimension_plus_one(ih);
+
+    float volume = p->geometry.volume;
+
+    /* finalize gradients by multiplying with volume */
+    p->primitives.gradients.rho[0] *= ihdimp1 * volume;
+    p->primitives.gradients.rho[1] *= ihdimp1 * volume;
+    p->primitives.gradients.rho[2] *= ihdimp1 * volume;
+
+    p->primitives.gradients.v[0][0] *= ihdimp1 * volume;
+    p->primitives.gradients.v[0][1] *= ihdimp1 * volume;
+    p->primitives.gradients.v[0][2] *= ihdimp1 * volume;
+
+    p->primitives.gradients.v[1][0] *= ihdimp1 * volume;
+    p->primitives.gradients.v[1][1] *= ihdimp1 * volume;
+    p->primitives.gradients.v[1][2] *= ihdimp1 * volume;
+    p->primitives.gradients.v[2][0] *= ihdimp1 * volume;
+    p->primitives.gradients.v[2][1] *= ihdimp1 * volume;
+    p->primitives.gradients.v[2][2] *= ihdimp1 * volume;
+
+    p->primitives.gradients.P[0] *= ihdimp1 * volume;
+    p->primitives.gradients.P[1] *= ihdimp1 * volume;
+    p->primitives.gradients.P[2] *= ihdimp1 * volume;
+
+    p->gravity.grad_a[0][0] *= ihdimp1 * volume;
+    p->gravity.grad_a[0][1] *= ihdimp1 * volume;
+    p->gravity.grad_a[0][2] *= ihdimp1 * volume;
+
+    p->gravity.grad_a[1][0] *= ihdimp1 * volume;
+    p->gravity.grad_a[1][1] *= ihdimp1 * volume;
+    p->gravity.grad_a[1][2] *= ihdimp1 * volume;
+
+    p->gravity.grad_a[2][0] *= ihdimp1 * volume;
+    p->gravity.grad_a[2][1] *= ihdimp1 * volume;
+    p->gravity.grad_a[2][2] *= ihdimp1 * volume;
+  }
 
   hydro_slope_limit_cell(p);
 }
diff --git a/src/hydro/Gizmo/hydro_iact.h b/src/hydro/Gizmo/hydro_iact.h
index d707e0ee1b5707086393ea206ea9f0f60f9c1853..8798dc859a790a83ab7a3b6f1709b1302f574581 100644
--- a/src/hydro/Gizmo/hydro_iact.h
+++ b/src/hydro/Gizmo/hydro_iact.h
@@ -23,6 +23,8 @@
 #include "hydro_gradients.h"
 #include "riemann.h"
 
+#define GIZMO_VOLUME_CORRECTION
+
 /**
  * @brief Calculate the volume interaction between particle i and particle j
  *
@@ -62,6 +64,10 @@ __attribute__((always_inline)) INLINE static void runner_iact_density(
   for (k = 0; k < 3; k++)
     for (l = 0; l < 3; l++) pi->geometry.matrix_E[k][l] += dx[k] * dx[l] * wi;
 
+  pi->geometry.centroid[0] -= dx[0] * wi;
+  pi->geometry.centroid[1] -= dx[1] * wi;
+  pi->geometry.centroid[2] -= dx[2] * wi;
+
   /* Compute density of pj. */
   h_inv = 1.0 / hj;
   xj = r * h_inv;
@@ -74,6 +80,10 @@ __attribute__((always_inline)) INLINE static void runner_iact_density(
   pj->geometry.volume += wj;
   for (k = 0; k < 3; k++)
     for (l = 0; l < 3; l++) pj->geometry.matrix_E[k][l] += dx[k] * dx[l] * wj;
+
+  pj->geometry.centroid[0] += dx[0] * wj;
+  pj->geometry.centroid[1] += dx[1] * wj;
+  pj->geometry.centroid[2] += dx[2] * wj;
 }
 
 /**
@@ -117,6 +127,10 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_density(
   pi->geometry.volume += wi;
   for (k = 0; k < 3; k++)
     for (l = 0; l < 3; l++) pi->geometry.matrix_E[k][l] += dx[k] * dx[l] * wi;
+
+  pi->geometry.centroid[0] -= dx[0] * wi;
+  pi->geometry.centroid[1] -= dx[1] * wi;
+  pi->geometry.centroid[2] -= dx[2] * wi;
 }
 
 /**
@@ -325,14 +339,8 @@ __attribute__((always_inline)) INLINE static void runner_iact_fluxes_common(
 
   /* calculate the maximal signal velocity */
   if (Wi[0] > 0.0f && Wj[0] > 0.0f) {
-#ifdef EOS_ISOTHERMAL_GAS
-    /* we use a value that is slightly higher than necessary, since the correct
-       value does not always work */
-    vmax = 2.5 * const_isothermal_soundspeed;
-#else
     vmax =
         sqrtf(hydro_gamma * Wi[4] / Wi[0]) + sqrtf(hydro_gamma * Wj[4] / Wj[0]);
-#endif
   } else {
     vmax = 0.0f;
   }
@@ -381,23 +389,63 @@ __attribute__((always_inline)) INLINE static void runner_iact_fluxes_common(
   /* Compute area */
   /* eqn. (7) */
   Anorm = 0.0f;
-  for (k = 0; k < 3; k++) {
-    /* we add a minus sign since dx is pi->x - pj->x */
-    A[k] = -Vi * (Bi[k][0] * dx[0] + Bi[k][1] * dx[1] + Bi[k][2] * dx[2]) * wi *
-               hi_inv_dim -
-           Vj * (Bj[k][0] * dx[0] + Bj[k][1] * dx[1] + Bj[k][2] * dx[2]) * wj *
-               hj_inv_dim;
-    Anorm += A[k] * A[k];
+  if (pi->density.wcorr > const_gizmo_min_wcorr &&
+      pj->density.wcorr > const_gizmo_min_wcorr) {
+    /* in principle, we use Vi and Vj as weights for the left and right
+       contributions to the generalized surface vector.
+       However, if Vi and Vj are very different (because they have very
+       different smoothing lengths), then the expressions below are more
+       stable. */
+    float Xi = Vi;
+    float Xj = Vj;
+#ifdef GIZMO_VOLUME_CORRECTION
+    if (fabsf(Vi - Vj) / fminf(Vi, Vj) > 1.5 * hydro_dimension) {
+      Xi = (Vi * hj + Vj * hi) / (hi + hj);
+      Xj = Xi;
+    }
+#endif
+    for (k = 0; k < 3; k++) {
+      /* we add a minus sign since dx is pi->x - pj->x */
+      A[k] = -Xi * (Bi[k][0] * dx[0] + Bi[k][1] * dx[1] + Bi[k][2] * dx[2]) *
+                 wj * hj_inv_dim -
+             Xj * (Bj[k][0] * dx[0] + Bj[k][1] * dx[1] + Bj[k][2] * dx[2]) *
+                 wi * hi_inv_dim;
+      Anorm += A[k] * A[k];
+    }
+  } else {
+    /* ill-conditioned gradient matrix: revert to SPH face area */
+    Anorm = -(hidp1 * Vi * Vi * wi_dx + hjdp1 * Vj * Vj * wj_dx) * ri;
+    A[0] = -Anorm * dx[0];
+    A[1] = -Anorm * dx[1];
+    A[2] = -Anorm * dx[2];
+    Anorm *= Anorm * r2;
   }
 
-  if (!Anorm) {
+  if (Anorm == 0.) {
     /* if the interface has no area, nothing happens and we return */
     /* continuing results in dividing by zero and NaN's... */
     return;
   }
 
-  /* compute the normal vector of the interface */
   Anorm = sqrtf(Anorm);
+
+#ifdef SWIFT_DEBUG_CHECKS
+  /* For stability reasons, we do require A and dx to have opposite
+     directions (basically meaning that the surface normal for the surface
+     always points from particle i to particle j, as it would in a real
+     moving-mesh code). If not, our scheme is no longer upwind and hence can
+     become unstable. */
+  float dA_dot_dx = A[0] * dx[0] + A[1] * dx[1] + A[2] * dx[2];
+  /* In GIZMO, Phil Hopkins reverts to an SPH integration scheme if this
+     happens. We currently just ignore this case and display a message. */
+  const float rdim = pow_dimension(r);
+  if (dA_dot_dx > 1.e-6 * rdim) {
+    message("Ill conditioned gradient matrix (%g %g %g %g %g)!", dA_dot_dx,
+            Anorm, Vi, Vj, r);
+  }
+#endif
+
+  /* compute the normal vector of the interface */
   for (k = 0; k < 3; k++) n_unit[k] = A[k] / Anorm;
 
   /* Compute interface position (relative to pi, since we don't need the actual
@@ -436,43 +484,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_fluxes_common(
   /* we don't need to rotate, we can use the unit vector in the Riemann problem
    * itself (see GIZMO) */
 
-  if (Wi[0] < 0.0f || Wj[0] < 0.0f || Wi[4] < 0.0f || Wj[4] < 0.0f) {
-    printf("mindt: %g\n", mindt);
-    printf("WL: %g %g %g %g %g\n", pi->primitives.rho, pi->primitives.v[0],
-           pi->primitives.v[1], pi->primitives.v[2], pi->primitives.P);
-#ifdef USE_GRADIENTS
-    printf("dWL: %g %g %g %g %g\n", dWi[0], dWi[1], dWi[2], dWi[3], dWi[4]);
-#endif
-    printf("gradWL[0]: %g %g %g\n", pi->primitives.gradients.rho[0],
-           pi->primitives.gradients.rho[1], pi->primitives.gradients.rho[2]);
-    printf("gradWL[1]: %g %g %g\n", pi->primitives.gradients.v[0][0],
-           pi->primitives.gradients.v[0][1], pi->primitives.gradients.v[0][2]);
-    printf("gradWL[2]: %g %g %g\n", pi->primitives.gradients.v[1][0],
-           pi->primitives.gradients.v[1][1], pi->primitives.gradients.v[1][2]);
-    printf("gradWL[3]: %g %g %g\n", pi->primitives.gradients.v[2][0],
-           pi->primitives.gradients.v[2][1], pi->primitives.gradients.v[2][2]);
-    printf("gradWL[4]: %g %g %g\n", pi->primitives.gradients.P[0],
-           pi->primitives.gradients.P[1], pi->primitives.gradients.P[2]);
-    printf("WL': %g %g %g %g %g\n", Wi[0], Wi[1], Wi[2], Wi[3], Wi[4]);
-    printf("WR: %g %g %g %g %g\n", pj->primitives.rho, pj->primitives.v[0],
-           pj->primitives.v[1], pj->primitives.v[2], pj->primitives.P);
-#ifdef USE_GRADIENTS
-    printf("dWR: %g %g %g %g %g\n", dWj[0], dWj[1], dWj[2], dWj[3], dWj[4]);
-#endif
-    printf("gradWR[0]: %g %g %g\n", pj->primitives.gradients.rho[0],
-           pj->primitives.gradients.rho[1], pj->primitives.gradients.rho[2]);
-    printf("gradWR[1]: %g %g %g\n", pj->primitives.gradients.v[0][0],
-           pj->primitives.gradients.v[0][1], pj->primitives.gradients.v[0][2]);
-    printf("gradWR[2]: %g %g %g\n", pj->primitives.gradients.v[1][0],
-           pj->primitives.gradients.v[1][1], pj->primitives.gradients.v[1][2]);
-    printf("gradWR[3]: %g %g %g\n", pj->primitives.gradients.v[2][0],
-           pj->primitives.gradients.v[2][1], pj->primitives.gradients.v[2][2]);
-    printf("gradWR[4]: %g %g %g\n", pj->primitives.gradients.P[0],
-           pj->primitives.gradients.P[1], pj->primitives.gradients.P[2]);
-    printf("WR': %g %g %g %g %g\n", Wj[0], Wj[1], Wj[2], Wj[3], Wj[4]);
-    error("Negative density or pressure!\n");
-  }
-
   float totflux[5];
   riemann_solve_for_flux(Wi, Wj, n_unit, vij, totflux);
 
diff --git a/src/hydro/Gizmo/hydro_io.h b/src/hydro/Gizmo/hydro_io.h
index 236106a1fb04cc2e5b84f997a2389d583ce17cff..3d58be2f47c4e1904aaac5f69d1862f1d453e488 100644
--- a/src/hydro/Gizmo/hydro_io.h
+++ b/src/hydro/Gizmo/hydro_io.h
@@ -127,7 +127,7 @@ float convert_Etot(struct engine* e, struct part* p) {
 void hydro_write_particles(struct part* parts, struct io_props* list,
                            int* num_fields) {
 
-  *num_fields = 14;
+  *num_fields = 11;
 
   /* List what we want to write */
   list[0] = io_make_output_field("Coordinates", DOUBLE, 3, UNIT_CONV_LENGTH,
@@ -143,22 +143,16 @@ void hydro_write_particles(struct part* parts, struct io_props* list,
                                               parts, primitives.P, convert_u);
   list[5] = io_make_output_field("ParticleIDs", ULONGLONG, 1,
                                  UNIT_CONV_NO_UNITS, parts, id);
-  list[6] = io_make_output_field("Acceleration", FLOAT, 3,
-                                 UNIT_CONV_ACCELERATION, parts, a_hydro);
-  list[7] = io_make_output_field("Density", FLOAT, 1, UNIT_CONV_DENSITY, parts,
+  list[6] = io_make_output_field("Density", FLOAT, 1, UNIT_CONV_DENSITY, parts,
                                  primitives.rho);
-  list[8] = io_make_output_field("Volume", FLOAT, 1, UNIT_CONV_VOLUME, parts,
-                                 geometry.volume);
-  list[9] = io_make_output_field("GradDensity", FLOAT, 3, UNIT_CONV_DENSITY,
-                                 parts, primitives.gradients.rho);
-  list[10] = io_make_output_field_convert_part(
+  list[7] = io_make_output_field_convert_part(
       "Entropy", FLOAT, 1, UNIT_CONV_ENTROPY, parts, primitives.P, convert_A);
-  list[11] = io_make_output_field("Pressure", FLOAT, 1, UNIT_CONV_PRESSURE,
-                                  parts, primitives.P);
-  list[12] =
+  list[8] = io_make_output_field("Pressure", FLOAT, 1, UNIT_CONV_PRESSURE,
+                                 parts, primitives.P);
+  list[9] =
       io_make_output_field_convert_part("TotEnergy", FLOAT, 1, UNIT_CONV_ENERGY,
                                         parts, conserved.energy, convert_Etot);
-  list[13] = io_make_output_field("GravAcceleration", FLOAT, 3,
+  list[10] = io_make_output_field("GravAcceleration", FLOAT, 3,
                                   UNIT_CONV_ACCELERATION, parts, gravity.old_a);
 }
 
diff --git a/src/hydro/Gizmo/hydro_part.h b/src/hydro/Gizmo/hydro_part.h
index d552a3f7e86031311098293845f1aa11270c417f..6c96004847ae23b46ec3f5182f742e0e84f1118d 100644
--- a/src/hydro/Gizmo/hydro_part.h
+++ b/src/hydro/Gizmo/hydro_part.h
@@ -148,6 +148,9 @@ struct part {
     /* Total surface area of the particle. */
     float Atot;
 
+    /* Centroid of the "cell". */
+    float centroid[3];
+
   } geometry;
 
   /* Variables used for timestep calculation (currently not used). */
@@ -201,6 +204,8 @@ struct part {
     /* Previous value of the gravitational acceleration. */
     float old_a[3];
 
+    float grad_a[3][3];
+
     /* Previous value of the mass flux vector. */
     float old_mflux[3];
 
diff --git a/src/hydro/Gizmo/hydro_slope_limiters_face.h b/src/hydro/Gizmo/hydro_slope_limiters_face.h
index 7ae5dd2eb073d9aae8ab6f2efffdf8df15b4bb4a..ba96063d661a93a4efc4069ff7e7269a4ac58c3b 100644
--- a/src/hydro/Gizmo/hydro_slope_limiters_face.h
+++ b/src/hydro/Gizmo/hydro_slope_limiters_face.h
@@ -53,14 +53,22 @@ hydro_slope_limit_face_quantity(float phi_i, float phi_j, float phi_mid0,
   if ((phimax + delta1) * phimax > 0.0f) {
     phiplus = phimax + delta1;
   } else {
-    phiplus = phimax / (1.0f + delta1 / fabs(phimax));
+    if (phimax != 0.) {
+      phiplus = phimax / (1.0f + delta1 / fabs(phimax));
+    } else {
+      phiplus = 0.;
+    }
   }
 
   /* if sign(phimin-delta1) == sign(phimin) */
   if ((phimin - delta1) * phimin > 0.0f) {
     phiminus = phimin - delta1;
   } else {
-    phiminus = phimin / (1.0f + delta1 / fabs(phimin));
+    if (phimin != 0.) {
+      phiminus = phimin / (1.0f + delta1 / fabs(phimin));
+    } else {
+      phiminus = 0.;
+    }
   }
 
   if (phi_i < phi_j) {
diff --git a/src/hydro/Gizmo/hydro_unphysical.h b/src/hydro/Gizmo/hydro_unphysical.h
new file mode 100644
index 0000000000000000000000000000000000000000..517e3e0918ad340580e270477c0a166590546850
--- /dev/null
+++ b/src/hydro/Gizmo/hydro_unphysical.h
@@ -0,0 +1,55 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2017 Bert Vandenbroucke (bert.vandenbroucke@gmail.com)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+#ifndef SWIFT_HYDRO_UNPHYSICAL_H
+#define SWIFT_HYDRO_UNPHYSICAL_H
+
+#if defined(GIZMO_UNPHYSICAL_ERROR) || defined(GIZMO_UNPHYSICAL_RESCUE)
+
+#if defined(GIZMO_UNPHYSICAL_ERROR)
+
+/*! @brief Crash whenever an unphysical value is detected. */
+#define gizmo_unphysical_message(name, quantity) \
+  error("Unphysical " name " detected (%g)!", quantity);
+
+#elif defined(GIZMO_UNPHYSICAL_WARNING)
+
+/*! @brief Warn when an unphysical value is detected (RESCUE mode only). */
+#define gizmo_unphysical_message(name, quantity) \
+  message("Unphysical " name " detected (%g), reset to 0!", quantity);
+
+#else
+
+/*! @brief Don't tell anyone an unphysical value was detected. */
+#define gizmo_unphysical_message(name, quantity)
+
+#endif
+/*! @brief Report and reset to 0 a negative quantity (must be an lvalue). */
+#define gizmo_check_physical_quantity(name, quantity) \
+  if (quantity < 0.) {                                \
+    gizmo_unphysical_message(name, quantity);         \
+    quantity = 0.;                                    \
+  }
+
+#else  // defined(GIZMO_UNPHYSICAL_ERROR) || defined(GIZMO_UNPHYSICAL_RESCUE)
+/*! @brief Checks disabled: the macro expands to nothing. */
+#define gizmo_check_physical_quantity(name, quantity)
+
+#endif  // defined(GIZMO_UNPHYSICAL_ERROR) || defined(GIZMO_UNPHYSICAL_RESCUE)
+
+#endif  // SWIFT_HYDRO_UNPHYSICAL_H
diff --git a/src/hydro/Gizmo/hydro_velocities.h b/src/hydro/Gizmo/hydro_velocities.h
new file mode 100644
index 0000000000000000000000000000000000000000..08ba1f972b2f7a7b8a01ac4750c50a36f69784d0
--- /dev/null
+++ b/src/hydro/Gizmo/hydro_velocities.h
@@ -0,0 +1,162 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2017 Bert Vandenbroucke (bert.vandenbroucke@gmail.com)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+#ifndef SWIFT_HYDRO_VELOCITIES_H
+#define SWIFT_HYDRO_VELOCITIES_H
+
+/**
+ * @brief Initialize the GIZMO particle velocity before the start of the run:
+ * zero if GIZMO_FIX_PARTICLES is defined, the primitive velocity otherwise.
+ *
+ * @param p The particle to act upon (p->v is set).
+ * @param xp The extended particle data (xp->v_full receives a copy of p->v).
+ */
+__attribute__((always_inline)) INLINE static void hydro_velocities_init(
+    struct part* restrict p, struct xpart* restrict xp) {
+
+#ifdef GIZMO_FIX_PARTICLES
+  p->v[0] = 0.;
+  p->v[1] = 0.;
+  p->v[2] = 0.;
+#else
+  p->v[0] = p->primitives.v[0];
+  p->v[1] = p->primitives.v[1];
+  p->v[2] = p->primitives.v[2];
+#endif
+
+  xp->v_full[0] = p->v[0];
+  xp->v_full[1] = p->v[1];
+  xp->v_full[2] = p->v[2];
+}
+
+/**
+ * @brief Copy xp->v_full into p->force.v_full, the velocity used to deboost
+ * fluid velocities in the force loop (no-op if GIZMO_FIX_PARTICLES is set).
+ *
+ * @param p The particle to act upon (p->force.v_full is set).
+ * @param xp The extended particle data to read v_full from.
+ */
+__attribute__((always_inline)) INLINE static void
+hydro_velocities_prepare_force(struct part* restrict p,
+                               const struct xpart* restrict xp) {
+
+#ifndef GIZMO_FIX_PARTICLES
+  p->force.v_full[0] = xp->v_full[0];
+  p->force.v_full[1] = xp->v_full[1];
+  p->force.v_full[2] = xp->v_full[2];
+#endif
+}
+
+/**
+ * @brief Set the variables that will be used to update the smoothing length
+ * during the drift (these will depend on the movement of the particles).
+ *
+ * @param p The particle to act upon (p->force.h_dt is zeroed or rescaled).
+ */
+__attribute__((always_inline)) INLINE static void hydro_velocities_end_force(
+    struct part* restrict p) {
+
+#ifdef GIZMO_FIX_PARTICLES
+  /* disable the smoothing length update, since the smoothing lengths should
+     stay the same for all steps (particles don't move) */
+  p->force.h_dt = 0.0f;
+#else
+  /* Add normalization to h_dt: scale it by h / (number of dimensions). */
+  p->force.h_dt *= p->h * hydro_dimension_inv;
+#endif
+}
+
+/**
+ * @brief Set the velocity of a GIZMO particle, based on the values of its
+ * primitive variables and the geometry of its mesh-free "cell".
+ *
+ * @param p The particle to act upon (p->v and, if set, p->gpart->v_full).
+ * @param xp The extended particle data (xp->v_full receives a copy of p->v).
+ */
+__attribute__((always_inline)) INLINE static void hydro_velocities_set(
+    struct part* restrict p, struct xpart* restrict xp) {
+
+/* We first set the particle velocity. */
+#ifdef GIZMO_FIX_PARTICLES
+
+  p->v[0] = 0.;
+  p->v[1] = 0.;
+  p->v[2] = 0.;
+
+#else  // GIZMO_FIX_PARTICLES
+
+  if (p->conserved.mass > 0. && p->primitives.rho > 0.) {
+    /* Normal case: set particle velocity to fluid velocity. */
+    p->v[0] = p->conserved.momentum[0] / p->conserved.mass;
+    p->v[1] = p->conserved.momentum[1] / p->conserved.mass;
+    p->v[2] = p->conserved.momentum[2] / p->conserved.mass;
+
+#ifdef GIZMO_STEER_MOTION
+
+    /* Add a correction to the velocity to keep particle positions close
+       enough to the centroid of their mesh-free "cell". R is the radius
+       returned by get_radius_dimension_sphere() for the particle volume. */
+    /* The correction term below is the same one described in Springel (2010).
+     */
+    float ds[3];
+    ds[0] = p->geometry.centroid[0];
+    ds[1] = p->geometry.centroid[1];
+    ds[2] = p->geometry.centroid[2];
+    const float d = sqrtf(ds[0] * ds[0] + ds[1] * ds[1] + ds[2] * ds[2]);
+    const float R = get_radius_dimension_sphere(p->geometry.volume);
+    const float eta = 0.25;
+    const float etaR = eta * R;
+    const float xi = 1.;
+    const float soundspeed =
+        sqrtf(hydro_gamma * p->primitives.P / p->primitives.rho);
+    /* We only apply the correction if the offset between centroid and
+       position is too large (d > 0.9 * eta * R); its magnitude is ramped up
+       linearly until d reaches 1.1 * eta * R, where it is xi * soundspeed. */
+    if (d > 0.9 * etaR) {
+      float fac = xi * soundspeed / d;
+      if (d < 1.1 * etaR) {
+        fac *= 5. * (d - 0.9 * etaR) / etaR;
+      }
+      p->v[0] -= ds[0] * fac;
+      p->v[1] -= ds[1] * fac;
+      p->v[2] -= ds[2] * fac;
+    }
+
+#endif  // GIZMO_STEER_MOTION
+  } else {
+    /* Vacuum particles have no fluid velocity. */
+    p->v[0] = 0.;
+    p->v[1] = 0.;
+    p->v[2] = 0.;
+  }
+
+#endif  // GIZMO_FIX_PARTICLES
+
+  /* Now make sure all velocity variables are up to date. */
+  xp->v_full[0] = p->v[0];
+  xp->v_full[1] = p->v[1];
+  xp->v_full[2] = p->v[2];
+
+  if (p->gpart) {
+    p->gpart->v_full[0] = p->v[0];
+    p->gpart->v_full[1] = p->v[1];
+    p->gpart->v_full[2] = p->v[2];
+  }
+}
+
+#endif  // SWIFT_HYDRO_VELOCITIES_H
diff --git a/src/minmax.h b/src/minmax.h
index a53093663c79cf4280d136747663552e49c7f1b2..9d92cd71d849dba615fdb05bc342014e0593d989 100644
--- a/src/minmax.h
+++ b/src/minmax.h
@@ -43,18 +43,4 @@
     _a > _b ? _a : _b;            \
   })
 
-/**
- * @brief Limits the value of x to be between a and b
- *
- * Only wraps once. If x > 2b, the returned value will be larger than b.
- * Similarly for x < -b.
- */
-#define box_wrap(x, a, b)                               \
-  ({                                                    \
-    const __typeof__(x) _x = (x);                       \
-    const __typeof__(a) _a = (a);                       \
-    const __typeof__(b) _b = (b);                       \
-    _x < _a ? (_x + _b) : ((_x > _b) ? (_x - _b) : _x); \
-  })
-
 #endif /* SWIFT_MINMAX_H */
diff --git a/src/multipole.h b/src/multipole.h
index b9d49dcf0fc3b605849f7b058aef14843b73517d..23f5194a30b7316aac15073cba36dc404efa21c1 100644
--- a/src/multipole.h
+++ b/src/multipole.h
@@ -28,7 +28,6 @@
 #include <string.h>
 
 /* Includes. */
-//#include "active.h"
 #include "align.h"
 #include "const.h"
 #include "error.h"
@@ -37,8 +36,8 @@
 #include "gravity_softened_derivatives.h"
 #include "inline.h"
 #include "kernel_gravity.h"
-#include "minmax.h"
 #include "part.h"
+#include "periodic.h"
 #include "vector_power.h"
 
 #define multipole_align 128
diff --git a/src/partition.c b/src/partition.c
index 499efab263a9031b0116f073af8cebd5fef0c2eb..c57918745c11d2858b40eefc218e2551e635d6fb 100644
--- a/src/partition.c
+++ b/src/partition.c
@@ -524,7 +524,7 @@ static void repart_edge_metis(int partweights, int bothweights, int nodeID,
         t->type != task_type_sub_self && t->type != task_type_sub_self &&
         t->type != task_type_ghost && t->type != task_type_kick1 &&
         t->type != task_type_kick2 && t->type != task_type_timestep &&
-        t->type != task_type_drift)
+        t->type != task_type_drift_part && t->type != task_type_drift_gpart)
       continue;
 
     /* Get the task weight. */
@@ -557,7 +557,7 @@ static void repart_edge_metis(int partweights, int bothweights, int nodeID,
     /* Different weights for different tasks. */
     if (t->type == task_type_ghost || t->type == task_type_kick1 ||
         t->type == task_type_kick2 || t->type == task_type_timestep ||
-        t->type == task_type_drift) {
+        t->type == task_type_drift_part || t->type == task_type_drift_gpart) {
       /* Particle updates add only to vertex weight. */
       if (taskvweights) weights_v[cid] += w;
 
diff --git a/src/periodic.h b/src/periodic.h
new file mode 100644
index 0000000000000000000000000000000000000000..5874b8742e89c5c93727111adb5b289cff4cb6a6
--- /dev/null
+++ b/src/periodic.h
@@ -0,0 +1,75 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2016   Matthieu Schaller (matthieu.schaller@durham.ac.uk).
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+#ifndef SWIFT_PERIODIC_H
+#define SWIFT_PERIODIC_H
+
+/* Config parameters. */
+#include "../config.h"
+
+/* Includes. */
+#include "inline.h"
+
+/**
+ * @brief Limits the value of x to be between a and b
+ *
+ * Only wraps once, by adding or subtracting b. If x > 2b, the returned value
+ * will be larger than b. Similarly for x < -b.
+ */
+#define box_wrap(x, a, b)                               \
+  ({                                                    \
+    const __typeof__(x) _x = (x);                       \
+    const __typeof__(a) _a = (a);                       \
+    const __typeof__(b) _b = (b);                       \
+    _x < _a ? (_x + _b) : ((_x > _b) ? (_x - _b) : _x); \
+  })
+
+/**
+ * @brief Find the smallest distance dx along one axis within a box of size
+ * box_size
+ *
+ * Only wraps once. If dx > 1.5 * box_size, the returned value will be larger
+ * than 0.5 * box_size. Similarly for dx < -1.5 * box_size.
+ *
+ * @param dx The separation along one axis (double precision).
+ * @param box_size The size of the periodic box along that axis.
+ */
+__attribute__((always_inline)) INLINE static double nearest(double dx,
+                                                            double box_size) {
+  return dx > 0.5 * box_size ? (dx - box_size)
+                             : ((dx < -0.5 * box_size) ? (dx + box_size) : dx);
+}
+
+/**
+ * @brief Find the smallest distance dx along one axis within a box of size
+ * box_size (single-precision version)
+ *
+ * Only wraps once. If dx > 1.5 * box_size, the returned value will be larger
+ * than 0.5 * box_size. Similarly for dx < -1.5 * box_size.
+ *
+ * @param dx The separation along one axis (single precision).
+ * @param box_size The size of the periodic box along that axis.
+ */
+__attribute__((always_inline)) INLINE static float nearestf(float dx,
+                                                            float box_size) {
+  return dx > 0.5f * box_size
+             ? (dx - box_size)
+             : ((dx < -0.5f * box_size) ? (dx + box_size) : dx);
+}
+
+#endif /* SWIFT_PERIODIC_H */
diff --git a/src/potential/sine_wave/potential.h b/src/potential/sine_wave/potential.h
index e2e2b8ffcc170c28a5facc8373a81746811a9991..1a4ee8aae8238c5db4c99eacb9e96bd967bcc7c4 100644
--- a/src/potential/sine_wave/potential.h
+++ b/src/potential/sine_wave/potential.h
@@ -43,6 +43,9 @@ struct external_potential {
   /*! Amplitude of the sine wave. */
   double amplitude;
 
+  /*! Growth time of the potential. */
+  double growth_time;
+
   /*! Time-step limiting factor. */
   double timestep_limit;
 };
@@ -76,7 +79,13 @@ __attribute__((always_inline)) INLINE static void external_gravity_acceleration(
     double time, const struct external_potential* restrict potential,
     const struct phys_const* restrict phys_const, struct gpart* restrict g) {
 
-  g->a_grav[0] = potential->amplitude * sin(2. * M_PI * g->x[0]) /
+  float Acorr = 1.;
+
+  if (time < potential->growth_time) {
+    Acorr = time / potential->growth_time;
+  }
+
+  g->a_grav[0] = potential->amplitude * Acorr * sin(2. * M_PI * g->x[0]) /
                  phys_const->const_newton_G;
 }
 
@@ -114,6 +123,8 @@ static INLINE void potential_init_backend(
 
   potential->amplitude =
       parser_get_param_double(parameter_file, "SineWavePotential:amplitude");
+  potential->growth_time = parser_get_opt_param_double(
+      parameter_file, "SineWavePotential:growth_time", 0.);
   potential->timestep_limit = parser_get_param_double(
       parameter_file, "SineWavePotential:timestep_limit");
 }
diff --git a/src/riemann.h b/src/riemann.h
index 685d40708e598249151f6cbe13be016edea79553..ab6d162514326778e8d6478e07c9bae2947a7c2a 100644
--- a/src/riemann.h
+++ b/src/riemann.h
@@ -25,10 +25,8 @@
 #if defined(RIEMANN_SOLVER_EXACT)
 
 #define RIEMANN_SOLVER_IMPLEMENTATION "Exact Riemann solver (Toro 2009)"
-#if defined(EOS_IDEAL_GAS)
+#if defined(EOS_IDEAL_GAS) || defined(EOS_ISOTHERMAL_GAS)
 #include "riemann/riemann_exact.h"
-#elif defined(EOS_ISOTHERMAL_GAS)
-#include "riemann/riemann_exact_isothermal.h"
 #else
 #error "The Exact Riemann solver is incompatible with this equation of state!"
 #endif
diff --git a/src/runner.c b/src/runner.c
index 8131062cb297157ee21f015edd4fb566a16ff91a..208b59114263ff9300417962fa43c32e0ef0512c 100644
--- a/src/runner.c
+++ b/src/runner.c
@@ -53,6 +53,7 @@
 #include "hydro_properties.h"
 #include "kick.h"
 #include "minmax.h"
+#include "runner_doiact_fft.h"
 #include "runner_doiact_vec.h"
 #include "scheduler.h"
 #include "sort_part.h"
@@ -333,7 +334,7 @@ void runner_do_sort(struct runner *r, struct cell *c, int flags, int cleanup,
   TIMER_TIC;
 
   /* Check that the particles have been moved to the current time */
-  if (!cell_is_drifted(c, r->e)) error("Sorting un-drifted cell");
+  if (!cell_are_part_drifted(c, r->e)) error("Sorting un-drifted cell");
 
 #ifdef SWIFT_DEBUG_CHECKS
   /* Make sure the sort flags are consistent (downward). */
@@ -842,19 +843,35 @@ void runner_do_unskip_mapper(void *map_data, int num_elements,
   }
 }
 /**
- * @brief Drift particles in real space.
+ * @brief Drift all part in a cell.
  *
  * @param r The runner thread.
  * @param c The cell.
  * @param timer Are we timing this ?
  */
-void runner_do_drift_particles(struct runner *r, struct cell *c, int timer) {
+void runner_do_drift_part(struct runner *r, struct cell *c, int timer) {
 
   TIMER_TIC;
 
-  cell_drift_particles(c, r->e);
+  cell_drift_part(c, r->e);
 
-  if (timer) TIMER_TOC(timer_drift);
+  if (timer) TIMER_TOC(timer_drift_part);
+}
+
+/**
+ * @brief Drift all gpart in a cell by calling cell_drift_gpart().
+ *
+ * @param r The runner thread.
+ * @param c The cell.
+ * @param timer If non-zero, TIMER_TOC(timer_drift_gpart) is called at the end.
+ */
+void runner_do_drift_gpart(struct runner *r, struct cell *c, int timer) {
+
+  TIMER_TIC;
+
+  cell_drift_gpart(c, r->e);
+
+  if (timer) TIMER_TOC(timer_drift_gpart);
 }
 
 /**
@@ -1525,7 +1542,7 @@ void runner_do_recv_part(struct runner *r, struct cell *c, int clear_sorts,
   /* ... and store. */
   c->ti_end_min = ti_end_min;
   c->ti_end_max = ti_end_max;
-  c->ti_old = ti_current;
+  c->ti_old_part = ti_current;
   c->h_max = h_max;
 
   if (timer) TIMER_TOC(timer_dorecv_part);
@@ -1599,7 +1616,7 @@ void runner_do_recv_gpart(struct runner *r, struct cell *c, int timer) {
   /* ... and store. */
   c->ti_end_min = ti_end_min;
   c->ti_end_max = ti_end_max;
-  c->ti_old = ti_current;
+  c->ti_old_gpart = ti_current;
 
   if (timer) TIMER_TOC(timer_dorecv_gpart);
 
@@ -1672,7 +1689,7 @@ void runner_do_recv_spart(struct runner *r, struct cell *c, int timer) {
   /* ... and store. */
   c->ti_end_min = ti_end_min;
   c->ti_end_max = ti_end_max;
-  c->ti_old = ti_current;
+  c->ti_old_gpart = ti_current;
 
   if (timer) TIMER_TOC(timer_dorecv_spart);
 
@@ -1730,15 +1747,18 @@ void *runner_main(void *data) {
 #ifdef SWIFT_DEBUG_CHECKS
       t->ti_run = e->ti_current;
 #ifndef WITH_MPI
-      if (ci == NULL && cj == NULL) {
+      if (t->type == task_type_grav_top_level) {
+        if (ci != NULL || cj != NULL)
+          error("Top-level gravity task associated with a cell");
+      } else if (ci == NULL && cj == NULL) {
 
         error("Task not associated with cells!");
-
       } else if (cj == NULL) { /* self */
 
         if (!cell_is_active(ci, e) && t->type != task_type_sort &&
             t->type != task_type_send && t->type != task_type_recv &&
-            t->type != task_type_kick1 && t->type != task_type_drift)
+            t->type != task_type_kick1 && t->type != task_type_drift_part &&
+            t->type != task_type_drift_gpart)
           error(
               "Task (type='%s/%s') should have been skipped ti_current=%lld "
               "c->ti_end_min=%lld",
@@ -1868,8 +1888,11 @@ void *runner_main(void *data) {
           runner_do_extra_ghost(r, ci, 1);
           break;
 #endif
-        case task_type_drift:
-          runner_do_drift_particles(r, ci, 1);
+        case task_type_drift_part:
+          runner_do_drift_part(r, ci, 1);
+          break;
+        case task_type_drift_gpart:
+          runner_do_drift_gpart(r, ci, 1);
           break;
         case task_type_kick1:
           runner_do_kick1(r, ci, 1);
@@ -1896,6 +1919,8 @@ void *runner_main(void *data) {
             runner_do_recv_part(r, ci, 1, 1);
           } else if (t->subtype == task_subtype_rho) {
             runner_do_recv_part(r, ci, 1, 1);
+          } else if (t->subtype == task_subtype_gradient) {
+            runner_do_recv_part(r, ci, 1, 1);
           } else if (t->subtype == task_subtype_gpart) {
             runner_do_recv_gpart(r, ci, 1);
           } else if (t->subtype == task_subtype_spart) {
@@ -1907,14 +1932,11 @@ void *runner_main(void *data) {
           }
           break;
 #endif
-        case task_type_grav_mm:
-          // runner_do_grav_mm(r, t->ci, 1);
-          break;
         case task_type_grav_down:
           runner_do_grav_down(r, t->ci, 1);
           break;
         case task_type_grav_top_level:
-          // runner_do_grav_top_level(r);
+          runner_do_grav_fft(r, 1);
           break;
         case task_type_grav_long_range:
           runner_do_grav_long_range(r, t->ci, 1);
diff --git a/src/runner.h b/src/runner.h
index 32b8a16fe0ec67772089fc7283833d9814f5525f..facadf1608fb7e06af952eedbf1151fa68530bef 100644
--- a/src/runner.h
+++ b/src/runner.h
@@ -63,13 +63,15 @@ void runner_do_ghost(struct runner *r, struct cell *c, int timer);
 void runner_do_extra_ghost(struct runner *r, struct cell *c, int timer);
 void runner_do_sort(struct runner *r, struct cell *c, int flag, int cleanup,
                     int clock);
-void runner_do_drift_particles(struct runner *r, struct cell *c, int timer);
+void runner_do_drift_part(struct runner *r, struct cell *c, int timer);
+void runner_do_drift_gpart(struct runner *r, struct cell *c, int timer);
 void runner_do_kick1(struct runner *r, struct cell *c, int timer);
 void runner_do_kick2(struct runner *r, struct cell *c, int timer);
 void runner_do_end_force(struct runner *r, struct cell *c, int timer);
 void runner_do_init(struct runner *r, struct cell *c, int timer);
 void runner_do_cooling(struct runner *r, struct cell *c, int timer);
 void runner_do_grav_external(struct runner *r, struct cell *c, int timer);
+void runner_do_grav_fft(struct runner *r, int timer);
 void *runner_main(void *data);
 void runner_do_unskip_mapper(void *map_data, int num_elements,
                              void *extra_data);
diff --git a/src/runner_doiact.h b/src/runner_doiact.h
index 8e46b0aa33c9e3537ab7a94c3042f54b3e6ea24d..b96c25cf8d71875ad588e46db4c5fb7b53fa7753 100644
--- a/src/runner_doiact.h
+++ b/src/runner_doiact.h
@@ -899,7 +899,7 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) {
   /* Anything to do here? */
   if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return;
 
-  if (!cell_is_drifted(ci, e) || !cell_is_drifted(cj, e))
+  if (!cell_are_part_drifted(ci, e) || !cell_are_part_drifted(cj, e))
     error("Interacting undrifted cells.");
 
   /* Get the sort ID. */
@@ -1145,7 +1145,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) {
   /* Anything to do here? */
   if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return;
 
-  if (!cell_is_drifted(ci, e) || !cell_is_drifted(cj, e))
+  if (!cell_are_part_drifted(ci, e) || !cell_are_part_drifted(cj, e))
     error("Interacting undrifted cells.");
 
   /* Get the shift ID. */
@@ -1597,7 +1597,7 @@ void DOSELF1(struct runner *r, struct cell *restrict c) {
 
   if (!cell_is_active(c, e)) return;
 
-  if (!cell_is_drifted(c, e)) error("Interacting undrifted cell.");
+  if (!cell_are_part_drifted(c, e)) error("Interacting undrifted cell.");
 
   struct part *restrict parts = c->parts;
   const int count = c->count;
@@ -1846,7 +1846,7 @@ void DOSELF2(struct runner *r, struct cell *restrict c) {
 
   if (!cell_is_active(c, e)) return;
 
-  if (!cell_is_drifted(c, e)) error("Cell is not drifted");
+  if (!cell_are_part_drifted(c, e)) error("Cell is not drifted");
 
   struct part *restrict parts = c->parts;
   const int count = c->count;
@@ -2276,7 +2276,7 @@ void DOSUB_PAIR1(struct runner *r, struct cell *ci, struct cell *cj, int sid,
   else if (cell_is_active(ci, e) || cell_is_active(cj, e)) {
 
     /* Make sure both cells are drifted to the current timestep. */
-    if (!cell_is_drifted(ci, e) || !cell_is_drifted(cj, e))
+    if (!cell_are_part_drifted(ci, e) || !cell_are_part_drifted(cj, e))
       error("Interacting undrifted cells.");
 
     /* Do any of the cells need to be sorted first? */
@@ -2330,7 +2330,7 @@ void DOSUB_SELF1(struct runner *r, struct cell *ci, int gettimer) {
   else {
 
     /* Drift the cell to the current timestep if needed. */
-    if (!cell_is_drifted(ci, r->e)) error("Interacting undrifted cell.");
+    if (!cell_are_part_drifted(ci, r->e)) error("Interacting undrifted cell.");
 
 #if (DOSELF1 == runner_doself1_density) && defined(WITH_VECTORIZATION) && \
     defined(GADGET2_SPH)
@@ -2581,7 +2581,7 @@ void DOSUB_PAIR2(struct runner *r, struct cell *ci, struct cell *cj, int sid,
   else if (cell_is_active(ci, e) || cell_is_active(cj, e)) {
 
     /* Make sure both cells are drifted to the current timestep. */
-    if (!cell_is_drifted(ci, e) || !cell_is_drifted(cj, e))
+    if (!cell_are_part_drifted(ci, e) || !cell_are_part_drifted(cj, e))
       error("Interacting undrifted cells.");
 
     /* Do any of the cells need to be sorted first? */
@@ -3198,7 +3198,7 @@ void DOSUB_SUBSET(struct runner *r, struct cell *ci, struct part *parts,
     else if (cell_is_active(ci, e) || cell_is_active(cj, e)) {
 
       /* Do any of the cells need to be drifted first? */
-      if (!cell_is_drifted(cj, e)) error("Cell should be drifted!");
+      if (!cell_are_part_drifted(cj, e)) error("Cell should be drifted!");
 
       DOPAIR_SUBSET(r, ci, parts, ind, count, cj);
     }
diff --git a/src/runner_doiact_fft.c b/src/runner_doiact_fft.c
index 076ec2578361127266c637cc5ac224609b702c66..a3e3f38fba920c0c58d600bb25feda88d4a3cf84 100644
--- a/src/runner_doiact_fft.c
+++ b/src/runner_doiact_fft.c
@@ -31,6 +31,287 @@
 #include "runner_doiact_fft.h"
 
 /* Local includes. */
+#include "engine.h"
+#include "error.h"
 #include "runner.h"
+#include "space.h"
+#include "timers.h"
 
-void runner_do_grav_fft(struct runner *r) {}
+#ifdef HAVE_FFTW
+
+/**
+ * @brief Returns 1D index of a 3D NxNxN array using row-major style.
+ * Each index is reduced with % N first, so an index equal to N maps onto 0.
+ * @param i Index along x.
+ * @param j Index along y.
+ * @param k Index along z.
+ * @param N Size of the array along one axis.
+ */
+__attribute__((always_inline)) INLINE static int row_major_id(int i, int j,
+                                                              int k, int N) {
+  return ((i % N) * N * N + (j % N) * N + (k % N));
+}
+
+/**
+ * @brief Assigns a given multipole to a density mesh using the CIC method.
+ *
+ * @param m The #multipole (only its CoM and M_000 are read).
+ * @param rho The density mesh (N*N*N values, row-major), accumulated into.
+ * @param N The size of the mesh along one axis.
+ * @param fac The inverse width of a mesh cell (mesh cells per unit length).
+ */
+__attribute__((always_inline)) INLINE static void multipole_to_mesh_CIC(
+    const struct gravity_tensors* m, double* rho, int N, double fac) {
+  /* Per axis: cell index, offset within the cell (d*), complement (t*) */
+  int i = (int)(fac * m->CoM[0]);
+  if (i >= N) i = N - 1;
+  const double dx = fac * m->CoM[0] - i;
+  const double tx = 1. - dx;
+
+  int j = (int)(fac * m->CoM[1]);
+  if (j >= N) j = N - 1;
+  const double dy = fac * m->CoM[1] - j;
+  const double ty = 1. - dy;
+
+  int k = (int)(fac * m->CoM[2]);
+  if (k >= N) k = N - 1;
+  const double dz = fac * m->CoM[2] - k;
+  const double tz = 1. - dz;
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (i < 0 || i >= N) error("Invalid multipole position in x");
+  if (j < 0 || j >= N) error("Invalid multipole position in y");
+  if (k < 0 || k >= N) error("Invalid multipole position in z");
+#endif
+
+  /* CIC: share M_000 between the 8 surrounding mesh points (wrapped mod N) */
+  rho[row_major_id(i + 0, j + 0, k + 0, N)] += m->m_pole.M_000 * tx * ty * tz;
+  rho[row_major_id(i + 0, j + 0, k + 1, N)] += m->m_pole.M_000 * tx * ty * dz;
+  rho[row_major_id(i + 0, j + 1, k + 0, N)] += m->m_pole.M_000 * tx * dy * tz;
+  rho[row_major_id(i + 0, j + 1, k + 1, N)] += m->m_pole.M_000 * tx * dy * dz;
+  rho[row_major_id(i + 1, j + 0, k + 0, N)] += m->m_pole.M_000 * dx * ty * tz;
+  rho[row_major_id(i + 1, j + 0, k + 1, N)] += m->m_pole.M_000 * dx * ty * dz;
+  rho[row_major_id(i + 1, j + 1, k + 0, N)] += m->m_pole.M_000 * dx * dy * tz;
+  rho[row_major_id(i + 1, j + 1, k + 1, N)] += m->m_pole.M_000 * dx * dy * dz;
+}
+
+/**
+ * @brief Computes the potential on a multipole from a given mesh using the CIC
+ * method.
+ *
+ * @param m The #multipole (CoM is read, pot.F_000 is accumulated into).
+ * @param pot The potential mesh (N*N*N values, row-major).
+ * @param N The size of the mesh along one axis.
+ * @param fac The inverse width of a mesh cell (mesh cells per unit length).
+ */
+__attribute__((always_inline)) INLINE static void mesh_to_multipole_CIC(
+    struct gravity_tensors* m, double* pot, int N, double fac) {
+  /* Per axis: cell index, offset within the cell (d*), complement (t*) */
+  int i = (int)(fac * m->CoM[0]);
+  if (i >= N) i = N - 1;
+  const double dx = fac * m->CoM[0] - i;
+  const double tx = 1. - dx;
+
+  int j = (int)(fac * m->CoM[1]);
+  if (j >= N) j = N - 1;
+  const double dy = fac * m->CoM[1] - j;
+  const double ty = 1. - dy;
+
+  int k = (int)(fac * m->CoM[2]);
+  if (k >= N) k = N - 1;
+  const double dz = fac * m->CoM[2] - k;
+  const double tz = 1. - dz;
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (i < 0 || i >= N) error("Invalid multipole position in x");
+  if (j < 0 || j >= N) error("Invalid multipole position in y");
+  if (k < 0 || k >= N) error("Invalid multipole position in z");
+#endif
+
+  /* CIC: gather the potential of the 8 surrounding mesh points into F_000 */
+  m->pot.F_000 += pot[row_major_id(i + 0, j + 0, k + 0, N)] * tx * ty * tz;
+  m->pot.F_000 += pot[row_major_id(i + 0, j + 0, k + 1, N)] * tx * ty * dz;
+  m->pot.F_000 += pot[row_major_id(i + 0, j + 1, k + 0, N)] * tx * dy * tz;
+  m->pot.F_000 += pot[row_major_id(i + 0, j + 1, k + 1, N)] * tx * dy * dz;
+  m->pot.F_000 += pot[row_major_id(i + 1, j + 0, k + 0, N)] * dx * ty * tz;
+  m->pot.F_000 += pot[row_major_id(i + 1, j + 0, k + 1, N)] * dx * ty * dz;
+  m->pot.F_000 += pot[row_major_id(i + 1, j + 1, k + 0, N)] * dx * dy * tz;
+  m->pot.F_000 += pot[row_major_id(i + 1, j + 1, k + 1, N)] * dx * dy * dz;
+}
+
+#endif
+
+/**
+ * @brief Computes the potential on the top multipoles using a Fourier transform
+ *
+ * @param r The #runner thread.
+ * @param timer If non-zero, TIMER_TOC(timer_dograv_top_level) is called.
+ */
+void runner_do_grav_fft(struct runner* r, int timer) {
+
+#ifdef HAVE_FFTW
+
+  const struct engine* e = r->e;
+  const struct space* s = e->s;
+  const integertime_t ti_current = e->ti_current;
+  const double a_smooth = e->gravity_properties->a_smooth;
+  const double box_size = s->dim[0];
+  const int cdim[3] = {s->cdim[0], s->cdim[1], s->cdim[2]};
+
+  TIMER_TIC;
+
+  if (cdim[0] != cdim[1] || cdim[0] != cdim[2]) error("Non-square mesh");
+
+  /* Some useful constants (the mesh has one point per top-level cell) */
+  const int N = cdim[0];
+  const int N_half = N / 2;
+  const double cell_fac = N / box_size;
+
+  /* Recover the list of top-level multipoles */
+  const int nr_cells = s->nr_cells;
+  struct gravity_tensors* restrict multipoles = s->multipoles_top;
+  struct cell* cells = s->cells_top;
+
+  /* Make sure everything has been drifted to the current time */
+  for (int i = 0; i < nr_cells; ++i)
+    if (cells[i].ti_old_multipole != ti_current)
+      cell_drift_multipole(&cells[i], e);
+  /* Stale multipoles are drifted here rather than treated as an error. */
+
+  /* Allocates some memory for the density mesh */
+  double* restrict rho = fftw_alloc_real(N * N * N);
+  if (rho == NULL) error("Error allocating memory for density mesh");
+
+  /* Allocates some memory for the mesh in Fourier space */
+  fftw_complex* restrict frho = fftw_alloc_complex(N * N * (N_half + 1));
+  if (frho == NULL)
+    error("Error allocating memory for transform of density mesh");
+
+  /* Prepare the FFT library (plans are created before rho is filled) */
+  fftw_plan forward_plan = fftw_plan_dft_r2c_3d(
+      N, N, N, rho, frho, FFTW_ESTIMATE | FFTW_DESTROY_INPUT);
+  fftw_plan inverse_plan = fftw_plan_dft_c2r_3d(
+      N, N, N, frho, rho, FFTW_ESTIMATE | FFTW_DESTROY_INPUT);
+
+  /* Do a CIC mesh assignment of the multipoles */
+  bzero(rho, N * N * N * sizeof(double));
+  for (int i = 0; i < nr_cells; ++i)
+    multipole_to_mesh_CIC(&multipoles[i], rho, N, cell_fac);
+
+  /* Forward (real-to-complex) Fourier transform of the density mesh */
+  fftw_execute(forward_plan);
+
+  /* frho now contains the Fourier transform of the density field */
+  /* frho contains NxNx(N/2+1) complex numbers */
+
+  /* Some common factors */
+  const double green_fac = -1. / (M_PI * box_size);
+  const double a_smooth2 = 4. * M_PI * a_smooth * a_smooth / ((double)(N * N));
+  const double k_fac = M_PI / (double)N;
+
+  /* Now de-convolve the CIC kernel and apply the Green function */
+  for (int i = 0; i < N; ++i) {
+
+    /* kx component of vector in Fourier space and 1/sinc(kx) */
+    const int kx = (i > N_half ? i - N : i);
+    const double kx_d = (double)kx;
+    const double fx = k_fac * kx_d;
+    const double sinc_kx_inv = (kx != 0) ? fx / sin(fx) : 1.;
+
+    for (int j = 0; j < N; ++j) {
+
+      /* ky component of vector in Fourier space and 1/sinc(ky) */
+      const int ky = (j > N_half ? j - N : j);
+      const double ky_d = (double)ky;
+      const double fy = k_fac * ky_d;
+      const double sinc_ky_inv = (ky != 0) ? fy / sin(fy) : 1.;
+
+      for (int k = 0; k < N_half + 1; ++k) {
+
+        /* kz component (k <= N_half here, so kz == k) and 1/sinc(kz) */
+        const int kz = (k > N_half ? k - N : k);
+        const double kz_d = (double)kz;
+        const double fz = k_fac * kz_d;
+        const double sinc_kz_inv = (kz != 0) ? fz / sin(fz) : 1.;
+
+        /* Squared norm of the vector in Fourier space */
+        const double k2 = (kx_d * kx_d + ky_d * ky_d + kz_d * kz_d);
+
+        /* Skip the k = 0 mode to avoid dividing by zero */
+        if (k2 == 0.) continue;
+
+        /* Green function */
+        const double green_cor = green_fac * exp(-k2 * a_smooth2) / k2;
+
+        /* Deconvolution of CIC: 4th power of the product of 1/sinc terms */
+        const double CIC_cor = sinc_kx_inv * sinc_ky_inv * sinc_kz_inv;
+        const double CIC_cor2 = CIC_cor * CIC_cor;
+        const double CIC_cor4 = CIC_cor2 * CIC_cor2;
+
+        /* Combined correction */
+        const double total_cor = green_cor * CIC_cor4;
+
+        /* Apply to the mesh (frho is N x N x (N/2+1), row-major) */
+        const int index = N * (N_half + 1) * i + (N_half + 1) * j + k;
+        frho[index][0] *= total_cor;
+        frho[index][1] *= total_cor;
+      }
+    }
+  }
+
+  /* Correct singularity at (0,0,0): zero the mode skipped in the loop above */
+  frho[0][0] = 0.;
+  frho[0][1] = 0.;
+
+  /* Inverse (complex-to-real) Fourier transform back to real space */
+  fftw_execute(inverse_plan);
+
+  /* rho now contains the potential */
+  /* This array is now again NxNxN real numbers */
+
+  /* Get the potential from the mesh using CIC */
+  for (int i = 0; i < nr_cells; ++i)
+    mesh_to_multipole_CIC(&multipoles[i], rho, N, cell_fac);
+
+  /* Clean-up the mess */
+  fftw_destroy_plan(forward_plan);
+  fftw_destroy_plan(inverse_plan);
+  fftw_free(rho);
+  fftw_free(frho);
+
+  /* Time the whole thing */
+  if (timer) TIMER_TOC(timer_dograv_top_level);
+
+#else
+  error("No FFTW library found. Cannot compute periodic long-range forces.");
+#endif
+}
+
+#ifdef HAVE_FFTW
+void print_array(double* array, int N) {
+  /* Debug dump of an NxNxN real mesh, one z-slice at a time (highest z first) */
+  for (int k = N - 1; k >= 0; --k) {
+    printf("--- z = %d ---------\n", k);
+    for (int j = N - 1; j >= 0; --j) {
+      for (int i = 0; i < N; ++i) {
+        printf("%f ", array[i * N * N + j * N + k]);
+      }
+      printf("\n");
+    }
+  }
+}
+
+void print_carray(fftw_complex* array, int N) {
+  /* Debug dump of N*N*N complex values. NOTE(review): frho has N*N*(N/2+1). */
+  for (int k = N - 1; k >= 0; --k) {
+    printf("--- z = %d ---------\n", k);
+    for (int j = N - 1; j >= 0; --j) {
+      for (int i = 0; i < N; ++i) {
+        printf("(%f %f) ", array[i * N * N + j * N + k][0],
+               array[i * N * N + j * N + k][1]);
+      }
+      printf("\n");
+    }
+  }
+}
+#endif /* HAVE_FFTW */
diff --git a/src/runner_doiact_fft.h b/src/runner_doiact_fft.h
index 263662383fb465dcf945e55494a569289b009ff9..e9836311e71803952969b9c9316e8c81676d2dd8 100644
--- a/src/runner_doiact_fft.h
+++ b/src/runner_doiact_fft.h
@@ -21,6 +21,6 @@
 
 struct runner;
 
-void runner_do_grav_fft(struct runner *r);
+void runner_do_grav_fft(struct runner *r, int timer);
 
 #endif /* SWIFT_RUNNER_DOIACT_FFT_H */
diff --git a/src/runner_doiact_grav.h b/src/runner_doiact_grav.h
index 13a55344d773e7fba000d680eae9866dffdd88e1..a66cc5e0c9ed241aba3bb1b4329016b8e505e280 100644
--- a/src/runner_doiact_grav.h
+++ b/src/runner_doiact_grav.h
@@ -182,8 +182,8 @@ void runner_dopair_grav_pp(struct runner *r, struct cell *ci, struct cell *cj) {
   if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return;
 
   /* Let's start by drifting things */
-  if (!cell_is_drifted(ci, e)) cell_drift_particles(ci, e);
-  if (!cell_is_drifted(cj, e)) cell_drift_particles(cj, e);
+  if (!cell_are_gpart_drifted(ci, e)) cell_drift_gpart(ci, e);
+  if (!cell_are_gpart_drifted(cj, e)) cell_drift_gpart(cj, e);
 
 #if ICHECK > 0
   for (int pid = 0; pid < gcount_i; pid++) {
@@ -318,7 +318,7 @@ void runner_doself_grav_pp(struct runner *r, struct cell *c) {
   if (!cell_is_active(c, e)) return;
 
   /* Do we need to start by drifting things ? */
-  if (!cell_is_drifted(c, e)) cell_drift_particles(c, e);
+  if (!cell_are_gpart_drifted(c, e)) cell_drift_gpart(c, e);
 
 #if ICHECK > 0
   for (int pid = 0; pid < gcount; pid++) {
@@ -429,6 +429,11 @@ void runner_dopair_grav(struct runner *r, struct cell *ci, struct cell *cj,
 
   /* Sanity check */
   if (ci == cj) error("Pair interaction between a cell and itself.");
+
+  if (cell_is_active(ci, e) && ci->ti_old_multipole != e->ti_current)
+    error("ci->multipole not drifted.");
+  if (cell_is_active(cj, e) && cj->ti_old_multipole != e->ti_current)
+    error("cj->multipole not drifted.");
 #endif
 
 #if ICHECK > 0
diff --git a/src/runner_doiact_vec.c b/src/runner_doiact_vec.c
index 6a0ee98a7f46feb8d6754e010034156c04bf4d66..23b66ddcc11dd5cbd52da354b7051af799250e3c 100644
--- a/src/runner_doiact_vec.c
+++ b/src/runner_doiact_vec.c
@@ -381,7 +381,7 @@ __attribute__((always_inline)) INLINE void runner_doself1_density_vec(
 
   if (!cell_is_active(c, e)) return;
 
-  if (!cell_is_drifted(c, e)) error("Interacting undrifted cell.");
+  if (!cell_are_part_drifted(c, e)) error("Interacting undrifted cell.");
 
   /* Get the particle cache from the runner and re-allocate
    * the cache if it is not big enough for the cell. */
@@ -604,370 +604,6 @@ __attribute__((always_inline)) INLINE void runner_doself1_density_vec(
 #endif /* WITH_VECTORIZATION */
 }
 
-/**
- * @brief Compute the cell self-interaction (non-symmetric) using vector
- * intrinsics with two particle pis at a time.
- *
- * CURRENTLY BROKEN DO NOT USE.
- *
- * @param r The #runner.
- * @param c The #cell.
- */
-__attribute__((always_inline)) INLINE void runner_doself1_density_vec_2(
-    struct runner *r, struct cell *restrict c) {
-
-#ifdef WITH_VECTORIZATION
-  const struct engine *e = r->e;
-  int doi_mask;
-  int doi2_mask;
-  struct part *restrict pi;
-  struct part *restrict pi2;
-  int count_align;
-
-  vector v_hi, v_vix, v_viy, v_viz, v_hig2, v_r2;
-  vector v_hi2, v_vix2, v_viy2, v_viz2, v_hig2_2, v2_r2;
-
-  TIMER_TIC
-
-  if (!cell_is_active(c, e)) return;
-
-  if (!cell_is_drifted(c, e)) cell_drift_particles(c, e);
-
-  /* TODO: Need to find two active particles, not just one. */
-
-  struct part *restrict parts = c->parts;
-  const int count = c->count;
-
-  /* Get the particle cache from the runner and re-allocate
-   * the cache if it is not big enough for the cell. */
-  struct cache *restrict cell_cache = &r->ci_cache;
-
-  if (cell_cache->count < count) {
-    cache_init(cell_cache, count);
-  }
-
-  /* Read the particles from the cell and store them locally in the cache. */
-  cache_read_particles(c, &r->ci_cache);
-
-  /* Create two secondary caches. */
-  int icount = 0, icount_align = 0;
-  struct c2_cache int_cache;
-
-  int icount2 = 0, icount_align2 = 0;
-  struct c2_cache int_cache2;
-
-  /* Loop over the particles in the cell. */
-  for (int pid = 0; pid < count; pid += 2) {
-
-    /* Get a pointer to the ith particle and next i particle. */
-    pi = &parts[pid];
-    pi2 = &parts[pid + 1];
-
-    /* Is the ith particle active? */
-    if (!part_is_active(pi, e)) continue;
-
-    vector pix, piy, piz;
-    vector pix2, piy2, piz2;
-
-    const float hi = cell_cache->h[pid];
-    const float hi2 = cell_cache->h[pid + 1];
-
-    /* Fill pi position vector. */
-    pix.v = vec_set1(cell_cache->x[pid]);
-    piy.v = vec_set1(cell_cache->y[pid]);
-    piz.v = vec_set1(cell_cache->z[pid]);
-    v_hi.v = vec_set1(hi);
-    v_vix.v = vec_set1(cell_cache->vx[pid]);
-    v_viy.v = vec_set1(cell_cache->vy[pid]);
-    v_viz.v = vec_set1(cell_cache->vz[pid]);
-
-    pix2.v = vec_set1(cell_cache->x[pid + 1]);
-    piy2.v = vec_set1(cell_cache->y[pid + 1]);
-    piz2.v = vec_set1(cell_cache->z[pid + 1]);
-    v_hi2.v = vec_set1(hi2);
-    v_vix2.v = vec_set1(cell_cache->vx[pid + 1]);
-    v_viy2.v = vec_set1(cell_cache->vy[pid + 1]);
-    v_viz2.v = vec_set1(cell_cache->vz[pid + 1]);
-
-    const float hig2 = hi * hi * kernel_gamma2;
-    const float hig2_2 = hi2 * hi2 * kernel_gamma2;
-    v_hig2.v = vec_set1(hig2);
-    v_hig2_2.v = vec_set1(hig2_2);
-
-    vector rhoSum, rho_dhSum, wcountSum, wcount_dhSum, div_vSum, curlvxSum,
-        curlvySum, curlvzSum;
-    vector rhoSum2, rho_dhSum2, wcountSum2, wcount_dhSum2, div_vSum2,
-        curlvxSum2, curlvySum2, curlvzSum2;
-
-    vector v_hi_inv, v_hi_inv2;
-
-    v_hi_inv = vec_reciprocal(v_hi);
-    v_hi_inv2 = vec_reciprocal(v_hi2);
-
-    rhoSum.v = vec_setzero();
-    rho_dhSum.v = vec_setzero();
-    wcountSum.v = vec_setzero();
-    wcount_dhSum.v = vec_setzero();
-    div_vSum.v = vec_setzero();
-    curlvxSum.v = vec_setzero();
-    curlvySum.v = vec_setzero();
-    curlvzSum.v = vec_setzero();
-
-    rhoSum2.v = vec_setzero();
-    rho_dhSum2.v = vec_setzero();
-    wcountSum2.v = vec_setzero();
-    wcount_dhSum2.v = vec_setzero();
-    div_vSum2.v = vec_setzero();
-    curlvxSum2.v = vec_setzero();
-    curlvySum2.v = vec_setzero();
-    curlvzSum2.v = vec_setzero();
-
-    /* Pad cache if there is a serial remainder. */
-    count_align = count;
-    int rem = count % (NUM_VEC_PROC * VEC_SIZE);
-    if (rem != 0) {
-      int pad = (NUM_VEC_PROC * VEC_SIZE) - rem;
-
-      count_align += pad;
-      /* Set positions to the same as particle pi so when the r2 > 0 mask is
-       * applied these extra contributions are masked out.*/
-      for (int i = count; i < count_align; i++) {
-        cell_cache->x[i] = pix.f[0];
-        cell_cache->y[i] = piy.f[0];
-        cell_cache->z[i] = piz.f[0];
-      }
-    }
-
-    vector pjx, pjy, pjz;
-    vector pjvx, pjvy, pjvz, mj;
-    vector pjx2, pjy2, pjz2;
-    vector pjvx2, pjvy2, pjvz2, mj2;
-
-    /* Find all of particle pi's interacions and store needed values in
-     * secondary cache.*/
-    for (int pjd = 0; pjd < count_align; pjd += (NUM_VEC_PROC * VEC_SIZE)) {
-
-      /* Load 2 sets of vectors from the particle cache. */
-      pjx.v = vec_load(&cell_cache->x[pjd]);
-      pjy.v = vec_load(&cell_cache->y[pjd]);
-      pjz.v = vec_load(&cell_cache->z[pjd]);
-      pjvx.v = vec_load(&cell_cache->vx[pjd]);
-      pjvy.v = vec_load(&cell_cache->vy[pjd]);
-      pjvz.v = vec_load(&cell_cache->vz[pjd]);
-      mj.v = vec_load(&cell_cache->m[pjd]);
-
-      pjx2.v = vec_load(&cell_cache->x[pjd + VEC_SIZE]);
-      pjy2.v = vec_load(&cell_cache->y[pjd + VEC_SIZE]);
-      pjz2.v = vec_load(&cell_cache->z[pjd + VEC_SIZE]);
-      pjvx2.v = vec_load(&cell_cache->vx[pjd + VEC_SIZE]);
-      pjvy2.v = vec_load(&cell_cache->vy[pjd + VEC_SIZE]);
-      pjvz2.v = vec_load(&cell_cache->vz[pjd + VEC_SIZE]);
-      mj2.v = vec_load(&cell_cache->m[pjd + VEC_SIZE]);
-
-      /* Compute the pairwise distance. */
-      vector v_dx_tmp, v_dy_tmp, v_dz_tmp;
-      vector v_dx_tmp2, v_dy_tmp2, v_dz_tmp2, v_r2_2;
-      vector v_dx2_tmp, v_dy2_tmp, v_dz2_tmp;
-      vector v_dx2_tmp2, v_dy2_tmp2, v_dz2_tmp2, v2_r2_2;
-
-      v_dx_tmp.v = vec_sub(pix.v, pjx.v);
-      v_dy_tmp.v = vec_sub(piy.v, pjy.v);
-      v_dz_tmp.v = vec_sub(piz.v, pjz.v);
-      v_dx_tmp2.v = vec_sub(pix.v, pjx2.v);
-      v_dy_tmp2.v = vec_sub(piy.v, pjy2.v);
-      v_dz_tmp2.v = vec_sub(piz.v, pjz2.v);
-
-      v_dx2_tmp.v = vec_sub(pix2.v, pjx.v);
-      v_dy2_tmp.v = vec_sub(piy2.v, pjy.v);
-      v_dz2_tmp.v = vec_sub(piz2.v, pjz.v);
-      v_dx2_tmp2.v = vec_sub(pix2.v, pjx2.v);
-      v_dy2_tmp2.v = vec_sub(piy2.v, pjy2.v);
-      v_dz2_tmp2.v = vec_sub(piz2.v, pjz2.v);
-
-      v_r2.v = vec_mul(v_dx_tmp.v, v_dx_tmp.v);
-      v_r2.v = vec_fma(v_dy_tmp.v, v_dy_tmp.v, v_r2.v);
-      v_r2.v = vec_fma(v_dz_tmp.v, v_dz_tmp.v, v_r2.v);
-      v_r2_2.v = vec_mul(v_dx_tmp2.v, v_dx_tmp2.v);
-      v_r2_2.v = vec_fma(v_dy_tmp2.v, v_dy_tmp2.v, v_r2_2.v);
-      v_r2_2.v = vec_fma(v_dz_tmp2.v, v_dz_tmp2.v, v_r2_2.v);
-
-      v2_r2.v = vec_mul(v_dx2_tmp.v, v_dx2_tmp.v);
-      v2_r2.v = vec_fma(v_dy2_tmp.v, v_dy2_tmp.v, v2_r2.v);
-      v2_r2.v = vec_fma(v_dz2_tmp.v, v_dz2_tmp.v, v2_r2.v);
-      v2_r2_2.v = vec_mul(v_dx2_tmp2.v, v_dx2_tmp2.v);
-      v2_r2_2.v = vec_fma(v_dy2_tmp2.v, v_dy2_tmp2.v, v2_r2_2.v);
-      v2_r2_2.v = vec_fma(v_dz2_tmp2.v, v_dz2_tmp2.v, v2_r2_2.v);
-
-/* Form a mask from r2 < hig2 and r2 > 0.*/
-#ifdef HAVE_AVX512_F
-      // KNL_MASK_16 doi_mask, doi_mask_check, doi_mask2, doi_mask2_check;
-      KNL_MASK_16 doi_mask_check, doi_mask2, doi_mask2_check;
-      KNL_MASK_16 doi2_mask_check, doi2_mask2, doi2_mask2_check;
-
-      doi_mask_check = vec_cmp_gt(v_r2.v, vec_setzero());
-      doi_mask = vec_cmp_lt(v_r2.v, v_hig2.v);
-
-      doi2_mask_check = vec_cmp_gt(v2_r2.v, vec_setzero());
-      doi2_mask = vec_cmp_lt(v2_r2.v, v_hig2_2.v);
-
-      doi_mask2_check = vec_cmp_gt(v_r2_2.v, vec_setzero());
-      doi_mask2 = vec_cmp_lt(v_r2_2.v, v_hig2.v);
-
-      doi2_mask2_check = vec_cmp_gt(v2_r2_2.v, vec_setzero());
-      doi2_mask2 = vec_cmp_lt(v2_r2_2.v, v_hig2_2.v);
-
-      doi_mask = doi_mask & doi_mask_check;
-      doi_mask2 = doi_mask2 & doi_mask2_check;
-
-      doi2_mask = doi2_mask & doi2_mask_check;
-      doi2_mask2 = doi2_mask2 & doi2_mask2_check;
-#else
-      vector v_doi_mask, v_doi_mask_check, v_doi_mask2, v_doi_mask2_check;
-      int doi_mask2;
-
-      vector v_doi2_mask, v_doi2_mask_check, v_doi2_mask2, v_doi2_mask2_check;
-      int doi2_mask2;
-
-      v_doi_mask_check.v = vec_cmp_gt(v_r2.v, vec_setzero());
-      v_doi_mask.v = vec_cmp_lt(v_r2.v, v_hig2.v);
-
-      v_doi2_mask_check.v = vec_cmp_gt(v2_r2.v, vec_setzero());
-      v_doi2_mask.v = vec_cmp_lt(v2_r2.v, v_hig2_2.v);
-
-      v_doi_mask2_check.v = vec_cmp_gt(v_r2_2.v, vec_setzero());
-      v_doi_mask2.v = vec_cmp_lt(v_r2_2.v, v_hig2.v);
-
-      v_doi2_mask2_check.v = vec_cmp_gt(v2_r2_2.v, vec_setzero());
-      v_doi2_mask2.v = vec_cmp_lt(v2_r2_2.v, v_hig2_2.v);
-
-      doi_mask = vec_cmp_result(vec_and(v_doi_mask.v, v_doi_mask_check.v));
-      doi_mask2 = vec_cmp_result(vec_and(v_doi_mask2.v, v_doi_mask2_check.v));
-      doi2_mask = vec_cmp_result(vec_and(v_doi2_mask.v, v_doi2_mask_check.v));
-      doi2_mask2 =
-          vec_cmp_result(vec_and(v_doi2_mask2.v, v_doi2_mask2_check.v));
-#endif /* HAVE_AVX512_F */
-
-      /* Hit or miss? */
-      // if (doi_mask) {
-      storeInteractions(doi_mask, pjd, &v_r2, &v_dx_tmp, &v_dy_tmp, &v_dz_tmp,
-                        &mj, &pjvx, &pjvy, &pjvz, cell_cache, &int_cache,
-                        &icount, &rhoSum, &rho_dhSum, &wcountSum, &wcount_dhSum,
-                        &div_vSum, &curlvxSum, &curlvySum, &curlvzSum, v_hi_inv,
-                        v_vix, v_viy, v_viz);
-      //}
-      // if (doi2_mask) {
-      storeInteractions(
-          doi2_mask, pjd, &v2_r2, &v_dx2_tmp, &v_dy2_tmp, &v_dz2_tmp, &mj,
-          &pjvx, &pjvy, &pjvz, cell_cache, &int_cache2, &icount2, &rhoSum2,
-          &rho_dhSum2, &wcountSum2, &wcount_dhSum2, &div_vSum2, &curlvxSum2,
-          &curlvySum2, &curlvzSum2, v_hi_inv2, v_vix2, v_viy2, v_viz2);
-      //}
-      /* Hit or miss? */
-      // if (doi_mask2) {
-      storeInteractions(doi_mask2, pjd + VEC_SIZE, &v_r2_2, &v_dx_tmp2,
-                        &v_dy_tmp2, &v_dz_tmp2, &mj2, &pjvx2, &pjvy2, &pjvz2,
-                        cell_cache, &int_cache, &icount, &rhoSum, &rho_dhSum,
-                        &wcountSum, &wcount_dhSum, &div_vSum, &curlvxSum,
-                        &curlvySum, &curlvzSum, v_hi_inv, v_vix, v_viy, v_viz);
-      //}
-      // if (doi2_mask2) {
-      storeInteractions(doi2_mask2, pjd + VEC_SIZE, &v2_r2_2, &v_dx2_tmp2,
-                        &v_dy2_tmp2, &v_dz2_tmp2, &mj2, &pjvx2, &pjvy2, &pjvz2,
-                        cell_cache, &int_cache2, &icount2, &rhoSum2,
-                        &rho_dhSum2, &wcountSum2, &wcount_dhSum2, &div_vSum2,
-                        &curlvxSum2, &curlvySum2, &curlvzSum2, v_hi_inv2,
-                        v_vix2, v_viy2, v_viz2);
-      //}
-    }
-
-    /* Perform padded vector remainder interactions if any are present. */
-    calcRemInteractions(&int_cache, icount, &rhoSum, &rho_dhSum, &wcountSum,
-                        &wcount_dhSum, &div_vSum, &curlvxSum, &curlvySum,
-                        &curlvzSum, v_hi_inv, v_vix, v_viy, v_viz,
-                        &icount_align);
-
-    calcRemInteractions(&int_cache2, icount2, &rhoSum2, &rho_dhSum2,
-                        &wcountSum2, &wcount_dhSum2, &div_vSum2, &curlvxSum2,
-                        &curlvySum2, &curlvzSum2, v_hi_inv2, v_vix2, v_viy2,
-                        v_viz2, &icount_align2);
-
-    /* Initialise masks to true incase remainder interactions have been
-     * performed. */
-    vector int_mask, int_mask2;
-    vector int2_mask, int2_mask2;
-#ifdef HAVE_AVX512_F
-    KNL_MASK_16 knl_mask = 0xFFFF;
-    KNL_MASK_16 knl_mask2 = 0xFFFF;
-    int_mask.m = vec_setint1(0xFFFFFFFF);
-    int_mask2.m = vec_setint1(0xFFFFFFFF);
-    int2_mask.m = vec_setint1(0xFFFFFFFF);
-    int2_mask2.m = vec_setint1(0xFFFFFFFF);
-#else
-    int_mask.m = vec_setint1(0xFFFFFFFF);
-    int_mask2.m = vec_setint1(0xFFFFFFFF);
-
-    int2_mask.m = vec_setint1(0xFFFFFFFF);
-    int2_mask2.m = vec_setint1(0xFFFFFFFF);
-#endif
-
-    /* Perform interaction with 2 vectors. */
-    for (int pjd = 0; pjd < icount_align; pjd += (NUM_VEC_PROC * VEC_SIZE)) {
-      runner_iact_nonsym_2_vec_density(
-          &int_cache.r2q[pjd], &int_cache.dxq[pjd], &int_cache.dyq[pjd],
-          &int_cache.dzq[pjd], v_hi_inv, v_vix, v_viy, v_viz,
-          &int_cache.vxq[pjd], &int_cache.vyq[pjd], &int_cache.vzq[pjd],
-          &int_cache.mq[pjd], &rhoSum, &rho_dhSum, &wcountSum, &wcount_dhSum,
-          &div_vSum, &curlvxSum, &curlvySum, &curlvzSum, int_mask, int_mask2,
-#ifdef HAVE_AVX512_F
-          knl_mask, knl_mask2);
-#else
-          0, 0);
-#endif
-    }
-
-    for (int pjd = 0; pjd < icount_align2; pjd += (NUM_VEC_PROC * VEC_SIZE)) {
-      runner_iact_nonsym_2_vec_density(
-          &int_cache2.r2q[pjd], &int_cache2.dxq[pjd], &int_cache2.dyq[pjd],
-          &int_cache2.dzq[pjd], v_hi_inv2, v_vix2, v_viy2, v_viz2,
-          &int_cache2.vxq[pjd], &int_cache2.vyq[pjd], &int_cache2.vzq[pjd],
-          &int_cache2.mq[pjd], &rhoSum2, &rho_dhSum2, &wcountSum2,
-          &wcount_dhSum2, &div_vSum2, &curlvxSum2, &curlvySum2, &curlvzSum2,
-          int2_mask, int2_mask2,
-#ifdef HAVE_AVX512_F
-          knl_mask, knl_mask2);
-#else
-          0, 0);
-#endif
-    }
-    /* Perform horizontal adds on vector sums and store result in particle pi.
-     */
-    VEC_HADD(rhoSum, pi->rho);
-    VEC_HADD(rho_dhSum, pi->density.rho_dh);
-    VEC_HADD(wcountSum, pi->density.wcount);
-    VEC_HADD(wcount_dhSum, pi->density.wcount_dh);
-    VEC_HADD(div_vSum, pi->density.div_v);
-    VEC_HADD(curlvxSum, pi->density.rot_v[0]);
-    VEC_HADD(curlvySum, pi->density.rot_v[1]);
-    VEC_HADD(curlvzSum, pi->density.rot_v[2]);
-
-    VEC_HADD(rhoSum2, pi2->rho);
-    VEC_HADD(rho_dhSum2, pi2->density.rho_dh);
-    VEC_HADD(wcountSum2, pi2->density.wcount);
-    VEC_HADD(wcount_dhSum2, pi2->density.wcount_dh);
-    VEC_HADD(div_vSum2, pi2->density.div_v);
-    VEC_HADD(curlvxSum2, pi2->density.rot_v[0]);
-    VEC_HADD(curlvySum2, pi2->density.rot_v[1]);
-    VEC_HADD(curlvzSum2, pi2->density.rot_v[2]);
-
-    /* Reset interaction count. */
-    icount = 0;
-    icount2 = 0;
-  } /* loop over all particles. */
-
-  TIMER_TOC(timer_doself_density);
-#endif /* WITH_VECTORIZATION */
-}
-
 /**
  * @brief Compute the density interactions between a cell pair (non-symmetric)
  * using vector intrinsics.
@@ -989,7 +625,7 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
   /* Anything to do here? */
   if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return;
 
-  if (!cell_is_drifted(ci, e) || !cell_is_drifted(cj, e))
+  if (!cell_are_part_drifted(ci, e) || !cell_are_part_drifted(cj, e))
     error("Interacting undrifted cells.");
 
   /* Get the sort ID. */
diff --git a/src/scheduler.c b/src/scheduler.c
index fe8776e4fb7a70506116c3c4e3c9a710c7bef937..60ae9c25f5178a40ca1e1d4aa2f5782cc28bbc11 100644
--- a/src/scheduler.c
+++ b/src/scheduler.c
@@ -152,22 +152,12 @@ void scheduler_add_subcell_tasks(struct scheduler *s, struct cell *c,
 }
 
 /**
- * @brief Split a task if too large.
+ * @brief Split a hydrodynamic task if too large.
  *
  * @param t The #task
  * @param s The #scheduler we are working in.
  */
-static void scheduler_splittask(struct task *t, struct scheduler *s) {
-
-  /* Static constants. */
-  static const int pts[7][8] = {
-      {-1, 12, 10, 9, 4, 3, 1, 0},     {-1, -1, 11, 10, 5, 4, 2, 1},
-      {-1, -1, -1, 12, 7, 6, 4, 3},    {-1, -1, -1, -1, 8, 7, 5, 4},
-      {-1, -1, -1, -1, -1, 12, 10, 9}, {-1, -1, -1, -1, -1, -1, 11, 10},
-      {-1, -1, -1, -1, -1, -1, -1, 12}};
-  static const float sid_scale[13] = {
-      0.1897f, 0.4025f, 0.1897f, 0.4025f, 0.5788f, 0.4025f, 0.1897f,
-      0.4025f, 0.1897f, 0.4025f, 0.5788f, 0.4025f, 0.5788f};
+static void scheduler_splittask_hydro(struct task *t, struct scheduler *s) {
 
   /* Iterate on this task until we're done with it. */
   int redo = 1;
@@ -177,11 +167,7 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) {
     redo = 0;
 
     /* Non-splittable task? */
-    if ((t->ci == NULL || (t->type == task_type_pair && t->cj == NULL)) ||
-        ((t->type == task_type_kick1) && t->ci->nodeID != s->nodeID) ||
-        ((t->type == task_type_kick2) && t->ci->nodeID != s->nodeID) ||
-        ((t->type == task_type_drift) && t->ci->nodeID != s->nodeID) ||
-        ((t->type == task_type_timestep) && t->ci->nodeID != s->nodeID)) {
+    if ((t->ci == NULL) || (t->type == task_type_pair && t->cj == NULL)) {
       t->type = task_type_none;
       t->subtype = task_subtype_none;
       t->cj = NULL;
@@ -194,7 +180,7 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) {
 
       /* Get a handle on the cell involved. */
       struct cell *ci = t->ci;
-      const double hi = ci->dmin;
+      const double width = ci->dmin;
 
       /* Foreign task? */
       if (ci->nodeID != s->nodeID) {
@@ -202,27 +188,16 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) {
         break;
       }
 
-      /* Is this cell even split? */
-      if (ci->split && 2.f * kernel_gamma * ci->h_max * space_stretch < hi) {
+      /* Is this cell split, and is it wide enough relative to h_max? */
+      if (ci->split && 2.f * kernel_gamma * ci->h_max * space_stretch < width) {
 
         /* Make a sub? */
         if (scheduler_dosub && /* Note division here to avoid overflow */
-            ((ci->count > 0 && ci->count < space_subsize / ci->count) ||
-             (ci->gcount > 0 && ci->gcount < space_subsize / ci->gcount))) {
+            (ci->count > 0 && ci->count < space_subsize / ci->count)) {
 
           /* convert to a self-subtask. */
           t->type = task_type_sub_self;
 
-          /* Make sure we have a drift task (MATTHIEU temp. fix for gravity) */
-          if (t->subtype == task_subtype_grav ||
-              t->subtype == task_subtype_external_grav) {
-            lock_lock(&ci->lock);
-            if (ci->drift == NULL)
-              ci->drift = scheduler_addtask(s, task_type_drift,
-                                            task_subtype_none, 0, 0, ci, NULL);
-            lock_unlock_blind(&ci->lock);
-          }
-
           /* Depend on local sorts on this cell and its sub-cells. */
           scheduler_add_subcell_tasks(s, ci, t);
 
@@ -238,44 +213,41 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) {
           t->ci = ci->progeny[first_child];
           for (int k = first_child + 1; k < 8; k++)
             if (ci->progeny[k] != NULL)
-              scheduler_splittask(
+              scheduler_splittask_hydro(
                   scheduler_addtask(s, task_type_self, t->subtype, 0, 0,
                                     ci->progeny[k], NULL),
                   s);
 
-          /* Make a task for each pair of progeny unless it's ext. gravity. */
-          if (t->subtype != task_subtype_external_grav) {
-
-            for (int j = 0; j < 8; j++)
-              if (ci->progeny[j] != NULL)
-                for (int k = j + 1; k < 8; k++)
-                  if (ci->progeny[k] != NULL)
-                    scheduler_splittask(
-                        scheduler_addtask(s, task_type_pair, t->subtype,
-                                          pts[j][k], 0, ci->progeny[j],
-                                          ci->progeny[k]),
-                        s);
-          }
+          /* Make a task for each pair of progeny */
+          for (int j = 0; j < 8; j++)
+            if (ci->progeny[j] != NULL)
+              for (int k = j + 1; k < 8; k++)
+                if (ci->progeny[k] != NULL)
+                  scheduler_splittask_hydro(
+                      scheduler_addtask(s, task_type_pair, t->subtype,
+                                        sub_sid_flag[j][k], 0, ci->progeny[j],
+                                        ci->progeny[k]),
+                      s);
         }
-      }
+      } /* Cell is split */
 
-      /* Otherwise, make sure the self task has a drift task. */
+      /* Otherwise, make sure the self task has a drift task */
       else {
         lock_lock(&ci->lock);
-        if (ci->drift == NULL)
-          ci->drift = scheduler_addtask(s, task_type_drift, task_subtype_none,
-                                        0, 0, ci, NULL);
+
+        if (ci->drift_part == NULL)
+          ci->drift_part = scheduler_addtask(s, task_type_drift_part,
+                                             task_subtype_none, 0, 0, ci, NULL);
         lock_unlock_blind(&ci->lock);
       }
+    } /* Self interaction */
 
-      /* Pair interaction? */
-    } else if (t->type == task_type_pair && t->subtype != task_subtype_grav) {
+    /* Pair interaction? */
+    else if (t->type == task_type_pair) {
 
       /* Get a handle on the cells involved. */
       struct cell *ci = t->ci;
       struct cell *cj = t->cj;
-      const double hi = ci->dmin;
-      const double hj = cj->dmin;
 
       /* Foreign task? */
       if (ci->nodeID != s->nodeID && cj->nodeID != s->nodeID) {
@@ -288,10 +260,13 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) {
       double shift[3];
       const int sid = space_getsid(s->space, &ci, &cj, shift);
 
+      const double width_i = ci->dmin;
+      const double width_j = cj->dmin;
+
       /* Should this task be split-up? */
       if (ci->split && cj->split &&
-          2.f * kernel_gamma * space_stretch * ci->h_max < hi &&
-          2.f * kernel_gamma * space_stretch * cj->h_max < hj) {
+          2.f * kernel_gamma * space_stretch * ci->h_max < width_i &&
+          2.f * kernel_gamma * space_stretch * cj->h_max < width_j) {
 
         /* Replace by a single sub-task? */
         if (scheduler_dosub &&
@@ -324,15 +299,15 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) {
               t->ci = ci->progeny[6];
               t->cj = cj->progeny[0];
               t->flags = 1;
-              scheduler_splittask(
+              scheduler_splittask_hydro(
                   scheduler_addtask(s, task_type_pair, t->subtype, 1, 0,
                                     ci->progeny[7], cj->progeny[1]),
                   s);
-              scheduler_splittask(
+              scheduler_splittask_hydro(
                   scheduler_addtask(s, task_type_pair, t->subtype, 0, 0,
                                     ci->progeny[6], cj->progeny[1]),
                   s);
-              scheduler_splittask(
+              scheduler_splittask_hydro(
                   scheduler_addtask(s, task_type_pair, t->subtype, 2, 0,
                                     ci->progeny[7], cj->progeny[0]),
                   s);
@@ -348,15 +323,15 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) {
               t->ci = ci->progeny[5];
               t->cj = cj->progeny[0];
               t->flags = 3;
-              scheduler_splittask(
+              scheduler_splittask_hydro(
                   scheduler_addtask(s, task_type_pair, t->subtype, 3, 0,
                                     ci->progeny[7], cj->progeny[2]),
                   s);
-              scheduler_splittask(
+              scheduler_splittask_hydro(
                   scheduler_addtask(s, task_type_pair, t->subtype, 0, 0,
                                     ci->progeny[5], cj->progeny[2]),
                   s);
-              scheduler_splittask(
+              scheduler_splittask_hydro(
                   scheduler_addtask(s, task_type_pair, t->subtype, 6, 0,
                                     ci->progeny[7], cj->progeny[0]),
                   s);
@@ -366,63 +341,63 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) {
               t->ci = ci->progeny[4];
               t->cj = cj->progeny[0];
               t->flags = 4;
-              scheduler_splittask(
+              scheduler_splittask_hydro(
                   scheduler_addtask(s, task_type_pair, t->subtype, 5, 0,
                                     ci->progeny[5], cj->progeny[0]),
                   s);
-              scheduler_splittask(
+              scheduler_splittask_hydro(
                   scheduler_addtask(s, task_type_pair, t->subtype, 7, 0,
                                     ci->progeny[6], cj->progeny[0]),
                   s);
-              scheduler_splittask(
+              scheduler_splittask_hydro(
                   scheduler_addtask(s, task_type_pair, t->subtype, 8, 0,
                                     ci->progeny[7], cj->progeny[0]),
                   s);
-              scheduler_splittask(
+              scheduler_splittask_hydro(
                   scheduler_addtask(s, task_type_pair, t->subtype, 3, 0,
                                     ci->progeny[4], cj->progeny[1]),
                   s);
-              scheduler_splittask(
+              scheduler_splittask_hydro(
                   scheduler_addtask(s, task_type_pair, t->subtype, 4, 0,
                                     ci->progeny[5], cj->progeny[1]),
                   s);
-              scheduler_splittask(
+              scheduler_splittask_hydro(
                   scheduler_addtask(s, task_type_pair, t->subtype, 6, 0,
                                     ci->progeny[6], cj->progeny[1]),
                   s);
-              scheduler_splittask(
+              scheduler_splittask_hydro(
                   scheduler_addtask(s, task_type_pair, t->subtype, 7, 0,
                                     ci->progeny[7], cj->progeny[1]),
                   s);
-              scheduler_splittask(
+              scheduler_splittask_hydro(
                   scheduler_addtask(s, task_type_pair, t->subtype, 1, 0,
                                     ci->progeny[4], cj->progeny[2]),
                   s);
-              scheduler_splittask(
+              scheduler_splittask_hydro(
                   scheduler_addtask(s, task_type_pair, t->subtype, 2, 0,
                                     ci->progeny[5], cj->progeny[2]),
                   s);
-              scheduler_splittask(
+              scheduler_splittask_hydro(
                   scheduler_addtask(s, task_type_pair, t->subtype, 4, 0,
                                     ci->progeny[6], cj->progeny[2]),
                   s);
-              scheduler_splittask(
+              scheduler_splittask_hydro(
                   scheduler_addtask(s, task_type_pair, t->subtype, 5, 0,
                                     ci->progeny[7], cj->progeny[2]),
                   s);
-              scheduler_splittask(
+              scheduler_splittask_hydro(
                   scheduler_addtask(s, task_type_pair, t->subtype, 0, 0,
                                     ci->progeny[4], cj->progeny[3]),
                   s);
-              scheduler_splittask(
+              scheduler_splittask_hydro(
                   scheduler_addtask(s, task_type_pair, t->subtype, 1, 0,
                                     ci->progeny[5], cj->progeny[3]),
                   s);
-              scheduler_splittask(
+              scheduler_splittask_hydro(
                   scheduler_addtask(s, task_type_pair, t->subtype, 3, 0,
                                     ci->progeny[6], cj->progeny[3]),
                   s);
-              scheduler_splittask(
+              scheduler_splittask_hydro(
                   scheduler_addtask(s, task_type_pair, t->subtype, 4, 0,
                                     ci->progeny[7], cj->progeny[3]),
                   s);
@@ -432,15 +407,15 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) {
               t->ci = ci->progeny[4];
               t->cj = cj->progeny[1];
               t->flags = 5;
-              scheduler_splittask(
+              scheduler_splittask_hydro(
                   scheduler_addtask(s, task_type_pair, t->subtype, 5, 0,
                                     ci->progeny[6], cj->progeny[3]),
                   s);
-              scheduler_splittask(
+              scheduler_splittask_hydro(
                   scheduler_addtask(s, task_type_pair, t->subtype, 2, 0,
                                     ci->progeny[4], cj->progeny[3]),
                   s);
-              scheduler_splittask(
+              scheduler_splittask_hydro(
                   scheduler_addtask(s, task_type_pair, t->subtype, 8, 0,
                                     ci->progeny[6], cj->progeny[1]),
                   s);
@@ -456,15 +431,15 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) {
               t->ci = ci->progeny[4];
               t->cj = cj->progeny[3];
               t->flags = 6;
-              scheduler_splittask(
+              scheduler_splittask_hydro(
                   scheduler_addtask(s, task_type_pair, t->subtype, 8, 0,
                                     ci->progeny[5], cj->progeny[2]),
                   s);
-              scheduler_splittask(
+              scheduler_splittask_hydro(
                   scheduler_addtask(s, task_type_pair, t->subtype, 7, 0,
                                     ci->progeny[4], cj->progeny[2]),
                   s);
-              scheduler_splittask(
+              scheduler_splittask_hydro(
                   scheduler_addtask(s, task_type_pair, t->subtype, 7, 0,
                                     ci->progeny[5], cj->progeny[3]),
                   s);
@@ -480,15 +455,15 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) {
               t->ci = ci->progeny[3];
               t->cj = cj->progeny[0];
               t->flags = 9;
-              scheduler_splittask(
+              scheduler_splittask_hydro(
                   scheduler_addtask(s, task_type_pair, t->subtype, 9, 0,
                                     ci->progeny[7], cj->progeny[4]),
                   s);
-              scheduler_splittask(
+              scheduler_splittask_hydro(
                   scheduler_addtask(s, task_type_pair, t->subtype, 0, 0,
                                     ci->progeny[3], cj->progeny[4]),
                   s);
-              scheduler_splittask(
+              scheduler_splittask_hydro(
                   scheduler_addtask(s, task_type_pair, t->subtype, 8, 0,
                                     ci->progeny[7], cj->progeny[0]),
                   s);
@@ -498,63 +473,63 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) {
               t->ci = ci->progeny[2];
               t->cj = cj->progeny[0];
               t->flags = 10;
-              scheduler_splittask(
+              scheduler_splittask_hydro(
                   scheduler_addtask(s, task_type_pair, t->subtype, 11, 0,
                                     ci->progeny[3], cj->progeny[0]),
                   s);
-              scheduler_splittask(
+              scheduler_splittask_hydro(
                   scheduler_addtask(s, task_type_pair, t->subtype, 7, 0,
                                     ci->progeny[6], cj->progeny[0]),
                   s);
-              scheduler_splittask(
+              scheduler_splittask_hydro(
                   scheduler_addtask(s, task_type_pair, t->subtype, 6, 0,
                                     ci->progeny[7], cj->progeny[0]),
                   s);
-              scheduler_splittask(
+              scheduler_splittask_hydro(
                   scheduler_addtask(s, task_type_pair, t->subtype, 9, 0,
                                     ci->progeny[2], cj->progeny[1]),
                   s);
-              scheduler_splittask(
+              scheduler_splittask_hydro(
                   scheduler_addtask(s, task_type_pair, t->subtype, 10, 0,
                                     ci->progeny[3], cj->progeny[1]),
                   s);
-              scheduler_splittask(
+              scheduler_splittask_hydro(
                   scheduler_addtask(s, task_type_pair, t->subtype, 8, 0,
                                     ci->progeny[6], cj->progeny[1]),
                   s);
-              scheduler_splittask(
+              scheduler_splittask_hydro(
                   scheduler_addtask(s, task_type_pair, t->subtype, 7, 0,
                                     ci->progeny[7], cj->progeny[1]),
                   s);
-              scheduler_splittask(
+              scheduler_splittask_hydro(
                   scheduler_addtask(s, task_type_pair, t->subtype, 1, 0,
                                     ci->progeny[2], cj->progeny[4]),
                   s);
-              scheduler_splittask(
+              scheduler_splittask_hydro(
                   scheduler_addtask(s, task_type_pair, t->subtype, 2, 0,
                                     ci->progeny[3], cj->progeny[4]),
                   s);
-              scheduler_splittask(
+              scheduler_splittask_hydro(
                   scheduler_addtask(s, task_type_pair, t->subtype, 10, 0,
                                     ci->progeny[6], cj->progeny[4]),
                   s);
-              scheduler_splittask(
+              scheduler_splittask_hydro(
                   scheduler_addtask(s, task_type_pair, t->subtype, 11, 0,
                                     ci->progeny[7], cj->progeny[4]),
                   s);
-              scheduler_splittask(
+              scheduler_splittask_hydro(
                   scheduler_addtask(s, task_type_pair, t->subtype, 0, 0,
                                     ci->progeny[2], cj->progeny[5]),
                   s);
-              scheduler_splittask(
+              scheduler_splittask_hydro(
                   scheduler_addtask(s, task_type_pair, t->subtype, 1, 0,
                                     ci->progeny[3], cj->progeny[5]),
                   s);
-              scheduler_splittask(
+              scheduler_splittask_hydro(
                   scheduler_addtask(s, task_type_pair, t->subtype, 9, 0,
                                     ci->progeny[6], cj->progeny[5]),
                   s);
-              scheduler_splittask(
+              scheduler_splittask_hydro(
                   scheduler_addtask(s, task_type_pair, t->subtype, 10, 0,
                                     ci->progeny[7], cj->progeny[5]),
                   s);
@@ -564,15 +539,15 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) {
               t->ci = ci->progeny[2];
               t->cj = cj->progeny[1];
               t->flags = 11;
-              scheduler_splittask(
+              scheduler_splittask_hydro(
                   scheduler_addtask(s, task_type_pair, t->subtype, 11, 0,
                                     ci->progeny[6], cj->progeny[5]),
                   s);
-              scheduler_splittask(
+              scheduler_splittask_hydro(
                   scheduler_addtask(s, task_type_pair, t->subtype, 2, 0,
                                     ci->progeny[2], cj->progeny[5]),
                   s);
-              scheduler_splittask(
+              scheduler_splittask_hydro(
                   scheduler_addtask(s, task_type_pair, t->subtype, 6, 0,
                                     ci->progeny[6], cj->progeny[1]),
                   s);
@@ -582,63 +557,63 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) {
               t->ci = ci->progeny[1];
               t->cj = cj->progeny[0];
               t->flags = 12;
-              scheduler_splittask(
+              scheduler_splittask_hydro(
                   scheduler_addtask(s, task_type_pair, t->subtype, 11, 0,
                                     ci->progeny[3], cj->progeny[0]),
                   s);
-              scheduler_splittask(
+              scheduler_splittask_hydro(
                   scheduler_addtask(s, task_type_pair, t->subtype, 5, 0,
                                     ci->progeny[5], cj->progeny[0]),
                   s);
-              scheduler_splittask(
+              scheduler_splittask_hydro(
                   scheduler_addtask(s, task_type_pair, t->subtype, 2, 0,
                                     ci->progeny[7], cj->progeny[0]),
                   s);
-              scheduler_splittask(
+              scheduler_splittask_hydro(
                   scheduler_addtask(s, task_type_pair, t->subtype, 9, 0,
                                     ci->progeny[1], cj->progeny[2]),
                   s);
-              scheduler_splittask(
+              scheduler_splittask_hydro(
                   scheduler_addtask(s, task_type_pair, t->subtype, 12, 0,
                                     ci->progeny[3], cj->progeny[2]),
                   s);
-              scheduler_splittask(
+              scheduler_splittask_hydro(
                   scheduler_addtask(s, task_type_pair, t->subtype, 8, 0,
                                     ci->progeny[5], cj->progeny[2]),
                   s);
-              scheduler_splittask(
+              scheduler_splittask_hydro(
                   scheduler_addtask(s, task_type_pair, t->subtype, 5, 0,
                                     ci->progeny[7], cj->progeny[2]),
                   s);
-              scheduler_splittask(
+              scheduler_splittask_hydro(
                   scheduler_addtask(s, task_type_pair, t->subtype, 3, 0,
                                     ci->progeny[1], cj->progeny[4]),
                   s);
-              scheduler_splittask(
+              scheduler_splittask_hydro(
                   scheduler_addtask(s, task_type_pair, t->subtype, 6, 0,
                                     ci->progeny[3], cj->progeny[4]),
                   s);
-              scheduler_splittask(
+              scheduler_splittask_hydro(
                   scheduler_addtask(s, task_type_pair, t->subtype, 12, 0,
                                     ci->progeny[5], cj->progeny[4]),
                   s);
-              scheduler_splittask(
+              scheduler_splittask_hydro(
                   scheduler_addtask(s, task_type_pair, t->subtype, 11, 0,
                                     ci->progeny[7], cj->progeny[4]),
                   s);
-              scheduler_splittask(
+              scheduler_splittask_hydro(
                   scheduler_addtask(s, task_type_pair, t->subtype, 0, 0,
                                     ci->progeny[1], cj->progeny[6]),
                   s);
-              scheduler_splittask(
+              scheduler_splittask_hydro(
                   scheduler_addtask(s, task_type_pair, t->subtype, 3, 0,
                                     ci->progeny[3], cj->progeny[6]),
                   s);
-              scheduler_splittask(
+              scheduler_splittask_hydro(
                   scheduler_addtask(s, task_type_pair, t->subtype, 9, 0,
                                     ci->progeny[5], cj->progeny[6]),
                   s);
-              scheduler_splittask(
+              scheduler_splittask_hydro(
                   scheduler_addtask(s, task_type_pair, t->subtype, 12, 0,
                                     ci->progeny[7], cj->progeny[6]),
                   s);
@@ -663,7 +638,7 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) {
                 struct task *tl =
                     scheduler_addtask(s, task_type_pair, t->subtype, 0, 0,
                                       ci->progeny[j], cj->progeny[k]);
-                scheduler_splittask(tl, s);
+                scheduler_splittask_hydro(tl, s);
                 tl->flags = space_getsid(s->space, &t->ci, &t->cj, shift);
               }
 
@@ -672,9 +647,9 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) {
 
         /* Create the drift and sort for ci. */
         lock_lock(&ci->lock);
-        if (ci->drift == NULL && ci->nodeID == engine_rank)
-          ci->drift = scheduler_addtask(s, task_type_drift, task_subtype_none,
-                                        0, 0, ci, NULL);
+        if (ci->drift_part == NULL && ci->nodeID == engine_rank)
+          ci->drift_part = scheduler_addtask(s, task_type_drift_part,
+                                             task_subtype_none, 0, 0, ci, NULL);
         if (ci->sorts == NULL)
           ci->sorts = scheduler_addtask(s, task_type_sort, task_subtype_none,
                                         1 << sid, 0, ci, NULL);
@@ -683,11 +658,11 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) {
         lock_unlock_blind(&ci->lock);
         scheduler_addunlock(s, ci->sorts, t);
 
-        /* Create the sort for cj. */
+        /* Create the drift and sort for cj. */
         lock_lock(&cj->lock);
-        if (cj->drift == NULL && cj->nodeID == engine_rank)
-          cj->drift = scheduler_addtask(s, task_type_drift, task_subtype_none,
-                                        0, 0, cj, NULL);
+        if (cj->drift_part == NULL && cj->nodeID == engine_rank)
+          cj->drift_part = scheduler_addtask(s, task_type_drift_part,
+                                             task_subtype_none, 0, 0, cj, NULL);
         if (cj->sorts == NULL)
           cj->sorts = scheduler_addtask(s, task_type_sort, task_subtype_none,
                                         1 << sid, 0, cj, NULL);
@@ -696,19 +671,142 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) {
         lock_unlock_blind(&cj->lock);
         scheduler_addunlock(s, cj->sorts, t);
       }
-
     } /* pair interaction? */
+  }   /* iterate over the current task. */
+}
+
+/**
+ * @brief Split a gravity task if too large (pair tasks are not split yet).
+ *
+ * @param t The #task (recycled in place when a self task is split).
+ * @param s The #scheduler we are working in.
+ */
+static void scheduler_splittask_gravity(struct task *t, struct scheduler *s) {
+
+  /* Iterate on this task until we're done with it. */
+  int redo = 1;
+  while (redo) {
+
+    /* Reset the redo flag. */
+    redo = 0;
+
+    /* Non-splittable task? */
+    if ((t->ci == NULL) || (t->type == task_type_pair && t->cj == NULL)) {
+      t->type = task_type_none;
+      t->subtype = task_subtype_none;
+      t->cj = NULL;
+      t->skip = 1;
+      break;
+    }
+
+    /* Self-interaction? */
+    if (t->type == task_type_self) {
+
+      /* Get a handle on the cell involved. */
+      struct cell *ci = t->ci;
+
+      /* Foreign task? */
+      if (ci->nodeID != s->nodeID) {
+        t->skip = 1;
+        break;
+      }
+
+      /* Is this cell even split? */
+      if (ci->split) {
+
+        /* Make a sub? */
+        if (scheduler_dosub && /* Note division here to avoid overflow */
+            (ci->gcount > 0 && ci->gcount < space_subsize / ci->gcount)) {
+
+          /* convert to a self-subtask. */
+          t->type = task_type_sub_self;
+
+          /* Make sure we have a drift task (MATTHIEU temp. fix) */
+          lock_lock(&ci->lock);
+          if (ci->drift_gpart == NULL)
+            ci->drift_gpart = scheduler_addtask(
+                s, task_type_drift_gpart, task_subtype_none, 0, 0, ci, NULL);
+          lock_unlock_blind(&ci->lock);
 
-    /* Long-range gravity interaction ? */
-    else if (t->type == task_type_grav_mm) {
+          /* Otherwise, make tasks explicitly. */
+        } else {
+
+          /* Take a step back (we're going to recycle the current task)... */
+          redo = 1;
+
+          /* Add the self tasks (t itself takes the first non-NULL progeny). */
+          int first_child = 0;
+          while (ci->progeny[first_child] == NULL) first_child++;
+          t->ci = ci->progeny[first_child];
+          for (int k = first_child + 1; k < 8; k++)
+            if (ci->progeny[k] != NULL)
+              scheduler_splittask_gravity(
+                  scheduler_addtask(s, task_type_self, t->subtype, 0, 0,
+                                    ci->progeny[k], NULL),
+                  s);
+
+          /* Make a task for each pair of progeny (not for external gravity) */
+          if (t->subtype != task_subtype_external_grav) {
+            for (int j = 0; j < 8; j++)
+              if (ci->progeny[j] != NULL)
+                for (int k = j + 1; k < 8; k++)
+                  if (ci->progeny[k] != NULL)
+                    scheduler_splittask_gravity(
+                        scheduler_addtask(s, task_type_pair, t->subtype,
+                                          sub_sid_flag[j][k], 0, ci->progeny[j],
+                                          ci->progeny[k]),
+                        s);
+          }
+        }
+      } /* Cell is split */
+
+      /* Otherwise, make sure the self task has a drift task */
+      else {
+
+        lock_lock(&ci->lock);
+
+        if (ci->drift_gpart == NULL)
+          ci->drift_gpart = scheduler_addtask(
+              s, task_type_drift_gpart, task_subtype_none, 0, 0, ci, NULL);
+        lock_unlock_blind(&ci->lock);
+      }
+    } /* Self interaction */
+
+    /* Pair interaction? */
+    else if (t->type == task_type_pair) {
 
       /* Get a handle on the cells involved. */
       struct cell *ci = t->ci;
+      struct cell *cj = t->cj;
+
+      /* Foreign task? */
+      if (ci->nodeID != s->nodeID && cj->nodeID != s->nodeID) {
+        t->skip = 1;
+        break;
+      }
+
+      /* Should this task be split-up? */
+      if (ci->split && cj->split) {
+
+        // MATTHIEU: nothing here for now (such pairs are kept as they are)
+
+      } else {
 
-      /* Safety thing */
-      if (ci->gcount == 0) t->type = task_type_none;
+        /* Create the gpart drift for ci (only if ci is on this rank). */
+        lock_lock(&ci->lock);
+        if (ci->drift_gpart == NULL && ci->nodeID == engine_rank)
+          ci->drift_gpart = scheduler_addtask(
+              s, task_type_drift_gpart, task_subtype_none, 0, 0, ci, NULL);
+        lock_unlock_blind(&ci->lock);
 
-    } /* gravity interaction? */
+        /* Create the gpart drift for cj (only if cj is on this rank). */
+        lock_lock(&cj->lock);
+        if (cj->drift_gpart == NULL && cj->nodeID == engine_rank)
+          cj->drift_gpart = scheduler_addtask(
+              s, task_type_drift_gpart, task_subtype_none, 0, 0, cj, NULL);
+        lock_unlock_blind(&cj->lock);
+      }
+    } /* pair interaction? */
   }   /* iterate over the current task. */
 }
 
@@ -728,7 +826,20 @@ void scheduler_splittasks_mapper(void *map_data, int num_elements,
 
   for (int ind = 0; ind < num_elements; ind++) {
     struct task *t = &tasks[ind];
-    scheduler_splittask(t, s);
+
+    /* Invoke the correct splitting strategy */
+    if (t->subtype == task_subtype_density) {
+      scheduler_splittask_hydro(t, s);
+    } else if (t->subtype == task_subtype_external_grav) {
+      scheduler_splittask_gravity(t, s);
+    } else if (t->subtype == task_subtype_grav) {
+      scheduler_splittask_gravity(t, s);
+    } else if (t->type == task_type_grav_top_level ||
+               t->type == task_type_grav_ghost) {
+      // MATTHIEU: for the future
+    } else {
+      error("Unexpected task sub-type");
+    }
   }
 }
 
@@ -821,7 +932,8 @@ void scheduler_set_unlocks(struct scheduler *s) {
     /* Check that we are not overflowing */
     if (counts[s->unlock_ind[k]] < 0)
       error("Task unlocking more than %d other tasks!",
-            (1 << (sizeof(short int) - 1)) - 1);
+            (1 << (8 * sizeof(short int) - 1)) - 1);
+
 #endif
   }
 
@@ -1004,9 +1116,6 @@ void scheduler_reweight(struct scheduler *s, int verbose) {
   int *tid = s->tasks_ind;
   struct task *tasks = s->tasks;
   const int nodeID = s->nodeID;
-  const float sid_scale[13] = {0.1897, 0.4025, 0.1897, 0.4025, 0.5788,
-                               0.4025, 0.1897, 0.4025, 0.1897, 0.4025,
-                               0.5788, 0.4025, 0.5788};
   const float wscale = 0.001;
   const ticks tic = getticks();
 
@@ -1053,9 +1162,12 @@ void scheduler_reweight(struct scheduler *s, int verbose) {
       case task_type_ghost:
         if (t->ci == t->ci->super) cost = wscale * t->ci->count;
         break;
-      case task_type_drift:
+      case task_type_drift_part:
         cost = wscale * t->ci->count;
         break;
+      case task_type_drift_gpart:
+        cost = wscale * t->ci->gcount;
+        break;
       case task_type_kick1:
         cost = wscale * t->ci->count;
         break;
@@ -1148,6 +1260,15 @@ void scheduler_enqueue_mapper(void *map_data, int num_elements,
  */
 void scheduler_start(struct scheduler *s) {
 
+/* Reset all task debugging timers */
+#ifdef SWIFT_DEBUG_TASKS
+  for (int i = 0; i < s->nr_tasks; ++i) {
+    s->tasks[i].tic = 0;
+    s->tasks[i].toc = 0;
+    s->tasks[i].rid = -1;
+  }
+#endif
+
   /* Re-wait the tasks. */
   if (s->active_count > 1000) {
     threadpool_map(s->threadpool, scheduler_rewait_mapper, s->tid_active,
@@ -1174,6 +1295,11 @@ void scheduler_start(struct scheduler *s) {
       /* Don't check MPI stuff */
       if (t->type == task_type_send || t->type == task_type_recv) continue;
 
+      /* Don't check the FFT task */
+      if (t->type == task_type_grav_top_level ||
+          t->type == task_type_grav_ghost)
+        continue;
+
       if (ci == NULL && cj == NULL) {
 
         error("Task not associated with cells!");
@@ -1181,7 +1307,8 @@ void scheduler_start(struct scheduler *s) {
       } else if (cj == NULL) { /* self */
 
         if (ci->ti_end_min == ti_current && t->skip &&
-            t->type != task_type_sort && t->type != task_type_drift && t->type)
+            t->type != task_type_sort && t->type != task_type_drift_part &&
+            t->type != task_type_drift_gpart)
           error(
               "Task (type='%s/%s') should not have been skipped "
               "ti_current=%lld "
@@ -1276,7 +1403,8 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) {
       case task_type_ghost:
       case task_type_kick1:
       case task_type_kick2:
-      case task_type_drift:
+      case task_type_drift_part:
+      case task_type_drift_gpart:
       case task_type_timestep:
         qid = t->ci->super->owner;
         break;
@@ -1295,7 +1423,8 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) {
                           MPI_BYTE, t->ci->nodeID, t->flags, MPI_COMM_WORLD,
                           &t->req);
         } else if (t->subtype == task_subtype_xv ||
-                   t->subtype == task_subtype_rho) {
+                   t->subtype == task_subtype_rho ||
+                   t->subtype == task_subtype_gradient) {
           err = MPI_Irecv(t->ci->parts, t->ci->count, part_mpi_type,
                           t->ci->nodeID, t->flags, MPI_COMM_WORLD, &t->req);
           // message( "receiving %i parts with tag=%i from %i to %i." ,
@@ -1330,7 +1459,8 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) {
                           MPI_BYTE, t->cj->nodeID, t->flags, MPI_COMM_WORLD,
                           &t->req);
         } else if (t->subtype == task_subtype_xv ||
-                   t->subtype == task_subtype_rho) {
+                   t->subtype == task_subtype_rho ||
+                   t->subtype == task_subtype_gradient) {
 #ifdef SWIFT_DEBUG_CHECKS
           for (int k = 0; k < t->ci->count; k++)
             if (t->ci->parts[k].ti_drift != s->space->e->ti_current)
diff --git a/src/sort_part.h b/src/sort_part.h
index a243fcdfae8ec0aba606000e26bc18d35601215c..74116d7a8cada31c0663d5c5b70cfa978b11af8b 100644
--- a/src/sort_part.h
+++ b/src/sort_part.h
@@ -83,6 +83,18 @@ static const int sortlistID[27] = {
     /* (  1 ,  1 ,  0 ) */ 1,
     /* (  1 ,  1 ,  1 ) */ 0};
 
+/* Per-sid ratio of interacting particles, assuming a uniform distribution */
+static const float sid_scale[13] = {0.1897f, 0.4025f, 0.1897f, 0.4025f, 0.5788f,
+                                    0.4025f, 0.1897f, 0.4025f, 0.1897f, 0.4025f,
+                                    0.5788f, 0.4025f, 0.5788f};
+
+/* Sid flags for every sub-pair of a self task: [j][k] with j < k, else -1. */
+static const int sub_sid_flag[7][8] = {
+    {-1, 12, 10, 9, 4, 3, 1, 0},     {-1, -1, 11, 10, 5, 4, 2, 1},
+    {-1, -1, -1, 12, 7, 6, 4, 3},    {-1, -1, -1, -1, 8, 7, 5, 4},
+    {-1, -1, -1, -1, -1, 12, 10, 9}, {-1, -1, -1, -1, -1, -1, 11, 10},
+    {-1, -1, -1, -1, -1, -1, -1, 12}};
+
 /**
  * @brief Determines whether a pair of cells are corner to corner.
  *
diff --git a/src/space.c b/src/space.c
index d0f629d90dd56ffb621705c9b9718331cf5eff4b..fd6476743ce39d130c6e00737af03f5d34216381 100644
--- a/src/space.c
+++ b/src/space.c
@@ -205,7 +205,8 @@ void space_rebuild_recycle_mapper(void *map_data, int num_elements,
     c->gradient = NULL;
     c->force = NULL;
     c->grav = NULL;
-    c->dx_max = 0.0f;
+    c->dx_max_part = 0.0f;
+    c->dx_max_gpart = 0.0f;
     c->dx_max_sort = 0.0f;
     c->sorted = 0;
     c->count = 0;
@@ -219,10 +220,12 @@ void space_rebuild_recycle_mapper(void *map_data, int num_elements,
     c->kick1 = NULL;
     c->kick2 = NULL;
     c->timestep = NULL;
-    c->drift = NULL;
+    c->drift_part = NULL;
+    c->drift_gpart = NULL;
     c->cooling = NULL;
     c->sourceterms = NULL;
-    c->grav_top_level = NULL;
+    c->grav_ghost[0] = NULL;
+    c->grav_ghost[1] = NULL;
     c->grav_long_range = NULL;
     c->grav_down = NULL;
     c->super = c;
@@ -422,7 +425,8 @@ void space_regrid(struct space *s, int verbose) {
           c->gcount = 0;
           c->scount = 0;
           c->super = c;
-          c->ti_old = ti_old;
+          c->ti_old_part = ti_old;
+          c->ti_old_gpart = ti_old;
           c->ti_old_multipole = ti_old;
           if (s->gravity) c->multipole = &s->multipoles_top[cid];
         }
@@ -892,8 +896,9 @@ void space_rebuild(struct space *s, int verbose) {
 
 #ifdef SWIFT_DEBUG_CHECKS
   /* Verify that the links are correct */
-  part_verify_links(s->parts, s->gparts, s->sparts, nr_parts, nr_gparts,
-                    nr_sparts, verbose);
+  if ((nr_gparts > 0 && nr_parts > 0) || (nr_gparts > 0 && nr_sparts > 0))
+    part_verify_links(s->parts, s->gparts, s->sparts, nr_parts, nr_gparts,
+                      nr_sparts, verbose);
 #endif
 
   /* Hook the cells up to the parts. */
@@ -904,7 +909,8 @@ void space_rebuild(struct space *s, int verbose) {
   struct spart *sfinger = s->sparts;
   for (int k = 0; k < s->nr_cells; k++) {
     struct cell *restrict c = &cells_top[k];
-    c->ti_old = ti_old;
+    c->ti_old_part = ti_old;
+    c->ti_old_gpart = ti_old;
     c->ti_old_multipole = ti_old;
     c->parts = finger;
     c->xparts = xfinger;
@@ -2013,7 +2019,8 @@ void space_split_recursive(struct space *s, struct cell *c,
       cp->count = 0;
       cp->gcount = 0;
       cp->scount = 0;
-      cp->ti_old = c->ti_old;
+      cp->ti_old_part = c->ti_old_part;
+      cp->ti_old_gpart = c->ti_old_gpart;
       cp->ti_old_multipole = c->ti_old_multipole;
       cp->loc[0] = c->loc[0];
       cp->loc[1] = c->loc[1];
@@ -2027,8 +2034,9 @@ void space_split_recursive(struct space *s, struct cell *c,
       if (k & 1) cp->loc[2] += cp->width[2];
       cp->depth = c->depth + 1;
       cp->split = 0;
-      cp->h_max = 0.0;
-      cp->dx_max = 0.f;
+      cp->h_max = 0.f;
+      cp->dx_max_part = 0.f;
+      cp->dx_max_gpart = 0.f;
       cp->dx_max_sort = 0.f;
       cp->nodeID = c->nodeID;
       cp->parent = c;
@@ -2425,6 +2433,58 @@ void space_getcells(struct space *s, int nr_cells, struct cell **cells) {
   }
 }
 
+void space_synchronize_particle_positions_mapper(void *map_data, int nr_gparts,
+                                                 void *extra_data) {
+  /* Mapper copying gpart data to the matching parts/sparts. Unpack first: */
+  struct gpart *restrict gparts = (struct gpart *)map_data;
+  struct space *s = (struct space *)extra_data;
+
+  for (int k = 0; k < nr_gparts; k++) {
+
+    /* Get the particle */
+    const struct gpart *restrict gp = &gparts[k];
+
+    if (gp->type == swift_type_dark_matter)
+      continue; /* nothing to copy for dark matter */
+
+    else if (gp->type == swift_type_gas) {
+
+      /* Get its gas counterpart (indexed by the negated offset) */
+      struct part *p = &s->parts[-gp->id_or_neg_offset];
+      struct xpart *xp = &s->xparts[-gp->id_or_neg_offset];
+
+      /* Copy position (to the part) and v_full (to the xpart) from the gpart */
+      p->x[0] = gp->x[0];
+      p->x[1] = gp->x[1];
+      p->x[2] = gp->x[2];
+
+      xp->v_full[0] = gp->v_full[0];
+      xp->v_full[1] = gp->v_full[1];
+      xp->v_full[2] = gp->v_full[2];
+    }
+
+    else if (gp->type == swift_type_star) {
+
+      /* Get its star counterpart (indexed by the negated offset) */
+      struct spart *sp = &s->sparts[-gp->id_or_neg_offset];
+
+      /* Copy the position only; no velocity is copied for stars */
+      sp->x[0] = gp->x[0];
+      sp->x[1] = gp->x[1];
+      sp->x[2] = gp->x[2];
+    }
+  }
+}
+
+void space_synchronize_particle_positions(struct space *s) {
+  /* Run the mapper over all gparts, but only if parts or sparts exist too. */
+  if ((s->nr_gparts > 0 && s->nr_parts > 0) ||
+      (s->nr_gparts > 0 && s->nr_sparts > 0))
+    threadpool_map(&s->e->threadpool,
+                   space_synchronize_particle_positions_mapper, s->gparts,
+                   s->nr_gparts, sizeof(struct gpart), 1000, (void *)s);
+}
+
 /**
  * @brief Initialises all the particles by setting them into a valid state
  *
@@ -2879,7 +2939,8 @@ void space_check_drift_point(struct space *s, integertime_t ti_drift,
                              int multipole) {
 #ifdef SWIFT_DEBUG_CHECKS
   /* Recursively check all cells */
-  space_map_cells_pre(s, 1, cell_check_particle_drift_point, &ti_drift);
+  space_map_cells_pre(s, 1, cell_check_part_drift_point, &ti_drift);
+  space_map_cells_pre(s, 1, cell_check_gpart_drift_point, &ti_drift);
   if (multipole)
     space_map_cells_pre(s, 1, cell_check_multipole_drift_point, &ti_drift);
 #else
diff --git a/src/space.h b/src/space.h
index c5f588563e5a9fb4b6cb73ac1446514f8149794f..e8e8600349c97ff8a60f0fcf2964d6ec514a7589 100644
--- a/src/space.h
+++ b/src/space.h
@@ -130,6 +130,9 @@ struct space {
   /*! The s-particle data (cells have pointers to this). */
   struct spart *sparts;
 
+  /*! The top-level FFT task */
+  struct task *grav_top_level;
+
   /*! General-purpose lock for this space. */
   swift_lock_type lock;
 
@@ -206,6 +209,7 @@ void space_gparts_get_cell_index(struct space *s, int *gind, struct cell *cells,
                                  int verbose);
 void space_sparts_get_cell_index(struct space *s, int *sind, struct cell *cells,
                                  int verbose);
+void space_synchronize_particle_positions(struct space *s);
 void space_do_parts_sort();
 void space_do_gparts_sort();
 void space_do_sparts_sort();
diff --git a/src/swift.h b/src/swift.h
index 7f1b19b6066c2d55df1cb9101172ae94c9085583..20397eb24df478cba65a0e35d686b402f1d8ee70 100644
--- a/src/swift.h
+++ b/src/swift.h
@@ -45,6 +45,7 @@
 #include "parser.h"
 #include "part.h"
 #include "partition.h"
+#include "periodic.h"
 #include "physical_constants.h"
 #include "potential.h"
 #include "profiler.h"
diff --git a/src/task.c b/src/task.c
index e8c35e49a57595a6415c60ce7071ae1c0e3f09b7..43da1d35680783d977ea743dd4f43c52f0f291bc 100644
--- a/src/task.c
+++ b/src/task.c
@@ -47,27 +47,15 @@
 #include "lock.h"
 
 /* Task type names. */
-const char *taskID_names[task_type_count] = {"none",
-                                             "sort",
-                                             "self",
-                                             "pair",
-                                             "sub_self",
-                                             "sub_pair",
-                                             "init_grav",
-                                             "ghost",
-                                             "extra_ghost",
-                                             "drift",
-                                             "kick1",
-                                             "kick2",
-                                             "timestep",
-                                             "send",
-                                             "recv",
-                                             "grav_top_level",
-                                             "grav_long_range",
-                                             "grav_mm",
-                                             "grav_down",
-                                             "cooling",
-                                             "sourceterms"};
+const char *taskID_names[task_type_count] = { /* enum task_types order */
+    "none",       "sort",           "self",
+    "pair",       "sub_self",       "sub_pair",
+    "init_grav",  "ghost",          "extra_ghost",
+    "drift_part", "drift_gpart",    "kick1",
+    "kick2",      "timestep",       "send",
+    "recv",       "grav_top_level", "grav_long_range",
+    "grav_ghost", "grav_mm",        "grav_down",
+    "cooling",    "sourceterms"};
 
 /* Sub-task type names. */
 const char *subtaskID_names[task_subtype_count] = {
@@ -132,6 +120,7 @@ __attribute__((always_inline)) INLINE static enum task_actions task_acts_on(
       return task_action_none;
       break;
 
+    case task_type_drift_part:
     case task_type_sort:
     case task_type_ghost:
     case task_type_extra_ghost:
@@ -169,7 +158,6 @@ __attribute__((always_inline)) INLINE static enum task_actions task_acts_on(
     case task_type_timestep:
     case task_type_send:
     case task_type_recv:
-    case task_type_drift:
       if (t->ci->count > 0 && t->ci->gcount > 0)
         return task_action_all;
       else if (t->ci->count > 0)
@@ -187,8 +175,10 @@ __attribute__((always_inline)) INLINE static enum task_actions task_acts_on(
       return task_action_multipole;
       break;
 
+    case task_type_drift_gpart:
     case task_type_grav_down:
       return task_action_gpart;
+      break;
 
     default:
       error("Unknown task_action for task");
@@ -286,15 +276,19 @@ void task_unlock(struct task *t) {
     case task_type_kick1:
     case task_type_kick2:
     case task_type_timestep:
-    case task_type_drift:
       cell_unlocktree(ci);
       cell_gunlocktree(ci);
       break;
 
+    case task_type_drift_part:
     case task_type_sort:
       cell_unlocktree(ci);
       break;
 
+    case task_type_drift_gpart:
+      cell_gunlocktree(ci);
+      break;
+
     case task_type_self:
     case task_type_sub_self:
       if (subtype == task_subtype_grav) {
@@ -323,7 +317,6 @@ void task_unlock(struct task *t) {
       cell_munlocktree(ci);
       break;
 
-    case task_type_grav_top_level:
     case task_type_grav_long_range:
     case task_type_grav_mm:
       cell_munlocktree(ci);
@@ -372,7 +365,6 @@ int task_lock(struct task *t) {
     case task_type_kick1:
     case task_type_kick2:
     case task_type_timestep:
-    case task_type_drift:
       if (ci->hold || ci->ghold) return 0;
       if (cell_locktree(ci) != 0) return 0;
       if (cell_glocktree(ci) != 0) {
@@ -381,10 +373,17 @@ int task_lock(struct task *t) {
       }
       break;
 
+    case task_type_drift_part:
     case task_type_sort:
+      if (ci->hold) return 0;
       if (cell_locktree(ci) != 0) return 0;
       break;
 
+    case task_type_drift_gpart:
+      if (ci->ghold) return 0;
+      if (cell_glocktree(ci) != 0) return 0;
+      break;
+
     case task_type_self:
     case task_type_sub_self:
       if (subtype == task_subtype_grav) {
@@ -442,7 +441,6 @@ int task_lock(struct task *t) {
       }
       break;
 
-    case task_type_grav_top_level:
     case task_type_grav_long_range:
     case task_type_grav_mm:
       /* Lock the m-poles */
diff --git a/src/task.h b/src/task.h
index 049f86bdd6b4baf0856745b2b53acda5cca8c9e1..052f3e8036381441e283d3f7847d09e98ec1dac2 100644
--- a/src/task.h
+++ b/src/task.h
@@ -47,7 +47,8 @@ enum task_types {
   task_type_init_grav,
   task_type_ghost,
   task_type_extra_ghost,
-  task_type_drift,
+  task_type_drift_part,
+  task_type_drift_gpart,
   task_type_kick1,
   task_type_kick2,
   task_type_timestep,
@@ -55,6 +56,7 @@ enum task_types {
   task_type_recv,
   task_type_grav_top_level,
   task_type_grav_long_range,
+  task_type_grav_ghost,
   task_type_grav_mm,
   task_type_grav_down,
   task_type_cooling,
diff --git a/src/timers.c b/src/timers.c
index aa42eee14fc0df3edd5a18340c092b8eea2ffac1..62eac20596a082e411ced61a86f32bef9edcb636 100644
--- a/src/timers.c
+++ b/src/timers.c
@@ -40,7 +40,8 @@ const char* timers_names[timer_count] = {
     "prepare",
     "init",
     "init_grav",
-    "drift",
+    "drift_part",
+    "drift_gpart",
     "kick1",
     "kick2",
     "timestep",
@@ -58,6 +59,7 @@ const char* timers_names[timer_count] = {
     "dopair_grav_pp",
     "dograv_external",
     "dograv_down",
+    "dograv_top_level",
     "dograv_long_range",
     "dosource",
     "dosub_self_density",
diff --git a/src/timers.h b/src/timers.h
index 08e983a947bc57d9dcc7a432df92c2a4b0a1f7d7..9248be4f3048e468deed476f822947eed3c4ce56 100644
--- a/src/timers.h
+++ b/src/timers.h
@@ -41,7 +41,8 @@ enum {
   timer_prepare,
   timer_init,
   timer_init_grav,
-  timer_drift,
+  timer_drift_part,
+  timer_drift_gpart,
   timer_kick1,
   timer_kick2,
   timer_timestep,
@@ -59,6 +60,7 @@ enum {
   timer_dopair_grav_pp,
   timer_dograv_external,
   timer_dograv_down,
+  timer_dograv_top_level,
   timer_dograv_long_range,
   timer_dosource,
   timer_dosub_self_density,
diff --git a/src/xmf.c b/src/xmf.c
index 7292606c9f013601db1e9e9b35ee843dea63f785..ca4ffe5157599dd5a45295dcfa59f9420753f5cf 100644
--- a/src/xmf.c
+++ b/src/xmf.c
@@ -181,6 +181,52 @@ void xmf_write_groupfooter(FILE* xmfFile, enum part_type ptype) {
           part_type_names[ptype]);
 }
 
+/**
+ * @brief Returns the precision (size in bytes) of a given dataset type
+ */
+int xmf_precision(enum IO_DATA_TYPE type) {
+  switch (type) {
+    case FLOAT:
+      return 4;
+      break;
+    case DOUBLE:
+      return 8;
+      break;
+    case ULONGLONG:
+    case LONGLONG:
+      return 8;
+      break;
+    case CHAR:
+      return 1;
+      break;
+    default:
+      error("Unsupported type");
+  }
+  return 0;
+}
+
+/**
+ * @brief Returns the Xdmf type name of a given dataset type
+ */
+const char* xmf_type(enum IO_DATA_TYPE type) {
+  switch (type) {
+    case FLOAT:
+    case DOUBLE:
+      return "Float";
+      break;
+    case ULONGLONG:
+    case LONGLONG:
+      return "Int";
+      break;
+    case CHAR:
+      return "Char";
+      break;
+    default:
+      error("Unsupported type");
+  }
+  return "";
+}
+
 /**
  * @brief Writes the lines corresponding to an array of the HDF5 output
  *
@@ -203,13 +249,15 @@ void xmf_write_line(FILE* xmfFile, const char* fileName,
           name, dim == 1 ? "Scalar" : "Vector");
   if (dim == 1)
     fprintf(xmfFile,
-            "<DataItem Dimensions=\"%zu\" NumberType=\"Double\" "
+            "<DataItem Dimensions=\"%zu\" NumberType=\"%s\" "
             "Precision=\"%d\" Format=\"HDF\">%s:%s/%s</DataItem>\n",
-            N, type == FLOAT ? 4 : 8, fileName, partTypeGroupName, name);
+            N, xmf_type(type), xmf_precision(type), fileName, partTypeGroupName,
+            name);
   else
     fprintf(xmfFile,
-            "<DataItem Dimensions=\"%zu %d\" NumberType=\"Double\" "
+            "<DataItem Dimensions=\"%zu %d\" NumberType=\"%s\" "
             "Precision=\"%d\" Format=\"HDF\">%s:%s/%s</DataItem>\n",
-            N, dim, type == FLOAT ? 4 : 8, fileName, partTypeGroupName, name);
+            N, dim, xmf_type(type), xmf_precision(type), fileName,
+            partTypeGroupName, name);
   fprintf(xmfFile, "</Attribute>\n");
 }
diff --git a/tests/Makefile.am b/tests/Makefile.am
index a51b8eb82a17313818ff956ca3f15a232df8df65..7c45ead22f77da7e0aa53e03051c7351cc97f550 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -22,7 +22,7 @@ AM_LDFLAGS = ../src/.libs/libswiftsim.a $(HDF5_LDFLAGS) $(HDF5_LIBS) $(FFTW_LIBS
 # List of programs and scripts to run in the test suite
 TESTS = testGreetings testMaths testReading.sh testSingle testKernel testSymmetry \
         testPair.sh testPairPerturbed.sh test27cells.sh test27cellsPerturbed.sh  \
-        testParser.sh testSPHStep test125cells.sh testFFT \
+        testParser.sh testSPHStep test125cells.sh test125cellsPerturbed.sh testFFT \
         testAdiabaticIndex testRiemannExact testRiemannTRRS testRiemannHLLC \
         testMatrixInversion testThreadpool testDump testLogger \
         testVoronoi1D testVoronoi2D testVoronoi3D
@@ -92,6 +92,8 @@ testLogger_SOURCES = testLogger.c
 # Files necessary for distribution
 EXTRA_DIST = testReading.sh makeInput.py testPair.sh testPairPerturbed.sh \
 	     test27cells.sh test27cellsPerturbed.sh testParser.sh \
-	     test125cells.sh testParserInput.yaml difffloat.py \
-	     tolerance_125.dat tolerance_27_normal.dat tolerance_27_perturbed.dat \
-	     tolerance_pair_normal.dat tolerance_pair_perturbed.dat
+	     test125cells.sh test125cellsPerturbed.sh testParserInput.yaml difffloat.py \
+	     tolerance_125_normal.dat tolerance_125_perturbed.dat \
+             tolerance_27_normal.dat tolerance_27_perturbed.dat \
+	     tolerance_pair_normal.dat tolerance_pair_perturbed.dat \
+	     fft_params.yml
diff --git a/tests/difffloat.py b/tests/difffloat.py
index e0f0864372264899c6de1bf2f83ab678b7dd9ead..0bdc706a1c44ee6c42c54ad37e93f634742e06bc 100644
--- a/tests/difffloat.py
+++ b/tests/difffloat.py
@@ -35,13 +35,18 @@ file1 = sys.argv[1]
 file2 = sys.argv[2]
 number_to_check = -1
 
-if len(sys.argv) == 5:
-    number_to_check = int(sys.argv[4])
-
 fileTol = ""
 if len(sys.argv) >= 4:
     fileTol = sys.argv[3]
 
+if len(sys.argv) >= 5:
+    number_to_check = int(sys.argv[4])
+
+if len(sys.argv) == 6:
+    ignoreSmallRhoDh = int(sys.argv[5])
+else:
+    ignoreSmallRhoDh = 0
+    
 data1 = loadtxt(file1)
 data2 = loadtxt(file2)
 if fileTol != "":
@@ -102,8 +107,11 @@ for i in range(n_lines_to_check):
             print ""
             error = True
 
-        if abs(data1[i,j]) < 1e-6 and + abs(data2[i,j]) < 1e-6 : continue
-            
+        if abs(data1[i,j]) < 4e-6 and abs(data2[i,j]) < 4e-6 : continue
+
+        # Ignore pathological cases with rho_dh
+        if ignoreSmallRhoDh and j == 8 and abs(data1[i,j]) < 2e-4: continue
+        
         if( rel_diff > 1.1*relTol[j]):
             print "Relative difference larger than tolerance (%e) for particle %d, column %d:"%(relTol[j], i,j)
             print "%10s:           a = %e"%("File 1", data1[i,j])
diff --git a/tests/fft_params.yml b/tests/fft_params.yml
new file mode 100644
index 0000000000000000000000000000000000000000..05d6d8f0b0578d11645fc1d78c1a6322160ae87a
--- /dev/null
+++ b/tests/fft_params.yml
@@ -0,0 +1,10 @@
+Scheduler:
+  max_top_level_cells: 64
+  
+# Parameters for the self-gravity scheme
+Gravity:
+  eta:                   0.025    # Constant dimensionless multiplier for time integration. 
+  theta:                 0.7      # Opening angle (Multipole acceptance criterion)
+  epsilon:               0.00001  # Softening length (in internal units).
+  a_smooth:              0.
+  r_cut:                 0.
diff --git a/tests/test125cells.c b/tests/test125cells.c
index 168b4838eab5b27f359ab927a7bae2240919e82f..e4c73b5e75df56436d277d719b3b83a179924a6f 100644
--- a/tests/test125cells.c
+++ b/tests/test125cells.c
@@ -1,3 +1,4 @@
+
 /*******************************************************************************
  * This file is part of SWIFT.
  * Copyright (C) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk).
@@ -236,11 +237,13 @@ void reset_particles(struct cell *c, struct hydro_space *hs,
  * separation.
  * @param density The density of the fluid.
  * @param partId The running counter of IDs.
+ * @param pert The perturbation to apply to the particles in the cell in units
+ * of the inter-particle separation.
  * @param vel The type of velocity field.
  * @param press The type of pressure field.
  */
 struct cell *make_cell(size_t n, const double offset[3], double size, double h,
-                       double density, long long *partId,
+                       double density, long long *partId, double pert,
                        enum velocity_field vel, enum pressure_field press) {
 
   const size_t count = n * n * n;
@@ -263,9 +266,15 @@ struct cell *make_cell(size_t n, const double offset[3], double size, double h,
   for (size_t x = 0; x < n; ++x) {
     for (size_t y = 0; y < n; ++y) {
       for (size_t z = 0; z < n; ++z) {
-        part->x[0] = offset[0] + size * (x + 0.5) / (float)n;
-        part->x[1] = offset[1] + size * (y + 0.5) / (float)n;
-        part->x[2] = offset[2] + size * (z + 0.5) / (float)n;
+        part->x[0] =
+            offset[0] +
+            size * (x + 0.5 + random_uniform(-0.5, 0.5) * pert) / (float)n;
+        part->x[1] =
+            offset[1] +
+            size * (y + 0.5 + random_uniform(-0.5, 0.5) * pert) / (float)n;
+        part->x[2] =
+            offset[2] +
+            size * (z + 0.5 + random_uniform(-0.5, 0.5) * pert) / (float)n;
         part->h = size * h / (float)n;
 
 #if defined(GIZMO_SPH) || defined(SHADOWFAX_SPH)
@@ -315,7 +324,7 @@ struct cell *make_cell(size_t n, const double offset[3], double size, double h,
   cell->h_max = h;
   cell->count = count;
   cell->gcount = 0;
-  cell->dx_max = 0.;
+  cell->dx_max_part = 0.;
   cell->dx_max_sort = 0.;
   cell->width[0] = size;
   cell->width[1] = size;
@@ -324,7 +333,7 @@ struct cell *make_cell(size_t n, const double offset[3], double size, double h,
   cell->loc[1] = offset[1];
   cell->loc[2] = offset[2];
 
-  cell->ti_old = 8;
+  cell->ti_old_part = 8;
   cell->ti_end_min = 8;
   cell->ti_end_max = 8;
   cell->ti_sort = 0;
@@ -354,8 +363,8 @@ void dump_particle_fields(char *fileName, struct cell *main_cell,
 
   /* Write header */
   fprintf(file,
-          "# %4s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s "
-          "%8s %8s %8s %8s %8s\n",
+          "# %4s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s %13s %13s "
+          "%13s %13s %13s %8s %8s\n",
           "ID", "pos_x", "pos_y", "pos_z", "v_x", "v_y", "v_z", "h", "rho",
           "div_v", "S", "u", "P", "c", "a_x", "a_y", "a_z", "h_dt", "v_sig",
           "dS/dt", "du/dt");
@@ -367,7 +376,7 @@ void dump_particle_fields(char *fileName, struct cell *main_cell,
     fprintf(file,
             "%6llu %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f "
             "%8.5f "
-            "%8.5f %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f\n",
+            "%8.5f %8.5f %13e %13e %13e %13e %13e %8.5f %8.5f\n",
             main_cell->parts[pid].id, main_cell->parts[pid].x[0],
             main_cell->parts[pid].x[1], main_cell->parts[pid].x[2],
             main_cell->parts[pid].v[0], main_cell->parts[pid].v[1],
@@ -406,7 +415,7 @@ void dump_particle_fields(char *fileName, struct cell *main_cell,
       fprintf(file,
               "%6llu %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f "
               "%8.5f %8.5f "
-              "%8.5f %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f\n",
+              "%8.5f %8.5f %13f %13f %13f %13f %13f %8.5f %8.5f\n",
               solution[pid].id, solution[pid].x[0], solution[pid].x[1],
               solution[pid].x[2], solution[pid].v[0], solution[pid].v[1],
               solution[pid].v[2], solution[pid].h, solution[pid].rho,
@@ -432,6 +441,7 @@ int main(int argc, char *argv[]) {
 
   size_t runs = 0, particles = 0;
   double h = 1.23485, size = 1., rho = 2.5;
+  double perturbation = 0.;
   char outputFileNameExtension[200] = "";
   char outputFileName[200] = "";
   enum velocity_field vel = velocity_zero;
@@ -462,6 +472,9 @@ int main(int argc, char *argv[]) {
       case 'r':
         sscanf(optarg, "%zu", &runs);
         break;
+      case 'd':
+        sscanf(optarg, "%lf", &perturbation);
+        break;
       case 'm':
         sscanf(optarg, "%lf", &rho);
         break;
@@ -491,6 +504,7 @@ int main(int argc, char *argv[]) {
         "\n-h DISTANCE=1.2348 - Smoothing length in units of <x>"
         "\n-m rho             - Physical density in the cell"
         "\n-s size            - Physical size of the cell"
+        "\n-d pert            - Perturbation to apply to the particles [0,1["
         "\n-v type (0,1,2,3)  - Velocity field: (zero, constant, divergent, "
         "rotating)"
         "\n-p type (0,1,2)    - Pressure field: (constant, gradient divergent)"
@@ -525,9 +539,9 @@ int main(int argc, char *argv[]) {
   /* Build the infrastructure */
   struct space space;
   space.periodic = 1;
-  space.dim[0] = 3.;
-  space.dim[1] = 3.;
-  space.dim[2] = 3.;
+  space.dim[0] = 5.;
+  space.dim[1] = 5.;
+  space.dim[2] = 5.;
   hydro_space_init(&space.hs, &space);
 
   struct phys_const prog_const;
@@ -535,7 +549,8 @@ int main(int argc, char *argv[]) {
 
   struct hydro_props hp;
   hp.target_neighbours = pow_dimension(h) * kernel_norm;
-  hp.delta_neighbours = 2.;
+  hp.delta_neighbours = 4.;
+  hp.h_max = FLT_MAX;
   hp.max_smoothing_iterations = 1;
   hp.CFL_condition = 0.1;
 
@@ -565,8 +580,8 @@ int main(int argc, char *argv[]) {
         const double offset[3] = {i * size, j * size, k * size};
 
         /* Construct it */
-        cells[i * 25 + j * 5 + k] =
-            make_cell(particles, offset, size, h, rho, &partId, vel, press);
+        cells[i * 25 + j * 5 + k] = make_cell(
+            particles, offset, size, h, rho, &partId, perturbation, vel, press);
 
         /* Store the inner cells */
         if (i > 0 && i < 4 && j > 0 && j < 4 && k > 0 && k < 4) {
@@ -592,8 +607,13 @@ int main(int argc, char *argv[]) {
     const ticks tic = getticks();
 
     /* Initialise the particles */
-    for (int j = 0; j < 125; ++j)
-      runner_do_drift_particles(&runner, cells[j], 0);
+    for (int j = 0; j < 125; ++j) runner_do_drift_part(&runner, cells[j], 0);
+
+    /* Reset particles. */
+    for (int i = 0; i < 125; ++i) {
+      for (int n = 0; n < cells[i]->count; ++n)
+        hydro_init_part(&cells[i]->parts[n], &space.hs);
+    }
 
     /* First, sort stuff */
     for (int j = 0; j < 125; ++j) runner_do_sort(&runner, cells[j], 0x1FFF, 0);
@@ -670,6 +690,12 @@ int main(int argc, char *argv[]) {
               outputFileNameExtension);
       dump_particle_fields(outputFileName, main_cell, solution, 0);
     }
+
+    /* Re-initialise every particle with hydro_init_part() */
+    for (int i = 0; i < 125; ++i) {
+      for (int n = 0; n < cells[i]->count; ++n)
+        hydro_init_part(&cells[i]->parts[n], &space.hs);
+    }
   }
 
   /* Output timing */
diff --git a/tests/test125cells.sh.in b/tests/test125cells.sh.in
index 1d3b0db75d70bf2d5047f71b183812702305df75..d6d3ddc5b6b61bbd493c94005fd500a93ae7a01d 100755
--- a/tests/test125cells.sh.in
+++ b/tests/test125cells.sh.in
@@ -7,15 +7,25 @@ do
 
 	rm -f brute_force_125_standard.dat swift_dopair_125_standard.dat
 
+	echo "Running ./test125cells -n 6 -r 1 -v $v -p $p -f standard"
 	./test125cells -n 6 -r 1 -v $v -p $p -f standard
 
 	if [ -e brute_force_125_standard.dat ]
 	then
-	    python @srcdir@/difffloat.py brute_force_125_standard.dat swift_dopair_125_standard.dat @srcdir@/tolerance_125.dat 6
+	    if python @srcdir@/difffloat.py brute_force_125_standard.dat swift_dopair_125_standard.dat @srcdir@/tolerance_125_normal.dat 6
+	    then
+		echo "Accuracy test passed"
+	    else
+		echo "Accuracy test failed"
+		exit 1
+	    fi
 	else
+	    echo "Error Missing test output file"
 	    exit 1
         fi
 
+	echo "------------"
+
     done
 done
 	
diff --git a/tests/test125cellsPerturbed.sh.in b/tests/test125cellsPerturbed.sh.in
new file mode 100755
index 0000000000000000000000000000000000000000..9a5cfc07c978b0cfd5aa050aa117e887a1d40907
--- /dev/null
+++ b/tests/test125cellsPerturbed.sh.in
@@ -0,0 +1,32 @@
+#!/bin/bash
+for v in {0..3}
+do
+    for p in {0..2}
+    do
+	echo ""
+
+	rm -f brute_force_125_perturbed.dat swift_dopair_125_perturbed.dat
+
+	echo "Running ./test125cells -n 6 -r 1 -d 0.1 -v $v -p $p -f perturbed"
+	./test125cells -n 6 -r 1 -d 0.1 -v $v -p $p -f perturbed
+
+	if [ -e brute_force_125_perturbed.dat ]
+	then
+	    if python @srcdir@/difffloat.py brute_force_125_perturbed.dat swift_dopair_125_perturbed.dat @srcdir@/tolerance_125_perturbed.dat 6
+	    then
+		echo "Accuracy test passed"
+	    else
+		echo "Accuracy test failed"
+		exit 1
+	    fi
+	else
+	    echo "Error Missing test output file"
+	    exit 1
+        fi
+
+	echo "------------"
+
+    done
+done
+
+exit $?
diff --git a/tests/test27cells.c b/tests/test27cells.c
index bd827b68e90ea5f4e9d5577612e6cecda2edf83a..a0f541d17100a13079580aabbef065fa5adbd5e1 100644
--- a/tests/test27cells.c
+++ b/tests/test27cells.c
@@ -158,7 +158,7 @@ struct cell *make_cell(size_t n, double *offset, double size, double h,
   cell->split = 0;
   cell->h_max = h;
   cell->count = count;
-  cell->dx_max = 0.;
+  cell->dx_max_part = 0.;
   cell->dx_max_sort = 0.;
   cell->width[0] = size;
   cell->width[1] = size;
@@ -167,7 +167,7 @@ struct cell *make_cell(size_t n, double *offset, double size, double h,
   cell->loc[1] = offset[1];
   cell->loc[2] = offset[2];
 
-  cell->ti_old = 8;
+  cell->ti_old_part = 8;
   cell->ti_end_min = 8;
   cell->ti_end_max = 8;
   cell->ti_sort = 8;
@@ -438,7 +438,7 @@ int main(int argc, char *argv[]) {
         cells[i * 9 + j * 3 + k] = make_cell(particles, offset, size, h, rho,
                                              &partId, perturbation, vel);
 
-        runner_do_drift_particles(&runner, cells[i * 9 + j * 3 + k], 0);
+        runner_do_drift_part(&runner, cells[i * 9 + j * 3 + k], 0);
 
         runner_do_sort(&runner, cells[i * 9 + j * 3 + k], 0x1FFF, 0);
       }
diff --git a/tests/test27cells.sh.in b/tests/test27cells.sh.in
index 07b6b92a82cee2bbe9c593f8f62e750d4406f84e..4312ce55e13097d4ae40c289b9c5caa885ff37cc 100755
--- a/tests/test27cells.sh.in
+++ b/tests/test27cells.sh.in
@@ -3,18 +3,28 @@
 for v in {0..3}
 do
     echo ""
-
+	
     rm -f brute_force_27_standard.dat swift_dopair_27_standard.dat
 
+    echo "Running ./test27cells -n 6 -r 1 -d 0 -f standard -v $v -a 1e-4"
     ./test27cells -n 6 -r 1 -d 0 -f standard -v $v -a 1e-4
 
     if [ -e brute_force_27_standard.dat ]
     then
-	python @srcdir@/difffloat.py brute_force_27_standard.dat swift_dopair_27_standard.dat @srcdir@/tolerance_27_normal.dat 6
+	if python @srcdir@/difffloat.py brute_force_27_standard.dat swift_dopair_27_standard.dat @srcdir@/tolerance_27_normal.dat 6
+	then
+	    echo "Accuracy test passed"
+	else
+	    echo "Accuracy test failed"
+	    exit 1
+	fi
     else
+	echo "Error Missing test output file"
 	exit 1
     fi
 
+    echo "------------"
+    
 done
 
 exit $?
diff --git a/tests/test27cellsPerturbed.sh.in b/tests/test27cellsPerturbed.sh.in
index 30498594b659101216b51dfea2346fa9230dbc97..2f2e1db76346ca8f0ea4c2365ee349e232a1ce53 100755
--- a/tests/test27cellsPerturbed.sh.in
+++ b/tests/test27cellsPerturbed.sh.in
@@ -6,15 +6,25 @@ do
 
     rm -f brute_force_27_perturbed.dat swift_dopair_27_perturbed.dat
 
+    echo "Running ./test27cells -n 6 -r 1 -d 0.1 -f perturbed -v $v -a 5e-4"
     ./test27cells -n 6 -r 1 -d 0.1 -f perturbed -v $v -a 5e-4
 
     if [ -e brute_force_27_perturbed.dat ]
     then
-	python @srcdir@/difffloat.py brute_force_27_perturbed.dat swift_dopair_27_perturbed.dat @srcdir@/tolerance_27_perturbed.dat 6
+	if python @srcdir@/difffloat.py brute_force_27_perturbed.dat swift_dopair_27_perturbed.dat @srcdir@/tolerance_27_perturbed.dat 6 1
+	then
+	    echo "Accuracy test passed"
+	else
+	    echo "Accuracy test failed"
+	    exit 1
+	fi
     else
+	echo "Error Missing test output file"
 	exit 1
     fi
 
+    echo "------------"
+
 done
 
 exit $?
diff --git a/tests/testFFT.c b/tests/testFFT.c
index c4aeb2885c788bd769bda49bdd15ab121dd8e9d4..4ddd030ece95bf26cbfe41f2408be7c3e0c50535 100644
--- a/tests/testFFT.c
+++ b/tests/testFFT.c
@@ -18,8 +18,8 @@
  ******************************************************************************/
 
 /* Some standard headers. */
-#include <stdlib.h>
-#include <string.h>
+
+#include "../config.h"
 
 #ifndef HAVE_FFTW
 
@@ -27,169 +27,93 @@ int main() { return 0; }
 
 #else
 
-#include <fftw3.h>
+/* Some standard headers. */
+#include <stdlib.h>
+#include <string.h>
 
 /* Includes. */
 #include "swift.h"
 
-const double G = 1.;
-
-const size_t N = 16;
-const size_t PMGRID = 8;
-
-// const double asmth = 2. * M_PI * const_gravity_a_smooth / boxSize;
-// const double asmth2 = asmth * asmth;
-// const double fact = G / (M_PI * boxSize) * (1. / (2. * boxSize / PMGRID));
-
 int main() {
 
   /* Initialize CPU frequency, this also starts time. */
   unsigned long long cpufreq = 0;
   clocks_set_cpufreq(cpufreq);
 
-  /* Simulation properties */
-  const size_t count = N * N * N;
-  const double boxSize = 1.;
-
-  /* Create some particles */
-  struct gpart* gparts = malloc(count * sizeof(struct gpart));
-  bzero(gparts, count * sizeof(struct gpart));
-  for (size_t i = 0; i < N; ++i) {
-    for (size_t j = 0; j < N; ++j) {
-      for (size_t k = 0; k < N; ++k) {
-
-        struct gpart* gp = &gparts[i * N * N + j * N + k];
-
-        gp->x[0] = i * boxSize / N + boxSize / (2 * N);
-        gp->x[1] = j * boxSize / N + boxSize / (2 * N);
-        gp->x[2] = k * boxSize / N + boxSize / (2 * N);
-
-        gp->mass = 1. / count;
-
-        gp->id_or_neg_offset = i * N * N + j * N + k;
-      }
-    }
-  }
-
-  /* Properties of the mesh */
-  const size_t meshmin[3] = {0, 0, 0};
-  const size_t meshmax[3] = {PMGRID - 1, PMGRID - 1, PMGRID - 1};
-
-  const size_t dimx = meshmax[0] - meshmin[0] + 2;
-  const size_t dimy = meshmax[1] - meshmin[1] + 2;
-  const size_t dimz = meshmax[2] - meshmin[2] + 2;
-
-  const double fac = PMGRID / boxSize;
-  const size_t PMGRID2 = 2 * (PMGRID / 2 + 1);
-
-  /* message("dimx=%zd dimy=%zd dimz=%zd", dimx, dimy, dimz); */
-
-  /* Allocate and empty the workspace mesh */
-  const size_t workspace_size = (dimx + 4) * (dimy + 4) * (dimz + 4);
-  double* workspace = fftw_malloc(workspace_size * sizeof(double));
-  bzero(workspace, workspace_size * sizeof(double));
-
-  /* Do CIC with the particles */
-  for (size_t pid = 0; pid < count; ++pid) {
-
-    const struct gpart* const gp = &gparts[pid];
-
-    const size_t slab_x =
-        (fac * gp->x[0] >= PMGRID) ? PMGRID - 1 : fac * gp->x[0];
-    const size_t slab_y =
-        (fac * gp->x[1] >= PMGRID) ? PMGRID - 1 : fac * gp->x[1];
-    const size_t slab_z =
-        (fac * gp->x[2] >= PMGRID) ? PMGRID - 1 : fac * gp->x[2];
-
-    const double dx = fac * gp->x[0] - (double)slab_x;
-    const double dy = fac * gp->x[1] - (double)slab_y;
-    const double dz = fac * gp->x[2] - (double)slab_z;
-
-    const size_t slab_xx = slab_x + 1;
-    const size_t slab_yy = slab_y + 1;
-    const size_t slab_zz = slab_z + 1;
-
-    workspace[(slab_x * dimy + slab_y) * dimz + slab_z] +=
-        gp->mass * (1.0 - dx) * (1.0 - dy) * (1.0 - dz);
-    workspace[(slab_x * dimy + slab_yy) * dimz + slab_z] +=
-        gp->mass * (1.0 - dx) * dy * (1.0 - dz);
-    workspace[(slab_x * dimy + slab_y) * dimz + slab_zz] +=
-        gp->mass * (1.0 - dx) * (1.0 - dy) * dz;
-    workspace[(slab_x * dimy + slab_yy) * dimz + slab_zz] +=
-        gp->mass * (1.0 - dx) * dy * dz;
-    workspace[(slab_xx * dimy + slab_y) * dimz + slab_z] +=
-        gp->mass * (dx) * (1.0 - dy) * (1.0 - dz);
-    workspace[(slab_xx * dimy + slab_yy) * dimz + slab_z] +=
-        gp->mass * (dx)*dy * (1.0 - dz);
-    workspace[(slab_xx * dimy + slab_y) * dimz + slab_zz] +=
-        gp->mass * (dx) * (1.0 - dy) * dz;
-    workspace[(slab_xx * dimy + slab_yy) * dimz + slab_zz] +=
-        gp->mass * (dx)*dy * dz;
-  }
-
-  /* for(size_t i = 0 ; i < dimx*dimy*dimz; ++i) */
-  /*   message("workspace[%zd] = %f", i, workspace[i]); */
-
-  /* Prepare the force grid */
-  const size_t fft_size = workspace_size;
-  double* forcegrid = fftw_malloc(fft_size * sizeof(double));
-  bzero(forcegrid, fft_size * sizeof(double));
-
-  const size_t sendmin = 0, recvmin = 0;
-  const size_t sendmax = PMGRID, recvmax = PMGRID;
-
-  memcpy(forcegrid, workspace + (sendmin - meshmin[0]) * dimy * dimz,
-         (sendmax - sendmin + 1) * dimy * dimz * sizeof(double));
-
-  /* for (size_t i = 0; i < fft_size; ++i) */
-  /*   if (forcegrid[i] != workspace[i]) error("wrong"); */
-
-  /* Prepare the density grid */
-  double* rhogrid = fftw_malloc(fft_size * sizeof(double));
-  bzero(rhogrid, fft_size * sizeof(double));
-
-  /* Now get the density */
-  for (size_t slab_x = recvmin; slab_x <= recvmax; slab_x++) {
-
-    const size_t slab_xx = slab_x % PMGRID;
-
-    for (size_t slab_y = recvmin; slab_y <= recvmax; slab_y++) {
-
-      const size_t slab_yy = slab_y % PMGRID;
-
-      for (size_t slab_z = recvmin; slab_z <= recvmax; slab_z++) {
-
-        const size_t slab_zz = slab_z % PMGRID;
-
-        rhogrid[PMGRID * PMGRID2 * slab_xx + PMGRID2 * slab_yy + slab_zz] +=
-            forcegrid[((slab_x - recvmin) * dimy + (slab_y - recvmin)) * dimz +
-                      (slab_z - recvmin)];
-      }
-    }
-  }
-
-  /* for (size_t i = 0; i < 640; i++) { */
-  /*   if (rhogrid[i] != workspace[i]) { */
-  /*     message("rhogrid[%zd]= %f workspace[%zd]= %f forcegrid[%zd]= %f", i, */
-  /*             rhogrid[i], i, workspace[i], i, forcegrid[i]); */
-  /*   } */
-  /* } */
-
-  /* FFT of the density field */
-  fftw_complex* fftgrid = fftw_malloc(fft_size * sizeof(fftw_complex));
-  fftw_plan plan_forward = fftw_plan_dft_r2c_3d(PMGRID, PMGRID, PMGRID, rhogrid,
-                                                fftgrid, FFTW_ESTIMATE);
-  fftw_execute(plan_forward);
-
-  for (size_t i = 0; i < 640; i++) {
-    message("workspace[%zd]= %f", i, fftgrid[i][0]);
+  /* Make one particle */
+  int nr_gparts = 1;
+  struct gpart *gparts = NULL;
+  if (posix_memalign((void **)&gparts, 64, nr_gparts * sizeof(struct gpart)) !=
+      0)
+    error("Impossible to allocate memory for gparts.");
+  bzero(gparts, nr_gparts * sizeof(struct gpart));
+
+  gparts[0].x[0] = 0.3;
+  gparts[0].x[1] = 0.8;
+  gparts[0].x[2] = 0.2;
+  gparts[0].mass = 1.f;
+
+  /* Read the parameter file */
+  struct swift_params *params = malloc(sizeof(struct swift_params));
+  parser_read_file("fft_params.yml", params);
+
+  /* Initialise the gravity properties */
+  struct gravity_props gravity_properties;
+  gravity_props_init(&gravity_properties, params);
+
+  /* Build the infrastructure */
+  struct space space;
+  double dim[3] = {1., 1., 1.};
+  space_init(&space, params, dim, NULL, gparts, NULL, 0, nr_gparts, 0, 1, 1, 1,
+             0, 0);
+
+  struct engine engine;
+  engine.s = &space;
+  space.e = &engine;
+  engine.time = 0.1f;
+  engine.ti_current = 0;
+  engine.ti_old = 0;
+  engine.max_active_bin = num_time_bins;
+  engine.gravity_properties = &gravity_properties;
+  engine.nr_threads = 1;
+
+  struct runner runner;
+  runner.e = &engine;
+
+  /* Initialize the threadpool. */
+  threadpool_init(&engine.threadpool, engine.nr_threads);
+
+  space_rebuild(&space, 0);
+
+  /* Run the FFT task */
+  runner_do_grav_fft(&runner, 1);
+
+  /* Gather the computed and exact potentials (no comparison is made yet) */
+  int nr_cells = space.nr_cells;
+  double *r = malloc(nr_cells * sizeof(double));
+  double *pot = malloc(nr_cells * sizeof(double));
+  double *pot_exact = malloc(nr_cells * sizeof(double));
+
+  // FILE *file = fopen("potential.dat", "w");
+  for (int i = 0; i < nr_cells; ++i) {
+    pot[i] = space.multipoles_top[i].pot.F_000;
+    double dx =
+        nearest(space.multipoles_top[i].CoM[0] - gparts[0].x[0], dim[0]);
+    double dy =
+        nearest(space.multipoles_top[i].CoM[1] - gparts[0].x[1], dim[1]);
+    double dz =
+        nearest(space.multipoles_top[i].CoM[2] - gparts[0].x[2], dim[2]);
+    r[i] = sqrt(dx * dx + dy * dy + dz * dz);
+    if (r[i] > 0) pot_exact[i] = -1. / r[i];
+    // fprintf(file, "%e %e %e\n", r[i], pot[i], pot_exact[i]);
   }
+  // fclose(file);
 
-  /* Clean-up */
-  fftw_destroy_plan(plan_forward);
-  fftw_free(forcegrid);
-  fftw_free(rhogrid);
-  fftw_free(workspace);
+  /* Clean up */
+  free(r);
+  free(pot);
+  free(pot_exact);
+  free(params);
   free(gparts);
   return 0;
 }
diff --git a/tests/testKernel.c b/tests/testKernel.c
index 13f4e36534eb11a4c8f7ba9c19a48de6599e31f5..a2744119a527cc842cdd4711056eee7a7d7b4270 100644
--- a/tests/testKernel.c
+++ b/tests/testKernel.c
@@ -39,7 +39,7 @@ int main() {
   const float numPoints_inv = 1. / numPoints;
 
   for (int i = 0; i < numPoints; ++i) {
-    u[i] = i * 2.5f * numPoints_inv / h;
+    u[i] = i * 2.25f * numPoints_inv / h;
   }
 
   for (int i = 0; i < numPoints; ++i) {
@@ -55,19 +55,63 @@ int main() {
 
 #ifdef WITH_VECTORIZATION
 
+  printf("\nVector Output for kernel_deval_1_vec\n");
+  printf("-------------\n");
+
+  /* Test vectorised kernel that uses one vector. */
+  for (int i = 0; i < numPoints; i += VEC_SIZE) {
+
+    vector vx, vx_h;
+    vector W_vec, dW_vec;
+
+    for (int j = 0; j < VEC_SIZE; j++) {
+      vx.f[j] = (i + j) * 2.25f / numPoints;
+    }
+
+    vx_h.v = vx.v / vec_set1(h);
+
+    kernel_deval_1_vec(&vx_h, &W_vec, &dW_vec);
+
+    for (int j = 0; j < VEC_SIZE; j++) {
+      printf("%2d: h= %f H= %f x=%f W(x,h)=%f dW(x,h)=%f\n", i + j, h,
+             h * kernel_gamma, vx.f[j], W_vec.f[j], dW_vec.f[j]);
+
+      if (fabsf(W_vec.f[j] - W[i + j]) > 2e-7) {
+        printf("Invalid value ! scalar= %e, vector= %e\n", W[i + j],
+               W_vec.f[j]);
+        return 1;
+      }
+      if (fabsf(dW_vec.f[j] - dW[i + j]) > 2e-7) {
+        printf("Invalid value ! scalar= %e, vector= %e\n", dW[i + j],
+               dW_vec.f[j]);
+        return 1;
+      }
+    }
+  }
+
+  printf("\nVector Output for kernel_deval_2_vec\n");
+  printf("-------------\n");
+
+  /* Test vectorised kernel that uses two vectors. */
   for (int i = 0; i < numPoints; i += VEC_SIZE) {
 
     vector vx, vx_h;
     vector W_vec, dW_vec;
 
+    vector vx_2, vx_h_2;
+    vector W_vec_2, dW_vec_2;
+
     for (int j = 0; j < VEC_SIZE; j++) {
-      vx.f[j] = (i + j) * 2.5f / numPoints;
+      vx.f[j] = (i + j) * 2.25f / numPoints;
+      vx_2.f[j] = (i + j) * 2.25f / numPoints;
     }
 
     vx_h.v = vx.v / vec_set1(h);
+    vx_h_2.v = vx_2.v / vec_set1(h);
 
-    kernel_deval_vec(&vx_h, &W_vec, &dW_vec);
+    kernel_deval_2_vec(&vx_h, &W_vec, &dW_vec, &vx_h_2, &W_vec_2, &dW_vec_2);
 
+    /* Check first vector results. */
     for (int j = 0; j < VEC_SIZE; j++) {
       printf("%2d: h= %f H= %f x=%f W(x,h)=%f dW(x,h)=%f\n", i + j, h,
              h * kernel_gamma, vx.f[j], W_vec.f[j], dW_vec.f[j]);
@@ -83,6 +127,23 @@ int main() {
         return 1;
       }
     }
+
+    /* Check second vector results. */
+    for (int j = 0; j < VEC_SIZE; j++) {
+      printf("%2d: h= %f H= %f x=%f W(x,h)=%f dW(x,h)=%f\n", i + j, h,
+             h * kernel_gamma, vx_2.f[j], W_vec_2.f[j], dW_vec_2.f[j]);
+
+      if (fabsf(W_vec_2.f[j] - W[i + j]) > 2e-7) {
+        printf("Invalid value ! scalar= %e, vector= %e\n", W[i + j],
+               W_vec_2.f[j]);
+        return 1;
+      }
+      if (fabsf(dW_vec_2.f[j] - dW[i + j]) > 2e-7) {
+        printf("Invalid value ! scalar= %e, vector= %e\n", dW[i + j],
+               dW_vec_2.f[j]);
+        return 1;
+      }
+    }
   }
 
   printf("\nAll values are consistent\n");
diff --git a/tests/testPair.c b/tests/testPair.c
index c2533b63b902e3bdc7e7cae6fcbcf50c87dee4af..92987d2fdb625fec6e186a280837f145787f599b 100644
--- a/tests/testPair.c
+++ b/tests/testPair.c
@@ -84,7 +84,8 @@ struct cell *make_cell(size_t n, double *offset, double size, double h,
   cell->split = 0;
   cell->h_max = h;
   cell->count = count;
-  cell->dx_max = 0.;
+  cell->dx_max_part = 0.;
+  cell->dx_max_sort = 0.;
   cell->width[0] = n;
   cell->width[1] = n;
   cell->width[2] = n;
@@ -92,7 +93,7 @@ struct cell *make_cell(size_t n, double *offset, double size, double h,
   cell->loc[1] = offset[1];
   cell->loc[2] = offset[2];
 
-  cell->ti_old = 8;
+  cell->ti_old_part = 8;
   cell->ti_end_min = 8;
   cell->ti_end_max = 8;
 
diff --git a/tests/testSPHStep.c b/tests/testSPHStep.c
index 0c7ae1d0d8855371b8f8f9fbf51c7c63b3221aaa..014dacd1eb62040b03e6038b2c23183a24ec4850 100644
--- a/tests/testSPHStep.c
+++ b/tests/testSPHStep.c
@@ -71,7 +71,8 @@ struct cell *make_cell(size_t N, float cellSize, int offset[3], int id_offset) {
   cell->h_max = h;
   cell->count = count;
   cell->gcount = 0;
-  cell->dx_max = 0.;
+  cell->dx_max_part = 0.;
+  cell->dx_max_sort = 0.;
   cell->width[0] = cellSize;
   cell->width[1] = cellSize;
   cell->width[2] = cellSize;
diff --git a/tests/tolerance_125.dat b/tests/tolerance_125_normal.dat
similarity index 100%
rename from tests/tolerance_125.dat
rename to tests/tolerance_125_normal.dat
diff --git a/tests/tolerance_125_perturbed.dat b/tests/tolerance_125_perturbed.dat
new file mode 100644
index 0000000000000000000000000000000000000000..04e642b28cb3729cb81f8183c3e69595ac651876
--- /dev/null
+++ b/tests/tolerance_125_perturbed.dat
@@ -0,0 +1,3 @@
+#   ID    pos_x    pos_y    pos_z      v_x      v_y      v_z        h      rho    div_v        S        u        P        c      a_x      a_y      a_z     h_dt    v_sig    dS/dt    du/dt
+    0	  1e-4	   1e-4	    1e-4       1e-4	1e-4	 1e-4	    1e-4   1e-4	  1e-4	       1e-4	1e-4	 1e-4	  1e-4	 1e-4	  1e-4	   1e-4	   1e-4	   1e-4	    1e-4     1e-4
+    0	  1e-4	   1e-4	    1e-4       1e-4	1e-4	 1e-4	    1e-4   1e-4	  1e-4	       1e-4	1e-4	 1e-4	  1e-4	 5e-3	  5e-3	   5e-3	   1e-4	   1e-4	    1e-4     1e-4
diff --git a/tests/tolerance_27_normal.dat b/tests/tolerance_27_normal.dat
index 9c7ca10414507746b41e453d75426a072f989d2e..31ee002bb9c73ff8d74cce545aff715476b33507 100644
--- a/tests/tolerance_27_normal.dat
+++ b/tests/tolerance_27_normal.dat
@@ -1,3 +1,3 @@
 #   ID      pos_x      pos_y      pos_z        v_x        v_y        v_z           rho        rho_dh        wcount     wcount_dh         div_v       curl_vx       curl_vy       curl_vz
-    0	    1e-6       1e-6	  1e-6 	       1e-6 	  1e-6	     1e-6	   2e-6	      4e-5	    2e-4       2e-3		 8e-6	     6e-6	   6e-6		 6e-6
+    0	    1e-6       1e-6	  1e-6 	       1e-6 	  1e-6	     1e-6	   2e-6	      4e-5	    2e-4       2e-3		 1e-5	     6e-6	   6e-6		 6e-6
     0	    1e-6       1e-6	  1e-6 	       1e-6 	  1e-6	     1e-6	   1e-6	      1.2e-4	    1e-4       1e-4		 2e-4	     1e-4	   1e-4	 	 1e-4
diff --git a/tests/tolerance_27_perturbed.dat b/tests/tolerance_27_perturbed.dat
index 53de4ec7632039a56a3757488881e890296e3ac8..9c6ee8c77cc6d53e67db9dbb86be197d49149b10 100644
--- a/tests/tolerance_27_perturbed.dat
+++ b/tests/tolerance_27_perturbed.dat
@@ -1,3 +1,3 @@
 #   ID      pos_x      pos_y      pos_z        v_x        v_y        v_z           rho        rho_dh        wcount     wcount_dh         div_v       curl_vx       curl_vy       curl_vz
-    0	    1e-6       1e-6	  1e-6 	       1e-6 	  1e-6	     1e-6	   1.2e-6     1e-4	    5e-5       2e-3		 3.1e-6	     3e-6	   3e-6		 3e-6
-    0	    1e-6       1e-6	  1e-6 	       1e-6 	  1e-6	     1e-6	   1e-6	      1.2e-2	    1e-5       1e-4		 2e-5	     2e-3	   2e-3	 	 2e-3
+    0	    1e-6       1e-6	  1e-6 	       1e-6 	  1e-6	     1e-6	   1.2e-6     1e-4	    5e-5       2e-3		 4e-6	     3e-6	   3e-6		 3e-6
+    0	    1e-6       1e-6	  1e-6 	       1e-6 	  1e-6	     1e-6	   1e-6	      2e-3	    1e-5       1e-4		 4e-5	     2e-3	   2e-3	 	 2e-3