diff --git a/.gitignore b/.gitignore
index 596f144ed1e790d03510c5bdec851adb3fcecc3c..ba0028d163dae64143b0c43b465152e357d042b0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -78,6 +78,7 @@ tests/testRiemannHLLC
 tests/testMatrixInversion
 tests/testDump
 tests/testLogger
+tests/benchmarkInteractions
 
 theory/latex/swift.pdf
 theory/SPH/Kernels/kernels.pdf
diff --git a/examples/CoolingHaloWithSpin/cooling_halo.yml b/examples/CoolingHaloWithSpin/cooling_halo.yml
index 2e5f09b9cae199090f29ae2341c232218f8099d0..fc5094f9f5dcae62bb936d2b5510f41e3c70504e 100644
--- a/examples/CoolingHaloWithSpin/cooling_halo.yml
+++ b/examples/CoolingHaloWithSpin/cooling_halo.yml
@@ -9,8 +9,8 @@ InternalUnitSystem:
 # Parameters governing the time integration
 TimeIntegration:
   time_begin: 0.    # The starting time of the simulation (in internal units).
-  time_end:   10.    # The end time of the simulation (in internal units).
-  dt_min:     1e-10 # The minimal time-step size of the simulation (in internal units).
+  time_end:   10.   # The end time of the simulation (in internal units).
+  dt_min:     1e-5  # The minimal time-step size of the simulation (in internal units).
   dt_max:     1e-1  # The maximal time-step size of the simulation (in internal units).
 
 # Parameters governing the conserved quantities statistics
@@ -48,4 +48,4 @@ LambdaCooling:
   minimum_temperature:         1.0e4  # Minimal temperature (Kelvin)
   mean_molecular_weight:       0.59   # Mean molecular weight
   hydrogen_mass_abundance:     0.75   # Hydrogen mass abundance (dimensionless)
-  cooling_tstep_mult:          1.0    # Dimensionless pre-factor for the time-step condition
+  cooling_tstep_mult:          0.1    # Dimensionless pre-factor for the time-step condition
diff --git a/examples/CoolingHaloWithSpin/density_profile.py b/examples/CoolingHaloWithSpin/density_profile.py
index ea282328e5b75530a128eab2dec5f065e46cf819..fb88ddd6aea71603a6f6fcb36b13771106737e6a 100644
--- a/examples/CoolingHaloWithSpin/density_profile.py
+++ b/examples/CoolingHaloWithSpin/density_profile.py
@@ -28,7 +28,7 @@ unit_mass_cgs = float(params.attrs["InternalUnitSystem:UnitMass_in_cgs"])
 unit_length_cgs = float(params.attrs["InternalUnitSystem:UnitLength_in_cgs"])
 unit_velocity_cgs = float(params.attrs["InternalUnitSystem:UnitVelocity_in_cgs"])
 unit_time_cgs = unit_length_cgs / unit_velocity_cgs
-v_c = float(params.attrs["SoftenedIsothermalPotential:vrot"])
+v_c = float(params.attrs["IsothermalPotential:vrot"])
 v_c_cgs = v_c * unit_velocity_cgs
 lambda_cgs = float(params.attrs["LambdaCooling:lambda_cgs"])
 X_H = float(params.attrs["LambdaCooling:hydrogen_mass_abundance"])
@@ -101,18 +101,18 @@ for i in range(n_snaps):
         rho_0 = density[0]
 
         rho_analytic_init = rho_0 * (radial_bin_mids/r_0)**(-2)
-    plt.plot(radial_bin_mids,density/rho_analytic_init,'ko',label = "Average density of shell")
-    #plt.plot(t,rho_analytic,label = "Initial analytic density profile"
+    plt.plot(radial_bin_mids,density,'ko',label = "Average density of shell")
+    plt.plot(radial_bin_mids,rho_analytic_init,label = "Initial analytic density profile")
     plt.xlabel(r"$r / r_{vir}$")
-    plt.ylabel(r"$\rho / \rho_{init})$")
+    plt.ylabel(r"$(\rho / \rho_{init})$")
     plt.title(r"$\mathrm{Time}= %.3g \, s \, , \, %d \, \, \mathrm{particles} \,,\, v_c = %.1f \, \mathrm{km / s}$" %(snap_time_cgs,N,v_c))
     #plt.ylim((1.e-2,1.e1))
-    plt.plot((r_cool_over_r_vir,r_cool_over_r_vir),(0,20),'r',label = "Cooling radius")
+    plt.plot((r_cool_over_r_vir,r_cool_over_r_vir),(1.0e-4,1.0e4),'r',label = "Cooling radius")
     plt.xlim((radial_bin_mids[0],max_r))
-    plt.ylim((0,20))
-    plt.plot((0,max_r),(1,1))
+    plt.ylim((1.0e-4,1.0e4))
+    #plt.plot((0,max_r),(1,1))
     #plt.xscale('log')
-    #plt.yscale('log')
+    plt.yscale('log')
     plt.legend(loc = "upper right")
     plot_filename = "density_profile_%03d.png" %i
     plt.savefig(plot_filename,format = "png")
diff --git a/examples/CoolingHaloWithSpin/internal_energy_profile.py b/examples/CoolingHaloWithSpin/internal_energy_profile.py
index a3e470cc24a939c9bc915371e927d9bd39196bff..5f71d69ca7a978de242559f84ec390faa86a27f0 100644
--- a/examples/CoolingHaloWithSpin/internal_energy_profile.py
+++ b/examples/CoolingHaloWithSpin/internal_energy_profile.py
@@ -46,7 +46,7 @@ unit_mass_cgs = float(params.attrs["InternalUnitSystem:UnitMass_in_cgs"])
 unit_length_cgs = float(params.attrs["InternalUnitSystem:UnitLength_in_cgs"])
 unit_velocity_cgs = float(params.attrs["InternalUnitSystem:UnitVelocity_in_cgs"])
 unit_time_cgs = unit_length_cgs / unit_velocity_cgs
-v_c = float(params.attrs["SoftenedIsothermalPotential:vrot"])
+v_c = float(params.attrs["IsothermalPotential:vrot"])
 v_c_cgs = v_c * unit_velocity_cgs
 lambda_cgs = float(params.attrs["LambdaCooling:lambda_cgs"])
 X_H = float(params.attrs["LambdaCooling:hydrogen_mass_abundance"])
diff --git a/examples/CoolingHaloWithSpin/makeIC.py b/examples/CoolingHaloWithSpin/makeIC.py
index 8970fbaa70578532a4f41bab7a096d8fa3565d26..a6d57868ad7542498b27007a5c3ef9234b9feb84 100644
--- a/examples/CoolingHaloWithSpin/makeIC.py
+++ b/examples/CoolingHaloWithSpin/makeIC.py
@@ -36,6 +36,7 @@ h = 0.67777 # hubble parameter
 gamma = 5./3.
 eta = 1.2349
 spin_lambda = 0.05 #spin parameter
+f_b = 0.2 #baryon fraction
 
 # First set unit velocity and then the circular velocity parameter for the isothermal potential
 const_unit_velocity_in_cgs = 1.e5 #kms^-1
@@ -99,6 +100,8 @@ grp.attrs["PeriodicBoundariesOn"] = periodic
 # set seed for random number
 np.random.seed(1234)
 
+gas_mass = f_b * np.sqrt(3.) / 2. #virial mass of halo is 1, virial radius is 1, enclosed mass scales with r
+gas_particle_mass = gas_mass / float(N)
 
 # Positions
 # r^(-2) distribution corresponds to uniform distribution in radius
@@ -164,12 +167,12 @@ N = x_coords.size
 print "Number of particles in the box = " , N
 
 #make the coords and radius arrays again
-coords_2 = np.zeros((N,3))
-coords_2[:,0] = x_coords
-coords_2[:,1] = y_coords
-coords_2[:,2] = z_coords
+coords= np.zeros((N,3))
+coords[:,0] = x_coords
+coords[:,1] = y_coords
+coords[:,2] = z_coords
 
-radius = np.sqrt((coords_2[:,0]-boxSize/2.)**2 + (coords_2[:,1]-boxSize/2.)**2 + (coords_2[:,2]-boxSize/2.)**2)
+radius = np.sqrt((coords[:,0]-boxSize/2.)**2 + (coords[:,1]-boxSize/2.)**2 + (coords[:,2]-boxSize/2.)**2)
 
 #now give particle's velocities
 v = np.zeros((N,3))
@@ -184,7 +187,7 @@ print "J =", J
 omega = np.zeros((N,3))
 for i in range(N):
     omega[i,2] = 3.*J / radius[i]
-    v[i,:] = np.cross(omega[i,:],(coords_2[i,:]-boxSize/2.))
+    v[i,:] = np.cross(omega[i,:],(coords[i,:]-boxSize/2.))
         
 # Header
 grp = file.create_group("/Header")
@@ -202,16 +205,15 @@ grp.attrs["Dimension"] = 3
 grp = file.create_group("/PartType0")
 
 ds = grp.create_dataset('Coordinates', (N, 3), 'd')
-ds[()] = coords_2
-coords_2 = np.zeros(1)
+ds[()] = coords
+coords = np.zeros(1)
 
 ds = grp.create_dataset('Velocities', (N, 3), 'f')
 ds[()] = v
 v = np.zeros(1)
 
 # All particles of equal mass
-mass = 1. / N
-m = np.full((N,),mass)
+m = np.full((N,),gas_particle_mass)
 ds = grp.create_dataset('Masses', (N, ), 'f')
 ds[()] = m
 m = np.zeros(1)
diff --git a/examples/CoolingHaloWithSpin/velocity_profile.py b/examples/CoolingHaloWithSpin/velocity_profile.py
index d64d255b18482bc26578f21f46199aa3540ae7b5..07df8e1b0751307513c30a5b128773b193c3a9cd 100644
--- a/examples/CoolingHaloWithSpin/velocity_profile.py
+++ b/examples/CoolingHaloWithSpin/velocity_profile.py
@@ -46,7 +46,7 @@ unit_mass_cgs = float(params.attrs["InternalUnitSystem:UnitMass_in_cgs"])
 unit_length_cgs = float(params.attrs["InternalUnitSystem:UnitLength_in_cgs"])
 unit_velocity_cgs = float(params.attrs["InternalUnitSystem:UnitVelocity_in_cgs"])
 unit_time_cgs = unit_length_cgs / unit_velocity_cgs
-v_c = float(params.attrs["SoftenedIsothermalPotential:vrot"])
+v_c = float(params.attrs["IsothermalPotential:vrot"])
 v_c_cgs = v_c * unit_velocity_cgs
 header = f["Header"]
 N = header.attrs["NumPart_Total"][0]
diff --git a/examples/EAGLE_100/README b/examples/EAGLE_100/README
new file mode 100644
index 0000000000000000000000000000000000000000..e3af3c0e1281f8e9ba9e0aae3fa6dd8475359a47
--- /dev/null
+++ b/examples/EAGLE_100/README
@@ -0,0 +1,16 @@
+ICs extracted from the EAGLE suite of simulations. 
+
+WARNING: The ICs are 217GB in size. They contain ~3.4G DM particles,
+~3.2G gas particles and ~170M star particles
+
+The particle distribution here is the snapshot 27 (z=0.1) of the 100Mpc
+Ref-model. h- and a- factors from the original Gadget code have been
+corrected for. Variables not used in a pure hydro & gravity code have
+been removed. 
+Everything is ready to be run without cosmological integration. 
+
+The particle load of the main EAGLE simulation can be reproduced by
+running these ICs on 4096 cores.
+
+MD5 checksum of the ICs:
+2301ea73e14207b541bbb04163c5269e  EAGLE_ICs_100.hdf5
diff --git a/examples/EAGLE_100/eagle_100.yml b/examples/EAGLE_100/eagle_100.yml
new file mode 100644
index 0000000000000000000000000000000000000000..a9b83b81f085e66b36d115c5265b66d6093ffdfb
--- /dev/null
+++ b/examples/EAGLE_100/eagle_100.yml
@@ -0,0 +1,35 @@
+# Define the system of units to use internally. 
+InternalUnitSystem:
+  UnitMass_in_cgs:     1.989e43      # 10^10 M_sun in grams
+  UnitLength_in_cgs:   3.085678e24   # Mpc in centimeters
+  UnitVelocity_in_cgs: 1e5           # km/s in centimeters per second
+  UnitCurrent_in_cgs:  1             # Amperes
+  UnitTemp_in_cgs:     1             # Kelvin
+
+# Parameters governing the time integration
+TimeIntegration:
+  time_begin: 0.    # The starting time of the simulation (in internal units).
+  time_end:   1e-2  # The end time of the simulation (in internal units).
+  dt_min:     1e-10 # The minimal time-step size of the simulation (in internal units).
+  dt_max:     1e-4  # The maximal time-step size of the simulation (in internal units).
+
+# Parameters governing the snapshots
+Snapshots:
+  basename:            eagle # Common part of the name of output files
+  time_first:          0.    # Time of the first output (in internal units)
+  delta_time:          1e-3  # Time difference between consecutive outputs (in internal units)
+
+# Parameters governing the conserved quantities statistics
+Statistics:
+  delta_time:          1e-2 # Time between statistics output
+
+# Parameters for the hydrodynamics scheme
+SPH:
+  resolution_eta:        1.2348   # Target smoothing length in units of the mean inter-particle separation (1.2348 == 48Ngbs with the cubic spline kernel).
+  delta_neighbours:      0.1      # The tolerance for the targetted number of neighbours.
+  CFL_condition:         0.1      # Courant-Friedrich-Levy condition for time integration.
+
+# Parameters related to the initial conditions
+InitialConditions:
+  file_name:  ./EAGLE_ICs_100.hdf5     # The file to read
+
diff --git a/examples/EAGLE_100/getIC.sh b/examples/EAGLE_100/getIC.sh
new file mode 100755
index 0000000000000000000000000000000000000000..227df3f9f79d294cd8ccbfd3b72b02dfbea2ebd6
--- /dev/null
+++ b/examples/EAGLE_100/getIC.sh
@@ -0,0 +1,2 @@
+#!/bin/bash
+wget http://virgodb.cosma.dur.ac.uk/swift-webstorage/ICs/EAGLE_ICs_100.hdf5
diff --git a/examples/EAGLE_100/run.sh b/examples/EAGLE_100/run.sh
new file mode 100755
index 0000000000000000000000000000000000000000..6ef47d5d98172cc8a318242923ede37332bd5590
--- /dev/null
+++ b/examples/EAGLE_100/run.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+
+ # Generate the initial conditions if they are not present.
+if [ ! -e EAGLE_ICs_100.hdf5 ]
+then
+    echo "Fetching initial conditions for the EAGLE 100Mpc example..."
+    ./getIC.sh
+fi
+
+../swift -s -t 16 eagle_100.yml 2>&1 | tee output.log
+
diff --git a/examples/ExternalPointMass/energy_plot.py b/examples/ExternalPointMass/energy_plot.py
new file mode 100644
index 0000000000000000000000000000000000000000..a75fcb835d33b3695170aab822092556f12db7d1
--- /dev/null
+++ b/examples/ExternalPointMass/energy_plot.py
@@ -0,0 +1,121 @@
+import matplotlib
+matplotlib.use("Agg")
+from pylab import *
+import h5py
+
+# Plot parameters
+params = {'axes.labelsize': 10,
+'axes.titlesize': 10,
+'font.size': 12,
+'legend.fontsize': 12,
+'xtick.labelsize': 10,
+'ytick.labelsize': 10,
+'text.usetex': True,
+ 'figure.figsize' : (3.15,3.15),
+'figure.subplot.left'    : 0.145,
+'figure.subplot.right'   : 0.99,
+'figure.subplot.bottom'  : 0.11,
+'figure.subplot.top'     : 0.99,
+'figure.subplot.wspace'  : 0.15,
+'figure.subplot.hspace'  : 0.12,
+'lines.markersize' : 6,
+'lines.linewidth' : 3.,
+'text.latex.unicode': True
+}
+rcParams.update(params)
+rc('font',**{'family':'sans-serif','sans-serif':['Times']})
+
+
+import numpy as np
+import h5py as h5
+import sys
+
+# File containing the total energy
+stats_filename = "./energy.txt"
+
+# First snapshot
+snap_filename = "pointMass_000.hdf5"
+f = h5.File(snap_filename,'r')
+
+# Read the units parameters from the snapshot
+units = f["InternalCodeUnits"]
+unit_mass = units.attrs["Unit mass in cgs (U_M)"]
+unit_length = units.attrs["Unit length in cgs (U_L)"]
+unit_time = units.attrs["Unit time in cgs (U_t)"]
+
+G = 6.67408e-8 * unit_mass * unit_time**2 / unit_length**3
+
+# Read the header
+header = f["Header"]
+box_size = float(header.attrs["BoxSize"][0])
+
+# Read the properties of the potential
+parameters = f["Parameters"]
+mass = float(parameters.attrs["PointMassPotential:mass"])
+centre = [box_size/2, box_size/2, box_size/2]
+f.close()
+
+# Read the statistics summary
+file_energy = np.loadtxt("energy.txt")
+time_stats = file_energy[:,0]
+E_kin_stats = file_energy[:,3]
+E_pot_stats = file_energy[:,5]
+E_tot_stats = E_kin_stats + E_pot_stats
+
+# Read the snapshots
+time_snap = np.zeros(402)
+E_kin_snap = np.zeros(402)
+E_pot_snap = np.zeros(402)
+E_tot_snap = np.zeros(402)
+Lz_snap = np.zeros(402)
+
+# Read all the particles from the snapshots
+for i in range(402):
+    snap_filename = "pointMass_%0.3d.hdf5"%i
+    f = h5.File(snap_filename,'r')
+
+    pos_x = f["PartType1/Coordinates"][:,0]
+    pos_y = f["PartType1/Coordinates"][:,1]
+    pos_z = f["PartType1/Coordinates"][:,2]
+    vel_x = f["PartType1/Velocities"][:,0]
+    vel_y = f["PartType1/Velocities"][:,1]
+    vel_z = f["PartType1/Velocities"][:,2]
+    m = f["/PartType1/Masses"][:]
+    
+    r = np.sqrt((pos_x[:] - centre[0])**2 + (pos_y[:] - centre[1])**2 + (pos_z[:] - centre[2])**2)
+    Lz = (pos_x[:] - centre[0]) * vel_y[:] - (pos_y[:] - centre[1]) * vel_x[:]
+
+    time_snap[i] = f["Header"].attrs["Time"]
+    E_kin_snap[i] = np.sum(0.5 * m * (vel_x[:]**2 + vel_y[:]**2 + vel_z[:]**2))
+    E_pot_snap[i] = np.sum(-mass * m * G / r)
+    E_tot_snap[i] = E_kin_snap[i] + E_pot_snap[i]
+    Lz_snap[i] = np.sum(Lz)
+
+# Plot energy evolution
+figure()
+plot(time_stats, E_kin_stats, "r-", lw=0.5, label="Kinetic energy")
+plot(time_stats, E_pot_stats, "g-", lw=0.5, label="Potential energy")
+plot(time_stats, E_tot_stats, "k-", lw=0.5, label="Total energy")
+
+plot(time_snap[::10], E_kin_snap[::10], "rD", lw=0.5, ms=2)
+plot(time_snap[::10], E_pot_snap[::10], "gD", lw=0.5, ms=2)
+plot(time_snap[::10], E_tot_snap[::10], "kD", lw=0.5, ms=2)
+
+legend(loc="center right", fontsize=8, frameon=False, handlelength=3, ncol=1)
+xlabel("${\\rm{Time}}$", labelpad=0)
+ylabel("${\\rm{Energy}}$",labelpad=0)
+xlim(0, 8)
+
+savefig("energy.png", dpi=200)
+
+# Plot angular momentum evolution
+figure()
+plot(time_snap, Lz_snap, "k-", lw=0.5, ms=2)
+
+xlabel("${\\rm{Time}}$", labelpad=0)
+ylabel("${\\rm{Angular~momentum}}$",labelpad=0)
+xlim(0, 8)
+
+savefig("angular_momentum.png", dpi=200)
+
+
diff --git a/examples/ExternalPointMass/externalPointMass.yml b/examples/ExternalPointMass/externalPointMass.yml
index 621a66bbc39838ac8d3d8a8a3992b2a7be3157a8..20b5bb3aa613d553d8c401e968d8ebfc0572e610 100644
--- a/examples/ExternalPointMass/externalPointMass.yml
+++ b/examples/ExternalPointMass/externalPointMass.yml
@@ -9,7 +9,7 @@ InternalUnitSystem:
 # Parameters governing the time integration
 TimeIntegration:
   time_begin: 0.    # The starting time of the simulation (in internal units).
-  time_end:   1.    # The end time of the simulation (in internal units).
+  time_end:   8.    # The end time of the simulation (in internal units).
   dt_min:     1e-6  # The minimal time-step size of the simulation (in internal units).
   dt_max:     1e-3  # The maximal time-step size of the simulation (in internal units).
 
@@ -31,7 +31,7 @@ SPH:
 
 # Parameters related to the initial conditions
 InitialConditions:
-  file_name:  Sphere.hdf5           # The file to read
+  file_name:  PointMass.hdf5        # The file to read
   shift_x:    50.                   # A shift to apply to all particles read from the ICs (in internal units).
   shift_y:    50.
   shift_z:    50.
diff --git a/examples/ExternalPointMass/makeIC.py b/examples/ExternalPointMass/makeIC.py
index 326183398933c88d7348e72e00343064b3e3a64c..ba415daf9e03058239599cc08039fc89e0929393 100644
--- a/examples/ExternalPointMass/makeIC.py
+++ b/examples/ExternalPointMass/makeIC.py
@@ -24,10 +24,10 @@ import numpy
 import math
 import random
 
-# Generates a random distriution of particles, for motion in an external potnetial centred at (0,0,0)
+# Generates a random distriution of particles, for motion in an external potential centred at (0,0,0)
 
 # physical constants in cgs
-NEWTON_GRAVITY_CGS  = 6.672e-8
+NEWTON_GRAVITY_CGS  = 6.67408e-8
 SOLAR_MASS_IN_CGS   = 1.9885e33
 PARSEC_IN_CGS       = 3.0856776e18
 
@@ -39,34 +39,28 @@ const_unit_velocity_in_cgs =   (1e5)
 print "UnitMass_in_cgs:     ", const_unit_mass_in_cgs 
 print "UnitLength_in_cgs:   ", const_unit_length_in_cgs
 print "UnitVelocity_in_cgs: ", const_unit_velocity_in_cgs
+print "UnitTime_in_cgs:     ", const_unit_length_in_cgs / const_unit_velocity_in_cgs
 
 # derived units
 const_unit_time_in_cgs = (const_unit_length_in_cgs / const_unit_velocity_in_cgs)
 const_G                = ((NEWTON_GRAVITY_CGS*const_unit_mass_in_cgs*const_unit_time_in_cgs*const_unit_time_in_cgs/(const_unit_length_in_cgs*const_unit_length_in_cgs*const_unit_length_in_cgs)))
-print 'G=', const_G
+print '---------------------'
+print 'G in internal units: ', const_G
 
 
 # Parameters
-periodic= 1            # 1 For periodic box
-boxSize = 100.         # 
-Radius  = boxSize / 4. # maximum radius of particles
-G       = const_G 
-Mass    = 1e10         
+periodic   = 1            # 1 For periodic box
+boxSize    = 100.         # 
+max_radius = boxSize / 4. # maximum radius of particles
+Mass       = 1e10         
+print "Mass at the centre:  ", Mass
 
-N       = int(sys.argv[1])  # Number of particles
-L       = N**(1./3.)
+numPart = int(sys.argv[1])  # Number of particles
+mass    = 1.
 
-# these are not used but necessary for I/O
-rho = 2.              # Density
-P = 1.                # Pressure
-gamma = 5./3.         # Gas adiabatic index
-fileName = "Sphere.hdf5" 
+fileName = "PointMass.hdf5" 
 
 
-#---------------------------------------------------
-numPart        = N
-mass           = 1
-internalEnergy = P / ((gamma - 1.)*rho)
 
 #--------------------------------------------------
 
@@ -98,25 +92,26 @@ grp.attrs["Unit current in cgs (U_I)"] = 1.
 grp.attrs["Unit temperature in cgs (U_T)"] = 1.
 
 #Particle group
-#grp0 = file.create_group("/PartType0")
 grp1 = file.create_group("/PartType1")
+
 #generate particle positions
-radius = Radius * (numpy.random.rand(N))**(1./3.) 
-ctheta = -1. + 2 * numpy.random.rand(N)
-stheta = numpy.sqrt(1.-ctheta**2)
-phi    =  2 * math.pi * numpy.random.rand(N)
+radius = max_radius * (numpy.random.rand(numPart))**(1./3.)
+print '---------------------'
+print 'Radius: minimum = ',min(radius)
+print 'Radius: maximum = ',max(radius)
+radius = numpy.sort(radius)
 r      = numpy.zeros((numPart, 3))
-# r[:,0] = radius * stheta * numpy.cos(phi)
-# r[:,1] = radius * stheta * numpy.sin(phi)
-# r[:,2] = radius * ctheta
 r[:,0] = radius
-#
-speed  = numpy.sqrt(G * Mass / radius)
-v      = numpy.zeros((numPart, 3))
+
+#generate particle velocities
+speed  = numpy.sqrt(const_G * Mass / radius)
 omega  = speed / radius
 period = 2.*math.pi/omega
-print 'period = minimum = ',min(period), ' maximum = ',max(period)
+print '---------------------'
+print 'Period: minimum = ',min(period)
+print 'Period: maximum = ',max(period)
 
+v      = numpy.zeros((numPart, 3))
 v[:,0] = -omega * r[:,1]
 v[:,1] =  omega * r[:,0]
 
@@ -129,17 +124,6 @@ ds = grp1.create_dataset('Masses', (numPart,), 'f')
 ds[()] = m
 m = numpy.zeros(1)
 
-h = numpy.full((numPart, ), 1.1255 * boxSize / L)
-ds = grp1.create_dataset('SmoothingLength', (numPart,), 'f')
-ds[()] = h
-h = numpy.zeros(1)
-
-u = numpy.full((numPart, ), internalEnergy)
-ds = grp1.create_dataset('InternalEnergy', (numPart,), 'f')
-ds[()] = u
-u = numpy.zeros(1)
-
-
 ids = 1 + numpy.linspace(0, numPart, numPart, endpoint=False)
 ds = grp1.create_dataset('ParticleIDs', (numPart, ), 'L')
 ds[()] = ids
diff --git a/examples/ExternalPointMass/run.sh b/examples/ExternalPointMass/run.sh
index 4ac513f09cb8ac8dcefc256a68478e215b8bc320..e074c384c4e002a161c7d8258e9068663204099f 100755
--- a/examples/ExternalPointMass/run.sh
+++ b/examples/ExternalPointMass/run.sh
@@ -1,10 +1,13 @@
 #!/bin/bash
 
 # Generate the initial conditions if they are not present.
-if [ ! -e Sphere.hdf5 ]
+if [ ! -e PointMass.hdf5 ]
 then
     echo "Generating initial conditions for the point mass potential box example..."
     python makeIC.py 10000
 fi
 
+rm -rf pointMass_*.hdf5
 ../swift -g -t 1 externalPointMass.yml 2>&1 | tee output.log
+
+python energy_plot.py
diff --git a/examples/ExternalPointMass/test.pro b/examples/ExternalPointMass/test.pro
deleted file mode 100644
index 21c10e9d27daa45b085c6a659ba3cf7260f017fb..0000000000000000000000000000000000000000
--- a/examples/ExternalPointMass/test.pro
+++ /dev/null
@@ -1,65 +0,0 @@
-;
-;  test energy / angular momentum conservation of test problem
-;
-@physunits
-
-indir    = '/gpfs/data/tt/Codes/Swift-git/swiftsim/examples/'
-basefile = 'output_'
-nfiles   = 657
-nfollow  = 100 ; number of particles to follow
-eout     = fltarr(nfollow, nfiles)
-ekin     = fltarr(nfollow, nfiles)
-epot     = fltarr(nfollow, nfiles)
-tout     = fltarr(nfiles)
-; set properties of potential
-uL  = 1e3 * phys.pc             ; unit of length
-uM  = phys.msun                 ; unit of mass
-uV  = 1d5                       ; unit of velocity
-
-; derived units
-constG   = 10.^(alog10(phys.g)+alog10(uM)-2d0*alog10(uV)-alog10(uL)) ;
-pcentre  = [50.,50.,50.] * 1d3 * pc / uL
-mextern  = 1d10 * msun / uM
-;
-;
-;
-ifile  = 0
-for ifile=0,nfiles-1 do begin
-;for ifile=0,3 do begin
-   inf    = indir + basefile + strtrim(string(ifile,'(i3.3)'),1) + '.hdf5'
-   time   = h5ra(inf, 'Header','Time')
-   p      = h5rd(inf,'PartType1/Coordinates')
-   v      = h5rd(inf,'PartType1/Velocities')
-   id     = h5rd(inf,'PartType1/ParticleIDs')
-   indx   = sort(id)
-;
-   id     = id[indx]
-   for ic=0,2 do begin
-      tmp = reform(p[ic,*]) & p[ic,*] = tmp[indx]
-      tmp = reform(v[ic,*]) & v[ic,*] = tmp[indx]
-   endfor
-; calculate energy
-   dd  = size(p,/dimen) & npart = dd[1]
-   ener = fltarr(npart)
-   dr   = fltarr(npart) & dv = dr
-   for ic=0,2 do dr[*] = dr[*] + (p[ic,*]-pcentre[ic])^2
-   for ic=0,2 do dv[*] = dv[*] + v[ic,*]^2
-   dr = sqrt(dr)
-;   print,'time = ',time,p[0,0],v[0,0],id[0]
-   ek   = 0.5 * dv
-   ep   = - constG * mextern / dr
-   ener = ek + ep
-   tout(ifile) = time
-   eout(*,ifile) = ener[0:nfollow-1]
-   ekin(*,ifile) = ek[0:nfollow-1]
-   epot(*,ifile) = ep[0:nfollow-1]
-endfor
-
-; calculate relative energy change
-de = 0.0 * eout
-for ifile=1, nfiles -1 do de[*,ifile] = (eout[*,ifile]-eout[*,0])/eout[*,0]
-
-
-end
-
-
diff --git a/examples/Feedback/feedback.pro b/examples/Feedback/feedback.pro
deleted file mode 100644
index 02d616fc82f0aeb7011d022d13db9d1d1030e89c..0000000000000000000000000000000000000000
--- a/examples/Feedback/feedback.pro
+++ /dev/null
@@ -1,24 +0,0 @@
-base = 'Feedback'
-inf  = 'Feedback_005.hdf5'
-
-blast  = [5.650488e-01, 5.004371e-01, 5.010494e-01] ; location of blast
-pos    = h5rd(inf,'PartType0/Coordinates')
-vel    = h5rd(inf,'PartType0/Velocities')
-rho    = h5rd(inf,'PartType0/Density')
-utherm = h5rd(inf,'PartType0/InternalEnergy')
-
-; shift to centre
-for ic=0,2 do pos[ic,*] = pos[ic,*] - blast[ic]
-
-;; distance from centre
-dist = fltarr(n_elements(rho))
-for ic=0,2 do dist = dist + pos[ic,*]^2
-dist = sqrt(dist)
-
-; radial velocity
-vr = fltarr(n_elements(rho))
-for ic=0,2 do vr = vr + pos[ic,*]*vel[ic,*]
-vr = vr / dist
-
-; 
-end
diff --git a/examples/Feedback/feedback.yml b/examples/Feedback/feedback.yml
deleted file mode 100644
index de4f7abef1ef538a97a5e38c72b4db5ce2647976..0000000000000000000000000000000000000000
--- a/examples/Feedback/feedback.yml
+++ /dev/null
@@ -1,43 +0,0 @@
-# Define the system of units to use internally. 
-InternalUnitSystem:
-  UnitMass_in_cgs:     1   # Grams
-  UnitLength_in_cgs:   1   # Centimeters
-  UnitVelocity_in_cgs: 1   # Centimeters per second
-  UnitCurrent_in_cgs:  1   # Amperes
-  UnitTemp_in_cgs:     1   # Kelvin
-
-# Parameters governing the time integration
-TimeIntegration:
-  time_begin: 0.    # The starting time of the simulation (in internal units).
-  time_end:   5e-2  # The end time of the simulation (in internal units).
-  dt_min:     1e-7  # The minimal time-step size of the simulation (in internal units).
-  dt_max:     1e-4  # The maximal time-step size of the simulation (in internal units).
-
-# Parameters governing the snapshots
-Snapshots:
-  basename:            Feedback # Common part of the name of output files
-  time_first:          0.       # Time of the first output (in internal units)
-  delta_time:          1e-2     # Time difference between consecutive outputs (in internal units)
-
-# Parameters governing the conserved quantities statistics
-Statistics:
-  delta_time:          1e-3 # Time between statistics output
-
-# Parameters for the hydrodynamics scheme
-SPH:
-  resolution_eta:        1.2348   # Target smoothing length in units of the mean inter-particle separation (1.2348 == 48Ngbs with the cubic spline kernel).
-  delta_neighbours:      0.1      # The tolerance for the targetted number of neighbours.
-  CFL_condition:         0.1      # Courant-Friedrich-Levy condition for time integration.
-
-# Parameters related to the initial conditions
-InitialConditions:
-  file_name:  ./Feedback.hdf5          # The file to read
-
-# Parameters for feedback
-
-SN:
-  time:    0.001 # time the SN explodes (internal units)
-  energy:  1.0   # energy of the explosion (internal units)
-  x:       0.5   # x-position of explostion (internal units)
-  y:       0.5   # y-position of explostion (internal units)
-  z:       0.5   # z-position of explostion (internal units)
diff --git a/examples/Feedback/makeIC.py b/examples/Feedback/makeIC.py
deleted file mode 100644
index bd1081a9c275616038f5fa4e3eb943c36cb4c3eb..0000000000000000000000000000000000000000
--- a/examples/Feedback/makeIC.py
+++ /dev/null
@@ -1,109 +0,0 @@
-###############################################################################
- # This file is part of SWIFT.
- # Copyright (c) 2013 Pedro Gonnet (pedro.gonnet@durham.ac.uk),
- #                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
- #               2016 Tom Theuns (tom.theuns@durham.ac.uk)
- # 
- # This program is free software: you can redistribute it and/or modify
- # it under the terms of the GNU Lesser General Public License as published
- # by the Free Software Foundation, either version 3 of the License, or
- # (at your option) any later version.
- # 
- # This program is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- # GNU General Public License for more details.
- # 
- # You should have received a copy of the GNU Lesser General Public License
- # along with this program.  If not, see <http://www.gnu.org/licenses/>.
- # 
- ##############################################################################
-
-import h5py
-import sys
-from numpy import *
-
-# Generates a swift IC file containing a cartesian distribution of particles
-# at a constant density and pressure in a cubic box
-
-# Parameters
-periodic= 1           # 1 For periodic box
-boxSize = 1.
-L = int(sys.argv[1])  # Number of particles along one axis
-rho = 1.              # Density
-P = 1.e-6             # Pressure
-gamma = 5./3.         # Gas adiabatic index
-eta = 1.2349          # 48 ngbs with cubic spline kernel
-fileName = "Feedback.hdf5" 
-
-#---------------------------------------------------
-numPart = L**3
-mass = boxSize**3 * rho / numPart
-internalEnergy = P / ((gamma - 1.)*rho)
-
-#--------------------------------------------------
-
-#File
-file = h5py.File(fileName, 'w')
-
-# Header
-grp = file.create_group("/Header")
-grp.attrs["BoxSize"] = boxSize
-grp.attrs["NumPart_Total"] =  [numPart, 0, 0, 0, 0, 0]
-grp.attrs["NumPart_Total_HighWord"] = [0, 0, 0, 0, 0, 0]
-grp.attrs["NumPart_ThisFile"] = [numPart, 0, 0, 0, 0, 0]
-grp.attrs["Time"] = 0.0
-grp.attrs["NumFilesPerSnapshot"] = 1
-grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
-grp.attrs["Flag_Entropy_ICs"] = 0
-
-#Runtime parameters
-grp = file.create_group("/RuntimePars")
-grp.attrs["PeriodicBoundariesOn"] = periodic
-
-#Units
-grp = file.create_group("/Units")
-grp.attrs["Unit length in cgs (U_L)"] = 1.
-grp.attrs["Unit mass in cgs (U_M)"] = 1.
-grp.attrs["Unit time in cgs (U_t)"] = 1.
-grp.attrs["Unit current in cgs (U_I)"] = 1.
-grp.attrs["Unit temperature in cgs (U_T)"] = 1.
-
-#Particle group
-grp = file.create_group("/PartType0")
-
-v  = zeros((numPart, 3))
-ds = grp.create_dataset('Velocities', (numPart, 3), 'f')
-ds[()] = v
-v = zeros(1)
-
-m = full((numPart, 1), mass)
-ds = grp.create_dataset('Masses', (numPart,1), 'f')
-ds[()] = m
-m = zeros(1)
-
-h = full((numPart, 1), eta * boxSize / L)
-ds = grp.create_dataset('SmoothingLength', (numPart,1), 'f')
-ds[()] = h
-h = zeros(1)
-
-u = full((numPart, 1), internalEnergy)
-ds = grp.create_dataset('InternalEnergy', (numPart,1), 'f')
-ds[()] = u
-u = zeros(1)
-
-
-ids = linspace(0, numPart, numPart, endpoint=False).reshape((numPart,1))
-ds = grp.create_dataset('ParticleIDs', (numPart, 1), 'L')
-ds[()] = ids + 1
-x      = ids % L;
-y      = ((ids - x) / L) % L;
-z      = (ids - x - L * y) / L**2;
-coords = zeros((numPart, 3))
-coords[:,0] = z[:,0] * boxSize / L + boxSize / (2*L)
-coords[:,1] = y[:,0] * boxSize / L + boxSize / (2*L)
-coords[:,2] = x[:,0] * boxSize / L + boxSize / (2*L)
-ds = grp.create_dataset('Coordinates', (numPart, 3), 'd')
-ds[()] = coords
-
-file.close()
diff --git a/examples/IsothermalPotential/energy_plot.py b/examples/IsothermalPotential/energy_plot.py
new file mode 100644
index 0000000000000000000000000000000000000000..0afa6fa93fa2a992e6ddeab3c9d33538c0b41de3
--- /dev/null
+++ b/examples/IsothermalPotential/energy_plot.py
@@ -0,0 +1,120 @@
+import matplotlib
+matplotlib.use("Agg")
+from pylab import *
+import h5py
+
+# Plot parameters
+params = {'axes.labelsize': 10,
+'axes.titlesize': 10,
+'font.size': 12,
+'legend.fontsize': 12,
+'xtick.labelsize': 10,
+'ytick.labelsize': 10,
+'text.usetex': True,
+ 'figure.figsize' : (3.15,3.15),
+'figure.subplot.left'    : 0.145,
+'figure.subplot.right'   : 0.99,
+'figure.subplot.bottom'  : 0.11,
+'figure.subplot.top'     : 0.99,
+'figure.subplot.wspace'  : 0.15,
+'figure.subplot.hspace'  : 0.12,
+'lines.markersize' : 6,
+'lines.linewidth' : 3.,
+'text.latex.unicode': True
+}
+rcParams.update(params)
+rc('font',**{'family':'sans-serif','sans-serif':['Times']})
+
+
+import numpy as np
+import h5py as h5
+import sys
+
+# File containing the total energy
+stats_filename = "./energy.txt"
+
+# First snapshot
+snap_filename = "Isothermal_000.hdf5"
+f = h5.File(snap_filename,'r')
+
+# Read the units parameters from the snapshot
+units = f["InternalCodeUnits"]
+unit_mass = units.attrs["Unit mass in cgs (U_M)"]
+unit_length = units.attrs["Unit length in cgs (U_L)"]
+unit_time = units.attrs["Unit time in cgs (U_t)"]
+
+# Read the header
+header = f["Header"]
+box_size = float(header.attrs["BoxSize"][0])
+
+# Read the properties of the potential
+parameters = f["Parameters"]
+R200 = 100 
+Vrot = float(parameters.attrs["IsothermalPotential:vrot"])
+centre = [box_size/2, box_size/2, box_size/2]
+f.close()
+
+# Read the statistics summary
+file_energy = np.loadtxt("energy.txt")
+time_stats = file_energy[:,0]
+E_kin_stats = file_energy[:,3]
+E_pot_stats = file_energy[:,5]
+E_tot_stats = E_kin_stats + E_pot_stats
+
+# Read the snapshots
+time_snap = np.zeros(402)
+E_kin_snap = np.zeros(402)
+E_pot_snap = np.zeros(402)
+E_tot_snap = np.zeros(402)
+Lz_snap = np.zeros(402)
+
+# Read all the particles from the snapshots
+for i in range(402):
+    snap_filename = "Isothermal_%0.3d.hdf5"%i
+    f = h5.File(snap_filename,'r')
+
+    pos_x = f["PartType1/Coordinates"][:,0]
+    pos_y = f["PartType1/Coordinates"][:,1]
+    pos_z = f["PartType1/Coordinates"][:,2]
+    vel_x = f["PartType1/Velocities"][:,0]
+    vel_y = f["PartType1/Velocities"][:,1]
+    vel_z = f["PartType1/Velocities"][:,2]
+    mass = f["/PartType1/Masses"][:]
+    
+    r = np.sqrt((pos_x[:] - centre[0])**2 + (pos_y[:] - centre[1])**2 + (pos_z[:] - centre[2])**2)
+    Lz = (pos_x[:] - centre[0]) * vel_y[:] - (pos_y[:] - centre[1]) * vel_x[:]
+
+    time_snap[i] = f["Header"].attrs["Time"]
+    E_kin_snap[i] = np.sum(0.5 * mass * (vel_x[:]**2 + vel_y[:]**2 + vel_z[:]**2))
+    E_pot_snap[i] = np.sum(-mass * Vrot**2 *  log(r))
+    E_tot_snap[i] = E_kin_snap[i] + E_pot_snap[i]
+    Lz_snap[i] = np.sum(Lz)
+
+# Plot energy evolution
+figure()
+plot(time_stats, E_kin_stats, "r-", lw=0.5, label="Kinetic energy")
+plot(time_stats, E_pot_stats, "g-", lw=0.5, label="Potential energy")
+plot(time_stats, E_tot_stats, "k-", lw=0.5, label="Total energy")
+
+plot(time_snap[::10], E_kin_snap[::10], "rD", lw=0.5, ms=2)
+plot(time_snap[::10], E_pot_snap[::10], "gD", lw=0.5, ms=2)
+plot(time_snap[::10], E_tot_snap[::10], "kD", lw=0.5, ms=2)
+
+legend(loc="center right", fontsize=8, frameon=False, handlelength=3, ncol=1)
+xlabel("${\\rm{Time}}$", labelpad=0)
+ylabel("${\\rm{Energy}}$",labelpad=0)
+xlim(0, 8)
+
+savefig("energy.png", dpi=200)
+
+# Plot angular momentum evolution
+figure()
+plot(time_snap, Lz_snap, "k-", lw=0.5, ms=2)
+
+xlabel("${\\rm{Time}}$", labelpad=0)
+ylabel("${\\rm{Angular~momentum}}$",labelpad=0)
+xlim(0, 8)
+
+savefig("angular_momentum.png", dpi=200)
+
+
diff --git a/examples/IsothermalPotential/isothermal.yml b/examples/IsothermalPotential/isothermal.yml
index 0de99779f07591a5b71be11b75bc56ec741ddaed..8d9ec3875e405d95a89b3486bca5fd3465a3e20d 100644
--- a/examples/IsothermalPotential/isothermal.yml
+++ b/examples/IsothermalPotential/isothermal.yml
@@ -15,7 +15,7 @@ TimeIntegration:
 
 # Parameters governing the conserved quantities statistics
 Statistics:
-  delta_time:          1e-2 # Time between statistics output
+  delta_time:          1e-3 # Time between statistics output
   
 # Parameters governing the snapshots
 Snapshots:
@@ -23,25 +23,18 @@ Snapshots:
   time_first:          0.         # Time of the first output (in internal units)
   delta_time:          0.02       # Time difference between consecutive outputs (in internal units)
 
-# Parameters for the hydrodynamics scheme
-SPH:
-  resolution_eta:        1.2349   # Target smoothing length in units of the mean inter-particle separation (1.2349 == 48Ngbs with the cubic spline kernel).
-  delta_neighbours:      1.       # The tolerance for the targetted number of neighbours.
-  CFL_condition:         0.1      # Courant-Friedrich-Levy condition for time integration.
-  max_smoothing_length:  40.      # Maximal smoothing length allowed (in internal units).
-
 # Parameters related to the initial conditions
 InitialConditions:
   file_name:  Isothermal.hdf5       # The file to read
-  shift_x:    100.                  # A shift to apply to all particles read from the ICs (in internal units).
-  shift_y:    100.
-  shift_z:    100.
+  shift_x:    200.                  # Shift all particles to be in the potential
+  shift_y:    200.
+  shift_z:    200.
  
 # External potential parameters
 IsothermalPotential:
-  position_x:      100.     # location of centre of isothermal potential in internal units
-  position_y:      100.
-  position_z:      100.	
+  position_x:      0.       # location of centre of isothermal potential in internal units
+  position_y:      0.
+  position_z:      0.
   vrot:            200.     # rotation speed of isothermal potential in internal units
-  timestep_mult:   0.03     # controls time step
-
+  timestep_mult:   0.01     # controls time step
+  epsilon:         0.       # No softening at the centre of the halo
diff --git a/examples/IsothermalPotential/makeIC.py b/examples/IsothermalPotential/makeIC.py
index 976119f0a312c5acc81fab943ba3cf5769102269..7d1c5361f9a255365517226e49c55a8a50c4d6ce 100644
--- a/examples/IsothermalPotential/makeIC.py
+++ b/examples/IsothermalPotential/makeIC.py
@@ -30,10 +30,10 @@ import random
 # all particles move in the xy plane, and start at y=0
 
 # physical constants in cgs
-NEWTON_GRAVITY_CGS  = 6.672e-8
+NEWTON_GRAVITY_CGS  = 6.67408e-8
 SOLAR_MASS_IN_CGS   = 1.9885e33
 PARSEC_IN_CGS       = 3.0856776e18
-PROTON_MASS_IN_CGS  = 1.6726231e24
+PROTON_MASS_IN_CGS  = 1.672621898e24
 YEAR_IN_CGS         = 3.154e+7
 
 # choice of units
@@ -66,17 +66,12 @@ N       = int(sys.argv[1])  # Number of particles
 icirc   = int(sys.argv[2])  # if = 0, all particles are on circular orbits, if = 1, Lz/Lcirc uniform in ]0,1[
 L       = N**(1./3.)
 
-# these are not used but necessary for I/O
-rho = 2.              # Density
-P = 1.                # Pressure
-gamma = 5./3.         # Gas adiabatic index
 fileName = "Isothermal.hdf5" 
 
 
 #---------------------------------------------------
 numPart        = N
 mass           = 1
-internalEnergy = P / ((gamma - 1.)*rho)
 
 #--------------------------------------------------
 
@@ -111,7 +106,6 @@ grp.attrs["PeriodicBoundariesOn"] = periodic
 numpy.random.seed(1234)
 
 #Particle group
-#grp0 = file.create_group("/PartType0")
 grp1 = file.create_group("/PartType1")
 #generate particle positions
 radius = Radius * (numpy.random.rand(N))**(1./3.) 
@@ -119,10 +113,8 @@ ctheta = -1. + 2 * numpy.random.rand(N)
 stheta = numpy.sqrt(1.-ctheta**2)
 phi    =  2 * math.pi * numpy.random.rand(N)
 r      = numpy.zeros((numPart, 3))
-#r[:,0] = radius * stheta * numpy.cos(phi)
-#r[:,1] = radius * stheta * numpy.sin(phi)
-#r[:,2] = radius * ctheta
 r[:,0] = radius
+
 #
 speed  = vrot
 v      = numpy.zeros((numPart, 3))
@@ -146,17 +138,6 @@ ds = grp1.create_dataset('Masses', (numPart,), 'f')
 ds[()] = m
 m = numpy.zeros(1)
 
-h = numpy.full((numPart, ), 1.1255 * boxSize / L,  dtype='f')
-ds = grp1.create_dataset('SmoothingLength', (numPart,), 'f')
-ds[()] = h
-h = numpy.zeros(1)
-
-u = numpy.full((numPart, ), internalEnergy,  dtype='f')
-ds = grp1.create_dataset('InternalEnergy', (numPart,), 'f')
-ds[()] = u
-u = numpy.zeros(1)
-
-
 ids = 1 + numpy.linspace(0, numPart, numPart, endpoint=False, dtype='L')
 ds = grp1.create_dataset('ParticleIDs', (numPart, ), 'L')
 ds[()] = ids
diff --git a/examples/IsothermalPotential/run.sh b/examples/IsothermalPotential/run.sh
index 28a3cc0910f986f84bcd603091543643356f1c4a..976fbddc01cf7a3dcbb114d437ddb8f03b4d54bd 100755
--- a/examples/IsothermalPotential/run.sh
+++ b/examples/IsothermalPotential/run.sh
@@ -7,4 +7,7 @@ then
     python makeIC.py 1000 1
 fi
 
+rm -rf Isothermal_*.hdf5
 ../swift -g -t 1 isothermal.yml 2>&1 | tee output.log
+
+python energy_plot.py
diff --git a/examples/IsothermalPotential/test.pro b/examples/IsothermalPotential/test.pro
deleted file mode 100644
index edfa50121d2e5adb7e039f3c38d6d4c0b4d5e34f..0000000000000000000000000000000000000000
--- a/examples/IsothermalPotential/test.pro
+++ /dev/null
@@ -1,168 +0,0 @@
-;
-;  test energy / angular momentum conservation of test problem
-;
-
-iplot = 1 ; if iplot = 1, make plot of E/Lz conservation, else, simply compare final and initial energy
-
-; set physical constants
-@physunits
-
-indir    = './'
-basefile = 'Isothermal_'
-
-; set properties of potential
-uL   = 1e3 * phys.pc             ; unit of length
-uM   = phys.msun                 ; unit of mass
-uV   = 1d5                       ; unit of velocity
-vrot = 200.                      ; km/s
-r200 = 100.                      ; virial radius
-
-; derived units
-constG   = 10.^(alog10(phys.g)+alog10(uM)-2d0*alog10(uV)-alog10(uL)) ;
-pcentre  = [100.,100.,100.] * 1d3 * pc / uL
-
-;
-infile = indir + basefile + '*'
-spawn,'ls -1 '+infile,res
-nfiles = n_elements(res)
-
-
-
-; choose: calculate change of energy and Lz, comparing first and last
-; snapshots for all particles, or do so for a subset
-
-; compare all
-ifile   = 0
-inf     = indir + basefile + strtrim(string(ifile,'(i3.3)'),1) + '.hdf5'
-id      = h5rd(inf,'PartType1/ParticleIDs')
-nfollow = n_elements(id)
-
-; follow a subset
-nfollow  = 500                    ; number of particles to follow
-
-;
-if (iplot eq 1) then begin
-   nskip = 1
-   nsave = nfiles
-endif else begin
-   nskip = nfiles - 2
-   nsave = 2
-endelse
-
-;
-lout     = fltarr(nfollow, nsave) ; Lz
-xout     = fltarr(nfollow, nsave) ; x
-yout     = fltarr(nfollow, nsave) ; y
-zout     = fltarr(nfollow, nsave) ; z
-eout     = fltarr(nfollow, nsave) ; energies
-ekin     = fltarr(nfollow, nsave)
-epot     = fltarr(nfollow, nsave)
-tout     = fltarr(nsave)
-
-
-
-ifile  = 0
-isave = 0
-for ifile=0,nfiles-1,nskip do begin
-   inf    = indir + basefile + strtrim(string(ifile,'(i3.3)'),1) + '.hdf5'
-   time   = h5ra(inf, 'Header','Time')
-   p      = h5rd(inf,'PartType1/Coordinates')
-   v      = h5rd(inf,'PartType1/Velocities')
-   id     = h5rd(inf,'PartType1/ParticleIDs')
-   indx   = sort(id)
-;
-   id     = id[indx]
-   for ic=0,2 do begin
-      tmp = reform(p[ic,*]) & p[ic,*] = tmp[indx]
-      tmp = reform(v[ic,*]) & v[ic,*] = tmp[indx]
-   endfor
-
-
-; calculate energy
-   dd  = size(p,/dimen) & npart = dd[1]
-   ener = fltarr(npart)
-   dr   = fltarr(npart) & dv = dr
-   for ic=0,2 do dr[*] = dr[*] + (p[ic,*]-pcentre[ic])^2
-   for ic=0,2 do dv[*] = dv[*] + v[ic,*]^2
-   xout[*,isave] = p[0,0:nfollow-1]-pcentre[0]
-   yout[*,isave] = p[1,0:nfollow-1]-pcentre[1]
-   zout[*,isave] = p[2,0:nfollow-1]-pcentre[2]
-   Lz  = (p[0,*]-pcentre[0]) * v[1,*] - (p[1,*]-pcentre[1]) * v[0,*]
-   dr = sqrt(dr)
-;   print,'time = ',time,p[0,0],v[0,0],id[0]
-   ek   = 0.5 * dv
-;   ep   = - constG * mextern / dr
-   ep   = -vrot*vrot * (1 + alog(r200/dr))
-   ener = ek + ep
-   tout(isave) = time
-   lout[*,isave] = lz[0:nfollow-1]
-   eout(*,isave) = ener[0:nfollow-1]
-   ekin(*,isave) = ek[0:nfollow-1]
-   epot(*,isave) = ep[0:nfollow-1]
-
-;  write some output
-;   print,' time= ',time,' e= ',eout[0],' Lz= ',lz[0],format='(%a %f %a
-;   %f)'
-   print,format='('' time= '',f7.1,'' E= '',f9.2,'' Lz= '',e9.2)', time,eout[0],lz[0]
-   isave = isave + 1
-   
-endfor
-x0 = reform(xout[0,*])
-y0 = reform(xout[1,*])
-z0 = reform(xout[2,*])
-
-; calculate relative energy change
-de    = 0.0 * eout
-dl    = 0.0 * lout
-nsave = isave
-for ifile=1, nsave-1 do de[*,ifile] = (eout[*,ifile]-eout[*,0])/eout[*,0]
-for ifile=1, nsave-1 do dl[*,ifile] = (lout[*,ifile] - lout[*,0])/lout[*,0]
-
-
-; calculate statistics of energy changes
-print,' relatve energy change: (per cent) ',minmax(de) * 100.
-print,' relative Lz    change: (per cent) ',minmax(dl) * 100.
-
-; plot enery and Lz conservation for some particles
-if(iplot eq 1) then begin
-; plot results on energy conservation for some particles
-   nplot = min(10, nfollow)
-   win,0
-   xr = [min(tout), max(tout)]
-   yr = [-2,2]*1d-2             ; in percent
-   plot,[0],[0],xr=xr,yr=yr,/xs,/ys,/nodata,xtitle='time',ytitle='dE/E, dL/L (%)'
-   for i=0,nplot-1 do oplot,tout,de[i,*]
-   for i=0,nplot-1 do oplot,tout,dl[i,*],color=red
-   legend,['dE/E','dL/L'],linestyle=[0,0],color=[black,red],box=0,/bottom,/left
-   screen_to_png,'e-time.png'
-
-;  plot orbits of those particles
-   win,2
-   xr = [-100,100]
-   yr = xr
-   plot,[0],[0],xr=xr,yr=yr,/xs,/ys,/iso,/nodata,xtitle='x',ytitle='y'
-   color = floor(findgen(nplot)*255/float(nplot))
-   for i=0,nplot-1 do oplot,xout[i,*],yout[i,*],color=color(i)
-   screen_to_png,'orbit.png'
-
-; plot radial position of these particles
-   win,4
-   xr = [min(tout), max(tout)]
-   yr = [0,80]
-   plot,[0],[0],xr=xr,yr=yr,/xs,/ys,/nodata,xtitle='t',ytitle='r'
-   color = floor(findgen(nplot)*255/float(nplot))
-for i=0,nplot-1 do begin dr = sqrt(reform(xout[i,*])^2 + reform(yout[i,*])^2) &  oplot,tout,dr,color=color[i] & endfor
-   screen_to_png,'r-time.png'
-
-; make histogram of energy changes at end
-   win,6
-   ohist,de,x,y,-0.05,0.05,0.001
-   plot,x,y,psym=10,xtitle='de (%)'
-   screen_to_png,'de-hist.png'
-
-
-endif
-
-end
-
-
diff --git a/examples/Makefile.am b/examples/Makefile.am
index 28a4629bdb401c0736379a2fe14a3a5f19caf650..dd13fb7eb4b82fbbfbb1ae450e20d01b13f2a455 100644
--- a/examples/Makefile.am
+++ b/examples/Makefile.am
@@ -63,11 +63,11 @@ EXTRA_DIST = BigCosmoVolume/makeIC.py \
 	     EAGLE_12/eagle_12.yml EAGLE_12/getIC.sh EAGLE_12/README EAGLE_12/run.sh \
 	     EAGLE_25/eagle_25.yml EAGLE_25/getIC.sh EAGLE_25/README EAGLE_25/run.sh \
 	     EAGLE_50/eagle_50.yml EAGLE_50/getIC.sh EAGLE_50/README EAGLE_50/run.sh \
-	     ExternalPointMass/externalPointMass.yml ExternalPointMass/makeIC.py ExternalPointMass/run.sh ExternalPointMass/test.pro \
+	     ExternalPointMass/externalPointMass.yml ExternalPointMass/makeIC.py ExternalPointMass/run.sh ExternalPointMass/energy_plot.py \
 	     GreshoVortex_2D/getGlass.sh GreshoVortex_2D/gresho.yml GreshoVortex_2D/makeIC.py GreshoVortex_2D/plotSolution.py GreshoVortex_2D/run.sh \
 	     HydrostaticHalo/README HydrostaticHalo/hydrostatic.yml HydrostaticHalo/makeIC.py HydrostaticHalo/run.sh \
 	     HydrostaticHalo/density_profile.py HydrostaticHalo/velocity_profile.py HydrostaticHalo/internal_energy_profile.py HydrostaticHalo/test_energy_conservation.py \
-	     IsothermalPotential/README IsothermalPotential/run.sh IsothermalPotential/test.pro IsothermalPotential/isothermal.yml IsothermalPotential/makeIC.py \
+	     IsothermalPotential/README IsothermalPotential/run.sh IsothermalPotential/energy_plot.py IsothermalPotential/isothermal.yml IsothermalPotential/makeIC.py \
 	     KelvinHelmholtz_2D/kelvinHelmholtz.yml KelvinHelmholtz_2D/makeIC.py KelvinHelmholtz_2D/plotSolution.py KelvinHelmholtz_2D/run.sh \
 	     MultiTypes/makeIC.py  MultiTypes/multiTypes.yml MultiTypes/run.sh \
 	     PerturbedBox_2D/makeIC.py PerturbedBox_2D/perturbedPlane.yml \
diff --git a/examples/main.c b/examples/main.c
index 50f444e692c8ee9cfd3453cd7b3c9b1e4749a2d3..4d511ce8bc4ad35afee4f86655cbabeda3eebc0a 100644
--- a/examples/main.c
+++ b/examples/main.c
@@ -290,7 +290,8 @@ int main(int argc, char *argv[]) {
 
 /* Do we have debugging checks ? */
 #ifdef SWIFT_DEBUG_CHECKS
-  message("WARNING: Debugging checks activated. Code will be slower !");
+  if (myrank == 0)
+    message("WARNING: Debugging checks activated. Code will be slower !");
 #endif
 
   /* Do we choke on FP-exceptions ? */
@@ -359,7 +360,7 @@ int main(int argc, char *argv[]) {
 
   /* Initialise the hydro properties */
   struct hydro_props hydro_properties;
-  hydro_props_init(&hydro_properties, params);
+  if (with_hydro) hydro_props_init(&hydro_properties, params);
 
   /* Read particles and space information from (GADGET) ICs */
   char ICfileName[200] = "";
diff --git a/examples/plot_tasks.py b/examples/plot_tasks.py
index 6295c81a5f2fdb1e726cdf0a8fb43713004800f1..978448b3cd049c6ff31a92c7255851390ccc700c 100755
--- a/examples/plot_tasks.py
+++ b/examples/plot_tasks.py
@@ -55,40 +55,44 @@ PLOT_PARAMS = {"axes.labelsize": 10,
 pl.rcParams.update(PLOT_PARAMS)
 
 #  Tasks and subtypes. Indexed as in tasks.h.
-TASKTYPES = ["none", "sort", "self", "pair", "sub_self", "sub_pair", "init", "ghost",
-             "extra_ghost", "kick", "send", "recv",
-             "grav_gather_m", "grav_fft", "grav_mm", "grav_up",
-             "grav_external", "cooling", "count"]
-
-TASKCOLOURS = {"none": "black",
-               "sort": "lightblue",
-               "self": "greenyellow",
-               "pair": "navy",
-               "sub_self": "greenyellow",
-               "sub_pair": "navy",
-               "init": "indigo",
-               "ghost": "cyan",
-               "extra_ghost": "cyan",
-               "kick": "green",
-               "send": "yellow",
-               "recv": "magenta",
-               "grav_gather_m": "mediumorchid",
-               "grav_fft": "mediumnightblue",
-               "grav_mm": "mediumturquoise",
-               "grav_up": "mediumvioletred",
-               "grav_external": "darkred",
-               "cooling": "darkblue",
-               "count": "powerblue"}
-
-SUBTYPES = ["none", "density", "gradient", "force", "grav", "tend", "count"]
-
-SUBCOLOURS = {"none": "black",
-              "density": "red",
-              "gradient": "powerblue",
-              "force": "blue",
-              "grav": "indigo",
-              "tend": "grey",
-              "count": "black"}
+TASKTYPES = ["none", "sort", "self", "pair", "sub_self", "sub_pair",
+             "init", "ghost", "extra_ghost", "drift", "kick1", "kick2",
+             "timestep", "send", "recv", "grav_gather_m", "grav_fft",
+             "grav_mm", "grav_up", "cooling", "sourceterms", "count"]
+SUBTYPES = ["none", "density", "gradient", "force", "grav", "external_grav",
+            "tend", "xv", "rho", "gpart", "count"]
+
+#  Task/subtypes of interest.
+FULLTYPES = ["self/force", "self/density", "sub_self/force",
+             "sub_self/density", "pair/force", "pair/density", "sub_pair/force",
+             "sub_pair/density", "recv/xv", "send/xv", "recv/rho", "send/rho",
+             "recv/tend", "send/tend"]
+
+#  Get a number of colours for the various types.
+colours = ["black", "gray", "rosybrown", "firebrick", "red", "darksalmon",
+           "sienna", "sandybrown", "bisque", "tan", "moccasin", "gold", "darkkhaki",
+           "lightgoldenrodyellow", "olivedrab", "chartreuse", "darksage", "lightgreen",
+           "green", "mediumseagreen", "mediumaquamarine", "mediumturquoise", "darkslategrey",
+           "cyan", "cadetblue", "skyblue", "dodgerblue", "slategray", "darkblue",
+           "slateblue", "blueviolet", "mediumorchid", "purple", "magenta", "hotpink",
+           "pink"]
+maxcolours = len(colours)
+
+#  Set colours of task/subtype.
+TASKCOLOURS = {}
+ncolours = 0
+for task in TASKTYPES:
+    TASKCOLOURS[task] = colours[ncolours]
+    ncolours = (ncolours + 1) % maxcolours
+
+SUBCOLOURS = {}
+for task in SUBTYPES:
+    SUBCOLOURS[task] = colours[ncolours]
+    ncolours = (ncolours + 1) % maxcolours
+
+for task in FULLTYPES:
+    SUBCOLOURS[task] = colours[ncolours]
+    ncolours = (ncolours + 1) % maxcolours
 
 #  Show docs if help is requested.
 if len( sys.argv ) == 2 and ( sys.argv[1][0:2] == "-h" or sys.argv[1][0:3] == "--h" ):
@@ -149,39 +153,26 @@ num_lines = pl.size(data) / 10
 for line in range(num_lines):
     thread = int(data[line,0])
     tasks[thread].append({})
-    tasks[thread][-1]["type"] = TASKTYPES[int(data[line,1])]
-    tasks[thread][-1]["subtype"] = SUBTYPES[int(data[line,2])]
+    tasktype = TASKTYPES[int(data[line,1])]
+    subtype = SUBTYPES[int(data[line,2])]
+    tasks[thread][-1]["type"] = tasktype
+    tasks[thread][-1]["subtype"] = subtype
     tic = int(data[line,4]) / CPU_CLOCK * 1000
     toc = int(data[line,5]) / CPU_CLOCK * 1000
     tasks[thread][-1]["tic"] = tic
     tasks[thread][-1]["toc"] = toc
     tasks[thread][-1]["t"] = (toc + tic)/ 2
+    if "self" in tasktype or "pair" in tasktype:
+        fulltype = tasktype + "/" + subtype
+        if fulltype in SUBCOLOURS:
+            tasks[thread][-1]["colour"] = SUBCOLOURS[fulltype]
+        else:
+            tasks[thread][-1]["colour"] = SUBCOLOURS[subtype]
+    else:
+        tasks[thread][-1]["colour"] = TASKCOLOURS[tasktype]
     
-combtasks = {}
-combtasks[-1] = []
-for i in range(nthread):
-    combtasks[i] = []
-
 for thread in range(nthread):
     tasks[thread] = sorted(tasks[thread], key=lambda l: l["t"])
-    lasttype = ""
-    types = []
-    for task in tasks[thread]:
-        if task["type"] not in types:
-            types.append(task["type"])
-        if lasttype == "" or not lasttype == task["type"]:
-            combtasks[thread].append({})
-            combtasks[thread][-1]["type"] = task["type"]
-            combtasks[thread][-1]["subtype"] = task["subtype"]
-            combtasks[thread][-1]["tic"] = task["tic"]
-            combtasks[thread][-1]["toc"] = task["toc"]
-            if task["type"] == "self" or task["type"] == "pair" or task["type"] == "sub":
-                combtasks[thread][-1]["colour"] = SUBCOLOURS[task["subtype"]]
-            else:
-                combtasks[thread][-1]["colour"] = TASKCOLOURS[task["type"]]
-            lasttype = task["type"]
-        else:
-            combtasks[thread][-1]["toc"] = task["toc"]
             
 typesseen = []
 fig = pl.figure()
@@ -192,11 +183,11 @@ tictoc = np.zeros(2)
 for i in range(nthread):
 
     #  Collect ranges and colours into arrays.
-    tictocs = np.zeros(len(combtasks[i])*2)
-    colours = np.empty(len(combtasks[i])*2, dtype='object')
+    tictocs = np.zeros(len(tasks[i])*2)
+    colours = np.empty(len(tasks[i])*2, dtype='object')
     coloursseen = []
     j = 0
-    for task in combtasks[i]:
+    for task in tasks[i]:
         tictocs[j] = task["tic"]
         tictocs[j+1] = task["toc"]
         colours[j] = task["colour"]
diff --git a/examples/plot_tasks_MPI.py b/examples/plot_tasks_MPI.py
index 734918b8cbf388ef8f1a064e014cfd28775edde2..c95bfa1fd2d087cc907b57201c1a1397cbeb1460 100755
--- a/examples/plot_tasks_MPI.py
+++ b/examples/plot_tasks_MPI.py
@@ -63,40 +63,44 @@ PLOT_PARAMS = {"axes.labelsize": 10,
 pl.rcParams.update(PLOT_PARAMS)
 
 #  Tasks and subtypes. Indexed as in tasks.h.
-TASKTYPES = ["none", "sort", "self", "pair", "sub_self", "sub_pair", "init",
-             "ghost", "extra_ghost", "kick", "send", "recv",
-             "grav_gather_m", "grav_fft", "grav_mm", "grav_up",
-             "grav_external", "cooling", "count"]
-
-TASKCOLOURS = {"none": "black",
-               "sort": "lightblue",
-               "self": "greenyellow",
-               "pair": "navy",
-               "sub_self": "greenyellow",
-               "sub_pair": "navy",
-               "init": "indigo",
-               "ghost": "cyan",
-               "extra_ghost": "cyan",
-               "kick": "green",
-               "send": "yellow",
-               "recv": "magenta",
-               "grav_gather_m": "mediumorchid",
-               "grav_fft": "mediumnightblue",
-               "grav_mm": "mediumturquoise",
-               "grav_up": "mediumvioletred",
-               "grav_external": "darkred",
-               "cooling": "darkblue",
-               "count": "powerblue"}
-
-SUBTYPES = ["none", "density", "gradient", "force", "grav", "tend", "count"]
-
-SUBCOLOURS = {"none": "black",
-              "density": "red",
-              "gradient": "powerblue",
-              "force": "blue",
-              "grav": "indigo",
-              "tend": "grey",
-              "count": "black"}
+TASKTYPES = ["none", "sort", "self", "pair", "sub_self", "sub_pair",
+             "init", "ghost", "extra_ghost", "drift", "kick1", "kick2",
+             "timestep", "send", "recv", "grav_gather_m", "grav_fft",
+             "grav_mm", "grav_up", "cooling", "sourceterms", "count"]
+SUBTYPES = ["none", "density", "gradient", "force", "grav", "external_grav",
+            "tend", "xv", "rho", "gpart", "count"]
+
+#  Task/subtypes of interest.
+FULLTYPES = ["self/force", "self/density", "sub_self/force",
+             "sub_self/density", "pair/force", "pair/density", "sub_pair/force",
+             "sub_pair/density", "recv/xv", "send/xv", "recv/rho", "send/rho",
+             "recv/tend", "send/tend"]
+
+#  Get a number of colours for the various types.
+colours = ["black", "gray", "rosybrown", "firebrick", "red", "darksalmon",
+           "sienna", "sandybrown", "bisque", "tan", "moccasin", "gold", "darkkhaki",
+           "lightgoldenrodyellow", "olivedrab", "chartreuse", "darksage", "lightgreen",
+           "green", "mediumseagreen", "mediumaquamarine", "mediumturquoise", "darkslategrey",
+           "cyan", "cadetblue", "skyblue", "dodgerblue", "slategray", "darkblue",
+           "slateblue", "blueviolet", "mediumorchid", "purple", "magenta", "hotpink",
+           "pink"]
+maxcolours = len(colours)
+
+#  Set colours of task/subtype.
+TASKCOLOURS = {}
+ncolours = 0
+for task in TASKTYPES:
+    TASKCOLOURS[task] = colours[ncolours]
+    ncolours = (ncolours + 1) % maxcolours
+
+SUBCOLOURS = {}
+for task in SUBTYPES:
+    SUBCOLOURS[task] = colours[ncolours]
+    ncolours = (ncolours + 1) % maxcolours
+
+for task in FULLTYPES:
+    SUBCOLOURS[task] = colours[ncolours]
+    ncolours = (ncolours + 1) % maxcolours
 
 #  Show docs if help is requested.
 if len( sys.argv ) == 2 and ( sys.argv[1][0:2] == "-h" or sys.argv[1][0:3] == "--h" ):
@@ -185,39 +189,26 @@ for rank in range(nranks):
         for line in range(num_lines):
             thread = int(data[line,1])
             tasks[thread].append({})
-            tasks[thread][-1]["type"] = TASKTYPES[int(data[line,2])]
-            tasks[thread][-1]["subtype"] = SUBTYPES[int(data[line,3])]
+            tasktype = TASKTYPES[int(data[line,2])]
+            subtype = SUBTYPES[int(data[line,3])]
+            tasks[thread][-1]["type"] = tasktype
+            tasks[thread][-1]["subtype"] = subtype
             tic = int(data[line,5]) / CPU_CLOCK * 1000
             toc = int(data[line,6]) / CPU_CLOCK * 1000
             tasks[thread][-1]["tic"] = tic
             tasks[thread][-1]["toc"] = toc
             tasks[thread][-1]["t"] = (toc + tic)/ 2
-
-        combtasks = {}
-        combtasks[-1] = []
-        for i in range(nthread):
-            combtasks[i] = []
+            if "self" in tasktype or "pair" in tasktype or "recv" in tasktype or "send" in tasktype:
+                fulltype = tasktype + "/" + subtype
+                if fulltype in SUBCOLOURS:
+                    tasks[thread][-1]["colour"] = SUBCOLOURS[fulltype]
+                else:
+                    tasks[thread][-1]["colour"] = SUBCOLOURS[subtype]
+            else:
+                tasks[thread][-1]["colour"] = TASKCOLOURS[tasktype]
 
         for thread in range(nthread):
             tasks[thread] = sorted(tasks[thread], key=lambda l: l["t"])
-            lasttype = ""
-            types = []
-            for task in tasks[thread]:
-                if task["type"] not in types:
-                    types.append(task["type"])
-                if lasttype == "" or not lasttype == task["type"]:
-                    combtasks[thread].append({})
-                    combtasks[thread][-1]["type"] = task["type"]
-                    combtasks[thread][-1]["subtype"] = task["subtype"]
-                    combtasks[thread][-1]["tic"] = task["tic"]
-                    combtasks[thread][-1]["toc"] = task["toc"]
-                    if task["type"] == "self" or task["type"] == "pair" or task["type"] == "sub":
-                        combtasks[thread][-1]["colour"] = SUBCOLOURS[task["subtype"]]
-                    else:
-                        combtasks[thread][-1]["colour"] = TASKCOLOURS[task["type"]]
-                    lasttype = task["type"]
-                else:
-                    combtasks[thread][-1]["toc"] = task["toc"]
 
         fig = pl.figure()
         ax = fig.add_subplot(1,1,1)
@@ -227,11 +218,11 @@ for rank in range(nranks):
         for i in range(nthread):
 
             #  Collect ranges and colours into arrays.
-            tictocs = np.zeros(len(combtasks[i])*2)
-            colours = np.empty(len(combtasks[i])*2, dtype='object')
+            tictocs = np.zeros(len(tasks[i])*2)
+            colours = np.empty(len(tasks[i])*2, dtype='object')
             coloursseen = []
             j = 0
-            for task in combtasks[i]:
+            for task in tasks[i]:
                 tictocs[j] = task["tic"]
                 tictocs[j+1] = task["toc"]
                 colours[j] = task["colour"]
diff --git a/src/Makefile.am b/src/Makefile.am
index 5982be729020fff7ef732a78b5155ad0f5a430eb..5434241c0eb203f069901dd9be99640b91dd8e90 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -59,7 +59,7 @@ AM_SOURCES = space.c runner.c queue.c task.c cell.c engine.c \
 # Include files for distribution, not installation.
 nobase_noinst_HEADERS = align.h approx_math.h atomic.h cycle.h error.h inline.h kernel_hydro.h kernel_gravity.h \
 		 kernel_long_gravity.h vector.h cache.h runner_doiact.h runner_doiact_vec.h runner_doiact_grav.h runner_doiact_fft.h \
-                 units.h intrinsics.h minmax.h kick.h timestep.h drift.h adiabatic_index.h io_properties.h \
+                 runner_doiact_nosort.h units.h intrinsics.h minmax.h kick.h timestep.h drift.h adiabatic_index.h io_properties.h \
 		 dimension.h equation_of_state.h \
 		 gravity.h gravity_io.h \
 		 gravity/Default/gravity.h gravity/Default/gravity_iact.h gravity/Default/gravity_io.h \
diff --git a/src/active.h b/src/active.h
index edb276c5e9d7f420e8bcaa9cb9d15dae0ad4a880..8dfd063fda9189ff715107c599ef4851cc5a0387 100644
--- a/src/active.h
+++ b/src/active.h
@@ -41,9 +41,10 @@ __attribute__((always_inline)) INLINE static int cell_is_drifted(
 #ifdef SWIFT_DEBUG_CHECKS
   if (c->ti_old > e->ti_current)
     error(
-        "Cell has been drifted too far forward in time! c->ti_old=%lld "
-        "e->ti_current=%lld",
-        c->ti_old, e->ti_current);
+        "Cell has been drifted too far forward in time! c->ti_old=%lld (t=%e) "
+        "and e->ti_current=%lld (t=%e)",
+        c->ti_old, c->ti_old * e->timeBase, e->ti_current,
+        e->ti_current * e->timeBase);
 #endif
 
   return (c->ti_old == e->ti_current);
@@ -62,9 +63,10 @@ __attribute__((always_inline)) INLINE static int cell_is_active(
 #ifdef SWIFT_DEBUG_CHECKS
   if (c->ti_end_min < e->ti_current)
     error(
-        "cell in an impossible time-zone! c->ti_end_min=%lld "
-        "e->ti_current=%lld",
-        c->ti_end_min, e->ti_current);
+        "cell in an impossible time-zone! c->ti_end_min=%lld (t=%e) and "
+        "e->ti_current=%lld (t=%e)",
+        c->ti_end_min, c->ti_end_min * e->timeBase, e->ti_current,
+        e->ti_current * e->timeBase);
 #endif
 
   return (c->ti_end_min == e->ti_current);
diff --git a/src/cell.c b/src/cell.c
index 1684111d5b03ce712df11f4dabffeb2fdbf33dc3..ead1d7a49e40c028cdbf737cad7c295e4d005383 100644
--- a/src/cell.c
+++ b/src/cell.c
@@ -130,7 +130,6 @@ int cell_unpack(struct pcell *pc, struct cell *c, struct space *s) {
       temp->dx_max_sort = 0.f;
       temp->nodeID = c->nodeID;
       temp->parent = c;
-      temp->ti_old = c->ti_old;
       c->progeny[k] = temp;
       c->split = 1;
       count += cell_unpack(&pc[pc->progeny[k]], temp, s);
@@ -245,7 +244,7 @@ int cell_pack(struct cell *c, struct pcell *pc) {
  *
  * @return The number of packed cells.
  */
-int cell_pack_ti_ends(struct cell *c, int *ti_ends) {
+int cell_pack_ti_ends(struct cell *c, integertime_t *ti_ends) {
 
 #ifdef WITH_MPI
 
@@ -276,7 +275,7 @@ int cell_pack_ti_ends(struct cell *c, int *ti_ends) {
  *
  * @return The number of cells created.
  */
-int cell_unpack_ti_ends(struct cell *c, int *ti_ends) {
+int cell_unpack_ti_ends(struct cell *c, integertime_t *ti_ends) {
 
 #ifdef WITH_MPI
 
@@ -908,6 +907,10 @@ int cell_is_drift_needed(struct cell *c, const struct engine *e) {
  */
 int cell_unskip_tasks(struct cell *c, struct scheduler *s) {
 
+#ifdef WITH_MPI
+  struct engine *e = s->space->e;
+#endif
+
   /* Un-skip the density tasks involved with this cell. */
   for (struct link *l = c->density; l != NULL; l = l->next) {
     struct task *t = l->t;
@@ -958,8 +961,10 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) {
 
         /* Activate the tasks to recv foreign cell ci's data. */
         scheduler_activate(s, ci->recv_xv);
-        scheduler_activate(s, ci->recv_rho);
-        scheduler_activate(s, ci->recv_ti);
+        if (cell_is_active(ci, e)) {
+          scheduler_activate(s, ci->recv_rho);
+          scheduler_activate(s, ci->recv_ti);
+        }
 
         /* Look for the local cell cj's send tasks. */
         struct link *l = NULL;
@@ -974,24 +979,28 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) {
         else
           error("Drift task missing !");
 
-        for (l = cj->send_rho; l != NULL && l->t->cj->nodeID != ci->nodeID;
-             l = l->next)
-          ;
-        if (l == NULL) error("Missing link to send_rho task.");
-        scheduler_activate(s, l->t);
-
-        for (l = cj->send_ti; l != NULL && l->t->cj->nodeID != ci->nodeID;
-             l = l->next)
-          ;
-        if (l == NULL) error("Missing link to send_ti task.");
-        scheduler_activate(s, l->t);
+        if (cell_is_active(cj, e)) {
+          for (l = cj->send_rho; l != NULL && l->t->cj->nodeID != ci->nodeID;
+               l = l->next)
+            ;
+          if (l == NULL) error("Missing link to send_rho task.");
+          scheduler_activate(s, l->t);
+
+          for (l = cj->send_ti; l != NULL && l->t->cj->nodeID != ci->nodeID;
+               l = l->next)
+            ;
+          if (l == NULL) error("Missing link to send_ti task.");
+          scheduler_activate(s, l->t);
+        }
 
       } else if (cj->nodeID != engine_rank) {
 
         /* Activate the tasks to recv foreign cell cj's data. */
         scheduler_activate(s, cj->recv_xv);
-        scheduler_activate(s, cj->recv_rho);
-        scheduler_activate(s, cj->recv_ti);
+        if (cell_is_active(cj, e)) {
+          scheduler_activate(s, cj->recv_rho);
+          scheduler_activate(s, cj->recv_ti);
+        }
 
         /* Look for the local cell ci's send tasks. */
         struct link *l = NULL;
@@ -1006,17 +1015,19 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) {
         else
           error("Drift task missing !");
 
-        for (l = ci->send_rho; l != NULL && l->t->cj->nodeID != cj->nodeID;
-             l = l->next)
-          ;
-        if (l == NULL) error("Missing link to send_rho task.");
-        scheduler_activate(s, l->t);
-
-        for (l = ci->send_ti; l != NULL && l->t->cj->nodeID != cj->nodeID;
-             l = l->next)
-          ;
-        if (l == NULL) error("Missing link to send_ti task.");
-        scheduler_activate(s, l->t);
+        if (cell_is_active(ci, e)) {
+          for (l = ci->send_rho; l != NULL && l->t->cj->nodeID != cj->nodeID;
+               l = l->next)
+            ;
+          if (l == NULL) error("Missing link to send_rho task.");
+          scheduler_activate(s, l->t);
+
+          for (l = ci->send_ti; l != NULL && l->t->cj->nodeID != cj->nodeID;
+               l = l->next)
+            ;
+          if (l == NULL) error("Missing link to send_ti task.");
+          scheduler_activate(s, l->t);
+        }
       } else {
         scheduler_activate(s, ci->super->drift);
         scheduler_activate(s, cj->super->drift);
@@ -1168,3 +1179,25 @@ void cell_drift(struct cell *c, const struct engine *e) {
   /* Update the time of the last drift */
   c->ti_old = ti_current;
 }
+
+/**
+ * @brief Recursively checks that all particles in a cell have a time-step
+ */
+void cell_check_timesteps(struct cell *c) {
+#ifdef SWIFT_DEBUG_CHECKS
+
+  if (c->ti_end_min == 0 && c->nr_tasks > 0)
+    error("Cell without assigned time-step");
+
+  if (c->split) {
+    for (int k = 0; k < 8; ++k)
+      if (c->progeny[k] != NULL) cell_check_timesteps(c->progeny[k]);
+  } else {
+
+    if (c->nodeID == engine_rank)
+      for (int i = 0; i < c->count; ++i)
+        if (c->parts[i].time_bin == 0)
+          error("Particle without assigned time-bin");
+  }
+#endif
+}
diff --git a/src/cell.h b/src/cell.h
index a89353a2b9105387fdc5f22b130ac1e108498303..eed72bfa64f9cab00d25f6e9b0ae3a008a090186 100644
--- a/src/cell.h
+++ b/src/cell.h
@@ -35,6 +35,7 @@
 #include "multipole.h"
 #include "part.h"
 #include "task.h"
+#include "timeline.h"
 
 /* Avoid cyclic inclusions */
 struct engine;
@@ -299,8 +300,8 @@ int cell_glocktree(struct cell *c);
 void cell_gunlocktree(struct cell *c);
 int cell_pack(struct cell *c, struct pcell *pc);
 int cell_unpack(struct pcell *pc, struct cell *c, struct space *s);
-int cell_pack_ti_ends(struct cell *c, int *ti_ends);
-int cell_unpack_ti_ends(struct cell *c, int *ti_ends);
+int cell_pack_ti_ends(struct cell *c, integertime_t *ti_ends);
+int cell_unpack_ti_ends(struct cell *c, integertime_t *ti_ends);
 int cell_getsize(struct cell *c);
 int cell_link_parts(struct cell *c, struct part *parts);
 int cell_link_gparts(struct cell *c, struct gpart *gparts);
@@ -315,5 +316,6 @@ int cell_is_drift_needed(struct cell *c, const struct engine *e);
 int cell_unskip_tasks(struct cell *c, struct scheduler *s);
 void cell_set_super(struct cell *c, struct cell *super);
 void cell_drift(struct cell *c, const struct engine *e);
+void cell_check_timesteps(struct cell *c);
 
 #endif /* SWIFT_CELL_H */
diff --git a/src/cooling/const_lambda/cooling.h b/src/cooling/const_lambda/cooling.h
index 757c5e6600d85e19910d2fd75e955e5342fb63ee..9fadd51e3c2a3c5462c8476e0aac893e3a2d530d 100644
--- a/src/cooling/const_lambda/cooling.h
+++ b/src/cooling/const_lambda/cooling.h
@@ -89,10 +89,9 @@ __attribute__((always_inline)) INLINE static void cooling_cool_part(
   float cooling_du_dt = cooling_rate(phys_const, us, cooling, p);
 
   /* Integrate cooling equation to enforce energy floor */
-  if (u_old + hydro_du_dt * dt < u_floor) {
-    cooling_du_dt = 0.f;
-  } else if (u_old + (hydro_du_dt + cooling_du_dt) * dt < u_floor) {
-    cooling_du_dt = (u_old + dt * hydro_du_dt - u_floor) / dt;
+  /* Factor of 1.5 included since timestep could potentially double */
+  if (u_old + (hydro_du_dt + cooling_du_dt) * 1.5f * dt < u_floor) {
+    cooling_du_dt = -(u_old + 1.5f * dt * hydro_du_dt - u_floor) / (1.5f * dt);
   }
 
   /* Update the internal energy time derivative */
diff --git a/src/engine.c b/src/engine.c
index 285e68dc94bdb2214ba8228d4ffa2df0b10c0813..2106c044fe02b360232f67cd037203e94ad9af64 100644
--- a/src/engine.c
+++ b/src/engine.c
@@ -48,6 +48,7 @@
 #include "engine.h"
 
 /* Local headers. */
+#include "active.h"
 #include "atomic.h"
 #include "cell.h"
 #include "clocks.h"
@@ -698,14 +699,14 @@ void engine_addtasks_send(struct engine *e, struct cell *ci, struct cell *cj,
         ci->super->drift = scheduler_addtask(
             s, task_type_drift, task_subtype_none, 0, 0, ci->super, NULL, 0);
 
-      t_xv = scheduler_addtask(s, task_type_send, task_subtype_none,
-                               4 * ci->tag, 0, ci, cj, 0);
-      t_rho = scheduler_addtask(s, task_type_send, task_subtype_none,
+      t_xv = scheduler_addtask(s, task_type_send, task_subtype_xv, 4 * ci->tag,
+                               0, ci, cj, 0);
+      t_rho = scheduler_addtask(s, task_type_send, task_subtype_rho,
                                 4 * ci->tag + 1, 0, ci, cj, 0);
       t_ti = scheduler_addtask(s, task_type_send, task_subtype_tend,
                                4 * ci->tag + 2, 0, ci, cj, 0);
 #ifdef EXTRA_HYDRO_LOOP
-      t_gradient = scheduler_addtask(s, task_type_send, task_subtype_none,
+      t_gradient = scheduler_addtask(s, task_type_send, task_subtype_gradient,
                                      4 * ci->tag + 3, 0, ci, cj, 0);
 #endif
 
@@ -739,8 +740,8 @@ void engine_addtasks_send(struct engine *e, struct cell *ci, struct cell *cj,
       /* Drift before you send */
       scheduler_addunlock(s, ci->super->drift, t_xv);
 
-      /* The super-cell's kick task should unlock the send_ti task. */
-      if (t_ti != NULL) scheduler_addunlock(s, ci->super->kick2, t_ti);
+      /* The super-cell's timestep task should unlock the send_ti task. */
+      scheduler_addunlock(s, ci->super->timestep, t_ti);
     }
 
     /* Add them to the local cell. */
@@ -749,7 +750,7 @@ void engine_addtasks_send(struct engine *e, struct cell *ci, struct cell *cj,
 #ifdef EXTRA_HYDRO_LOOP
     engine_addlink(e, &ci->send_gradient, t_gradient);
 #endif
-    if (t_ti != NULL) engine_addlink(e, &ci->send_ti, t_ti);
+    engine_addlink(e, &ci->send_ti, t_ti);
   }
 
   /* Recurse? */
@@ -787,14 +788,14 @@ void engine_addtasks_recv(struct engine *e, struct cell *c, struct task *t_xv,
   if (t_xv == NULL && c->density != NULL) {
 
     /* Create the tasks. */
-    t_xv = scheduler_addtask(s, task_type_recv, task_subtype_none, 4 * c->tag,
-                             0, c, NULL, 0);
-    t_rho = scheduler_addtask(s, task_type_recv, task_subtype_none,
+    t_xv = scheduler_addtask(s, task_type_recv, task_subtype_xv, 4 * c->tag, 0,
+                             c, NULL, 0);
+    t_rho = scheduler_addtask(s, task_type_recv, task_subtype_rho,
                               4 * c->tag + 1, 0, c, NULL, 0);
     t_ti = scheduler_addtask(s, task_type_recv, task_subtype_tend,
                              4 * c->tag + 2, 0, c, NULL, 0);
 #ifdef EXTRA_HYDRO_LOOP
-    t_gradient = scheduler_addtask(s, task_type_recv, task_subtype_none,
+    t_gradient = scheduler_addtask(s, task_type_recv, task_subtype_gradient,
                                    4 * c->tag + 3, 0, c, NULL, 0);
 #endif
   }
@@ -815,7 +816,7 @@ void engine_addtasks_recv(struct engine *e, struct cell *c, struct task *t_xv,
   }
   for (struct link *l = c->force; l != NULL; l = l->next) {
     scheduler_addunlock(s, t_gradient, l->t);
-    if (t_ti != NULL) scheduler_addunlock(s, l->t, t_ti);
+    scheduler_addunlock(s, l->t, t_ti);
   }
   if (c->sorts != NULL) scheduler_addunlock(s, t_xv, c->sorts);
 #else
@@ -825,7 +826,7 @@ void engine_addtasks_recv(struct engine *e, struct cell *c, struct task *t_xv,
   }
   for (struct link *l = c->force; l != NULL; l = l->next) {
     scheduler_addunlock(s, t_rho, l->t);
-    if (t_ti != NULL) scheduler_addunlock(s, l->t, t_ti);
+    scheduler_addunlock(s, l->t, t_ti);
   }
   if (c->sorts != NULL) scheduler_addunlock(s, t_xv, c->sorts);
 #endif
@@ -2049,9 +2050,9 @@ void engine_marktasks_mapper(void *map_data, int num_elements,
                              void *extra_data) {
   /* Unpack the arguments. */
   struct task *tasks = (struct task *)map_data;
-  const integertime_t ti_end = ((size_t *)extra_data)[0];
   size_t *rebuild_space = &((size_t *)extra_data)[1];
   struct scheduler *s = (struct scheduler *)(((size_t *)extra_data)[2]);
+  struct engine *e = (struct engine *)((size_t *)extra_data)[0];
 
   for (int ind = 0; ind < num_elements; ind++) {
     struct task *t = &tasks[ind];
@@ -2062,7 +2063,7 @@ void engine_marktasks_mapper(void *map_data, int num_elements,
         t->type == task_type_sourceterms || t->type == task_type_sub_self) {
 
       /* Set this task's skip. */
-      if (t->ci->ti_end_min <= ti_end) scheduler_activate(s, t);
+      if (cell_is_active(t->ci, e)) scheduler_activate(s, t);
     }
 
     /* Pair? */
@@ -2080,7 +2081,7 @@ void engine_marktasks_mapper(void *map_data, int num_elements,
         *rebuild_space = 1;
 
       /* Set this task's skip, otherwise nothing to do. */
-      if (ci->ti_end_min <= ti_end || cj->ti_end_min <= ti_end)
+      if (cell_is_active(t->ci, e) || cell_is_active(t->cj, e))
         scheduler_activate(s, t);
       else
         continue;
@@ -2109,8 +2110,10 @@ void engine_marktasks_mapper(void *map_data, int num_elements,
 
         /* Activate the tasks to recv foreign cell ci's data. */
         scheduler_activate(s, ci->recv_xv);
-        scheduler_activate(s, ci->recv_rho);
-        scheduler_activate(s, ci->recv_ti);
+        if (cell_is_active(ci, e)) {
+          scheduler_activate(s, ci->recv_rho);
+          scheduler_activate(s, ci->recv_ti);
+        }
 
         /* Look for the local cell cj's send tasks. */
         struct link *l = NULL;
@@ -2125,24 +2128,28 @@ void engine_marktasks_mapper(void *map_data, int num_elements,
         else
           error("Drift task missing !");
 
-        for (l = cj->send_rho; l != NULL && l->t->cj->nodeID != ci->nodeID;
-             l = l->next)
-          ;
-        if (l == NULL) error("Missing link to send_rho task.");
-        scheduler_activate(s, l->t);
-
-        for (l = cj->send_ti; l != NULL && l->t->cj->nodeID != ci->nodeID;
-             l = l->next)
-          ;
-        if (l == NULL) error("Missing link to send_ti task.");
-        scheduler_activate(s, l->t);
+        if (cell_is_active(cj, e)) {
+          for (l = cj->send_rho; l != NULL && l->t->cj->nodeID != ci->nodeID;
+               l = l->next)
+            ;
+          if (l == NULL) error("Missing link to send_rho task.");
+          scheduler_activate(s, l->t);
+
+          for (l = cj->send_ti; l != NULL && l->t->cj->nodeID != ci->nodeID;
+               l = l->next)
+            ;
+          if (l == NULL) error("Missing link to send_ti task.");
+          scheduler_activate(s, l->t);
+        }
 
       } else if (cj->nodeID != engine_rank) {
 
         /* Activate the tasks to recv foreign cell cj's data. */
         scheduler_activate(s, cj->recv_xv);
-        scheduler_activate(s, cj->recv_rho);
-        scheduler_activate(s, cj->recv_ti);
+        if (cell_is_active(cj, e)) {
+          scheduler_activate(s, cj->recv_rho);
+          scheduler_activate(s, cj->recv_ti);
+        }
 
         /* Look for the local cell ci's send tasks. */
         struct link *l = NULL;
@@ -2157,49 +2164,35 @@ void engine_marktasks_mapper(void *map_data, int num_elements,
         else
           error("Drift task missing !");
 
-        for (l = ci->send_rho; l != NULL && l->t->cj->nodeID != cj->nodeID;
-             l = l->next)
-          ;
-        if (l == NULL) error("Missing link to send_rho task.");
-        scheduler_activate(s, l->t);
-
-        for (l = ci->send_ti; l != NULL && l->t->cj->nodeID != cj->nodeID;
-             l = l->next)
-          ;
-        if (l == NULL) error("Missing link to send_ti task.");
-        scheduler_activate(s, l->t);
-      } else {
-        scheduler_activate(s, ci->super->drift);
-        scheduler_activate(s, cj->super->drift);
+        if (cell_is_active(ci, e)) {
+          for (l = ci->send_rho; l != NULL && l->t->cj->nodeID != cj->nodeID;
+               l = l->next)
+            ;
+          if (l == NULL) error("Missing link to send_rho task.");
+          scheduler_activate(s, l->t);
+
+          for (l = ci->send_ti; l != NULL && l->t->cj->nodeID != cj->nodeID;
+               l = l->next)
+            ;
+          if (l == NULL) error("Missing link to send_ti task.");
+          scheduler_activate(s, l->t);
+        }
       }
-#else
-      scheduler_activate(s, ci->super->drift);
-      scheduler_activate(s, cj->super->drift);
+
 #endif
     }
 
-    /* Kick? */
-    else if (t->type == task_type_kick1) {
-      if (t->ci->ti_end_min <= ti_end) scheduler_activate(s, t);
-    } else if (t->type == task_type_kick2) {
-      if (t->ci->ti_end_min <= ti_end) scheduler_activate(s, t);
+    /* Kick/Drift/Init? */
+    else if (t->type == task_type_kick1 || t->type == task_type_kick2 ||
+             t->type == task_type_drift || t->type == task_type_init) {
+      if (cell_is_active(t->ci, e)) scheduler_activate(s, t);
     }
 
     /* Time-step? */
     else if (t->type == task_type_timestep) {
       t->ci->updated = 0;
       t->ci->g_updated = 0;
-      if (t->ci->ti_end_min <= ti_end) scheduler_activate(s, t);
-    }
-
-    /* Drift? */
-    else if (t->type == task_type_drift) {
-      if (t->ci->ti_end_min <= ti_end) scheduler_activate(s, t);
-    }
-
-    /* Init? */
-    else if (t->type == task_type_init) {
-      if (t->ci->ti_end_min <= ti_end) scheduler_activate(s, t);
+      if (cell_is_active(t->ci, e)) scheduler_activate(s, t);
     }
 
     /* Tasks with no cells should not be skipped? */
@@ -2222,7 +2215,7 @@ int engine_marktasks(struct engine *e) {
   int rebuild_space = 0;
 
   /* Run through the tasks and mark as skip or not. */
-  size_t extra_data[3] = {e->ti_current, rebuild_space, (size_t)&e->sched};
+  size_t extra_data[3] = {(size_t)e, rebuild_space, (size_t)&e->sched};
   threadpool_map(&e->threadpool, engine_marktasks_mapper, s->tasks, s->nr_tasks,
                  sizeof(struct task), 10000, extra_data);
   rebuild_space = extra_data[1];
@@ -2321,15 +2314,16 @@ void engine_rebuild(struct engine *e) {
  * @param drift_all Whether to drift particles before rebuilding or not. Will
  *                not be necessary if all particles have already been
  *                drifted (before repartitioning for instance).
- * @param deferskip Whether to defer the skip until after the rebuild.
- *                  Needed after a repartition.
+ * @param postrepart If we have just repartitioned, if so we need to defer the
+ *                   skip until after the rebuild and not check the if all
+ *                   cells have been drifted.
  */
-void engine_prepare(struct engine *e, int drift_all, int deferskip) {
+void engine_prepare(struct engine *e, int drift_all, int postrepart) {
 
   TIMER_TIC;
 
   /* Unskip active tasks and check for rebuild */
-  if (!deferskip) engine_unskip(e);
+  if (!postrepart) engine_unskip(e);
 
   /* Run through the tasks and mark as skip or not. */
   int rebuild = e->forcerebuild;
@@ -2350,13 +2344,15 @@ void engine_prepare(struct engine *e, int drift_all, int deferskip) {
     if (drift_all) engine_drift_all(e);
 
 #ifdef SWIFT_DEBUG_CHECKS
-    /* Check that all cells have been drifted to the current time */
-    space_check_drift_point(e->s, e->ti_current);
+    /* Check that all cells have been drifted to the current time, unless
+     * we have just repartitioned, that can include cells that have not
+     * previously been active on this rank. */
+    if (!postrepart) space_check_drift_point(e->s, e->ti_current);
 #endif
 
     engine_rebuild(e);
   }
-  if (deferskip) engine_unskip(e);
+  if (postrepart) engine_unskip(e);
 
   /* Re-rank the tasks every now and then. */
   if (e->tasks_age % engine_tasksreweight == 1) {
@@ -2417,36 +2413,33 @@ void engine_barrier(struct engine *e, int tid) {
  */
 void engine_collect_kick(struct cell *c) {
 
-  /* Skip super-cells (Their values are already set) */
+/* Skip super-cells (Their values are already set) */
+#ifdef WITH_MPI
+  if (c->timestep != NULL || c->recv_ti != NULL) return;
+#else
   if (c->timestep != NULL) return;
+#endif /* WITH_MPI */
 
   /* Counters for the different quantities. */
   int updated = 0, g_updated = 0;
   integertime_t ti_end_min = max_nr_timesteps;
 
-  /* Only do something is the cell is non-empty */
-  if (c->count != 0 || c->gcount != 0) {
+  /* Collect the values from the progeny. */
+  for (int k = 0; k < 8; k++) {
+    struct cell *cp = c->progeny[k];
+    if (cp != NULL && (cp->count > 0 || cp->gcount > 0)) {
 
-    /* If this cell is not split, I'm in trouble. */
-    if (!c->split) error("Cell is not split.");
+      /* Recurse */
+      engine_collect_kick(cp);
 
-    /* Collect the values from the progeny. */
-    for (int k = 0; k < 8; k++) {
-      struct cell *cp = c->progeny[k];
-      if (cp != NULL) {
+      /* And update */
+      ti_end_min = min(ti_end_min, cp->ti_end_min);
+      updated += cp->updated;
+      g_updated += cp->g_updated;
 
-        /* Recurse */
-        engine_collect_kick(cp);
-
-        /* And update */
-        ti_end_min = min(ti_end_min, cp->ti_end_min);
-        updated += cp->updated;
-        g_updated += cp->g_updated;
-
-        /* Collected, so clear for next time. */
-        cp->updated = 0;
-        cp->g_updated = 0;
-      }
+      /* Collected, so clear for next time. */
+      cp->updated = 0;
+      cp->g_updated = 0;
     }
   }
 
@@ -2470,9 +2463,9 @@ void engine_collect_timestep(struct engine *e) {
   const struct space *s = e->s;
 
   /* Collect the cell data. */
-  for (int k = 0; k < s->nr_cells; k++)
-    if (s->cells_top[k].nodeID == e->nodeID) {
-      struct cell *c = &s->cells_top[k];
+  for (int k = 0; k < s->nr_cells; k++) {
+    struct cell *c = &s->cells_top[k];
+    if (c->count > 0 || c->gcount > 0) {
 
       /* Make the top-cells recurse */
       engine_collect_kick(c);
@@ -2486,15 +2479,16 @@ void engine_collect_timestep(struct engine *e) {
       c->updated = 0;
       c->g_updated = 0;
     }
+  }
 
 /* Aggregate the data from the different nodes. */
 #ifdef WITH_MPI
   {
-    int in_i[1], out_i[1];
+    integertime_t in_i[1], out_i[1];
     in_i[0] = 0;
     out_i[0] = ti_end_min;
-    if (MPI_Allreduce(out_i, in_i, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD) !=
-        MPI_SUCCESS)
+    if (MPI_Allreduce(out_i, in_i, 1, MPI_LONG_LONG_INT, MPI_MIN,
+                      MPI_COMM_WORLD) != MPI_SUCCESS)
       error("Failed to aggregate t_end_min.");
     ti_end_min = in_i[0];
   }
@@ -2672,7 +2666,9 @@ void engine_init_particles(struct engine *e, int flag_entropy_ICs) {
     if (e->nodeID == 0) message("Converting internal energy variable.");
 
     /* Apply the conversion */
-    space_map_cells_pre(s, 0, cell_convert_hydro, NULL);
+    // space_map_cells_pre(s, 0, cell_convert_hydro, NULL);
+    for (size_t i = 0; i < s->nr_parts; ++i)
+      hydro_convert_quantities(&s->parts[i], &s->xparts[i]);
 
     /* Correct what we did (e.g. in PE-SPH, need to recompute rho_bar) */
     if (hydro_need_extra_init_loop) {
@@ -2693,8 +2689,12 @@ void engine_init_particles(struct engine *e, int flag_entropy_ICs) {
 
   clocks_gettime(&time2);
 
+#ifdef SWIFT_DEBUG_CHECKS
+  space_check_timesteps(e->s);
+#endif
+
   /* Ready to go */
-  e->step = -1;
+  e->step = 0;
   e->forcerebuild = 1;
   e->wallclock_time = (float)clocks_diff(&time1, &time2);
 
@@ -2785,12 +2785,6 @@ void engine_step(struct engine *e) {
   engine_launch(e, e->nr_threads);
   TIMER_TOC(timer_runners);
 
-#ifdef SWIFT_DEBUG_CHECKS
-  for (size_t i = 0; i < e->s->nr_parts; ++i) {
-    if (e->s->parts[i].time_bin == 0) error("Particle in bin 0");
-  }
-#endif
-
   TIMER_TOC2(timer_step);
 
   clocks_gettime(&time2);
@@ -2833,6 +2827,11 @@ void engine_drift_all(struct engine *e) {
   threadpool_map(&e->threadpool, runner_do_drift_mapper, e->s->cells_top,
                  e->s->nr_cells, sizeof(struct cell), 1, e);
 
+#ifdef SWIFT_DEBUG_CHECKS
+  /* Check that all cells have been drifted to the current time. */
+  space_check_drift_point(e->s, e->ti_current);
+#endif
+
   if (e->verbose)
     message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
             clocks_getunit());
@@ -3317,6 +3316,7 @@ void engine_init(struct engine *e, struct space *s,
 #endif
 
   if (with_aff) {
+#ifdef HAVE_SETAFFINITY
 #ifdef WITH_MPI
     printf("[%04i] %s engine_init: cpu map is [ ", nodeID,
            clocks_get_timesincestart());
@@ -3325,6 +3325,7 @@ void engine_init(struct engine *e, struct space *s,
 #endif
     for (int i = 0; i < nr_affinity_cores; i++) printf("%i ", cpuid[i]);
     printf("].\n");
+#endif
   }
 
   /* Are we doing stuff in parallel? */
diff --git a/src/engine.h b/src/engine.h
index 72d54f6d263eef9cc2f7d63a3b2aeddcf8b3c2e5..db5a7317d233a8e386fb03817cd42659ecd10816 100644
--- a/src/engine.h
+++ b/src/engine.h
@@ -225,7 +225,7 @@ void engine_init(struct engine *e, struct space *s,
                  const struct cooling_function_data *cooling,
                  struct sourceterms *sourceterms);
 void engine_launch(struct engine *e, int nr_runners);
-void engine_prepare(struct engine *e, int drift_all, int deferskip);
+void engine_prepare(struct engine *e, int drift_all, int postrepart);
 void engine_print(struct engine *e);
 void engine_init_particles(struct engine *e, int flag_entropy_ICs);
 void engine_step(struct engine *e);
diff --git a/src/gravity/Default/gravity.h b/src/gravity/Default/gravity.h
index bf8362a573519ecfacb54ae6001aedb74e429db3..a0bfee05f8b7f93cce65e8b9a3e7e322e166569d 100644
--- a/src/gravity/Default/gravity.h
+++ b/src/gravity/Default/gravity.h
@@ -42,21 +42,6 @@ gravity_compute_timestep_self(const struct gpart* const gp) {
   return dt;
 }
 
-/**
- * @brief Initialises the g-particles for the first time
- *
- * This function is called only once just after the ICs have been
- * read in to do some conversions.
- *
- * @param gp The particle to act upon
- */
-__attribute__((always_inline)) INLINE static void gravity_first_init_gpart(
-    struct gpart* gp) {
-
-  gp->time_bin = 0;
-  gp->epsilon = 0.;  // MATTHIEU
-}
-
 /**
  * @brief Prepares a g-particle for the gravity calculation
  *
@@ -100,4 +85,21 @@ __attribute__((always_inline)) INLINE static void gravity_end_force(
 __attribute__((always_inline)) INLINE static void gravity_kick_extra(
     struct gpart* gp, float dt) {}
 
+/**
+ * @brief Initialises the g-particles for the first time
+ *
+ * This function is called only once just after the ICs have been
+ * read in to do some conversions.
+ *
+ * @param gp The particle to act upon
+ */
+__attribute__((always_inline)) INLINE static void gravity_first_init_gpart(
+    struct gpart* gp) {
+
+  gp->time_bin = 0;
+  gp->epsilon = 0.;  // MATTHIEU
+
+  gravity_init_gpart(gp);
+}
+
 #endif /* SWIFT_DEFAULT_GRAVITY_H */
diff --git a/src/gravity/Default/gravity_debug.h b/src/gravity/Default/gravity_debug.h
index 4c3c41c94329666c274e6c129e728b59d2f5b892..f0d145647ab3f973f3c0ffc2f995ee01d534bc72 100644
--- a/src/gravity/Default/gravity_debug.h
+++ b/src/gravity/Default/gravity_debug.h
@@ -22,11 +22,10 @@
 __attribute__((always_inline)) INLINE static void gravity_debug_particle(
     const struct gpart* p) {
   printf(
-      "x=[%.3e,%.3e,%.3e], "
-      "v_full=[%.3e,%.3e,%.3e] \n a=[%.3e,%.3e,%.3e],\n "
-      "mass=%.3e time_bin=%d\n",
-      p->x[0], p->x[1], p->x[2], p->v_full[0], p->v_full[1], p->v_full[2],
-      p->a_grav[0], p->a_grav[1], p->a_grav[2], p->mass, p->time_bin);
+      "mass=%.3e epsilon=%.5e time_bin=%d\n"
+      "x=[%.5e,%.5e,%.5e], v_full=[%.5e,%.5e,%.5e], a=[%.5e,%.5e,%.5e]\n",
+      p->mass, p->epsilon, p->time_bin, p->x[0], p->x[1], p->x[2], p->v_full[0],
+      p->v_full[1], p->v_full[2], p->a_grav[0], p->a_grav[1], p->a_grav[2]);
 }
 
 #endif /* SWIFT_DEFAULT_GRAVITY_DEBUG_H */
diff --git a/src/gravity/Default/gravity_part.h b/src/gravity/Default/gravity_part.h
index 18d524d39f86f363df2a8ad7ce6638f438bdb477..aa309a5beb201c1aeec0cabe9d64a158d2071fa9 100644
--- a/src/gravity/Default/gravity_part.h
+++ b/src/gravity/Default/gravity_part.h
@@ -47,6 +47,16 @@ struct gpart {
   /* Time-step length */
   timebin_t time_bin;
 
+#ifdef SWIFT_DEBUG_CHECKS
+
+  /* Time of the last drift */
+  integertime_t ti_drift;
+
+  /* Time of the last kick */
+  integertime_t ti_kick;
+
+#endif
+
 } SWIFT_STRUCT_ALIGN;
 
 #endif /* SWIFT_DEFAULT_GRAVITY_PART_H */
diff --git a/src/hydro/Gadget2/hydro.h b/src/hydro/Gadget2/hydro.h
index 0bfbf7ffef18f22cac9440ccc8adde0ba1899616..160a2d8b5d25a97cefb2afd5e22d8e6bcea0006e 100644
--- a/src/hydro/Gadget2/hydro.h
+++ b/src/hydro/Gadget2/hydro.h
@@ -377,9 +377,13 @@ __attribute__((always_inline)) INLINE static void hydro_kick_extra(
     struct part *restrict p, struct xpart *restrict xp, float dt) {
 
   /* Do not decrease the entropy by more than a factor of 2 */
-  const float entropy_change = p->entropy_dt * dt;
-  xp->entropy_full =
-      max(xp->entropy_full + entropy_change, 0.5f * xp->entropy_full);
+  if (dt > 0. && p->entropy_dt * dt < -0.5f * xp->entropy_full) {
+    /* message("Warning! Limiting entropy_dt. Possible cooling error.\n
+     * entropy_full = %g \n entropy_dt * dt =%g \n", */
+    /* 	     xp->entropy_full,p->entropy_dt * dt); */
+    p->entropy_dt = -0.5f * xp->entropy_full / dt;
+  }
+  xp->entropy_full += p->entropy_dt * dt;
 
   /* Compute the pressure */
   const float pressure = gas_pressure_from_entropy(p->rho, xp->entropy_full);
diff --git a/src/hydro/Minimal/hydro.h b/src/hydro/Minimal/hydro.h
index f88a2896ffcf09eef422c9449d30ba3dc62eb14b..20856b7e038855e22aa3776a74ba9f495ff6c93f 100644
--- a/src/hydro/Minimal/hydro.h
+++ b/src/hydro/Minimal/hydro.h
@@ -360,8 +360,10 @@ __attribute__((always_inline)) INLINE static void hydro_kick_extra(
     struct part *restrict p, struct xpart *restrict xp, float dt) {
 
   /* Do not decrease the energy by more than a factor of 2*/
-  const float u_change = p->u_dt * dt;
-  xp->u_full = max(xp->u_full + u_change, 0.5f * xp->u_full);
+  if (dt > 0. && p->u_dt * dt < -0.5f * xp->u_full) {
+    p->u_dt = -0.5f * xp->u_full / dt;
+  }
+  xp->u_full += p->u_dt * dt;
 
   /* Compute the pressure */
   const float pressure = gas_pressure_from_internal_energy(p->rho, xp->u_full);
diff --git a/src/hydro/PressureEntropy/hydro.h b/src/hydro/PressureEntropy/hydro.h
index 5ac0c73b70b8ebae7a0f5107b0a35b094e179053..f22bb8a13a8ba4d896a77bd4c4f5e86bed5a5960 100644
--- a/src/hydro/PressureEntropy/hydro.h
+++ b/src/hydro/PressureEntropy/hydro.h
@@ -394,11 +394,10 @@ __attribute__((always_inline)) INLINE static void hydro_kick_extra(
     struct part *restrict p, struct xpart *restrict xp, float dt) {
 
   /* Do not decrease the entropy (temperature) by more than a factor of 2*/
-  const float entropy_change = p->entropy_dt * dt;
-  if (entropy_change > -0.5f * xp->entropy_full)
-    xp->entropy_full += entropy_change;
-  else
-    xp->entropy_full *= 0.5f;
+  if (dt > 0. && p->entropy_dt * dt < -0.5f * xp->entropy_full) {
+    p->entropy_dt = -0.5f * xp->entropy_full / dt;
+  }
+  xp->entropy_full += p->entropy_dt * dt;
 
   /* Compute the pressure */
   const float pressure =
diff --git a/src/potential/isothermal/potential.h b/src/potential/isothermal/potential.h
index b82e97d23e3d682fa8ca0cd26bd6e70de64733d5..9c07f3eb67528a003788ca94bd1e2e52dd985a2c 100644
--- a/src/potential/isothermal/potential.h
+++ b/src/potential/isothermal/potential.h
@@ -132,7 +132,7 @@ __attribute__((always_inline)) INLINE static void external_gravity_acceleration(
  * @brief Computes the gravitational potential energy of a particle in an
  * isothermal potential.
  *
- * phi = 0.5 * vrot^2 * ln(r^2 + epsilon^2)
+ * phi = -0.5 * vrot^2 * ln(r^2 + epsilon^2)
  *
  * @param time The current time (unused here).
  * @param potential The #external_potential used in the run.
@@ -148,8 +148,8 @@ external_gravity_get_potential_energy(
   const float dy = g->x[1] - potential->y;
   const float dz = g->x[2] - potential->z;
 
-  return 0.5f * potential->vrot * potential->vrot *
-         logf(dx * dx + dy * dy * dz * dz + potential->epsilon2);
+  return -0.5f * potential->vrot * potential->vrot *
+         logf(dx * dx + dy * dy + dz * dz + potential->epsilon2);
 }
 
 /**
diff --git a/src/runner.c b/src/runner.c
index 3bb5151fde8d0da34c557c5981a60d707d9af794..69ae0b059d7e92cf204d43bf914e3b8916e2afd3 100644
--- a/src/runner.c
+++ b/src/runner.c
@@ -336,9 +336,10 @@ void runner_do_sort(struct runner *r, struct cell *c, int flags, int clock) {
   struct part *parts = c->parts;
   struct xpart *xparts = c->xparts;
   struct entry *sort;
-  const int count = c->count;
-  int off[8], inds[8], temp_i;
+  int j, k, count = c->count;
+  int i, ind, off[8], inds[8], temp_i, missing;
   float buff[8];
+  double px[3];
 
   TIMER_TIC
 
@@ -776,6 +777,9 @@ void runner_do_ghost(struct runner *r, struct cell *c, int timer) {
  */
 static void runner_do_unskip(struct cell *c, struct engine *e) {
 
+  /* Ignore empty cells. */
+  if (c->count == 0 && c->gcount == 0) return;
+
   /* Recurse */
   if (c->split) {
     for (int k = 0; k < 8; k++) {
@@ -893,7 +897,11 @@ void runner_do_kick1(struct runner *r, struct cell *c, int timer) {
         const integertime_t ti_end =
             get_integer_time_end(ti_current, p->time_bin);
 
-        if (ti_end - ti_begin != ti_step) error("Particle in wrong time-bin");
+        if (ti_end - ti_begin != ti_step)
+          error(
+              "Particle in wrong time-bin, ti_end=%lld, ti_begin=%lld, "
+              "ti_step=%lld time_bin=%d ti_current=%lld",
+              ti_end, ti_begin, ti_step, p->time_bin, ti_current);
 #endif
 
         /* do the kick */
@@ -976,7 +984,10 @@ void runner_do_kick2(struct runner *r, struct cell *c, int timer) {
 
 #ifdef SWIFT_DEBUG_CHECKS
         if (ti_begin + ti_step != ti_current)
-          error("Particle in wrong time-bin");
+          error(
+              "Particle in wrong time-bin, ti_begin=%lld, ti_step=%lld "
+              "time_bin=%d ti_current=%lld",
+              ti_begin, ti_step, p->time_bin, ti_current);
 #endif
 
         /* Finish the time-step with a second half-kick */
@@ -1072,7 +1083,9 @@ void runner_do_timestep(struct runner *r, struct cell *c, int timer) {
         /* What is the next sync-point ? */
         ti_end_min = min(ti_current + ti_new_step, ti_end_min);
         ti_end_max = max(ti_current + ti_new_step, ti_end_max);
-      } else { /* part is inactive */
+      }
+
+      else { /* part is inactive */
 
         const integertime_t ti_end =
             get_integer_time_end(ti_current, p->time_bin);
@@ -1212,20 +1225,18 @@ void runner_do_end_force(struct runner *r, struct cell *c, int timer) {
 }
 
 /**
- * @brief Construct the cell properties from the received particles
+ * @brief Construct the cell properties from the received #part.
  *
  * @param r The runner thread.
  * @param c The cell.
  * @param timer Are we timing this ?
  */
-void runner_do_recv_cell(struct runner *r, struct cell *c, int timer) {
+void runner_do_recv_part(struct runner *r, struct cell *c, int timer) {
 
 #ifdef WITH_MPI
 
   const struct part *restrict parts = c->parts;
-  const struct gpart *restrict gparts = c->gparts;
   const size_t nr_parts = c->count;
-  const size_t nr_gparts = c->gcount;
   const integertime_t ti_current = r->e->ti_current;
 
   TIMER_TIC;
@@ -1241,40 +1252,107 @@ void runner_do_recv_cell(struct runner *r, struct cell *c, int timer) {
     for (size_t k = 0; k < nr_parts; k++) {
       const integertime_t ti_end =
           get_integer_time_end(ti_current, parts[k].time_bin);
-      // if(ti_end < ti_current) error("Received invalid particle !");
       ti_end_min = min(ti_end_min, ti_end);
       ti_end_max = max(ti_end_max, ti_end);
       h_max = max(h_max, parts[k].h);
+
+#ifdef SWIFT_DEBUG_CHECKS
+      if (parts[k].ti_drift != ti_current)
+        error("Received un-drifted particle !");
+#endif
     }
+  }
+
+  /* Otherwise, recurse and collect. */
+  else {
+    for (int k = 0; k < 8; k++) {
+      if (c->progeny[k] != NULL) {
+        runner_do_recv_part(r, c->progeny[k], 0);
+        ti_end_min = min(ti_end_min, c->progeny[k]->ti_end_min);
+        ti_end_max = max(ti_end_max, c->progeny[k]->ti_end_max);
+        h_max = max(h_max, c->progeny[k]->h_max);
+      }
+    }
+  }
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (ti_end_min < ti_current)
+    error(
+        "Received a cell at an incorrect time c->ti_end_min=%lld, "
+        "e->ti_current=%lld.",
+        ti_end_min, ti_current);
+#endif
+
+  /* ... and store. */
+  c->ti_end_min = ti_end_min;
+  c->ti_end_max = ti_end_max;
+  c->ti_old = ti_current;
+  c->h_max = h_max;
+
+  if (timer) TIMER_TOC(timer_dorecv_part);
+
+#else
+  error("SWIFT was not compiled with MPI support.");
+#endif
+}
+
+/**
+ * @brief Construct the cell properties from the received #gpart.
+ *
+ * @param r The runner thread.
+ * @param c The cell.
+ * @param timer Are we timing this ?
+ */
+void runner_do_recv_gpart(struct runner *r, struct cell *c, int timer) {
+
+#ifdef WITH_MPI
+
+  const struct gpart *restrict gparts = c->gparts;
+  const size_t nr_gparts = c->gcount;
+  const integertime_t ti_current = r->e->ti_current;
+
+  TIMER_TIC;
+
+  integertime_t ti_end_min = max_nr_timesteps;
+  integertime_t ti_end_max = 0;
+
+  /* If this cell is a leaf, collect the particle data. */
+  if (!c->split) {
+
+    /* Collect everything... */
     for (size_t k = 0; k < nr_gparts; k++) {
       const integertime_t ti_end =
           get_integer_time_end(ti_current, gparts[k].time_bin);
-      // if(ti_end < ti_current) error("Received invalid particle !");
       ti_end_min = min(ti_end_min, ti_end);
       ti_end_max = max(ti_end_max, ti_end);
     }
-
   }
 
   /* Otherwise, recurse and collect. */
   else {
     for (int k = 0; k < 8; k++) {
       if (c->progeny[k] != NULL) {
-        runner_do_recv_cell(r, c->progeny[k], 0);
+        runner_do_recv_gpart(r, c->progeny[k], 0);
         ti_end_min = min(ti_end_min, c->progeny[k]->ti_end_min);
         ti_end_max = max(ti_end_max, c->progeny[k]->ti_end_max);
-        h_max = max(h_max, c->progeny[k]->h_max);
       }
     }
   }
 
+#ifdef SWIFT_DEBUG_CHECKS
+  if (ti_end_min < ti_current)
+    error(
+        "Received a cell at an incorrect time c->ti_end_min=%lld, "
+        "e->ti_current=%lld.",
+        ti_end_min, ti_current);
+#endif
+
   /* ... and store. */
   c->ti_end_min = ti_end_min;
   c->ti_end_max = ti_end_max;
   c->ti_old = ti_current;
-  c->h_max = h_max;
 
-  if (timer) TIMER_TOC(timer_dorecv_cell);
+  if (timer) TIMER_TOC(timer_dorecv_gpart);
 
 #else
   error("SWIFT was not compiled with MPI support.");
@@ -1337,13 +1415,13 @@ void *runner_main(void *data) {
 
       } else if (cj == NULL) { /* self */
 
-        /* if (!cell_is_active(ci, e) && t->type != task_type_sort &&
+        if (!cell_is_active(ci, e) && t->type != task_type_sort &&
             t->type != task_type_send && t->type != task_type_recv)
           error(
               "Task (type='%s/%s') should have been skipped ti_current=%lld "
               "c->ti_end_min=%lld",
               taskID_names[t->type], subtaskID_names[t->subtype], e->ti_current,
-              ci->ti_end_min); */
+              ci->ti_end_min);
 
         /* Special case for sorts */
         if (!cell_is_active(ci, e) && t->type == task_type_sort &&
@@ -1476,8 +1554,11 @@ void *runner_main(void *data) {
           if (t->subtype == task_subtype_tend) {
             cell_unpack_ti_ends(ci, t->buff);
             free(t->buff);
-          } else {
-            runner_do_recv_cell(r, ci, 1);
+          } else if (t->subtype == task_subtype_xv ||
+                     t->subtype == task_subtype_rho) {
+            runner_do_recv_part(r, ci, 1);
+          } else if (t->subtype == task_subtype_gpart) {
+            runner_do_recv_gpart(r, ci, 1);
           }
           break;
 #endif
diff --git a/src/runner_doiact.h b/src/runner_doiact.h
index 7e6165c5be37cfa8da1343e11a98ca5d33c28347..9188d877d84a56a7da604fae6b9a46991a22862b 100644
--- a/src/runner_doiact.h
+++ b/src/runner_doiact.h
@@ -32,9 +32,18 @@
 #define _DOPAIR2(f) PASTE(runner_dopair2, f)
 #define DOPAIR2 _DOPAIR2(FUNCTION)
 
+#define _DOPAIR1_NOSORT(f) PASTE(runner_dopair1_nosort, f)
+#define DOPAIR1_NOSORT _DOPAIR1_NOSORT(FUNCTION)
+
+#define _DOPAIR2_NOSORT(f) PASTE(runner_dopair2_nosort, f)
+#define DOPAIR2_NOSORT _DOPAIR2_NOSORT(FUNCTION)
+
 #define _DOPAIR_SUBSET(f) PASTE(runner_dopair_subset, f)
 #define DOPAIR_SUBSET _DOPAIR_SUBSET(FUNCTION)
 
+#define _DOPAIR_SUBSET_NOSORT(f) PASTE(runner_dopair_subset_nosort, f)
+#define DOPAIR_SUBSET_NOSORT _DOPAIR_SUBSET_NOSORT(FUNCTION)
+
 #define _DOPAIR_SUBSET_NAIVE(f) PASTE(runner_dopair_subset_naive, f)
 #define DOPAIR_SUBSET_NAIVE _DOPAIR_SUBSET_NAIVE(FUNCTION)
 
@@ -101,6 +110,8 @@
 #define _TIMER_DOPAIR_SUBSET(f) PASTE(timer_dopair_subset, f)
 #define TIMER_DOPAIR_SUBSET _TIMER_DOPAIR_SUBSET(FUNCTION)
 
+#include "runner_doiact_nosort.h"
+
 /**
  * @brief Compute the interactions between a cell pair (non-symmetric).
  *
@@ -249,7 +260,7 @@ void DOPAIR2_NAIVE(struct runner *r, struct cell *restrict ci,
 // error("Don't use in actual runs ! Slow code !");
 #endif
 
-#ifdef WITH_VECTORIZATION
+#ifdef WITH_OLD_VECTORIZATION
   int icount = 0;
   float r2q[VEC_SIZE] __attribute__((aligned(16)));
   float hiq[VEC_SIZE] __attribute__((aligned(16)));
@@ -304,7 +315,7 @@ void DOPAIR2_NAIVE(struct runner *r, struct cell *restrict ci,
       /* Hit or miss? */
       if (r2 < hig2 || r2 < pj->h * pj->h * kernel_gamma2) {
 
-#ifndef WITH_VECTORIZATION
+#ifndef WITH_OLD_VECTORIZATION
 
         IACT(r2, dx, hi, pj->h, pi, pj);
 
@@ -334,7 +345,7 @@ void DOPAIR2_NAIVE(struct runner *r, struct cell *restrict ci,
 
   } /* loop over the parts in ci. */
 
-#ifdef WITH_VECTORIZATION
+#ifdef WITH_OLD_VECTORIZATION
   /* Pick up any leftovers. */
   if (icount > 0)
     for (int k = 0; k < icount; k++)
@@ -352,7 +363,7 @@ void DOSELF_NAIVE(struct runner *r, struct cell *restrict c) {
 // error("Don't use in actual runs ! Slow code !");
 #endif
 
-#ifdef WITH_VECTORIZATION
+#ifdef WITH_OLD_VECTORIZATION
   int icount = 0;
   float r2q[VEC_SIZE] __attribute__((aligned(16)));
   float hiq[VEC_SIZE] __attribute__((aligned(16)));
@@ -395,7 +406,7 @@ void DOSELF_NAIVE(struct runner *r, struct cell *restrict c) {
       /* Hit or miss? */
       if (r2 < hig2 || r2 < pj->h * pj->h * kernel_gamma2) {
 
-#ifndef WITH_VECTORIZATION
+#ifndef WITH_OLD_VECTORIZATION
 
         IACT(r2, dx, hi, pj->h, pi, pj);
 
@@ -425,7 +436,7 @@ void DOSELF_NAIVE(struct runner *r, struct cell *restrict c) {
 
   } /* loop over the parts in ci. */
 
-#ifdef WITH_VECTORIZATION
+#ifdef WITH_OLD_VECTORIZATION
   /* Pick up any leftovers. */
   if (icount > 0)
     for (int k = 0; k < icount; k++)
@@ -454,7 +465,7 @@ void DOPAIR_SUBSET_NAIVE(struct runner *r, struct cell *restrict ci,
 
   error("Don't use in actual runs ! Slow code !");
 
-#ifdef WITH_VECTORIZATION
+#ifdef WITH_OLD_VECTORIZATION
   int icount = 0;
   float r2q[VEC_SIZE] __attribute__((aligned(16)));
   float hiq[VEC_SIZE] __attribute__((aligned(16)));
@@ -504,7 +515,7 @@ void DOPAIR_SUBSET_NAIVE(struct runner *r, struct cell *restrict ci,
       /* Hit or miss? */
       if (r2 < hig2) {
 
-#ifndef WITH_VECTORIZATION
+#ifndef WITH_OLD_VECTORIZATION
 
         IACT_NONSYM(r2, dx, hi, pj->h, pi, pj);
 
@@ -534,7 +545,7 @@ void DOPAIR_SUBSET_NAIVE(struct runner *r, struct cell *restrict ci,
 
   } /* loop over the parts in ci. */
 
-#ifdef WITH_VECTORIZATION
+#ifdef WITH_OLD_VECTORIZATION
   /* Pick up any leftovers. */
   if (icount > 0)
     for (int k = 0; k < icount; k++)
@@ -561,7 +572,14 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci,
 
   struct engine *e = r->e;
 
-#ifdef WITH_VECTORIZATION
+#ifdef WITH_MPI
+  if (ci->nodeID != cj->nodeID) {
+    DOPAIR_SUBSET_NOSORT(r, ci, parts_i, ind, count, cj);
+    return;
+  }
+#endif
+
+#ifdef WITH_OLD_VECTORIZATION
   int icount = 0;
   float r2q[VEC_SIZE] __attribute__((aligned(16)));
   float hiq[VEC_SIZE] __attribute__((aligned(16)));
@@ -636,7 +654,7 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci,
         /* Hit or miss? */
         if (r2 < hig2) {
 
-#ifndef WITH_VECTORIZATION
+#ifndef WITH_OLD_VECTORIZATION
 
           IACT_NONSYM(r2, dx, hi, pj->h, pi, pj);
 
@@ -701,7 +719,7 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci,
         /* Hit or miss? */
         if (r2 < hig2) {
 
-#ifndef WITH_VECTORIZATION
+#ifndef WITH_OLD_VECTORIZATION
 
           IACT_NONSYM(r2, dx, hi, pj->h, pi, pj);
 
@@ -732,7 +750,7 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci,
     } /* loop over the parts in ci. */
   }
 
-#ifdef WITH_VECTORIZATION
+#ifdef WITH_OLD_VECTORIZATION
   /* Pick up any leftovers. */
   if (icount > 0)
     for (int k = 0; k < icount; k++)
@@ -755,7 +773,7 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci,
 void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci,
                    struct part *restrict parts, int *restrict ind, int count) {
 
-#ifdef WITH_VECTORIZATION
+#ifdef WITH_OLD_VECTORIZATION
   int icount = 0;
   float r2q[VEC_SIZE] __attribute__((aligned(16)));
   float hiq[VEC_SIZE] __attribute__((aligned(16)));
@@ -795,7 +813,7 @@ void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci,
       /* Hit or miss? */
       if (r2 > 0.0f && r2 < hig2) {
 
-#ifndef WITH_VECTORIZATION
+#ifndef WITH_OLD_VECTORIZATION
 
         IACT_NONSYM(r2, dx, hi, pj->h, pi, pj);
 
@@ -825,7 +843,7 @@ void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci,
 
   } /* loop over the parts in ci. */
 
-#ifdef WITH_VECTORIZATION
+#ifdef WITH_OLD_VECTORIZATION
   /* Pick up any leftovers. */
   if (icount > 0)
     for (int k = 0; k < icount; k++)
@@ -846,7 +864,14 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) {
 
   const struct engine *restrict e = r->e;
 
-#ifdef WITH_VECTORIZATION
+#ifdef WITH_MPI
+  if (ci->nodeID != cj->nodeID) {
+    DOPAIR1_NOSORT(r, ci, cj);
+    return;
+  }
+#endif
+
+#ifdef WITH_OLD_VECTORIZATION
   int icount = 0;
   float r2q[VEC_SIZE] __attribute__((aligned(16)));
   float hiq[VEC_SIZE] __attribute__((aligned(16)));
@@ -951,7 +976,7 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) {
       /* Hit or miss? */
       if (r2 < hig2) {
 
-#ifndef WITH_VECTORIZATION
+#ifndef WITH_OLD_VECTORIZATION
 
         IACT_NONSYM(r2, dx, hi, pj->h, pi, pj);
 
@@ -1021,7 +1046,7 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) {
       /* Hit or miss? */
       if (r2 < hjg2) {
 
-#ifndef WITH_VECTORIZATION
+#ifndef WITH_OLD_VECTORIZATION
 
         IACT_NONSYM(r2, dx, hj, pi->h, pj, pi);
 
@@ -1051,7 +1076,7 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) {
 
   } /* loop over the parts in ci. */
 
-#ifdef WITH_VECTORIZATION
+#ifdef WITH_OLD_VECTORIZATION
   /* Pick up any leftovers. */
   if (icount > 0)
     for (int k = 0; k < icount; k++)
@@ -1072,7 +1097,14 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) {
 
   struct engine *restrict e = r->e;
 
-#ifdef WITH_VECTORIZATION
+#ifdef WITH_MPI
+  if (ci->nodeID != cj->nodeID) {
+    DOPAIR2_NOSORT(r, ci, cj);
+    return;
+  }
+#endif
+
+#ifdef WITH_OLD_VECTORIZATION
   int icount1 = 0;
   float r2q1[VEC_SIZE] __attribute__((aligned(16)));
   float hiq1[VEC_SIZE] __attribute__((aligned(16)));
@@ -1195,7 +1227,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) {
         /* Hit or miss? */
         if (r2 < hig2) {
 
-#ifndef WITH_VECTORIZATION
+#ifndef WITH_OLD_VECTORIZATION
 
           IACT_NONSYM(r2, dx, hj, hi, pj, pi);
 
@@ -1254,7 +1286,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) {
         /* Hit or miss? */
         if (r2 < hig2) {
 
-#ifndef WITH_VECTORIZATION
+#ifndef WITH_OLD_VECTORIZATION
 
           /* Does pj need to be updated too? */
           if (part_is_active(pj, e))
@@ -1355,7 +1387,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) {
         /* Hit or miss? */
         if (r2 < hjg2 && r2 > hi * hi * kernel_gamma2) {
 
-#ifndef WITH_VECTORIZATION
+#ifndef WITH_OLD_VECTORIZATION
 
           IACT_NONSYM(r2, dx, hi, hj, pi, pj);
 
@@ -1413,7 +1445,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) {
         /* Hit or miss? */
         if (r2 < hjg2 && r2 > hi * hi * kernel_gamma2) {
 
-#ifndef WITH_VECTORIZATION
+#ifndef WITH_OLD_VECTORIZATION
 
           /* Does pi need to be updated too? */
           if (part_is_active(pi, e))
@@ -1471,7 +1503,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) {
 
   } /* loop over the parts in ci. */
 
-#ifdef WITH_VECTORIZATION
+#ifdef WITH_OLD_VECTORIZATION
   /* Pick up any leftovers. */
   if (icount1 > 0)
     for (int k = 0; k < icount1; k++)
@@ -1498,7 +1530,7 @@ void DOSELF1(struct runner *r, struct cell *restrict c) {
 
   const struct engine *e = r->e;
 
-#ifdef WITH_VECTORIZATION
+#ifdef WITH_OLD_VECTORIZATION
   int icount1 = 0;
   float r2q1[VEC_SIZE] __attribute__((aligned(16)));
   float hiq1[VEC_SIZE] __attribute__((aligned(16)));
@@ -1575,7 +1607,7 @@ void DOSELF1(struct runner *r, struct cell *restrict c) {
         /* Hit or miss? */
         if (r2 < hj * hj * kernel_gamma2) {
 
-#ifndef WITH_VECTORIZATION
+#ifndef WITH_OLD_VECTORIZATION
 
           IACT_NONSYM(r2, dx, hj, hi, pj, pi);
 
@@ -1639,7 +1671,7 @@ void DOSELF1(struct runner *r, struct cell *restrict c) {
         /* Hit or miss? */
         if (r2 < hig2 || doj) {
 
-#ifndef WITH_VECTORIZATION
+#ifndef WITH_OLD_VECTORIZATION
 
           /* Which parts need to be updated? */
           if (r2 < hig2 && doj)
@@ -1722,7 +1754,7 @@ void DOSELF1(struct runner *r, struct cell *restrict c) {
 
   } /* loop over all particles. */
 
-#ifdef WITH_VECTORIZATION
+#ifdef WITH_OLD_VECTORIZATION
   /* Pick up any leftovers. */
   if (icount1 > 0)
     for (int k = 0; k < icount1; k++)
@@ -1747,7 +1779,7 @@ void DOSELF2(struct runner *r, struct cell *restrict c) {
 
   const struct engine *e = r->e;
 
-#ifdef WITH_VECTORIZATION
+#ifdef WITH_OLD_VECTORIZATION
   int icount1 = 0;
   float r2q1[VEC_SIZE] __attribute__((aligned(16)));
   float hiq1[VEC_SIZE] __attribute__((aligned(16)));
@@ -1824,7 +1856,7 @@ void DOSELF2(struct runner *r, struct cell *restrict c) {
         /* Hit or miss? */
         if (r2 < hig2 || r2 < hj * hj * kernel_gamma2) {
 
-#ifndef WITH_VECTORIZATION
+#ifndef WITH_OLD_VECTORIZATION
 
           IACT_NONSYM(r2, dx, hj, hi, pj, pi);
 
@@ -1886,7 +1918,7 @@ void DOSELF2(struct runner *r, struct cell *restrict c) {
         /* Hit or miss? */
         if (r2 < hig2 || r2 < hj * hj * kernel_gamma2) {
 
-#ifndef WITH_VECTORIZATION
+#ifndef WITH_OLD_VECTORIZATION
 
           /* Does pj need to be updated too? */
           if (part_is_active(pj, e))
@@ -1944,7 +1976,7 @@ void DOSELF2(struct runner *r, struct cell *restrict c) {
 
   } /* loop over all particles. */
 
-#ifdef WITH_VECTORIZATION
+#ifdef WITH_OLD_VECTORIZATION
   /* Pick up any leftovers. */
   if (icount1 > 0)
     for (int k = 0; k < icount1; k++)
@@ -2240,8 +2272,14 @@ void DOSUB_SELF1(struct runner *r, struct cell *ci, int gettimer) {
   }
 
   /* Otherwise, compute self-interaction. */
-  else
+  else {
+#if (DOSELF1 == runner_doself1_density) && defined(WITH_VECTORIZATION) && \
+    defined(GADGET2_SPH)
+    runner_doself1_density_vec(r, ci);
+#else
     DOSELF1(r, ci);
+#endif
+  }
 
   if (gettimer) TIMER_TOC(TIMER_DOSUB_SELF);
 }
diff --git a/src/runner_doiact_grav.h b/src/runner_doiact_grav.h
index 59a5ae496680390c23458bde65b4bba321ffe7a1..9d2606ceb06fd6d32592010376e867a6ae582bf0 100644
--- a/src/runner_doiact_grav.h
+++ b/src/runner_doiact_grav.h
@@ -25,8 +25,6 @@
 #include "gravity.h"
 #include "part.h"
 
-#define ICHECK -1000
-
 /**
  * @brief Compute the recursive upward sweep, i.e. construct the
  *        multipoles in a cell hierarchy.
diff --git a/src/runner_doiact_nosort.h b/src/runner_doiact_nosort.h
new file mode 100644
index 0000000000000000000000000000000000000000..057dd756b4a8d636253eccd8808417ac452e193d
--- /dev/null
+++ b/src/runner_doiact_nosort.h
@@ -0,0 +1,305 @@
+
+/**
+ * @brief Compute the interactions between a cell pair.
+ *
+ * @param r The #runner.
+ * @param ci The first #cell.
+ * @param cj The second #cell.
+ */
+void DOPAIR1_NOSORT(struct runner *r, struct cell *ci, struct cell *cj) {
+
+  const struct engine *e = r->e;
+
+  TIMER_TIC;
+
+  /* Anything to do here? */
+  if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return;
+
+  if (!cell_is_drifted(ci, e)) cell_drift(ci, e);
+  if (!cell_is_drifted(cj, e)) cell_drift(cj, e);
+
+  const int count_i = ci->count;
+  const int count_j = cj->count;
+  struct part *restrict parts_i = ci->parts;
+  struct part *restrict parts_j = cj->parts;
+
+  /* Get the relative distance between the pairs, wrapping. */
+  double shift[3] = {0.0, 0.0, 0.0};
+  space_getsid(e->s, &ci, &cj, shift);
+
+  /* Loop over the parts in ci. */
+  for (int pid = 0; pid < count_i; pid++) {
+
+    /* Get a hold of the ith part in ci. */
+    struct part *restrict pi = &parts_i[pid];
+    if (!part_is_active(pi, e)) continue;
+    const float hi = pi->h;
+
+    double pix[3];
+    for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k];
+    const float hig2 = hi * hi * kernel_gamma2;
+
+    /* Loop over the parts in cj. */
+    for (int pjd = 0; pjd < count_j; pjd++) {
+
+      /* Get a pointer to the jth particle. */
+      struct part *restrict pj = &parts_j[pjd];
+
+      /* Compute the pairwise distance. */
+      float r2 = 0.0f;
+      float dx[3];
+      for (int k = 0; k < 3; k++) {
+        dx[k] = pix[k] - pj->x[k];
+        r2 += dx[k] * dx[k];
+      }
+
+#ifdef SWIFT_DEBUG_CHECKS
+      /* Check that particles have been drifted to the current time */
+      if (pi->ti_drift != e->ti_current)
+        error("Particle pi not drifted to current time");
+      if (pj->ti_drift != e->ti_current)
+        error("Particle pj not drifted to current time");
+#endif
+
+      /* Hit or miss? */
+      if (r2 < hig2) {
+        IACT_NONSYM(r2, dx, hi, pj->h, pi, pj);
+      }
+
+    } /* loop over the parts in cj. */
+
+  } /* loop over the parts in ci. */
+
+  /* Loop over the parts in cj. */
+  for (int pjd = 0; pjd < count_j; pjd++) {
+
+    /* Get a hold of the ith part in ci. */
+    struct part *restrict pj = &parts_j[pjd];
+    if (!part_is_active(pj, e)) continue;
+    const float hj = pj->h;
+
+    double pjx[3];
+    for (int k = 0; k < 3; k++) pjx[k] = pj->x[k] + shift[k];
+    const float hjg2 = hj * hj * kernel_gamma2;
+
+    /* Loop over the parts in ci. */
+    for (int pid = 0; pid < count_i; pid++) {
+
+      /* Get a pointer to the jth particle. */
+      struct part *restrict pi = &parts_i[pid];
+
+      /* Compute the pairwise distance. */
+      float r2 = 0.0f;
+      float dx[3];
+      for (int k = 0; k < 3; k++) {
+        dx[k] = pjx[k] - pi->x[k];
+        r2 += dx[k] * dx[k];
+      }
+
+#ifdef SWIFT_DEBUG_CHECKS
+      /* Check that particles have been drifted to the current time */
+      if (pj->ti_drift != e->ti_current)
+        error("Particle pj not drifted to current time");
+      if (pi->ti_drift != e->ti_current)
+        error("Particle pi not drifted to current time");
+#endif
+
+      /* Hit or miss? */
+      if (r2 < hjg2) {
+        IACT_NONSYM(r2, dx, hj, pi->h, pj, pi);
+      }
+
+    } /* loop over the parts in ci. */
+
+  } /* loop over the parts in cj. */
+
+  TIMER_TOC(TIMER_DOPAIR);
+}
+
+/**
+ * @brief Compute the interactions between a cell pair.
+ *
+ * @param r The #runner.
+ * @param ci The first #cell.
+ * @param cj The second #cell.
+ */
+void DOPAIR2_NOSORT(struct runner *r, struct cell *ci, struct cell *cj) {
+
+  const struct engine *e = r->e;
+
+  TIMER_TIC;
+
+  /* Anything to do here? */
+  if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return;
+
+  if (!cell_is_drifted(ci, e)) cell_drift(ci, e);
+  if (!cell_is_drifted(cj, e)) cell_drift(cj, e);
+
+  const int count_i = ci->count;
+  const int count_j = cj->count;
+  struct part *restrict parts_i = ci->parts;
+  struct part *restrict parts_j = cj->parts;
+
+  /* Get the relative distance between the pairs, wrapping. */
+  double shift[3] = {0.0, 0.0, 0.0};
+  space_getsid(e->s, &ci, &cj, shift);
+
+  /* Loop over the parts in ci. */
+  for (int pid = 0; pid < count_i; pid++) {
+
+    /* Get a hold of the ith part in ci. */
+    struct part *restrict pi = &parts_i[pid];
+    if (!part_is_active(pi, e)) continue;
+    const float hi = pi->h;
+
+    double pix[3];
+    for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k];
+    const float hig2 = hi * hi * kernel_gamma2;
+
+    /* Loop over the parts in cj. */
+    for (int pjd = 0; pjd < count_j; pjd++) {
+
+      /* Get a pointer to the jth particle. */
+      struct part *restrict pj = &parts_j[pjd];
+      const float hjg2 = pj->h * pj->h * kernel_gamma2;
+
+      /* Compute the pairwise distance. */
+      float r2 = 0.0f;
+      float dx[3];
+      for (int k = 0; k < 3; k++) {
+        dx[k] = pix[k] - pj->x[k];
+        r2 += dx[k] * dx[k];
+      }
+
+#ifdef SWIFT_DEBUG_CHECKS
+      /* Check that particles have been drifted to the current time */
+      if (pi->ti_drift != e->ti_current)
+        error("Particle pi not drifted to current time");
+      if (pj->ti_drift != e->ti_current)
+        error("Particle pj not drifted to current time");
+#endif
+
+      /* Hit or miss? */
+      if (r2 < hig2 || r2 < hjg2) {
+        IACT_NONSYM(r2, dx, hi, pj->h, pi, pj);
+      }
+
+    } /* loop over the parts in cj. */
+
+  } /* loop over the parts in ci. */
+
+  /* Loop over the parts in cj. */
+  for (int pjd = 0; pjd < count_j; pjd++) {
+
+    /* Get a hold of the ith part in ci. */
+    struct part *restrict pj = &parts_j[pjd];
+    if (!part_is_active(pj, e)) continue;
+    const float hj = pj->h;
+
+    double pjx[3];
+    for (int k = 0; k < 3; k++) pjx[k] = pj->x[k] + shift[k];
+    const float hjg2 = hj * hj * kernel_gamma2;
+
+    /* Loop over the parts in ci. */
+    for (int pid = 0; pid < count_i; pid++) {
+
+      /* Get a pointer to the jth particle. */
+      struct part *restrict pi = &parts_i[pid];
+      const float hig2 = pi->h * pi->h * kernel_gamma2;
+
+      /* Compute the pairwise distance. */
+      float r2 = 0.0f;
+      float dx[3];
+      for (int k = 0; k < 3; k++) {
+        dx[k] = pjx[k] - pi->x[k];
+        r2 += dx[k] * dx[k];
+      }
+
+#ifdef SWIFT_DEBUG_CHECKS
+      /* Check that particles have been drifted to the current time */
+      if (pj->ti_drift != e->ti_current)
+        error("Particle pj not drifted to current time");
+      if (pi->ti_drift != e->ti_current)
+        error("Particle pi not drifted to current time");
+#endif
+
+      /* Hit or miss? */
+      if (r2 < hjg2 || r2 < hig2) {
+        IACT_NONSYM(r2, dx, hj, pi->h, pj, pi);
+      }
+
+    } /* loop over the parts in ci. */
+
+  } /* loop over the parts in cj. */
+
+  TIMER_TOC(TIMER_DOPAIR);
+}
+
+/**
+ * @brief Compute the interactions between a cell pair, but only for the
+ *      given indices in ci.
+ *
+ * @param r The #runner.
+ * @param ci The first #cell.
+ * @param parts_i The #part to interact with @c cj.
+ * @param ind The list of indices of particles in @c ci to interact with.
+ * @param count The number of particles in @c ind.
+ * @param cj The second #cell.
+ */
+void DOPAIR_SUBSET_NOSORT(struct runner *r, struct cell *restrict ci,
+                          struct part *restrict parts_i, int *restrict ind,
+                          int count, struct cell *restrict cj) {
+
+  struct engine *e = r->e;
+
+  TIMER_TIC;
+
+  const int count_j = cj->count;
+  struct part *restrict parts_j = cj->parts;
+
+  /* Get the relative distance between the pairs, wrapping. */
+  double shift[3] = {0.0, 0.0, 0.0};
+  for (int k = 0; k < 3; k++) {
+    if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2)
+      shift[k] = e->s->dim[k];
+    else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2)
+      shift[k] = -e->s->dim[k];
+  }
+
+  /* Loop over the parts_i. */
+  for (int pid = 0; pid < count; pid++) {
+
+    /* Get a hold of the ith part in ci. */
+    struct part *restrict pi = &parts_i[ind[pid]];
+    double pix[3];
+    for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k];
+    const float hi = pi->h;
+    const float hig2 = hi * hi * kernel_gamma2;
+
+    if (!part_is_active(pi, e))
+      error("Trying to correct smoothing length of inactive particle !");
+
+    /* Loop over the parts in cj. */
+    for (int pjd = 0; pjd < count_j; pjd++) {
+
+      /* Get a pointer to the jth particle. */
+      struct part *restrict pj = &parts_j[pjd];
+
+      /* Compute the pairwise distance. */
+      float r2 = 0.0f;
+      float dx[3];
+      for (int k = 0; k < 3; k++) {
+        dx[k] = pix[k] - pj->x[k];
+        r2 += dx[k] * dx[k];
+      }
+
+      /* Hit or miss? */
+      if (r2 < hig2) {
+
+        IACT_NONSYM(r2, dx, hi, pj->h, pi, pj);
+      }
+    } /* loop over the parts in cj. */
+  }   /* loop over the parts in ci. */
+
+  TIMER_TOC(timer_dopair_subset);
+}
diff --git a/src/scheduler.c b/src/scheduler.c
index de3f9f51257a0a32811819e2bc6982f3176208a9..dd9b92b60daa5fd7e5df78e26c9266b5bd4c39e4 100644
--- a/src/scheduler.c
+++ b/src/scheduler.c
@@ -135,8 +135,11 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) {
         ((t->type == task_type_kick1) && t->ci->nodeID != s->nodeID) ||
         ((t->type == task_type_kick2) && t->ci->nodeID != s->nodeID) ||
         ((t->type == task_type_drift) && t->ci->nodeID != s->nodeID) ||
+        ((t->type == task_type_timestep) && t->ci->nodeID != s->nodeID) ||
         ((t->type == task_type_init) && t->ci->nodeID != s->nodeID)) {
       t->type = task_type_none;
+      t->subtype = task_subtype_none;
+      t->cj = NULL;
       t->skip = 1;
       break;
     }
@@ -692,6 +695,12 @@ struct task *scheduler_addtask(struct scheduler *s, enum task_types type,
                                enum task_subtypes subtype, int flags, int wait,
                                struct cell *ci, struct cell *cj, int tight) {
 
+#ifdef SWIFT_DEBUG_CHECKS
+  if (ci == NULL && cj != NULL)
+    error("Added a task with ci==NULL and cj!=NULL type=%s/%s",
+          taskID_names[type], subtaskID_names[subtype]);
+#endif
+
   /* Get the next free task. */
   const int ind = atomic_inc(&s->tasks_next);
 
@@ -964,12 +973,18 @@ void scheduler_reweight(struct scheduler *s, int verbose) {
       case task_type_ghost:
         if (t->ci == t->ci->super) cost = wscale * t->ci->count;
         break;
+      case task_type_drift:
+        cost = wscale * t->ci->count;
+        break;
       case task_type_kick1:
         cost = wscale * t->ci->count;
         break;
       case task_type_kick2:
         cost = wscale * t->ci->count;
         break;
+      case task_type_timestep:
+        cost = wscale * t->ci->count;
+        break;
       case task_type_init:
         cost = wscale * t->ci->count;
         break;
@@ -1070,6 +1085,11 @@ void scheduler_start(struct scheduler *s) {
       struct cell *ci = t->ci;
       struct cell *cj = t->cj;
 
+      if (t->type == task_type_none) continue;
+
+      /* Don't check MPI stuff */
+      if (t->type == task_type_send || t->type == task_type_recv) continue;
+
       if (ci == NULL && cj == NULL) {
 
         if (t->type != task_type_grav_gather_m && t->type != task_type_grav_fft)
@@ -1098,14 +1118,18 @@ void scheduler_start(struct scheduler *s) {
 
       } else { /* pair */
 
-        if ((ci->ti_end_min == ti_current || cj->ti_end_min == ti_current) &&
-            t->skip)
-          error(
-              "Task (type='%s/%s') should not have been skipped "
-              "ti_current=%lld "
-              "ci->ti_end_min=%lld cj->ti_end_min=%lld",
-              taskID_names[t->type], subtaskID_names[t->subtype], ti_current,
-              ci->ti_end_min, cj->ti_end_min);
+        if (t->skip) {
+
+          /* Check that the pair is active if the local cell is active */
+          if ((ci->ti_end_min == ti_current && ci->nodeID == engine_rank) ||
+              (cj->ti_end_min == ti_current && cj->nodeID == engine_rank))
+            error(
+                "Task (type='%s/%s') should not have been skipped "
+                "ti_current=%lld "
+                "ci->ti_end_min=%lld cj->ti_end_min=%lld",
+                taskID_names[t->type], subtaskID_names[t->subtype], ti_current,
+                ci->ti_end_min, cj->ti_end_min);
+        }
       }
     }
   }
@@ -1153,7 +1177,7 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) {
   /* Otherwise, look for a suitable queue. */
   else {
 #ifdef WITH_MPI
-    int err;
+    int err = MPI_SUCCESS;
 #endif
 
     /* Find the previous owner for each task type, and do
@@ -1166,6 +1190,7 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) {
       case task_type_kick1:
       case task_type_kick2:
       case task_type_drift:
+      case task_type_timestep:
       case task_type_init:
         qid = t->ci->super->owner;
         break;
@@ -1179,19 +1204,26 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) {
       case task_type_recv:
 #ifdef WITH_MPI
         if (t->subtype == task_subtype_tend) {
-          t->buff = malloc(sizeof(int) * t->ci->pcell_size);
-          err = MPI_Irecv(t->buff, t->ci->pcell_size, MPI_INT, t->ci->nodeID,
-                          t->flags, MPI_COMM_WORLD, &t->req);
-        } else {
+          t->buff = malloc(sizeof(integertime_t) * t->ci->pcell_size);
+          err = MPI_Irecv(t->buff, t->ci->pcell_size * sizeof(integertime_t),
+                          MPI_BYTE, t->ci->nodeID, t->flags, MPI_COMM_WORLD,
+                          &t->req);
+        } else if (t->subtype == task_subtype_xv ||
+                   t->subtype == task_subtype_rho) {
           err = MPI_Irecv(t->ci->parts, t->ci->count, part_mpi_type,
                           t->ci->nodeID, t->flags, MPI_COMM_WORLD, &t->req);
+          // message( "receiving %i parts with tag=%i from %i to %i." ,
+          //     t->ci->count , t->flags , t->ci->nodeID , s->nodeID );
+          // fflush(stdout);
+        } else if (t->subtype == task_subtype_gpart) {
+          err = MPI_Irecv(t->ci->gparts, t->ci->gcount, gpart_mpi_type,
+                          t->ci->nodeID, t->flags, MPI_COMM_WORLD, &t->req);
+        } else {
+          error("Unknown communication sub-type");
         }
         if (err != MPI_SUCCESS) {
           mpi_error(err, "Failed to emit irecv for particle data.");
         }
-        // message( "receiving %i parts with tag=%i from %i to %i." ,
-        //     t->ci->count , t->flags , t->ci->nodeID , s->nodeID );
-        // fflush(stdout);
         qid = 1 % s->nr_queues;
 #else
         error("SWIFT was not compiled with MPI support.");
@@ -1200,20 +1232,33 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) {
       case task_type_send:
 #ifdef WITH_MPI
         if (t->subtype == task_subtype_tend) {
-          t->buff = malloc(sizeof(int) * t->ci->pcell_size);
+          t->buff = malloc(sizeof(integertime_t) * t->ci->pcell_size);
           cell_pack_ti_ends(t->ci, t->buff);
-          err = MPI_Isend(t->buff, t->ci->pcell_size, MPI_INT, t->cj->nodeID,
-                          t->flags, MPI_COMM_WORLD, &t->req);
-        } else {
+          err = MPI_Isend(t->buff, t->ci->pcell_size * sizeof(integertime_t),
+                          MPI_BYTE, t->cj->nodeID, t->flags, MPI_COMM_WORLD,
+                          &t->req);
+        } else if (t->subtype == task_subtype_xv ||
+                   t->subtype == task_subtype_rho) {
+#ifdef SWIFT_DEBUG_CHECKS
+          for (int k = 0; k < t->ci->count; k++)
+            if (t->ci->parts[k].ti_drift != s->space->e->ti_current)
+              error("Sending un-drifted particle !");
+#endif
           err = MPI_Isend(t->ci->parts, t->ci->count, part_mpi_type,
                           t->cj->nodeID, t->flags, MPI_COMM_WORLD, &t->req);
+
+          // message( "sending %i parts with tag=%i from %i to %i." ,
+          //     t->ci->count , t->flags , s->nodeID , t->cj->nodeID );
+          // fflush(stdout);
+        } else if (t->subtype == task_subtype_gpart) {
+          err = MPI_Isend(t->ci->gparts, t->ci->gcount, gpart_mpi_type,
+                          t->cj->nodeID, t->flags, MPI_COMM_WORLD, &t->req);
+        } else {
+          error("Unknown communication sub-type");
         }
         if (err != MPI_SUCCESS) {
           mpi_error(err, "Failed to emit isend for particle data.");
         }
-        // message( "sending %i parts with tag=%i from %i to %i." ,
-        //     t->ci->count , t->flags , s->nodeID , t->cj->nodeID );
-        // fflush(stdout);
         qid = 0;
 #else
         error("SWIFT was not compiled with MPI support.");
diff --git a/src/space.c b/src/space.c
index 5aff6bc17265a2c058366804fe8a1adfa0d898df..b7b3523e6a4dc345f3acdad3552482ec8738b8f3 100644
--- a/src/space.c
+++ b/src/space.c
@@ -473,7 +473,7 @@ void space_rebuild(struct space *s, int verbose) {
 
 /* Be verbose about this. */
 #ifdef SWIFT_DEBUG_CHECKS
-  message("re)building space");
+  if (s->e->nodeID == 0 || verbose) message("re)building space");
   fflush(stdout);
 #endif
 
@@ -845,6 +845,13 @@ void space_parts_get_cell_index_mapper(void *map_data, int nr_parts,
         cell_getid(cdim, pos_x * ih_x, pos_y * ih_y, pos_z * ih_z);
     ind[k] = index;
 
+#ifdef SWIFT_DEBUG_CHECKS
+    if (pos_x > dim_x || pos_y > dim_y || pos_z > pos_z || pos_x < 0. ||
+        pos_y < 0. || pos_z < 0.)
+      error("Particle outside of simulation box. p->x=[%e %e %e]", pos_x, pos_y,
+            pos_z);
+#endif
+
     /* Update the position */
     p->x[0] = pos_x;
     p->x[1] = pos_y;
@@ -1008,7 +1015,7 @@ void space_parts_sort(struct space *s, int *ind, size_t N, int min, int max,
     if (ind[i - 1] > ind[i])
       error("Sorting failed (ind[%zu]=%i,ind[%zu]=%i), min=%i, max=%i.", i - 1,
             ind[i - 1], i, ind[i], min, max);
-  message("Sorting succeeded.");
+  if (s->e->nodeID == 0 || verbose) message("Sorting succeeded.");
 #endif
 
   /* Clean up. */
@@ -1186,7 +1193,7 @@ void space_gparts_sort(struct space *s, int *ind, size_t N, int min, int max,
     if (ind[i - 1] > ind[i])
       error("Sorting failed (ind[%zu]=%i,ind[%zu]=%i), min=%i, max=%i.", i - 1,
             ind[i - 1], i, ind[i], min, max);
-  message("Sorting succeeded.");
+  if (s->e->nodeID == 0 || verbose) message("Sorting succeeded.");
 #endif
 
   /* Clean up. */
@@ -2037,6 +2044,19 @@ void space_check_drift_point(struct space *s, integertime_t ti_current) {
   space_map_cells_pre(s, 1, cell_check_drift_point, &ti_current);
 }
 
+/**
+ * @brief Checks that all particles and local cells have a non-zero time-step.
+ */
+void space_check_timesteps(struct space *s) {
+#ifdef SWIFT_DEBUG_CHECKS
+
+  for (int i = 0; i < s->nr_cells; ++i) {
+    cell_check_timesteps(&s->cells_top[i]);
+  }
+
+#endif
+}
+
 /**
  * @brief Frees up the memory allocated for this #space
  */
diff --git a/src/space.h b/src/space.h
index 67ff51cad34b0a9d1f4fd5492ab0e9d80ef064f2..3513c39e624570e8f1ad31ee711548bbec1a036d 100644
--- a/src/space.h
+++ b/src/space.h
@@ -186,6 +186,7 @@ void space_init_parts(struct space *s);
 void space_init_gparts(struct space *s);
 void space_link_cleanup(struct space *s);
 void space_check_drift_point(struct space *s, integertime_t ti_current);
+void space_check_timesteps(struct space *s);
 void space_clean(struct space *s);
 
 #endif /* SWIFT_SPACE_H */
diff --git a/src/task.c b/src/task.c
index ee4d92cf714aed2c23ae1795e053103aaf876e4e..0a078b40c3914419cef294b63a527b65d9bea077 100644
--- a/src/task.c
+++ b/src/task.c
@@ -55,7 +55,8 @@ const char *taskID_names[task_type_count] = {
     "sourceterms"};
 
 const char *subtaskID_names[task_subtype_count] = {
-    "none", "density", "gradient", "force", "grav", "external_grav", "tend"};
+    "none",          "density", "gradient", "force", "grav",
+    "external_grav", "tend",    "xv",       "rho",   "gpart"};
 
 /**
  * @brief Computes the overlap between the parts array of two given cells.
diff --git a/src/task.h b/src/task.h
index 2294129ea6f8921f45e5488d2ff77b24c4c5c882..57fe4dcf506ba9dee47d8bc60931b49ab05e8a38 100644
--- a/src/task.h
+++ b/src/task.h
@@ -71,6 +71,9 @@ enum task_subtypes {
   task_subtype_grav,
   task_subtype_external_grav,
   task_subtype_tend,
+  task_subtype_xv,
+  task_subtype_rho,
+  task_subtype_gpart,
   task_subtype_count
 } __attribute__((packed));
 
diff --git a/src/timers.h b/src/timers.h
index 6106b93fdb7e66c7c295c35aef6c20007554aa8a..1e7965f3fdda5a7d7aa9213a3c36abdc783efccb 100644
--- a/src/timers.h
+++ b/src/timers.h
@@ -60,7 +60,8 @@ enum {
   timer_dopair_subset,
   timer_do_ghost,
   timer_do_extra_ghost,
-  timer_dorecv_cell,
+  timer_dorecv_part,
+  timer_dorecv_gpart,
   timer_gettask,
   timer_qget,
   timer_qsteal,
diff --git a/src/vector.h b/src/vector.h
index b2b3049f6abdf2a8464cb516147b050a6f4fc3e1..5e7c978ce6c3df9b1fbc47be2a43ee76c85a352a 100644
--- a/src/vector.h
+++ b/src/vector.h
@@ -216,9 +216,15 @@
 #define VEC_INT __m128i
 #define vec_load(a) _mm_load_ps(a)
 #define vec_store(a, addr) _mm_store_ps(addr, a)
+#define vec_setzero() _mm_setzero_ps()
+#define vec_setintzero() _mm_setzero_si256()
 #define vec_set1(a) _mm_set1_ps(a)
+#define vec_setint1(a) _mm_set1_epi32(a)
 #define vec_set(a, b, c, d) _mm_set_ps(d, c, b, a)
 #define vec_dbl_set(a, b) _mm_set_pd(b, a)
+#define vec_add(a, b) _mm_add_ps(a, b)
+#define vec_sub(a, b) _mm_sub_ps(a, b)
+#define vec_mul(a, b) _mm_mul_ps(a, b)
 #define vec_sqrt(a) _mm_sqrt_ps(a)
 #define vec_rcp(a) _mm_rcp_ps(a)
 #define vec_rsqrt(a) _mm_rsqrt_ps(a)
@@ -227,9 +233,11 @@
 #define vec_fmax(a, b) _mm_max_ps(a, b)
 #define vec_fabs(a) _mm_andnot_ps(_mm_set1_ps(-0.f), a)
 #define vec_floor(a) _mm_floor_ps(a)
+#define vec_cmp_gt(a, b) _mm_cmpgt_ps(a, b)
 #define vec_cmp_lt(a, b) _mm_cmplt_ps(a, b)
 #define vec_cmp_lte(a, b) _mm_cmp_ps(a, b, _CMP_LE_OQ)
 #define vec_cmp_result(a) _mm_movemask_ps(a)
+#define vec_and(a, b) _mm_and_ps(a, b)
 #define vec_todbl_lo(a) _mm_cvtps_pd(a)
 #define vec_todbl_hi(a) _mm_cvtps_pd(_mm_movehl_ps(a, a))
 #define vec_dbl_tofloat(a, b) _mm_movelh_ps(_mm_cvtpd_ps(a), _mm_cvtpd_ps(b))
@@ -243,6 +251,12 @@
 #define vec_dbl_fmax(a, b) _mm_max_pd(a, b)
 #define FILL_VEC(a) \
   { .f[0] = a, .f[1] = a, .f[2] = a, .f[3] = a }
+#define VEC_HADD(a, b)         \
+  a.v = _mm_hadd_ps(a.v, a.v); \
+  b += a.f[0] + a.f[1];
+#ifndef vec_fma
+#define vec_fma(a, b, c) vec_add(vec_mul(a, b), c)
+#endif
 #else
 #define VEC_SIZE 4
 #endif
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 966df5af7dd11057fb79cf7cd5b155f713752b1c..0db5c2544433012dcd7f451f535391aa81b1f802 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -31,7 +31,7 @@ TESTS = testGreetings testMaths testReading.sh testSingle testKernel testSymmetr
 check_PROGRAMS = testGreetings testReading testSingle testTimeIntegration \
 		 testSPHStep testPair test27cells test125cells testParser \
                  testKernel testKernelGrav testFFT testInteractions testMaths \
-                 testSymmetry testThreadpool \
+                 testSymmetry testThreadpool benchmarkInteractions \
                  testAdiabaticIndex testRiemannExact testRiemannTRRS \
                  testRiemannHLLC testMatrixInversion testDump testLogger
 
@@ -66,6 +66,8 @@ testFFT_SOURCES = testFFT.c
 
 testInteractions_SOURCES = testInteractions.c
 
+benchmarkInteractions_SOURCES = benchmarkInteractions.c
+
 testAdiabaticIndex_SOURCES = testAdiabaticIndex.c
 
 testRiemannExact_SOURCES = testRiemannExact.c
diff --git a/tests/benchmarkInteractions.c b/tests/benchmarkInteractions.c
new file mode 100644
index 0000000000000000000000000000000000000000..be23fe0d8d00aa35a37747b049750d6f3b31fa92
--- /dev/null
+++ b/tests/benchmarkInteractions.c
@@ -0,0 +1,496 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (C) 2015 Matthieu Schaller (matthieu.schaller@durham.ac.uk).
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+#include <fenv.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include "swift.h"
+
+#define array_align sizeof(float) * VEC_SIZE
+#define ACC_THRESHOLD 1e-5
+
+#ifdef NONSYM_DENSITY
+#define IACT runner_iact_nonsym_density
+#define IACT_VEC runner_iact_nonsym_2_vec_density
+#define IACT_NAME "test_nonsym_density"
+#endif
+
+#ifdef SYM_DENSITY
+#define IACT runner_iact_density
+#define IACT_VEC runner_iact_vec_density
+#define IACT_NAME "test_sym_density"
+#endif
+
+#ifdef NONSYM_FORCE
+#define IACT runner_iact_nonsym_force
+#define IACT_VEC runner_iact_nonsym_vec_force
+#define IACT_NAME "test_nonsym_force"
+#endif
+
+#ifdef SYM_FORCE
+#define IACT runner_iact_force
+#define IACT_VEC runner_iact_vec_force
+#define IACT_NAME "test_sym_force"
+#endif
+
+#ifndef IACT
+#define IACT runner_iact_nonsym_density
+#define IACT_VEC runner_iact_nonsym_2_vec_density
+#define IACT_NAME "test_nonsym_density"
+#endif
+
+/**
+ * @brief Constructs an array of particles in a valid state prior to
+ * a IACT_NONSYM and IACT_NONSYM_VEC call.
+ *
+ * @param count No. of particles to create
+ * @param offset The position of the particle offset from (0,0,0).
+ * @param spacing Particle spacing.
+ * @param h The smoothing length of the particles in units of the inter-particle
+ *separation.
+ * @param partId The running counter of IDs.
+ */
+struct part *make_particles(size_t count, double *offset, double spacing,
+                            double h, long long *partId) {
+
+  struct part *particles;
+  if (posix_memalign((void **)&particles, part_align,
+                     count * sizeof(struct part)) != 0) {
+    error("couldn't allocate particles, no. of particles: %d", (int)count);
+  }
+  bzero(particles, count * sizeof(struct part));
+
+  /* Construct the particles */
+  struct part *p;
+
+  /* Set test particle at centre of unit sphere. */
+  p = &particles[0];
+
+  /* Place the test particle at the centre of a unit sphere. */
+  p->x[0] = 0.0f;
+  p->x[1] = 0.0f;
+  p->x[2] = 0.0f;
+
+  p->h = h;
+  p->id = ++(*partId);
+  p->mass = 1.0f;
+
+  /* Place rest of particles around the test particle
+   * with random position within a unit sphere. */
+  for (size_t i = 1; i < count; ++i) {
+    p = &particles[i];
+
+    /* Randomise positions within a unit sphere. */
+    p->x[0] = random_uniform(-1.0, 1.0);
+    p->x[1] = random_uniform(-1.0, 1.0);
+    p->x[2] = random_uniform(-1.0, 1.0);
+
+    /* Randomise velocities. */
+    p->v[0] = random_uniform(-0.05, 0.05);
+    p->v[1] = random_uniform(-0.05, 0.05);
+    p->v[2] = random_uniform(-0.05, 0.05);
+
+    p->h = h;
+    p->id = ++(*partId);
+    p->mass = 1.0f;
+  }
+  return particles;
+}
+
+/**
+ * @brief Populates particle properties needed for the force calculation.
+ */
+void prepare_force(struct part *parts, size_t count) {
+
+  struct part *p;
+  for (size_t i = 0; i < count; ++i) {
+    p = &parts[i];
+    p->rho = i + 1;
+    p->force.balsara = random_uniform(0.0, 1.0);
+    p->force.P_over_rho2 = i + 1;
+    p->force.soundspeed = random_uniform(2.0, 3.0);
+    p->force.v_sig = 0.0f;
+    p->force.h_dt = 0.0f;
+  }
+}
+
+/**
+ * @brief Dumps all particle information to a file
+ */
+void dump_indv_particle_fields(char *fileName, struct part *p) {
+
+  FILE *file = fopen(fileName, "a");
+
+  fprintf(file,
+          "%6llu %10f %10f %10f %10f %10f %10f %10e %10e %10e %13e %13e %13e "
+          "%13e %13e %13e %13e "
+          "%13e %13e %13e\n",
+          p->id, p->x[0], p->x[1], p->x[2], p->v[0], p->v[1], p->v[2],
+          p->a_hydro[0], p->a_hydro[1], p->a_hydro[2], p->rho,
+          p->density.rho_dh, p->density.wcount, p->density.wcount_dh,
+          p->force.h_dt, p->force.v_sig,
+#if defined(MINIMAL_SPH)
+          0., 0., 0., 0.
+#else
+          p->density.div_v, p->density.rot_v[0], p->density.rot_v[1],
+          p->density.rot_v[2]
+#endif
+          );
+  fclose(file);
+}
+
+/**
+ * @brief Creates a header for the output file
+ */
+void write_header(char *fileName) {
+
+  FILE *file = fopen(fileName, "w");
+  /* Write header */
+  fprintf(file,
+          "# %4s %10s %10s %10s %10s %10s %10s %10s %10s %10s %13s %13s %13s "
+          "%13s %13s %13s %13s"
+          "%13s %13s %13s %13s\n",
+          "ID", "pos_x", "pos_y", "pos_z", "v_x", "v_y", "v_z", "a_x", "a_y",
+          "a_z", "rho", "rho_dh", "wcount", "wcount_dh", "dh/dt", "v_sig",
+          "div_v", "curl_vx", "curl_vy", "curl_vz", "dS/dt");
+  fprintf(file, "\n# PARTICLES BEFORE INTERACTION:\n");
+  fclose(file);
+}
+
+/**
+ * @brief Compares the vectorised result against
+ * the serial result of the interaction.
+ *
+ * @param serial_test_part Particle that has been updated serially
+ * @param serial_parts Particle array that has been interacted serially
+ * @param vec_test_part Particle that has been updated using vectors
+ * @param vec_parts Particle array to be interacted using vectors
+ * @param count No. of particles that have been interacted
+ *
+ * @return Non-zero value if difference found, 0 otherwise
+ */
+int check_results(struct part serial_test_part, struct part *serial_parts,
+                  struct part vec_test_part, struct part *vec_parts,
+                  int count) {
+  int result = 0;
+  result += compare_particles(serial_test_part, vec_test_part, ACC_THRESHOLD);
+
+  for (int i = 0; i < count; i++)
+    result += compare_particles(serial_parts[i], vec_parts[i], ACC_THRESHOLD);
+
+  return result;
+}
+
+/*
+ * @brief Calls the serial and vectorised version of the non-symmetrical density
+ * interaction.
+ *
+ * @param test_part Particle that will be updated
+ * @param parts Particle array to be interacted
+ * @param count No. of particles to be interacted
+ * @param serial_inter_func Serial interaction function to be called
+ * @param vec_inter_func Vectorised interaction function to be called
+ * @param runs No. of times to call interactions
+ *
+ */
+void test_interactions(struct part test_part, struct part *parts, size_t count,
+                       char *filePrefix, int runs) {
+
+  ticks serial_time = 0;
+#ifdef WITH_VECTORIZATION
+  ticks vec_time = 0;
+#endif
+
+  FILE *file;
+  char serial_filename[200] = "";
+  char vec_filename[200] = "";
+
+  strcpy(serial_filename, filePrefix);
+  strcpy(vec_filename, filePrefix);
+  sprintf(serial_filename + strlen(serial_filename), "_serial.dat");
+  sprintf(vec_filename + strlen(vec_filename), "_vec.dat");
+
+  write_header(serial_filename);
+  write_header(vec_filename);
+
+  struct part pi_serial, pi_vec;
+  struct part pj_serial[count], pj_vec[count];
+
+  float r2[count] __attribute__((aligned(array_align)));
+  float dx[3 * count] __attribute__((aligned(array_align)));
+
+  struct part *piq[count], *pjq[count];
+  for (size_t k = 0; k < count; k++) {
+    piq[k] = NULL;
+    pjq[k] = NULL;
+  }
+
+#ifdef WITH_VECTORIZATION
+  float r2q[count] __attribute__((aligned(array_align)));
+  float hiq[count] __attribute__((aligned(array_align)));
+  float dxq[count] __attribute__((aligned(array_align)));
+
+  float dyq[count] __attribute__((aligned(array_align)));
+  float dzq[count] __attribute__((aligned(array_align)));
+  float mjq[count] __attribute__((aligned(array_align)));
+  float vixq[count] __attribute__((aligned(array_align)));
+  float viyq[count] __attribute__((aligned(array_align)));
+  float vizq[count] __attribute__((aligned(array_align)));
+  float vjxq[count] __attribute__((aligned(array_align)));
+  float vjyq[count] __attribute__((aligned(array_align)));
+  float vjzq[count] __attribute__((aligned(array_align)));
+#endif
+
+  /* Call serial interaction a set number of times. */
+  for (int k = 0; k < runs; k++) {
+    /* Reset particle to initial setup */
+    pi_serial = test_part;
+    for (size_t i = 0; i < count; i++) pj_serial[i] = parts[i];
+
+    /* Only dump data on first run. */
+    if (k == 0) {
+      /* Dump state of particles before serial interaction. */
+      dump_indv_particle_fields(serial_filename, &pi_serial);
+      for (size_t i = 0; i < count; i++)
+        dump_indv_particle_fields(serial_filename, &pj_serial[i]);
+    }
+
+    /* Perform serial interaction */
+    for (size_t i = 0; i < count; i++) {
+      /* Compute the pairwise distance. */
+      r2[i] = 0.0f;
+      for (int k = 0; k < 3; k++) {
+        int ind = (3 * i) + k;
+        dx[ind] = pi_serial.x[k] - pj_serial[i].x[k];
+        r2[i] += dx[ind] * dx[ind];
+      }
+    }
+
+    const ticks tic = getticks();
+/* Perform serial interaction */
+#ifdef __ICC
+#pragma novector
+#endif
+    for (size_t i = 0; i < count; i++) {
+      IACT(r2[i], &(dx[3 * i]), pi_serial.h, pj_serial[i].h, &pi_serial,
+           &pj_serial[i]);
+    }
+    serial_time += getticks() - tic;
+  }
+
+  file = fopen(serial_filename, "a");
+  fprintf(file, "\n# PARTICLES AFTER INTERACTION:\n");
+  fclose(file);
+
+  /* Dump result of serial interaction. */
+  dump_indv_particle_fields(serial_filename, &pi_serial);
+  for (size_t i = 0; i < count; i++)
+    dump_indv_particle_fields(serial_filename, &pj_serial[i]);
+
+  /* Call vector interaction a set number of times. */
+  for (int k = 0; k < runs; k++) {
+    /* Reset particle to initial setup */
+    pi_vec = test_part;
+    for (size_t i = 0; i < count; i++) pj_vec[i] = parts[i];
+
+    /* Setup arrays for vector interaction. */
+    for (size_t i = 0; i < count; i++) {
+      /* Compute the pairwise distance. */
+      float r2 = 0.0f;
+      float dx[3];
+      for (int k = 0; k < 3; k++) {
+        dx[k] = pi_vec.x[k] - pj_vec[i].x[k];
+        r2 += dx[k] * dx[k];
+      }
+
+#ifdef WITH_VECTORIZATION
+      r2q[i] = r2;
+      dxq[i] = dx[0];
+      hiq[i] = pi_vec.h;
+      piq[i] = &pi_vec;
+      pjq[i] = &pj_vec[i];
+
+      dyq[i] = dx[1];
+      dzq[i] = dx[2];
+      mjq[i] = pj_vec[i].mass;
+      vixq[i] = pi_vec.v[0];
+      viyq[i] = pi_vec.v[1];
+      vizq[i] = pi_vec.v[2];
+      vjxq[i] = pj_vec[i].v[0];
+      vjyq[i] = pj_vec[i].v[1];
+      vjzq[i] = pj_vec[i].v[2];
+#endif
+    }
+
+    /* Only dump data on first run. */
+    if (k == 0) {
+      /* Dump state of particles before vector interaction. */
+      dump_indv_particle_fields(vec_filename, piq[0]);
+      for (size_t i = 0; i < count; i++)
+        dump_indv_particle_fields(vec_filename, pjq[i]);
+    }
+
+/* Perform vector interaction. */
+#ifdef WITH_VECTORIZATION
+    vector hi_vec, hi_inv_vec, vix_vec, viy_vec, viz_vec, mask, mask2;
+    vector rhoSum, rho_dhSum, wcountSum, wcount_dhSum, div_vSum, curlvxSum,
+        curlvySum, curlvzSum;
+
+    rhoSum.v = vec_set1(0.f);
+    rho_dhSum.v = vec_set1(0.f);
+    wcountSum.v = vec_set1(0.f);
+    wcount_dhSum.v = vec_set1(0.f);
+    div_vSum.v = vec_set1(0.f);
+    curlvxSum.v = vec_set1(0.f);
+    curlvySum.v = vec_set1(0.f);
+    curlvzSum.v = vec_set1(0.f);
+
+    hi_vec.v = vec_load(&hiq[0]);
+    vix_vec.v = vec_load(&vixq[0]);
+    viy_vec.v = vec_load(&viyq[0]);
+    viz_vec.v = vec_load(&vizq[0]);
+
+    hi_inv_vec = vec_reciprocal(hi_vec);
+    mask.m = vec_setint1(0xFFFFFFFF);
+    mask2.m = vec_setint1(0xFFFFFFFF);
+
+#ifdef HAVE_AVX512_F
+    KNL_MASK_16 knl_mask, knl_mask2;
+    knl_mask = 0xFFFF;
+    knl_mask2 = 0xFFFF;
+#endif
+
+    const ticks vec_tic = getticks();
+
+    for (size_t i = 0; i < count; i += 2 * VEC_SIZE) {
+
+      IACT_VEC(&(r2q[i]), &(dxq[i]), &(dyq[i]), &(dzq[i]), (hi_inv_vec),
+               (vix_vec), (viy_vec), (viz_vec), &(vjxq[i]), &(vjyq[i]),
+               &(vjzq[i]), &(mjq[i]), &rhoSum, &rho_dhSum, &wcountSum,
+               &wcount_dhSum, &div_vSum, &curlvxSum, &curlvySum, &curlvzSum,
+               mask, mask2,
+#ifdef HAVE_AVX512_F
+               knl_mask, knl_mask2);
+#else
+               0, 0);
+#endif
+    }
+
+    VEC_HADD(rhoSum, piq[0]->rho);
+    VEC_HADD(rho_dhSum, piq[0]->density.rho_dh);
+    VEC_HADD(wcountSum, piq[0]->density.wcount);
+    VEC_HADD(wcount_dhSum, piq[0]->density.wcount_dh);
+    VEC_HADD(div_vSum, piq[0]->density.div_v);
+    VEC_HADD(curlvxSum, piq[0]->density.rot_v[0]);
+    VEC_HADD(curlvySum, piq[0]->density.rot_v[1]);
+    VEC_HADD(curlvzSum, piq[0]->density.rot_v[2]);
+
+    vec_time += getticks() - vec_tic;
+#endif
+  }
+
+  file = fopen(vec_filename, "a");
+  fprintf(file, "\n# PARTICLES AFTER INTERACTION:\n");
+  fclose(file);
+
+  /* Dump result of serial interaction. */
+  dump_indv_particle_fields(vec_filename, piq[0]);
+  for (size_t i = 0; i < count; i++)
+    dump_indv_particle_fields(vec_filename, pjq[i]);
+
+#ifdef WITH_VECTORIZATION
+  /* Check serial results against the vectorised results. */
+  if (check_results(pi_serial, pj_serial, pi_vec, pj_vec, count))
+    message("Differences found...");
+#endif
+
+  message("The serial interactions took     : %15lli ticks.",
+          serial_time / runs);
+#ifdef WITH_VECTORIZATION
+  message("The vectorised interactions took : %15lli ticks.", vec_time / runs);
+  message("Speed up: %15fx.", (double)(serial_time) / vec_time);
+#endif
+}
+
+/* And go... */
+int main(int argc, char *argv[]) {
+  size_t runs = 10000;
+  double h = 1.0, spacing = 0.5;
+  double offset[3] = {0.0, 0.0, 0.0};
+  size_t count = 256;
+
+  /* Get some randomness going */
+  srand(0);
+
+  char c;
+  while ((c = getopt(argc, argv, "h:s:n:r:")) != -1) {
+    switch (c) {
+      case 'h':
+        sscanf(optarg, "%lf", &h);
+        break;
+      case 's':
+        sscanf(optarg, "%lf", &spacing);
+      case 'n':
+        sscanf(optarg, "%zu", &count);
+        break;
+      case 'r':
+        sscanf(optarg, "%zu", &runs);
+        break;
+      case '?':
+        error("Unknown option.");
+        break;
+    }
+  }
+
+  if (h < 0 || spacing < 0) {
+    printf(
+        "\nUsage: %s [OPTIONS...]\n"
+        "\nGenerates a particle array with equal particle separation."
+        "\nThese are then interacted using runner_iact_density and "
+        "runner_iact_vec_density."
+        "\n\nOptions:"
+        "\n-h DISTANCE=1.2348 - Smoothing length in units of <x>"
+        "\n-s SPACING=0.5     - Spacing between particles"
+        "\n-n NUMBER=9        - No. of particles",
+        argv[0]);
+    exit(1);
+  }
+
+  /* Correct count so that VEC_SIZE of particles interact with the test
+   * particle. */
+  count = count - (count % VEC_SIZE) + 1;
+
+  /* Build the infrastructure */
+  static long long partId = 0;
+  struct part test_particle;
+  struct part *particles = make_particles(count, offset, spacing, h, &partId);
+
+#if defined(NONSYM_FORCE) || defined(SYM_FORCE)
+  prepare_force(particles, count);
+#endif
+
+  test_particle = particles[0];
+  /* Call the non-sym density test. */
+  message("Testing %s interaction...", IACT_NAME);
+  test_interactions(test_particle, &particles[1], count - 1, IACT_NAME, runs);
+
+  return 0;
+}
diff --git a/tests/tolerance_27_normal.dat b/tests/tolerance_27_normal.dat
index dd93f7191d6c0ad38e3de43de3abebf544d555aa..9c7ca10414507746b41e453d75426a072f989d2e 100644
--- a/tests/tolerance_27_normal.dat
+++ b/tests/tolerance_27_normal.dat
@@ -1,3 +1,3 @@
 #   ID      pos_x      pos_y      pos_z        v_x        v_y        v_z           rho        rho_dh        wcount     wcount_dh         div_v       curl_vx       curl_vy       curl_vz
-    0	    1e-6       1e-6	  1e-6 	       1e-6 	  1e-6	     1e-6	   2e-6	      4e-5	    2e-4       2e-3		 7e-6	     5e-6	   5e-6		 5e-6
+    0	    1e-6       1e-6	  1e-6 	       1e-6 	  1e-6	     1e-6	   2e-6	      4e-5	    2e-4       2e-3		 8e-6	     6e-6	   6e-6		 6e-6
     0	    1e-6       1e-6	  1e-6 	       1e-6 	  1e-6	     1e-6	   1e-6	      1.2e-4	    1e-4       1e-4		 2e-4	     1e-4	   1e-4	 	 1e-4
diff --git a/tests/tolerance_27_perturbed.dat b/tests/tolerance_27_perturbed.dat
index 478bfd94cbbbbabea791eaab9ca2966b4c52b921..53de4ec7632039a56a3757488881e890296e3ac8 100644
--- a/tests/tolerance_27_perturbed.dat
+++ b/tests/tolerance_27_perturbed.dat
@@ -1,3 +1,3 @@
 #   ID      pos_x      pos_y      pos_z        v_x        v_y        v_z           rho        rho_dh        wcount     wcount_dh         div_v       curl_vx       curl_vy       curl_vz
-    0	    1e-6       1e-6	  1e-6 	       1e-6 	  1e-6	     1e-6	   1.2e-6     1e-4	    4e-5       2e-3		 3.1e-6	     3e-6	   3e-6		 3e-6
-    0	    1e-6       1e-6	  1e-6 	       1e-6 	  1e-6	     1e-6	   1e-6	      5e-3	    1e-5       1e-4		 2e-5	     2e-3	   2e-3	 	 2e-3
+    0	    1e-6       1e-6	  1e-6 	       1e-6 	  1e-6	     1e-6	   1.2e-6     1e-4	    5e-5       2e-3		 3.1e-6	     3e-6	   3e-6		 3e-6
+    0	    1e-6       1e-6	  1e-6 	       1e-6 	  1e-6	     1e-6	   1e-6	      1.2e-2	    1e-5       1e-4		 2e-5	     2e-3	   2e-3	 	 2e-3