diff --git a/.gitignore b/.gitignore
index 5a860ed8d811a2a90785db4180a1ed9ea112e272..ba0028d163dae64143b0c43b465152e357d042b0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -76,6 +76,9 @@ tests/testRiemannExact
 tests/testRiemannTRRS
 tests/testRiemannHLLC
 tests/testMatrixInversion
+tests/testDump
+tests/testLogger
+tests/benchmarkInteractions
 
 theory/latex/swift.pdf
 theory/SPH/Kernels/kernels.pdf
diff --git a/README b/README
index 9ef773cd85b408ff822b3652c3fd5507e6d95d01..0d658c333f1328b423851031c5b5d202f43df3c2 100644
--- a/README
+++ b/README
@@ -28,6 +28,7 @@ Valid options are:
   -G          Run with self-gravity
   -n    {int} Execute a fixed number of time steps. When unset use the time_end parameter to stop. 
   -s          Run with SPH
+  -S          Run with stars
   -t    {int} The number of threads to use on each MPI rank. Defaults to 1 if not specified.
   -v     [12] Increase the level of verbosity
   	      1: MPI-rank 0 writes
diff --git a/configure.ac b/configure.ac
index 9fa9a1de591d63794dde5db6a8dd733cfcaada09..4b6308e96b81bbfd0a9256bb4914f1356fbfa6f8 100644
--- a/configure.ac
+++ b/configure.ac
@@ -351,7 +351,7 @@ AC_ARG_WITH([tcmalloc],
    [with_tcmalloc="no"]
 )
 if test "x$with_tcmalloc" != "xno"; then
-   if test "x$with_tcmalloc" != "xyes" && test "x$with_tcmalloc" != "x"; then
+   if test "x$with_tcmalloc" != "xyes" -a "x$with_tcmalloc" != "x"; then
       tclibs="-L$with_tcmalloc -ltcmalloc"
    else
       tclibs="-ltcmalloc"
@@ -361,7 +361,7 @@ if test "x$with_tcmalloc" != "xno"; then
 
    #  Could just have the minimal version.
    if test "$have_tcmalloc" = "no"; then
-      if test "x$with_tcmalloc" != "xyes" && test "x$with_tcmalloc" != "x"; then
+      if test "x$with_tcmalloc" != "xyes" -a "x$with_tcmalloc" != "x"; then
          tclibs="-L$with_tcmalloc -ltcmalloc_minimal"
       else
          tclibs="-ltcmalloc_minimal"
@@ -394,7 +394,7 @@ AC_ARG_WITH([profiler],
    [with_profiler="yes"]
 )
 if test "x$with_profiler" != "xno"; then
-   if test "x$with_profiler" != "xyes" && test "x$with_profiler" != "x"; then
+   if test "x$with_profiler" != "xyes" -a "x$with_profiler" != "x"; then
       proflibs="-L$with_profiler -lprofiler"
    else
       proflibs="-lprofiler"
@@ -411,6 +411,38 @@ fi
 AC_SUBST([PROFILER_LIBS])
 AM_CONDITIONAL([HAVEPROFILER],[test -n "$PROFILER_LIBS"])
 
+#  Check for jemalloc another fast malloc that is good with contention.
+have_jemalloc="no"
+AC_ARG_WITH([jemalloc],
+   [AS_HELP_STRING([--with-jemalloc],
+      [use jemalloc library or specify the directory with lib @<:@yes/no@:>@]
+   )],
+   [with_jemalloc="$withval"],
+   [with_jemalloc="no"]
+)
+if test "x$with_jemalloc" != "xno"; then
+   if test "x$with_jemalloc" != "xyes" -a "x$with_jemalloc" != "x"; then
+      jelibs="-L$with_jemalloc -ljemalloc"
+   else
+      jelibs="-ljemalloc"
+   fi
+   AC_CHECK_LIB([jemalloc],[malloc_usable_size],[have_jemalloc="yes"],[have_jemalloc="no"],
+                $jelibs)
+
+   if test "$have_jemalloc" = "yes"; then
+      JEMALLOC_LIBS="$jelibs"
+   else
+      JEMALLOC_LIBS=""
+   fi
+fi
+AC_SUBST([JEMALLOC_LIBS])
+AM_CONDITIONAL([HAVEJEMALLOC],[test -n "$JEMALLOC_LIBS"])
+
+#  Don't allow both tcmalloc and jemalloc.
+if test "x$have_tcmalloc" != "xno" -a "x$have_jemalloc" != "xno"; then
+   AC_MSG_ERROR([Cannot use tcmalloc at same time as jemalloc])
+fi
+
 # Check for HDF5. This is required.
 AX_LIB_HDF5
 
@@ -734,9 +766,6 @@ case "$with_potential" in
    isothermal)
       AC_DEFINE([EXTERNAL_POTENTIAL_ISOTHERMAL], [1], [Isothermal external potential])
    ;; 
-   softened-isothermal)
-      AC_DEFINE([EXTERNAL_POTENTIAL_SOFTENED_ISOTHERMAL], [1], [Softened isothermal external potential])
-   ;; 
    disc-patch)
       AC_DEFINE([EXTERNAL_POTENTIAL_DISC_PATCH], [1], [Disc-patch external potential])
    ;; 
@@ -781,6 +810,7 @@ AC_MSG_RESULT([
    FFTW3 enabled   : $have_fftw3
    libNUMA enabled : $have_numa
    Using tcmalloc  : $have_tcmalloc
+   Using jemalloc  : $have_jemalloc
    CPU profiler    : $have_profiler
 
    Hydro scheme       : $with_hydro
@@ -795,5 +825,8 @@ AC_MSG_RESULT([
    Debugging checks   : $enable_debugging_checks
 ])
 
+# Make sure the latest git revision string gets included
+touch src/version.c
+
 # Generate output.
 AC_OUTPUT
diff --git a/doc/Doxyfile.in b/doc/Doxyfile.in
index 2a5aeba7d1db0b1e1e56a9a6eed3059aba6a09ff..0df1f91194b6d1e7e98cb1b75be7d3eaaca7fc32 100644
--- a/doc/Doxyfile.in
+++ b/doc/Doxyfile.in
@@ -762,6 +762,7 @@ WARN_LOGFILE           =
 INPUT                  =  @top_srcdir@ @top_srcdir@/src @top_srcdir@/tests @top_srcdir@/examples
 INPUT		       += @top_srcdir@/src/hydro/Minimal
 INPUT		       += @top_srcdir@/src/gravity/Default
+INPUT		       += @top_srcdir@/src/stars/Default
 INPUT		       += @top_srcdir@/src/riemann
 INPUT		       += @top_srcdir@/src/potential/point_mass
 INPUT		       += @top_srcdir@/src/cooling/const_du
diff --git a/examples/CoolingBox/coolingBox.yml b/examples/CoolingBox/coolingBox.yml
index b90ae61e5c862753227b82ebcec4cbf8f3083fab..7b8dbf4bddf8d994dabf34ea68ea55b32a3b4d8a 100644
--- a/examples/CoolingBox/coolingBox.yml
+++ b/examples/CoolingBox/coolingBox.yml
@@ -1,27 +1,27 @@
 # Define the system of units to use internally. 
 InternalUnitSystem:
-  UnitMass_in_cgs:     2.0e33   # Solar masses
-  UnitLength_in_cgs:   3.0857e21   # Kiloparsecs
-  UnitVelocity_in_cgs: 1.0e5   # Time unit is cooling time
-  UnitCurrent_in_cgs:  1   # Amperes
-  UnitTemp_in_cgs:     1   # Kelvin
+  UnitMass_in_cgs:     2.0e33     # Solar masses
+  UnitLength_in_cgs:   3.0857e21  # Kiloparsecs
+  UnitVelocity_in_cgs: 1.0e5      # Kilometers per second
+  UnitCurrent_in_cgs:  1          # Amperes
+  UnitTemp_in_cgs:     1          # Kelvin
 
 # Parameters governing the time integration
 TimeIntegration:
   time_begin: 0.    # The starting time of the simulation (in internal units).
-  time_end:   4.    # The end time of the simulation (in internal units).
-  dt_min:     1e-4  # The minimal time-step size of the simulation (in internal units).
-  dt_max:     1e-4  # The maximal time-step size of the simulation (in internal units).
+  time_end:   0.25  # The end time of the simulation (in internal units).
+  dt_min:     1e-5  # The minimal time-step size of the simulation (in internal units).
+  dt_max:     1e-2  # The maximal time-step size of the simulation (in internal units).
 
 # Parameters governing the snapshots
 Snapshots:
   basename:            coolingBox # Common part of the name of output files
   time_first:          0.         # Time of the first output (in internal units)
-  delta_time:          1.0e-1       # Time difference between consecutive outputs (in internal units)
+  delta_time:          1e-2       # Time difference between consecutive outputs (in internal units)
 
 # Parameters governing the conserved quantities statistics
 Statistics:
-  delta_time:          1e-2 # Time between statistics output
+  delta_time:          1e-3 # Time between statistics output
 
 # Parameters for the hydrodynamics scheme
 SPH:
@@ -35,8 +35,8 @@ InitialConditions:
 
 # Dimensionless pre-factor for the time-step condition
 LambdaCooling:
-  lambda_cgs:                 1.0e-22    # Cooling rate (in cgs units)
-  minimum_temperature:         1.0e4  # Minimal temperature (Kelvin)
-  mean_molecular_weight:       0.59   # Mean molecular weight
-  hydrogen_mass_abundance:     0.75   # Hydrogen mass abundance (dimensionless)
-  cooling_tstep_mult:          1.0    # Dimensionless pre-factor for the time-step condition
+  lambda_cgs:                  1.0e-22    # Cooling rate (in cgs units)
+  minimum_temperature:         1.0e4      # Minimal temperature (Kelvin)
+  mean_molecular_weight:       0.59       # Mean molecular weight
+  hydrogen_mass_abundance:     0.75       # Hydrogen mass abundance (dimensionless)
+  cooling_tstep_mult:          1.0        # Dimensionless pre-factor for the time-step condition
diff --git a/examples/CoolingBox/energy_plot.py b/examples/CoolingBox/energy_plot.py
index 00e6fd1dfa0ee9bfbb9b5147282776f635b060f5..c8948e7e209c2786ffdecbb2b8b606e73d703238 100644
--- a/examples/CoolingBox/energy_plot.py
+++ b/examples/CoolingBox/energy_plot.py
@@ -1,99 +1,128 @@
+import matplotlib
+matplotlib.use("Agg")
+from pylab import *
+import h5py
+
+# Plot parameters
+params = {'axes.labelsize': 10,
+'axes.titlesize': 10,
+'font.size': 12,
+'legend.fontsize': 12,
+'xtick.labelsize': 10,
+'ytick.labelsize': 10,
+'text.usetex': True,
+ 'figure.figsize' : (3.15,3.15),
+'figure.subplot.left'    : 0.145,
+'figure.subplot.right'   : 0.99,
+'figure.subplot.bottom'  : 0.11,
+'figure.subplot.top'     : 0.99,
+'figure.subplot.wspace'  : 0.15,
+'figure.subplot.hspace'  : 0.12,
+'lines.markersize' : 6,
+'lines.linewidth' : 3.,
+'text.latex.unicode': True
+}
+rcParams.update(params)
+rc('font',**{'family':'sans-serif','sans-serif':['Times']})
+
+
 import numpy as np
-import matplotlib.pyplot as plt
 import h5py as h5
 import sys
 
+# File containing the total energy
 stats_filename = "./energy.txt"
+
+# First snapshot
 snap_filename = "coolingBox_000.hdf5"
-#plot_dir = "./"
 
-#some constants in cgs units
+# Some constants in cgs units
 k_b = 1.38E-16 #boltzmann
 m_p = 1.67e-24 #proton mass
-#initial conditions set in makeIC.py
-rho = 3.2e3
-P = 4.5e6
-#n_H_cgs = 0.0001
-gamma = 5./3.
+
+# Initial conditions set in makeIC.py
 T_init = 1.0e5
 
-#Read the units parameters from the snapshot
+# Read the initial state of the gas
 f = h5.File(snap_filename,'r')
+rho = np.mean(f["/PartType0/Density"])
+pressure = np.mean(f["/PartType0/Pressure"])
+
+# Read the units parameters from the snapshot
 units = f["InternalCodeUnits"]
 unit_mass = units.attrs["Unit mass in cgs (U_M)"]
 unit_length = units.attrs["Unit length in cgs (U_L)"]
 unit_time = units.attrs["Unit time in cgs (U_t)"]
+
+# Read the properties of the cooling function
 parameters = f["Parameters"]
 cooling_lambda = float(parameters.attrs["LambdaCooling:lambda_cgs"])
 min_T = float(parameters.attrs["LambdaCooling:minimum_temperature"])
 mu = float(parameters.attrs["LambdaCooling:mean_molecular_weight"])
 X_H = float(parameters.attrs["LambdaCooling:hydrogen_mass_abundance"])
 
-#get number of particles
-header = f["Header"]
-n_particles = header.attrs["NumPart_ThisFile"][0]
+# Read the adiabatic index
+gamma = float(f["HydroScheme"].attrs["Adiabatic index"])
+
+print "Initial density :", rho
+print "Initial pressure:", pressure
+print "Adiabatic index :", gamma
 
-#read energy and time arrays
+# Read energy and time arrays
 array = np.genfromtxt(stats_filename,skip_header = 1)
 time = array[:,0]
-kin_plus_therm = array[:,2]
-radiated = array[:,6]
 total_mass = array[:,1]
-
-#ignore first row where there are just zeros
-time = time[1:]
-kin_plus_therm = kin_plus_therm[1:]
-radiated = radiated[1:]
-total_mass = total_mass[1:]
-
-total_energy = kin_plus_therm + radiated
+total_energy = array[:,2]
+kinetic_energy = array[:,3]
+internal_energy = array[:,4]
+radiated_energy = array[:,8]
 initial_energy = total_energy[0]
-#conversions to cgs
+
+# Conversions to cgs
 rho_cgs = rho * unit_mass / (unit_length)**3
 time_cgs = time * unit_time
-initial_energy_cgs = initial_energy/total_mass[0] * unit_length**2 / (unit_time)**2 
-n_H_cgs = X_H * rho_cgs / m_p
+total_energy_cgs = total_energy / total_mass[0] * unit_length**2 / (unit_time)**2
+kinetic_energy_cgs = kinetic_energy / total_mass[0] * unit_length**2 / (unit_time)**2
+internal_energy_cgs = internal_energy / total_mass[0] * unit_length**2 / (unit_time)**2
+radiated_energy_cgs = radiated_energy / total_mass[0] * unit_length**2 / (unit_time)**2  
 
-#find the energy floor
+# Find the energy floor
 u_floor_cgs = k_b * min_T / (mu * m_p * (gamma - 1.))
 
-#find analytic solution
-analytic_time_cgs = np.linspace(0,time_cgs[-1],1000)
+# Find analytic solution
+initial_energy_cgs = initial_energy/total_mass[0] * unit_length**2 / (unit_time)**2 
+n_H_cgs = X_H * rho_cgs / m_p
 du_dt_cgs = -cooling_lambda * n_H_cgs**2 / rho_cgs
+cooling_time_cgs = (initial_energy_cgs/(-du_dt_cgs))[0]
+analytic_time_cgs = np.linspace(0, cooling_time_cgs * 1.8, 1000)
 u_analytic_cgs = du_dt_cgs*analytic_time_cgs + initial_energy_cgs
-cooling_time_cgs = initial_energy_cgs/(-du_dt_cgs)
-
-for i in range(u_analytic_cgs.size):
-    if u_analytic_cgs[i]<u_floor_cgs:
-        u_analytic_cgs[i] = u_floor_cgs
-
-#rescale analytic solution
-u_analytic = u_analytic_cgs/initial_energy_cgs
-
-#put time in units of cooling_time
-time=time_cgs/cooling_time_cgs
-analytic_time = analytic_time_cgs/cooling_time_cgs
-
-#rescale (numerical) energy by initial energy
-radiated /= initial_energy
-kin_plus_therm /= initial_energy
-total_energy = kin_plus_therm + radiated
-plt.plot(time,kin_plus_therm,'kd',label = "Kinetic + thermal energy")
-plt.plot(time,radiated,'bo',label = "Radiated energy")
-plt.plot(time,total_energy,'g',label = "Total energy")
-plt.plot(analytic_time,u_analytic,'r',lw = 2.0,label = "Analytic Solution")
-#plt.plot(analytic_time,1-u_analytic,'k',lw = 2.0)
-#plt.plot((cooling_time,cooling_time),(0,1),'b',label = "Cooling time")
-#plt.plot((time[1]-time_cgs[0],time_cgs[1]-time_cgs[0]),(0,1),'m',label = "First output")
-#plt.title(r"$n_H = %1.1e \, \mathrm{cm}^{-3}$" %n_H_cgs)
-plt.xlabel("Time / cooling time")
-plt.ylabel("Energy / Initial energy")
-#plt.ylim(0,1.1)
-plt.ylim(0.999,1.001)
-#plt.xlim(0,min(10,time[-1]))
-plt.legend(loc = "upper right")    
-if (int(sys.argv[1])==0):
-    plt.show()
-else:
-    plt.savefig(full_plot_filename,format = "png")
-    plt.close()
+u_analytic_cgs[u_analytic_cgs < u_floor_cgs] = u_floor_cgs
+
+print "Cooling time:", cooling_time_cgs, "[s]"
+
+# Read snapshots
+u_snapshots_cgs = zeros(25)
+t_snapshots_cgs = zeros(25)
+for i in range(25):
+    snap = h5.File("coolingBox_%0.3d.hdf5"%i,'r')
+    u_snapshots_cgs[i] = sum(snap["/PartType0/InternalEnergy"][:] * snap["/PartType0/Masses"][:])  / total_mass[0] * unit_length**2 / (unit_time)**2
+    t_snapshots_cgs[i] = snap["/Header"].attrs["Time"] * unit_time
+
+
+figure()
+plot(time_cgs, total_energy_cgs, 'r-', lw=1.6, label="Gas total energy")
+plot(t_snapshots_cgs, u_snapshots_cgs, 'rD', ms=3)
+plot(time_cgs, radiated_energy_cgs, 'g-', lw=1.6, label="Radiated energy")
+plot(time_cgs, total_energy_cgs + radiated_energy_cgs, 'b-', lw=0.6, label="Gas total + radiated")
+
+plot(analytic_time_cgs, u_analytic_cgs, '--', color='k', alpha=0.8, lw=1.0, label="Analytic solution")
+
+legend(loc="upper right", fontsize=8, frameon=False, handlelength=3, ncol=1)
+xlabel("${\\rm{Time~[s]}}$", labelpad=0)
+ylabel("${\\rm{Energy~[erg]}}$")
+xlim(0, 1.5*cooling_time_cgs)
+ylim(0, 1.5*u_analytic_cgs[0])
+
+savefig("energy.png", dpi=200)
+
+
diff --git a/examples/CoolingBox/getGlass.sh b/examples/CoolingBox/getGlass.sh
new file mode 100755
index 0000000000000000000000000000000000000000..ffd92e88deae6e91237059adac2a6c2067caee46
--- /dev/null
+++ b/examples/CoolingBox/getGlass.sh
@@ -0,0 +1,2 @@
+#!/bin/bash
+wget http://virgodb.cosma.dur.ac.uk/swift-webstorage/ICs/glassCube_32.hdf5
diff --git a/examples/CoolingBox/makeIC.py b/examples/CoolingBox/makeIC.py
index 5de012a17af4eef71e56548602e7956faef529f5..f863e174b1fcd404ae178fe324c7a165598b4af0 100644
--- a/examples/CoolingBox/makeIC.py
+++ b/examples/CoolingBox/makeIC.py
@@ -1,6 +1,6 @@
 ###############################################################################
  # This file is part of SWIFT.
- # Copyright (c) 2013 Pedro Gonnet (pedro.gonnet@durham.ac.uk),
+ # Copyright (c) 2016 Stefan Arridge (stefan.arridge@durhama.ac.uk)
  #                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
  # 
  # This program is free software: you can redistribute it and/or modify
@@ -22,13 +22,11 @@ import h5py
 import sys
 from numpy import *
 
-# Generates a swift IC file containing a cartesian distribution of particles
-# at a constant density and pressure in a cubic box
+# Generates a SWIFT IC file with a constant density and pressure
 
 # Parameters
 periodic= 1           # 1 For periodic box
 boxSize = 1           # 1 kiloparsec    
-L = int(sys.argv[1])  # Number of particles along one axis
 rho = 3.2e3           # Density in code units (3.2e6 is 0.1 hydrogen atoms per cm^3)
 P = 4.5e6             # Pressure in code units (at 10^5K)
 gamma = 5./3.         # Gas adiabatic index
@@ -36,12 +34,17 @@ eta = 1.2349          # 48 ngbs with cubic spline kernel
 fileName = "coolingBox.hdf5" 
 
 #---------------------------------------------------
-numPart = L**3
-mass = boxSize**3 * rho / numPart
-print mass
-internalEnergy = P / ((gamma - 1.)*rho)
 
-#--------------------------------------------------
+# Read id, position and h from glass
+glass = h5py.File("glassCube_32.hdf5", "r")
+ids = glass["/PartType0/ParticleIDs"][:]
+pos = glass["/PartType0/Coordinates"][:,:] * boxSize
+h = glass["/PartType0/SmoothingLength"][:] * boxSize
+
+# Compute basic properties
+numPart = size(pos) / 3
+mass = boxSize**3 * rho / numPart
+internalEnergy = P / ((gamma - 1.) * rho)
 
 #File
 file = h5py.File(fileName, 'w')
@@ -57,11 +60,11 @@ grp.attrs["NumFilesPerSnapshot"] = 1
 grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
 grp.attrs["Flag_Entropy_ICs"] = 0
 
-#Runtime parameters
+# Runtime parameters
 grp = file.create_group("/RuntimePars")
 grp.attrs["PeriodicBoundariesOn"] = periodic
 
-#Units
+# Units
 grp = file.create_group("/Units")
 grp.attrs["Unit length in cgs (U_L)"] = 3.0857e21 
 grp.attrs["Unit mass in cgs (U_M)"] = 2.0e33 
@@ -75,35 +78,26 @@ grp = file.create_group("/PartType0")
 v  = zeros((numPart, 3))
 ds = grp.create_dataset('Velocities', (numPart, 3), 'f')
 ds[()] = v
-v = zeros(1)
 
 m = full((numPart, 1), mass)
 ds = grp.create_dataset('Masses', (numPart,1), 'f')
 ds[()] = m
-m = zeros(1)
 
-h = full((numPart, 1), eta * boxSize / L)
-ds = grp.create_dataset('SmoothingLength', (numPart,1), 'f')
+h = reshape(h, (numPart, 1))
+ds = grp.create_dataset('SmoothingLength', (numPart, 1), 'f')
 ds[()] = h
-h = zeros(1)
 
 u = full((numPart, 1), internalEnergy)
 ds = grp.create_dataset('InternalEnergy', (numPart,1), 'f')
 ds[()] = u
-u = zeros(1)
 
-
-ids = linspace(0, numPart, numPart, endpoint=False).reshape((numPart,1))
+ids = reshape(ids, (numPart, 1))
 ds = grp.create_dataset('ParticleIDs', (numPart, 1), 'L')
-ds[()] = ids + 1
-x      = ids % L;
-y      = ((ids - x) / L) % L;
-z      = (ids - x - L * y) / L**2;
-coords = zeros((numPart, 3))
-coords[:,0] = z[:,0] * boxSize / L + boxSize / (2*L)
-coords[:,1] = y[:,0] * boxSize / L + boxSize / (2*L)
-coords[:,2] = x[:,0] * boxSize / L + boxSize / (2*L)
+ds[()] = ids
+
 ds = grp.create_dataset('Coordinates', (numPart, 3), 'd')
-ds[()] = coords
+ds[()] = pos
 
 file.close()
+
+print numPart
diff --git a/examples/CoolingBox/run.sh b/examples/CoolingBox/run.sh
index cb3264808d57b435c9f65bf5a684a94ff9f878fd..19e787df716145c1f5aa7744f4c7204c1c7f1064 100755
--- a/examples/CoolingBox/run.sh
+++ b/examples/CoolingBox/run.sh
@@ -1,14 +1,20 @@
+
 #!/bin/bash
 
 # Generate the initial conditions if they are not present.
-echo "Generating initial conditions for the cooling box example..."
-
-python makeIC.py 10
-
-../swift -s -C -t 16 coolingBox.yml 
-
-#-C 2>&1 | tee output.log
+if [ ! -e glassCube_32.hdf5 ]
+then
+    echo "Fetching initial glass file for the cooling box example..."
+    ./getGlass.sh
+fi
+if [ ! -e coolingBox.hdf5 ]
+then
+    echo "Generating initial conditions for the cooling box example..."
+    python makeIC.py
+fi
 
-python energy_plot.py 0
+# Run SWIFT
+../swift -s -C -t 1 coolingBox.yml
 
-#python test_energy_conservation.py 0
+# Check energy conservation and cooling rate
+python energy_plot.py
diff --git a/examples/CoolingBox/test_energy_conservation.py b/examples/CoolingBox/test_energy_conservation.py
deleted file mode 100644
index bb15071c0668d71580015351ce75ce18390c8cf0..0000000000000000000000000000000000000000
--- a/examples/CoolingBox/test_energy_conservation.py
+++ /dev/null
@@ -1,116 +0,0 @@
-import numpy as np
-import matplotlib.pyplot as plt
-import h5py as h5
-import sys
-
-stats_filename = "./energy.txt"
-snap_filename = "coolingBox_000.hdf5"
-#plot_dir = "./"
-n_snaps = 41
-time_end = 4.0
-dt_snap = 0.1
-#some constants in cgs units
-k_b = 1.38E-16 #boltzmann
-m_p = 1.67e-24 #proton mass
-#initial conditions set in makeIC.py
-rho = 4.8e3
-P = 4.5e6
-#n_H_cgs = 0.0001
-gamma = 5./3.
-T_init = 1.0e5
-
-#find the sound speed
-
-#Read the units parameters from the snapshot
-f = h5.File(snap_filename,'r')
-units = f["InternalCodeUnits"]
-unit_mass = units.attrs["Unit mass in cgs (U_M)"]
-unit_length = units.attrs["Unit length in cgs (U_L)"]
-unit_time = units.attrs["Unit time in cgs (U_t)"]
-parameters = f["Parameters"]
-cooling_lambda = float(parameters.attrs["LambdaCooling:lambda_cgs"])
-min_T = float(parameters.attrs["LambdaCooling:minimum_temperature"])
-mu = float(parameters.attrs["LambdaCooling:mean_molecular_weight"])
-X_H = float(parameters.attrs["LambdaCooling:hydrogen_mass_abundance"])
-
-#get number of particles
-header = f["Header"]
-n_particles = header.attrs["NumPart_ThisFile"][0]
-#read energy and time arrays
-array = np.genfromtxt(stats_filename,skip_header = 1)
-time = array[:,0]
-total_energy = array[:,2]
-total_mass = array[:,1]
-
-time = time[1:]
-total_energy = total_energy[1:]
-total_mass = total_mass[1:]
-
-#conversions to cgs
-rho_cgs = rho * unit_mass / (unit_length)**3
-time_cgs = time * unit_time
-u_init_cgs = total_energy[0]/(total_mass[0]) * unit_length**2 / (unit_time)**2 
-n_H_cgs = X_H * rho_cgs / m_p
-
-#find the sound speed in cgs
-c_s = np.sqrt((gamma - 1.)*k_b*T_init/(mu*m_p))
-#assume box size is unit length
-sound_crossing_time = unit_length/c_s
-
-print "Sound speed = %g cm/s" %c_s
-print "Sound crossing time = %g s" %sound_crossing_time 
-#find the energy floor
-u_floor_cgs = k_b * min_T / (mu * m_p * (gamma - 1.))
-#find analytic solution
-analytic_time_cgs = np.linspace(time_cgs[0],time_cgs[-1],1000)
-du_dt_cgs = -cooling_lambda * n_H_cgs**2 / rho_cgs
-u_analytic = du_dt_cgs*(analytic_time_cgs - analytic_time_cgs[0]) + u_init_cgs
-cooling_time = u_init_cgs/(-du_dt_cgs)
-
-#put time in units of sound crossing time
-time=time_cgs/sound_crossing_time
-analytic_time = analytic_time_cgs/sound_crossing_time
-#rescale energy to initial energy
-total_energy /= total_energy[0]
-u_analytic /= u_init_cgs
-u_floor_cgs /= u_init_cgs
-# plot_title = r"$\Lambda \, = \, %1.1g \mathrm{erg}\mathrm{cm^3}\mathrm{s^{-1}} \, \, T_{init} = %1.1g\mathrm{K} \, \, T_{floor} = %1.1g\mathrm{K} \, \, n_H = %1.1g\mathrm{cm^{-3}}$" %(cooling_lambda,T_init,T_floor,n_H)
-# plot_filename = "energy_plot_creasey_no_cooling_T_init_1p0e5_n_H_0p1.png"
-#analytic_solution = np.zeros(n_snaps-1)
-for i in range(u_analytic.size):
-    if u_analytic[i]<u_floor_cgs:
-        u_analytic[i] = u_floor_cgs
-plt.plot(time-time[0],total_energy,'k',label = "Numerical solution from energy.txt")
-plt.plot(analytic_time-analytic_time[0],u_analytic,'r',lw = 2.0,label = "Analytic Solution")
-
-#now get energies from the snapshots
-snapshot_time = np.linspace(0,time_end,num = n_snaps)
-snapshot_time = snapshot_time[1:]
-snapshot_time_cgs = snapshot_time * unit_time
-snapshot_time = snapshot_time_cgs/ sound_crossing_time
-snapshot_time -= snapshot_time[0]
-snapshot_energy = np.zeros(n_snaps)
-for i in range(0,n_snaps):
-    snap_filename = "coolingBox_%03d.hdf5" %i
-    f = h5.File(snap_filename,'r')
-    snapshot_internal_energy_array = np.array(f["PartType0/InternalEnergy"])
-    total_internal_energy = np.sum(snapshot_internal_energy_array)
-    velocity_array = np.array(f["PartType0/Velocities"])
-    total_kinetic_energy = 0.5*np.sum(velocity_array**2)
-    snapshot_energy[i] = total_internal_energy + total_kinetic_energy
-snapshot_energy/=snapshot_energy[0]
-snapshot_energy = snapshot_energy[1:]
-
-plt.plot(snapshot_time,snapshot_energy,'bd',label = "Numerical solution from snapshots")
-
-#plt.title(r"$n_H = %1.1e \, \mathrm{cm}^{-3}$" %n_H_cgs)
-plt.xlabel("Time (sound crossing time)")
-plt.ylabel("Energy/Initial energy")
-plt.ylim(0.99,1.01)
-#plt.xlim(0,min(10,time[-1]))
-plt.legend(loc = "upper right")    
-if (int(sys.argv[1])==0):
-    plt.show()
-else:
-    plt.savefig(full_plot_filename,format = "png")
-    plt.close()
diff --git a/examples/CoolingHalo/cooling_halo.yml b/examples/CoolingHalo/cooling_halo.yml
index c06b099eb0dd06d39040e0ecc8e8f1320a89ac6b..e8978ad6c96017d9b5fbe35346555e6b59bc7e7d 100644
--- a/examples/CoolingHalo/cooling_halo.yml
+++ b/examples/CoolingHalo/cooling_halo.yml
@@ -37,7 +37,7 @@ InitialConditions:
   shift_z:    0.
  
 # External potential parameters
-SoftenedIsothermalPotential:
+IsothermalPotential:
   position_x:      0.     # location of centre of isothermal potential in internal units
   position_y:      0.
   position_z:      0.	
diff --git a/examples/CoolingHaloWithSpin/cooling_halo.yml b/examples/CoolingHaloWithSpin/cooling_halo.yml
index 684dd11fcf7adc9477d199e599dfb5b76faa91f6..fc5094f9f5dcae62bb936d2b5510f41e3c70504e 100644
--- a/examples/CoolingHaloWithSpin/cooling_halo.yml
+++ b/examples/CoolingHaloWithSpin/cooling_halo.yml
@@ -9,8 +9,8 @@ InternalUnitSystem:
 # Parameters governing the time integration
 TimeIntegration:
   time_begin: 0.    # The starting time of the simulation (in internal units).
-  time_end:   10.    # The end time of the simulation (in internal units).
-  dt_min:     1e-7  # The minimal time-step size of the simulation (in internal units).
+  time_end:   10.   # The end time of the simulation (in internal units).
+  dt_min:     1e-5  # The minimal time-step size of the simulation (in internal units).
   dt_max:     1e-1  # The maximal time-step size of the simulation (in internal units).
 
 # Parameters governing the conserved quantities statistics
@@ -34,13 +34,13 @@ InitialConditions:
   file_name:  CoolingHalo.hdf5       # The file to read
  
 # External potential parameters
-SoftenedIsothermalPotential:
-  position_x:      0.     # location of centre of isothermal potential in internal units
+IsothermalPotential:
+  position_x:      0.     # Location of centre of isothermal potential in internal units
   position_y:      0.
   position_z:      0.	
-  vrot:            200.     # rotation speed of isothermal potential in internal units
-  timestep_mult:   0.03     # controls time step
-  epsilon:         1.0      #softening for the isothermal potential
+  vrot:            200.   # Rotation speed of isothermal potential in internal units
+  timestep_mult:   0.03   # Controls time step
+  epsilon:         1.0    # Softening for the isothermal potential
 
 # Cooling parameters
 LambdaCooling:
diff --git a/examples/CoolingHaloWithSpin/density_profile.py b/examples/CoolingHaloWithSpin/density_profile.py
index ea282328e5b75530a128eab2dec5f065e46cf819..fb88ddd6aea71603a6f6fcb36b13771106737e6a 100644
--- a/examples/CoolingHaloWithSpin/density_profile.py
+++ b/examples/CoolingHaloWithSpin/density_profile.py
@@ -28,7 +28,7 @@ unit_mass_cgs = float(params.attrs["InternalUnitSystem:UnitMass_in_cgs"])
 unit_length_cgs = float(params.attrs["InternalUnitSystem:UnitLength_in_cgs"])
 unit_velocity_cgs = float(params.attrs["InternalUnitSystem:UnitVelocity_in_cgs"])
 unit_time_cgs = unit_length_cgs / unit_velocity_cgs
-v_c = float(params.attrs["SoftenedIsothermalPotential:vrot"])
+v_c = float(params.attrs["IsothermalPotential:vrot"])
 v_c_cgs = v_c * unit_velocity_cgs
 lambda_cgs = float(params.attrs["LambdaCooling:lambda_cgs"])
 X_H = float(params.attrs["LambdaCooling:hydrogen_mass_abundance"])
@@ -101,18 +101,18 @@ for i in range(n_snaps):
         rho_0 = density[0]
 
         rho_analytic_init = rho_0 * (radial_bin_mids/r_0)**(-2)
-    plt.plot(radial_bin_mids,density/rho_analytic_init,'ko',label = "Average density of shell")
-    #plt.plot(t,rho_analytic,label = "Initial analytic density profile"
+    plt.plot(radial_bin_mids,density,'ko',label = "Average density of shell")
+    plt.plot(radial_bin_mids,rho_analytic_init,label = "Initial analytic density profile")
     plt.xlabel(r"$r / r_{vir}$")
-    plt.ylabel(r"$\rho / \rho_{init})$")
+    plt.ylabel(r"$(\rho / \rho_{init})$")
     plt.title(r"$\mathrm{Time}= %.3g \, s \, , \, %d \, \, \mathrm{particles} \,,\, v_c = %.1f \, \mathrm{km / s}$" %(snap_time_cgs,N,v_c))
     #plt.ylim((1.e-2,1.e1))
-    plt.plot((r_cool_over_r_vir,r_cool_over_r_vir),(0,20),'r',label = "Cooling radius")
+    plt.plot((r_cool_over_r_vir,r_cool_over_r_vir),(1.0e-4,1.0e4),'r',label = "Cooling radius")
     plt.xlim((radial_bin_mids[0],max_r))
-    plt.ylim((0,20))
-    plt.plot((0,max_r),(1,1))
+    plt.ylim((1.0e-4,1.0e4))
+    #plt.plot((0,max_r),(1,1))
     #plt.xscale('log')
-    #plt.yscale('log')
+    plt.yscale('log')
     plt.legend(loc = "upper right")
     plot_filename = "density_profile_%03d.png" %i
     plt.savefig(plot_filename,format = "png")
diff --git a/examples/CoolingHaloWithSpin/internal_energy_profile.py b/examples/CoolingHaloWithSpin/internal_energy_profile.py
index a3e470cc24a939c9bc915371e927d9bd39196bff..5f71d69ca7a978de242559f84ec390faa86a27f0 100644
--- a/examples/CoolingHaloWithSpin/internal_energy_profile.py
+++ b/examples/CoolingHaloWithSpin/internal_energy_profile.py
@@ -46,7 +46,7 @@ unit_mass_cgs = float(params.attrs["InternalUnitSystem:UnitMass_in_cgs"])
 unit_length_cgs = float(params.attrs["InternalUnitSystem:UnitLength_in_cgs"])
 unit_velocity_cgs = float(params.attrs["InternalUnitSystem:UnitVelocity_in_cgs"])
 unit_time_cgs = unit_length_cgs / unit_velocity_cgs
-v_c = float(params.attrs["SoftenedIsothermalPotential:vrot"])
+v_c = float(params.attrs["IsothermalPotential:vrot"])
 v_c_cgs = v_c * unit_velocity_cgs
 lambda_cgs = float(params.attrs["LambdaCooling:lambda_cgs"])
 X_H = float(params.attrs["LambdaCooling:hydrogen_mass_abundance"])
diff --git a/examples/CoolingHaloWithSpin/makeIC.py b/examples/CoolingHaloWithSpin/makeIC.py
index 8970fbaa70578532a4f41bab7a096d8fa3565d26..a6d57868ad7542498b27007a5c3ef9234b9feb84 100644
--- a/examples/CoolingHaloWithSpin/makeIC.py
+++ b/examples/CoolingHaloWithSpin/makeIC.py
@@ -36,6 +36,7 @@ h = 0.67777 # hubble parameter
 gamma = 5./3.
 eta = 1.2349
 spin_lambda = 0.05 #spin parameter
+f_b = 0.2 #baryon fraction
 
 # First set unit velocity and then the circular velocity parameter for the isothermal potential
 const_unit_velocity_in_cgs = 1.e5 #kms^-1
@@ -99,6 +100,8 @@ grp.attrs["PeriodicBoundariesOn"] = periodic
 # set seed for random number
 np.random.seed(1234)
 
+gas_mass = f_b * np.sqrt(3.) / 2. #virial mass of halo is 1, virial radius is 1, enclosed mass scales with r
+gas_particle_mass = gas_mass / float(N)
 
 # Positions
 # r^(-2) distribution corresponds to uniform distribution in radius
@@ -164,12 +167,12 @@ N = x_coords.size
 print "Number of particles in the box = " , N
 
 #make the coords and radius arrays again
-coords_2 = np.zeros((N,3))
-coords_2[:,0] = x_coords
-coords_2[:,1] = y_coords
-coords_2[:,2] = z_coords
+coords= np.zeros((N,3))
+coords[:,0] = x_coords
+coords[:,1] = y_coords
+coords[:,2] = z_coords
 
-radius = np.sqrt((coords_2[:,0]-boxSize/2.)**2 + (coords_2[:,1]-boxSize/2.)**2 + (coords_2[:,2]-boxSize/2.)**2)
+radius = np.sqrt((coords[:,0]-boxSize/2.)**2 + (coords[:,1]-boxSize/2.)**2 + (coords[:,2]-boxSize/2.)**2)
 
 #now give particle's velocities
 v = np.zeros((N,3))
@@ -184,7 +187,7 @@ print "J =", J
 omega = np.zeros((N,3))
 for i in range(N):
     omega[i,2] = 3.*J / radius[i]
-    v[i,:] = np.cross(omega[i,:],(coords_2[i,:]-boxSize/2.))
+    v[i,:] = np.cross(omega[i,:],(coords[i,:]-boxSize/2.))
         
 # Header
 grp = file.create_group("/Header")
@@ -202,16 +205,15 @@ grp.attrs["Dimension"] = 3
 grp = file.create_group("/PartType0")
 
 ds = grp.create_dataset('Coordinates', (N, 3), 'd')
-ds[()] = coords_2
-coords_2 = np.zeros(1)
+ds[()] = coords
+coords = np.zeros(1)
 
 ds = grp.create_dataset('Velocities', (N, 3), 'f')
 ds[()] = v
 v = np.zeros(1)
 
 # All particles of equal mass
-mass = 1. / N
-m = np.full((N,),mass)
+m = np.full((N,),gas_particle_mass)
 ds = grp.create_dataset('Masses', (N, ), 'f')
 ds[()] = m
 m = np.zeros(1)
diff --git a/examples/CoolingHaloWithSpin/run.sh b/examples/CoolingHaloWithSpin/run.sh
index 3a0d9c02000e760b030a96107038d3c6163f3227..131fbf3cb10d2014546683b5f43194840544fd55 100755
--- a/examples/CoolingHaloWithSpin/run.sh
+++ b/examples/CoolingHaloWithSpin/run.sh
@@ -4,7 +4,8 @@
 echo "Generating initial conditions for the isothermal potential box example..."
 python makeIC.py 10000 
 
-../swift -g -s -C -t 16 cooling_halo.yml 2>&1 | tee output.log
+# Run SWIFT with external potential, SPH and cooling
+../swift -g -s -C -t 1 cooling_halo.yml 2>&1 | tee output.log
 
 # python radial_profile.py 10
 
diff --git a/examples/CoolingHaloWithSpin/velocity_profile.py b/examples/CoolingHaloWithSpin/velocity_profile.py
index d64d255b18482bc26578f21f46199aa3540ae7b5..07df8e1b0751307513c30a5b128773b193c3a9cd 100644
--- a/examples/CoolingHaloWithSpin/velocity_profile.py
+++ b/examples/CoolingHaloWithSpin/velocity_profile.py
@@ -46,7 +46,7 @@ unit_mass_cgs = float(params.attrs["InternalUnitSystem:UnitMass_in_cgs"])
 unit_length_cgs = float(params.attrs["InternalUnitSystem:UnitLength_in_cgs"])
 unit_velocity_cgs = float(params.attrs["InternalUnitSystem:UnitVelocity_in_cgs"])
 unit_time_cgs = unit_length_cgs / unit_velocity_cgs
-v_c = float(params.attrs["SoftenedIsothermalPotential:vrot"])
+v_c = float(params.attrs["IsothermalPotential:vrot"])
 v_c_cgs = v_c * unit_velocity_cgs
 header = f["Header"]
 N = header.attrs["NumPart_Total"][0]
diff --git a/examples/EAGLE_100/README b/examples/EAGLE_100/README
new file mode 100644
index 0000000000000000000000000000000000000000..e3af3c0e1281f8e9ba9e0aae3fa6dd8475359a47
--- /dev/null
+++ b/examples/EAGLE_100/README
@@ -0,0 +1,16 @@
+ICs extracted from the EAGLE suite of simulations. 
+
+WARNING: The ICs are 217GB in size. They contain ~3.4G DM particles,
+~3.2G gas particles and ~170M star particles
+
+The particle distribution here is the snapshot 27 (z=0.1) of the 100Mpc
+Ref-model. h- and a- factors from the original Gadget code have been
+corrected for. Variables not used in a pure hydro & gravity code have
+been removed. 
+Everything is ready to be run without cosmological integration. 
+
+The particle load of the main EAGLE simulation can be reproduced by
+running these ICs on 4096 cores.
+
+MD5 checksum of the ICs:
+2301ea73e14207b541bbb04163c5269e  EAGLE_ICs_100.hdf5
diff --git a/examples/EAGLE_100/eagle_100.yml b/examples/EAGLE_100/eagle_100.yml
new file mode 100644
index 0000000000000000000000000000000000000000..a9b83b81f085e66b36d115c5265b66d6093ffdfb
--- /dev/null
+++ b/examples/EAGLE_100/eagle_100.yml
@@ -0,0 +1,35 @@
+# Define the system of units to use internally. 
+InternalUnitSystem:
+  UnitMass_in_cgs:     1.989e43      # 10^10 M_sun in grams
+  UnitLength_in_cgs:   3.085678e24   # Mpc in centimeters
+  UnitVelocity_in_cgs: 1e5           # km/s in centimeters per second
+  UnitCurrent_in_cgs:  1             # Amperes
+  UnitTemp_in_cgs:     1             # Kelvin
+
+# Parameters governing the time integration
+TimeIntegration:
+  time_begin: 0.    # The starting time of the simulation (in internal units).
+  time_end:   1e-2  # The end time of the simulation (in internal units).
+  dt_min:     1e-10 # The minimal time-step size of the simulation (in internal units).
+  dt_max:     1e-4  # The maximal time-step size of the simulation (in internal units).
+
+# Parameters governing the snapshots
+Snapshots:
+  basename:            eagle # Common part of the name of output files
+  time_first:          0.    # Time of the first output (in internal units)
+  delta_time:          1e-3  # Time difference between consecutive outputs (in internal units)
+
+# Parameters governing the conserved quantities statistics
+Statistics:
+  delta_time:          1e-2 # Time between statistics output
+
+# Parameters for the hydrodynamics scheme
+SPH:
+  resolution_eta:        1.2348   # Target smoothing length in units of the mean inter-particle separation (1.2348 == 48Ngbs with the cubic spline kernel).
+  delta_neighbours:      0.1      # The tolerance for the targetted number of neighbours.
+  CFL_condition:         0.1      # Courant-Friedrich-Levy condition for time integration.
+
+# Parameters related to the initial conditions
+InitialConditions:
+  file_name:  ./EAGLE_ICs_100.hdf5     # The file to read
+
diff --git a/examples/EAGLE_100/getIC.sh b/examples/EAGLE_100/getIC.sh
new file mode 100755
index 0000000000000000000000000000000000000000..227df3f9f79d294cd8ccbfd3b72b02dfbea2ebd6
--- /dev/null
+++ b/examples/EAGLE_100/getIC.sh
@@ -0,0 +1,2 @@
+#!/bin/bash
+wget http://virgodb.cosma.dur.ac.uk/swift-webstorage/ICs/EAGLE_ICs_100.hdf5
diff --git a/examples/EAGLE_100/run.sh b/examples/EAGLE_100/run.sh
new file mode 100755
index 0000000000000000000000000000000000000000..6ef47d5d98172cc8a318242923ede37332bd5590
--- /dev/null
+++ b/examples/EAGLE_100/run.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+
+ # Generate the initial conditions if they are not present.
+if [ ! -e EAGLE_ICs_100.hdf5 ]
+then
+    echo "Fetching initial conditions for the EAGLE 100Mpc example..."
+    ./getIC.sh
+fi
+
+../swift -s -t 16 eagle_100.yml 2>&1 | tee output.log
+
diff --git a/examples/ExternalPointMass/energy_plot.py b/examples/ExternalPointMass/energy_plot.py
new file mode 100644
index 0000000000000000000000000000000000000000..a75fcb835d33b3695170aab822092556f12db7d1
--- /dev/null
+++ b/examples/ExternalPointMass/energy_plot.py
@@ -0,0 +1,121 @@
+import matplotlib
+matplotlib.use("Agg")
+from pylab import *
+import h5py
+
+# Plot parameters
+params = {'axes.labelsize': 10,
+'axes.titlesize': 10,
+'font.size': 12,
+'legend.fontsize': 12,
+'xtick.labelsize': 10,
+'ytick.labelsize': 10,
+'text.usetex': True,
+ 'figure.figsize' : (3.15,3.15),
+'figure.subplot.left'    : 0.145,
+'figure.subplot.right'   : 0.99,
+'figure.subplot.bottom'  : 0.11,
+'figure.subplot.top'     : 0.99,
+'figure.subplot.wspace'  : 0.15,
+'figure.subplot.hspace'  : 0.12,
+'lines.markersize' : 6,
+'lines.linewidth' : 3.,
+'text.latex.unicode': True
+}
+rcParams.update(params)
+rc('font',**{'family':'sans-serif','sans-serif':['Times']})
+
+
+import numpy as np
+import h5py as h5
+import sys
+
+# File containing the total energy
+stats_filename = "./energy.txt"
+
+# First snapshot
+snap_filename = "pointMass_000.hdf5"
+f = h5.File(snap_filename,'r')
+
+# Read the units parameters from the snapshot
+units = f["InternalCodeUnits"]
+unit_mass = units.attrs["Unit mass in cgs (U_M)"]
+unit_length = units.attrs["Unit length in cgs (U_L)"]
+unit_time = units.attrs["Unit time in cgs (U_t)"]
+
+G = 6.67408e-8 * unit_mass * unit_time**2 / unit_length**3
+
+# Read the header
+header = f["Header"]
+box_size = float(header.attrs["BoxSize"][0])
+
+# Read the properties of the potential
+parameters = f["Parameters"]
+mass = float(parameters.attrs["PointMassPotential:mass"])
+centre = [box_size/2, box_size/2, box_size/2]
+f.close()
+
+# Read the statistics summary
+file_energy = np.loadtxt("energy.txt")
+time_stats = file_energy[:,0]
+E_kin_stats = file_energy[:,3]
+E_pot_stats = file_energy[:,5]
+E_tot_stats = E_kin_stats + E_pot_stats
+
+# Read the snapshots
+time_snap = np.zeros(402)
+E_kin_snap = np.zeros(402)
+E_pot_snap = np.zeros(402)
+E_tot_snap = np.zeros(402)
+Lz_snap = np.zeros(402)
+
+# Read all the particles from the snapshots
+for i in range(402):
+    snap_filename = "pointMass_%0.3d.hdf5"%i
+    f = h5.File(snap_filename,'r')
+
+    pos_x = f["PartType1/Coordinates"][:,0]
+    pos_y = f["PartType1/Coordinates"][:,1]
+    pos_z = f["PartType1/Coordinates"][:,2]
+    vel_x = f["PartType1/Velocities"][:,0]
+    vel_y = f["PartType1/Velocities"][:,1]
+    vel_z = f["PartType1/Velocities"][:,2]
+    m = f["/PartType1/Masses"][:]
+    
+    r = np.sqrt((pos_x[:] - centre[0])**2 + (pos_y[:] - centre[1])**2 + (pos_z[:] - centre[2])**2)
+    Lz = (pos_x[:] - centre[0]) * vel_y[:] - (pos_y[:] - centre[1]) * vel_x[:]
+
+    time_snap[i] = f["Header"].attrs["Time"]
+    E_kin_snap[i] = np.sum(0.5 * m * (vel_x[:]**2 + vel_y[:]**2 + vel_z[:]**2))
+    E_pot_snap[i] = np.sum(-mass * m * G / r)
+    E_tot_snap[i] = E_kin_snap[i] + E_pot_snap[i]
+    Lz_snap[i] = np.sum(Lz)
+
+# Plot energy evolution
+figure()
+plot(time_stats, E_kin_stats, "r-", lw=0.5, label="Kinetic energy")
+plot(time_stats, E_pot_stats, "g-", lw=0.5, label="Potential energy")
+plot(time_stats, E_tot_stats, "k-", lw=0.5, label="Total energy")
+
+plot(time_snap[::10], E_kin_snap[::10], "rD", lw=0.5, ms=2)
+plot(time_snap[::10], E_pot_snap[::10], "gD", lw=0.5, ms=2)
+plot(time_snap[::10], E_tot_snap[::10], "kD", lw=0.5, ms=2)
+
+legend(loc="center right", fontsize=8, frameon=False, handlelength=3, ncol=1)
+xlabel("${\\rm{Time}}$", labelpad=0)
+ylabel("${\\rm{Energy}}$",labelpad=0)
+xlim(0, 8)
+
+savefig("energy.png", dpi=200)
+
+# Plot angular momentum evolution
+figure()
+plot(time_snap, Lz_snap, "k-", lw=0.5, ms=2)
+
+xlabel("${\\rm{Time}}$", labelpad=0)
+ylabel("${\\rm{Angular~momentum}}$",labelpad=0)
+xlim(0, 8)
+
+savefig("angular_momentum.png", dpi=200)
+
+
diff --git a/examples/ExternalPointMass/externalPointMass.yml b/examples/ExternalPointMass/externalPointMass.yml
index 621a66bbc39838ac8d3d8a8a3992b2a7be3157a8..20b5bb3aa613d553d8c401e968d8ebfc0572e610 100644
--- a/examples/ExternalPointMass/externalPointMass.yml
+++ b/examples/ExternalPointMass/externalPointMass.yml
@@ -9,7 +9,7 @@ InternalUnitSystem:
 # Parameters governing the time integration
 TimeIntegration:
   time_begin: 0.    # The starting time of the simulation (in internal units).
-  time_end:   1.    # The end time of the simulation (in internal units).
+  time_end:   8.    # The end time of the simulation (in internal units).
   dt_min:     1e-6  # The minimal time-step size of the simulation (in internal units).
   dt_max:     1e-3  # The maximal time-step size of the simulation (in internal units).
 
@@ -31,7 +31,7 @@ SPH:
 
 # Parameters related to the initial conditions
 InitialConditions:
-  file_name:  Sphere.hdf5           # The file to read
+  file_name:  PointMass.hdf5        # The file to read
   shift_x:    50.                   # A shift to apply to all particles read from the ICs (in internal units).
   shift_y:    50.
   shift_z:    50.
diff --git a/examples/ExternalPointMass/makeIC.py b/examples/ExternalPointMass/makeIC.py
index 326183398933c88d7348e72e00343064b3e3a64c..ba415daf9e03058239599cc08039fc89e0929393 100644
--- a/examples/ExternalPointMass/makeIC.py
+++ b/examples/ExternalPointMass/makeIC.py
@@ -24,10 +24,10 @@ import numpy
 import math
 import random
 
-# Generates a random distriution of particles, for motion in an external potnetial centred at (0,0,0)
+# Generates a random distriution of particles, for motion in an external potential centred at (0,0,0)
 
 # physical constants in cgs
-NEWTON_GRAVITY_CGS  = 6.672e-8
+NEWTON_GRAVITY_CGS  = 6.67408e-8
 SOLAR_MASS_IN_CGS   = 1.9885e33
 PARSEC_IN_CGS       = 3.0856776e18
 
@@ -39,34 +39,28 @@ const_unit_velocity_in_cgs =   (1e5)
 print "UnitMass_in_cgs:     ", const_unit_mass_in_cgs 
 print "UnitLength_in_cgs:   ", const_unit_length_in_cgs
 print "UnitVelocity_in_cgs: ", const_unit_velocity_in_cgs
+print "UnitTime_in_cgs:     ", const_unit_length_in_cgs / const_unit_velocity_in_cgs
 
 # derived units
 const_unit_time_in_cgs = (const_unit_length_in_cgs / const_unit_velocity_in_cgs)
 const_G                = ((NEWTON_GRAVITY_CGS*const_unit_mass_in_cgs*const_unit_time_in_cgs*const_unit_time_in_cgs/(const_unit_length_in_cgs*const_unit_length_in_cgs*const_unit_length_in_cgs)))
-print 'G=', const_G
+print '---------------------'
+print 'G in internal units: ', const_G
 
 
 # Parameters
-periodic= 1            # 1 For periodic box
-boxSize = 100.         # 
-Radius  = boxSize / 4. # maximum radius of particles
-G       = const_G 
-Mass    = 1e10         
+periodic   = 1            # 1 For periodic box
+boxSize    = 100.         # 
+max_radius = boxSize / 4. # maximum radius of particles
+Mass       = 1e10         
+print "Mass at the centre:  ", Mass
 
-N       = int(sys.argv[1])  # Number of particles
-L       = N**(1./3.)
+numPart = int(sys.argv[1])  # Number of particles
+mass    = 1.
 
-# these are not used but necessary for I/O
-rho = 2.              # Density
-P = 1.                # Pressure
-gamma = 5./3.         # Gas adiabatic index
-fileName = "Sphere.hdf5" 
+fileName = "PointMass.hdf5" 
 
 
-#---------------------------------------------------
-numPart        = N
-mass           = 1
-internalEnergy = P / ((gamma - 1.)*rho)
 
 #--------------------------------------------------
 
@@ -98,25 +92,26 @@ grp.attrs["Unit current in cgs (U_I)"] = 1.
 grp.attrs["Unit temperature in cgs (U_T)"] = 1.
 
 #Particle group
-#grp0 = file.create_group("/PartType0")
 grp1 = file.create_group("/PartType1")
+
 #generate particle positions
-radius = Radius * (numpy.random.rand(N))**(1./3.) 
-ctheta = -1. + 2 * numpy.random.rand(N)
-stheta = numpy.sqrt(1.-ctheta**2)
-phi    =  2 * math.pi * numpy.random.rand(N)
+radius = max_radius * (numpy.random.rand(numPart))**(1./3.)
+print '---------------------'
+print 'Radius: minimum = ',min(radius)
+print 'Radius: maximum = ',max(radius)
+radius = numpy.sort(radius)
 r      = numpy.zeros((numPart, 3))
-# r[:,0] = radius * stheta * numpy.cos(phi)
-# r[:,1] = radius * stheta * numpy.sin(phi)
-# r[:,2] = radius * ctheta
 r[:,0] = radius
-#
-speed  = numpy.sqrt(G * Mass / radius)
-v      = numpy.zeros((numPart, 3))
+
+#generate particle velocities
+speed  = numpy.sqrt(const_G * Mass / radius)
 omega  = speed / radius
 period = 2.*math.pi/omega
-print 'period = minimum = ',min(period), ' maximum = ',max(period)
+print '---------------------'
+print 'Period: minimum = ',min(period)
+print 'Period: maximum = ',max(period)
 
+v      = numpy.zeros((numPart, 3))
 v[:,0] = -omega * r[:,1]
 v[:,1] =  omega * r[:,0]
 
@@ -129,17 +124,6 @@ ds = grp1.create_dataset('Masses', (numPart,), 'f')
 ds[()] = m
 m = numpy.zeros(1)
 
-h = numpy.full((numPart, ), 1.1255 * boxSize / L)
-ds = grp1.create_dataset('SmoothingLength', (numPart,), 'f')
-ds[()] = h
-h = numpy.zeros(1)
-
-u = numpy.full((numPart, ), internalEnergy)
-ds = grp1.create_dataset('InternalEnergy', (numPart,), 'f')
-ds[()] = u
-u = numpy.zeros(1)
-
-
 ids = 1 + numpy.linspace(0, numPart, numPart, endpoint=False)
 ds = grp1.create_dataset('ParticleIDs', (numPart, ), 'L')
 ds[()] = ids
diff --git a/examples/ExternalPointMass/run.sh b/examples/ExternalPointMass/run.sh
index 4ac513f09cb8ac8dcefc256a68478e215b8bc320..e074c384c4e002a161c7d8258e9068663204099f 100755
--- a/examples/ExternalPointMass/run.sh
+++ b/examples/ExternalPointMass/run.sh
@@ -1,10 +1,13 @@
 #!/bin/bash
 
 # Generate the initial conditions if they are not present.
-if [ ! -e Sphere.hdf5 ]
+if [ ! -e PointMass.hdf5 ]
 then
     echo "Generating initial conditions for the point mass potential box example..."
     python makeIC.py 10000
 fi
 
+rm -rf pointMass_*.hdf5
 ../swift -g -t 1 externalPointMass.yml 2>&1 | tee output.log
+
+python energy_plot.py
diff --git a/examples/ExternalPointMass/test.pro b/examples/ExternalPointMass/test.pro
deleted file mode 100644
index 21c10e9d27daa45b085c6a659ba3cf7260f017fb..0000000000000000000000000000000000000000
--- a/examples/ExternalPointMass/test.pro
+++ /dev/null
@@ -1,65 +0,0 @@
-;
-;  test energy / angular momentum conservation of test problem
-;
-@physunits
-
-indir    = '/gpfs/data/tt/Codes/Swift-git/swiftsim/examples/'
-basefile = 'output_'
-nfiles   = 657
-nfollow  = 100 ; number of particles to follow
-eout     = fltarr(nfollow, nfiles)
-ekin     = fltarr(nfollow, nfiles)
-epot     = fltarr(nfollow, nfiles)
-tout     = fltarr(nfiles)
-; set properties of potential
-uL  = 1e3 * phys.pc             ; unit of length
-uM  = phys.msun                 ; unit of mass
-uV  = 1d5                       ; unit of velocity
-
-; derived units
-constG   = 10.^(alog10(phys.g)+alog10(uM)-2d0*alog10(uV)-alog10(uL)) ;
-pcentre  = [50.,50.,50.] * 1d3 * pc / uL
-mextern  = 1d10 * msun / uM
-;
-;
-;
-ifile  = 0
-for ifile=0,nfiles-1 do begin
-;for ifile=0,3 do begin
-   inf    = indir + basefile + strtrim(string(ifile,'(i3.3)'),1) + '.hdf5'
-   time   = h5ra(inf, 'Header','Time')
-   p      = h5rd(inf,'PartType1/Coordinates')
-   v      = h5rd(inf,'PartType1/Velocities')
-   id     = h5rd(inf,'PartType1/ParticleIDs')
-   indx   = sort(id)
-;
-   id     = id[indx]
-   for ic=0,2 do begin
-      tmp = reform(p[ic,*]) & p[ic,*] = tmp[indx]
-      tmp = reform(v[ic,*]) & v[ic,*] = tmp[indx]
-   endfor
-; calculate energy
-   dd  = size(p,/dimen) & npart = dd[1]
-   ener = fltarr(npart)
-   dr   = fltarr(npart) & dv = dr
-   for ic=0,2 do dr[*] = dr[*] + (p[ic,*]-pcentre[ic])^2
-   for ic=0,2 do dv[*] = dv[*] + v[ic,*]^2
-   dr = sqrt(dr)
-;   print,'time = ',time,p[0,0],v[0,0],id[0]
-   ek   = 0.5 * dv
-   ep   = - constG * mextern / dr
-   ener = ek + ep
-   tout(ifile) = time
-   eout(*,ifile) = ener[0:nfollow-1]
-   ekin(*,ifile) = ek[0:nfollow-1]
-   epot(*,ifile) = ep[0:nfollow-1]
-endfor
-
-; calculate relative energy change
-de = 0.0 * eout
-for ifile=1, nfiles -1 do de[*,ifile] = (eout[*,ifile]-eout[*,0])/eout[*,0]
-
-
-end
-
-
diff --git a/examples/Feedback/feedback.pro b/examples/Feedback/feedback.pro
deleted file mode 100644
index 02d616fc82f0aeb7011d022d13db9d1d1030e89c..0000000000000000000000000000000000000000
--- a/examples/Feedback/feedback.pro
+++ /dev/null
@@ -1,24 +0,0 @@
-base = 'Feedback'
-inf  = 'Feedback_005.hdf5'
-
-blast  = [5.650488e-01, 5.004371e-01, 5.010494e-01] ; location of blast
-pos    = h5rd(inf,'PartType0/Coordinates')
-vel    = h5rd(inf,'PartType0/Velocities')
-rho    = h5rd(inf,'PartType0/Density')
-utherm = h5rd(inf,'PartType0/InternalEnergy')
-
-; shift to centre
-for ic=0,2 do pos[ic,*] = pos[ic,*] - blast[ic]
-
-;; distance from centre
-dist = fltarr(n_elements(rho))
-for ic=0,2 do dist = dist + pos[ic,*]^2
-dist = sqrt(dist)
-
-; radial velocity
-vr = fltarr(n_elements(rho))
-for ic=0,2 do vr = vr + pos[ic,*]*vel[ic,*]
-vr = vr / dist
-
-; 
-end
diff --git a/examples/Feedback/feedback.yml b/examples/Feedback/feedback.yml
deleted file mode 100644
index de4f7abef1ef538a97a5e38c72b4db5ce2647976..0000000000000000000000000000000000000000
--- a/examples/Feedback/feedback.yml
+++ /dev/null
@@ -1,43 +0,0 @@
-# Define the system of units to use internally. 
-InternalUnitSystem:
-  UnitMass_in_cgs:     1   # Grams
-  UnitLength_in_cgs:   1   # Centimeters
-  UnitVelocity_in_cgs: 1   # Centimeters per second
-  UnitCurrent_in_cgs:  1   # Amperes
-  UnitTemp_in_cgs:     1   # Kelvin
-
-# Parameters governing the time integration
-TimeIntegration:
-  time_begin: 0.    # The starting time of the simulation (in internal units).
-  time_end:   5e-2  # The end time of the simulation (in internal units).
-  dt_min:     1e-7  # The minimal time-step size of the simulation (in internal units).
-  dt_max:     1e-4  # The maximal time-step size of the simulation (in internal units).
-
-# Parameters governing the snapshots
-Snapshots:
-  basename:            Feedback # Common part of the name of output files
-  time_first:          0.       # Time of the first output (in internal units)
-  delta_time:          1e-2     # Time difference between consecutive outputs (in internal units)
-
-# Parameters governing the conserved quantities statistics
-Statistics:
-  delta_time:          1e-3 # Time between statistics output
-
-# Parameters for the hydrodynamics scheme
-SPH:
-  resolution_eta:        1.2348   # Target smoothing length in units of the mean inter-particle separation (1.2348 == 48Ngbs with the cubic spline kernel).
-  delta_neighbours:      0.1      # The tolerance for the targetted number of neighbours.
-  CFL_condition:         0.1      # Courant-Friedrich-Levy condition for time integration.
-
-# Parameters related to the initial conditions
-InitialConditions:
-  file_name:  ./Feedback.hdf5          # The file to read
-
-# Parameters for feedback
-
-SN:
-  time:    0.001 # time the SN explodes (internal units)
-  energy:  1.0   # energy of the explosion (internal units)
-  x:       0.5   # x-position of explostion (internal units)
-  y:       0.5   # y-position of explostion (internal units)
-  z:       0.5   # z-position of explostion (internal units)
diff --git a/examples/Feedback/makeIC.py b/examples/Feedback/makeIC.py
deleted file mode 100644
index bd1081a9c275616038f5fa4e3eb943c36cb4c3eb..0000000000000000000000000000000000000000
--- a/examples/Feedback/makeIC.py
+++ /dev/null
@@ -1,109 +0,0 @@
-###############################################################################
- # This file is part of SWIFT.
- # Copyright (c) 2013 Pedro Gonnet (pedro.gonnet@durham.ac.uk),
- #                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
- #               2016 Tom Theuns (tom.theuns@durham.ac.uk)
- # 
- # This program is free software: you can redistribute it and/or modify
- # it under the terms of the GNU Lesser General Public License as published
- # by the Free Software Foundation, either version 3 of the License, or
- # (at your option) any later version.
- # 
- # This program is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- # GNU General Public License for more details.
- # 
- # You should have received a copy of the GNU Lesser General Public License
- # along with this program.  If not, see <http://www.gnu.org/licenses/>.
- # 
- ##############################################################################
-
-import h5py
-import sys
-from numpy import *
-
-# Generates a swift IC file containing a cartesian distribution of particles
-# at a constant density and pressure in a cubic box
-
-# Parameters
-periodic= 1           # 1 For periodic box
-boxSize = 1.
-L = int(sys.argv[1])  # Number of particles along one axis
-rho = 1.              # Density
-P = 1.e-6             # Pressure
-gamma = 5./3.         # Gas adiabatic index
-eta = 1.2349          # 48 ngbs with cubic spline kernel
-fileName = "Feedback.hdf5" 
-
-#---------------------------------------------------
-numPart = L**3
-mass = boxSize**3 * rho / numPart
-internalEnergy = P / ((gamma - 1.)*rho)
-
-#--------------------------------------------------
-
-#File
-file = h5py.File(fileName, 'w')
-
-# Header
-grp = file.create_group("/Header")
-grp.attrs["BoxSize"] = boxSize
-grp.attrs["NumPart_Total"] =  [numPart, 0, 0, 0, 0, 0]
-grp.attrs["NumPart_Total_HighWord"] = [0, 0, 0, 0, 0, 0]
-grp.attrs["NumPart_ThisFile"] = [numPart, 0, 0, 0, 0, 0]
-grp.attrs["Time"] = 0.0
-grp.attrs["NumFilesPerSnapshot"] = 1
-grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
-grp.attrs["Flag_Entropy_ICs"] = 0
-
-#Runtime parameters
-grp = file.create_group("/RuntimePars")
-grp.attrs["PeriodicBoundariesOn"] = periodic
-
-#Units
-grp = file.create_group("/Units")
-grp.attrs["Unit length in cgs (U_L)"] = 1.
-grp.attrs["Unit mass in cgs (U_M)"] = 1.
-grp.attrs["Unit time in cgs (U_t)"] = 1.
-grp.attrs["Unit current in cgs (U_I)"] = 1.
-grp.attrs["Unit temperature in cgs (U_T)"] = 1.
-
-#Particle group
-grp = file.create_group("/PartType0")
-
-v  = zeros((numPart, 3))
-ds = grp.create_dataset('Velocities', (numPart, 3), 'f')
-ds[()] = v
-v = zeros(1)
-
-m = full((numPart, 1), mass)
-ds = grp.create_dataset('Masses', (numPart,1), 'f')
-ds[()] = m
-m = zeros(1)
-
-h = full((numPart, 1), eta * boxSize / L)
-ds = grp.create_dataset('SmoothingLength', (numPart,1), 'f')
-ds[()] = h
-h = zeros(1)
-
-u = full((numPart, 1), internalEnergy)
-ds = grp.create_dataset('InternalEnergy', (numPart,1), 'f')
-ds[()] = u
-u = zeros(1)
-
-
-ids = linspace(0, numPart, numPart, endpoint=False).reshape((numPart,1))
-ds = grp.create_dataset('ParticleIDs', (numPart, 1), 'L')
-ds[()] = ids + 1
-x      = ids % L;
-y      = ((ids - x) / L) % L;
-z      = (ids - x - L * y) / L**2;
-coords = zeros((numPart, 3))
-coords[:,0] = z[:,0] * boxSize / L + boxSize / (2*L)
-coords[:,1] = y[:,0] * boxSize / L + boxSize / (2*L)
-coords[:,2] = x[:,0] * boxSize / L + boxSize / (2*L)
-ds = grp.create_dataset('Coordinates', (numPart, 3), 'd')
-ds[()] = coords
-
-file.close()
diff --git a/examples/HydrostaticHalo/hydrostatic.yml b/examples/HydrostaticHalo/hydrostatic.yml
index 39a91a4ec475a70ef4e61b9cdc59b8221a74093e..d20d6018f323de0628a0500d8ba767018711fd0a 100644
--- a/examples/HydrostaticHalo/hydrostatic.yml
+++ b/examples/HydrostaticHalo/hydrostatic.yml
@@ -34,7 +34,7 @@ InitialConditions:
   file_name:  Hydrostatic.hdf5       # The file to read
  
 # External potential parameters
-SoftenedIsothermalPotential:
+IsothermalPotential:
   position_x:      0.     # location of centre of isothermal potential in internal units
   position_y:      0.
   position_z:      0.	
diff --git a/examples/IsothermalPotential/energy_plot.py b/examples/IsothermalPotential/energy_plot.py
new file mode 100644
index 0000000000000000000000000000000000000000..0afa6fa93fa2a992e6ddeab3c9d33538c0b41de3
--- /dev/null
+++ b/examples/IsothermalPotential/energy_plot.py
@@ -0,0 +1,120 @@
+import matplotlib
+matplotlib.use("Agg")
+from pylab import *
+import h5py
+
+# Plot parameters
+params = {'axes.labelsize': 10,
+'axes.titlesize': 10,
+'font.size': 12,
+'legend.fontsize': 12,
+'xtick.labelsize': 10,
+'ytick.labelsize': 10,
+'text.usetex': True,
+ 'figure.figsize' : (3.15,3.15),
+'figure.subplot.left'    : 0.145,
+'figure.subplot.right'   : 0.99,
+'figure.subplot.bottom'  : 0.11,
+'figure.subplot.top'     : 0.99,
+'figure.subplot.wspace'  : 0.15,
+'figure.subplot.hspace'  : 0.12,
+'lines.markersize' : 6,
+'lines.linewidth' : 3.,
+'text.latex.unicode': True
+}
+rcParams.update(params)
+rc('font',**{'family':'sans-serif','sans-serif':['Times']})
+
+
+import numpy as np
+import h5py as h5
+import sys
+
+# File containing the total energy
+stats_filename = "./energy.txt"
+
+# First snapshot
+snap_filename = "Isothermal_000.hdf5"
+f = h5.File(snap_filename,'r')
+
+# Read the units parameters from the snapshot
+units = f["InternalCodeUnits"]
+unit_mass = units.attrs["Unit mass in cgs (U_M)"]
+unit_length = units.attrs["Unit length in cgs (U_L)"]
+unit_time = units.attrs["Unit time in cgs (U_t)"]
+
+# Read the header
+header = f["Header"]
+box_size = float(header.attrs["BoxSize"][0])
+
+# Read the properties of the potential
+parameters = f["Parameters"]
+R200 = 100 
+Vrot = float(parameters.attrs["IsothermalPotential:vrot"])
+centre = [box_size/2, box_size/2, box_size/2]
+f.close()
+
+# Read the statistics summary
+file_energy = np.loadtxt("energy.txt")
+time_stats = file_energy[:,0]
+E_kin_stats = file_energy[:,3]
+E_pot_stats = file_energy[:,5]
+E_tot_stats = E_kin_stats + E_pot_stats
+
+# Read the snapshots
+time_snap = np.zeros(402)
+E_kin_snap = np.zeros(402)
+E_pot_snap = np.zeros(402)
+E_tot_snap = np.zeros(402)
+Lz_snap = np.zeros(402)
+
+# Read all the particles from the snapshots
+for i in range(402):
+    snap_filename = "Isothermal_%0.3d.hdf5"%i
+    f = h5.File(snap_filename,'r')
+
+    pos_x = f["PartType1/Coordinates"][:,0]
+    pos_y = f["PartType1/Coordinates"][:,1]
+    pos_z = f["PartType1/Coordinates"][:,2]
+    vel_x = f["PartType1/Velocities"][:,0]
+    vel_y = f["PartType1/Velocities"][:,1]
+    vel_z = f["PartType1/Velocities"][:,2]
+    mass = f["/PartType1/Masses"][:]
+    
+    r = np.sqrt((pos_x[:] - centre[0])**2 + (pos_y[:] - centre[1])**2 + (pos_z[:] - centre[2])**2)
+    Lz = (pos_x[:] - centre[0]) * vel_y[:] - (pos_y[:] - centre[1]) * vel_x[:]
+
+    time_snap[i] = f["Header"].attrs["Time"]
+    E_kin_snap[i] = np.sum(0.5 * mass * (vel_x[:]**2 + vel_y[:]**2 + vel_z[:]**2))
+    E_pot_snap[i] = np.sum(-mass * Vrot**2 *  log(r))
+    E_tot_snap[i] = E_kin_snap[i] + E_pot_snap[i]
+    Lz_snap[i] = np.sum(Lz)
+
+# Plot energy evolution
+figure()
+plot(time_stats, E_kin_stats, "r-", lw=0.5, label="Kinetic energy")
+plot(time_stats, E_pot_stats, "g-", lw=0.5, label="Potential energy")
+plot(time_stats, E_tot_stats, "k-", lw=0.5, label="Total energy")
+
+plot(time_snap[::10], E_kin_snap[::10], "rD", lw=0.5, ms=2)
+plot(time_snap[::10], E_pot_snap[::10], "gD", lw=0.5, ms=2)
+plot(time_snap[::10], E_tot_snap[::10], "kD", lw=0.5, ms=2)
+
+legend(loc="center right", fontsize=8, frameon=False, handlelength=3, ncol=1)
+xlabel("${\\rm{Time}}$", labelpad=0)
+ylabel("${\\rm{Energy}}$",labelpad=0)
+xlim(0, 8)
+
+savefig("energy.png", dpi=200)
+
+# Plot angular momentum evolution
+figure()
+plot(time_snap, Lz_snap, "k-", lw=0.5, ms=2)
+
+xlabel("${\\rm{Time}}$", labelpad=0)
+ylabel("${\\rm{Angular~momentum}}$",labelpad=0)
+xlim(0, 8)
+
+savefig("angular_momentum.png", dpi=200)
+
+
diff --git a/examples/IsothermalPotential/isothermal.yml b/examples/IsothermalPotential/isothermal.yml
index 0de99779f07591a5b71be11b75bc56ec741ddaed..8d9ec3875e405d95a89b3486bca5fd3465a3e20d 100644
--- a/examples/IsothermalPotential/isothermal.yml
+++ b/examples/IsothermalPotential/isothermal.yml
@@ -15,7 +15,7 @@ TimeIntegration:
 
 # Parameters governing the conserved quantities statistics
 Statistics:
-  delta_time:          1e-2 # Time between statistics output
+  delta_time:          1e-3 # Time between statistics output
   
 # Parameters governing the snapshots
 Snapshots:
@@ -23,25 +23,18 @@ Snapshots:
   time_first:          0.         # Time of the first output (in internal units)
   delta_time:          0.02       # Time difference between consecutive outputs (in internal units)
 
-# Parameters for the hydrodynamics scheme
-SPH:
-  resolution_eta:        1.2349   # Target smoothing length in units of the mean inter-particle separation (1.2349 == 48Ngbs with the cubic spline kernel).
-  delta_neighbours:      1.       # The tolerance for the targetted number of neighbours.
-  CFL_condition:         0.1      # Courant-Friedrich-Levy condition for time integration.
-  max_smoothing_length:  40.      # Maximal smoothing length allowed (in internal units).
-
 # Parameters related to the initial conditions
 InitialConditions:
   file_name:  Isothermal.hdf5       # The file to read
-  shift_x:    100.                  # A shift to apply to all particles read from the ICs (in internal units).
-  shift_y:    100.
-  shift_z:    100.
+  shift_x:    200.                  # Shift all particles to be in the potential
+  shift_y:    200.
+  shift_z:    200.
  
 # External potential parameters
 IsothermalPotential:
-  position_x:      100.     # location of centre of isothermal potential in internal units
-  position_y:      100.
-  position_z:      100.	
+  position_x:      0.       # location of centre of isothermal potential in internal units
+  position_y:      0.
+  position_z:      0.
   vrot:            200.     # rotation speed of isothermal potential in internal units
-  timestep_mult:   0.03     # controls time step
-
+  timestep_mult:   0.01     # controls time step
+  epsilon:         0.       # No softening at the centre of the halo
diff --git a/examples/IsothermalPotential/makeIC.py b/examples/IsothermalPotential/makeIC.py
index 976119f0a312c5acc81fab943ba3cf5769102269..7d1c5361f9a255365517226e49c55a8a50c4d6ce 100644
--- a/examples/IsothermalPotential/makeIC.py
+++ b/examples/IsothermalPotential/makeIC.py
@@ -30,10 +30,10 @@ import random
 # all particles move in the xy plane, and start at y=0
 
 # physical constants in cgs
-NEWTON_GRAVITY_CGS  = 6.672e-8
+NEWTON_GRAVITY_CGS  = 6.67408e-8
 SOLAR_MASS_IN_CGS   = 1.9885e33
 PARSEC_IN_CGS       = 3.0856776e18
-PROTON_MASS_IN_CGS  = 1.6726231e24
+PROTON_MASS_IN_CGS  = 1.672621898e24
 YEAR_IN_CGS         = 3.154e+7
 
 # choice of units
@@ -66,17 +66,12 @@ N       = int(sys.argv[1])  # Number of particles
 icirc   = int(sys.argv[2])  # if = 0, all particles are on circular orbits, if = 1, Lz/Lcirc uniform in ]0,1[
 L       = N**(1./3.)
 
-# these are not used but necessary for I/O
-rho = 2.              # Density
-P = 1.                # Pressure
-gamma = 5./3.         # Gas adiabatic index
 fileName = "Isothermal.hdf5" 
 
 
 #---------------------------------------------------
 numPart        = N
 mass           = 1
-internalEnergy = P / ((gamma - 1.)*rho)
 
 #--------------------------------------------------
 
@@ -111,7 +106,6 @@ grp.attrs["PeriodicBoundariesOn"] = periodic
 numpy.random.seed(1234)
 
 #Particle group
-#grp0 = file.create_group("/PartType0")
 grp1 = file.create_group("/PartType1")
 #generate particle positions
 radius = Radius * (numpy.random.rand(N))**(1./3.) 
@@ -119,10 +113,8 @@ ctheta = -1. + 2 * numpy.random.rand(N)
 stheta = numpy.sqrt(1.-ctheta**2)
 phi    =  2 * math.pi * numpy.random.rand(N)
 r      = numpy.zeros((numPart, 3))
-#r[:,0] = radius * stheta * numpy.cos(phi)
-#r[:,1] = radius * stheta * numpy.sin(phi)
-#r[:,2] = radius * ctheta
 r[:,0] = radius
+
 #
 speed  = vrot
 v      = numpy.zeros((numPart, 3))
@@ -146,17 +138,6 @@ ds = grp1.create_dataset('Masses', (numPart,), 'f')
 ds[()] = m
 m = numpy.zeros(1)
 
-h = numpy.full((numPart, ), 1.1255 * boxSize / L,  dtype='f')
-ds = grp1.create_dataset('SmoothingLength', (numPart,), 'f')
-ds[()] = h
-h = numpy.zeros(1)
-
-u = numpy.full((numPart, ), internalEnergy,  dtype='f')
-ds = grp1.create_dataset('InternalEnergy', (numPart,), 'f')
-ds[()] = u
-u = numpy.zeros(1)
-
-
 ids = 1 + numpy.linspace(0, numPart, numPart, endpoint=False, dtype='L')
 ds = grp1.create_dataset('ParticleIDs', (numPart, ), 'L')
 ds[()] = ids
diff --git a/examples/IsothermalPotential/run.sh b/examples/IsothermalPotential/run.sh
index 28a3cc0910f986f84bcd603091543643356f1c4a..976fbddc01cf7a3dcbb114d437ddb8f03b4d54bd 100755
--- a/examples/IsothermalPotential/run.sh
+++ b/examples/IsothermalPotential/run.sh
@@ -7,4 +7,7 @@ then
     python makeIC.py 1000 1
 fi
 
+rm -rf Isothermal_*.hdf5
 ../swift -g -t 1 isothermal.yml 2>&1 | tee output.log
+
+python energy_plot.py
diff --git a/examples/IsothermalPotential/test.pro b/examples/IsothermalPotential/test.pro
deleted file mode 100644
index edfa50121d2e5adb7e039f3c38d6d4c0b4d5e34f..0000000000000000000000000000000000000000
--- a/examples/IsothermalPotential/test.pro
+++ /dev/null
@@ -1,168 +0,0 @@
-;
-;  test energy / angular momentum conservation of test problem
-;
-
-iplot = 1 ; if iplot = 1, make plot of E/Lz conservation, else, simply compare final and initial energy
-
-; set physical constants
-@physunits
-
-indir    = './'
-basefile = 'Isothermal_'
-
-; set properties of potential
-uL   = 1e3 * phys.pc             ; unit of length
-uM   = phys.msun                 ; unit of mass
-uV   = 1d5                       ; unit of velocity
-vrot = 200.                      ; km/s
-r200 = 100.                      ; virial radius
-
-; derived units
-constG   = 10.^(alog10(phys.g)+alog10(uM)-2d0*alog10(uV)-alog10(uL)) ;
-pcentre  = [100.,100.,100.] * 1d3 * pc / uL
-
-;
-infile = indir + basefile + '*'
-spawn,'ls -1 '+infile,res
-nfiles = n_elements(res)
-
-
-
-; choose: calculate change of energy and Lz, comparing first and last
-; snapshots for all particles, or do so for a subset
-
-; compare all
-ifile   = 0
-inf     = indir + basefile + strtrim(string(ifile,'(i3.3)'),1) + '.hdf5'
-id      = h5rd(inf,'PartType1/ParticleIDs')
-nfollow = n_elements(id)
-
-; follow a subset
-nfollow  = 500                    ; number of particles to follow
-
-;
-if (iplot eq 1) then begin
-   nskip = 1
-   nsave = nfiles
-endif else begin
-   nskip = nfiles - 2
-   nsave = 2
-endelse
-
-;
-lout     = fltarr(nfollow, nsave) ; Lz
-xout     = fltarr(nfollow, nsave) ; x
-yout     = fltarr(nfollow, nsave) ; y
-zout     = fltarr(nfollow, nsave) ; z
-eout     = fltarr(nfollow, nsave) ; energies
-ekin     = fltarr(nfollow, nsave)
-epot     = fltarr(nfollow, nsave)
-tout     = fltarr(nsave)
-
-
-
-ifile  = 0
-isave = 0
-for ifile=0,nfiles-1,nskip do begin
-   inf    = indir + basefile + strtrim(string(ifile,'(i3.3)'),1) + '.hdf5'
-   time   = h5ra(inf, 'Header','Time')
-   p      = h5rd(inf,'PartType1/Coordinates')
-   v      = h5rd(inf,'PartType1/Velocities')
-   id     = h5rd(inf,'PartType1/ParticleIDs')
-   indx   = sort(id)
-;
-   id     = id[indx]
-   for ic=0,2 do begin
-      tmp = reform(p[ic,*]) & p[ic,*] = tmp[indx]
-      tmp = reform(v[ic,*]) & v[ic,*] = tmp[indx]
-   endfor
-
-
-; calculate energy
-   dd  = size(p,/dimen) & npart = dd[1]
-   ener = fltarr(npart)
-   dr   = fltarr(npart) & dv = dr
-   for ic=0,2 do dr[*] = dr[*] + (p[ic,*]-pcentre[ic])^2
-   for ic=0,2 do dv[*] = dv[*] + v[ic,*]^2
-   xout[*,isave] = p[0,0:nfollow-1]-pcentre[0]
-   yout[*,isave] = p[1,0:nfollow-1]-pcentre[1]
-   zout[*,isave] = p[2,0:nfollow-1]-pcentre[2]
-   Lz  = (p[0,*]-pcentre[0]) * v[1,*] - (p[1,*]-pcentre[1]) * v[0,*]
-   dr = sqrt(dr)
-;   print,'time = ',time,p[0,0],v[0,0],id[0]
-   ek   = 0.5 * dv
-;   ep   = - constG * mextern / dr
-   ep   = -vrot*vrot * (1 + alog(r200/dr))
-   ener = ek + ep
-   tout(isave) = time
-   lout[*,isave] = lz[0:nfollow-1]
-   eout(*,isave) = ener[0:nfollow-1]
-   ekin(*,isave) = ek[0:nfollow-1]
-   epot(*,isave) = ep[0:nfollow-1]
-
-;  write some output
-;   print,' time= ',time,' e= ',eout[0],' Lz= ',lz[0],format='(%a %f %a
-;   %f)'
-   print,format='('' time= '',f7.1,'' E= '',f9.2,'' Lz= '',e9.2)', time,eout[0],lz[0]
-   isave = isave + 1
-   
-endfor
-x0 = reform(xout[0,*])
-y0 = reform(xout[1,*])
-z0 = reform(xout[2,*])
-
-; calculate relative energy change
-de    = 0.0 * eout
-dl    = 0.0 * lout
-nsave = isave
-for ifile=1, nsave-1 do de[*,ifile] = (eout[*,ifile]-eout[*,0])/eout[*,0]
-for ifile=1, nsave-1 do dl[*,ifile] = (lout[*,ifile] - lout[*,0])/lout[*,0]
-
-
-; calculate statistics of energy changes
-print,' relatve energy change: (per cent) ',minmax(de) * 100.
-print,' relative Lz    change: (per cent) ',minmax(dl) * 100.
-
-; plot enery and Lz conservation for some particles
-if(iplot eq 1) then begin
-; plot results on energy conservation for some particles
-   nplot = min(10, nfollow)
-   win,0
-   xr = [min(tout), max(tout)]
-   yr = [-2,2]*1d-2             ; in percent
-   plot,[0],[0],xr=xr,yr=yr,/xs,/ys,/nodata,xtitle='time',ytitle='dE/E, dL/L (%)'
-   for i=0,nplot-1 do oplot,tout,de[i,*]
-   for i=0,nplot-1 do oplot,tout,dl[i,*],color=red
-   legend,['dE/E','dL/L'],linestyle=[0,0],color=[black,red],box=0,/bottom,/left
-   screen_to_png,'e-time.png'
-
-;  plot orbits of those particles
-   win,2
-   xr = [-100,100]
-   yr = xr
-   plot,[0],[0],xr=xr,yr=yr,/xs,/ys,/iso,/nodata,xtitle='x',ytitle='y'
-   color = floor(findgen(nplot)*255/float(nplot))
-   for i=0,nplot-1 do oplot,xout[i,*],yout[i,*],color=color(i)
-   screen_to_png,'orbit.png'
-
-; plot radial position of these particles
-   win,4
-   xr = [min(tout), max(tout)]
-   yr = [0,80]
-   plot,[0],[0],xr=xr,yr=yr,/xs,/ys,/nodata,xtitle='t',ytitle='r'
-   color = floor(findgen(nplot)*255/float(nplot))
-for i=0,nplot-1 do begin dr = sqrt(reform(xout[i,*])^2 + reform(yout[i,*])^2) &  oplot,tout,dr,color=color[i] & endfor
-   screen_to_png,'r-time.png'
-
-; make histogram of energy changes at end
-   win,6
-   ohist,de,x,y,-0.05,0.05,0.001
-   plot,x,y,psym=10,xtitle='de (%)'
-   screen_to_png,'de-hist.png'
-
-
-endif
-
-end
-
-
diff --git a/examples/Makefile.am b/examples/Makefile.am
index 4da84788a485dacd2103fe85ad3e729ade6b582a..dd13fb7eb4b82fbbfbb1ae450e20d01b13f2a455 100644
--- a/examples/Makefile.am
+++ b/examples/Makefile.am
@@ -24,7 +24,7 @@ AM_CFLAGS = -I$(top_srcdir)/src $(HDF5_CPPFLAGS)
 AM_LDFLAGS = $(HDF5_LDFLAGS)
 
 # Extra libraries.
-EXTRA_LIBS = $(HDF5_LIBS) $(FFTW_LIBS) $(PROFILER_LIBS) $(TCMALLOC_LIBS)
+EXTRA_LIBS = $(HDF5_LIBS) $(FFTW_LIBS) $(PROFILER_LIBS) $(TCMALLOC_LIBS) $(JEMALLOC_LIBS)
 
 # MPI libraries.
 MPI_LIBS = $(METIS_LIBS) $(MPI_THREAD_LIBS)
@@ -63,11 +63,11 @@ EXTRA_DIST = BigCosmoVolume/makeIC.py \
 	     EAGLE_12/eagle_12.yml EAGLE_12/getIC.sh EAGLE_12/README EAGLE_12/run.sh \
 	     EAGLE_25/eagle_25.yml EAGLE_25/getIC.sh EAGLE_25/README EAGLE_25/run.sh \
 	     EAGLE_50/eagle_50.yml EAGLE_50/getIC.sh EAGLE_50/README EAGLE_50/run.sh \
-	     ExternalPointMass/externalPointMass.yml ExternalPointMass/makeIC.py ExternalPointMass/run.sh ExternalPointMass/test.pro \
+	     ExternalPointMass/externalPointMass.yml ExternalPointMass/makeIC.py ExternalPointMass/run.sh ExternalPointMass/energy_plot.py \
 	     GreshoVortex_2D/getGlass.sh GreshoVortex_2D/gresho.yml GreshoVortex_2D/makeIC.py GreshoVortex_2D/plotSolution.py GreshoVortex_2D/run.sh \
 	     HydrostaticHalo/README HydrostaticHalo/hydrostatic.yml HydrostaticHalo/makeIC.py HydrostaticHalo/run.sh \
 	     HydrostaticHalo/density_profile.py HydrostaticHalo/velocity_profile.py HydrostaticHalo/internal_energy_profile.py HydrostaticHalo/test_energy_conservation.py \
-	     IsothermalPotential/README IsothermalPotential/run.sh IsothermalPotential/test.pro IsothermalPotential/isothermal.yml IsothermalPotential/makeIC.py \
+	     IsothermalPotential/README IsothermalPotential/run.sh IsothermalPotential/energy_plot.py IsothermalPotential/isothermal.yml IsothermalPotential/makeIC.py \
 	     KelvinHelmholtz_2D/kelvinHelmholtz.yml KelvinHelmholtz_2D/makeIC.py KelvinHelmholtz_2D/plotSolution.py KelvinHelmholtz_2D/run.sh \
 	     MultiTypes/makeIC.py  MultiTypes/multiTypes.yml MultiTypes/run.sh \
 	     PerturbedBox_2D/makeIC.py PerturbedBox_2D/perturbedPlane.yml \
diff --git a/examples/MultiTypes/makeIC.py b/examples/MultiTypes/makeIC.py
index 229450b67c02258553b588483d7cbd4fef887817..88330a6de25e23bf007615f9e9ca24e66065413c 100644
--- a/examples/MultiTypes/makeIC.py
+++ b/examples/MultiTypes/makeIC.py
@@ -36,6 +36,9 @@ eta = 1.2349             # 48 ngbs with cubic spline kernel
 rhoDM = 1.
 Ldm = int(sys.argv[2])  # Number of particles along one axis
 
+massStars = 0.1
+Lstars = int(sys.argv[3])  # Number of particles along one axis
+
 fileName = "multiTypes.hdf5"
 
 #---------------------------------------------------
@@ -46,6 +49,10 @@ internalEnergy = P / ((gamma - 1.)*rhoGas)
 numDM = Ldm**3
 massDM = boxSize**3 * rhoDM / numDM
 
+numStars = Lstars**3
+massStars = massDM * massStars
+
+
 #--------------------------------------------------
 
 #File
@@ -54,9 +61,9 @@ file = h5py.File(fileName, 'w')
 # Header
 grp = file.create_group("/Header")
 grp.attrs["BoxSize"] = boxSize
-grp.attrs["NumPart_Total"] =  [numGas, numDM, 0, 0, 0, 0]
+grp.attrs["NumPart_Total"] =  [numGas, numDM, 0, 0, numStars, 0]
 grp.attrs["NumPart_Total_HighWord"] = [0, 0, 0, 0, 0, 0]
-grp.attrs["NumPart_ThisFile"] = [numGas, numDM, 0, 0, 0, 0]
+grp.attrs["NumPart_ThisFile"] = [numGas, numDM, 0, 0, numStars, 0]
 grp.attrs["Time"] = 0.0
 grp.attrs["NumFilesPerSnapshot"] = 1
 grp.attrs["MassTable"] = [0.0, massDM, 0.0, 0.0, 0.0, 0.0]
@@ -142,4 +149,33 @@ coords[:,2] = x[:,0] * boxSize / Ldm + boxSize / (2*Ldm)
 ds = grp.create_dataset('Coordinates', (numDM, 3), 'd')
 ds[()] = coords
 
+
+
+# Star Particle group
+grp = file.create_group("/PartType4")
+
+v  = zeros((numStars, 3))
+ds = grp.create_dataset('Velocities', (numStars, 3), 'f')
+ds[()] = v
+v = zeros(1)
+
+m = full((numStars, 1), massStars)
+ds = grp.create_dataset('Masses', (numStars,1), 'f')
+ds[()] = m
+m = zeros(1)
+
+ids = linspace(0, numStars, numStars, endpoint=False).reshape((numStars,1))
+ds = grp.create_dataset('ParticleIDs', (numStars, 1), 'L')
+ds[()] = ids + Lgas**3 + 1
+x      = ids % Ldm;
+y      = ((ids - x) / Ldm) % Ldm;
+z      = (ids - x - Ldm * y) / Ldm**2;
+coords = zeros((numStars, 3))
+coords[:,0] = z[:,0] * boxSize / Ldm + boxSize / (2*Ldm)
+coords[:,1] = y[:,0] * boxSize / Ldm + boxSize / (2*Ldm)
+coords[:,2] = x[:,0] * boxSize / Ldm + boxSize / (2*Ldm)
+ds = grp.create_dataset('Coordinates', (numStars, 3), 'd')
+ds[()] = coords
+
+
 file.close()
diff --git a/examples/MultiTypes/run.sh b/examples/MultiTypes/run.sh
index 57465ce0ba6dde3988359df990f2a93323dbc617..508a5097f8961f446a51204e889875e33d4f634e 100755
--- a/examples/MultiTypes/run.sh
+++ b/examples/MultiTypes/run.sh
@@ -4,7 +4,7 @@
 if [ ! -e multiTypes.hdf5 ]
 then
     echo "Generating initial conditions for the multitype box example..."
-    python makeIC.py 50 60
+    python makeIC.py 17 24 12
 fi
 
-../swift -s -g -t 16 multiTypes.yml 2>&1 | tee output.log
+../swift -s -g -S -t 1 multiTypes.yml 2>&1 | tee output.log
diff --git a/examples/Stellar_Disk/makeIC.py b/examples/Stellar_Disk/makeIC.py
new file mode 100644
index 0000000000000000000000000000000000000000..b5ec65d3fa76d8b377bdaacf3b43a36ab560115e
--- /dev/null
+++ b/examples/Stellar_Disk/makeIC.py
@@ -0,0 +1,3 @@
+import numpy as np
+import h5py as h5
+
diff --git a/examples/Stellar_Disk/stellar_disk.yml b/examples/Stellar_Disk/stellar_disk.yml
new file mode 100644
index 0000000000000000000000000000000000000000..1d17f96be7cf476f1959544fd221d7c8b7919915
--- /dev/null
+++ b/examples/Stellar_Disk/stellar_disk.yml
@@ -0,0 +1 @@
+IsothermalPotential
\ No newline at end of file
diff --git a/examples/main.c b/examples/main.c
index 8b00c569e4e151fc00a695376528949462e529c6..9426597bca62174f68b09a2813aa3381962c4fe0 100644
--- a/examples/main.c
+++ b/examples/main.c
@@ -83,6 +83,7 @@ void print_help_message() {
          "Execute a fixed number of time steps. When unset use the time_end "
          "parameter to stop.");
   printf("  %2s %8s %s\n", "-s", "", "Run with SPH");
+  printf("  %2s %8s %s\n", "-S", "", "Run with stars");
   printf("  %2s %8s %s\n", "-t", "{int}",
          "The number of threads to use on each MPI rank. Defaults to 1 if not "
          "specified.");
@@ -156,6 +157,7 @@ int main(int argc, char *argv[]) {
   int with_cooling = 0;
   int with_self_gravity = 0;
   int with_hydro = 0;
+  int with_stars = 0;
   int with_fp_exceptions = 0;
   int with_drift_all = 0;
   int verbose = 0;
@@ -165,7 +167,7 @@ int main(int argc, char *argv[]) {
 
   /* Parse the parameters */
   int c;
-  while ((c = getopt(argc, argv, "acCdDef:FgGhn:st:v:y:")) != -1) switch (c) {
+  while ((c = getopt(argc, argv, "acCdDef:FgGhn:sSt:v:y:")) != -1) switch (c) {
       case 'a':
         with_aff = 1;
         break;
@@ -213,6 +215,9 @@ int main(int argc, char *argv[]) {
       case 's':
         with_hydro = 1;
         break;
+      case 'S':
+        with_stars = 1;
+        break;
       case 't':
         if (sscanf(optarg, "%d", &nr_threads) != 1) {
           if (myrank == 0)
@@ -269,6 +274,9 @@ int main(int argc, char *argv[]) {
   /* Genesis 1.1: And then, there was time ! */
   clocks_set_cpufreq(cpufreq);
 
+  /* How vocal are we ? */
+  const int talking = (verbose == 1 && myrank == 0) || (verbose == 2);
+
   if (myrank == 0 && dry_run)
     message(
         "Executing a dry run. No i/o or time integration will be performed.");
@@ -281,7 +289,7 @@ int main(int argc, char *argv[]) {
 
 /* Report host name(s). */
 #ifdef WITH_MPI
-  if (myrank == 0 || verbose > 1) {
+  if (talking) {
     message("Rank %d running on: %s", myrank, hostname());
   }
 #else
@@ -290,27 +298,27 @@ int main(int argc, char *argv[]) {
 
 /* Do we have debugging checks ? */
 #ifdef SWIFT_DEBUG_CHECKS
-  message("WARNING: Debugging checks activated. Code will be slower !");
+  if (myrank == 0)
+    message("WARNING: Debugging checks activated. Code will be slower !");
 #endif
 
   /* Do we choke on FP-exceptions ? */
   if (with_fp_exceptions) {
     feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW);
-    if (myrank == 0) message("Floating point exceptions will be reported.");
+    if (myrank == 0)
+      message("WARNING: Floating point exceptions will be reported.");
   }
 
   /* How large are the parts? */
   if (myrank == 0) {
     message("sizeof(struct part)  is %4zi bytes.", sizeof(struct part));
     message("sizeof(struct xpart) is %4zi bytes.", sizeof(struct xpart));
+    message("sizeof(struct spart) is %4zi bytes.", sizeof(struct spart));
     message("sizeof(struct gpart) is %4zi bytes.", sizeof(struct gpart));
     message("sizeof(struct task)  is %4zi bytes.", sizeof(struct task));
     message("sizeof(struct cell)  is %4zi bytes.", sizeof(struct cell));
   }
 
-  /* How vocal are we ? */
-  const int talking = (verbose == 1 && myrank == 0) || (verbose == 2);
-
   /* Read the parameter file */
   struct swift_params *params = malloc(sizeof(struct swift_params));
   if (params == NULL) error("Error allocating memory for the parameter file.");
@@ -358,7 +366,7 @@ int main(int argc, char *argv[]) {
 
   /* Initialise the hydro properties */
   struct hydro_props hydro_properties;
-  hydro_props_init(&hydro_properties, params);
+  if (with_hydro) hydro_props_init(&hydro_properties, params);
 
   /* Read particles and space information from (GADGET) ICs */
   char ICfileName[200] = "";
@@ -366,26 +374,32 @@ int main(int argc, char *argv[]) {
   if (myrank == 0) message("Reading ICs from file '%s'", ICfileName);
   fflush(stdout);
 
+  /* Get ready to read particles of all kinds */
   struct part *parts = NULL;
   struct gpart *gparts = NULL;
-  size_t Ngas = 0, Ngpart = 0;
+  struct spart *sparts = NULL;
+  size_t Ngas = 0, Ngpart = 0, Nspart = 0;
   double dim[3] = {0., 0., 0.};
   int periodic = 0;
   int flag_entropy_ICs = 0;
   if (myrank == 0) clocks_gettime(&tic);
 #if defined(WITH_MPI)
 #if defined(HAVE_PARALLEL_HDF5)
-  read_ic_parallel(ICfileName, &us, dim, &parts, &gparts, &Ngas, &Ngpart,
-                   &periodic, &flag_entropy_ICs, myrank, nr_nodes,
-                   MPI_COMM_WORLD, MPI_INFO_NULL, dry_run);
+  read_ic_parallel(ICfileName, &us, dim, &parts, &gparts, &sparts, &Ngas,
+                   &Ngpart, &Nspart, &periodic, &flag_entropy_ICs, with_hydro,
+                   (with_external_gravity || with_self_gravity), with_stars,
+                   myrank, nr_nodes, MPI_COMM_WORLD, MPI_INFO_NULL, dry_run);
 #else
-  read_ic_serial(ICfileName, &us, dim, &parts, &gparts, &Ngas, &Ngpart,
-                 &periodic, &flag_entropy_ICs, myrank, nr_nodes, MPI_COMM_WORLD,
-                 MPI_INFO_NULL, dry_run);
+  read_ic_serial(ICfileName, &us, dim, &parts, &gparts, &sparts, &Ngas, &Ngpart,
+                 &Nspart, &periodic, &flag_entropy_ICs, with_hydro,
+                 (with_external_gravity || with_self_gravity), with_stars,
+                 myrank, nr_nodes, MPI_COMM_WORLD, MPI_INFO_NULL, dry_run);
 #endif
 #else
-  read_ic_single(ICfileName, &us, dim, &parts, &gparts, &Ngas, &Ngpart,
-                 &periodic, &flag_entropy_ICs, dry_run);
+  read_ic_single(ICfileName, &us, dim, &parts, &gparts, &sparts, &Ngas, &Ngpart,
+                 &Nspart, &periodic, &flag_entropy_ICs, with_hydro,
+                 (with_external_gravity || with_self_gravity), with_stars,
+                 dry_run);
 #endif
   if (myrank == 0) {
     clocks_gettime(&toc);
@@ -394,40 +408,40 @@ int main(int argc, char *argv[]) {
     fflush(stdout);
   }
 
-  /* Discard gparts if we don't have gravity
-   * (Better implementation of i/o will come)*/
-  if (!with_external_gravity && !with_self_gravity) {
-    free(gparts);
-    gparts = NULL;
-    for (size_t k = 0; k < Ngas; ++k) parts[k].gpart = NULL;
-    Ngpart = 0;
+#ifdef SWIFT_DEBUG_CHECKS
+  /* Check once and for all that we don't have unwanted links */
+  if (!with_stars) {
+    for (size_t k = 0; k < Ngpart; ++k)
+      if (gparts[k].type == swift_type_star) error("Linking problem");
   }
   if (!with_hydro) {
-    free(parts);
-    parts = NULL;
     for (size_t k = 0; k < Ngpart; ++k)
-      if (gparts[k].id_or_neg_offset < 0) error("Linking problem");
-    Ngas = 0;
+      if (gparts[k].type == swift_type_gas) error("Linking problem");
   }
+#endif
 
   /* Get the total number of particles across all nodes. */
-  long long N_total[2] = {0, 0};
+  long long N_total[3] = {0, 0, 0};
 #if defined(WITH_MPI)
-  long long N_long[2] = {Ngas, Ngpart};
-  MPI_Reduce(&N_long, &N_total, 2, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_WORLD);
+  long long N_long[3] = {Ngas, Ngpart, Nspart};
+  MPI_Reduce(&N_long, &N_total, 3, MPI_LONG_LONG_INT, MPI_SUM, 0,
+             MPI_COMM_WORLD);
 #else
   N_total[0] = Ngas;
   N_total[1] = Ngpart;
+  N_total[2] = Nspart;
 #endif
   if (myrank == 0)
-    message("Read %lld gas particles and %lld gparts from the ICs.", N_total[0],
-            N_total[1]);
+    message(
+        "Read %lld gas particles, %lld star particles and %lld gparts from the "
+        "ICs.",
+        N_total[0], N_total[2], N_total[1]);
 
   /* Initialize the space with these data. */
   if (myrank == 0) clocks_gettime(&tic);
   struct space s;
-  space_init(&s, params, dim, parts, gparts, Ngas, Ngpart, periodic,
-             with_self_gravity, talking, dry_run);
+  space_init(&s, params, dim, parts, gparts, sparts, Ngas, Ngpart, Nspart,
+             periodic, with_self_gravity, talking, dry_run);
   if (myrank == 0) {
     clocks_gettime(&toc);
     message("space_init took %.3f %s.", clocks_diff(&tic, &toc),
@@ -487,6 +501,7 @@ int main(int argc, char *argv[]) {
   if (with_cosmology) engine_policies |= engine_policy_cosmology;
   if (with_cooling) engine_policies |= engine_policy_cooling;
   if (with_sourceterms) engine_policies |= engine_policy_sourceterms;
+  if (with_stars) engine_policies |= engine_policy_stars;
 
   /* Initialize the engine with the space and policies. */
   if (myrank == 0) clocks_gettime(&tic);
@@ -508,11 +523,16 @@ int main(int argc, char *argv[]) {
 
   /* Get some info to the user. */
   if (myrank == 0) {
+    long long N_DM = N_total[1] - N_total[2] - N_total[0];
+    message(
+        "Running on %lld gas particles, %lld star particles and %lld DM "
+        "particles (%lld gravity particles)",
+        N_total[0], N_total[2], N_total[1] > 0 ? N_DM : 0, N_total[1]);
     message(
-        "Running on %lld gas particles and %lld DM particles from t=%.3e until "
-        "t=%.3e with %d threads and %d queues (dt_min=%.3e, dt_max=%.3e)...",
-        N_total[0], N_total[1], e.timeBegin, e.timeEnd, e.nr_threads,
-        e.sched.nr_queues, e.dt_min, e.dt_max);
+        "from t=%.3e until t=%.3e with %d threads and %d queues (dt_min=%.3e, "
+        "dt_max=%.3e)...",
+        e.timeBegin, e.timeEnd, e.nr_threads, e.sched.nr_queues, e.dt_min,
+        e.dt_max);
     fflush(stdout);
   }
 
@@ -543,8 +563,9 @@ int main(int argc, char *argv[]) {
 
   /* Legend */
   if (myrank == 0)
-    printf("# %6s %14s %14s %10s %10s %16s [%s]\n", "Step", "Time", "Time-step",
-           "Updates", "g-Updates", "Wall-clock time", clocks_getunit());
+    printf("# %6s %14s %14s %10s %10s %10s %16s [%s]\n", "Step", "Time",
+           "Time-step", "Updates", "g-Updates", "s-Updates", "Wall-clock time",
+           clocks_getunit());
 
   /* Main simulation loop */
   for (int j = 0; !engine_is_done(&e) && e.step != nsteps; j++) {
diff --git a/examples/parameter_example.yml b/examples/parameter_example.yml
index 6300d4831a50187b84c0de3d438e7692ef5719a3..6a35968e65f2fca0202320aea22bf75bd5d5e1b8 100644
--- a/examples/parameter_example.yml
+++ b/examples/parameter_example.yml
@@ -82,15 +82,7 @@ IsothermalPotential:
   position_z:      100.
   vrot:            200.     # Rotation speed of isothermal potential (internal units)
   timestep_mult:   0.03     # Dimensionless pre-factor for the time-step condition
-
-# External potential parameters
-SoftenedIsothermalPotential:
-  position_x:      0.       # Location of centre of isothermal potential with respect to centre of the box (internal units)
-  position_y:      0.
-  position_z:      0.	
-  vrot:            200.     # rotation speed of isothermal potential (internal units)
   epsilon:         0.1      # Softening size (internal units)
-  timestep_mult:   0.03     # controls time step
   
 # Disk-patch potential parameters
 DiscPatchPotential:
diff --git a/examples/plot_tasks.py b/examples/plot_tasks.py
index 6295c81a5f2fdb1e726cdf0a8fb43713004800f1..978448b3cd049c6ff31a92c7255851390ccc700c 100755
--- a/examples/plot_tasks.py
+++ b/examples/plot_tasks.py
@@ -55,40 +55,44 @@ PLOT_PARAMS = {"axes.labelsize": 10,
 pl.rcParams.update(PLOT_PARAMS)
 
 #  Tasks and subtypes. Indexed as in tasks.h.
-TASKTYPES = ["none", "sort", "self", "pair", "sub_self", "sub_pair", "init", "ghost",
-             "extra_ghost", "kick", "send", "recv",
-             "grav_gather_m", "grav_fft", "grav_mm", "grav_up",
-             "grav_external", "cooling", "count"]
-
-TASKCOLOURS = {"none": "black",
-               "sort": "lightblue",
-               "self": "greenyellow",
-               "pair": "navy",
-               "sub_self": "greenyellow",
-               "sub_pair": "navy",
-               "init": "indigo",
-               "ghost": "cyan",
-               "extra_ghost": "cyan",
-               "kick": "green",
-               "send": "yellow",
-               "recv": "magenta",
-               "grav_gather_m": "mediumorchid",
-               "grav_fft": "mediumnightblue",
-               "grav_mm": "mediumturquoise",
-               "grav_up": "mediumvioletred",
-               "grav_external": "darkred",
-               "cooling": "darkblue",
-               "count": "powerblue"}
-
-SUBTYPES = ["none", "density", "gradient", "force", "grav", "tend", "count"]
-
-SUBCOLOURS = {"none": "black",
-              "density": "red",
-              "gradient": "powerblue",
-              "force": "blue",
-              "grav": "indigo",
-              "tend": "grey",
-              "count": "black"}
+TASKTYPES = ["none", "sort", "self", "pair", "sub_self", "sub_pair",
+             "init", "ghost", "extra_ghost", "drift", "kick1", "kick2",
+             "timestep", "send", "recv", "grav_gather_m", "grav_fft",
+             "grav_mm", "grav_up", "cooling", "sourceterms", "count"]
+SUBTYPES = ["none", "density", "gradient", "force", "grav", "external_grav",
+            "tend", "xv", "rho", "gpart", "count"]
+
+#  Task/subtypes of interest.
+FULLTYPES = ["self/force", "self/density", "sub_self/force",
+             "sub_self/density", "pair/force", "pair/density", "sub_pair/force",
+             "sub_pair/density", "recv/xv", "send/xv", "recv/rho", "send/rho",
+             "recv/tend", "send/tend"]
+
+#  Get a number of colours for the various types.
+colours = ["black", "gray", "rosybrown", "firebrick", "red", "darksalmon",
+           "sienna", "sandybrown", "bisque", "tan", "moccasin", "gold", "darkkhaki",
+           "lightgoldenrodyellow", "olivedrab", "chartreuse", "darksage", "lightgreen",
+           "green", "mediumseagreen", "mediumaquamarine", "mediumturquoise", "darkslategrey",
+           "cyan", "cadetblue", "skyblue", "dodgerblue", "slategray", "darkblue",
+           "slateblue", "blueviolet", "mediumorchid", "purple", "magenta", "hotpink",
+           "pink"]
+maxcolours = len(colours)
+
+#  Set colours of task/subtype.
+TASKCOLOURS = {}
+ncolours = 0
+for task in TASKTYPES:
+    TASKCOLOURS[task] = colours[ncolours]
+    ncolours = (ncolours + 1) % maxcolours
+
+SUBCOLOURS = {}
+for task in SUBTYPES:
+    SUBCOLOURS[task] = colours[ncolours]
+    ncolours = (ncolours + 1) % maxcolours
+
+for task in FULLTYPES:
+    SUBCOLOURS[task] = colours[ncolours]
+    ncolours = (ncolours + 1) % maxcolours
 
 #  Show docs if help is requested.
 if len( sys.argv ) == 2 and ( sys.argv[1][0:2] == "-h" or sys.argv[1][0:3] == "--h" ):
@@ -149,39 +153,26 @@ num_lines = pl.size(data) / 10
 for line in range(num_lines):
     thread = int(data[line,0])
     tasks[thread].append({})
-    tasks[thread][-1]["type"] = TASKTYPES[int(data[line,1])]
-    tasks[thread][-1]["subtype"] = SUBTYPES[int(data[line,2])]
+    tasktype = TASKTYPES[int(data[line,1])]
+    subtype = SUBTYPES[int(data[line,2])]
+    tasks[thread][-1]["type"] = tasktype
+    tasks[thread][-1]["subtype"] = subtype
     tic = int(data[line,4]) / CPU_CLOCK * 1000
     toc = int(data[line,5]) / CPU_CLOCK * 1000
     tasks[thread][-1]["tic"] = tic
     tasks[thread][-1]["toc"] = toc
     tasks[thread][-1]["t"] = (toc + tic)/ 2
+    if "self" in tasktype or "pair" in tasktype:
+        fulltype = tasktype + "/" + subtype
+        if fulltype in SUBCOLOURS:
+            tasks[thread][-1]["colour"] = SUBCOLOURS[fulltype]
+        else:
+            tasks[thread][-1]["colour"] = SUBCOLOURS[subtype]
+    else:
+        tasks[thread][-1]["colour"] = TASKCOLOURS[tasktype]
     
-combtasks = {}
-combtasks[-1] = []
-for i in range(nthread):
-    combtasks[i] = []
-
 for thread in range(nthread):
     tasks[thread] = sorted(tasks[thread], key=lambda l: l["t"])
-    lasttype = ""
-    types = []
-    for task in tasks[thread]:
-        if task["type"] not in types:
-            types.append(task["type"])
-        if lasttype == "" or not lasttype == task["type"]:
-            combtasks[thread].append({})
-            combtasks[thread][-1]["type"] = task["type"]
-            combtasks[thread][-1]["subtype"] = task["subtype"]
-            combtasks[thread][-1]["tic"] = task["tic"]
-            combtasks[thread][-1]["toc"] = task["toc"]
-            if task["type"] == "self" or task["type"] == "pair" or task["type"] == "sub":
-                combtasks[thread][-1]["colour"] = SUBCOLOURS[task["subtype"]]
-            else:
-                combtasks[thread][-1]["colour"] = TASKCOLOURS[task["type"]]
-            lasttype = task["type"]
-        else:
-            combtasks[thread][-1]["toc"] = task["toc"]
             
 typesseen = []
 fig = pl.figure()
@@ -192,11 +183,11 @@ tictoc = np.zeros(2)
 for i in range(nthread):
 
     #  Collect ranges and colours into arrays.
-    tictocs = np.zeros(len(combtasks[i])*2)
-    colours = np.empty(len(combtasks[i])*2, dtype='object')
+    tictocs = np.zeros(len(tasks[i])*2)
+    colours = np.empty(len(tasks[i])*2, dtype='object')
     coloursseen = []
     j = 0
-    for task in combtasks[i]:
+    for task in tasks[i]:
         tictocs[j] = task["tic"]
         tictocs[j+1] = task["toc"]
         colours[j] = task["colour"]
diff --git a/examples/plot_tasks_MPI.py b/examples/plot_tasks_MPI.py
index 734918b8cbf388ef8f1a064e014cfd28775edde2..c95bfa1fd2d087cc907b57201c1a1397cbeb1460 100755
--- a/examples/plot_tasks_MPI.py
+++ b/examples/plot_tasks_MPI.py
@@ -63,40 +63,44 @@ PLOT_PARAMS = {"axes.labelsize": 10,
 pl.rcParams.update(PLOT_PARAMS)
 
 #  Tasks and subtypes. Indexed as in tasks.h.
-TASKTYPES = ["none", "sort", "self", "pair", "sub_self", "sub_pair", "init",
-             "ghost", "extra_ghost", "kick", "send", "recv",
-             "grav_gather_m", "grav_fft", "grav_mm", "grav_up",
-             "grav_external", "cooling", "count"]
-
-TASKCOLOURS = {"none": "black",
-               "sort": "lightblue",
-               "self": "greenyellow",
-               "pair": "navy",
-               "sub_self": "greenyellow",
-               "sub_pair": "navy",
-               "init": "indigo",
-               "ghost": "cyan",
-               "extra_ghost": "cyan",
-               "kick": "green",
-               "send": "yellow",
-               "recv": "magenta",
-               "grav_gather_m": "mediumorchid",
-               "grav_fft": "mediumnightblue",
-               "grav_mm": "mediumturquoise",
-               "grav_up": "mediumvioletred",
-               "grav_external": "darkred",
-               "cooling": "darkblue",
-               "count": "powerblue"}
-
-SUBTYPES = ["none", "density", "gradient", "force", "grav", "tend", "count"]
-
-SUBCOLOURS = {"none": "black",
-              "density": "red",
-              "gradient": "powerblue",
-              "force": "blue",
-              "grav": "indigo",
-              "tend": "grey",
-              "count": "black"}
+TASKTYPES = ["none", "sort", "self", "pair", "sub_self", "sub_pair",
+             "init", "ghost", "extra_ghost", "drift", "kick1", "kick2",
+             "timestep", "send", "recv", "grav_gather_m", "grav_fft",
+             "grav_mm", "grav_up", "cooling", "sourceterms", "count"]
+SUBTYPES = ["none", "density", "gradient", "force", "grav", "external_grav",
+            "tend", "xv", "rho", "gpart", "count"]
+
+#  Task/subtypes of interest.
+FULLTYPES = ["self/force", "self/density", "sub_self/force",
+             "sub_self/density", "pair/force", "pair/density", "sub_pair/force",
+             "sub_pair/density", "recv/xv", "send/xv", "recv/rho", "send/rho",
+             "recv/tend", "send/tend"]
+
+#  Get a number of colours for the various types.
+colours = ["black", "gray", "rosybrown", "firebrick", "red", "darksalmon",
+           "sienna", "sandybrown", "bisque", "tan", "moccasin", "gold", "darkkhaki",
+           "lightgoldenrodyellow", "olivedrab", "chartreuse", "darksage", "lightgreen",
+           "green", "mediumseagreen", "mediumaquamarine", "mediumturquoise", "darkslategrey",
+           "cyan", "cadetblue", "skyblue", "dodgerblue", "slategray", "darkblue",
+           "slateblue", "blueviolet", "mediumorchid", "purple", "magenta", "hotpink",
+           "pink"]
+maxcolours = len(colours)
+
+#  Set colours of task/subtype.
+TASKCOLOURS = {}
+ncolours = 0
+for task in TASKTYPES:
+    TASKCOLOURS[task] = colours[ncolours]
+    ncolours = (ncolours + 1) % maxcolours
+
+SUBCOLOURS = {}
+for task in SUBTYPES:
+    SUBCOLOURS[task] = colours[ncolours]
+    ncolours = (ncolours + 1) % maxcolours
+
+for task in FULLTYPES:
+    SUBCOLOURS[task] = colours[ncolours]
+    ncolours = (ncolours + 1) % maxcolours
 
 #  Show docs if help is requested.
 if len( sys.argv ) == 2 and ( sys.argv[1][0:2] == "-h" or sys.argv[1][0:3] == "--h" ):
@@ -185,39 +189,26 @@ for rank in range(nranks):
         for line in range(num_lines):
             thread = int(data[line,1])
             tasks[thread].append({})
-            tasks[thread][-1]["type"] = TASKTYPES[int(data[line,2])]
-            tasks[thread][-1]["subtype"] = SUBTYPES[int(data[line,3])]
+            tasktype = TASKTYPES[int(data[line,2])]
+            subtype = SUBTYPES[int(data[line,3])]
+            tasks[thread][-1]["type"] = tasktype
+            tasks[thread][-1]["subtype"] = subtype
             tic = int(data[line,5]) / CPU_CLOCK * 1000
             toc = int(data[line,6]) / CPU_CLOCK * 1000
             tasks[thread][-1]["tic"] = tic
             tasks[thread][-1]["toc"] = toc
             tasks[thread][-1]["t"] = (toc + tic)/ 2
-
-        combtasks = {}
-        combtasks[-1] = []
-        for i in range(nthread):
-            combtasks[i] = []
+            if "self" in tasktype or "pair" in tasktype or "recv" in tasktype or "send" in tasktype:
+                fulltype = tasktype + "/" + subtype
+                if fulltype in SUBCOLOURS:
+                    tasks[thread][-1]["colour"] = SUBCOLOURS[fulltype]
+                else:
+                    tasks[thread][-1]["colour"] = SUBCOLOURS[subtype]
+            else:
+                tasks[thread][-1]["colour"] = TASKCOLOURS[tasktype]
 
         for thread in range(nthread):
             tasks[thread] = sorted(tasks[thread], key=lambda l: l["t"])
-            lasttype = ""
-            types = []
-            for task in tasks[thread]:
-                if task["type"] not in types:
-                    types.append(task["type"])
-                if lasttype == "" or not lasttype == task["type"]:
-                    combtasks[thread].append({})
-                    combtasks[thread][-1]["type"] = task["type"]
-                    combtasks[thread][-1]["subtype"] = task["subtype"]
-                    combtasks[thread][-1]["tic"] = task["tic"]
-                    combtasks[thread][-1]["toc"] = task["toc"]
-                    if task["type"] == "self" or task["type"] == "pair" or task["type"] == "sub":
-                        combtasks[thread][-1]["colour"] = SUBCOLOURS[task["subtype"]]
-                    else:
-                        combtasks[thread][-1]["colour"] = TASKCOLOURS[task["type"]]
-                    lasttype = task["type"]
-                else:
-                    combtasks[thread][-1]["toc"] = task["toc"]
 
         fig = pl.figure()
         ax = fig.add_subplot(1,1,1)
@@ -227,11 +218,11 @@ for rank in range(nranks):
         for i in range(nthread):
 
             #  Collect ranges and colours into arrays.
-            tictocs = np.zeros(len(combtasks[i])*2)
-            colours = np.empty(len(combtasks[i])*2, dtype='object')
+            tictocs = np.zeros(len(tasks[i])*2)
+            colours = np.empty(len(tasks[i])*2, dtype='object')
             coloursseen = []
             j = 0
-            for task in combtasks[i]:
+            for task in tasks[i]:
                 tictocs[j] = task["tic"]
                 tictocs[j+1] = task["toc"]
                 colours[j] = task["colour"]
diff --git a/src/Makefile.am b/src/Makefile.am
index 826ec687d0b9c72768a798ba692755258320227f..515b8aed02e92334e92fb8414a5b4e90db5cbbe1 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -25,7 +25,7 @@ AM_LDFLAGS = $(HDF5_LDFLAGS) $(FFTW_LIBS) -version-info 0:0:0
 GIT_CMD = @GIT_CMD@
 
 # Additional dependencies for shared libraries.
-EXTRA_LIBS = $(HDF5_LIBS) $(PROFILER_LIBS) $(TCMALLOC_LIBS)
+EXTRA_LIBS = $(HDF5_LIBS) $(PROFILER_LIBS) $(TCMALLOC_LIBS) $(JEMALLOC_LIBS)
 
 # MPI libraries.
 MPI_LIBS = $(METIS_LIBS) $(MPI_THREAD_LIBS)
@@ -44,7 +44,8 @@ include_HEADERS = space.h runner.h queue.h task.h lock.h cell.h part.h const.h \
     common_io.h single_io.h multipole.h map.h tools.h partition.h clocks.h parser.h \
     physical_constants.h physical_constants_cgs.h potential.h version.h \
     hydro_properties.h riemann.h threadpool.h cooling.h cooling_struct.h sourceterms.h \
-    sourceterms_struct.h statistics.h memswap.h profiler.h
+    sourceterms_struct.h statistics.h memswap.h cache.h runner_doiact_vec.h profiler.h \
+    dump.h logger.h active.h timeline.h
 
 # Common source files
 AM_SOURCES = space.c runner.c queue.c task.c cell.c engine.c \
@@ -53,13 +54,13 @@ AM_SOURCES = space.c runner.c queue.c task.c cell.c engine.c \
     kernel_hydro.c tools.c part.c partition.c clocks.c parser.c \
     physical_constants.c potential.c hydro_properties.c \
     runner_doiact_fft.c threadpool.c cooling.c sourceterms.c \
-    statistics.c profiler.c
+    statistics.c runner_doiact_vec.c profiler.c dump.c logger.c
 
 # Include files for distribution, not installation.
 nobase_noinst_HEADERS = align.h approx_math.h atomic.h cycle.h error.h inline.h kernel_hydro.h kernel_gravity.h \
-		 kernel_long_gravity.h vector.h runner_doiact.h runner_doiact_grav.h runner_doiact_fft.h \
-                 units.h intrinsics.h minmax.h kick.h timestep.h drift.h adiabatic_index.h io_properties.h \
-		 dimension.h equation_of_state.h active.h \
+		 kernel_long_gravity.h vector.h cache.h runner_doiact.h runner_doiact_vec.h runner_doiact_grav.h runner_doiact_fft.h \
+                 runner_doiact_nosort.h units.h intrinsics.h minmax.h kick.h timestep.h drift.h adiabatic_index.h io_properties.h \
+		 dimension.h equation_of_state.h part_type.h \
 		 gravity.h gravity_io.h \
 		 gravity/Default/gravity.h gravity/Default/gravity_iact.h gravity/Default/gravity_io.h \
 		 gravity/Default/gravity_debug.h gravity/Default/gravity_part.h  \
@@ -77,13 +78,15 @@ nobase_noinst_HEADERS = align.h approx_math.h atomic.h cycle.h error.h inline.h
                  hydro/Gizmo/hydro_debug.h hydro/Gizmo/hydro_part.h \
 	         riemann/riemann_hllc.h riemann/riemann_trrs.h \
 		 riemann/riemann_exact.h riemann/riemann_vacuum.h \
+	 	 stars.h stars_io.h \
+		 stars/Default/star.h stars/Default/star_iact.h stars/Default/star_io.h \
+		 stars/Default/star_debug.h stars/Default/star_part.h  \
 	         potential/none/potential.h potential/point_mass/potential.h \
                  potential/isothermal/potential.h potential/disc_patch/potential.h \
-		 potential/softened_isothermal/potential.h \
 		 cooling/none/cooling.h cooling/none/cooling_struct.h \
 	         cooling/const_du/cooling.h cooling/const_du/cooling_struct.h \
                  cooling/const_lambda/cooling.h cooling/const_lambda/cooling_struct.h \
-                 memswap.h
+                 memswap.h dump.h logger.h
 
 
 # Sources and flags for regular library
diff --git a/src/active.h b/src/active.h
index e33f8baf6e5bd5d799e122e4e04610a7cab443bf..0c22a745fed4fbdf72ef1377fad45b78c86f178f 100644
--- a/src/active.h
+++ b/src/active.h
@@ -26,33 +26,28 @@
 #include "cell.h"
 #include "engine.h"
 #include "part.h"
+#include "timeline.h"
 
 /**
  * @brief Check that a cell been drifted to the current time.
  *
- * Only used for debugging. Calls error() if the cell has not
- * been drifted. Does nothing if SWIFT_DEBUG_CHECKS is not defined.
- *
  * @param c The #cell.
  * @param e The #engine containing information about the current time.
+ * @return 1 if the #cell has been drifted to the current time, 0 otherwise.
  */
-__attribute__((always_inline)) INLINE static void cell_is_drifted(
+__attribute__((always_inline)) INLINE static int cell_is_drifted(
     const struct cell *c, const struct engine *e) {
 
 #ifdef SWIFT_DEBUG_CHECKS
   if (c->ti_old > e->ti_current)
     error(
-        "Cell has been drifted too far forward in time! c->ti_old=%d "
-        "e->ti_current=%d",
-        c->ti_old, e->ti_current);
-
-  if (c->ti_old != e->ti_current) {
-    error(
-        "Cell has not been drifted to the current time c->ti_old=%d, "
-        "e->ti_current=%d",
-        c->ti_old, e->ti_current);
-  }
+        "Cell has been drifted too far forward in time! c->ti_old=%lld (t=%e) "
+        "and e->ti_current=%lld (t=%e)",
+        c->ti_old, c->ti_old * e->timeBase, e->ti_current,
+        e->ti_current * e->timeBase);
 #endif
+
+  return (c->ti_old == e->ti_current);
 }
 
 /**
@@ -60,14 +55,18 @@ __attribute__((always_inline)) INLINE static void cell_is_drifted(
  *
  * @param c The #cell.
  * @param e The #engine containing information about the current time.
+ * @return 1 if the #cell contains at least an active particle, 0 otherwise.
  */
 __attribute__((always_inline)) INLINE static int cell_is_active(
     const struct cell *c, const struct engine *e) {
 
 #ifdef SWIFT_DEBUG_CHECKS
   if (c->ti_end_min < e->ti_current)
-    error("cell in an impossible time-zone! c->ti_end_min=%d e->ti_current=%d",
-          c->ti_end_min, e->ti_current);
+    error(
+        "cell in an impossible time-zone! c->ti_end_min=%lld (t=%e) and "
+        "e->ti_current=%lld (t=%e)",
+        c->ti_end_min, c->ti_end_min * e->timeBase, e->ti_current,
+        e->ti_current * e->timeBase);
 #endif
 
   return (c->ti_end_min == e->ti_current);
@@ -78,14 +77,17 @@ __attribute__((always_inline)) INLINE static int cell_is_active(
  *
  * @param c The #cell.
  * @param e The #engine containing information about the current time.
+ * @return 1 if all particles in a #cell are active, 0 otherwise.
  */
 __attribute__((always_inline)) INLINE static int cell_is_all_active(
     const struct cell *c, const struct engine *e) {
 
 #ifdef SWIFT_DEBUG_CHECKS
   if (c->ti_end_max < e->ti_current)
-    error("cell in an impossible time-zone! c->ti_end_max=%d e->ti_current=%d",
-          c->ti_end_max, e->ti_current);
+    error(
+        "cell in an impossible time-zone! c->ti_end_max=%lld "
+        "e->ti_current=%lld",
+        c->ti_end_max, e->ti_current);
 #endif
 
   return (c->ti_end_max == e->ti_current);
@@ -96,17 +98,23 @@ __attribute__((always_inline)) INLINE static int cell_is_all_active(
  *
  * @param p The #part.
  * @param e The #engine containing information about the current time.
+ * @return 1 if the #part is active, 0 otherwise.
  */
 __attribute__((always_inline)) INLINE static int part_is_active(
     const struct part *p, const struct engine *e) {
 
+  const integertime_t ti_current = e->ti_current;
+  const integertime_t ti_end = get_integer_time_end(ti_current, p->time_bin);
+
 #ifdef SWIFT_DEBUG_CHECKS
-  if (p->ti_end < e->ti_current)
-    error("particle in an impossible time-zone! p->ti_end=%d e->ti_current=%d",
-          p->ti_end, e->ti_current);
+  if (ti_end < ti_current)
+    error(
+        "particle in an impossible time-zone! p->ti_end=%lld "
+        "e->ti_current=%lld",
+        ti_end, ti_current);
 #endif
 
-  return (p->ti_end == e->ti_current);
+  return (ti_end == ti_current);
 }
 
 /**
@@ -114,18 +122,47 @@ __attribute__((always_inline)) INLINE static int part_is_active(
  *
  * @param gp The #gpart.
  * @param e The #engine containing information about the current time.
+ * @return 1 if the #gpart is active, 0 otherwise.
  */
 __attribute__((always_inline)) INLINE static int gpart_is_active(
     const struct gpart *gp, const struct engine *e) {
 
+  const integertime_t ti_current = e->ti_current;
+  const integertime_t ti_end = get_integer_time_end(ti_current, gp->time_bin);
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (ti_end < ti_current)
+    error(
+        "g-particle in an impossible time-zone! gp->ti_end=%lld "
+        "e->ti_current=%lld",
+        ti_end, ti_current);
+#endif
+
+  return (ti_end == ti_current);
+}
+
+/**
+ * @brief Is this s-particle active ?
+ *
+ * @param sp The #spart.
+ * @param e The #engine containing information about the current time.
+ * @return 1 if the #spart is active, 0 otherwise.
+ */
+__attribute__((always_inline)) INLINE static int spart_is_active(
+    const struct spart *sp, const struct engine *e) {
+
+  const integertime_t ti_current = e->ti_current;
+  const integertime_t ti_end = get_integer_time_end(ti_current, sp->time_bin);
+
 #ifdef SWIFT_DEBUG_CHECKS
-  if (gp->ti_end < e->ti_current)
+  if (ti_end < ti_current)
     error(
-        "g-particle in an impossible time-zone! gp->ti_end=%d e->ti_current=%d",
-        gp->ti_end, e->ti_current);
+        "s-particle in an impossible time-zone! gp->ti_end=%lld "
+        "e->ti_current=%lld",
+        ti_end, ti_current);
 #endif
 
-  return (gp->ti_end == e->ti_current);
+  return (ti_end == ti_current);
 }
 
 #endif /* SWIFT_ACTIVE_H */
diff --git a/src/align.h b/src/align.h
index 84e2909c0866c18f0f8378df9d0efc8d0f6545b5..915af33e6e2ba59be1a0849c4de0e2f1bd5b0d96 100644
--- a/src/align.h
+++ b/src/align.h
@@ -19,9 +19,13 @@
 #ifndef SWIFT_ALIGN_H
 #define SWIFT_ALIGN_H
 
+/**
+ * @brief The default struct alignment in SWIFT.
+ */
+#define SWIFT_STRUCT_ALIGNMENT 32
 /**
  * @brief Defines alignment of structures
  */
-#define SWIFT_STRUCT_ALIGN __attribute__((aligned(32)))
+#define SWIFT_STRUCT_ALIGN __attribute__((aligned(SWIFT_STRUCT_ALIGNMENT)))
 
 #endif /* SWIFT_ALIGN_H */
diff --git a/src/cache.h b/src/cache.h
new file mode 100644
index 0000000000000000000000000000000000000000..19d61b657b3aa1fe8675ee413fcde146071381e9
--- /dev/null
+++ b/src/cache.h
@@ -0,0 +1,183 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2016 James Willis (jame.s.willis@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+#ifndef SWIFT_CACHE_H
+#define SWIFT_CACHE_H
+
+/* Config parameters. */
+#include "../config.h"
+
+/* Local headers */
+#include "cell.h"
+#include "error.h"
+#include "part.h"
+#include "vector.h"
+
+#define NUM_VEC_PROC 2
+#define C2_CACHE_SIZE (NUM_VEC_PROC * VEC_SIZE * 6) + (NUM_VEC_PROC * VEC_SIZE)
+#define C2_CACHE_ALIGN sizeof(float) * VEC_SIZE
+
+/* Cache struct to hold a local copy of a cells' particle
+ * properties required for density/force calculations.*/
+struct cache {
+
+  /* Particle x position. */
+  float *restrict x __attribute__((aligned(sizeof(float) * VEC_SIZE)));
+
+  /* Particle y position. */
+  float *restrict y __attribute__((aligned(sizeof(float) * VEC_SIZE)));
+
+  /* Particle z position. */
+  float *restrict z __attribute__((aligned(sizeof(float) * VEC_SIZE)));
+
+  /* Particle smoothing length. */
+  float *restrict h __attribute__((aligned(sizeof(float) * VEC_SIZE)));
+
+  /* Particle mass. */
+  float *restrict m __attribute__((aligned(sizeof(float) * VEC_SIZE)));
+
+  /* Particle x velocity. */
+  float *restrict vx __attribute__((aligned(sizeof(float) * VEC_SIZE)));
+
+  /* Particle y velocity. */
+  float *restrict vy __attribute__((aligned(sizeof(float) * VEC_SIZE)));
+
+  /* Particle z velocity. */
+  float *restrict vz __attribute__((aligned(sizeof(float) * VEC_SIZE)));
+
+  /* Cache size. */
+  int count;
+};
+
+/* Secondary cache struct to hold a list of interactions between two
+ * particles.*/
+struct c2_cache {
+
+  /* Separation between two particles squared. */
+  float r2q[C2_CACHE_SIZE] __attribute__((aligned(C2_CACHE_ALIGN)));
+
+  /* x separation between two particles. */
+  float dxq[C2_CACHE_SIZE] __attribute__((aligned(C2_CACHE_ALIGN)));
+
+  /* y separation between two particles. */
+  float dyq[C2_CACHE_SIZE] __attribute__((aligned(C2_CACHE_ALIGN)));
+
+  /* z separation between two particles. */
+  float dzq[C2_CACHE_SIZE] __attribute__((aligned(C2_CACHE_ALIGN)));
+
+  /* Mass of particle pj. */
+  float mq[C2_CACHE_SIZE] __attribute__((aligned(C2_CACHE_ALIGN)));
+
+  /* x velocity of particle pj. */
+  float vxq[C2_CACHE_SIZE] __attribute__((aligned(C2_CACHE_ALIGN)));
+
+  /* y velocity of particle pj. */
+  float vyq[C2_CACHE_SIZE] __attribute__((aligned(C2_CACHE_ALIGN)));
+
+  /* z velocity of particle pj. */
+  float vzq[C2_CACHE_SIZE] __attribute__((aligned(C2_CACHE_ALIGN)));
+};
+
+/**
+ * @brief Allocate memory and initialise cache.
+ *
+ * @param c The cache.
+ * @param count Number of particles to allocate space for.
+ */
+__attribute__((always_inline)) INLINE void cache_init(struct cache *c,
+                                                      size_t count) {
+
+  /* Align cache on correct byte boundary and pad cache size to include 2 vector
+   * lengths for remainder operations. */
+  unsigned long alignment = sizeof(float) * VEC_SIZE;
+  unsigned int sizeBytes = (count + (2 * VEC_SIZE)) * sizeof(float);
+  int error = 0;
+
+  /* Free memory if cache has already been allocated. */
+  if (c->count > 0) {
+    free(c->x);
+    free(c->y);
+    free(c->z);
+    free(c->m);
+    free(c->vx);
+    free(c->vy);
+    free(c->vz);
+    free(c->h);
+  }
+
+  error += posix_memalign((void **)&c->x, alignment, sizeBytes);
+  error += posix_memalign((void **)&c->y, alignment, sizeBytes);
+  error += posix_memalign((void **)&c->z, alignment, sizeBytes);
+  error += posix_memalign((void **)&c->m, alignment, sizeBytes);
+  error += posix_memalign((void **)&c->vx, alignment, sizeBytes);
+  error += posix_memalign((void **)&c->vy, alignment, sizeBytes);
+  error += posix_memalign((void **)&c->vz, alignment, sizeBytes);
+  error += posix_memalign((void **)&c->h, alignment, sizeBytes);
+
+  if (error != 0)
+    error("Couldn't allocate cache, no. of particles: %d", (int)count);
+  c->count = count;
+}
+
+/**
+ * @brief Populate cache by reading in the particles in unsorted order.
+ *
+ * @param ci The #cell.
+ * @param ci_cache The cache.
+ */
+__attribute__((always_inline)) INLINE void cache_read_particles(
+    const struct cell *const ci, struct cache *const ci_cache) {
+
+#if defined(GADGET2_SPH)
+
+  /* Shift the particles positions to a local frame so single precision can be
+   * used instead of double precision. */
+  for (int i = 0; i < ci->count; i++) {
+    ci_cache->x[i] = ci->parts[i].x[0] - ci->loc[0];
+    ci_cache->y[i] = ci->parts[i].x[1] - ci->loc[1];
+    ci_cache->z[i] = ci->parts[i].x[2] - ci->loc[2];
+    ci_cache->h[i] = ci->parts[i].h;
+
+    ci_cache->m[i] = ci->parts[i].mass;
+    ci_cache->vx[i] = ci->parts[i].v[0];
+    ci_cache->vy[i] = ci->parts[i].v[1];
+    ci_cache->vz[i] = ci->parts[i].v[2];
+  }
+
+#endif
+}
+
+/**
+ * @brief Clean the memory allocated by a #cache object.
+ *
+ * @param c The #cache to clean.
+ */
+static INLINE void cache_clean(struct cache *c) {
+  if (c->count > 0) {
+    free(c->x);
+    free(c->y);
+    free(c->z);
+    free(c->m);
+    free(c->vx);
+    free(c->vy);
+    free(c->vz);
+    free(c->h);
+  }
+}
+
+#endif /* SWIFT_CACHE_H */
diff --git a/src/cell.c b/src/cell.c
index e2767cdaa9e1189ec87b5ef51cc578c91f8cfe4c..c31f28236a21bdc452f1aa9585ead5c98dd65c4c 100644
--- a/src/cell.c
+++ b/src/cell.c
@@ -49,6 +49,7 @@
 /* Local headers. */
 #include "active.h"
 #include "atomic.h"
+#include "drift.h"
 #include "error.h"
 #include "gravity.h"
 #include "hydro.h"
@@ -98,8 +99,10 @@ int cell_unpack(struct pcell *pc, struct cell *c, struct space *s) {
   c->h_max = pc->h_max;
   c->ti_end_min = pc->ti_end_min;
   c->ti_end_max = pc->ti_end_max;
+  c->ti_old = pc->ti_old;
   c->count = pc->count;
   c->gcount = pc->gcount;
+  c->scount = pc->scount;
   c->tag = pc->tag;
 
   /* Number of new cells created. */
@@ -108,9 +111,11 @@ int cell_unpack(struct pcell *pc, struct cell *c, struct space *s) {
   /* Fill the progeny recursively, depth-first. */
   for (int k = 0; k < 8; k++)
     if (pc->progeny[k] >= 0) {
-      struct cell *temp = space_getcell(s);
+      struct cell *temp;
+      space_getcells(s, 1, &temp);
       temp->count = 0;
       temp->gcount = 0;
+      temp->scount = 0;
       temp->loc[0] = c->loc[0];
       temp->loc[1] = c->loc[1];
       temp->loc[2] = c->loc[2];
@@ -191,6 +196,31 @@ int cell_link_gparts(struct cell *c, struct gpart *gparts) {
   return c->gcount;
 }
 
+/**
+ * @brief Link the cells recursively to the given #spart array.
+ *
+ * @param c The #cell.
+ * @param sparts The #spart array.
+ *
+ * @return The number of particles linked.
+ */
+int cell_link_sparts(struct cell *c, struct spart *sparts) {
+
+  c->sparts = sparts;
+
+  /* Fill the progeny recursively, depth-first. */
+  if (c->split) {
+    int offset = 0;
+    for (int k = 0; k < 8; k++) {
+      if (c->progeny[k] != NULL)
+        offset += cell_link_sparts(c->progeny[k], &sparts[offset]);
+    }
+  }
+
+  /* Return the total number of linked particles. */
+  return c->scount;
+}
+
 /**
  * @brief Pack the data of the given cell and all it's sub-cells.
  *
@@ -208,8 +238,10 @@ int cell_pack(struct cell *c, struct pcell *pc) {
   pc->h_max = c->h_max;
   pc->ti_end_min = c->ti_end_min;
   pc->ti_end_max = c->ti_end_max;
+  pc->ti_old = c->ti_old;
   pc->count = c->count;
   pc->gcount = c->gcount;
+  pc->scount = c->scount;
   c->tag = pc->tag = atomic_inc(&cell_next_tag) % cell_max_tag;
 
   /* Fill in the progeny, depth-first recursion. */
@@ -239,7 +271,7 @@ int cell_pack(struct cell *c, struct pcell *pc) {
  *
  * @return The number of packed cells.
  */
-int cell_pack_ti_ends(struct cell *c, int *ti_ends) {
+int cell_pack_ti_ends(struct cell *c, integertime_t *ti_ends) {
 
 #ifdef WITH_MPI
 
@@ -270,7 +302,7 @@ int cell_pack_ti_ends(struct cell *c, int *ti_ends) {
  *
  * @return The number of cells created.
  */
-int cell_unpack_ti_ends(struct cell *c, int *ti_ends) {
+int cell_unpack_ti_ends(struct cell *c, integertime_t *ti_ends) {
 
 #ifdef WITH_MPI
 
@@ -421,6 +453,70 @@ int cell_glocktree(struct cell *c) {
   }
 }
 
+/**
+ * @brief Lock a cell for access to its array of #spart and hold its parents.
+ *
+ * @param c The #cell.
+ * @return 0 on success, 1 on failure
+ */
+int cell_slocktree(struct cell *c) {
+
+  TIMER_TIC
+
+  /* First of all, try to lock this cell. */
+  if (c->shold || lock_trylock(&c->slock) != 0) {
+    TIMER_TOC(timer_locktree);
+    return 1;
+  }
+
+  /* Did somebody hold this cell in the meantime? */
+  if (c->shold) {
+
+    /* Unlock this cell. */
+    if (lock_unlock(&c->slock) != 0) error("Failed to unlock cell.");
+
+    /* Admit defeat. */
+    TIMER_TOC(timer_locktree);
+    return 1;
+  }
+
+  /* Climb up the tree and lock/hold/unlock. */
+  struct cell *finger;
+  for (finger = c->parent; finger != NULL; finger = finger->parent) {
+
+    /* Lock this cell. */
+    if (lock_trylock(&finger->slock) != 0) break;
+
+    /* Increment the hold. */
+    atomic_inc(&finger->shold);
+
+    /* Unlock the cell. */
+    if (lock_unlock(&finger->slock) != 0) error("Failed to unlock cell.");
+  }
+
+  /* If we reached the top of the tree, we're done. */
+  if (finger == NULL) {
+    TIMER_TOC(timer_locktree);
+    return 0;
+  }
+
+  /* Otherwise, we hit a snag. */
+  else {
+
+    /* Undo the holds up to finger. */
+    for (struct cell *finger2 = c->parent; finger2 != finger;
+         finger2 = finger2->parent)
+      atomic_dec(&finger2->shold);
+
+    /* Unlock this cell. */
+    if (lock_unlock(&c->slock) != 0) error("Failed to unlock cell.");
+
+    /* Admit defeat. */
+    TIMER_TOC(timer_locktree);
+    return 1;
+  }
+}
+
 /**
  * @brief Unlock a cell's parents for access to #part array.
  *
@@ -459,39 +555,80 @@ void cell_gunlocktree(struct cell *c) {
   TIMER_TOC(timer_locktree);
 }
 
+/**
+ * @brief Unlock a cell's parents for access to #spart array.
+ *
+ * @param c The #cell.
+ */
+void cell_sunlocktree(struct cell *c) {
+
+  TIMER_TIC
+
+  /* First of all, try to unlock this cell. */
+  if (lock_unlock(&c->slock) != 0) error("Failed to unlock cell.");
+
+  /* Climb up the tree and unhold the parents. */
+  for (struct cell *finger = c->parent; finger != NULL; finger = finger->parent)
+    atomic_dec(&finger->shold);
+
+  TIMER_TOC(timer_locktree);
+}
+
 /**
  * @brief Sort the parts into eight bins along the given pivots.
  *
  * @param c The #cell array to be sorted.
  * @param parts_offset Offset of the cell parts array relative to the
  *        space's parts array, i.e. c->parts - s->parts.
+ * @param sparts_offset Offset of the cell sparts array relative to the
+ *        space's sparts array, i.e. c->sparts - s->sparts.
  * @param buff A buffer with at least max(c->count, c->gcount) entries,
  *        used for sorting indices.
+ * @param sbuff A buffer with at least max(c->scount, c->gcount) entries,
+ *        used for sorting indices for the sparts.
+ * @param gbuff A buffer with at least max(c->count, c->gcount) entries,
+ *        used for sorting indices for the gparts.
  */
-void cell_split(struct cell *c, ptrdiff_t parts_offset, int *buff) {
+void cell_split(struct cell *c, ptrdiff_t parts_offset, ptrdiff_t sparts_offset,
+                struct cell_buff *buff, struct cell_buff *sbuff,
+                struct cell_buff *gbuff) {
 
-  const int count = c->count, gcount = c->gcount;
+  const int count = c->count, gcount = c->gcount, scount = c->scount;
   struct part *parts = c->parts;
   struct xpart *xparts = c->xparts;
   struct gpart *gparts = c->gparts;
+  struct spart *sparts = c->sparts;
   const double pivot[3] = {c->loc[0] + c->width[0] / 2,
                            c->loc[1] + c->width[1] / 2,
                            c->loc[2] + c->width[2] / 2};
   int bucket_count[8] = {0, 0, 0, 0, 0, 0, 0, 0};
   int bucket_offset[9];
 
-  /* If the buff is NULL, allocate it, and remember to free it. */
-  const int allocate_buffer = (buff == NULL);
-  if (allocate_buffer &&
-      (buff = (int *)malloc(sizeof(int) * max(count, gcount))) == NULL)
-    error("Failed to allocate temporary indices.");
+#ifdef SWIFT_DEBUG_CHECKS
+  /* Check that the buffs are OK. */
+  for (int k = 0; k < count; k++) {
+    if (buff[k].x[0] != parts[k].x[0] || buff[k].x[1] != parts[k].x[1] ||
+        buff[k].x[2] != parts[k].x[2])
+      error("Inconsistent buff contents.");
+  }
+  for (int k = 0; k < gcount; k++) {
+    if (gbuff[k].x[0] != gparts[k].x[0] || gbuff[k].x[1] != gparts[k].x[1] ||
+        gbuff[k].x[2] != gparts[k].x[2])
+      error("Inconsistent gbuff contents.");
+  }
+  for (int k = 0; k < scount; k++) {
+    if (sbuff[k].x[0] != sparts[k].x[0] || sbuff[k].x[1] != sparts[k].x[1] ||
+        sbuff[k].x[2] != sparts[k].x[2])
+      error("Inconsistent sbuff contents.");
+  }
+#endif /* SWIFT_DEBUG_CHECKS */
 
   /* Fill the buffer with the indices. */
   for (int k = 0; k < count; k++) {
-    const int bid = (parts[k].x[0] > pivot[0]) * 4 +
-                    (parts[k].x[1] > pivot[1]) * 2 + (parts[k].x[2] > pivot[2]);
+    const int bid = (buff[k].x[0] > pivot[0]) * 4 +
+                    (buff[k].x[1] > pivot[1]) * 2 + (buff[k].x[2] > pivot[2]);
     bucket_count[bid]++;
-    buff[k] = bid;
+    buff[k].ind = bid;
   }
 
   /* Set the buffer offsets. */
@@ -505,23 +642,25 @@ void cell_split(struct cell *c, ptrdiff_t parts_offset, int *buff) {
   for (int bucket = 0; bucket < 8; bucket++) {
     for (int k = bucket_offset[bucket] + bucket_count[bucket];
          k < bucket_offset[bucket + 1]; k++) {
-      int bid = buff[k];
+      int bid = buff[k].ind;
       if (bid != bucket) {
         struct part part = parts[k];
         struct xpart xpart = xparts[k];
+        struct cell_buff temp_buff = buff[k];
         while (bid != bucket) {
           int j = bucket_offset[bid] + bucket_count[bid]++;
-          while (buff[j] == bid) {
+          while (buff[j].ind == bid) {
             j++;
             bucket_count[bid]++;
           }
           memswap(&parts[j], &part, sizeof(struct part));
           memswap(&xparts[j], &xpart, sizeof(struct xpart));
-          memswap(&buff[j], &bid, sizeof(int));
+          memswap(&buff[j], &temp_buff, sizeof(struct cell_buff));
+          bid = temp_buff.ind;
         }
         parts[k] = part;
         xparts[k] = xpart;
-        buff[k] = bid;
+        buff[k] = temp_buff;
       }
       bucket_count[bid]++;
     }
@@ -535,9 +674,18 @@ void cell_split(struct cell *c, ptrdiff_t parts_offset, int *buff) {
   }
 
   /* Re-link the gparts. */
-  if (count > 0 && gcount > 0) part_relink_gparts(parts, count, parts_offset);
+  if (count > 0 && gcount > 0)
+    part_relink_gparts_to_parts(parts, count, parts_offset);
 
 #ifdef SWIFT_DEBUG_CHECKS
+  /* Check that the buffs are OK. */
+  for (int k = 1; k < count; k++) {
+    if (buff[k].ind < buff[k - 1].ind) error("Buff not sorted.");
+    if (buff[k].x[0] != parts[k].x[0] || buff[k].x[1] != parts[k].x[1] ||
+        buff[k].x[2] != parts[k].x[2])
+      error("Inconsistent buff contents (k=%i).", k);
+  }
+
   /* Verify that _all_ the parts have been assigned to a cell. */
   for (int k = 1; k < 8; k++)
     if (&c->progeny[k - 1]->parts[c->progeny[k - 1]->count] !=
@@ -564,18 +712,95 @@ void cell_split(struct cell *c, ptrdiff_t parts_offset, int *buff) {
         c->progeny[2]->parts[k].x[1] <= pivot[1] ||
         c->progeny[2]->parts[k].x[2] > pivot[2])
       error("Sorting failed (progeny=2).");
+  for (int k = 0; k < c->progeny[3]->count; k++)
+    if (c->progeny[3]->parts[k].x[0] > pivot[0] ||
+        c->progeny[3]->parts[k].x[1] <= pivot[1] ||
+        c->progeny[3]->parts[k].x[2] <= pivot[2])
+      error("Sorting failed (progeny=3).");
+  for (int k = 0; k < c->progeny[4]->count; k++)
+    if (c->progeny[4]->parts[k].x[0] <= pivot[0] ||
+        c->progeny[4]->parts[k].x[1] > pivot[1] ||
+        c->progeny[4]->parts[k].x[2] > pivot[2])
+      error("Sorting failed (progeny=4).");
+  for (int k = 0; k < c->progeny[5]->count; k++)
+    if (c->progeny[5]->parts[k].x[0] <= pivot[0] ||
+        c->progeny[5]->parts[k].x[1] > pivot[1] ||
+        c->progeny[5]->parts[k].x[2] <= pivot[2])
+      error("Sorting failed (progeny=5).");
+  for (int k = 0; k < c->progeny[6]->count; k++)
+    if (c->progeny[6]->parts[k].x[0] <= pivot[0] ||
+        c->progeny[6]->parts[k].x[1] <= pivot[1] ||
+        c->progeny[6]->parts[k].x[2] > pivot[2])
+      error("Sorting failed (progeny=6).");
+  for (int k = 0; k < c->progeny[7]->count; k++)
+    if (c->progeny[7]->parts[k].x[0] <= pivot[0] ||
+        c->progeny[7]->parts[k].x[1] <= pivot[1] ||
+        c->progeny[7]->parts[k].x[2] <= pivot[2])
+      error("Sorting failed (progeny=7).");
 #endif
 
-  /* Now do the same song and dance for the gparts. */
+  /* Now do the same song and dance for the sparts. */
+  for (int k = 0; k < 8; k++) bucket_count[k] = 0;
+
+  /* Fill the buffer with the indices. */
+  for (int k = 0; k < scount; k++) {
+    const int bid = (sbuff[k].x[0] > pivot[0]) * 4 +
+                    (sbuff[k].x[1] > pivot[1]) * 2 + (sbuff[k].x[2] > pivot[2]);
+    bucket_count[bid]++;
+    sbuff[k].ind = bid;
+  }
+
+  /* Set the buffer offsets. */
+  bucket_offset[0] = 0;
+  for (int k = 1; k <= 8; k++) {
+    bucket_offset[k] = bucket_offset[k - 1] + bucket_count[k - 1];
+    bucket_count[k - 1] = 0;
+  }
+
+  /* Run through the buckets, and swap particles to their correct spot. */
+  for (int bucket = 0; bucket < 8; bucket++) {
+    for (int k = bucket_offset[bucket] + bucket_count[bucket];
+         k < bucket_offset[bucket + 1]; k++) {
+      int bid = sbuff[k].ind;
+      if (bid != bucket) {
+        struct spart spart = sparts[k];
+        struct cell_buff temp_buff = sbuff[k];
+        while (bid != bucket) {
+          int j = bucket_offset[bid] + bucket_count[bid]++;
+          while (sbuff[j].ind == bid) {
+            j++;
+            bucket_count[bid]++;
+          }
+          memswap(&sparts[j], &spart, sizeof(struct spart));
+          memswap(&sbuff[j], &temp_buff, sizeof(struct cell_buff));
+          bid = temp_buff.ind;
+        }
+        sparts[k] = spart;
+        sbuff[k] = temp_buff;
+      }
+      bucket_count[bid]++;
+    }
+  }
+
+  /* Store the counts and offsets. */
+  for (int k = 0; k < 8; k++) {
+    c->progeny[k]->scount = bucket_count[k];
+    c->progeny[k]->sparts = &c->sparts[bucket_offset[k]];
+  }
+
+  /* Re-link the gparts. */
+  if (scount > 0 && gcount > 0)
+    part_relink_gparts_to_sparts(sparts, scount, sparts_offset);
+
+  /* Finally, do the same song and dance for the gparts. */
   for (int k = 0; k < 8; k++) bucket_count[k] = 0;
 
   /* Fill the buffer with the indices. */
   for (int k = 0; k < gcount; k++) {
-    const int bid = (gparts[k].x[0] > pivot[0]) * 4 +
-                    (gparts[k].x[1] > pivot[1]) * 2 +
-                    (gparts[k].x[2] > pivot[2]);
+    const int bid = (gbuff[k].x[0] > pivot[0]) * 4 +
+                    (gbuff[k].x[1] > pivot[1]) * 2 + (gbuff[k].x[2] > pivot[2]);
     bucket_count[bid]++;
-    buff[k] = bid;
+    gbuff[k].ind = bid;
   }
 
   /* Set the buffer offsets. */
@@ -589,20 +814,22 @@ void cell_split(struct cell *c, ptrdiff_t parts_offset, int *buff) {
   for (int bucket = 0; bucket < 8; bucket++) {
     for (int k = bucket_offset[bucket] + bucket_count[bucket];
          k < bucket_offset[bucket + 1]; k++) {
-      int bid = buff[k];
+      int bid = gbuff[k].ind;
       if (bid != bucket) {
         struct gpart gpart = gparts[k];
+        struct cell_buff temp_buff = gbuff[k];
         while (bid != bucket) {
           int j = bucket_offset[bid] + bucket_count[bid]++;
-          while (buff[j] == bid) {
+          while (gbuff[j].ind == bid) {
             j++;
             bucket_count[bid]++;
           }
           memswap(&gparts[j], &gpart, sizeof(struct gpart));
-          memswap(&buff[j], &bid, sizeof(int));
+          memswap(&gbuff[j], &temp_buff, sizeof(struct cell_buff));
+          bid = temp_buff.ind;
         }
         gparts[k] = gpart;
-        buff[k] = bid;
+        gbuff[k] = temp_buff;
       }
       bucket_count[bid]++;
     }
@@ -616,7 +843,11 @@ void cell_split(struct cell *c, ptrdiff_t parts_offset, int *buff) {
 
   /* Re-link the parts. */
   if (count > 0 && gcount > 0)
-    part_relink_parts(gparts, gcount, parts - parts_offset);
+    part_relink_parts_to_gparts(gparts, gcount, parts - parts_offset);
+
+  /* Re-link the sparts. */
+  if (scount > 0 && gcount > 0)
+    part_relink_sparts_to_gparts(gparts, gcount, sparts - sparts_offset);
 }
 
 /**
@@ -682,9 +913,10 @@ void cell_sanitize(struct cell *c) {
 void cell_convert_hydro(struct cell *c, void *data) {
 
   struct part *p = c->parts;
+  struct xpart *xp = c->xparts;
 
   for (int i = 0; i < c->count; ++i) {
-    hydro_convert_quantities(&p[i]);
+    hydro_convert_quantities(&p[i], &xp[i]);
   }
 }
 
@@ -711,10 +943,10 @@ void cell_clean_links(struct cell *c, void *data) {
  */
 void cell_check_drift_point(struct cell *c, void *data) {
 
-  const int ti_current = *(int *)data;
+  integertime_t ti_current = *(integertime_t *)data;
 
-  if (c->ti_old != ti_current)
-    error("Cell in an incorrect time-zone! c->ti_old=%d ti_current=%d",
+  if (c->ti_old != ti_current && c->nodeID == engine_rank)
+    error("Cell in an incorrect time-zone! c->ti_old=%lld ti_current=%lld",
           c->ti_old, ti_current);
 }
 
@@ -859,6 +1091,10 @@ int cell_is_drift_needed(struct cell *c, const struct engine *e) {
  */
 int cell_unskip_tasks(struct cell *c, struct scheduler *s) {
 
+#ifdef WITH_MPI
+  struct engine *e = s->space->e;
+#endif
+
   /* Un-skip the density tasks involved with this cell. */
   for (struct link *l = c->density; l != NULL; l = l->next) {
     struct task *t = l->t;
@@ -892,8 +1128,10 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) {
 
         /* Activate the tasks to recv foreign cell ci's data. */
         scheduler_activate(s, ci->recv_xv);
-        scheduler_activate(s, ci->recv_rho);
-        scheduler_activate(s, ci->recv_ti);
+        if (cell_is_active(ci, e)) {
+          scheduler_activate(s, ci->recv_rho);
+          scheduler_activate(s, ci->recv_ti);
+        }
 
         /* Look for the local cell cj's send tasks. */
         struct link *l = NULL;
@@ -903,24 +1141,34 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) {
         if (l == NULL) error("Missing link to send_xv task.");
         scheduler_activate(s, l->t);
 
-        for (l = cj->send_rho; l != NULL && l->t->cj->nodeID != ci->nodeID;
-             l = l->next)
-          ;
-        if (l == NULL) error("Missing link to send_rho task.");
-        scheduler_activate(s, l->t);
-
-        for (l = cj->send_ti; l != NULL && l->t->cj->nodeID != ci->nodeID;
-             l = l->next)
-          ;
-        if (l == NULL) error("Missing link to send_ti task.");
-        scheduler_activate(s, l->t);
+        if (cj->super->drift)
+          scheduler_activate(s, cj->super->drift);
+        else
+          error("Drift task missing !");
+
+        if (cell_is_active(cj, e)) {
+          for (l = cj->send_rho; l != NULL && l->t->cj->nodeID != ci->nodeID;
+               l = l->next)
+            ;
+          if (l == NULL) error("Missing link to send_rho task.");
+          scheduler_activate(s, l->t);
+
+          for (l = cj->send_ti; l != NULL && l->t->cj->nodeID != ci->nodeID;
+               l = l->next)
+            ;
+          if (l == NULL) error("Missing link to send_ti task.");
+          scheduler_activate(s, l->t);
+        }
 
       } else if (cj->nodeID != engine_rank) {
 
         /* Activate the tasks to recv foreign cell cj's data. */
         scheduler_activate(s, cj->recv_xv);
-        scheduler_activate(s, cj->recv_rho);
-        scheduler_activate(s, cj->recv_ti);
+        if (cell_is_active(cj, e)) {
+          scheduler_activate(s, cj->recv_rho);
+          scheduler_activate(s, cj->recv_ti);
+        }
+
         /* Look for the local cell ci's send tasks. */
         struct link *l = NULL;
         for (l = ci->send_xv; l != NULL && l->t->cj->nodeID != cj->nodeID;
@@ -929,17 +1177,24 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) {
         if (l == NULL) error("Missing link to send_xv task.");
         scheduler_activate(s, l->t);
 
-        for (l = ci->send_rho; l != NULL && l->t->cj->nodeID != cj->nodeID;
-             l = l->next)
-          ;
-        if (l == NULL) error("Missing link to send_rho task.");
-        scheduler_activate(s, l->t);
-
-        for (l = ci->send_ti; l != NULL && l->t->cj->nodeID != cj->nodeID;
-             l = l->next)
-          ;
-        if (l == NULL) error("Missing link to send_ti task.");
-        scheduler_activate(s, l->t);
+        if (ci->super->drift)
+          scheduler_activate(s, ci->super->drift);
+        else
+          error("Drift task missing !");
+
+        if (cell_is_active(ci, e)) {
+          for (l = ci->send_rho; l != NULL && l->t->cj->nodeID != cj->nodeID;
+               l = l->next)
+            ;
+          if (l == NULL) error("Missing link to send_rho task.");
+          scheduler_activate(s, l->t);
+
+          for (l = ci->send_ti; l != NULL && l->t->cj->nodeID != cj->nodeID;
+               l = l->next)
+            ;
+          if (l == NULL) error("Missing link to send_ti task.");
+          scheduler_activate(s, l->t);
+        }
       }
 #endif
     }
@@ -955,7 +1210,10 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) {
   if (c->extra_ghost != NULL) scheduler_activate(s, c->extra_ghost);
   if (c->ghost != NULL) scheduler_activate(s, c->ghost);
   if (c->init != NULL) scheduler_activate(s, c->init);
-  if (c->kick != NULL) scheduler_activate(s, c->kick);
+  if (c->drift != NULL) scheduler_activate(s, c->drift);
+  if (c->kick1 != NULL) scheduler_activate(s, c->kick1);
+  if (c->kick2 != NULL) scheduler_activate(s, c->kick2);
+  if (c->timestep != NULL) scheduler_activate(s, c->timestep);
   if (c->cooling != NULL) scheduler_activate(s, c->cooling);
   if (c->sourceterms != NULL) scheduler_activate(s, c->sourceterms);
 
@@ -981,3 +1239,130 @@ void cell_set_super(struct cell *c, struct cell *super) {
     for (int k = 0; k < 8; k++)
       if (c->progeny[k] != NULL) cell_set_super(c->progeny[k], super);
 }
+
+/**
+ * @brief Recursively drifts all particles and g-particles in a cell hierarchy.
+ *
+ * @param c The #cell.
+ * @param e The #engine (to get ti_current).
+ */
+void cell_drift(struct cell *c, const struct engine *e) {
+
+  const double timeBase = e->timeBase;
+  const integertime_t ti_old = c->ti_old;
+  const integertime_t ti_current = e->ti_current;
+  struct part *const parts = c->parts;
+  struct xpart *const xparts = c->xparts;
+  struct gpart *const gparts = c->gparts;
+  struct spart *const sparts = c->sparts;
+
+  /* Drift from the last time the cell was drifted to the current time */
+  const double dt = (ti_current - ti_old) * timeBase;
+  float dx_max = 0.f, dx2_max = 0.f, h_max = 0.f;
+
+  /* Check that we are actually going to move forward. */
+  if (ti_current < ti_old) error("Attempt to drift to the past");
+
+  /* Are we not in a leaf ? */
+  if (c->split) {
+
+    /* Loop over the progeny and collect their data. */
+    for (int k = 0; k < 8; k++)
+      if (c->progeny[k] != NULL) {
+        struct cell *cp = c->progeny[k];
+        cell_drift(cp, e);
+        dx_max = max(dx_max, cp->dx_max);
+        h_max = max(h_max, cp->h_max);
+      }
+
+  } else if (ti_current > ti_old) {
+
+    /* Loop over all the g-particles in the cell */
+    const size_t nr_gparts = c->gcount;
+    for (size_t k = 0; k < nr_gparts; k++) {
+
+      /* Get a handle on the gpart. */
+      struct gpart *const gp = &gparts[k];
+
+      /* Drift... */
+      drift_gpart(gp, dt, timeBase, ti_old, ti_current);
+
+      /* Compute (square of) motion since last cell construction */
+      const float dx2 = gp->x_diff[0] * gp->x_diff[0] +
+                        gp->x_diff[1] * gp->x_diff[1] +
+                        gp->x_diff[2] * gp->x_diff[2];
+      dx2_max = (dx2_max > dx2) ? dx2_max : dx2;
+    }
+
+    /* Loop over all the gas particles in the cell */
+    const size_t nr_parts = c->count;
+    for (size_t k = 0; k < nr_parts; k++) {
+
+      /* Get a handle on the part. */
+      struct part *const p = &parts[k];
+      struct xpart *const xp = &xparts[k];
+
+      /* Drift... */
+      drift_part(p, xp, dt, timeBase, ti_old, ti_current);
+
+      /* Compute (square of) motion since last cell construction */
+      const float dx2 = xp->x_diff[0] * xp->x_diff[0] +
+                        xp->x_diff[1] * xp->x_diff[1] +
+                        xp->x_diff[2] * xp->x_diff[2];
+      dx2_max = (dx2_max > dx2) ? dx2_max : dx2;
+
+      /* Maximal smoothing length */
+      h_max = (h_max > p->h) ? h_max : p->h;
+    }
+
+    /* Loop over all the star particles in the cell */
+    const size_t nr_sparts = c->scount;
+    for (size_t k = 0; k < nr_sparts; k++) {
+
+      /* Get a handle on the spart. */
+      struct spart *const sp = &sparts[k];
+
+      /* Drift... */
+      drift_spart(sp, dt, timeBase, ti_old, ti_current);
+
+      /* Note: no need to compute dx_max as all spart have a gpart */
+    }
+
+    /* Now, get the maximal particle motion from its square */
+    dx_max = sqrtf(dx2_max);
+
+  } else {
+
+    h_max = c->h_max;
+    dx_max = c->dx_max;
+  }
+
+  /* Store the values */
+  c->h_max = h_max;
+  c->dx_max = dx_max;
+
+  /* Update the time of the last drift */
+  c->ti_old = ti_current;
+}
+
+/**
+ * @brief Recursively checks that all particles in a cell have a time-step
+ */
+void cell_check_timesteps(struct cell *c) {
+#ifdef SWIFT_DEBUG_CHECKS
+
+  if (c->ti_end_min == 0 && c->nr_tasks > 0)
+    error("Cell without assigned time-step");
+
+  if (c->split) {
+    for (int k = 0; k < 8; ++k)
+      if (c->progeny[k] != NULL) cell_check_timesteps(c->progeny[k]);
+  } else {
+
+    if (c->nodeID == engine_rank)
+      for (int i = 0; i < c->count; ++i)
+        if (c->parts[i].time_bin == 0)
+          error("Particle without assigned time-bin");
+  }
+#endif
+}
diff --git a/src/cell.h b/src/cell.h
index 2cd13cf2ab6b934f6aab84bcbacf510270892866..5e170580015e9113aa61575b26a0de09b12e3c1f 100644
--- a/src/cell.h
+++ b/src/cell.h
@@ -35,6 +35,7 @@
 #include "multipole.h"
 #include "part.h"
 #include "task.h"
+#include "timeline.h"
 
 /* Avoid cyclic inclusions */
 struct engine;
@@ -52,6 +53,12 @@ struct scheduler;
 /* Global variables. */
 extern int cell_next_tag;
 
+/* Struct to temporarily buffer the particle locations and bin id. */
+struct cell_buff {
+  double x[3];
+  int ind;
+} SWIFT_STRUCT_ALIGN;
+
 /* Mini struct to link cells to tasks. Used as a linked list. */
 struct link {
 
@@ -67,10 +74,10 @@ struct pcell {
 
   /* Stats on this cell's particles. */
   double h_max;
-  int ti_end_min, ti_end_max;
+  integertime_t ti_end_min, ti_end_max, ti_old;
 
   /* Number of particles in this cell. */
-  int count, gcount;
+  int count, gcount, scount;
 
   /* tag used for MPI communication. */
   int tag;
@@ -111,6 +118,9 @@ struct cell {
   /*! Pointer to the #gpart data. */
   struct gpart *gparts;
 
+  /*! Pointer to the #spart data. */
+  struct spart *sparts;
+
   /*! Pointer for the sorted indices. */
   struct entry *sort;
 
@@ -147,8 +157,17 @@ struct cell {
   /*! The extra ghost task for complex hydro schemes */
   struct task *extra_ghost;
 
-  /*! The kick task */
-  struct task *kick;
+  /*! The drift task */
+  struct task *drift;
+
+  /*! The first kick task */
+  struct task *kick1;
+
+  /*! The second kick task */
+  struct task *kick2;
+
+  /*! The task to compute time-steps */
+  struct task *timestep;
 
   /*! Task constructing the multipole from the particles */
   struct task *grav_up;
@@ -203,13 +222,13 @@ struct cell {
 #endif
 
   /*! Minimum end of (integer) time step in this cell. */
-  int ti_end_min;
+  integertime_t ti_end_min;
 
   /*! Maximum end of (integer) time step in this cell. */
-  int ti_end_max;
+  integertime_t ti_end_max;
 
   /*! Last (integer) time the cell's content was drifted forward in time. */
-  int ti_old;
+  integertime_t ti_old;
 
   /*! Minimum dimension, i.e. smallest edge of this cell (min(width)). */
   float dmin;
@@ -223,6 +242,9 @@ struct cell {
   /*! Nr of #gpart in this cell. */
   int gcount;
 
+  /*! Nr of #spart in this cell. */
+  int scount;
+
   /*! The size of the sort array */
   int sortsize;
 
@@ -235,6 +257,9 @@ struct cell {
   /*! Spin lock for various uses (#gpart case). */
   swift_lock_type glock;
 
+  /*! Spin lock for various uses (#spart case). */
+  swift_lock_type slock;
+
   /*! ID of the previous owner, e.g. runner. */
   int owner;
 
@@ -244,6 +269,9 @@ struct cell {
   /*! Number of #gpart updated in this cell. */
   int g_updated;
 
+  /*! Number of #spart updated in this cell. */
+  int s_updated;
+
   /*! ID of the node this cell lives on. */
   int nodeID;
 
@@ -253,6 +281,9 @@ struct cell {
   /*! Is the #gpart data of this cell being used in a sub-cell? */
   int ghold;
 
+  /*! Is the #spart data of this cell being used in a sub-cell? */
+  int shold;
+
   /*! Number of tasks that are associated with this cell. */
   short int nr_tasks;
 
@@ -272,19 +303,24 @@ struct cell {
   ((int)(k) + (cdim)[2] * ((int)(j) + (cdim)[1] * (int)(i)))
 
 /* Function prototypes. */
-void cell_split(struct cell *c, ptrdiff_t parts_offset, int *buff);
+void cell_split(struct cell *c, ptrdiff_t parts_offset, ptrdiff_t sparts_offset,
+                struct cell_buff *buff, struct cell_buff *sbuff,
+                struct cell_buff *gbuff);
 void cell_sanitize(struct cell *c);
 int cell_locktree(struct cell *c);
 void cell_unlocktree(struct cell *c);
 int cell_glocktree(struct cell *c);
 void cell_gunlocktree(struct cell *c);
+int cell_slocktree(struct cell *c);
+void cell_sunlocktree(struct cell *c);
 int cell_pack(struct cell *c, struct pcell *pc);
 int cell_unpack(struct pcell *pc, struct cell *c, struct space *s);
-int cell_pack_ti_ends(struct cell *c, int *ti_ends);
-int cell_unpack_ti_ends(struct cell *c, int *ti_ends);
+int cell_pack_ti_ends(struct cell *c, integertime_t *ti_ends);
+int cell_unpack_ti_ends(struct cell *c, integertime_t *ti_ends);
 int cell_getsize(struct cell *c);
 int cell_link_parts(struct cell *c, struct part *parts);
 int cell_link_gparts(struct cell *c, struct gpart *gparts);
+int cell_link_sparts(struct cell *c, struct spart *sparts);
 void cell_convert_hydro(struct cell *c, void *data);
 void cell_clean_links(struct cell *c, void *data);
 int cell_are_neighbours(const struct cell *restrict ci,
@@ -295,5 +331,7 @@ void cell_check_drift_point(struct cell *c, void *data);
 int cell_is_drift_needed(struct cell *c, const struct engine *e);
 int cell_unskip_tasks(struct cell *c, struct scheduler *s);
 void cell_set_super(struct cell *c, struct cell *super);
+void cell_drift(struct cell *c, const struct engine *e);
+void cell_check_timesteps(struct cell *c);
 
 #endif /* SWIFT_CELL_H */
diff --git a/src/common_io.c b/src/common_io.c
index 1f1ec401547c81e137b4e7d836ab58cb87280d8b..82c00cf5bed7118276e0595e3d9c590d29bdda74 100644
--- a/src/common_io.c
+++ b/src/common_io.c
@@ -390,6 +390,8 @@ void writeCodeDescription(hid_t h_file) {
   H5Gclose(h_grpcode);
 }
 
+#endif /* HAVE_HDF5 */
+
 /* ------------------------------------------------------------------------------------------------
  * This part writes the XMF file descriptor enabling a visualisation through
  * ParaView
@@ -586,6 +588,9 @@ void prepare_dm_gparts(struct gpart* const gparts, size_t Ndm) {
     if (gparts[i].id_or_neg_offset <= 0)
       error("0 or negative ID for DM particle %zu: ID=%lld", i,
             gparts[i].id_or_neg_offset);
+
+    /* Set gpart type */
+    gparts[i].type = swift_type_dark_matter;
   }
 }
 
@@ -597,7 +602,7 @@ void prepare_dm_gparts(struct gpart* const gparts, size_t Ndm) {
  *
  * @param parts The array of #part freshly read in.
  * @param gparts The array of #gpart freshly read in with all the DM particles
- *at the start
+ * at the start
  * @param Ngas The number of gas particles read in.
  * @param Ndm The number of DM particles read in.
  */
@@ -618,12 +623,53 @@ void duplicate_hydro_gparts(struct part* const parts,
 
     gparts[i + Ndm].mass = hydro_get_mass(&parts[i]);
 
+    /* Set gpart type */
+    gparts[i + Ndm].type = swift_type_gas;
+
     /* Link the particles */
     gparts[i + Ndm].id_or_neg_offset = -i;
     parts[i].gpart = &gparts[i + Ndm];
   }
 }
 
+/**
+ * @brief Copy every #spart into the corresponding #gpart and link them.
+ *
+ * This function assumes that the DM particles and gas particles are all at
+ * the start of the gparts array and adds the star particles afterwards
+ *
+ * @param sparts The array of #spart freshly read in.
+ * @param gparts The array of #gpart freshly read in with all the DM and gas
+ * particles at the start.
+ * @param Nstars The number of stars particles read in.
+ * @param Ndm The number of DM and gas particles read in.
+ */
+void duplicate_star_gparts(struct spart* const sparts,
+                           struct gpart* const gparts, size_t Nstars,
+                           size_t Ndm) {
+
+  for (size_t i = 0; i < Nstars; ++i) {
+
+    /* Duplicate the crucial information */
+    gparts[i + Ndm].x[0] = sparts[i].x[0];
+    gparts[i + Ndm].x[1] = sparts[i].x[1];
+    gparts[i + Ndm].x[2] = sparts[i].x[2];
+
+    gparts[i + Ndm].v_full[0] = sparts[i].v[0];
+    gparts[i + Ndm].v_full[1] = sparts[i].v[1];
+    gparts[i + Ndm].v_full[2] = sparts[i].v[2];
+
+    gparts[i + Ndm].mass = sparts[i].mass;
+
+    /* Set gpart type */
+    gparts[i + Ndm].type = swift_type_star;
+
+    /* Link the particles */
+    gparts[i + Ndm].id_or_neg_offset = -i;
+    sparts[i].gpart = &gparts[i + Ndm];
+  }
+}
+
 /**
  * @brief Copy every DM #gpart into the dmparts array.
  *
@@ -644,7 +690,7 @@ void collect_dm_gparts(const struct gpart* const gparts, size_t Ntot,
      * gparts[i].part); */
 
     /* And collect the DM ones */
-    if (gparts[i].id_or_neg_offset > 0) {
+    if (gparts[i].type == swift_type_dark_matter) {
       dmparts[count] = gparts[i];
       count++;
     }
@@ -655,5 +701,3 @@ void collect_dm_gparts(const struct gpart* const gparts, size_t Ntot,
     error("Collected the wrong number of dm particles (%zu vs. %zu expected)",
           count, Ndm);
 }
-
-#endif
diff --git a/src/common_io.h b/src/common_io.h
index 7aedee0f2624dcff916a8398e244009a87109915..bf1840d497c46f58568d1bed7cb3409f60e047ee 100644
--- a/src/common_io.h
+++ b/src/common_io.h
@@ -75,6 +75,9 @@ void prepare_dm_gparts(struct gpart* const gparts, size_t Ndm);
 void duplicate_hydro_gparts(struct part* const parts,
                             struct gpart* const gparts, size_t Ngas,
                             size_t Ndm);
+void duplicate_star_gparts(struct spart* const sparts,
+                           struct gpart* const gparts, size_t Nstars,
+                           size_t Ndm);
 
 void readAttribute(hid_t grp, char* name, enum DATA_TYPE type, void* data);
 
diff --git a/src/cooling/const_du/cooling.h b/src/cooling/const_du/cooling.h
index 448af9c3765e3bb6d4cbf4cc94e245a3976d5314..30ae644bdecbe795794505f64ba1ed767419d82b 100644
--- a/src/cooling/const_du/cooling.h
+++ b/src/cooling/const_du/cooling.h
@@ -62,26 +62,30 @@ __attribute__((always_inline)) INLINE static void cooling_cool_part(
     const struct cooling_function_data* restrict cooling,
     struct part* restrict p, struct xpart* restrict xp, float dt) {
 
-  /* Get current internal energy (dt=0) */
-  const float u_old = hydro_get_internal_energy(p, 0.f);
+  /* Internal energy floor */
+  const float u_floor = cooling->min_energy;
+
+  /* Get current internal energy */
+  const float u_old = hydro_get_internal_energy(p);
+
+  /* Current du_dt */
+  const float hydro_du_dt = hydro_get_internal_energy_dt(p);
 
   /* Get cooling function properties */
-  const float du_dt = -cooling->cooling_rate;
-  const float u_floor = cooling->min_energy;
+  float cooling_du_dt = -cooling->cooling_rate;
 
-  /* Constant cooling with a minimal floor */
-  float u_new;
-  if (u_old - du_dt * dt > u_floor) {
-    u_new = u_old + du_dt * dt;
-  } else {
-    u_new = u_floor;
+  /* Integrate cooling equation to enforce energy floor */
+  if (u_old + hydro_du_dt * dt < u_floor) {
+    cooling_du_dt = 0.f;
+  } else if (u_old + (hydro_du_dt + cooling_du_dt) * dt < u_floor) {
+    cooling_du_dt = (u_old + dt * hydro_du_dt - u_floor) / dt;
   }
 
-  /* Update the internal energy */
-  hydro_set_internal_energy(p, u_new);
+  /* Update the internal energy time derivative */
+  hydro_set_internal_energy_dt(p, hydro_du_dt + cooling_du_dt);
 
   /* Store the radiated energy */
-  xp->cooling_data.radiated_energy += hydro_get_mass(p) * (u_old - u_new);
+  xp->cooling_data.radiated_energy += -hydro_get_mass(p) * cooling_du_dt * dt;
 }
 
 /**
@@ -102,7 +106,7 @@ __attribute__((always_inline)) INLINE static float cooling_timestep(
     const struct UnitSystem* restrict us, const struct part* restrict p) {
 
   const float cooling_rate = cooling->cooling_rate;
-  const float internal_energy = hydro_get_internal_energy(p, 0);
+  const float internal_energy = hydro_get_internal_energy(p);
   return cooling->cooling_tstep_mult * internal_energy / fabsf(cooling_rate);
 }
 
diff --git a/src/cooling/const_lambda/cooling.h b/src/cooling/const_lambda/cooling.h
index cb9db2dc34a6014ea15a24d368a006fee3838d67..9fadd51e3c2a3c5462c8476e0aac893e3a2d530d 100644
--- a/src/cooling/const_lambda/cooling.h
+++ b/src/cooling/const_lambda/cooling.h
@@ -76,31 +76,29 @@ __attribute__((always_inline)) INLINE static void cooling_cool_part(
     const struct cooling_function_data* restrict cooling,
     struct part* restrict p, struct xpart* restrict xp, float dt) {
 
-  /* Get current internal energy (dt=0) */
-  const float u_old = hydro_get_internal_energy(p, 0.f);
-
   /* Internal energy floor */
   const float u_floor = cooling->min_energy;
 
-  /* Calculate du_dt */
-  const float du_dt = cooling_rate(phys_const, us, cooling, p);
+  /* Current energy */
+  const float u_old = hydro_get_internal_energy(p);
 
-  /* Integrate cooling equation, but enforce energy floor */
-  float u_new;
-  if (u_old + du_dt * dt > u_floor) {
-    u_new = u_old + du_dt * dt;
-  } else {
-    u_new = u_floor;
-  }
+  /* Current du_dt */
+  const float hydro_du_dt = hydro_get_internal_energy_dt(p);
+
+  /* Calculate cooling du_dt */
+  float cooling_du_dt = cooling_rate(phys_const, us, cooling, p);
 
-  /* Don't allow particle to cool too much in one timestep */
-  if (u_new < 0.5f * u_old) u_new = 0.5f * u_old;
+  /* Integrate cooling equation to enforce energy floor */
+  /* Factor of 1.5 included since timestep could potentially double */
+  if (u_old + (hydro_du_dt + cooling_du_dt) * 1.5f * dt < u_floor) {
+    cooling_du_dt = -(u_old + 1.5f * dt * hydro_du_dt - u_floor) / (1.5f * dt);
+  }
 
-  /* Update the internal energy */
-  hydro_set_internal_energy(p, u_new);
+  /* Update the internal energy time derivative */
+  hydro_set_internal_energy_dt(p, hydro_du_dt + cooling_du_dt);
 
   /* Store the radiated energy */
-  xp->cooling_data.radiated_energy += hydro_get_mass(p) * (u_old - u_new);
+  xp->cooling_data.radiated_energy += -hydro_get_mass(p) * cooling_du_dt * dt;
 }
 
 /**
@@ -116,12 +114,11 @@ __attribute__((always_inline)) INLINE static float cooling_timestep(
     const struct phys_const* restrict phys_const,
     const struct UnitSystem* restrict us, const struct part* restrict p) {
 
-  /* Get current internal energy (dt=0) */
-  const float u = hydro_get_internal_energy(p, 0.f);
+  /* Get current internal energy */
+  const float u = hydro_get_internal_energy(p);
   const float du_dt = cooling_rate(phys_const, us, cooling, p);
 
-  /* If we are close to (or below) the energy floor, we ignore cooling timestep
-   */
+  /* If we are close to (or below) the energy floor, we ignore the condition */
   if (u < 1.01f * cooling->min_energy)
     return FLT_MAX;
   else
diff --git a/src/debug.c b/src/debug.c
index 48572df7f046944613d2598b0d340e949ad3ab7e..f5f2f4974a6f2d0e8da8fce71e98233a2ed3deeb 100644
--- a/src/debug.c
+++ b/src/debug.c
@@ -194,7 +194,8 @@ int checkSpacehmax(struct space *s) {
 
 /**
  * @brief Check if the h_max and dx_max values of a cell's hierarchy are
- * consistent with the particles. Report verbosely if not.
+ * consistent with the particles. Also checks if particles are correctly
+ * in a cell. Report verbosely if not.
  *
  * @param c the top cell of the hierarchy.
  * @param depth the recursion depth for use in messages. Set to 0 initially.
@@ -206,24 +207,50 @@ int checkCellhdxmax(const struct cell *c, int *depth) {
 
   float h_max = 0.0f;
   float dx_max = 0.0f;
-  if (!c->split) {
-    const size_t nr_parts = c->count;
-    struct part *parts = c->parts;
-    for (size_t k = 0; k < nr_parts; k++) {
-      h_max = (h_max > parts[k].h) ? h_max : parts[k].h;
+  int result = 1;
+
+  const double loc_min[3] = {c->loc[0], c->loc[1], c->loc[2]};
+  const double loc_max[3] = {c->loc[0] + c->width[0], c->loc[1] + c->width[1],
+                             c->loc[2] + c->width[2]};
+
+  const size_t nr_parts = c->count;
+  struct part *parts = c->parts;
+  struct xpart *xparts = c->xparts;
+  for (size_t k = 0; k < nr_parts; k++) {
+
+    struct part *const p = &parts[k];
+    struct xpart *const xp = &xparts[k];
+
+    if (p->x[0] < loc_min[0] || p->x[0] > loc_max[0] || p->x[1] < loc_min[1] ||
+        p->x[1] > loc_max[1] || p->x[2] < loc_min[2] || p->x[2] > loc_max[2]) {
+
+      message(
+          "Inconsistent part position p->x=[%e %e %e], c->loc=[%e %e %e] "
+          "c->width=[%e %e %e]",
+          p->x[0], p->x[1], p->x[2], c->loc[0], c->loc[1], c->loc[2],
+          c->width[0], c->width[1], c->width[2]);
+
+      result = 0;
     }
-  } else {
-    for (int k = 0; k < 8; k++)
+
+    const float dx2 = xp->x_diff[0] * xp->x_diff[0] +
+                      xp->x_diff[1] * xp->x_diff[1] +
+                      xp->x_diff[2] * xp->x_diff[2];
+
+    h_max = max(h_max, p->h);
+    dx_max = max(dx_max, sqrt(dx2));
+  }
+
+  if (c->split) {
+    for (int k = 0; k < 8; k++) {
       if (c->progeny[k] != NULL) {
         struct cell *cp = c->progeny[k];
         checkCellhdxmax(cp, depth);
-        dx_max = max(dx_max, cp->dx_max);
-        h_max = max(h_max, cp->h_max);
       }
+    }
   }
 
   /* Check. */
-  int result = 1;
   if (c->h_max != h_max) {
     message("%d Inconsistent h_max: cell %f != parts %f", *depth, c->h_max,
             h_max);
@@ -236,13 +263,6 @@ int checkCellhdxmax(const struct cell *c, int *depth) {
     result = 0;
   }
 
-  /* Check rebuild criterion. */
-  if (h_max > c->dmin) {
-    message("%d Inconsistent c->dmin: %f > %f", *depth, h_max, c->dmin);
-    message("location: %f %f %f", c->loc[0], c->loc[1], c->loc[2]);
-    result = 0;
-  }
-
   return result;
 }
 
diff --git a/src/drift.h b/src/drift.h
index bd1b35926740d49a67291ede4676f3387cd66748..687f8d8885a5fedca489f76d65ea8113101626c6 100644
--- a/src/drift.h
+++ b/src/drift.h
@@ -39,8 +39,8 @@
  * @param ti_current Integer end of time-step
  */
 __attribute__((always_inline)) INLINE static void drift_gpart(
-    struct gpart *restrict gp, float dt, double timeBase, int ti_old,
-    int ti_current) {
+    struct gpart *restrict gp, float dt, double timeBase, integertime_t ti_old,
+    integertime_t ti_current) {
   /* Drift... */
   gp->x[0] += gp->v_full[0] * dt;
   gp->x[1] += gp->v_full[1] * dt;
@@ -64,7 +64,17 @@ __attribute__((always_inline)) INLINE static void drift_gpart(
  */
 __attribute__((always_inline)) INLINE static void drift_part(
     struct part *restrict p, struct xpart *restrict xp, float dt,
-    double timeBase, int ti_old, int ti_current) {
+    double timeBase, integertime_t ti_old, integertime_t ti_current) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (p->ti_drift != ti_old)
+    error(
+        "Particle has not been drifted to the current time p->ti_drift=%lld, "
+        "c->ti_old=%lld, ti_current=%lld",
+        p->ti_drift, ti_old, ti_current);
+
+  p->ti_drift = ti_current;
+#endif
 
   /* Drift... */
   p->x[0] += xp->v_full[0] * dt;
@@ -77,7 +87,7 @@ __attribute__((always_inline)) INLINE static void drift_part(
   p->v[2] += p->a_hydro[2] * dt;
 
   /* Predict the values of the extra fields */
-  hydro_predict_extra(p, xp, dt, ti_old, ti_current, timeBase);
+  hydro_predict_extra(p, xp, dt);
 
   /* Compute offset since last cell construction */
   xp->x_diff[0] -= xp->v_full[0] * dt;
@@ -85,4 +95,23 @@ __attribute__((always_inline)) INLINE static void drift_part(
   xp->x_diff[2] -= xp->v_full[2] * dt;
 }
 
+/**
+ * @brief Perform the 'drift' operation on a #spart
+ *
+ * @param sp The #spart to drift.
+ * @param dt The drift time-step
+ * @param timeBase The minimal allowed time-step size.
+ * @param ti_old Integer start of time-step
+ * @param ti_current Integer end of time-step
+ */
+__attribute__((always_inline)) INLINE static void drift_spart(
+    struct spart *restrict sp, float dt, double timeBase, integertime_t ti_old,
+    integertime_t ti_current) {
+
+  /* Drift... */
+  sp->x[0] += sp->v[0] * dt;
+  sp->x[1] += sp->v[1] * dt;
+  sp->x[2] += sp->v[2] * dt;
+}
+
 #endif /* SWIFT_DRIFT_H */
diff --git a/src/dump.c b/src/dump.c
new file mode 100644
index 0000000000000000000000000000000000000000..2c0cf221ebd897bab0d047c196ce8a2aeddc6eae
--- /dev/null
+++ b/src/dump.c
@@ -0,0 +1,153 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2016 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+/* Config parameters. */
+#include "../config.h"
+
+/* Some standard headers. */
+#include <errno.h>
+#include <fcntl.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+/* This object's header. */
+#include "dump.h"
+
+/* Local headers. */
+#include "atomic.h"
+#include "error.h"
+
+/**
+ * @brief Obtain a chunk of memory from a dump.
+ *
+ * @param d The #dump.
+ * @param count The number of bytes requested.
+ * @param offset The offset of the returned memory address within the dump file.
+ * @return A pointer to the memory-mapped chunk of data.
+ */
+
+void *dump_get(struct dump *d, size_t count, size_t *offset) {
+  size_t local_offset = atomic_add(&d->count, count);
+  *offset = local_offset + d->file_offset;
+  return (char *)d->data + local_offset;
+}
+
+/**
+ * @brief Ensure that at least size bytes are available in the #dump.
+ */
+
+void dump_ensure(struct dump *d, size_t size) {
+
+  /* If we have enough space already, just bail. */
+  if (d->size - d->count > size) return;
+
+  /* Unmap the current data. */
+  size_t trunc_count = d->count & d->page_mask;
+  if (munmap(d->data, trunc_count > 0 ? trunc_count : 1) != 0) {
+    error("Failed to unmap %zi bytes of dump data (%s).", trunc_count,
+          strerror(errno));
+  }
+
+  /* Update the size and count. */
+  d->file_offset += trunc_count;
+  d->count -= trunc_count;
+  d->size = (size * dump_grow_ensure_factor + ~d->page_mask) & d->page_mask;
+
+  /* Re-allocate the file size. */
+  if (posix_fallocate(d->fd, d->file_offset, d->size) != 0) {
+    error("Failed to pre-allocate the dump file.");
+  }
+
+  /* Re-map starting at the end of the file. */
+  if ((d->data = mmap(NULL, d->size, PROT_WRITE, MAP_SHARED, d->fd,
+                      d->file_offset)) == MAP_FAILED) {
+    error("Failed to allocate map of size %zi bytes (%s).", d->size,
+          strerror(errno));
+  }
+}
+
+/**
+ * @brief Flush the #dump to disk.
+ */
+
+void dump_sync(struct dump *d) {
+  if (msync(d->data, d->count, MS_SYNC) != 0)
+    error("Failed to sync memory-mapped data.");
+}
+
+/**
+ * @brief Finalize the #dump.
+ */
+
+void dump_close(struct dump *d) {
+  /* Unmap the data in memory. */
+  if (munmap(d->data, d->count) != 0) {
+    error("Failed to unmap dump data (%s).", strerror(errno));
+  }
+
+  /* Truncate the file to the correct length. */
+  if (ftruncate(d->fd, d->file_offset + d->count) != 0) {
+    error("Failed to truncate dump file (%s).", strerror(errno));
+  }
+
+  /* Close the memory-mapped file. */
+  if (close(d->fd) != 0) error("Failed to close memory-mapped file.");
+}
+
+/**
+ * @brief Initialize a file dump.
+ *
+ * @param d The #dump to initialize.
+ * @param filename The fully qualified name of the file in which to dump,
+ *                 note that it will be overwritten.
+ * @param size The initial buffer size for this #dump.
+ */
+
+void dump_init(struct dump *d, const char *filename, size_t size) {
+
+  /* Create the output file. */
+  if ((d->fd = open(filename, O_CREAT | O_RDWR, 0660)) == -1) {
+    error("Failed to create dump file '%s' (%s).", filename, strerror(errno));
+  }
+
+  /* Adjust the size to be at least the page size. */
+  const size_t page_mask = ~(sysconf(_SC_PAGE_SIZE) - 1);
+  size = (size + ~page_mask) & page_mask;
+
+  /* Pre-allocate the file size. */
+  if (posix_fallocate(d->fd, 0, size) != 0) {
+    error("Failed to pre-allocate the dump file.");
+  }
+
+  /* Map memory to the created file. */
+  if ((d->data = mmap(NULL, size, PROT_WRITE, MAP_SHARED, d->fd, 0)) ==
+      MAP_FAILED) {
+    error("Failed to allocate map of size %zi bytes (%s).", size,
+          strerror(errno));
+  }
+
+  /* Init some counters. */
+  d->size = size;
+  d->count = 0;
+  d->file_offset = 0;
+  d->page_mask = page_mask;
+}
diff --git a/src/dump.h b/src/dump.h
new file mode 100644
index 0000000000000000000000000000000000000000..a7e934218c271d2f82b99d39f278e5af3047be6e
--- /dev/null
+++ b/src/dump.h
@@ -0,0 +1,57 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2016 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+#ifndef SWIFT_DUMP_H
+#define SWIFT_DUMP_H
+
+/* Includes. */
+#include "lock.h"
+
+/* Some constants. */
+#define dump_grow_ensure_factor 10
+
+/** The dump struct. */
+struct dump {
+
+  /* The memory-mapped data of this dump. */
+  void *data;
+
+  /* The size of the memory-mapped data, in bytes. */
+  size_t size;
+
+  /* The number of bytes that have been dumped. */
+  size_t count;
+
+  /* The offset of the data within the current file. */
+  size_t file_offset;
+
+  /* The file with which this memory is associated. */
+  int fd;
+
+  /* Mask containing the significant bits for page addresses. */
+  size_t page_mask;
+};
+
+/* Function prototypes. */
+void dump_init(struct dump *d, const char *filename, size_t size);
+void dump_ensure(struct dump *d, size_t size);
+void dump_sync(struct dump *d);
+void dump_close(struct dump *d);
+void *dump_get(struct dump *d, size_t count, size_t *offset);
+
+#endif /* SWIFT_DUMP_H */
diff --git a/src/engine.c b/src/engine.c
index 88406493a174a85bab9b2fc4c77024bd43821ed3..2019d54a7715e5b75e48f5673ca24d7c6cc67b0c 100644
--- a/src/engine.c
+++ b/src/engine.c
@@ -48,6 +48,7 @@
 #include "engine.h"
 
 /* Local headers. */
+#include "active.h"
 #include "atomic.h"
 #include "cell.h"
 #include "clocks.h"
@@ -70,6 +71,9 @@
 #include "units.h"
 #include "version.h"
 
+/* Particle cache size. */
+#define CACHE_SIZE 512
+
 const char *engine_policy_names[16] = {"none",
                                        "rand",
                                        "steal",
@@ -84,7 +88,8 @@ const char *engine_policy_names[16] = {"none",
                                        "cosmology_integration",
                                        "drift_all",
                                        "cooling",
-                                       "sourceterms"};
+                                       "sourceterms",
+                                       "stars"};
 
 /** The rank of the engine as a global variable (for messages). */
 int engine_rank;
@@ -140,8 +145,25 @@ void engine_make_hierarchical_tasks(struct engine *e, struct cell *c) {
       c->init = scheduler_addtask(s, task_type_init, task_subtype_none, 0, 0, c,
                                   NULL, 0);
 
-      c->kick = scheduler_addtask(s, task_type_kick, task_subtype_none, 0, 0, c,
-                                  NULL, 0);
+      /* Add the two half kicks */
+      c->kick1 = scheduler_addtask(s, task_type_kick1, task_subtype_none, 0, 0,
+                                   c, NULL, 0);
+
+      c->kick2 = scheduler_addtask(s, task_type_kick2, task_subtype_none, 0, 0,
+                                   c, NULL, 0);
+
+      /* Add the time-step calculation task and its dependency */
+      c->timestep = scheduler_addtask(s, task_type_timestep, task_subtype_none,
+                                      0, 0, c, NULL, 0);
+
+      scheduler_addunlock(s, c->kick2, c->timestep);
+
+      /* Add the drift task and its dependencies. */
+      c->drift = scheduler_addtask(s, task_type_drift, task_subtype_none, 0, 0,
+                                   c, NULL, 0);
+
+      scheduler_addunlock(s, c->kick1, c->drift);
+      scheduler_addunlock(s, c->drift, c->init);
 
       /* Generate the ghost task. */
       if (is_hydro)
@@ -156,13 +178,18 @@ void engine_make_hierarchical_tasks(struct engine *e, struct cell *c) {
 #endif
 
       /* Cooling task */
-      if (is_with_cooling)
+      if (is_with_cooling) {
         c->cooling = scheduler_addtask(s, task_type_cooling, task_subtype_none,
                                        0, 0, c, NULL, 0);
+
+        scheduler_addunlock(s, c->cooling, c->kick2);
+      }
+
       /* add source terms */
-      if (is_with_sourceterms)
+      if (is_with_sourceterms) {
         c->sourceterms = scheduler_addtask(s, task_type_sourceterms,
                                            task_subtype_none, 0, 0, c, NULL, 0);
+      }
     }
 
   } else { /* We are above the super-cell so need to go deeper */
@@ -210,24 +237,30 @@ void engine_redistribute(struct engine *e) {
   struct part *parts = s->parts;
   struct xpart *xparts = s->xparts;
   struct gpart *gparts = s->gparts;
+  struct spart *sparts = s->sparts;
   ticks tic = getticks();
 
   /* Allocate temporary arrays to store the counts of particles to be sent
      and the destination of each particle */
-  int *counts, *g_counts;
+  int *counts, *g_counts, *s_counts;
   if ((counts = (int *)malloc(sizeof(int) * nr_nodes * nr_nodes)) == NULL)
-    error("Failed to allocate count temporary buffer.");
+    error("Failed to allocate counts temporary buffer.");
   if ((g_counts = (int *)malloc(sizeof(int) * nr_nodes * nr_nodes)) == NULL)
-    error("Failed to allocate gcount temporary buffer.");
+    error("Failed to allocate g_gcount temporary buffer.");
+  if ((s_counts = (int *)malloc(sizeof(int) * nr_nodes * nr_nodes)) == NULL)
+    error("Failed to allocate s_counts temporary buffer.");
   bzero(counts, sizeof(int) * nr_nodes * nr_nodes);
   bzero(g_counts, sizeof(int) * nr_nodes * nr_nodes);
+  bzero(s_counts, sizeof(int) * nr_nodes * nr_nodes);
 
   /* Allocate the destination index arrays. */
-  int *dest, *g_dest;
+  int *dest, *g_dest, *s_dest;
   if ((dest = (int *)malloc(sizeof(int) * s->nr_parts)) == NULL)
     error("Failed to allocate dest temporary buffer.");
   if ((g_dest = (int *)malloc(sizeof(int) * s->nr_gparts)) == NULL)
     error("Failed to allocate g_dest temporary buffer.");
+  if ((s_dest = (int *)malloc(sizeof(int) * s->nr_sparts)) == NULL)
+    error("Failed to allocate s_dest temporary buffer.");
 
   /* Get destination of each particle */
   for (size_t k = 0; k < s->nr_parts; k++) {
@@ -255,7 +288,32 @@ void engine_redistribute(struct engine *e) {
   }
 
   /* Sort the particles according to their cell index. */
-  space_parts_sort(s, dest, s->nr_parts, 0, nr_nodes - 1, e->verbose);
+  if (s->nr_parts > 0)
+    space_parts_sort(s, dest, s->nr_parts, 0, nr_nodes - 1, e->verbose);
+
+#ifdef SWIFT_DEBUG_CHECKS
+  /* Verify that the part have been sorted correctly. */
+  for (size_t k = 0; k < s->nr_parts; k++) {
+    const struct part *p = &s->parts[k];
+
+    /* New cell index */
+    const int new_cid =
+        cell_getid(s->cdim, p->x[0] * s->iwidth[0], p->x[1] * s->iwidth[1],
+                   p->x[2] * s->iwidth[2]);
+
+    /* New cell of this part */
+    const struct cell *c = &s->cells_top[new_cid];
+    const int new_node = c->nodeID;
+
+    if (dest[k] != new_node)
+      error("part's new node index not matching sorted index.");
+
+    if (p->x[0] < c->loc[0] || p->x[0] > c->loc[0] + c->width[0] ||
+        p->x[1] < c->loc[1] || p->x[1] > c->loc[1] + c->width[1] ||
+        p->x[2] < c->loc[2] || p->x[2] > c->loc[2] + c->width[2])
+      error("part not sorted into the right top-level cell!");
+  }
+#endif
 
   /* We need to re-link the gpart partners of parts. */
   if (s->nr_parts > 0) {
@@ -267,7 +325,7 @@ void engine_redistribute(struct engine *e) {
         /* As the addresses will be invalidated by the communications, we will
          * instead store the absolute index from the start of the sub-array of
          * particles to be sent to a given node.
-         * Recall that gparts without partners have a negative id.
+         * Recall that gparts without partners have a positive id.
          * We will restore the pointers on the receiving node later on. */
         if (dest[k] != current_dest) {
           current_dest = dest[k];
@@ -275,7 +333,7 @@ void engine_redistribute(struct engine *e) {
         }
 
 #ifdef SWIFT_DEBUG_CHECKS
-        if (s->parts[k].gpart->id_or_neg_offset >= 0)
+        if (s->parts[k].gpart->id_or_neg_offset > 0)
           error("Trying to link a partnerless gpart !");
 #endif
 
@@ -285,6 +343,87 @@ void engine_redistribute(struct engine *e) {
     }
   }
 
+  /* Get destination of each s-particle */
+  for (size_t k = 0; k < s->nr_sparts; k++) {
+
+    /* Periodic boundary conditions */
+    for (int j = 0; j < 3; j++) {
+      if (sparts[k].x[j] < 0.0)
+        sparts[k].x[j] += dim[j];
+      else if (sparts[k].x[j] >= dim[j])
+        sparts[k].x[j] -= dim[j];
+    }
+    const int cid =
+        cell_getid(cdim, sparts[k].x[0] * iwidth[0], sparts[k].x[1] * iwidth[1],
+                   sparts[k].x[2] * iwidth[2]);
+#ifdef SWIFT_DEBUG_CHECKS
+    if (cid < 0 || cid >= s->nr_cells)
+      error("Bad cell id %i for part %zu at [%.3e,%.3e,%.3e].", cid, k,
+            sparts[k].x[0], sparts[k].x[1], sparts[k].x[2]);
+#endif
+
+    s_dest[k] = cells[cid].nodeID;
+
+    /* The counts array is indexed as count[from * nr_nodes + to]. */
+    s_counts[nodeID * nr_nodes + s_dest[k]] += 1;
+  }
+
+  /* Sort the particles according to their cell index. */
+  if (s->nr_sparts > 0)
+    space_sparts_sort(s, s_dest, s->nr_sparts, 0, nr_nodes - 1, e->verbose);
+
+#ifdef SWIFT_DEBUG_CHECKS
+  /* Verify that the spart have been sorted correctly. */
+  for (size_t k = 0; k < s->nr_sparts; k++) {
+    const struct spart *sp = &s->sparts[k];
+
+    /* New cell index */
+    const int new_cid =
+        cell_getid(s->cdim, sp->x[0] * s->iwidth[0], sp->x[1] * s->iwidth[1],
+                   sp->x[2] * s->iwidth[2]);
+
+    /* New cell of this spart */
+    const struct cell *c = &s->cells_top[new_cid];
+    const int new_node = c->nodeID;
+
+    if (s_dest[k] != new_node)
+      error("spart's new node index not matching sorted index.");
+
+    if (sp->x[0] < c->loc[0] || sp->x[0] > c->loc[0] + c->width[0] ||
+        sp->x[1] < c->loc[1] || sp->x[1] > c->loc[1] + c->width[1] ||
+        sp->x[2] < c->loc[2] || sp->x[2] > c->loc[2] + c->width[2])
+      error("spart not sorted into the right top-level cell!");
+  }
+#endif
+
+  /* We need to re-link the gpart partners of sparts. */
+  if (s->nr_sparts > 0) {
+    int current_dest = s_dest[0];
+    size_t count_this_dest = 0;
+    for (size_t k = 0; k < s->nr_sparts; ++k) {
+      if (s->sparts[k].gpart != NULL) {
+
+        /* As the addresses will be invalidated by the communications, we will
+         * instead store the absolute index from the start of the sub-array of
+         * particles to be sent to a given node.
+         * Recall that gparts without partners have a positive id.
+         * We will restore the pointers on the receiving node later on. */
+        if (s_dest[k] != current_dest) {
+          current_dest = s_dest[k];
+          count_this_dest = 0;
+        }
+
+#ifdef SWIFT_DEBUG_CHECKS
+        if (s->sparts[k].gpart->id_or_neg_offset > 0)
+          error("Trying to link a partnerless gpart !");
+#endif
+
+        s->sparts[k].gpart->id_or_neg_offset = -count_this_dest;
+        count_this_dest++;
+      }
+    }
+  }
+
   /* Get destination of each g-particle */
   for (size_t k = 0; k < s->nr_gparts; k++) {
 
@@ -311,48 +450,96 @@ void engine_redistribute(struct engine *e) {
   }
 
   /* Sort the gparticles according to their cell index. */
-  space_gparts_sort(s, g_dest, s->nr_gparts, 0, nr_nodes - 1, e->verbose);
+  if (s->nr_gparts > 0)
+    space_gparts_sort(s, g_dest, s->nr_gparts, 0, nr_nodes - 1, e->verbose);
+
+#ifdef SWIFT_DEBUG_CHECKS
+  /* Verify that the gpart have been sorted correctly. */
+  for (size_t k = 0; k < s->nr_gparts; k++) {
+    const struct gpart *gp = &s->gparts[k];
+
+    /* New cell index */
+    const int new_cid =
+        cell_getid(s->cdim, gp->x[0] * s->iwidth[0], gp->x[1] * s->iwidth[1],
+                   gp->x[2] * s->iwidth[2]);
+
+    /* New cell of this gpart */
+    const struct cell *c = &s->cells_top[new_cid];
+    const int new_node = c->nodeID;
+
+    if (g_dest[k] != new_node)
+      error("gpart's new node index not matching sorted index.");
+
+    if (gp->x[0] < c->loc[0] || gp->x[0] > c->loc[0] + c->width[0] ||
+        gp->x[1] < c->loc[1] || gp->x[1] > c->loc[1] + c->width[1] ||
+        gp->x[2] < c->loc[2] || gp->x[2] > c->loc[2] + c->width[2])
+      error("gpart not sorted into the right top-level cell!");
+  }
+#endif
 
   /* Get all the counts from all the nodes. */
   if (MPI_Allreduce(MPI_IN_PLACE, counts, nr_nodes * nr_nodes, MPI_INT, MPI_SUM,
                     MPI_COMM_WORLD) != MPI_SUCCESS)
     error("Failed to allreduce particle transfer counts.");
 
+  /* Get all the s_counts from all the nodes. */
+  if (MPI_Allreduce(MPI_IN_PLACE, g_counts, nr_nodes * nr_nodes, MPI_INT,
+                    MPI_SUM, MPI_COMM_WORLD) != MPI_SUCCESS)
+    error("Failed to allreduce gparticle transfer counts.");
+
+  /* Get all the g_counts from all the nodes. */
+  if (MPI_Allreduce(MPI_IN_PLACE, s_counts, nr_nodes * nr_nodes, MPI_INT,
+                    MPI_SUM, MPI_COMM_WORLD) != MPI_SUCCESS)
+    error("Failed to allreduce sparticle transfer counts.");
+
   /* Report how many particles will be moved. */
   if (e->verbose) {
     if (e->nodeID == 0) {
-      size_t total = 0;
-      size_t unmoved = 0;
+      size_t total = 0, g_total = 0, s_total = 0;
+      size_t unmoved = 0, g_unmoved = 0, s_unmoved = 0;
       for (int p = 0, r = 0; p < nr_nodes; p++) {
         for (int s = 0; s < nr_nodes; s++) {
           total += counts[r];
-          if (p == s) unmoved += counts[r];
+          g_total += g_counts[r];
+          s_total += s_counts[r];
+          if (p == s) {
+            unmoved += counts[r];
+            g_unmoved += g_counts[r];
+            s_unmoved += s_counts[r];
+          }
           r++;
         }
       }
-      message("%ld of %ld (%.2f%%) of particles moved", total - unmoved, total,
-              100.0 * (double)(total - unmoved) / (double)total);
+      if (total > 0)
+        message("%ld of %ld (%.2f%%) of particles moved", total - unmoved,
+                total, 100.0 * (double)(total - unmoved) / (double)total);
+      if (g_total > 0)
+        message("%ld of %ld (%.2f%%) of g-particles moved", g_total - g_unmoved,
+                g_total,
+                100.0 * (double)(g_total - g_unmoved) / (double)g_total);
+      if (s_total > 0)
+        message("%ld of %ld (%.2f%%) of s-particles moved", s_total - s_unmoved,
+                s_total,
+                100.0 * (double)(s_total - s_unmoved) / (double)s_total);
     }
   }
 
-  /* Get all the g_counts from all the nodes. */
-  if (MPI_Allreduce(MPI_IN_PLACE, g_counts, nr_nodes * nr_nodes, MPI_INT,
-                    MPI_SUM, MPI_COMM_WORLD) != MPI_SUCCESS)
-    error("Failed to allreduce gparticle transfer counts.");
-
-  /* Each node knows how many parts and gparts will be transferred to every
-     other node. We can start preparing to receive data */
+  /* Each node knows how many parts, sparts and gparts will be transferred
+     to every other node. We can start preparing to receive data */
 
   /* Get the new number of parts and gparts for this node */
-  size_t nr_parts = 0, nr_gparts = 0;
+  size_t nr_parts = 0, nr_gparts = 0, nr_sparts = 0;
   for (int k = 0; k < nr_nodes; k++) nr_parts += counts[k * nr_nodes + nodeID];
   for (int k = 0; k < nr_nodes; k++)
     nr_gparts += g_counts[k * nr_nodes + nodeID];
+  for (int k = 0; k < nr_nodes; k++)
+    nr_sparts += s_counts[k * nr_nodes + nodeID];
 
   /* Allocate the new arrays with some extra margin */
   struct part *parts_new = NULL;
   struct xpart *xparts_new = NULL;
   struct gpart *gparts_new = NULL;
+  struct spart *sparts_new = NULL;
   if (posix_memalign((void **)&parts_new, part_align,
                      sizeof(struct part) * nr_parts *
                          engine_redistribute_alloc_margin) != 0)
@@ -365,17 +552,22 @@ void engine_redistribute(struct engine *e) {
                      sizeof(struct gpart) * nr_gparts *
                          engine_redistribute_alloc_margin) != 0)
     error("Failed to allocate new gpart data.");
+  if (posix_memalign((void **)&sparts_new, spart_align,
+                     sizeof(struct spart) * nr_sparts *
+                         engine_redistribute_alloc_margin) != 0)
+    error("Failed to allocate new spart data.");
 
   /* Prepare MPI requests for the asynchronous communications */
   MPI_Request *reqs;
-  if ((reqs = (MPI_Request *)malloc(sizeof(MPI_Request) * 6 * nr_nodes)) ==
+  if ((reqs = (MPI_Request *)malloc(sizeof(MPI_Request) * 8 * nr_nodes)) ==
       NULL)
     error("Failed to allocate MPI request list.");
-  for (int k = 0; k < 6 * nr_nodes; k++) reqs[k] = MPI_REQUEST_NULL;
+  for (int k = 0; k < 8 * nr_nodes; k++) reqs[k] = MPI_REQUEST_NULL;
 
   /* Emit the sends and recvs for the particle and gparticle data. */
   size_t offset_send = 0, offset_recv = 0;
   size_t g_offset_send = 0, g_offset_recv = 0;
+  size_t s_offset_send = 0, s_offset_recv = 0;
   for (int k = 0; k < nr_nodes; k++) {
 
     /* Indices in the count arrays of the node of interest */
@@ -399,12 +591,12 @@ void engine_redistribute(struct engine *e) {
         /* Else, emit some communications */
       } else {
         if (MPI_Isend(&s->parts[offset_send], counts[ind_send], part_mpi_type,
-                      k, 3 * ind_send + 0, MPI_COMM_WORLD,
-                      &reqs[6 * k]) != MPI_SUCCESS)
+                      k, 4 * ind_send + 0, MPI_COMM_WORLD,
+                      &reqs[8 * k + 0]) != MPI_SUCCESS)
           error("Failed to isend parts to node %i.", k);
         if (MPI_Isend(&s->xparts[offset_send], counts[ind_send], xpart_mpi_type,
-                      k, 3 * ind_send + 1, MPI_COMM_WORLD,
-                      &reqs[6 * k + 1]) != MPI_SUCCESS)
+                      k, 4 * ind_send + 1, MPI_COMM_WORLD,
+                      &reqs[8 * k + 1]) != MPI_SUCCESS)
           error("Failed to isend xparts to node %i.", k);
         offset_send += counts[ind_send];
       }
@@ -425,24 +617,46 @@ void engine_redistribute(struct engine *e) {
         /* Else, emit some communications */
       } else {
         if (MPI_Isend(&s->gparts[g_offset_send], g_counts[ind_send],
-                      gpart_mpi_type, k, 3 * ind_send + 2, MPI_COMM_WORLD,
-                      &reqs[6 * k + 2]) != MPI_SUCCESS)
+                      gpart_mpi_type, k, 4 * ind_send + 2, MPI_COMM_WORLD,
+                      &reqs[8 * k + 2]) != MPI_SUCCESS)
           error("Failed to isend gparts to node %i.", k);
         g_offset_send += g_counts[ind_send];
       }
     }
 
+    /* Are we sending any spart ? */
+    if (s_counts[ind_send] > 0) {
+
+      /* message("Sending %d spart to node %d", s_counts[ind_send], k); */
+
+      /* If the send is to the same node, just copy */
+      if (k == nodeID) {
+        memcpy(&sparts_new[s_offset_recv], &s->sparts[s_offset_send],
+               sizeof(struct spart) * s_counts[ind_recv]);
+        s_offset_send += s_counts[ind_send];
+        s_offset_recv += s_counts[ind_recv];
+
+        /* Else, emit some communications */
+      } else {
+        if (MPI_Isend(&s->sparts[s_offset_send], s_counts[ind_send],
+                      spart_mpi_type, k, 4 * ind_send + 3, MPI_COMM_WORLD,
+                      &reqs[8 * k + 3]) != MPI_SUCCESS)
+          error("Failed to isend gparts to node %i.", k);
+        s_offset_send += s_counts[ind_send];
+      }
+    }
+
     /* Now emit the corresponding Irecv() */
 
     /* Are we receiving any part/xpart from this node ? */
     if (k != nodeID && counts[ind_recv] > 0) {
       if (MPI_Irecv(&parts_new[offset_recv], counts[ind_recv], part_mpi_type, k,
-                    3 * ind_recv + 0, MPI_COMM_WORLD,
-                    &reqs[6 * k + 3]) != MPI_SUCCESS)
+                    4 * ind_recv + 0, MPI_COMM_WORLD,
+                    &reqs[8 * k + 4]) != MPI_SUCCESS)
         error("Failed to emit irecv of parts from node %i.", k);
       if (MPI_Irecv(&xparts_new[offset_recv], counts[ind_recv], xpart_mpi_type,
-                    k, 3 * ind_recv + 1, MPI_COMM_WORLD,
-                    &reqs[6 * k + 4]) != MPI_SUCCESS)
+                    k, 4 * ind_recv + 1, MPI_COMM_WORLD,
+                    &reqs[8 * k + 5]) != MPI_SUCCESS)
         error("Failed to emit irecv of xparts from node %i.", k);
       offset_recv += counts[ind_recv];
     }
@@ -450,18 +664,27 @@ void engine_redistribute(struct engine *e) {
     /* Are we receiving any gpart from this node ? */
     if (k != nodeID && g_counts[ind_recv] > 0) {
       if (MPI_Irecv(&gparts_new[g_offset_recv], g_counts[ind_recv],
-                    gpart_mpi_type, k, 3 * ind_recv + 2, MPI_COMM_WORLD,
-                    &reqs[6 * k + 5]) != MPI_SUCCESS)
+                    gpart_mpi_type, k, 4 * ind_recv + 2, MPI_COMM_WORLD,
+                    &reqs[8 * k + 6]) != MPI_SUCCESS)
         error("Failed to emit irecv of gparts from node %i.", k);
       g_offset_recv += g_counts[ind_recv];
     }
+
+    /* Are we receiving any spart from this node ? */
+    if (k != nodeID && s_counts[ind_recv] > 0) {
+      if (MPI_Irecv(&sparts_new[s_offset_recv], s_counts[ind_recv],
+                    spart_mpi_type, k, 4 * ind_recv + 3, MPI_COMM_WORLD,
+                    &reqs[8 * k + 7]) != MPI_SUCCESS)
+        error("Failed to emit irecv of sparts from node %i.", k);
+      s_offset_recv += s_counts[ind_recv];
+    }
   }
 
   /* Wait for all the sends and recvs to tumble in. */
-  MPI_Status stats[6 * nr_nodes];
+  MPI_Status stats[8 * nr_nodes];
   int res;
-  if ((res = MPI_Waitall(6 * nr_nodes, reqs, stats)) != MPI_SUCCESS) {
-    for (int k = 0; k < 6 * nr_nodes; k++) {
+  if ((res = MPI_Waitall(8 * nr_nodes, reqs, stats)) != MPI_SUCCESS) {
+    for (int k = 0; k < 8 * nr_nodes; k++) {
       char buff[MPI_MAX_ERROR_STRING];
       MPI_Error_string(stats[k].MPI_ERROR, buff, &res);
       message("request %i has error '%s'.", k, buff);
@@ -469,19 +692,23 @@ void engine_redistribute(struct engine *e) {
     error("Failed during waitall for part data.");
   }
 
-  /* We now need to restore the part<->gpart links */
-  size_t offset_parts = 0, offset_gparts = 0;
+  /* All particles have now arrived. Time for some final operations on the
+     stuff we just received */
+
+  /* Restore the part<->gpart and spart<->gpart links */
+  size_t offset_parts = 0, offset_sparts = 0, offset_gparts = 0;
   for (int node = 0; node < nr_nodes; ++node) {
 
     const int ind_recv = node * nr_nodes + nodeID;
     const size_t count_parts = counts[ind_recv];
     const size_t count_gparts = g_counts[ind_recv];
+    const size_t count_sparts = s_counts[ind_recv];
 
     /* Loop over the gparts received from that node */
     for (size_t k = offset_gparts; k < offset_gparts + count_gparts; ++k) {
 
-      /* Does this gpart have a partner ? */
-      if (gparts_new[k].id_or_neg_offset <= 0) {
+      /* Does this gpart have a gas partner ? */
+      if (gparts_new[k].type == swift_type_gas) {
 
         const ptrdiff_t partner_index =
             offset_parts - gparts_new[k].id_or_neg_offset;
@@ -490,10 +717,22 @@ void engine_redistribute(struct engine *e) {
         gparts_new[k].id_or_neg_offset = -partner_index;
         parts_new[partner_index].gpart = &gparts_new[k];
       }
+
+      /* Does this gpart have a star partner ? */
+      if (gparts_new[k].type == swift_type_star) {
+
+        const ptrdiff_t partner_index =
+            offset_sparts - gparts_new[k].id_or_neg_offset;
+
+        /* Re-link */
+        gparts_new[k].id_or_neg_offset = -partner_index;
+        sparts_new[partner_index].gpart = &gparts_new[k];
+      }
     }
 
     offset_parts += count_parts;
     offset_gparts += count_gparts;
+    offset_sparts += count_sparts;
   }
 
 #ifdef SWIFT_DEBUG_CHECKS
@@ -506,41 +745,43 @@ void engine_redistribute(struct engine *e) {
       error("Received particle (%zu) that does not belong here (nodeID=%i).", k,
             cells[cid].nodeID);
   }
-
-  /* Verify that the links are correct */
-  for (size_t k = 0; k < nr_gparts; ++k) {
-
-    if (gparts_new[k].id_or_neg_offset <= 0) {
-
-      struct part *part = &parts_new[-gparts_new[k].id_or_neg_offset];
-
-      if (part->gpart != &gparts_new[k]) error("Linking problem !");
-
-      if (gparts_new[k].x[0] != part->x[0] ||
-          gparts_new[k].x[1] != part->x[1] || gparts_new[k].x[2] != part->x[2])
-        error("Linked particles are not at the same position !");
-    }
+  for (size_t k = 0; k < nr_gparts; k++) {
+    const int cid = cell_getid(cdim, gparts_new[k].x[0] * iwidth[0],
+                               gparts_new[k].x[1] * iwidth[1],
+                               gparts_new[k].x[2] * iwidth[2]);
+    if (cells[cid].nodeID != nodeID)
+      error("Received g-particle (%zu) that does not belong here (nodeID=%i).",
+            k, cells[cid].nodeID);
   }
-  for (size_t k = 0; k < nr_parts; ++k) {
-
-    if (parts_new[k].gpart != NULL &&
-        parts_new[k].gpart->id_or_neg_offset != -(ptrdiff_t)k) {
-      error("Linking problem !");
-    }
+  for (size_t k = 0; k < nr_sparts; k++) {
+    const int cid = cell_getid(cdim, sparts_new[k].x[0] * iwidth[0],
+                               sparts_new[k].x[1] * iwidth[1],
+                               sparts_new[k].x[2] * iwidth[2]);
+    if (cells[cid].nodeID != nodeID)
+      error("Received s-particle (%zu) that does not belong here (nodeID=%i).",
+            k, cells[cid].nodeID);
   }
+
+  /* Verify that the links are correct */
+  part_verify_links(parts_new, gparts_new, sparts_new, nr_parts, nr_gparts,
+                    nr_sparts, e->verbose);
 #endif
 
   /* Set the new part data, free the old. */
   free(parts);
   free(xparts);
   free(gparts);
+  free(sparts);
   s->parts = parts_new;
   s->xparts = xparts_new;
   s->gparts = gparts_new;
+  s->sparts = sparts_new;
   s->nr_parts = nr_parts;
   s->nr_gparts = nr_gparts;
+  s->nr_sparts = nr_sparts;
   s->size_parts = engine_redistribute_alloc_margin * nr_parts;
   s->size_gparts = engine_redistribute_alloc_margin * nr_gparts;
+  s->size_sparts = engine_redistribute_alloc_margin * nr_sparts;
 
   /* Clean up the temporary stuff. */
   free(reqs);
@@ -552,8 +793,8 @@ void engine_redistribute(struct engine *e) {
     int my_cells = 0;
     for (int k = 0; k < nr_cells; k++)
       if (cells[k].nodeID == nodeID) my_cells += 1;
-    message("node %i now has %zu parts and %zu gparts in %i cells.", nodeID,
-            nr_parts, nr_gparts, my_cells);
+    message("node %i now has %zu parts, %zu sparts and %zu gparts in %i cells.",
+            nodeID, nr_parts, nr_sparts, nr_gparts, my_cells);
   }
 
   if (e->verbose)
@@ -576,6 +817,10 @@ void engine_repartition(struct engine *e) {
   ticks tic = getticks();
 
 #ifdef SWIFT_DEBUG_CHECKS
+  /* Be verbose about this. */
+  if (e->nodeID == 0 || e->verbose) message("repartitioning space");
+  fflush(stdout);
+
   /* Check that all cells have been drifted to the current time */
   space_check_drift_point(e->s, e->ti_current);
 #endif
@@ -668,20 +913,25 @@ void engine_addtasks_send(struct engine *e, struct cell *ci, struct cell *cj,
 
     /* Create the tasks and their dependencies? */
     if (t_xv == NULL) {
-      t_xv = scheduler_addtask(s, task_type_send, task_subtype_none,
-                               4 * ci->tag, 0, ci, cj, 0);
-      t_rho = scheduler_addtask(s, task_type_send, task_subtype_none,
+
+      if (ci->super->drift == NULL)
+        ci->super->drift = scheduler_addtask(
+            s, task_type_drift, task_subtype_none, 0, 0, ci->super, NULL, 0);
+
+      t_xv = scheduler_addtask(s, task_type_send, task_subtype_xv, 4 * ci->tag,
+                               0, ci, cj, 0);
+      t_rho = scheduler_addtask(s, task_type_send, task_subtype_rho,
                                 4 * ci->tag + 1, 0, ci, cj, 0);
       t_ti = scheduler_addtask(s, task_type_send, task_subtype_tend,
                                4 * ci->tag + 2, 0, ci, cj, 0);
 #ifdef EXTRA_HYDRO_LOOP
-      t_gradient = scheduler_addtask(s, task_type_send, task_subtype_none,
+      t_gradient = scheduler_addtask(s, task_type_send, task_subtype_gradient,
                                      4 * ci->tag + 3, 0, ci, cj, 0);
 #endif
 
 #ifdef EXTRA_HYDRO_LOOP
 
-      scheduler_addunlock(s, t_gradient, ci->super->kick);
+      scheduler_addunlock(s, t_gradient, ci->super->kick2);
 
       scheduler_addunlock(s, ci->super->extra_ghost, t_gradient);
 
@@ -696,17 +946,21 @@ void engine_addtasks_send(struct engine *e, struct cell *ci, struct cell *cj,
 
 #else
       /* The send_rho task should unlock the super-cell's kick task. */
-      scheduler_addunlock(s, t_rho, ci->super->kick);
+      scheduler_addunlock(s, t_rho, ci->super->kick2);
 
       /* The send_rho task depends on the cell's ghost task. */
       scheduler_addunlock(s, ci->super->ghost, t_rho);
 
       /* The send_xv task should unlock the super-cell's ghost task. */
       scheduler_addunlock(s, t_xv, ci->super->ghost);
+
 #endif
 
-      /* The super-cell's kick task should unlock the send_ti task. */
-      if (t_ti != NULL) scheduler_addunlock(s, ci->super->kick, t_ti);
+      /* Drift before you send */
+      scheduler_addunlock(s, ci->super->drift, t_xv);
+
+      /* The super-cell's timestep task should unlock the send_ti task. */
+      scheduler_addunlock(s, ci->super->timestep, t_ti);
     }
 
     /* Add them to the local cell. */
@@ -715,7 +969,7 @@ void engine_addtasks_send(struct engine *e, struct cell *ci, struct cell *cj,
 #ifdef EXTRA_HYDRO_LOOP
     engine_addlink(e, &ci->send_gradient, t_gradient);
 #endif
-    if (t_ti != NULL) engine_addlink(e, &ci->send_ti, t_ti);
+    engine_addlink(e, &ci->send_ti, t_ti);
   }
 
   /* Recurse? */
@@ -753,14 +1007,14 @@ void engine_addtasks_recv(struct engine *e, struct cell *c, struct task *t_xv,
   if (t_xv == NULL && c->density != NULL) {
 
     /* Create the tasks. */
-    t_xv = scheduler_addtask(s, task_type_recv, task_subtype_none, 4 * c->tag,
-                             0, c, NULL, 0);
-    t_rho = scheduler_addtask(s, task_type_recv, task_subtype_none,
+    t_xv = scheduler_addtask(s, task_type_recv, task_subtype_xv, 4 * c->tag, 0,
+                             c, NULL, 0);
+    t_rho = scheduler_addtask(s, task_type_recv, task_subtype_rho,
                               4 * c->tag + 1, 0, c, NULL, 0);
     t_ti = scheduler_addtask(s, task_type_recv, task_subtype_tend,
                              4 * c->tag + 2, 0, c, NULL, 0);
 #ifdef EXTRA_HYDRO_LOOP
-    t_gradient = scheduler_addtask(s, task_type_recv, task_subtype_none,
+    t_gradient = scheduler_addtask(s, task_type_recv, task_subtype_gradient,
                                    4 * c->tag + 3, 0, c, NULL, 0);
 #endif
   }
@@ -781,7 +1035,7 @@ void engine_addtasks_recv(struct engine *e, struct cell *c, struct task *t_xv,
   }
   for (struct link *l = c->force; l != NULL; l = l->next) {
     scheduler_addunlock(s, t_gradient, l->t);
-    if (t_ti != NULL) scheduler_addunlock(s, l->t, t_ti);
+    scheduler_addunlock(s, l->t, t_ti);
   }
   if (c->sorts != NULL) scheduler_addunlock(s, t_xv, c->sorts);
 #else
@@ -791,7 +1045,7 @@ void engine_addtasks_recv(struct engine *e, struct cell *c, struct task *t_xv,
   }
   for (struct link *l = c->force; l != NULL; l = l->next) {
     scheduler_addunlock(s, t_rho, l->t);
-    if (t_ti != NULL) scheduler_addunlock(s, l->t, t_ti);
+    scheduler_addunlock(s, l->t, t_ti);
   }
   if (c->sorts != NULL) scheduler_addunlock(s, t_xv, c->sorts);
 #endif
@@ -894,11 +1148,12 @@ void engine_exchange_cells(struct engine *e) {
 
   /* Count the number of particles we need to import and re-allocate
      the buffer if needed. */
-  size_t count_parts_in = 0, count_gparts_in = 0;
+  size_t count_parts_in = 0, count_gparts_in = 0, count_sparts_in = 0;
   for (int k = 0; k < nr_proxies; k++)
     for (int j = 0; j < e->proxies[k].nr_cells_in; j++) {
       count_parts_in += e->proxies[k].cells_in[j]->count;
       count_gparts_in += e->proxies[k].cells_in[j]->gcount;
+      count_sparts_in += e->proxies[k].cells_in[j]->scount;
     }
   if (count_parts_in > s->size_parts_foreign) {
     if (s->parts_foreign != NULL) free(s->parts_foreign);
@@ -914,20 +1169,31 @@ void engine_exchange_cells(struct engine *e) {
                        sizeof(struct gpart) * s->size_gparts_foreign) != 0)
       error("Failed to allocate foreign gpart data.");
   }
+  if (count_sparts_in > s->size_sparts_foreign) {
+    if (s->sparts_foreign != NULL) free(s->sparts_foreign);
+    s->size_sparts_foreign = 1.1 * count_sparts_in;
+    if (posix_memalign((void **)&s->sparts_foreign, spart_align,
+                       sizeof(struct spart) * s->size_sparts_foreign) != 0)
+      error("Failed to allocate foreign spart data.");
+  }
 
   /* Unpack the cells and link to the particle data. */
   struct part *parts = s->parts_foreign;
   struct gpart *gparts = s->gparts_foreign;
+  struct spart *sparts = s->sparts_foreign;
   for (int k = 0; k < nr_proxies; k++) {
     for (int j = 0; j < e->proxies[k].nr_cells_in; j++) {
       cell_link_parts(e->proxies[k].cells_in[j], parts);
       cell_link_gparts(e->proxies[k].cells_in[j], gparts);
+      cell_link_sparts(e->proxies[k].cells_in[j], sparts);
       parts = &parts[e->proxies[k].cells_in[j]->count];
       gparts = &gparts[e->proxies[k].cells_in[j]->gcount];
+      sparts = &sparts[e->proxies[k].cells_in[j]->scount];
     }
   }
   s->nr_parts_foreign = parts - s->parts_foreign;
   s->nr_gparts_foreign = gparts - s->gparts_foreign;
+  s->nr_sparts_foreign = sparts - s->sparts_foreign;
 
   /* Free the pcell buffer. */
   free(pcells);
@@ -942,7 +1208,7 @@ void engine_exchange_cells(struct engine *e) {
 }
 
 /**
- * @brief Exchange straying parts with other nodes.
+ * @brief Exchange straying particles with other nodes.
  *
  * @param e The #engine.
  * @param offset_parts The index in the parts array as of which the foreign
@@ -955,13 +1221,20 @@ void engine_exchange_cells(struct engine *e) {
  * @param ind_gpart The foreign #cell ID of each gpart.
  * @param Ngpart The number of stray gparts, contains the number of gparts
  *        received on return.
+ * @param offset_sparts The index in the sparts array as of which the foreign
+ *        parts reside.
+ * @param ind_spart The foreign #cell ID of each spart.
+ * @param Nspart The number of stray sparts, contains the number of sparts
+ *        received on return.
  *
  * Note that this function does not mess-up the linkage between parts and
  * gparts, i.e. the received particles have correct linkeage.
  */
 void engine_exchange_strays(struct engine *e, size_t offset_parts,
                             int *ind_part, size_t *Npart, size_t offset_gparts,
-                            int *ind_gpart, size_t *Ngpart) {
+                            int *ind_gpart, size_t *Ngpart,
+                            size_t offset_sparts, int *ind_spart,
+                            size_t *Nspart) {
 
 #ifdef WITH_MPI
 
@@ -972,9 +1245,10 @@ void engine_exchange_strays(struct engine *e, size_t offset_parts,
   for (int k = 0; k < e->nr_proxies; k++) {
     e->proxies[k].nr_parts_out = 0;
     e->proxies[k].nr_gparts_out = 0;
+    e->proxies[k].nr_sparts_out = 0;
   }
 
-  /* Put the parts and gparts into the corresponding proxies. */
+  /* Put the parts into the corresponding proxies. */
   for (size_t k = 0; k < *Npart; k++) {
     /* Get the target node and proxy ID. */
     const int node_id = e->s->cells_top[ind_part[k]].nodeID;
@@ -1000,6 +1274,32 @@ void engine_exchange_strays(struct engine *e, size_t offset_parts,
     proxy_parts_load(&e->proxies[pid], &s->parts[offset_parts + k],
                      &s->xparts[offset_parts + k], 1);
   }
+
+  /* Put the sparts into the corresponding proxies. */
+  for (size_t k = 0; k < *Nspart; k++) {
+    const int node_id = e->s->cells_top[ind_spart[k]].nodeID;
+    if (node_id < 0 || node_id >= e->nr_nodes)
+      error("Bad node ID %i.", node_id);
+    const int pid = e->proxy_ind[node_id];
+    if (pid < 0)
+      error(
+          "Do not have a proxy for the requested nodeID %i for part with "
+          "id=%lld, x=[%e,%e,%e].",
+          node_id, s->sparts[offset_sparts + k].id,
+          s->sparts[offset_sparts + k].x[0], s->sparts[offset_sparts + k].x[1],
+          s->sparts[offset_sparts + k].x[2]);
+
+    /* Re-link the associated gpart with the buffer offset of the spart. */
+    if (s->sparts[offset_sparts + k].gpart != NULL) {
+      s->sparts[offset_sparts + k].gpart->id_or_neg_offset =
+          -e->proxies[pid].nr_sparts_out;
+    }
+
+    /* Load the spart into the proxy */
+    proxy_sparts_load(&e->proxies[pid], &s->sparts[offset_sparts + k], 1);
+  }
+
+  /* Put the gparts into the corresponding proxies. */
   for (size_t k = 0; k < *Ngpart; k++) {
     const int node_id = e->s->cells_top[ind_gpart[k]].nodeID;
     if (node_id < 0 || node_id >= e->nr_nodes)
@@ -1009,15 +1309,17 @@ void engine_exchange_strays(struct engine *e, size_t offset_parts,
       error(
           "Do not have a proxy for the requested nodeID %i for part with "
           "id=%lli, x=[%e,%e,%e].",
-          node_id, s->gparts[offset_parts + k].id_or_neg_offset,
-          s->gparts[offset_gparts + k].x[0], s->gparts[offset_parts + k].x[1],
+          node_id, s->gparts[offset_gparts + k].id_or_neg_offset,
+          s->gparts[offset_gparts + k].x[0], s->gparts[offset_gparts + k].x[1],
           s->gparts[offset_gparts + k].x[2]);
+
+    /* Load the gpart into the proxy */
     proxy_gparts_load(&e->proxies[pid], &s->gparts[offset_gparts + k], 1);
   }
 
   /* Launch the proxies. */
-  MPI_Request reqs_in[3 * engine_maxproxies];
-  MPI_Request reqs_out[3 * engine_maxproxies];
+  MPI_Request reqs_in[4 * engine_maxproxies];
+  MPI_Request reqs_out[4 * engine_maxproxies];
   for (int k = 0; k < e->nr_proxies; k++) {
     proxy_parts_exch1(&e->proxies[k]);
     reqs_in[k] = e->proxies[k].req_parts_count_in;
@@ -1043,14 +1345,19 @@ void engine_exchange_strays(struct engine *e, size_t offset_parts,
      enough space to accommodate them. */
   int count_parts_in = 0;
   int count_gparts_in = 0;
+  int count_sparts_in = 0;
   for (int k = 0; k < e->nr_proxies; k++) {
     count_parts_in += e->proxies[k].nr_parts_in;
     count_gparts_in += e->proxies[k].nr_gparts_in;
+    count_sparts_in += e->proxies[k].nr_sparts_in;
   }
   if (e->verbose) {
-    message("sent out %zu/%zu parts/gparts, got %i/%i back.", *Npart, *Ngpart,
-            count_parts_in, count_gparts_in);
+    message("sent out %zu/%zu/%zu parts/gparts/sparts, got %i/%i/%i back.",
+            *Npart, *Ngpart, *Nspart, count_parts_in, count_gparts_in,
+            count_sparts_in);
   }
+
+  /* Reallocate the particle arrays if necessary */
   if (offset_parts + count_parts_in > s->size_parts) {
     message("re-allocating parts array.");
     s->size_parts = (offset_parts + count_parts_in) * engine_parts_size_grow;
@@ -1073,6 +1380,22 @@ void engine_exchange_strays(struct engine *e, size_t offset_parts,
       }
     }
   }
+  if (offset_sparts + count_sparts_in > s->size_sparts) {
+    message("re-allocating sparts array.");
+    s->size_sparts = (offset_sparts + count_sparts_in) * engine_parts_size_grow;
+    struct spart *sparts_new = NULL;
+    if (posix_memalign((void **)&sparts_new, spart_align,
+                       sizeof(struct spart) * s->size_sparts) != 0)
+      error("Failed to allocate new spart data.");
+    memcpy(sparts_new, s->sparts, sizeof(struct spart) * offset_sparts);
+    free(s->sparts);
+    s->sparts = sparts_new;
+    for (size_t k = 0; k < offset_sparts; k++) {
+      if (s->sparts[k].gpart != NULL) {
+        s->sparts[k].gpart->id_or_neg_offset = -k;
+      }
+    }
+  }
   if (offset_gparts + count_gparts_in > s->size_gparts) {
     message("re-allocating gparts array.");
     s->size_gparts = (offset_gparts + count_gparts_in) * engine_parts_size_grow;
@@ -1083,9 +1406,12 @@ void engine_exchange_strays(struct engine *e, size_t offset_parts,
     memcpy(gparts_new, s->gparts, sizeof(struct gpart) * offset_gparts);
     free(s->gparts);
     s->gparts = gparts_new;
+
     for (size_t k = 0; k < offset_gparts; k++) {
-      if (s->gparts[k].id_or_neg_offset < 0) {
+      if (s->gparts[k].type == swift_type_gas) {
         s->parts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k];
+      } else if (s->gparts[k].type == swift_type_star) {
+        s->sparts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k];
       }
     }
   }
@@ -1094,39 +1420,52 @@ void engine_exchange_strays(struct engine *e, size_t offset_parts,
   int nr_in = 0, nr_out = 0;
   for (int k = 0; k < e->nr_proxies; k++) {
     if (e->proxies[k].nr_parts_in > 0) {
-      reqs_in[3 * k] = e->proxies[k].req_parts_in;
-      reqs_in[3 * k + 1] = e->proxies[k].req_xparts_in;
+      reqs_in[4 * k] = e->proxies[k].req_parts_in;
+      reqs_in[4 * k + 1] = e->proxies[k].req_xparts_in;
       nr_in += 2;
     } else {
-      reqs_in[3 * k] = reqs_in[3 * k + 1] = MPI_REQUEST_NULL;
+      reqs_in[4 * k] = reqs_in[4 * k + 1] = MPI_REQUEST_NULL;
     }
     if (e->proxies[k].nr_gparts_in > 0) {
-      reqs_in[3 * k + 2] = e->proxies[k].req_gparts_in;
+      reqs_in[4 * k + 2] = e->proxies[k].req_gparts_in;
+      nr_in += 1;
+    } else {
+      reqs_in[4 * k + 2] = MPI_REQUEST_NULL;
+    }
+    if (e->proxies[k].nr_sparts_in > 0) {
+      reqs_in[4 * k + 3] = e->proxies[k].req_sparts_in;
       nr_in += 1;
     } else {
-      reqs_in[3 * k + 2] = MPI_REQUEST_NULL;
+      reqs_in[4 * k + 3] = MPI_REQUEST_NULL;
     }
+
     if (e->proxies[k].nr_parts_out > 0) {
-      reqs_out[3 * k] = e->proxies[k].req_parts_out;
-      reqs_out[3 * k + 1] = e->proxies[k].req_xparts_out;
+      reqs_out[4 * k] = e->proxies[k].req_parts_out;
+      reqs_out[4 * k + 1] = e->proxies[k].req_xparts_out;
       nr_out += 2;
     } else {
-      reqs_out[3 * k] = reqs_out[3 * k + 1] = MPI_REQUEST_NULL;
+      reqs_out[4 * k] = reqs_out[4 * k + 1] = MPI_REQUEST_NULL;
     }
     if (e->proxies[k].nr_gparts_out > 0) {
-      reqs_out[3 * k + 2] = e->proxies[k].req_gparts_out;
+      reqs_out[4 * k + 2] = e->proxies[k].req_gparts_out;
       nr_out += 1;
     } else {
-      reqs_out[3 * k + 2] = MPI_REQUEST_NULL;
+      reqs_out[4 * k + 2] = MPI_REQUEST_NULL;
+    }
+    if (e->proxies[k].nr_sparts_out > 0) {
+      reqs_out[4 * k + 3] = e->proxies[k].req_sparts_out;
+      nr_out += 1;
+    } else {
+      reqs_out[4 * k + 3] = MPI_REQUEST_NULL;
     }
   }
 
   /* Wait for each part array to come in and collect the new
      parts from the proxies. */
-  int count_parts = 0, count_gparts = 0;
+  int count_parts = 0, count_gparts = 0, count_sparts = 0;
   for (int k = 0; k < nr_in; k++) {
     int err, pid;
-    if ((err = MPI_Waitany(3 * e->nr_proxies, reqs_in, &pid,
+    if ((err = MPI_Waitany(4 * e->nr_proxies, reqs_in, &pid,
                            MPI_STATUS_IGNORE)) != MPI_SUCCESS) {
       char buff[MPI_MAX_ERROR_STRING];
       int res;
@@ -1134,21 +1473,24 @@ void engine_exchange_strays(struct engine *e, size_t offset_parts,
       error("MPI_Waitany failed (%s).", buff);
     }
     if (pid == MPI_UNDEFINED) break;
-    // message( "request from proxy %i has arrived." , pid / 3 );
-    pid = 3 * (pid / 3);
+    // message( "request from proxy %i has arrived." , pid / 4 );
+    pid = 4 * (pid / 4);
 
     /* If all the requests for a given proxy have arrived... */
     if (reqs_in[pid + 0] == MPI_REQUEST_NULL &&
         reqs_in[pid + 1] == MPI_REQUEST_NULL &&
-        reqs_in[pid + 2] == MPI_REQUEST_NULL) {
+        reqs_in[pid + 2] == MPI_REQUEST_NULL &&
+        reqs_in[pid + 3] == MPI_REQUEST_NULL) {
       /* Copy the particle data to the part/xpart/gpart arrays. */
-      struct proxy *prox = &e->proxies[pid / 3];
+      struct proxy *prox = &e->proxies[pid / 4];
       memcpy(&s->parts[offset_parts + count_parts], prox->parts_in,
              sizeof(struct part) * prox->nr_parts_in);
       memcpy(&s->xparts[offset_parts + count_parts], prox->xparts_in,
              sizeof(struct xpart) * prox->nr_parts_in);
       memcpy(&s->gparts[offset_gparts + count_gparts], prox->gparts_in,
              sizeof(struct gpart) * prox->nr_gparts_in);
+      memcpy(&s->sparts[offset_sparts + count_sparts], prox->sparts_in,
+             sizeof(struct spart) * prox->nr_sparts_in);
       /* for (int k = offset; k < offset + count; k++)
          message(
             "received particle %lli, x=[%.3e %.3e %.3e], h=%.3e, from node %i.",
@@ -1158,23 +1500,30 @@ void engine_exchange_strays(struct engine *e, size_t offset_parts,
       /* Re-link the gparts. */
       for (int kk = 0; kk < prox->nr_gparts_in; kk++) {
         struct gpart *gp = &s->gparts[offset_gparts + count_gparts + kk];
-        if (gp->id_or_neg_offset <= 0) {
+
+        if (gp->type == swift_type_gas) {
           struct part *p =
-              &s->parts[offset_gparts + count_parts - gp->id_or_neg_offset];
+              &s->parts[offset_parts + count_parts - gp->id_or_neg_offset];
           gp->id_or_neg_offset = s->parts - p;
           p->gpart = gp;
+        } else if (gp->type == swift_type_star) {
+          struct spart *sp =
+              &s->sparts[offset_sparts + count_sparts - gp->id_or_neg_offset];
+          gp->id_or_neg_offset = s->sparts - sp;
+          sp->gpart = gp;
         }
       }
 
       /* Advance the counters. */
       count_parts += prox->nr_parts_in;
       count_gparts += prox->nr_gparts_in;
+      count_sparts += prox->nr_sparts_in;
     }
   }
 
   /* Wait for all the sends to have finished too. */
   if (nr_out > 0)
-    if (MPI_Waitall(3 * e->nr_proxies, reqs_out, MPI_STATUSES_IGNORE) !=
+    if (MPI_Waitall(4 * e->nr_proxies, reqs_out, MPI_STATUSES_IGNORE) !=
         MPI_SUCCESS)
       error("MPI_Waitall on sends failed.");
 
@@ -1185,6 +1534,7 @@ void engine_exchange_strays(struct engine *e, size_t offset_parts,
   /* Return the number of harvested parts. */
   *Npart = count_parts;
   *Ngpart = count_gparts;
+  *Nspart = count_sparts;
 
 #else
   error("SWIFT was not compiled with MPI support.");
@@ -1441,7 +1791,7 @@ static inline void engine_make_gravity_dependencies(struct scheduler *sched,
 
   /* init --> gravity --> kick */
   scheduler_addunlock(sched, c->super->init, gravity);
-  scheduler_addunlock(sched, gravity, c->super->kick);
+  scheduler_addunlock(sched, gravity, c->super->kick2);
 
   /* grav_up --> gravity ( --> kick) */
   scheduler_addunlock(sched, c->super->grav_up, gravity);
@@ -1460,7 +1810,7 @@ static inline void engine_make_external_gravity_dependencies(
 
   /* init --> external gravity --> kick */
   scheduler_addunlock(sched, c->super->init, gravity);
-  scheduler_addunlock(sched, gravity, c->super->kick);
+  scheduler_addunlock(sched, gravity, c->super->kick2);
 }
 
 /**
@@ -1499,7 +1849,7 @@ void engine_link_gravity_tasks(struct engine *e) {
 
       /* Gather the multipoles --> mm interaction --> kick */
       scheduler_addunlock(sched, gather, t);
-      scheduler_addunlock(sched, t, t->ci->super->kick);
+      scheduler_addunlock(sched, t, t->ci->super->kick2);
 
       /* init --> mm interaction */
       scheduler_addunlock(sched, t->ci->super->init, t);
@@ -1578,19 +1928,24 @@ void engine_link_gravity_tasks(struct engine *e) {
  * @param force The force task to link.
  * @param c The cell.
  */
-static inline void engine_make_hydro_loops_dependencies(struct scheduler *sched,
-                                                        struct task *density,
-                                                        struct task *gradient,
-                                                        struct task *force,
-                                                        struct cell *c) {
+static inline void engine_make_hydro_loops_dependencies(
+    struct scheduler *sched, struct task *density, struct task *gradient,
+    struct task *force, struct cell *c, int with_cooling) {
   /* init --> density loop --> ghost --> gradient loop --> extra_ghost */
-  /* extra_ghost --> force loop --> kick */
+  /* extra_ghost --> force loop  */
   scheduler_addunlock(sched, c->super->init, density);
   scheduler_addunlock(sched, density, c->super->ghost);
   scheduler_addunlock(sched, c->super->ghost, gradient);
   scheduler_addunlock(sched, gradient, c->super->extra_ghost);
   scheduler_addunlock(sched, c->super->extra_ghost, force);
-  scheduler_addunlock(sched, force, c->super->kick);
+
+  if (with_cooling) {
+    /* force loop --> cooling (--> kick2)  */
+    scheduler_addunlock(sched, force, c->super->cooling);
+  } else {
+    /* force loop --> kick2 */
+    scheduler_addunlock(sched, force, c->super->kick2);
+  }
 }
 
 #else
@@ -1602,16 +1957,25 @@ static inline void engine_make_hydro_loops_dependencies(struct scheduler *sched,
  * @param density The density task to link.
  * @param force The force task to link.
  * @param c The cell.
+ * @param with_cooling Are we running with cooling switched on ?
  */
 static inline void engine_make_hydro_loops_dependencies(struct scheduler *sched,
                                                         struct task *density,
                                                         struct task *force,
-                                                        struct cell *c) {
-  /* init --> density loop --> ghost --> force loop --> kick */
+                                                        struct cell *c,
+                                                        int with_cooling) {
+  /* init --> density loop --> ghost --> force loop */
   scheduler_addunlock(sched, c->super->init, density);
   scheduler_addunlock(sched, density, c->super->ghost);
   scheduler_addunlock(sched, c->super->ghost, force);
-  scheduler_addunlock(sched, force, c->super->kick);
+
+  if (with_cooling) {
+    /* force loop --> cooling (--> kick2)  */
+    scheduler_addunlock(sched, force, c->super->cooling);
+  } else {
+    /* force loop --> kick2 */
+    scheduler_addunlock(sched, force, c->super->kick2);
+  }
 }
 
 #endif
@@ -1632,6 +1996,7 @@ void engine_make_extra_hydroloop_tasks(struct engine *e) {
   struct scheduler *sched = &e->sched;
   const int nr_tasks = sched->nr_tasks;
   const int nodeID = e->nodeID;
+  const int with_cooling = (e->policy & engine_policy_cooling);
 
   for (int ind = 0; ind < nr_tasks; ind++) {
     struct task *t = &sched->tasks[ind];
@@ -1651,7 +2016,8 @@ void engine_make_extra_hydroloop_tasks(struct engine *e) {
       engine_addlink(e, &t->ci->force, t3);
 
       /* Now, build all the dependencies for the hydro */
-      engine_make_hydro_loops_dependencies(sched, t, t2, t3, t->ci);
+      engine_make_hydro_loops_dependencies(sched, t, t2, t3, t->ci,
+                                           with_cooling);
 
 #else
 
@@ -1663,7 +2029,7 @@ void engine_make_extra_hydroloop_tasks(struct engine *e) {
       engine_addlink(e, &t->ci->force, t2);
 
       /* Now, build all the dependencies for the hydro */
-      engine_make_hydro_loops_dependencies(sched, t, t2, t->ci);
+      engine_make_hydro_loops_dependencies(sched, t, t2, t->ci, with_cooling);
 #endif
     }
 
@@ -1686,10 +2052,12 @@ void engine_make_extra_hydroloop_tasks(struct engine *e) {
       /* Now, build all the dependencies for the hydro for the cells */
       /* that are local and are not descendant of the same super-cells */
       if (t->ci->nodeID == nodeID) {
-        engine_make_hydro_loops_dependencies(sched, t, t2, t3, t->ci);
+        engine_make_hydro_loops_dependencies(sched, t, t2, t3, t->ci,
+                                             with_cooling);
       }
       if (t->cj->nodeID == nodeID && t->ci->super != t->cj->super) {
-        engine_make_hydro_loops_dependencies(sched, t, t2, t3, t->cj);
+        engine_make_hydro_loops_dependencies(sched, t, t2, t3, t->cj,
+                                             with_cooling);
       }
 
 #else
@@ -1705,10 +2073,10 @@ void engine_make_extra_hydroloop_tasks(struct engine *e) {
       /* Now, build all the dependencies for the hydro for the cells */
       /* that are local and are not descendant of the same super-cells */
       if (t->ci->nodeID == nodeID) {
-        engine_make_hydro_loops_dependencies(sched, t, t2, t->ci);
+        engine_make_hydro_loops_dependencies(sched, t, t2, t->ci, with_cooling);
       }
       if (t->cj->nodeID == nodeID && t->ci->super != t->cj->super) {
-        engine_make_hydro_loops_dependencies(sched, t, t2, t->cj);
+        engine_make_hydro_loops_dependencies(sched, t, t2, t->cj, with_cooling);
       }
 
 #endif
@@ -1736,7 +2104,8 @@ void engine_make_extra_hydroloop_tasks(struct engine *e) {
       /* Now, build all the dependencies for the hydro for the cells */
       /* that are local and are not descendant of the same super-cells */
       if (t->ci->nodeID == nodeID) {
-        engine_make_hydro_loops_dependencies(sched, t, t2, t3, t->ci);
+        engine_make_hydro_loops_dependencies(sched, t, t2, t3, t->ci,
+                                             with_cooling);
       }
 
 #else
@@ -1751,7 +2120,7 @@ void engine_make_extra_hydroloop_tasks(struct engine *e) {
       /* Now, build all the dependencies for the hydro for the cells */
       /* that are local and are not descendant of the same super-cells */
       if (t->ci->nodeID == nodeID) {
-        engine_make_hydro_loops_dependencies(sched, t, t2, t->ci);
+        engine_make_hydro_loops_dependencies(sched, t, t2, t->ci, with_cooling);
       }
 #endif
     }
@@ -1779,10 +2148,12 @@ void engine_make_extra_hydroloop_tasks(struct engine *e) {
       /* Now, build all the dependencies for the hydro for the cells */
       /* that are local and are not descendant of the same super-cells */
       if (t->ci->nodeID == nodeID) {
-        engine_make_hydro_loops_dependencies(sched, t, t2, t3, t->ci);
+        engine_make_hydro_loops_dependencies(sched, t, t2, t3, t->ci,
+                                             with_cooling);
       }
       if (t->cj->nodeID == nodeID && t->ci->super != t->cj->super) {
-        engine_make_hydro_loops_dependencies(sched, t, t2, t3, t->cj);
+        engine_make_hydro_loops_dependencies(sched, t, t2, t3, t->cj,
+                                             with_cooling);
       }
 
 #else
@@ -1798,25 +2169,13 @@ void engine_make_extra_hydroloop_tasks(struct engine *e) {
       /* Now, build all the dependencies for the hydro for the cells */
       /* that are local and are not descendant of the same super-cells */
       if (t->ci->nodeID == nodeID) {
-        engine_make_hydro_loops_dependencies(sched, t, t2, t->ci);
+        engine_make_hydro_loops_dependencies(sched, t, t2, t->ci, with_cooling);
       }
       if (t->cj->nodeID == nodeID && t->ci->super != t->cj->super) {
-        engine_make_hydro_loops_dependencies(sched, t, t2, t->cj);
+        engine_make_hydro_loops_dependencies(sched, t, t2, t->cj, with_cooling);
       }
 #endif
     }
-    /* Cooling tasks should depend on kick and unlock sourceterms */
-    else if (t->type == task_type_cooling) {
-      scheduler_addunlock(sched, t->ci->kick, t);
-    }
-    /* source terms depend on cooling if performed, else on kick. It is the last
-       task */
-    else if (t->type == task_type_sourceterms) {
-      if (e->policy == engine_policy_cooling)
-        scheduler_addunlock(sched, t->ci->cooling, t);
-      else
-        scheduler_addunlock(sched, t->ci->kick, t);
-    }
   }
 }
 
@@ -1985,9 +2344,9 @@ void engine_marktasks_mapper(void *map_data, int num_elements,
                              void *extra_data) {
   /* Unpack the arguments. */
   struct task *tasks = (struct task *)map_data;
-  const int ti_end = ((size_t *)extra_data)[0];
   size_t *rebuild_space = &((size_t *)extra_data)[1];
   struct scheduler *s = (struct scheduler *)(((size_t *)extra_data)[2]);
+  struct engine *e = (struct engine *)((size_t *)extra_data)[0];
 
   for (int ind = 0; ind < num_elements; ind++) {
     struct task *t = &tasks[ind];
@@ -1998,7 +2357,7 @@ void engine_marktasks_mapper(void *map_data, int num_elements,
         t->type == task_type_sourceterms || t->type == task_type_sub_self) {
 
       /* Set this task's skip. */
-      if (t->ci->ti_end_min <= ti_end) scheduler_activate(s, t);
+      if (cell_is_active(t->ci, e)) scheduler_activate(s, t);
     }
 
     /* Pair? */
@@ -2016,7 +2375,7 @@ void engine_marktasks_mapper(void *map_data, int num_elements,
         *rebuild_space = 1;
 
       /* Set this task's skip, otherwise nothing to do. */
-      if (ci->ti_end_min <= ti_end || cj->ti_end_min <= ti_end)
+      if (cell_is_active(t->ci, e) || cell_is_active(t->cj, e))
         scheduler_activate(s, t);
       else
         continue;
@@ -2043,8 +2402,10 @@ void engine_marktasks_mapper(void *map_data, int num_elements,
 
         /* Activate the tasks to recv foreign cell ci's data. */
         scheduler_activate(s, ci->recv_xv);
-        scheduler_activate(s, ci->recv_rho);
-        scheduler_activate(s, ci->recv_ti);
+        if (cell_is_active(ci, e)) {
+          scheduler_activate(s, ci->recv_rho);
+          scheduler_activate(s, ci->recv_ti);
+        }
 
         /* Look for the local cell cj's send tasks. */
         struct link *l = NULL;
@@ -2054,24 +2415,33 @@ void engine_marktasks_mapper(void *map_data, int num_elements,
         if (l == NULL) error("Missing link to send_xv task.");
         scheduler_activate(s, l->t);
 
-        for (l = cj->send_rho; l != NULL && l->t->cj->nodeID != ci->nodeID;
-             l = l->next)
-          ;
-        if (l == NULL) error("Missing link to send_rho task.");
-        scheduler_activate(s, l->t);
-
-        for (l = cj->send_ti; l != NULL && l->t->cj->nodeID != ci->nodeID;
-             l = l->next)
-          ;
-        if (l == NULL) error("Missing link to send_ti task.");
-        scheduler_activate(s, l->t);
+        if (cj->super->drift)
+          scheduler_activate(s, cj->super->drift);
+        else
+          error("Drift task missing !");
+
+        if (cell_is_active(cj, e)) {
+          for (l = cj->send_rho; l != NULL && l->t->cj->nodeID != ci->nodeID;
+               l = l->next)
+            ;
+          if (l == NULL) error("Missing link to send_rho task.");
+          scheduler_activate(s, l->t);
+
+          for (l = cj->send_ti; l != NULL && l->t->cj->nodeID != ci->nodeID;
+               l = l->next)
+            ;
+          if (l == NULL) error("Missing link to send_ti task.");
+          scheduler_activate(s, l->t);
+        }
 
       } else if (cj->nodeID != engine_rank) {
 
         /* Activate the tasks to recv foreign cell cj's data. */
         scheduler_activate(s, cj->recv_xv);
-        scheduler_activate(s, cj->recv_rho);
-        scheduler_activate(s, cj->recv_ti);
+        if (cell_is_active(cj, e)) {
+          scheduler_activate(s, cj->recv_rho);
+          scheduler_activate(s, cj->recv_ti);
+        }
 
         /* Look for the local cell ci's send tasks. */
         struct link *l = NULL;
@@ -2081,32 +2451,41 @@ void engine_marktasks_mapper(void *map_data, int num_elements,
         if (l == NULL) error("Missing link to send_xv task.");
         scheduler_activate(s, l->t);
 
-        for (l = ci->send_rho; l != NULL && l->t->cj->nodeID != cj->nodeID;
-             l = l->next)
-          ;
-        if (l == NULL) error("Missing link to send_rho task.");
-        scheduler_activate(s, l->t);
-
-        for (l = ci->send_ti; l != NULL && l->t->cj->nodeID != cj->nodeID;
-             l = l->next)
-          ;
-        if (l == NULL) error("Missing link to send_ti task.");
-        scheduler_activate(s, l->t);
+        if (ci->super->drift)
+          scheduler_activate(s, ci->super->drift);
+        else
+          error("Drift task missing !");
+
+        if (cell_is_active(ci, e)) {
+          for (l = ci->send_rho; l != NULL && l->t->cj->nodeID != cj->nodeID;
+               l = l->next)
+            ;
+          if (l == NULL) error("Missing link to send_rho task.");
+          scheduler_activate(s, l->t);
+
+          for (l = ci->send_ti; l != NULL && l->t->cj->nodeID != cj->nodeID;
+               l = l->next)
+            ;
+          if (l == NULL) error("Missing link to send_ti task.");
+          scheduler_activate(s, l->t);
+        }
       }
 
 #endif
     }
 
-    /* Kick? */
-    else if (t->type == task_type_kick) {
-      t->ci->updated = 0;
-      t->ci->g_updated = 0;
-      if (t->ci->ti_end_min <= ti_end) scheduler_activate(s, t);
+    /* Kick/Drift/Init? */
+    else if (t->type == task_type_kick1 || t->type == task_type_kick2 ||
+             t->type == task_type_drift || t->type == task_type_init) {
+      if (cell_is_active(t->ci, e)) scheduler_activate(s, t);
     }
 
-    /* Init? */
-    else if (t->type == task_type_init) {
-      if (t->ci->ti_end_min <= ti_end) scheduler_activate(s, t);
+    /* Time-step? */
+    else if (t->type == task_type_timestep) {
+      t->ci->updated = 0;
+      t->ci->g_updated = 0;
+      t->ci->s_updated = 0;
+      if (cell_is_active(t->ci, e)) scheduler_activate(s, t);
     }
 
     /* Tasks with no cells should not be skipped? */
@@ -2129,7 +2508,7 @@ int engine_marktasks(struct engine *e) {
   int rebuild_space = 0;
 
   /* Run through the tasks and mark as skip or not. */
-  size_t extra_data[3] = {e->ti_current, rebuild_space, (size_t)&e->sched};
+  size_t extra_data[3] = {(size_t)e, rebuild_space, (size_t)&e->sched};
   threadpool_map(&e->threadpool, engine_marktasks_mapper, s->tasks, s->nr_tasks,
                  sizeof(struct task), 10000, extra_data);
   rebuild_space = extra_data[1];
@@ -2225,14 +2604,20 @@ void engine_rebuild(struct engine *e) {
  * @brief Prepare the #engine by re-building the cells and tasks.
  *
  * @param e The #engine to prepare.
- * @param nodrift Whether to drift particles before rebuilding or not. Will
+ * @param drift_all Whether to drift particles before rebuilding or not. Will
  *                not be necessary if all particles have already been
  *                drifted (before repartitioning for instance).
+ * @param postrepart If we have just repartitioned, if so we need to defer the
+ *                   skip until after the rebuild and not check the if all
+ *                   cells have been drifted.
  */
-void engine_prepare(struct engine *e, int nodrift) {
+void engine_prepare(struct engine *e, int drift_all, int postrepart) {
 
   TIMER_TIC;
 
+  /* Unskip active tasks and check for rebuild */
+  if (!postrepart) engine_unskip(e);
+
   /* Run through the tasks and mark as skip or not. */
   int rebuild = e->forcerebuild;
 
@@ -2249,21 +2634,18 @@ void engine_prepare(struct engine *e, int nodrift) {
   if (rebuild) {
 
     /* Drift all particles to the current time if needed. */
-    if (!nodrift) {
-      e->drift_all = 1;
-      engine_drift(e);
-
-      /* Restore the default drifting policy */
-      e->drift_all = (e->policy & engine_policy_drift_all);
-    }
+    if (drift_all) engine_drift_all(e);
 
 #ifdef SWIFT_DEBUG_CHECKS
-    /* Check that all cells have been drifted to the current time */
-    space_check_drift_point(e->s, e->ti_current);
+    /* Check that all cells have been drifted to the current time, unless
+     * we have just repartitioned, that can include cells that have not
+     * previously been active on this rank. */
+    if (!postrepart) space_check_drift_point(e->s, e->ti_current);
 #endif
 
     engine_rebuild(e);
   }
+  if (postrepart) engine_unskip(e);
 
   /* Re-rank the tasks every now and then. */
   if (e->tasks_age % engine_tasksreweight == 1) {
@@ -2324,36 +2706,35 @@ void engine_barrier(struct engine *e, int tid) {
  */
 void engine_collect_kick(struct cell *c) {
 
-  /* Skip super-cells (Their values are already set) */
-  if (c->kick != NULL) return;
+/* Skip super-cells (Their values are already set) */
+#ifdef WITH_MPI
+  if (c->timestep != NULL || c->recv_ti != NULL) return;
+#else
+  if (c->timestep != NULL) return;
+#endif /* WITH_MPI */
 
   /* Counters for the different quantities. */
-  int updated = 0, g_updated = 0;
-  int ti_end_min = max_nr_timesteps;
-
-  /* Only do something is the cell is non-empty */
-  if (c->count != 0 || c->gcount != 0) {
-
-    /* If this cell is not split, I'm in trouble. */
-    if (!c->split) error("Cell is not split.");
+  int updated = 0, g_updated = 0, s_updated = 0;
+  integertime_t ti_end_min = max_nr_timesteps;
 
-    /* Collect the values from the progeny. */
-    for (int k = 0; k < 8; k++) {
-      struct cell *cp = c->progeny[k];
-      if (cp != NULL) {
+  /* Collect the values from the progeny. */
+  for (int k = 0; k < 8; k++) {
+    struct cell *cp = c->progeny[k];
+    if (cp != NULL && (cp->count > 0 || cp->gcount > 0 || cp->scount > 0)) {
 
-        /* Recurse */
-        engine_collect_kick(cp);
+      /* Recurse */
+      engine_collect_kick(cp);
 
-        /* And update */
-        ti_end_min = min(ti_end_min, cp->ti_end_min);
-        updated += cp->updated;
-        g_updated += cp->g_updated;
+      /* And update */
+      ti_end_min = min(ti_end_min, cp->ti_end_min);
+      updated += cp->updated;
+      g_updated += cp->g_updated;
+      s_updated += cp->s_updated;
 
-        /* Collected, so clear for next time. */
-        cp->updated = 0;
-        cp->g_updated = 0;
-      }
+      /* Collected, so clear for next time. */
+      cp->updated = 0;
+      cp->g_updated = 0;
+      cp->s_updated = 0;
     }
   }
 
@@ -2361,6 +2742,7 @@ void engine_collect_kick(struct cell *c) {
   c->ti_end_min = ti_end_min;
   c->updated = updated;
   c->g_updated = g_updated;
+  c->s_updated = s_updated;
 }
 
 /**
@@ -2372,14 +2754,14 @@ void engine_collect_kick(struct cell *c) {
 void engine_collect_timestep(struct engine *e) {
 
   const ticks tic = getticks();
-  int updates = 0, g_updates = 0;
-  int ti_end_min = max_nr_timesteps;
+  int updates = 0, g_updates = 0, s_updates = 0;
+  integertime_t ti_end_min = max_nr_timesteps;
   const struct space *s = e->s;
 
   /* Collect the cell data. */
-  for (int k = 0; k < s->nr_cells; k++)
-    if (s->cells_top[k].nodeID == e->nodeID) {
-      struct cell *c = &s->cells_top[k];
+  for (int k = 0; k < s->nr_cells; k++) {
+    struct cell *c = &s->cells_top[k];
+    if (c->count > 0 || c->gcount > 0 || c->scount > 0) {
 
       /* Make the top-cells recurse */
       engine_collect_kick(c);
@@ -2388,38 +2770,44 @@ void engine_collect_timestep(struct engine *e) {
       ti_end_min = min(ti_end_min, c->ti_end_min);
       updates += c->updated;
       g_updates += c->g_updated;
+      s_updates += c->s_updated;
 
       /* Collected, so clear for next time. */
       c->updated = 0;
       c->g_updated = 0;
+      c->s_updated = 0;
     }
+  }
 
 /* Aggregate the data from the different nodes. */
 #ifdef WITH_MPI
   {
-    int in_i[1], out_i[1];
+    integertime_t in_i[1], out_i[1];
     in_i[0] = 0;
     out_i[0] = ti_end_min;
-    if (MPI_Allreduce(out_i, in_i, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD) !=
-        MPI_SUCCESS)
+    if (MPI_Allreduce(out_i, in_i, 1, MPI_LONG_LONG_INT, MPI_MIN,
+                      MPI_COMM_WORLD) != MPI_SUCCESS)
       error("Failed to aggregate t_end_min.");
     ti_end_min = in_i[0];
   }
   {
-    unsigned long long in_ll[2], out_ll[2];
+    long long in_ll[3], out_ll[3];
     out_ll[0] = updates;
     out_ll[1] = g_updates;
-    if (MPI_Allreduce(out_ll, in_ll, 2, MPI_LONG_LONG_INT, MPI_SUM,
+    out_ll[2] = s_updates;
+    if (MPI_Allreduce(out_ll, in_ll, 3, MPI_LONG_LONG_INT, MPI_SUM,
                       MPI_COMM_WORLD) != MPI_SUCCESS)
       error("Failed to aggregate energies.");
     updates = in_ll[0];
     g_updates = in_ll[1];
+    s_updates = in_ll[2];
   }
 #endif
 
   e->ti_end_min = ti_end_min;
   e->updates = updates;
   e->g_updates = g_updates;
+  e->s_updates = s_updates;
 
   if (e->verbose)
     message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
@@ -2463,7 +2851,7 @@ void engine_print_stats(struct engine *e) {
 }
 
 /**
- * @brief Sets all the force and kick tasks to be skipped.
+ * @brief Sets all the force, drift and kick tasks to be skipped.
  *
  * @param e The #engine to act on.
  */
@@ -2477,12 +2865,33 @@ void engine_skip_force_and_kick(struct engine *e) {
     struct task *t = &tasks[i];
 
     /* Skip everything that updates the particles */
-    if (t->subtype == task_subtype_force || t->type == task_type_kick ||
-        t->type == task_type_cooling || t->type == task_type_sourceterms)
+    if (t->type == task_type_drift || t->type == task_type_kick1 ||
+        t->type == task_type_kick2 || t->type == task_type_timestep ||
+        t->subtype == task_subtype_force || t->type == task_type_cooling ||
+        t->type == task_type_sourceterms)
       t->skip = 1;
   }
 }
 
+/**
+ * @brief Sets all the drift and first kick tasks to be skipped.
+ *
+ * @param e The #engine to act on.
+ */
+void engine_skip_drift_and_kick(struct engine *e) {
+
+  struct task *tasks = e->sched.tasks;
+  const int nr_tasks = e->sched.nr_tasks;
+
+  for (int i = 0; i < nr_tasks; ++i) {
+
+    struct task *t = &tasks[i];
+
+    /* Skip everything that updates the particles */
+    if (t->type == task_type_drift || t->type == task_type_kick1) t->skip = 1;
+  }
+}
+
 /**
  * @brief Launch the runners.
  *
@@ -2538,9 +2947,9 @@ void engine_init_particles(struct engine *e, int flag_entropy_ICs) {
   struct clocks_time time1, time2;
   clocks_gettime(&time1);
 
-  if (e->nodeID == 0) message("Running initialisation fake time-step.");
+  if (e->nodeID == 0) message("Computing initial gas densities.");
 
-  engine_prepare(e, 1);
+  engine_prepare(e, 0, 0);
 
   engine_marktasks(e);
 
@@ -2555,8 +2964,12 @@ void engine_init_particles(struct engine *e, int flag_entropy_ICs) {
   /* Apply some conversions (e.g. internal energy -> entropy) */
   if (!flag_entropy_ICs) {
 
+    if (e->nodeID == 0) message("Converting internal energy variable.");
+
     /* Apply the conversion */
-    space_map_cells_pre(s, 0, cell_convert_hydro, NULL);
+    // space_map_cells_pre(s, 0, cell_convert_hydro, NULL);
+    for (size_t i = 0; i < s->nr_parts; ++i)
+      hydro_convert_quantities(&s->parts[i], &s->xparts[i]);
 
     /* Correct what we did (e.g. in PE-SPH, need to recompute rho_bar) */
     if (hydro_need_extra_init_loop) {
@@ -2566,10 +2979,25 @@ void engine_init_particles(struct engine *e, int flag_entropy_ICs) {
     }
   }
 
+  /* Now time to get ready for the first time-step */
+  if (e->nodeID == 0) message("Running initial fake time-step.");
+
+  engine_marktasks(e);
+
+  engine_skip_drift_and_kick(e);
+
+  engine_launch(e, e->nr_threads);
+
   clocks_gettime(&time2);
 
+#ifdef SWIFT_DEBUG_CHECKS
+  space_check_timesteps(e->s);
+  part_verify_links(e->s->parts, e->s->gparts, e->s->sparts, e->s->nr_parts,
+                    e->s->nr_gparts, e->s->nr_sparts, e->verbose);
+#endif
+
   /* Ready to go */
-  e->step = -1;
+  e->step = 0;
   e->forcerebuild = 1;
   e->wallclock_time = (float)clocks_diff(&time1, &time2);
 
@@ -2649,11 +3077,7 @@ void engine_step(struct engine *e, struct repartition *repartition) {
     snapshot_drift_time = e->timeStep;
 
     /* Drift everybody to the snapshot position */
-    e->drift_all = 1;
-    engine_drift(e);
-
-    /* Restore the default drifting policy */
-    e->drift_all = (e->policy & engine_policy_drift_all);
+    engine_drift_all(e);
 
     /* Dump... */
     engine_dump_snapshot(e);
@@ -2673,43 +3097,42 @@ void engine_step(struct engine *e, struct repartition *repartition) {
   if (e->nodeID == 0) {
 
     /* Print some information to the screen */
-    printf("  %6d %14e %14e %10zu %10zu %21.3f\n", e->step, e->time,
-           e->timeStep, e->updates, e->g_updates, e->wallclock_time);
+    printf("  %6d %14e %14e %10zu %10zu %10zu %21.3f\n", e->step, e->time,
+           e->timeStep, e->updates, e->g_updates, e->s_updates,
+           e->wallclock_time);
     fflush(stdout);
 
-    fprintf(e->file_timesteps, "  %6d %14e %14e %10zu %10zu %21.3f\n", e->step,
-            e->time, e->timeStep, e->updates, e->g_updates, e->wallclock_time);
+    fprintf(e->file_timesteps, "  %6d %14e %14e %10zu %10zu %10zu %21.3f\n",
+            e->step, e->time, e->timeStep, e->updates, e->g_updates,
+            e->s_updates, e->wallclock_time);
     fflush(e->file_timesteps);
   }
 
   /* Drift only the necessary particles, that means all particles
    * if we are about to repartition. */
   const int repart = (e->forcerepart != REPART_NONE);
-  e->drift_all = repart || e->drift_all;
-  engine_drift(e);
+  const int drift_all = (e->policy & engine_policy_drift_all);
+  if (repart || drift_all) engine_drift_all(e);
 
   /* Re-distribute the particles amongst the nodes? */
   if (repart) engine_repartition(e);
 
   /* Prepare the space. */
-  engine_prepare(e, e->drift_all);
-
-  /* Restore the default drifting policy */
-  e->drift_all = (e->policy & engine_policy_drift_all);
+  engine_prepare(e, !(drift_all || repart), repart);
 
   if (e->verbose) engine_print_task_counts(e);
 
-  /* Send off the runners. */
-  TIMER_TIC;
-  engine_launch(e, e->nr_threads);
-  TIMER_TOC(timer_runners);
-
   /* Save some statistics */
   if (e->time - e->timeLastStatistics >= e->deltaTimeStatistics) {
     engine_print_stats(e);
     e->timeLastStatistics += e->deltaTimeStatistics;
   }
 
+  /* Send off the runners. */
+  TIMER_TIC;
+  engine_launch(e, e->nr_threads);
+  TIMER_TOC(timer_runners);
+
   TIMER_TOC2(timer_step);
 
   clocks_gettime(&time2);
@@ -2734,19 +3157,40 @@ int engine_is_done(struct engine *e) {
 }
 
 /**
- * @brief Drift particles using the current engine drift policy.
+ * @brief Unskip all the tasks that act on active cells at this time.
  *
  * @param e The #engine.
  */
-void engine_drift(struct engine *e) {
+void engine_unskip(struct engine *e) {
+
+  const ticks tic = getticks();
+  threadpool_map(&e->threadpool, runner_do_unskip_mapper, e->s->cells_top,
+                 e->s->nr_cells, sizeof(struct cell), 1, e);
+
+  if (e->verbose)
+    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
+            clocks_getunit());
+}
+
+/**
+ * @brief Drift *all* particles forward to the current time.
+ *
+ * @param e The #engine.
+ */
+void engine_drift_all(struct engine *e) {
 
   const ticks tic = getticks();
   threadpool_map(&e->threadpool, runner_do_drift_mapper, e->s->cells_top,
                  e->s->nr_cells, sizeof(struct cell), 1, e);
 
+#ifdef SWIFT_DEBUG_CHECKS
+  /* Check that all cells have been drifted to the current time. */
+  space_check_drift_point(e->s, e->ti_current);
+#endif
+
   if (e->verbose)
-    message("took %.3f %s (including task unskipping).",
-            clocks_from_ticks(getticks() - tic), clocks_getunit());
+    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
+            clocks_getunit());
 }
 
 /**
@@ -2889,9 +3333,26 @@ void engine_split(struct engine *e, struct partition *initial_partition) {
   s->parts = parts_new;
   s->xparts = xparts_new;
 
-  /* Re-link the gparts. */
+  /* Re-link the gparts to their parts. */
   if (s->nr_parts > 0 && s->nr_gparts > 0)
-    part_relink_gparts(s->parts, s->nr_parts, 0);
+    part_relink_gparts_to_parts(s->parts, s->nr_parts, 0);
+
+  /* Re-allocate the local sparts. */
+  if (e->verbose)
+    message("Re-allocating sparts array from %zu to %zu.", s->size_sparts,
+            (size_t)(s->nr_sparts * 1.2));
+  s->size_sparts = s->nr_sparts * 1.2;
+  struct spart *sparts_new = NULL;
+  if (posix_memalign((void **)&sparts_new, spart_align,
+                     sizeof(struct spart) * s->size_sparts) != 0)
+    error("Failed to allocate new spart data.");
+  memcpy(sparts_new, s->sparts, sizeof(struct spart) * s->nr_sparts);
+  free(s->sparts);
+  s->sparts = sparts_new;
+
+  /* Re-link the gparts to their sparts. */
+  if (s->nr_sparts > 0 && s->nr_gparts > 0)
+    part_relink_gparts_to_sparts(s->sparts, s->nr_sparts, 0);
 
   /* Re-allocate the local gparts. */
   if (e->verbose)
@@ -2908,31 +3369,17 @@ void engine_split(struct engine *e, struct partition *initial_partition) {
 
   /* Re-link the parts. */
   if (s->nr_parts > 0 && s->nr_gparts > 0)
-    part_relink_parts(s->gparts, s->nr_gparts, s->parts);
+    part_relink_parts_to_gparts(s->gparts, s->nr_gparts, s->parts);
+
+  /* Re-link the sparts. */
+  if (s->nr_sparts > 0 && s->nr_gparts > 0)
+    part_relink_sparts_to_gparts(s->gparts, s->nr_gparts, s->sparts);
 
 #ifdef SWIFT_DEBUG_CHECKS
 
   /* Verify that the links are correct */
-  for (size_t k = 0; k < s->nr_gparts; ++k) {
-
-    if (s->gparts[k].id_or_neg_offset <= 0) {
-
-      struct part *part = &s->parts[-s->gparts[k].id_or_neg_offset];
-
-      if (part->gpart != &s->gparts[k]) error("Linking problem !");
-
-      if (s->gparts[k].x[0] != part->x[0] || s->gparts[k].x[1] != part->x[1] ||
-          s->gparts[k].x[2] != part->x[2])
-        error("Linked particles are not at the same position !");
-    }
-  }
-  for (size_t k = 0; k < s->nr_parts; ++k) {
-
-    if (s->parts[k].gpart != NULL &&
-        s->parts[k].gpart->id_or_neg_offset != -(ptrdiff_t)k)
-      error("Linking problem !");
-  }
-
+  part_verify_links(s->parts, s->gparts, s->sparts, s->nr_parts, s->nr_gparts,
+                    s->nr_sparts, e->verbose);
 #endif
 
 #else
@@ -2995,7 +3442,7 @@ static cpu_set_t *engine_entry_affinity() {
 
 /**
  * @brief  Ensure the NUMA node on which we initialise (first touch) everything
- *  doesn't change before engine_init allocates NUMA-local workers.
+ * doesn't change before engine_init allocates NUMA-local workers.
  */
 void engine_pin() {
 
@@ -3088,7 +3535,6 @@ void engine_init(struct engine *e, struct space *s,
   e->timeStep = 0.;
   e->timeBase = 0.;
   e->timeBase_inv = 0.;
-  e->drift_all = (policy & engine_policy_drift_all);
   e->internalUnits = internal_units;
   e->timeFirstSnapshot =
       parser_get_param_double(params, "Snapshots:time_first");
@@ -3233,6 +3679,7 @@ void engine_init(struct engine *e, struct space *s,
 #endif
 
   if (with_aff) {
+#ifdef HAVE_SETAFFINITY
 #ifdef WITH_MPI
     printf("[%04i] %s engine_init: cpu map is [ ", nodeID,
            clocks_get_timesincestart());
@@ -3241,6 +3688,7 @@ void engine_init(struct engine *e, struct space *s,
 #endif
     for (int i = 0; i < nr_affinity_cores; i++) printf("%i ", cpuid[i]);
     printf("].\n");
+#endif
   }
 
   /* Are we doing stuff in parallel? */
@@ -3293,9 +3741,9 @@ void engine_init(struct engine *e, struct space *s,
             e->hydro_properties->delta_neighbours,
             e->hydro_properties->eta_neighbours);
 
-    fprintf(e->file_timesteps, "# %6s %14s %14s %10s %10s %16s [%s]\n", "Step",
-            "Time", "Time-step", "Updates", "g-Updates", "Wall-clock time",
-            clocks_getunit());
+    fprintf(e->file_timesteps, "# %6s %14s %14s %10s %10s %10s %16s [%s]\n",
+            "Step", "Time", "Time-step", "Updates", "g-Updates", "s-Updates",
+            "Wall-clock time", clocks_getunit());
     fflush(e->file_timesteps);
   }
 
@@ -3430,6 +3878,11 @@ void engine_init(struct engine *e, struct space *s,
       e->runners[k].cpuid = k;
       e->runners[k].qid = k * nr_queues / e->nr_threads;
     }
+
+    /* Allocate particle cache. */
+    e->runners[k].par_cache.count = 0;
+    cache_init(&e->runners[k].par_cache, CACHE_SIZE);
+
     if (verbose) {
       if (with_aff)
         message("runner %i on cpuid=%i with qid=%i.", e->runners[k].id,
@@ -3473,7 +3926,7 @@ void engine_print_policy(struct engine *e) {
 #else
   printf("%s engine_policy: engine policies are [ ",
          clocks_get_timesincestart());
-  for (int k = 1; k < 32; k++)
+  for (int k = 1; k < 31; k++)
     if (e->policy & (1 << k)) printf(" %s ", engine_policy_names[k + 1]);
   printf(" ]\n");
   fflush(stdout);
@@ -3515,6 +3968,8 @@ void engine_compute_next_snapshot_time(struct engine *e) {
  */
 void engine_clean(struct engine *e) {
 
+  for (int i = 0; i < e->nr_threads; ++i) cache_clean(&e->runners[i].par_cache);
+  free(e->runners);
   free(e->snapshotUnits);
   free(e->links);
   scheduler_clean(&e->sched);
diff --git a/src/engine.h b/src/engine.h
index 8490a8907203e92b0e73738b7f469f61e9c49f2b..2514179078d6dc679f371ec14c3e53737c5b682e 100644
--- a/src/engine.h
+++ b/src/engine.h
@@ -65,7 +65,8 @@ enum engine_policy {
   engine_policy_cosmology = (1 << 10),
   engine_policy_drift_all = (1 << 11),
   engine_policy_cooling = (1 << 12),
-  engine_policy_sourceterms = (1 << 13)
+  engine_policy_sourceterms = (1 << 13),
+  engine_policy_stars = (1 << 14)
 };
 
 extern const char *engine_policy_names[];
@@ -82,9 +83,6 @@ extern const char *engine_policy_names[];
 /* The rank of the engine as a global variable (for messages). */
 extern int engine_rank;
 
-/* The maximal number of timesteps in a simulation */
-#define max_nr_timesteps (1 << 28)
-
 /* Data structure for the engine. */
 struct engine {
 
@@ -117,11 +115,11 @@ struct engine {
 
   /* The previous system time. */
   double timeOld;
-  int ti_old;
+  integertime_t ti_old;
 
   /* The current system time. */
   double time;
-  int ti_current;
+  integertime_t ti_current;
 
   /* Time step */
   double timeStep;
@@ -131,13 +129,10 @@ struct engine {
   double timeBase_inv;
 
   /* Minimal ti_end for the next time-step */
-  int ti_end_min;
-
-  /* Are we drifting all particles now ? */
-  int drift_all;
+  integertime_t ti_end_min;
 
   /* Number of particles updated */
-  size_t updates, g_updates;
+  size_t updates, g_updates, s_updates;
 
   /* Total numbers of particles in the system. */
   size_t total_nr_parts, total_nr_gparts;
@@ -148,7 +143,7 @@ struct engine {
   /* Snapshot information */
   double timeFirstSnapshot;
   double deltaTimeSnapshot;
-  int ti_nextSnapshot;
+  integertime_t ti_nextSnapshot;
   char snapshotBaseName[200];
   int snapshotCompression;
   struct UnitSystem *snapshotUnits;
@@ -228,7 +223,8 @@ struct engine {
 /* Function prototypes. */
 void engine_barrier(struct engine *e, int tid);
 void engine_compute_next_snapshot_time(struct engine *e);
-void engine_drift(struct engine *e);
+void engine_unskip(struct engine *e);
+void engine_drift_all(struct engine *e);
 void engine_dump_snapshot(struct engine *e);
 void engine_init(struct engine *e, struct space *s,
                  const struct swift_params *params, int nr_nodes, int nodeID,
@@ -240,7 +236,7 @@ void engine_init(struct engine *e, struct space *s,
                  const struct cooling_function_data *cooling,
                  struct sourceterms *sourceterms);
 void engine_launch(struct engine *e, int nr_runners);
-void engine_prepare(struct engine *e, int nodrift);
+void engine_prepare(struct engine *e, int drift_all, int postrepart);
 void engine_print(struct engine *e);
 void engine_init_particles(struct engine *e, int flag_entropy_ICs);
 void engine_step(struct engine *e, struct repartition *repartition);
@@ -248,7 +244,9 @@ void engine_maketasks(struct engine *e);
 void engine_split(struct engine *e, struct partition *initial_partition);
 void engine_exchange_strays(struct engine *e, size_t offset_parts,
                             int *ind_part, size_t *Npart, size_t offset_gparts,
-                            int *ind_gpart, size_t *Ngpart);
+                            int *ind_gpart, size_t *Ngpart,
+                            size_t offset_sparts, int *ind_spart,
+                            size_t *Nspart);
 void engine_rebuild(struct engine *e);
 void engine_repartition(struct engine *e);
 void engine_makeproxies(struct engine *e);
diff --git a/src/gravity/Default/gravity.h b/src/gravity/Default/gravity.h
index 9e0ca81edff06b8a32afb185f24a88b41dc87da7..a0bfee05f8b7f93cce65e8b9a3e7e322e166569d 100644
--- a/src/gravity/Default/gravity.h
+++ b/src/gravity/Default/gravity.h
@@ -42,22 +42,6 @@ gravity_compute_timestep_self(const struct gpart* const gp) {
   return dt;
 }
 
-/**
- * @brief Initialises the g-particles for the first time
- *
- * This function is called only once just after the ICs have been
- * read in to do some conversions.
- *
- * @param gp The particle to act upon
- */
-__attribute__((always_inline)) INLINE static void gravity_first_init_gpart(
-    struct gpart* gp) {
-
-  gp->ti_begin = 0;
-  gp->ti_end = 0;
-  gp->epsilon = 0.;  // MATTHIEU
-}
-
 /**
  * @brief Prepares a g-particle for the gravity calculation
  *
@@ -97,9 +81,25 @@ __attribute__((always_inline)) INLINE static void gravity_end_force(
  *
  * @param gp The particle to act upon
  * @param dt The time-step for this kick
- * @param half_dt The half time-step for this kick
  */
 __attribute__((always_inline)) INLINE static void gravity_kick_extra(
-    struct gpart* gp, float dt, float half_dt) {}
+    struct gpart* gp, float dt) {}
+
+/**
+ * @brief Initialises the g-particles for the first time
+ *
+ * This function is called only once just after the ICs have been
+ * read in to do some conversions.
+ *
+ * @param gp The particle to act upon
+ */
+__attribute__((always_inline)) INLINE static void gravity_first_init_gpart(
+    struct gpart* gp) {
+
+  gp->time_bin = 0;
+  gp->epsilon = 0.;  // MATTHIEU
+
+  gravity_init_gpart(gp);
+}
 
 #endif /* SWIFT_DEFAULT_GRAVITY_H */
diff --git a/src/gravity/Default/gravity_debug.h b/src/gravity/Default/gravity_debug.h
index c284f543b3be06297600c010e302423eb683adc9..f0d145647ab3f973f3c0ffc2f995ee01d534bc72 100644
--- a/src/gravity/Default/gravity_debug.h
+++ b/src/gravity/Default/gravity_debug.h
@@ -22,12 +22,10 @@
 __attribute__((always_inline)) INLINE static void gravity_debug_particle(
     const struct gpart* p) {
   printf(
-      "x=[%.3e,%.3e,%.3e], "
-      "v_full=[%.3e,%.3e,%.3e] \n a=[%.3e,%.3e,%.3e],\n "
-      "mass=%.3e t_begin=%d, t_end=%d\n",
-      p->x[0], p->x[1], p->x[2], p->v_full[0], p->v_full[1], p->v_full[2],
-      p->a_grav[0], p->a_grav[1], p->a_grav[2], p->mass, p->ti_begin,
-      p->ti_end);
+      "mass=%.3e epsilon=%.5e time_bin=%d\n"
+      "x=[%.5e,%.5e,%.5e], v_full=[%.5e,%.5e,%.5e], a=[%.5e,%.5e,%.5e]\n",
+      p->mass, p->epsilon, p->time_bin, p->x[0], p->x[1], p->x[2], p->v_full[0],
+      p->v_full[1], p->v_full[2], p->a_grav[0], p->a_grav[1], p->a_grav[2]);
 }
 
 #endif /* SWIFT_DEFAULT_GRAVITY_DEBUG_H */
diff --git a/src/gravity/Default/gravity_part.h b/src/gravity/Default/gravity_part.h
index f06e65e5b30ebcd609c0c6204de33da17b770add..f484b13663059fa5f4f822aa78748fe4ef9d5926 100644
--- a/src/gravity/Default/gravity_part.h
+++ b/src/gravity/Default/gravity_part.h
@@ -19,12 +19,13 @@
 #ifndef SWIFT_DEFAULT_GRAVITY_PART_H
 #define SWIFT_DEFAULT_GRAVITY_PART_H
 
-/* Some standard headers. */
-#include <stdlib.h>
-
 /* Gravity particle. */
 struct gpart {
 
+  /* Particle ID. If negative, it is the negative offset of the #part with
+     which this gpart is linked. */
+  long long id_or_neg_offset;
+
   /* Particle position. */
   double x[3];
 
@@ -43,15 +44,21 @@ struct gpart {
   /* Softening length */
   float epsilon;
 
-  /* Particle time of beginning of time-step. */
-  int ti_begin;
+  /* Time-step length */
+  timebin_t time_bin;
 
-  /* Particle time of end of time-step. */
-  int ti_end;
+  /* Type of the #gpart (DM, gas, star, ...) */
+  enum part_type type;
 
-  /* Particle ID. If negative, it is the negative offset of the #part with
-     which this gpart is linked. */
-  long long id_or_neg_offset;
+#ifdef SWIFT_DEBUG_CHECKS
+
+  /* Time of the last drift */
+  integertime_t ti_drift;
+
+  /* Time of the last kick */
+  integertime_t ti_kick;
+
+#endif
 
 } SWIFT_STRUCT_ALIGN;
 
diff --git a/src/hydro/Default/hydro.h b/src/hydro/Default/hydro.h
index 3fd357a2d8778f5ca8b014935d538350eccb99c6..bfb5cd1ce39a9908573c66406f41b56561a870d6 100644
--- a/src/hydro/Default/hydro.h
+++ b/src/hydro/Default/hydro.h
@@ -33,7 +33,7 @@
  * @param dt Time since the last kick
  */
 __attribute__((always_inline)) INLINE static float hydro_get_internal_energy(
-    const struct part *restrict p, float dt) {
+    const struct part *restrict p) {
 
   return p->u;
 }
@@ -45,7 +45,7 @@ __attribute__((always_inline)) INLINE static float hydro_get_internal_energy(
  * @param dt Time since the last kick
  */
 __attribute__((always_inline)) INLINE static float hydro_get_pressure(
-    const struct part *restrict p, float dt) {
+    const struct part *restrict p) {
 
   return gas_pressure_from_internal_energy(p->rho, p->u);
 }
@@ -57,7 +57,7 @@ __attribute__((always_inline)) INLINE static float hydro_get_pressure(
  * @param dt Time since the last kick
  */
 __attribute__((always_inline)) INLINE static float hydro_get_entropy(
-    const struct part *restrict p, float dt) {
+    const struct part *restrict p) {
 
   return gas_entropy_from_internal_energy(p->rho, p->u);
 }
@@ -69,7 +69,7 @@ __attribute__((always_inline)) INLINE static float hydro_get_entropy(
  * @param dt Time since the last kick
  */
 __attribute__((always_inline)) INLINE static float hydro_get_soundspeed(
-    const struct part *restrict p, float dt) {
+    const struct part *restrict p) {
 
   return p->force.soundspeed;
 }
@@ -97,34 +97,30 @@ __attribute__((always_inline)) INLINE static float hydro_get_mass(
 }
 
 /**
- * @brief Modifies the thermal state of a particle to the imposed internal
- * energy
+ * @brief Returns the time derivative of internal energy of a particle
  *
- * This overrides the current state of the particle but does *not* change its
- * time-derivatives
+ * We assume a constant density.
  *
- * @param p The particle
- * @param u The new internal energy
+ * @param p The particle of interest
  */
-__attribute__((always_inline)) INLINE static void hydro_set_internal_energy(
-    struct part *restrict p, float u) {
+__attribute__((always_inline)) INLINE static float hydro_get_internal_energy_dt(
+    const struct part *restrict p) {
 
-  p->u = u;
+  return p->force.u_dt;
 }
 
 /**
- * @brief Modifies the thermal state of a particle to the imposed entropy
+ * @brief Returns the time derivative of internal energy of a particle
  *
- * This overrides the current state of the particle but does *not* change its
- * time-derivatives
+ * We assume a constant density.
  *
- * @param p The particle
- * @param S The new entropy
+ * @param p The particle of interest.
+ * @param du_dt The new time derivative of the internal energy.
  */
-__attribute__((always_inline)) INLINE static void hydro_set_entropy(
-    struct part *restrict p, float S) {
+__attribute__((always_inline)) INLINE static void hydro_set_internal_energy_dt(
+    struct part *restrict p, float du_dt) {
 
-  p->u = gas_internal_energy_from_entropy(p->rho, S);
+  p->force.u_dt = du_dt;
 }
 
 /**
@@ -152,26 +148,6 @@ __attribute__((always_inline)) INLINE static float hydro_compute_timestep(
   return min(dt_cfl, dt_u_change);
 }
 
-/**
- * @brief Initialises the particles for the first time
- *
- * This function is called only once just after the ICs have been
- * read in to do some conversions.
- *
- * @param p The particle to act upon
- * @param xp The extended particle data to act upon
- */
-__attribute__((always_inline)) INLINE static void hydro_first_init_part(
-    struct part *restrict p, struct xpart *restrict xp) {
-
-  p->ti_begin = 0;
-  p->ti_end = 0;
-  xp->v_full[0] = p->v[0];
-  xp->v_full[1] = p->v[1];
-  xp->v_full[2] = p->v[2];
-  xp->u_full = p->u;
-}
-
 /**
  * @brief Prepares a particle for the density calculation.
  *
@@ -244,8 +220,7 @@ __attribute__((always_inline)) INLINE static void hydro_end_density(
  * @param time The current time
  */
 __attribute__((always_inline)) INLINE static void hydro_prepare_force(
-    struct part *restrict p, struct xpart *restrict xp, int ti_current,
-    double timeBase) {
+    struct part *restrict p, struct xpart *restrict xp) {
 
   /* Some smoothing length multiples. */
   const float h = p->h;
@@ -270,17 +245,18 @@ __attribute__((always_inline)) INLINE static void hydro_prepare_force(
   p->force.balsara = normDiv_v / (normDiv_v + normRot_v + 0.0001f * fc * h_inv);
 
   /* Viscosity parameter decay time */
-  const float tau = h / (2.f * const_viscosity_length * p->force.soundspeed);
+  /* const float tau = h / (2.f * const_viscosity_length * p->force.soundspeed);
+   */
 
   /* Viscosity source term */
-  const float S = max(-normDiv_v, 0.f);
+  /* const float S = max(-normDiv_v, 0.f); */
 
   /* Compute the particle's viscosity parameter time derivative */
-  const float alpha_dot = (const_viscosity_alpha_min - p->alpha) / tau +
-                          (const_viscosity_alpha_max - p->alpha) * S;
+  /* const float alpha_dot = (const_viscosity_alpha_min - p->alpha) / tau + */
+  /*                         (const_viscosity_alpha_max - p->alpha) * S; */
 
   /* Update particle's viscosity paramter */
-  p->alpha += alpha_dot * (p->ti_end - p->ti_begin) * timeBase;
+  /* p->alpha += alpha_dot * (p->ti_end - p->ti_begin) * timeBase; */  // MATTHIEU
 }
 
 /**
@@ -305,6 +281,22 @@ __attribute__((always_inline)) INLINE static void hydro_reset_acceleration(
   p->force.v_sig = 0.0f;
 }
 
+/**
+ * @brief Sets the values to be predicted in the drifts to their values at a
+ * kick time
+ *
+ * @param p The particle.
+ * @param xp The extended data of this particle.
+ */
+__attribute__((always_inline)) INLINE static void hydro_reset_predicted_values(
+    struct part *restrict p, const struct xpart *restrict xp) {
+
+  /* Re-set the predicted velocities */
+  p->v[0] = xp->v_full[0];
+  p->v[1] = xp->v_full[1];
+  p->v[2] = xp->v_full[2];
+}
+
 /**
  * @brief Predict additional particle fields forward in time when drifting
  *
@@ -316,8 +308,7 @@ __attribute__((always_inline)) INLINE static void hydro_reset_acceleration(
  * @param timeBase The minimal time-step size
  */
 __attribute__((always_inline)) INLINE static void hydro_predict_extra(
-    struct part *restrict p, struct xpart *restrict xp, float dt, int t0,
-    int t1, double timeBase) {
+    struct part *restrict p, struct xpart *restrict xp, float dt) {
   float u, w;
 
   const float h_inv = 1.f / p->h;
@@ -368,8 +359,7 @@ __attribute__((always_inline)) INLINE static void hydro_end_force(
  * @param half_dt The half time-step for this kick
  */
 __attribute__((always_inline)) INLINE static void hydro_kick_extra(
-    struct part *restrict p, struct xpart *restrict xp, float dt,
-    float half_dt) {}
+    struct part *restrict p, struct xpart *restrict xp, float dt) {}
 
 /**
  *  @brief Converts hydro quantity of a particle at the start of a run
@@ -379,6 +369,28 @@ __attribute__((always_inline)) INLINE static void hydro_kick_extra(
  * @param p The particle to act upon
  */
 __attribute__((always_inline)) INLINE static void hydro_convert_quantities(
-    struct part *restrict p) {}
+    struct part *restrict p, struct xpart *restrict xp) {}
+
+/**
+ * @brief Initialises the particles for the first time
+ *
+ * This function is called only once just after the ICs have been
+ * read in to do some conversions.
+ *
+ * @param p The particle to act upon
+ * @param xp The extended particle data to act upon
+ */
+__attribute__((always_inline)) INLINE static void hydro_first_init_part(
+    struct part *restrict p, struct xpart *restrict xp) {
+
+  p->time_bin = 0;
+  xp->v_full[0] = p->v[0];
+  xp->v_full[1] = p->v[1];
+  xp->v_full[2] = p->v[2];
+  xp->u_full = p->u;
+
+  hydro_reset_acceleration(p);
+  hydro_init_part(p);
+}
 
 #endif /* SWIFT_DEFAULT_HYDRO_H */
diff --git a/src/hydro/Default/hydro_debug.h b/src/hydro/Default/hydro_debug.h
index d02d3ef82c1b3d751731f49850c06df4b146b164..3be9c9e1760591423edbd218d19b46ddf9aad01e 100644
--- a/src/hydro/Default/hydro_debug.h
+++ b/src/hydro/Default/hydro_debug.h
@@ -25,11 +25,10 @@ __attribute__((always_inline)) INLINE static void hydro_debug_particle(
       "x=[%.3e,%.3e,%.3e], "
       "v=[%.3e,%.3e,%.3e],v_full=[%.3e,%.3e,%.3e] \n a=[%.3e,%.3e,%.3e],\n "
       "h=%.3e, "
-      "wcount=%d, m=%.3e, dh_drho=%.3e, rho=%.3e, t_begin=%d, t_end=%d\n",
+      "wcount=%d, m=%.3e, dh_drho=%.3e, rho=%.3e, time_bin=%d\n",
       p->x[0], p->x[1], p->x[2], p->v[0], p->v[1], p->v[2], xp->v_full[0],
       xp->v_full[1], xp->v_full[2], p->a_hydro[0], p->a_hydro[1], p->a_hydro[2],
-      p->h, (int)p->density.wcount, p->mass, p->rho_dh, p->rho, p->ti_begin,
-      p->ti_end);
+      p->h, (int)p->density.wcount, p->mass, p->rho_dh, p->rho, p->time_bin);
 }
 
 #endif /* SWIFT_DEFAULT_HYDRO_DEBUG_H */
diff --git a/src/hydro/Default/hydro_part.h b/src/hydro/Default/hydro_part.h
index c7464bcf338b1c5b81ffa91d92264c2bd35e9313..332eecb27fb65a6b4da48cbb595450a432c44615 100644
--- a/src/hydro/Default/hydro_part.h
+++ b/src/hydro/Default/hydro_part.h
@@ -55,12 +55,6 @@ struct part {
   /* Particle cutoff radius. */
   float h;
 
-  /* Particle time of beginning of time-step. */
-  int ti_begin;
-
-  /* Particle time of end of time-step. */
-  int ti_end;
-
   /* Particle internal energy. */
   float u;
 
@@ -125,6 +119,9 @@ struct part {
   /* Pointer to corresponding gravity part. */
   struct gpart* gpart;
 
+  /* Particle time-bin */
+  timebin_t time_bin;
+
 } SWIFT_STRUCT_ALIGN;
 
 #endif /* SWIFT_DEFAULT_HYDRO_PART_H */
diff --git a/src/hydro/Gadget2/hydro.h b/src/hydro/Gadget2/hydro.h
index 157893bc9e27806d2b97ac5f5a81d0f6fbb1c589..160a2d8b5d25a97cefb2afd5e22d8e6bcea0006e 100644
--- a/src/hydro/Gadget2/hydro.h
+++ b/src/hydro/Gadget2/hydro.h
@@ -43,50 +43,42 @@
  * @brief Returns the internal energy of a particle
  *
  * @param p The particle of interest
- * @param dt Time since the last kick
  */
 __attribute__((always_inline)) INLINE static float hydro_get_internal_energy(
-    const struct part *restrict p, float dt) {
-
-  const float entropy = p->entropy + p->entropy_dt * dt;
+    const struct part *restrict p) {
 
-  return gas_internal_energy_from_entropy(p->rho, entropy);
+  return gas_internal_energy_from_entropy(p->rho, p->entropy);
 }
 
 /**
  * @brief Returns the pressure of a particle
  *
  * @param p The particle of interest
- * @param dt Time since the last kick
  */
 __attribute__((always_inline)) INLINE static float hydro_get_pressure(
-    const struct part *restrict p, float dt) {
-
-  const float entropy = p->entropy + p->entropy_dt * dt;
+    const struct part *restrict p) {
 
-  return gas_pressure_from_entropy(p->rho, entropy);
+  return gas_pressure_from_entropy(p->rho, p->entropy);
 }
 
 /**
  * @brief Returns the entropy of a particle
  *
  * @param p The particle of interest
- * @param dt Time since the last kick
  */
 __attribute__((always_inline)) INLINE static float hydro_get_entropy(
-    const struct part *restrict p, float dt) {
+    const struct part *restrict p) {
 
-  return p->entropy + p->entropy_dt * dt;
+  return p->entropy;
 }
 
 /**
  * @brief Returns the sound speed of a particle
  *
  * @param p The particle of interest
- * @param dt Time since the last kick
  */
 __attribute__((always_inline)) INLINE static float hydro_get_soundspeed(
-    const struct part *restrict p, float dt) {
+    const struct part *restrict p) {
 
   return p->force.soundspeed;
 }
@@ -114,70 +106,30 @@ __attribute__((always_inline)) INLINE static float hydro_get_mass(
 }
 
 /**
- * @brief Modifies the thermal state of a particle to the imposed internal
- * energy
+ * @brief Returns the time derivative of internal energy of a particle
  *
- * This overwrites the current state of the particle but does *not* change its
- * time-derivatives. Entropy, pressure, sound-speed and signal velocity will be
- * updated.
+ * We assume a constant density.
  *
- * @param p The particle
- * @param u The new internal energy
+ * @param p The particle of interest
  */
-__attribute__((always_inline)) INLINE static void hydro_set_internal_energy(
-    struct part *restrict p, float u) {
-
-  p->entropy = gas_entropy_from_internal_energy(p->rho, u);
-
-  /* Compute the new pressure */
-  const float pressure = gas_pressure_from_internal_energy(p->rho, u);
-
-  /* Compute the new sound speed */
-  const float soundspeed = gas_soundspeed_from_pressure(p->rho, pressure);
-
-  /* Update the signal velocity */
-  const float v_sig_old = p->force.v_sig;
-  const float v_sig_new = p->force.v_sig - p->force.soundspeed + soundspeed;
-  const float v_sig = max(v_sig_old, v_sig_new);
-
-  const float rho_inv = 1.f / p->rho;
+__attribute__((always_inline)) INLINE static float hydro_get_internal_energy_dt(
+    const struct part *restrict p) {
 
-  p->force.soundspeed = soundspeed;
-  p->force.P_over_rho2 = pressure * rho_inv * rho_inv;
-  p->force.v_sig = v_sig;
+  return gas_internal_energy_from_entropy(p->rho, p->entropy_dt);
 }
 
 /**
- * @brief Modifies the thermal state of a particle to the imposed entropy
+ * @brief Returns the time derivative of internal energy of a particle
  *
- * This overwrites the current state of the particle but does *not* change its
- * time-derivatives. Entropy, pressure, sound-speed and signal velocity will be
- * updated.
+ * We assume a constant density.
  *
- * @param p The particle
- * @param S The new entropy
+ * @param p The particle of interest.
+ * @param du_dt The new time derivative of the internal energy.
  */
-__attribute__((always_inline)) INLINE static void hydro_set_entropy(
-    struct part *restrict p, float S) {
-
-  p->entropy = S;
-
-  /* Compute the pressure */
-  const float pressure = gas_pressure_from_entropy(p->rho, p->entropy);
-
-  /* Compute the new sound speed */
-  const float soundspeed = gas_soundspeed_from_pressure(p->rho, pressure);
-
-  /* Update the signal velocity */
-  const float v_sig_old = p->force.v_sig;
-  const float v_sig_new = p->force.v_sig - p->force.soundspeed + soundspeed;
-  const float v_sig = max(v_sig_old, v_sig_new);
+__attribute__((always_inline)) INLINE static void hydro_set_internal_energy_dt(
+    struct part *restrict p, float du_dt) {
 
-  const float rho_inv = 1.f / p->rho;
-
-  p->force.soundspeed = soundspeed;
-  p->force.P_over_rho2 = pressure * rho_inv * rho_inv;
-  p->force.v_sig = v_sig;
+  p->entropy_dt = gas_entropy_from_internal_energy(p->rho, du_dt);
 }
 
 /**
@@ -200,25 +152,6 @@ __attribute__((always_inline)) INLINE static float hydro_compute_timestep(
   return dt_cfl;
 }
 
-/**
- * @brief Initialises the particles for the first time
- *
- * This function is called only once just after the ICs have been
- * read in to do some conversions.
- *
- * @param p The particle to act upon
- * @param xp The extended particle data to act upon
- */
-__attribute__((always_inline)) INLINE static void hydro_first_init_part(
-    struct part *restrict p, struct xpart *restrict xp) {
-
-  p->ti_begin = 0;
-  p->ti_end = 0;
-  xp->v_full[0] = p->v[0];
-  xp->v_full[1] = p->v[1];
-  xp->v_full[2] = p->v[2];
-}
-
 /**
  * @brief Prepares a particle for the density calculation.
  *
@@ -229,9 +162,10 @@ __attribute__((always_inline)) INLINE static void hydro_first_init_part(
  */
 __attribute__((always_inline)) INLINE static void hydro_init_part(
     struct part *restrict p) {
+
+  p->rho = 0.f;
   p->density.wcount = 0.f;
   p->density.wcount_dh = 0.f;
-  p->rho = 0.f;
   p->density.rho_dh = 0.f;
   p->density.div_v = 0.f;
   p->density.rot_v[0] = 0.f;
@@ -289,8 +223,7 @@ __attribute__((always_inline)) INLINE static void hydro_end_density(
  * @param timeBase The minimal time-step size
  */
 __attribute__((always_inline)) INLINE static void hydro_prepare_force(
-    struct part *restrict p, struct xpart *restrict xp, int ti_current,
-    double timeBase) {
+    struct part *restrict p, struct xpart *restrict xp) {
 
   const float fac_mu = 1.f; /* Will change with cosmological integration */
 
@@ -303,8 +236,7 @@ __attribute__((always_inline)) INLINE static void hydro_prepare_force(
   const float abs_div_v = fabsf(p->density.div_v);
 
   /* Compute the pressure */
-  const float half_dt = (ti_current - (p->ti_begin + p->ti_end) / 2) * timeBase;
-  const float pressure = hydro_get_pressure(p, half_dt);
+  const float pressure = gas_pressure_from_entropy(p->rho, p->entropy);
 
   /* Compute the sound speed */
   const float soundspeed = gas_soundspeed_from_pressure(p->rho, pressure);
@@ -352,6 +284,25 @@ __attribute__((always_inline)) INLINE static void hydro_reset_acceleration(
   p->force.v_sig = 0.0f;
 }
 
+/**
+ * @brief Sets the values to be predicted in the drifts to their values at a
+ * kick time
+ *
+ * @param p The particle.
+ * @param xp The extended data of this particle.
+ */
+__attribute__((always_inline)) INLINE static void hydro_reset_predicted_values(
+    struct part *restrict p, const struct xpart *restrict xp) {
+
+  /* Re-set the predicted velocities */
+  p->v[0] = xp->v_full[0];
+  p->v[1] = xp->v_full[1];
+  p->v[2] = xp->v_full[2];
+
+  /* Re-set the entropy */
+  p->entropy = xp->entropy_full;
+}
+
 /**
  * @brief Predict additional particle fields forward in time when drifting
  *
@@ -363,8 +314,7 @@ __attribute__((always_inline)) INLINE static void hydro_reset_acceleration(
  * @param timeBase The minimal time-step size
  */
 __attribute__((always_inline)) INLINE static void hydro_predict_extra(
-    struct part *restrict p, const struct xpart *restrict xp, float dt, int t0,
-    int t1, double timeBase) {
+    struct part *restrict p, const struct xpart *restrict xp, float dt) {
 
   const float h_inv = 1.f / p->h;
 
@@ -382,9 +332,11 @@ __attribute__((always_inline)) INLINE static void hydro_predict_extra(
   else
     p->rho *= expf(w2);
 
-  /* Drift the pressure */
-  const float dt_entr = (t1 - (p->ti_begin + p->ti_end) / 2) * timeBase;
-  const float pressure = hydro_get_pressure(p, dt_entr);
+  /* Predict the entropy */
+  p->entropy += p->entropy_dt * dt;
+
+  /* Re-compute the pressure */
+  const float pressure = gas_pressure_from_entropy(p->rho, p->entropy);
 
   /* Compute the new sound speed */
   const float soundspeed = gas_soundspeed_from_pressure(p->rho, pressure);
@@ -420,25 +372,21 @@ __attribute__((always_inline)) INLINE static void hydro_end_force(
  * @param p The particle to act upon
  * @param xp The particle extended data to act upon
  * @param dt The time-step for this kick
- * @param half_dt The half time-step for this kick
  */
 __attribute__((always_inline)) INLINE static void hydro_kick_extra(
-    struct part *restrict p, struct xpart *restrict xp, float dt,
-    float half_dt) {
-
-  /* Do not decrease the entropy (temperature) by more than a factor of 2*/
-  const float entropy_change = p->entropy_dt * dt;
-  if (entropy_change > -0.5f * p->entropy)
-    p->entropy += entropy_change;
-  else
-    p->entropy *= 0.5f;
+    struct part *restrict p, struct xpart *restrict xp, float dt) {
 
-  /* Do not 'overcool' when timestep increases */
-  if (p->entropy + p->entropy_dt * half_dt < 0.5f * p->entropy)
-    p->entropy_dt = -0.5f * p->entropy / half_dt;
+  /* Do not decrease the entropy by more than a factor of 2 */
+  if (dt > 0. && p->entropy_dt * dt < -0.5f * xp->entropy_full) {
+    /* message("Warning! Limiting entropy_dt. Possible cooling error.\n
+     * entropy_full = %g \n entropy_dt * dt =%g \n", */
+    /* 	     xp->entropy_full,p->entropy_dt * dt); */
+    p->entropy_dt = -0.5f * xp->entropy_full / dt;
+  }
+  xp->entropy_full += p->entropy_dt * dt;
 
   /* Compute the pressure */
-  const float pressure = gas_pressure_from_entropy(p->rho, p->entropy);
+  const float pressure = gas_pressure_from_entropy(p->rho, xp->entropy_full);
 
   /* Compute the new sound speed */
   const float soundspeed = gas_soundspeed_from_pressure(p->rho, pressure);
@@ -459,10 +407,11 @@ __attribute__((always_inline)) INLINE static void hydro_kick_extra(
  * @param p The particle to act upon
  */
 __attribute__((always_inline)) INLINE static void hydro_convert_quantities(
-    struct part *restrict p) {
+    struct part *restrict p, struct xpart *restrict xp) {
 
   /* We read u in the entropy field. We now get S from u */
-  p->entropy = gas_entropy_from_internal_energy(p->rho, p->entropy);
+  xp->entropy_full = gas_entropy_from_internal_energy(p->rho, p->entropy);
+  p->entropy = xp->entropy_full;
 
   /* Compute the pressure */
   const float pressure = gas_pressure_from_entropy(p->rho, p->entropy);
@@ -478,4 +427,26 @@ __attribute__((always_inline)) INLINE static void hydro_convert_quantities(
   p->force.P_over_rho2 = P_over_rho2;
 }
 
+/**
+ * @brief Initialises the particles for the first time
+ *
+ * This function is called only once just after the ICs have been
+ * read in to do some conversions.
+ *
+ * @param p The particle to act upon
+ * @param xp The extended particle data to act upon
+ */
+__attribute__((always_inline)) INLINE static void hydro_first_init_part(
+    struct part *restrict p, struct xpart *restrict xp) {
+
+  p->time_bin = 0;
+  xp->v_full[0] = p->v[0];
+  xp->v_full[1] = p->v[1];
+  xp->v_full[2] = p->v[2];
+  xp->entropy_full = p->entropy;
+
+  hydro_reset_acceleration(p);
+  hydro_init_part(p);
+}
+
 #endif /* SWIFT_GADGET2_HYDRO_H */
diff --git a/src/hydro/Gadget2/hydro_debug.h b/src/hydro/Gadget2/hydro_debug.h
index 656299b38374f68824ec20d85ece169d5f1fd599..6500d1126bd5b5a65d3e511c13afb8364574e0ba 100644
--- a/src/hydro/Gadget2/hydro_debug.h
+++ b/src/hydro/Gadget2/hydro_debug.h
@@ -27,14 +27,14 @@ __attribute__((always_inline)) INLINE static void hydro_debug_particle(
       "h=%.3e, wcount=%.3f, wcount_dh=%.3e, m=%.3e, dh_drho=%.3e, rho=%.3e, "
       "P=%.3e, P_over_rho2=%.3e, S=%.3e, dS/dt=%.3e, c=%.3e\n"
       "divV=%.3e, rotV=[%.3e,%.3e,%.3e], balsara=%.3e \n "
-      "v_sig=%e dh/dt=%.3e t_begin=%d, t_end=%d\n",
+      "v_sig=%e dh/dt=%.3e time_bin=%d\n",
       p->x[0], p->x[1], p->x[2], p->v[0], p->v[1], p->v[2], xp->v_full[0],
       xp->v_full[1], xp->v_full[2], p->a_hydro[0], p->a_hydro[1], p->a_hydro[2],
       p->h, p->density.wcount, p->density.wcount_dh, p->mass, p->density.rho_dh,
-      p->rho, hydro_get_pressure(p, 0.), p->force.P_over_rho2, p->entropy,
+      p->rho, hydro_get_pressure(p), p->force.P_over_rho2, p->entropy,
       p->entropy_dt, p->force.soundspeed, p->density.div_v, p->density.rot_v[0],
       p->density.rot_v[1], p->density.rot_v[2], p->force.balsara,
-      p->force.v_sig, p->force.h_dt, p->ti_begin, p->ti_end);
+      p->force.v_sig, p->force.h_dt, p->time_bin);
 }
 
 #endif /* SWIFT_GADGET2_HYDRO_DEBUG_H */
diff --git a/src/hydro/Gadget2/hydro_iact.h b/src/hydro/Gadget2/hydro_iact.h
index 08fb2b37db566e191bd74d82488b5d68e764573b..3fef18b4f487f1734a5f93c4bad46cf4e6968240 100644
--- a/src/hydro/Gadget2/hydro_iact.h
+++ b/src/hydro/Gadget2/hydro_iact.h
@@ -155,20 +155,15 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_density(
 
   /* Get the radius and inverse radius. */
   r2.v = vec_load(R2);
-  ri.v = vec_rsqrt(r2.v);
-  /*vec_rsqrt does not have the level of accuracy we need, so an extra term is
-   * added below.*/
-  ri.v = ri.v - vec_set1(0.5f) * ri.v * (r2.v * ri.v * ri.v - vec_set1(1.0f));
+  ri = vec_reciprocal_sqrt(r2);
   r.v = r2.v * ri.v;
 
   hi.v = vec_load(Hi);
-  hi_inv.v = vec_rcp(hi.v);
-  hi_inv.v = hi_inv.v - hi_inv.v * (hi_inv.v * hi.v - vec_set1(1.0f));
+  hi_inv = vec_reciprocal(hi);
   xi.v = r.v * hi_inv.v;
 
   hj.v = vec_load(Hj);
-  hj_inv.v = vec_rcp(hj.v);
-  hj_inv.v = hj_inv.v - hj_inv.v * (hj_inv.v * hj.v - vec_set1(1.0f));
+  hj_inv = vec_reciprocal(hj);
   xj.v = r.v * hj_inv.v;
 
   /* Compute the kernel function. */
@@ -327,15 +322,11 @@ runner_iact_nonsym_vec_density(float *R2, float *Dx, float *Hi, float *Hj,
 
   /* Get the radius and inverse radius. */
   r2.v = vec_load(R2);
-  ri.v = vec_rsqrt(r2.v);
-  /*vec_rsqrt does not have the level of accuracy we need, so an extra term is
-   * added below.*/
-  ri.v = ri.v - vec_set1(0.5f) * ri.v * (r2.v * ri.v * ri.v - vec_set1(1.0f));
+  ri = vec_reciprocal_sqrt(r2);
   r.v = r2.v * ri.v;
 
   hi.v = vec_load(Hi);
-  hi_inv.v = vec_rcp(hi.v);
-  hi_inv.v = hi_inv.v - hi_inv.v * (hi_inv.v * hi.v - vec_set1(1.0f));
+  hi_inv = vec_reciprocal(hi);
   xi.v = r.v * hi_inv.v;
 
   kernel_deval_vec(&xi, &wi, &wi_dx);
@@ -382,6 +373,176 @@ runner_iact_nonsym_vec_density(float *R2, float *Dx, float *Hi, float *Hj,
 #endif
 }
 
+#ifdef WITH_VECTORIZATION
+/**
+ * @brief Density interaction computed using 2 interleaved vectors
+ * (non-symmetric vectorized version).
+ */
+__attribute__((always_inline)) INLINE static void
+runner_iact_nonsym_2_vec_density(
+    float *R2, float *Dx, float *Dy, float *Dz, vector hi_inv, vector vix,
+    vector viy, vector viz, float *Vjx, float *Vjy, float *Vjz, float *Mj,
+    vector *rhoSum, vector *rho_dhSum, vector *wcountSum, vector *wcount_dhSum,
+    vector *div_vSum, vector *curlvxSum, vector *curlvySum, vector *curlvzSum,
+    vector mask, vector mask2, int knlMask, int knlMask2) {
+
+  vector r, ri, r2, xi, wi, wi_dx;
+  vector mj;
+  vector dx, dy, dz, dvx, dvy, dvz;
+  vector vjx, vjy, vjz;
+  vector dvdr;
+  vector curlvrx, curlvry, curlvrz;
+  vector r_2, ri2, r2_2, xi2, wi2, wi_dx2;
+  vector mj2;
+  vector dx2, dy2, dz2, dvx2, dvy2, dvz2;
+  vector vjx2, vjy2, vjz2;
+  vector dvdr2;
+  vector curlvrx2, curlvry2, curlvrz2;
+
+  /* Fill the vectors. */
+  mj.v = vec_load(Mj);
+  mj2.v = vec_load(&Mj[VEC_SIZE]);
+  vjx.v = vec_load(Vjx);
+  vjx2.v = vec_load(&Vjx[VEC_SIZE]);
+  vjy.v = vec_load(Vjy);
+  vjy2.v = vec_load(&Vjy[VEC_SIZE]);
+  vjz.v = vec_load(Vjz);
+  vjz2.v = vec_load(&Vjz[VEC_SIZE]);
+  dx.v = vec_load(Dx);
+  dx2.v = vec_load(&Dx[VEC_SIZE]);
+  dy.v = vec_load(Dy);
+  dy2.v = vec_load(&Dy[VEC_SIZE]);
+  dz.v = vec_load(Dz);
+  dz2.v = vec_load(&Dz[VEC_SIZE]);
+
+  /* Get the radius and inverse radius. */
+  r2.v = vec_load(R2);
+  r2_2.v = vec_load(&R2[VEC_SIZE]);
+  ri = vec_reciprocal_sqrt(r2);
+  ri2 = vec_reciprocal_sqrt(r2_2);
+  r.v = vec_mul(r2.v, ri.v);
+  r_2.v = vec_mul(r2_2.v, ri2.v);
+
+  xi.v = vec_mul(r.v, hi_inv.v);
+  xi2.v = vec_mul(r_2.v, hi_inv.v);
+
+  /* Calculate the kernel for two particles. */
+  kernel_deval_2_vec(&xi, &wi, &wi_dx, &xi2, &wi2, &wi_dx2);
+
+  /* Compute dv. */
+  dvx.v = vec_sub(vix.v, vjx.v);
+  dvx2.v = vec_sub(vix.v, vjx2.v);
+  dvy.v = vec_sub(viy.v, vjy.v);
+  dvy2.v = vec_sub(viy.v, vjy2.v);
+  dvz.v = vec_sub(viz.v, vjz.v);
+  dvz2.v = vec_sub(viz.v, vjz2.v);
+
+  /* Compute dv dot r */
+  dvdr.v = vec_fma(dvx.v, dx.v, vec_fma(dvy.v, dy.v, vec_mul(dvz.v, dz.v)));
+  dvdr2.v =
+      vec_fma(dvx2.v, dx2.v, vec_fma(dvy2.v, dy2.v, vec_mul(dvz2.v, dz2.v)));
+  dvdr.v = vec_mul(dvdr.v, ri.v);
+  dvdr2.v = vec_mul(dvdr2.v, ri2.v);
+
+  /* Compute dv cross r */
+  curlvrx.v =
+      vec_fma(dvy.v, dz.v, vec_mul(vec_set1(-1.0f), vec_mul(dvz.v, dy.v)));
+  curlvrx2.v =
+      vec_fma(dvy2.v, dz2.v, vec_mul(vec_set1(-1.0f), vec_mul(dvz2.v, dy2.v)));
+  curlvry.v =
+      vec_fma(dvz.v, dx.v, vec_mul(vec_set1(-1.0f), vec_mul(dvx.v, dz.v)));
+  curlvry2.v =
+      vec_fma(dvz2.v, dx2.v, vec_mul(vec_set1(-1.0f), vec_mul(dvx2.v, dz2.v)));
+  curlvrz.v =
+      vec_fma(dvx.v, dy.v, vec_mul(vec_set1(-1.0f), vec_mul(dvy.v, dx.v)));
+  curlvrz2.v =
+      vec_fma(dvx2.v, dy2.v, vec_mul(vec_set1(-1.0f), vec_mul(dvy2.v, dx2.v)));
+  curlvrx.v = vec_mul(curlvrx.v, ri.v);
+  curlvrx2.v = vec_mul(curlvrx2.v, ri2.v);
+  curlvry.v = vec_mul(curlvry.v, ri.v);
+  curlvry2.v = vec_mul(curlvry2.v, ri2.v);
+  curlvrz.v = vec_mul(curlvrz.v, ri.v);
+  curlvrz2.v = vec_mul(curlvrz2.v, ri2.v);
+
+/* Mask updates to intermediate vector sums for particle pi. */
+#ifdef HAVE_AVX512_F
+  rhoSum->v =
+      _mm512_mask_add_ps(rhoSum->v, knlMask, vec_mul(mj.v, wi.v), rhoSum->v);
+  rhoSum->v =
+      _mm512_mask_add_ps(rhoSum->v, knlMask2, vec_mul(mj2.v, wi2.v), rhoSum->v);
+
+  rho_dhSum->v =
+      _mm512_mask_sub_ps(rho_dhSum->v, knlMask, rho_dhSum->v,
+                         vec_mul(mj.v, vec_fma(vec_set1(hydro_dimension), wi.v,
+                                               vec_mul(xi.v, wi_dx.v))));
+  rho_dhSum->v = _mm512_mask_sub_ps(
+      rho_dhSum->v, knlMask2, rho_dhSum->v,
+      vec_mul(mj2.v, vec_fma(vec_set1(hydro_dimension), wi2.v,
+                             vec_mul(xi2.v, wi_dx2.v))));
+
+  wcountSum->v = _mm512_mask_add_ps(wcountSum->v, knlMask, wi.v, wcountSum->v);
+  wcountSum->v =
+      _mm512_mask_add_ps(wcountSum->v, knlMask2, wi2.v, wcountSum->v);
+
+  wcount_dhSum->v = _mm512_mask_sub_ps(wcount_dhSum->v, knlMask,
+                                       wcount_dhSum->v, vec_mul(xi.v, wi_dx.v));
+  wcount_dhSum->v = _mm512_mask_sub_ps(
+      wcount_dhSum->v, knlMask2, wcount_dhSum->v, vec_mul(xi2.v, wi_dx2.v));
+
+  div_vSum->v = _mm512_mask_sub_ps(div_vSum->v, knlMask, div_vSum->v,
+                                   vec_mul(mj.v, vec_mul(dvdr.v, wi_dx.v)));
+  div_vSum->v = _mm512_mask_sub_ps(div_vSum->v, knlMask2, div_vSum->v,
+                                   vec_mul(mj2.v, vec_mul(dvdr2.v, wi_dx2.v)));
+
+  curlvxSum->v = _mm512_mask_add_ps(curlvxSum->v, knlMask,
+                                    vec_mul(mj.v, vec_mul(curlvrx.v, wi_dx.v)),
+                                    curlvxSum->v);
+  curlvxSum->v = _mm512_mask_add_ps(
+      curlvxSum->v, knlMask2, vec_mul(mj2.v, vec_mul(curlvrx2.v, wi_dx2.v)),
+      curlvxSum->v);
+
+  curlvySum->v = _mm512_mask_add_ps(curlvySum->v, knlMask,
+                                    vec_mul(mj.v, vec_mul(curlvry.v, wi_dx.v)),
+                                    curlvySum->v);
+  curlvySum->v = _mm512_mask_add_ps(
+      curlvySum->v, knlMask2, vec_mul(mj2.v, vec_mul(curlvry2.v, wi_dx2.v)),
+      curlvySum->v);
+
+  curlvzSum->v = _mm512_mask_add_ps(curlvzSum->v, knlMask,
+                                    vec_mul(mj.v, vec_mul(curlvrz.v, wi_dx.v)),
+                                    curlvzSum->v);
+  curlvzSum->v = _mm512_mask_add_ps(
+      curlvzSum->v, knlMask2, vec_mul(mj2.v, vec_mul(curlvrz2.v, wi_dx2.v)),
+      curlvzSum->v);
+#else
+  rhoSum->v += vec_and(vec_mul(mj.v, wi.v), mask.v);
+  rhoSum->v += vec_and(vec_mul(mj2.v, wi2.v), mask2.v);
+  rho_dhSum->v -= vec_and(vec_mul(mj.v, vec_fma(vec_set1(hydro_dimension), wi.v,
+                                                vec_mul(xi.v, wi_dx.v))),
+                          mask.v);
+  rho_dhSum->v -=
+      vec_and(vec_mul(mj2.v, vec_fma(vec_set1(hydro_dimension), wi2.v,
+                                     vec_mul(xi2.v, wi_dx2.v))),
+              mask2.v);
+  wcountSum->v += vec_and(wi.v, mask.v);
+  wcountSum->v += vec_and(wi2.v, mask2.v);
+  wcount_dhSum->v -= vec_and(vec_mul(xi.v, wi_dx.v), mask.v);
+  wcount_dhSum->v -= vec_and(vec_mul(xi2.v, wi_dx2.v), mask2.v);
+  div_vSum->v -= vec_and(vec_mul(mj.v, vec_mul(dvdr.v, wi_dx.v)), mask.v);
+  div_vSum->v -= vec_and(vec_mul(mj2.v, vec_mul(dvdr2.v, wi_dx2.v)), mask2.v);
+  curlvxSum->v += vec_and(vec_mul(mj.v, vec_mul(curlvrx.v, wi_dx.v)), mask.v);
+  curlvxSum->v +=
+      vec_and(vec_mul(mj2.v, vec_mul(curlvrx2.v, wi_dx2.v)), mask2.v);
+  curlvySum->v += vec_and(vec_mul(mj.v, vec_mul(curlvry.v, wi_dx.v)), mask.v);
+  curlvySum->v +=
+      vec_and(vec_mul(mj2.v, vec_mul(curlvry2.v, wi_dx2.v)), mask2.v);
+  curlvzSum->v += vec_and(vec_mul(mj.v, vec_mul(curlvrz.v, wi_dx.v)), mask.v);
+  curlvzSum->v +=
+      vec_and(vec_mul(mj2.v, vec_mul(curlvrz2.v, wi_dx2.v)), mask2.v);
+#endif
+}
+#endif
+
 /**
  * @brief Force loop
  */
@@ -492,9 +653,10 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force(
   vector hi, hj, hi_inv, hj_inv;
   vector hid_inv, hjd_inv;
   vector wi, wj, wi_dx, wj_dx, wi_dr, wj_dr, dvdr;
-  vector piPOrho, pjPOrho, pirho, pjrho;
+  vector piPOrho2, pjPOrho2, pirho, pjrho;
   vector mi, mj;
   vector f;
+  vector grad_hi, grad_hj;
   vector dx[3];
   vector vi[3], vj[3];
   vector pia[3], pja[3];
@@ -512,14 +674,20 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force(
                  pi[4]->mass, pi[5]->mass, pi[6]->mass, pi[7]->mass);
   mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass,
                  pj[4]->mass, pj[5]->mass, pj[6]->mass, pj[7]->mass);
-  piPOrho.v = vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2,
-                      pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2,
-                      pi[4]->force.P_over_rho2, pi[5]->force.P_over_rho2,
-                      pi[6]->force.P_over_rho2, pi[7]->force.P_over_rho2);
-  pjPOrho.v = vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2,
-                      pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2,
-                      pj[4]->force.P_over_rho2, pj[5]->force.P_over_rho2,
-                      pj[6]->force.P_over_rho2, pj[7]->force.P_over_rho2);
+  piPOrho2.v = vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2,
+                       pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2,
+                       pi[4]->force.P_over_rho2, pi[5]->force.P_over_rho2,
+                       pi[6]->force.P_over_rho2, pi[7]->force.P_over_rho2);
+  pjPOrho2.v = vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2,
+                       pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2,
+                       pj[4]->force.P_over_rho2, pj[5]->force.P_over_rho2,
+                       pj[6]->force.P_over_rho2, pj[7]->force.P_over_rho2);
+  grad_hi.v =
+      vec_set(pi[0]->force.f, pi[1]->force.f, pi[2]->force.f, pi[3]->force.f,
+              pi[4]->force.f, pi[5]->force.f, pi[6]->force.f, pi[7]->force.f);
+  grad_hj.v =
+      vec_set(pj[0]->force.f, pj[1]->force.f, pj[2]->force.f, pj[3]->force.f,
+              pj[4]->force.f, pj[5]->force.f, pj[6]->force.f, pj[7]->force.f);
   pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho, pi[4]->rho,
                     pi[5]->rho, pi[6]->rho, pi[7]->rho);
   pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho, pj[4]->rho,
@@ -551,10 +719,14 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force(
 #elif VEC_SIZE == 4
   mi.v = vec_set(pi[0]->mass, pi[1]->mass, pi[2]->mass, pi[3]->mass);
   mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass);
-  piPOrho.v = vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2,
-                      pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2);
-  pjPOrho.v = vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2,
-                      pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2);
+  piPOrho2.v = vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2,
+                       pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2);
+  pjPOrho2.v = vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2,
+                       pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2);
+  grad_hi.v =
+      vec_set(pi[0]->force.f, pi[1]->force.f, pi[2]->force.f, pi[3]->force.f);
+  grad_hj.v =
+      vec_set(pj[0]->force.f, pj[1]->force.f, pj[2]->force.f, pj[3]->force.f);
   pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho);
   pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho);
   ci.v = vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed,
@@ -577,14 +749,12 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force(
 
   /* Get the radius and inverse radius. */
   r2.v = vec_load(R2);
-  ri.v = vec_rsqrt(r2.v);
-  ri.v = ri.v - vec_set1(0.5f) * ri.v * (r2.v * ri.v * ri.v - vec_set1(1.0f));
+  ri = vec_reciprocal_sqrt(r2);
   r.v = r2.v * ri.v;
 
   /* Get the kernel for hi. */
   hi.v = vec_load(Hi);
-  hi_inv.v = vec_rcp(hi.v);
-  hi_inv.v = hi_inv.v - hi_inv.v * (hi.v * hi_inv.v - vec_set1(1.0f));
+  hi_inv = vec_reciprocal(hi);
   hid_inv = pow_dimension_plus_one_vec(hi_inv); /* 1/h^(d+1) */
   xi.v = r.v * hi_inv.v;
   kernel_deval_vec(&xi, &wi, &wi_dx);
@@ -592,8 +762,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force(
 
   /* Get the kernel for hj. */
   hj.v = vec_load(Hj);
-  hj_inv.v = vec_rcp(hj.v);
-  hj_inv.v = hj_inv.v - hj_inv.v * (hj.v * hj_inv.v - vec_set1(1.0f));
+  hj_inv = vec_reciprocal(hj);
   hjd_inv = pow_dimension_plus_one_vec(hj_inv); /* 1/h^(d+1) */
   xj.v = r.v * hj_inv.v;
   kernel_deval_vec(&xj, &wj, &wj_dx);
@@ -619,7 +788,9 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force(
 
   /* Now, convolve with the kernel */
   visc_term.v = vec_set1(0.5f) * visc.v * (wi_dr.v + wj_dr.v) * ri.v;
-  sph_term.v = (piPOrho.v * wi_dr.v + pjPOrho.v * wj_dr.v) * ri.v;
+  sph_term.v =
+      (grad_hi.v * piPOrho2.v * wi_dr.v + grad_hj.v * pjPOrho2.v * wj_dr.v) *
+      ri.v;
 
   /* Eventually get the acceleration */
   acc.v = visc_term.v + sph_term.v;
@@ -764,9 +935,10 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force(
   vector hi, hj, hi_inv, hj_inv;
   vector hid_inv, hjd_inv;
   vector wi, wj, wi_dx, wj_dx, wi_dr, wj_dr, dvdr;
-  vector piPOrho, pjPOrho, pirho, pjrho;
+  vector piPOrho2, pjPOrho2, pirho, pjrho;
   vector mj;
   vector f;
+  vector grad_hi, grad_hj;
   vector dx[3];
   vector vi[3], vj[3];
   vector pia[3];
@@ -782,14 +954,20 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force(
 #if VEC_SIZE == 8
   mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass,
                  pj[4]->mass, pj[5]->mass, pj[6]->mass, pj[7]->mass);
-  piPOrho.v = vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2,
-                      pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2,
-                      pi[4]->force.P_over_rho2, pi[5]->force.P_over_rho2,
-                      pi[6]->force.P_over_rho2, pi[7]->force.P_over_rho2);
-  pjPOrho.v = vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2,
-                      pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2,
-                      pj[4]->force.P_over_rho2, pj[5]->force.P_over_rho2,
-                      pj[6]->force.P_over_rho2, pj[7]->force.P_over_rho2);
+  piPOrho2.v = vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2,
+                       pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2,
+                       pi[4]->force.P_over_rho2, pi[5]->force.P_over_rho2,
+                       pi[6]->force.P_over_rho2, pi[7]->force.P_over_rho2);
+  pjPOrho2.v = vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2,
+                       pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2,
+                       pj[4]->force.P_over_rho2, pj[5]->force.P_over_rho2,
+                       pj[6]->force.P_over_rho2, pj[7]->force.P_over_rho2);
+  grad_hi.v =
+      vec_set(pi[0]->force.f, pi[1]->force.f, pi[2]->force.f, pi[3]->force.f,
+              pi[4]->force.f, pi[5]->force.f, pi[6]->force.f, pi[7]->force.f);
+  grad_hj.v =
+      vec_set(pj[0]->force.f, pj[1]->force.f, pj[2]->force.f, pj[3]->force.f,
+              pj[4]->force.f, pj[5]->force.f, pj[6]->force.f, pj[7]->force.f);
   pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho, pi[4]->rho,
                     pi[5]->rho, pi[6]->rho, pi[7]->rho);
   pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho, pj[4]->rho,
@@ -820,10 +998,14 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force(
               pj[6]->force.balsara, pj[7]->force.balsara);
 #elif VEC_SIZE == 4
   mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass);
-  piPOrho.v = vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2,
-                      pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2);
-  pjPOrho.v = vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2,
-                      pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2);
+  piPOrho2.v = vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2,
+                       pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2);
+  pjPOrho2.v = vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2,
+                       pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2);
+  grad_hi.v =
+      vec_set(pi[0]->force.f, pi[1]->force.f, pi[2]->force.f, pi[3]->force.f);
+  grad_hj.v =
+      vec_set(pj[0]->force.f, pj[1]->force.f, pj[2]->force.f, pj[3]->force.f);
   pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho);
   pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho);
   ci.v = vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed,
@@ -846,14 +1028,12 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force(
 
   /* Get the radius and inverse radius. */
   r2.v = vec_load(R2);
-  ri.v = vec_rsqrt(r2.v);
-  ri.v = ri.v - vec_set1(0.5f) * ri.v * (r2.v * ri.v * ri.v - vec_set1(1.0f));
+  ri = vec_reciprocal_sqrt(r2);
   r.v = r2.v * ri.v;
 
   /* Get the kernel for hi. */
   hi.v = vec_load(Hi);
-  hi_inv.v = vec_rcp(hi.v);
-  hi_inv.v = hi_inv.v - hi_inv.v * (hi.v * hi_inv.v - vec_set1(1.0f));
+  hi_inv = vec_reciprocal(hi);
   hid_inv = pow_dimension_plus_one_vec(hi_inv);
   xi.v = r.v * hi_inv.v;
   kernel_deval_vec(&xi, &wi, &wi_dx);
@@ -861,8 +1041,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force(
 
   /* Get the kernel for hj. */
   hj.v = vec_load(Hj);
-  hj_inv.v = vec_rcp(hj.v);
-  hj_inv.v = hj_inv.v - hj_inv.v * (hj.v * hj_inv.v - vec_set1(1.0f));
+  hj_inv = vec_reciprocal(hj);
   hjd_inv = pow_dimension_plus_one_vec(hj_inv);
   xj.v = r.v * hj_inv.v;
   kernel_deval_vec(&xj, &wj, &wj_dx);
@@ -888,7 +1067,9 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force(
 
   /* Now, convolve with the kernel */
   visc_term.v = vec_set1(0.5f) * visc.v * (wi_dr.v + wj_dr.v) * ri.v;
-  sph_term.v = (piPOrho.v * wi_dr.v + pjPOrho.v * wj_dr.v) * ri.v;
+  sph_term.v =
+      (grad_hi.v * piPOrho2.v * wi_dr.v + grad_hj.v * pjPOrho2.v * wj_dr.v) *
+      ri.v;
 
   /* Eventually get the acceleration */
   acc.v = visc_term.v + sph_term.v;
diff --git a/src/hydro/Gadget2/hydro_io.h b/src/hydro/Gadget2/hydro_io.h
index 433aef64c388c8bc4989e883f10a8f0d3eeb30e9..162d368dd073be2fd0f06f4ecbc1431fb34e7798 100644
--- a/src/hydro/Gadget2/hydro_io.h
+++ b/src/hydro/Gadget2/hydro_io.h
@@ -57,12 +57,12 @@ void hydro_read_particles(struct part* parts, struct io_props* list,
 
 float convert_u(struct engine* e, struct part* p) {
 
-  return hydro_get_internal_energy(p, 0);
+  return hydro_get_internal_energy(p);
 }
 
 float convert_P(struct engine* e, struct part* p) {
 
-  return hydro_get_pressure(p, 0);
+  return hydro_get_pressure(p);
 }
 
 /**
diff --git a/src/hydro/Gadget2/hydro_part.h b/src/hydro/Gadget2/hydro_part.h
index 4bbbf0aede12b692b15442b71a03ffbbcf2f8378..69ae79666e1db4e4f405c653cfc533606989a73a 100644
--- a/src/hydro/Gadget2/hydro_part.h
+++ b/src/hydro/Gadget2/hydro_part.h
@@ -42,6 +42,9 @@ struct xpart {
   /* Velocity at the last full step. */
   float v_full[3];
 
+  /* Entropy at the last full step. */
+  float entropy_full;
+
   /* Additional data used to record cooling information */
   struct cooling_xpart_data cooling_data;
 
@@ -50,6 +53,12 @@ struct xpart {
 /* Data of a single particle. */
 struct part {
 
+  /* Particle ID. */
+  long long id;
+
+  /* Pointer to corresponding gravity part. */
+  struct gpart* gpart;
+
   /* Particle position. */
   double x[3];
 
@@ -65,12 +74,6 @@ struct part {
   /* Particle mass. */
   float mass;
 
-  /* Particle time of beginning of time-step. */
-  int ti_begin;
-
-  /* Particle time of end of time-step. */
-  int ti_end;
-
   /* Particle density. */
   float rho;
 
@@ -124,11 +127,18 @@ struct part {
     } force;
   };
 
-  /* Particle ID. */
-  long long id;
+  /* Time-step length */
+  timebin_t time_bin;
 
-  /* Pointer to corresponding gravity part. */
-  struct gpart* gpart;
+#ifdef SWIFT_DEBUG_CHECKS
+
+  /* Time of the last drift */
+  integertime_t ti_drift;
+
+  /* Time of the last kick */
+  integertime_t ti_kick;
+
+#endif
 
 } SWIFT_STRUCT_ALIGN;
 
diff --git a/src/hydro/Gizmo/hydro.h b/src/hydro/Gizmo/hydro.h
index 1c64291ee64dd770b1f1a76371f67a34230365c7..c59af05460157a756c15d8ca84af8a7834fde2d3 100644
--- a/src/hydro/Gizmo/hydro.h
+++ b/src/hydro/Gizmo/hydro.h
@@ -178,11 +178,10 @@ __attribute__((always_inline)) INLINE static void hydro_end_density(
  * @param timeBase Conversion factor between integer time and physical time.
  */
 __attribute__((always_inline)) INLINE static void hydro_prepare_force(
-    struct part* restrict p, struct xpart* restrict xp, int ti_current,
-    double timeBase) {
+    struct part* restrict p, struct xpart* restrict xp) {
 
   /* Set the physical time step */
-  p->force.dt = (p->ti_end - p->ti_begin) * timeBase;
+  p->force.dt = get_timestep(p->time_bin, 0.);  // MATTHIEU 0
 
   /* Initialize time step criterion variables */
   p->timestepvars.vmax = 0.0f;
@@ -233,6 +232,16 @@ __attribute__((always_inline)) INLINE static void hydro_reset_acceleration(
   p->force.h_dt = 0.0f;
 }
 
+/**
+ * @brief Sets the values to be predicted in the drifts to their values at a
+ * kick time
+ *
+ * @param p The particle.
+ * @param xp The extended data of this particle.
+ */
+__attribute__((always_inline)) INLINE static void hydro_reset_predicted_values(
+    struct part* restrict p, const struct xpart* restrict xp) {}
+
 /**
  * @brief Converts the hydrodynamic variables from the initial condition file to
  * conserved variables that can be used during the integration
@@ -250,7 +259,7 @@ __attribute__((always_inline)) INLINE static void hydro_reset_acceleration(
  * @param p The particle to act upon.
  */
 __attribute__((always_inline)) INLINE static void hydro_convert_quantities(
-    struct part* p) {
+    struct part* p, struct xpart* xp) {
 
   const float volume = p->geometry.volume;
   const float m = p->conserved.mass;
@@ -283,8 +292,7 @@ __attribute__((always_inline)) INLINE static void hydro_convert_quantities(
  * @param timeBase Conversion factor between integer and physical time.
  */
 __attribute__((always_inline)) INLINE static void hydro_predict_extra(
-    struct part* p, struct xpart* xp, float dt, int t0, int t1,
-    double timeBase) {
+    struct part* p, struct xpart* xp, float dt) {
 
   const float h_inv = 1.0f / p->h;
 
@@ -367,9 +375,10 @@ __attribute__((always_inline)) INLINE static void hydro_end_force(
  * @param half_dt Half the physical time step.
  */
 __attribute__((always_inline)) INLINE static void hydro_kick_extra(
-    struct part* p, struct xpart* xp, float dt, float half_dt) {
+    struct part* p, struct xpart* xp, float dt) {
 
   float oldm, oldp[3], anew[3];
+  const float half_dt = 0.5f * dt;  // MATTHIEU
 
   /* Retrieve the current value of the gravitational acceleration from the
      gpart. We are only allowed to do this because this is the kick. We still
@@ -441,10 +450,9 @@ __attribute__((always_inline)) INLINE static void hydro_kick_extra(
  * @brief Returns the internal energy of a particle
  *
  * @param p The particle of interest.
- * @param dt Time since the last kick.
  */
 __attribute__((always_inline)) INLINE static float hydro_get_internal_energy(
-    const struct part* restrict p, float dt) {
+    const struct part* restrict p) {
 
   return p->primitives.P / hydro_gamma_minus_one / p->primitives.rho;
 }
@@ -453,10 +461,9 @@ __attribute__((always_inline)) INLINE static float hydro_get_internal_energy(
  * @brief Returns the entropy of a particle
  *
  * @param p The particle of interest.
- * @param dt Time since the last kick.
  */
 __attribute__((always_inline)) INLINE static float hydro_get_entropy(
-    const struct part* restrict p, float dt) {
+    const struct part* restrict p) {
 
   return p->primitives.P / pow_gamma(p->primitives.rho);
 }
@@ -465,10 +472,9 @@ __attribute__((always_inline)) INLINE static float hydro_get_entropy(
  * @brief Returns the sound speed of a particle
  *
  * @param p The particle of interest.
- * @param dt Time since the last kick.
  */
 __attribute__((always_inline)) INLINE static float hydro_get_soundspeed(
-    const struct part* restrict p, float dt) {
+    const struct part* restrict p) {
 
   return sqrtf(hydro_gamma * p->primitives.P / p->primitives.rho);
 }
@@ -477,10 +483,9 @@ __attribute__((always_inline)) INLINE static float hydro_get_soundspeed(
  * @brief Returns the pressure of a particle
  *
  * @param p The particle of interest
- * @param dt Time since the last kick
  */
 __attribute__((always_inline)) INLINE static float hydro_get_pressure(
-    const struct part* restrict p, float dt) {
+    const struct part* restrict p) {
 
   return p->primitives.P;
 }
diff --git a/src/hydro/Gizmo/hydro_debug.h b/src/hydro/Gizmo/hydro_debug.h
index f4c071023a627b177fd06373856f25611fc9485d..a05ff9a7d96f04ca3354235540adc31386a2d2e3 100644
--- a/src/hydro/Gizmo/hydro_debug.h
+++ b/src/hydro/Gizmo/hydro_debug.h
@@ -24,8 +24,7 @@ __attribute__((always_inline)) INLINE static void hydro_debug_particle(
       "v=[%.3e,%.3e,%.3e], "
       "a=[%.3e,%.3e,%.3e], "
       "h=%.3e, "
-      "ti_begin=%d, "
-      "ti_end=%d, "
+      "time_bin=%d, "
       "primitives={"
       "v=[%.3e,%.3e,%.3e], "
       "rho=%.3e, "
@@ -54,9 +53,9 @@ __attribute__((always_inline)) INLINE static void hydro_debug_particle(
       "curl_v=[%.3e,%.3e,%.3e], "
       "wcount=%.3e}\n",
       p->x[0], p->x[1], p->x[2], p->v[0], p->v[1], p->v[2], p->a_hydro[0],
-      p->a_hydro[1], p->a_hydro[2], p->h, p->ti_begin, p->ti_end,
-      p->primitives.v[0], p->primitives.v[1], p->primitives.v[2],
-      p->primitives.rho, p->primitives.P, p->primitives.gradients.rho[0],
+      p->a_hydro[1], p->a_hydro[2], p->h, p->time_bin, p->primitives.v[0],
+      p->primitives.v[1], p->primitives.v[2], p->primitives.rho,
+      p->primitives.P, p->primitives.gradients.rho[0],
       p->primitives.gradients.rho[1], p->primitives.gradients.rho[2],
       p->primitives.gradients.v[0][0], p->primitives.gradients.v[0][1],
       p->primitives.gradients.v[0][2], p->primitives.gradients.v[1][0],
diff --git a/src/hydro/Gizmo/hydro_iact.h b/src/hydro/Gizmo/hydro_iact.h
index cf2b9a223b49c3ce2fbd6874b83c523e8213a5ce..aba6bd53c1c9557929426c11a0986e5f02888874 100644
--- a/src/hydro/Gizmo/hydro_iact.h
+++ b/src/hydro/Gizmo/hydro_iact.h
@@ -411,7 +411,12 @@ __attribute__((always_inline)) INLINE static void runner_iact_fluxes_common(
      UPDATE particle j.
      ==> we update particle j if (MODE IS 1) OR (j IS INACTIVE)
   */
-  if (mode == 1 || pj->ti_end > pi->ti_end) {
+
+  // MATTHIEU
+  const integertime_t pj_ti_end = 0;  // get_integer_time_end(pj->time_bin);
+  const integertime_t pi_ti_end = 0;  // get_integer_time_end(pi->time_bin);
+
+  if (mode == 1 || pj_ti_end > pi_ti_end) {
     /* Store mass flux */
     mflux = dtj * Anorm * totflux[0];
     pj->gravity.mflux[0] -= mflux * dx[0];
diff --git a/src/hydro/Gizmo/hydro_part.h b/src/hydro/Gizmo/hydro_part.h
index c4919ff173c64a4a83a5d1bf61ab82697cc03096..f6592ca107d8d2c6970f34ebd3929e226b53a355 100644
--- a/src/hydro/Gizmo/hydro_part.h
+++ b/src/hydro/Gizmo/hydro_part.h
@@ -38,6 +38,12 @@ struct xpart {
 /* Data of a single particle. */
 struct part {
 
+  /* Particle ID. */
+  long long id;
+
+  /* Associated gravitas. */
+  struct gpart *gpart;
+
   /* Particle position. */
   double x[3];
 
@@ -50,12 +56,6 @@ struct part {
   /* Particle smoothing length. */
   float h;
 
-  /* Particle time of beginning of time-step. */
-  int ti_begin;
-
-  /* Particle time of end of time-step. */
-  int ti_end;
-
   /* Old internal energy flux */
   float du_dt;
 
@@ -197,11 +197,18 @@ struct part {
 
   } gravity;
 
-  /* Particle ID. */
-  long long id;
+  /* Time-step length */
+  timebin_t time_bin;
 
-  /* Associated gravitas. */
-  struct gpart *gpart;
+#ifdef SWIFT_DEBUG_CHECKS
+
+  /* Time of the last drift */
+  integertime_t ti_drift;
+
+  /* Time of the last kick */
+  integertime_t ti_kick;
+
+#endif
 
 } SWIFT_STRUCT_ALIGN;
 
diff --git a/src/hydro/Minimal/hydro.h b/src/hydro/Minimal/hydro.h
index beb6f98b8c0d781aa709fb6ee3ca564a52704db2..20856b7e038855e22aa3776a74ba9f495ff6c93f 100644
--- a/src/hydro/Minimal/hydro.h
+++ b/src/hydro/Minimal/hydro.h
@@ -49,26 +49,22 @@
  * energy from the thermodynamic variable.
  *
  * @param p The particle of interest
- * @param dt Time since the last kick
  */
 __attribute__((always_inline)) INLINE static float hydro_get_internal_energy(
-    const struct part *restrict p, float dt) {
+    const struct part *restrict p) {
 
-  return p->u + p->u_dt * dt;
+  return p->u;
 }
 
 /**
  * @brief Returns the pressure of a particle
  *
  * @param p The particle of interest
- * @param dt Time since the last kick
  */
 __attribute__((always_inline)) INLINE static float hydro_get_pressure(
-    const struct part *restrict p, float dt) {
-
-  const float u = p->u + p->u_dt * dt;
+    const struct part *restrict p) {
 
-  return gas_pressure_from_internal_energy(p->rho, u);
+  return gas_pressure_from_internal_energy(p->rho, p->u);
 }
 
 /**
@@ -79,24 +75,20 @@ __attribute__((always_inline)) INLINE static float hydro_get_pressure(
  * the thermodynamic variable.
  *
  * @param p The particle of interest
- * @param dt Time since the last kick
  */
 __attribute__((always_inline)) INLINE static float hydro_get_entropy(
-    const struct part *restrict p, float dt) {
-
-  const float u = p->u + p->u_dt * dt;
+    const struct part *restrict p) {
 
-  return gas_entropy_from_internal_energy(p->rho, u);
+  return gas_entropy_from_internal_energy(p->rho, p->u);
 }
 
 /**
  * @brief Returns the sound speed of a particle
  *
  * @param p The particle of interest
- * @param dt Time since the last kick
  */
 __attribute__((always_inline)) INLINE static float hydro_get_soundspeed(
-    const struct part *restrict p, float dt) {
+    const struct part *restrict p) {
 
   return p->force.soundspeed;
 }
@@ -124,68 +116,31 @@ __attribute__((always_inline)) INLINE static float hydro_get_mass(
 }
 
 /**
- * @brief Modifies the thermal state of a particle to the imposed internal
- * energy
+ * @brief Returns the time derivative of internal energy of a particle
  *
- * This overwrites the current state of the particle but does *not* change its
- * time-derivatives. Internal energy, pressure, sound-speed and signal velocity
- * will be updated.
+ * We assume a constant density.
  *
- * @param p The particle
- * @param u The new internal energy
+ * @param p The particle of interest
  */
-__attribute__((always_inline)) INLINE static void hydro_set_internal_energy(
-    struct part *restrict p, float u) {
-
-  p->u = u;
-
-  /* Compute the new pressure */
-  const float pressure = gas_pressure_from_internal_energy(p->rho, p->u);
-
-  /* Compute the new sound speed */
-  const float soundspeed = gas_soundspeed_from_internal_energy(p->rho, p->u);
-
-  /* Update the signal velocity */
-  const float v_sig_old = p->force.v_sig;
-  const float v_sig_new = p->force.v_sig - p->force.soundspeed + soundspeed;
-  const float v_sig = max(v_sig_old, v_sig_new);
+__attribute__((always_inline)) INLINE static float hydro_get_internal_energy_dt(
+    const struct part *restrict p) {
 
-  p->force.soundspeed = soundspeed;
-  p->force.pressure = pressure;
-  p->force.v_sig = v_sig;
+  return p->u_dt;
 }
 
 /**
- * @brief Modifies the thermal state of a particle to the imposed entropy
+ * @brief Returns the time derivative of internal energy of a particle
  *
- * This overwrites the current state of the particle but does *not* change its
- * time-derivatives. Internal energy, pressure, sound-speed and signal velocity
- * will be updated.
+ * We assume a constant density.
  *
- * @param p The particle
- * @param S The new entropy
+ * @param p The particle of interest.
+ * @param du_dt The new time derivative of the internal energy.
  */
-__attribute__((always_inline)) INLINE static void hydro_set_entropy(
-    struct part *restrict p, float S) {
-
-  p->u = gas_internal_energy_from_entropy(p->rho, S);
-
-  /* Compute the pressure */
-  const float pressure = gas_pressure_from_internal_energy(p->rho, p->u);
-
-  /* Compute the new sound speed */
-  const float soundspeed = gas_soundspeed_from_internal_energy(p->rho, p->u);
-
-  /* Update the signal velocity */
-  const float v_sig_old = p->force.v_sig;
-  const float v_sig_new = p->force.v_sig - p->force.soundspeed + soundspeed;
-  const float v_sig = max(v_sig_old, v_sig_new);
+__attribute__((always_inline)) INLINE static void hydro_set_internal_energy_dt(
+    struct part *restrict p, float du_dt) {
 
-  p->force.soundspeed = soundspeed;
-  p->force.pressure = pressure;
-  p->force.v_sig = v_sig;
+  p->u_dt = du_dt;
 }
-
 /**
  * @brief Computes the hydro time-step of a given particle
  *
@@ -210,26 +165,6 @@ __attribute__((always_inline)) INLINE static float hydro_compute_timestep(
   return dt_cfl;
 }
 
-/**
- * @brief Initialises the particles for the first time
- *
- * This function is called only once just after the ICs have been
- * read in to do some conversions or assignments between the particle
- * and extended particle fields.
- *
- * @param p The particle to act upon
- * @param xp The extended particle data to act upon
- */
-__attribute__((always_inline)) INLINE static void hydro_first_init_part(
-    struct part *restrict p, struct xpart *restrict xp) {
-
-  p->ti_begin = 0;
-  p->ti_end = 0;
-  xp->v_full[0] = p->v[0];
-  xp->v_full[1] = p->v[1];
-  xp->v_full[2] = p->v[2];
-}
-
 /**
  * @brief Prepares a particle for the density calculation.
  *
@@ -292,16 +227,12 @@ __attribute__((always_inline)) INLINE static void hydro_end_density(
  *
  * @param p The particle to act upon
  * @param xp The extended particle data to act upon
- * @param ti_current The current time (on the timeline)
- * @param timeBase The minimal time-step size
  */
 __attribute__((always_inline)) INLINE static void hydro_prepare_force(
-    struct part *restrict p, struct xpart *restrict xp, int ti_current,
-    double timeBase) {
+    struct part *restrict p, struct xpart *restrict xp) {
 
   /* Compute the pressure */
-  const float half_dt = (ti_current - (p->ti_begin + p->ti_end) / 2) * timeBase;
-  const float pressure = hydro_get_pressure(p, half_dt);
+  const float pressure = gas_pressure_from_internal_energy(p->rho, p->u);
 
   /* Compute the sound speed */
   const float soundspeed = gas_soundspeed_from_pressure(p->rho, pressure);
@@ -339,6 +270,25 @@ __attribute__((always_inline)) INLINE static void hydro_reset_acceleration(
   p->force.v_sig = 0.0f;
 }
 
+/**
+ * @brief Sets the values to be predicted in the drifts to their values at a
+ * kick time
+ *
+ * @param p The particle.
+ * @param xp The extended data of this particle.
+ */
+__attribute__((always_inline)) INLINE static void hydro_reset_predicted_values(
+    struct part *restrict p, const struct xpart *restrict xp) {
+
+  /* Re-set the predicted velocities */
+  p->v[0] = xp->v_full[0];
+  p->v[1] = xp->v_full[1];
+  p->v[2] = xp->v_full[2];
+
+  /* Re-set the entropy */
+  p->u = xp->u_full;
+}
+
 /**
  * @brief Predict additional particle fields forward in time when drifting
  *
@@ -348,13 +298,9 @@ __attribute__((always_inline)) INLINE static void hydro_reset_acceleration(
  * @param p The particle.
  * @param xp The extended data of the particle.
  * @param dt The drift time-step.
- * @param t0 The time at the start of the drift (on the timeline).
- * @param t1 The time at the end of the drift (on the timeline).
- * @param timeBase The minimal time-step size.
  */
 __attribute__((always_inline)) INLINE static void hydro_predict_extra(
-    struct part *restrict p, const struct xpart *restrict xp, float dt, int t0,
-    int t1, double timeBase) {
+    struct part *restrict p, const struct xpart *restrict xp, float dt) {
 
   const float h_inv = 1.f / p->h;
 
@@ -372,9 +318,11 @@ __attribute__((always_inline)) INLINE static void hydro_predict_extra(
   else
     p->rho *= expf(w2);
 
-  /* Drift the pressure */
-  const float dt_entr = (t1 - (p->ti_begin + p->ti_end) / 2) * timeBase;
-  const float pressure = hydro_get_pressure(p, dt_entr);
+  /* Predict the internal energy */
+  p->u += p->u_dt * dt;
+
+  /* Compute the new pressure */
+  const float pressure = gas_pressure_from_internal_energy(p->rho, p->u);
 
   /* Compute the new sound speed */
   const float soundspeed = gas_soundspeed_from_pressure(p->rho, pressure);
@@ -407,24 +355,18 @@ __attribute__((always_inline)) INLINE static void hydro_end_force(
  * @param p The particle to act upon
  * @param xp The particle extended data to act upon
  * @param dt The time-step for this kick
- * @param half_dt The half time-step for this kick
  */
 __attribute__((always_inline)) INLINE static void hydro_kick_extra(
-    struct part *restrict p, struct xpart *restrict xp, float dt,
-    float half_dt) {
+    struct part *restrict p, struct xpart *restrict xp, float dt) {
 
   /* Do not decrease the energy by more than a factor of 2*/
-  const float u_change = p->u_dt * dt;
-  if (u_change > -0.5f * p->u)
-    p->u += u_change;
-  else
-    p->u *= 0.5f;
-
-  /* Do not 'overcool' when timestep increases */
-  if (p->u + p->u_dt * half_dt < 0.5f * p->u) p->u_dt = -0.5f * p->u / half_dt;
+  if (dt > 0. && p->u_dt * dt < -0.5f * xp->u_full) {
+    p->u_dt = -0.5f * xp->u_full / dt;
+  }
+  xp->u_full += p->u_dt * dt;
 
   /* Compute the pressure */
-  const float pressure = gas_pressure_from_internal_energy(p->rho, p->u);
+  const float pressure = gas_pressure_from_internal_energy(p->rho, xp->u_full);
 
   /* Compute the sound speed */
   const float soundspeed = gas_soundspeed_from_internal_energy(p->rho, p->u);
@@ -442,9 +384,10 @@ __attribute__((always_inline)) INLINE static void hydro_kick_extra(
  * This can be used to convert internal energy into entropy for instance.
  *
  * @param p The particle to act upon
+ * @param xp The extended particle to act upon
  */
 __attribute__((always_inline)) INLINE static void hydro_convert_quantities(
-    struct part *restrict p) {
+    struct part *restrict p, struct xpart *restrict xp) {
 
   /* Compute the pressure */
   const float pressure = gas_pressure_from_internal_energy(p->rho, p->u);
@@ -456,4 +399,27 @@ __attribute__((always_inline)) INLINE static void hydro_convert_quantities(
   p->force.soundspeed = soundspeed;
 }
 
+/**
+ * @brief Initialises the particles for the first time
+ *
+ * This function is called only once just after the ICs have been
+ * read in to do some conversions or assignments between the particle
+ * and extended particle fields.
+ *
+ * @param p The particle to act upon
+ * @param xp The extended particle data to act upon
+ */
+__attribute__((always_inline)) INLINE static void hydro_first_init_part(
+    struct part *restrict p, struct xpart *restrict xp) {
+
+  p->time_bin = 0;
+  xp->v_full[0] = p->v[0];
+  xp->v_full[1] = p->v[1];
+  xp->v_full[2] = p->v[2];
+  xp->u_full = p->u;
+
+  hydro_reset_acceleration(p);
+  hydro_init_part(p);
+}
+
 #endif /* SWIFT_MINIMAL_HYDRO_H */
diff --git a/src/hydro/Minimal/hydro_debug.h b/src/hydro/Minimal/hydro_debug.h
index 16ae62413a0d76b7bf871e615fe5684219752fee..876ce148824489d4c43358c2c519aa3b90dcf002 100644
--- a/src/hydro/Minimal/hydro_debug.h
+++ b/src/hydro/Minimal/hydro_debug.h
@@ -40,12 +40,11 @@ __attribute__((always_inline)) INLINE static void hydro_debug_particle(
       "v=[%.3e,%.3e,%.3e],v_full=[%.3e,%.3e,%.3e] \n a=[%.3e,%.3e,%.3e], "
       "u=%.3e, du/dt=%.3e v_sig=%.3e, P=%.3e\n"
       "h=%.3e, dh/dt=%.3e wcount=%d, m=%.3e, dh_drho=%.3e, rho=%.3e, "
-      "t_begin=%d, t_end=%d\n",
+      "time_bin=%d\n",
       p->x[0], p->x[1], p->x[2], p->v[0], p->v[1], p->v[2], xp->v_full[0],
       xp->v_full[1], xp->v_full[2], p->a_hydro[0], p->a_hydro[1], p->a_hydro[2],
       p->u, p->u_dt, p->force.v_sig, p->force.pressure, p->h, p->force.h_dt,
-      (int)p->density.wcount, p->mass, p->density.rho_dh, p->rho, p->ti_begin,
-      p->ti_end);
+      (int)p->density.wcount, p->mass, p->density.rho_dh, p->rho, p->time_bin);
 }
 
 #endif /* SWIFT_MINIMAL_HYDRO_DEBUG_H */
diff --git a/src/hydro/Minimal/hydro_io.h b/src/hydro/Minimal/hydro_io.h
index 01a75b17fd5577cfcfb48d3afac22579f30fcf7a..8c83349a3e17d6b3375663698af7beeeab0636bc 100644
--- a/src/hydro/Minimal/hydro_io.h
+++ b/src/hydro/Minimal/hydro_io.h
@@ -71,12 +71,12 @@ void hydro_read_particles(struct part* parts, struct io_props* list,
 
 float convert_S(struct engine* e, struct part* p) {
 
-  return hydro_get_entropy(p, 0);
+  return hydro_get_entropy(p);
 }
 
 float convert_P(struct engine* e, struct part* p) {
 
-  return hydro_get_pressure(p, 0);
+  return hydro_get_pressure(p);
 }
 
 /**
diff --git a/src/hydro/Minimal/hydro_part.h b/src/hydro/Minimal/hydro_part.h
index 8542177278998d5e0b830dc164988611549ef24d..dabae1a546d66f61db4f9796c21b71817ca20aac 100644
--- a/src/hydro/Minimal/hydro_part.h
+++ b/src/hydro/Minimal/hydro_part.h
@@ -49,6 +49,9 @@ struct xpart {
   /*! Velocity at the last full step. */
   float v_full[3];
 
+  /*! Internal energy at the last full step. */
+  float u_full;
+
   /*! Additional data used to record cooling information */
   struct cooling_xpart_data cooling_data;
 
@@ -63,6 +66,12 @@ struct xpart {
  */
 struct part {
 
+  /*! Particle unique ID. */
+  long long id;
+
+  /*! Pointer to corresponding gravity part. */
+  struct gpart* gpart;
+
   /*! Particle position. */
   double x[3];
 
@@ -78,12 +87,6 @@ struct part {
   /*! Particle smoothing length. */
   float h;
 
-  /*! Time at the beginning of time-step. */
-  int ti_begin;
-
-  /*! Time at the end of time-step. */
-  int ti_end;
-
   /*! Particle internal energy. */
   float u;
 
@@ -143,11 +146,18 @@ struct part {
     } force;
   };
 
-  /*! Particle unique ID. */
-  long long id;
+  /*! Time-step length */
+  timebin_t time_bin;
 
-  /*! Pointer to corresponding gravity part. */
-  struct gpart* gpart;
+#ifdef SWIFT_DEBUG_CHECKS
+
+  /* Time of the last drift */
+  integertime_t ti_drift;
+
+  /* Time of the last kick */
+  integertime_t ti_kick;
+
+#endif
 
 } SWIFT_STRUCT_ALIGN;
 
diff --git a/src/hydro/PressureEntropy/hydro.h b/src/hydro/PressureEntropy/hydro.h
index 8c063596efd3be97ebb4da6b6879ac06122bd357..f22bb8a13a8ba4d896a77bd4c4f5e86bed5a5960 100644
--- a/src/hydro/PressureEntropy/hydro.h
+++ b/src/hydro/PressureEntropy/hydro.h
@@ -43,50 +43,42 @@
  * @brief Returns the internal energy of a particle
  *
  * @param p The particle of interest
- * @param dt Time since the last kick
  */
 __attribute__((always_inline)) INLINE static float hydro_get_internal_energy(
-    const struct part *restrict p, float dt) {
-
-  const float entropy = p->entropy + p->entropy_dt * dt;
+    const struct part *restrict p) {
 
-  return gas_internal_energy_from_entropy(p->rho_bar, entropy);
+  return gas_internal_energy_from_entropy(p->rho_bar, p->entropy);
 }
 
 /**
  * @brief Returns the pressure of a particle
  *
  * @param p The particle of interest
- * @param dt Time since the last kick
  */
 __attribute__((always_inline)) INLINE static float hydro_get_pressure(
-    const struct part *restrict p, float dt) {
-
-  const float entropy = p->entropy + p->entropy_dt * dt;
+    const struct part *restrict p) {
 
-  return gas_pressure_from_entropy(p->rho_bar, entropy);
+  return gas_pressure_from_entropy(p->rho_bar, p->entropy);
 }
 
 /**
  * @brief Returns the entropy of a particle
  *
  * @param p The particle of interest
- * @param dt Time since the last kick
  */
 __attribute__((always_inline)) INLINE static float hydro_get_entropy(
-    const struct part *restrict p, float dt) {
+    const struct part *restrict p) {
 
-  return p->entropy + p->entropy_dt * dt;
+  return p->entropy;
 }
 
 /**
  * @brief Returns the sound speed of a particle
  *
  * @param p The particle of interest
- * @param dt Time since the last kick
  */
 __attribute__((always_inline)) INLINE static float hydro_get_soundspeed(
-    const struct part *restrict p, float dt) {
+    const struct part *restrict p) {
 
   return p->force.soundspeed;
 }
@@ -114,72 +106,30 @@ __attribute__((always_inline)) INLINE static float hydro_get_mass(
 }
 
 /**
- * @brief Modifies the thermal state of a particle to the imposed internal
- * energy
+ * @brief Returns the time derivative of internal energy of a particle
  *
- * This overwrites the current state of the particle but does *not* change its
- * time-derivatives. Entropy, pressure, sound-speed and signal velocity will be
- * updated.
+ * We assume a constant density.
  *
- * @param p The particle
- * @param u The new internal energy
+ * @param p The particle of interest
  */
-__attribute__((always_inline)) INLINE static void hydro_set_internal_energy(
-    struct part *restrict p, float u) {
-
-  p->entropy = gas_entropy_from_internal_energy(p->rho_bar, u);
-  p->entropy_one_over_gamma = pow_one_over_gamma(p->entropy);
-
-  /* Compute the pressure */
-  const float pressure = gas_pressure_from_entropy(p->rho_bar, p->entropy);
-
-  /* Compute the sound speed from the pressure*/
-  const float soundspeed = gas_soundspeed_from_pressure(p->rho_bar, pressure);
-
-  /* Update the signal velocity */
-  const float v_sig_old = p->force.v_sig;
-  const float v_sig_new = p->force.v_sig - p->force.soundspeed + soundspeed;
-  const float v_sig = max(v_sig_old, v_sig_new);
-
-  const float rho_bar_inv = 1.f / p->rho_bar;
+__attribute__((always_inline)) INLINE static float hydro_get_internal_energy_dt(
+    const struct part *restrict p) {
 
-  p->force.soundspeed = soundspeed;
-  p->force.P_over_rho2 = pressure * rho_bar_inv * rho_bar_inv;
-  p->force.v_sig = v_sig;
+  return gas_internal_energy_from_entropy(p->rho_bar, p->entropy_dt);
 }
 
 /**
- * @brief Modifies the thermal state of a particle to the imposed entropy
+ * @brief Returns the time derivative of internal energy of a particle
  *
- * This overwrites the current state of the particle but does *not* change its
- * time-derivatives. Entropy, pressure, sound-speed and signal velocity will be
- * updated.
+ * We assume a constant density.
  *
- * @param p The particle
- * @param S The new entropy
+ * @param p The particle of interest.
+ * @param du_dt The new time derivative of the internal energy.
  */
-__attribute__((always_inline)) INLINE static void hydro_set_entropy(
-    struct part *restrict p, float S) {
+__attribute__((always_inline)) INLINE static void hydro_set_internal_energy_dt(
+    struct part *restrict p, float du_dt) {
 
-  p->entropy = S;
-  p->entropy_one_over_gamma = pow_one_over_gamma(p->entropy);
-
-  /* Compute the pressure */
-  const float pressure = gas_pressure_from_entropy(p->rho_bar, p->entropy);
-
-  /* Compute the sound speed from the pressure*/
-  const float soundspeed = gas_soundspeed_from_pressure(p->rho_bar, pressure);
-
-  /* Update the signal velocity */
-  const float v_sig_old = p->force.v_sig;
-  const float v_sig_new = p->force.v_sig - p->force.soundspeed + soundspeed;
-  const float v_sig = max(v_sig_old, v_sig_new);
-
-  const float rho_bar_inv = 1.f / p->rho_bar;
-
-  p->force.soundspeed = soundspeed;
-  p->force.P_over_rho2 = pressure * rho_bar_inv * rho_bar_inv;
-  p->force.v_sig = v_sig;
+  p->entropy_dt = gas_entropy_from_internal_energy(p->rho_bar, du_dt);
 }
 
 /**
@@ -202,27 +152,6 @@ __attribute__((always_inline)) INLINE static float hydro_compute_timestep(
   return dt_cfl;
 }
 
-/**
- * @brief Initialises the particles for the first time
- *
- * This function is called only once just after the ICs have been
- * read in to do some conversions.
- *
- * @param p The particle to act upon
- * @param xp The extended particle data to act upon
- */
-__attribute__((always_inline)) INLINE static void hydro_first_init_part(
-    struct part *restrict p, struct xpart *restrict xp) {
-
-  p->ti_begin = 0;
-  p->ti_end = 0;
-  p->rho_bar = 0.f;
-  p->entropy_one_over_gamma = pow_one_over_gamma(p->entropy);
-  xp->v_full[0] = p->v[0];
-  xp->v_full[1] = p->v[1];
-  xp->v_full[2] = p->v[2];
-}
-
 /**
  * @brief Prepares a particle for the density calculation.
  *
@@ -302,12 +231,9 @@ __attribute__((always_inline)) INLINE static void hydro_end_density(
  *
  * @param p The particle to act upon
  * @param xp The extended particle data to act upon
- * @param ti_current The current time (on the timeline)
- * @param timeBase The minimal time-step size
  */
 __attribute__((always_inline)) INLINE static void hydro_prepare_force(
-    struct part *restrict p, struct xpart *restrict xp, int ti_current,
-    double timeBase) {
+    struct part *restrict p, struct xpart *restrict xp) {
 
   const float fac_mu = 1.f; /* Will change with cosmological integration */
 
@@ -320,9 +246,7 @@ __attribute__((always_inline)) INLINE static void hydro_prepare_force(
   const float abs_div_v = fabsf(p->density.div_v);
 
   /* Compute the pressure */
-  const float half_dt = (ti_current - (p->ti_begin + p->ti_end) / 2) * timeBase;
-  const float entropy = hydro_get_entropy(p, half_dt);
-  const float pressure = gas_pressure_from_entropy(p->rho_bar, entropy);
+  const float pressure = gas_pressure_from_entropy(p->rho_bar, p->entropy);
 
   /* Compute the sound speed from the pressure*/
   const float soundspeed = gas_soundspeed_from_pressure(p->rho_bar, pressure);
@@ -375,19 +299,34 @@ __attribute__((always_inline)) INLINE static void hydro_reset_acceleration(
   p->force.v_sig = 0.0f;
 }
 
+/**
+ * @brief Sets the values to be predicted in the drifts to their values at a
+ * kick time
+ *
+ * @param p The particle.
+ * @param xp The extended data of this particle.
+ */
+__attribute__((always_inline)) INLINE static void hydro_reset_predicted_values(
+    struct part *restrict p, const struct xpart *restrict xp) {
+
+  /* Re-set the predicted velocities */
+  p->v[0] = xp->v_full[0];
+  p->v[1] = xp->v_full[1];
+  p->v[2] = xp->v_full[2];
+
+  /* Re-set the entropy */
+  p->entropy = xp->entropy_full;
+}
+
 /**
  * @brief Predict additional particle fields forward in time when drifting
  *
  * @param p The particle
  * @param xp The extended data of the particle
  * @param dt The drift time-step.
- * @param t0 The time at the start of the drift (on the timeline).
- * @param t1 The time at the end of the drift (on the timeline).
- * @param timeBase The minimal time-step size
  */
 __attribute__((always_inline)) INLINE static void hydro_predict_extra(
-    struct part *restrict p, const struct xpart *restrict xp, float dt, int t0,
-    int t1, double timeBase) {
+    struct part *restrict p, const struct xpart *restrict xp, float dt) {
 
   const float h_inv = 1.f / p->h;
 
@@ -408,12 +347,11 @@ __attribute__((always_inline)) INLINE static void hydro_predict_extra(
     p->rho_bar *= expf(w2);
   }
 
-  /* Drift the entropy */
-  const float dt_entr = (t1 - (p->ti_begin + p->ti_end) / 2) * timeBase;
-  const float entropy = hydro_get_entropy(p, dt_entr);
+  /* Predict the entropy */
+  p->entropy += p->entropy_dt * dt;
 
   /* Compute the pressure */
-  const float pressure = gas_pressure_from_entropy(p->rho_bar, entropy);
+  const float pressure = gas_pressure_from_entropy(p->rho_bar, p->entropy);
 
   /* Compute the new sound speed */
   const float soundspeed = gas_soundspeed_from_pressure(p->rho_bar, pressure);
@@ -423,7 +361,7 @@ __attribute__((always_inline)) INLINE static void hydro_predict_extra(
   const float P_over_rho2 = pressure * rho_bar_inv * rho_bar_inv;
 
   /* Update the variables */
-  p->entropy_one_over_gamma = pow_one_over_gamma(entropy);
+  p->entropy_one_over_gamma = pow_one_over_gamma(p->entropy);
   p->force.soundspeed = soundspeed;
   p->force.P_over_rho2 = P_over_rho2;
 }
@@ -453,22 +391,17 @@ __attribute__((always_inline)) INLINE static void hydro_end_force(
  * @param half_dt The half time-step for this kick
  */
 __attribute__((always_inline)) INLINE static void hydro_kick_extra(
-    struct part *restrict p, struct xpart *restrict xp, float dt,
-    float half_dt) {
+    struct part *restrict p, struct xpart *restrict xp, float dt) {
 
   /* Do not decrease the entropy (temperature) by more than a factor of 2*/
-  const float entropy_change = p->entropy_dt * dt;
-  if (entropy_change > -0.5f * p->entropy)
-    p->entropy += entropy_change;
-  else
-    p->entropy *= 0.5f;
-
-  /* Do not 'overcool' when timestep increases */
-  if (p->entropy + p->entropy_dt * half_dt < 0.5f * p->entropy)
-    p->entropy_dt = -0.5f * p->entropy / half_dt;
+  if (dt > 0. && p->entropy_dt * dt < -0.5f * xp->entropy_full) {
+    p->entropy_dt = -0.5f * xp->entropy_full / dt;
+  }
+  xp->entropy_full += p->entropy_dt * dt;
 
   /* Compute the pressure */
-  const float pressure = gas_pressure_from_entropy(p->rho_bar, p->entropy);
+  const float pressure =
+      gas_pressure_from_entropy(p->rho_bar, xp->entropy_full);
 
   /* Compute the new sound speed */
   const float soundspeed = gas_soundspeed_from_pressure(p->rho_bar, pressure);
@@ -490,10 +423,11 @@ __attribute__((always_inline)) INLINE static void hydro_kick_extra(
  * @param p The particle to act upon
  */
 __attribute__((always_inline)) INLINE static void hydro_convert_quantities(
-    struct part *restrict p) {
+    struct part *restrict p, struct xpart *restrict xp) {
 
   /* We read u in the entropy field. We now get S from u */
-  p->entropy = gas_entropy_from_internal_energy(p->rho_bar, p->entropy);
+  xp->entropy_full = gas_entropy_from_internal_energy(p->rho_bar, p->entropy);
+  p->entropy = xp->entropy_full;
   p->entropy_one_over_gamma = pow_one_over_gamma(p->entropy);
 
   /* Compute the pressure */
@@ -510,4 +444,27 @@ __attribute__((always_inline)) INLINE static void hydro_convert_quantities(
   p->force.P_over_rho2 = P_over_rho2;
 }
 
+/**
+ * @brief Initialises the particles for the first time
+ *
+ * This function is called only once just after the ICs have been
+ * read in to do some conversions.
+ *
+ * @param p The particle to act upon
+ * @param xp The extended particle data to act upon
+ */
+__attribute__((always_inline)) INLINE static void hydro_first_init_part(
+    struct part *restrict p, struct xpart *restrict xp) {
+
+  p->time_bin = 0;
+  p->rho_bar = 0.f;
+  p->entropy_one_over_gamma = pow_one_over_gamma(p->entropy);
+  xp->v_full[0] = p->v[0];
+  xp->v_full[1] = p->v[1];
+  xp->v_full[2] = p->v[2];
+
+  hydro_reset_acceleration(p);
+  hydro_init_part(p);
+}
+
 #endif /* SWIFT_PRESSURE_ENTROPY_HYDRO_H */
diff --git a/src/hydro/PressureEntropy/hydro_debug.h b/src/hydro/PressureEntropy/hydro_debug.h
index 486543793515795092e7cc97fe7b567b8230be3b..3a0a315a4fa0eb4710042e8020002691ed9c425a 100644
--- a/src/hydro/PressureEntropy/hydro_debug.h
+++ b/src/hydro/PressureEntropy/hydro_debug.h
@@ -29,7 +29,6 @@
  * Follows eqautions (19), (21) and (22) of Hopkins, P., MNRAS, 2013,
  * Volume 428, Issue 4, pp. 2840-2856 with a simple Balsara viscosity term.
  */
-
 __attribute__((always_inline)) INLINE static void hydro_debug_particle(
     const struct part* p, const struct xpart* xp) {
   printf(
@@ -37,14 +36,14 @@ __attribute__((always_inline)) INLINE static void hydro_debug_particle(
       "v=[%.3e,%.3e,%.3e],v_full=[%.3e,%.3e,%.3e] \n a=[%.3e,%.3e,%.3e],\n "
       "h=%.3e, wcount=%.3f, wcount_dh=%.3e, m=%.3e, dh_drho=%.3e, rho=%.3e, "
       "rho_bar=%.3e, P=%.3e, dP_dh=%.3e, P_over_rho2=%.3e, S=%.3e, S^1/g=%.3e, "
-      "dS/dt=%.3e,\nc=%.3e v_sig=%e dh/dt=%.3e t_begin=%d, t_end=%d\n",
+      "dS/dt=%.3e,\nc=%.3e v_sig=%e dh/dt=%.3e time_bin=%d\n",
       p->x[0], p->x[1], p->x[2], p->v[0], p->v[1], p->v[2], xp->v_full[0],
       xp->v_full[1], xp->v_full[2], p->a_hydro[0], p->a_hydro[1], p->a_hydro[2],
       p->h, p->density.wcount, p->density.wcount_dh, p->mass, p->density.rho_dh,
-      p->rho, p->rho_bar, hydro_get_pressure(p, 0.), p->density.pressure_dh,
+      p->rho, p->rho_bar, hydro_get_pressure(p), p->density.pressure_dh,
       p->force.P_over_rho2, p->entropy, p->entropy_one_over_gamma,
       p->entropy_dt, p->force.soundspeed, p->force.v_sig, p->force.h_dt,
-      p->ti_begin, p->ti_end);
+      p->time_bin);
 }
 
 #endif /* SWIFT_PRESSURE_ENTROPY_HYDRO_DEBUG_H */
diff --git a/src/hydro/PressureEntropy/hydro_io.h b/src/hydro/PressureEntropy/hydro_io.h
index 9914a656466f3f0d0a5eeb79b511706d7068ffc6..fcc8439f64d299b7dcb59e819f8dd273112ce25a 100644
--- a/src/hydro/PressureEntropy/hydro_io.h
+++ b/src/hydro/PressureEntropy/hydro_io.h
@@ -69,12 +69,12 @@ void hydro_read_particles(struct part* parts, struct io_props* list,
 
 float convert_u(struct engine* e, struct part* p) {
 
-  return hydro_get_internal_energy(p, 0);
+  return hydro_get_internal_energy(p);
 }
 
 float convert_P(struct engine* e, struct part* p) {
 
-  return hydro_get_pressure(p, 0);
+  return hydro_get_pressure(p);
 }
 
 /**
diff --git a/src/hydro/PressureEntropy/hydro_part.h b/src/hydro/PressureEntropy/hydro_part.h
index cac585ff79bae737f0e5c09860a38536cbf3a38c..b6e496918fa0e7989a8bddcfc5e8ea6b332c338e 100644
--- a/src/hydro/PressureEntropy/hydro_part.h
+++ b/src/hydro/PressureEntropy/hydro_part.h
@@ -41,6 +41,9 @@ struct xpart {
   /*! Velocity at the last full step. */
   float v_full[3];
 
+  /*! Entropy at the last full step. */
+  float entropy_full;
+
   /*! Additional data used to record cooling information */
   struct cooling_xpart_data cooling_data;
 
@@ -49,6 +52,12 @@ struct xpart {
 /* Data of a single particle. */
 struct part {
 
+  /*! Particle ID. */
+  long long id;
+
+  /*! Pointer to corresponding gravity part. */
+  struct gpart* gpart;
+
   /*! Particle position. */
   double x[3];
 
@@ -64,12 +73,6 @@ struct part {
   /*! Particle mass. */
   float mass;
 
-  /*! Particle time of beginning of time-step. */
-  int ti_begin;
-
-  /*! Particle time of end of time-step. */
-  int ti_end;
-
   /*! Particle density. */
   float rho;
 
@@ -132,11 +135,18 @@ struct part {
     } force;
   };
 
-  /*! Particle ID. */
-  long long id;
+  /* Time-step length */
+  timebin_t time_bin;
 
-  /*! Pointer to corresponding gravity part. */
-  struct gpart* gpart;
+#ifdef SWIFT_DEBUG_CHECKS
+
+  /* Time of the last drift */
+  integertime_t ti_drift;
+
+  /* Time of the last kick */
+  integertime_t ti_kick;
+
+#endif
 
 } SWIFT_STRUCT_ALIGN;
 
diff --git a/src/kernel_hydro.h b/src/kernel_hydro.h
index 8f38fc0d2b98988a48fe36edcbd2f9419d237d41..7bf2e01a719a29b731bb437096093b13ca086e37 100644
--- a/src/kernel_hydro.h
+++ b/src/kernel_hydro.h
@@ -362,6 +362,117 @@ __attribute__((always_inline)) INLINE static void kernel_deval_vec(
       dw_dx->v * kernel_constant_vec.v * kernel_gamma_inv_dim_plus_one_vec.v;
 }
 
+/* Define constant vectors for the Wendland C2 kernel coefficients. */
+#ifdef WENDLAND_C2_KERNEL
+static const vector wendland_const_c0 = FILL_VEC(4.f);
+static const vector wendland_const_c1 = FILL_VEC(-15.f);
+static const vector wendland_const_c2 = FILL_VEC(20.f);
+static const vector wendland_const_c3 = FILL_VEC(-10.f);
+static const vector wendland_const_c4 = FILL_VEC(0.f);
+static const vector wendland_const_c5 = FILL_VEC(1.f);
+#endif
+
+/**
+ * @brief Computes the kernel function and its derivative for two particles
+ * using interleaved vectors.
+ *
+ * Return 0 if $u > \\gamma = H/h$
+ *
+ * @param u The ratio of the distance to the smoothing length $u = x/h$.
+ * @param w (return) The value of the kernel function $W(x,h)$.
+ * @param dw_dx (return) The norm of the gradient of $|\\nabla W(x,h)|$.
+ * @param u2 The ratio of the distance to the smoothing length $u = x/h$ for
+ * second particle.
+ * @param w2 (return) The value of the kernel function $W(x,h)$ for second
+ * particle.
+ * @param dw_dx2 (return) The norm of the gradient of $|\\nabla W(x,h)|$ for
+ * second particle.
+ */
+__attribute__((always_inline)) INLINE static void kernel_deval_2_vec(
+    vector *u, vector *w, vector *dw_dx, vector *u2, vector *w2,
+    vector *dw_dx2) {
+
+  /* Go to the range [0,1[ from [0,H[ */
+  vector x, x2;
+  x.v = vec_mul(u->v, kernel_gamma_inv_vec.v);
+  x2.v = vec_mul(u2->v, kernel_gamma_inv_vec.v);
+
+#ifdef WENDLAND_C2_KERNEL
+  /* Init the iteration for Horner's scheme. */
+  w->v = vec_fma(wendland_const_c0.v, x.v, wendland_const_c1.v);
+  w2->v = vec_fma(wendland_const_c0.v, x2.v, wendland_const_c1.v);
+  dw_dx->v = wendland_const_c0.v;
+  dw_dx2->v = wendland_const_c0.v;
+
+  /* Calculate the polynomial interleaving vector operations */
+  dw_dx->v = vec_fma(dw_dx->v, x.v, w->v);
+  dw_dx2->v = vec_fma(dw_dx2->v, x2.v, w2->v);
+  w->v = vec_fma(x.v, w->v, wendland_const_c2.v);
+  w2->v = vec_fma(x2.v, w2->v, wendland_const_c2.v);
+
+  dw_dx->v = vec_fma(dw_dx->v, x.v, w->v);
+  dw_dx2->v = vec_fma(dw_dx2->v, x2.v, w2->v);
+  w->v = vec_fma(x.v, w->v, wendland_const_c3.v);
+  w2->v = vec_fma(x2.v, w2->v, wendland_const_c3.v);
+
+  dw_dx->v = vec_fma(dw_dx->v, x.v, w->v);
+  dw_dx2->v = vec_fma(dw_dx2->v, x2.v, w2->v);
+  w->v = vec_fma(x.v, w->v, wendland_const_c4.v);
+  w2->v = vec_fma(x2.v, w2->v, wendland_const_c4.v);
+
+  dw_dx->v = vec_fma(dw_dx->v, x.v, w->v);
+  dw_dx2->v = vec_fma(dw_dx2->v, x2.v, w2->v);
+  w->v = vec_fma(x.v, w->v, wendland_const_c5.v);
+  w2->v = vec_fma(x2.v, w2->v, wendland_const_c5.v);
+
+  /* Return everything */
+  w->v =
+      vec_mul(w->v, vec_mul(kernel_constant_vec.v, kernel_gamma_inv_dim_vec.v));
+  w2->v = vec_mul(w2->v,
+                  vec_mul(kernel_constant_vec.v, kernel_gamma_inv_dim_vec.v));
+  dw_dx->v = vec_mul(dw_dx->v, vec_mul(kernel_constant_vec.v,
+                                       kernel_gamma_inv_dim_plus_one_vec.v));
+  dw_dx2->v = vec_mul(dw_dx2->v, vec_mul(kernel_constant_vec.v,
+                                         kernel_gamma_inv_dim_plus_one_vec.v));
+#else
+
+  /* Load x and get the interval id. */
+  vector ind, ind2;
+  ind.m = vec_ftoi(vec_fmin(x.v * kernel_ivals_vec.v, kernel_ivals_vec.v));
+  ind2.m = vec_ftoi(vec_fmin(x2.v * kernel_ivals_vec.v, kernel_ivals_vec.v));
+
+  /* load the coefficients. */
+  vector c[kernel_degree + 1], c2[kernel_degree + 1];
+  for (int k = 0; k < VEC_SIZE; k++)
+    for (int j = 0; j < kernel_degree + 1; j++) {
+      c[j].f[k] = kernel_coeffs[ind.i[k] * (kernel_degree + 1) + j];
+      c2[j].f[k] = kernel_coeffs[ind2.i[k] * (kernel_degree + 1) + j];
+    }
+
+  /* Init the iteration for Horner's scheme. */
+  w->v = (c[0].v * x.v) + c[1].v;
+  w2->v = (c2[0].v * x2.v) + c2[1].v;
+  dw_dx->v = c[0].v;
+  dw_dx2->v = c2[0].v;
+
+  /* And we're off! */
+  for (int k = 2; k <= kernel_degree; k++) {
+    dw_dx->v = (dw_dx->v * x.v) + w->v;
+    dw_dx2->v = (dw_dx2->v * x2.v) + w2->v;
+    w->v = (x.v * w->v) + c[k].v;
+    w2->v = (x2.v * w2->v) + c2[k].v;
+  }
+  /* Return everything */
+  w->v = w->v * kernel_constant_vec.v * kernel_gamma_inv_dim_vec.v;
+  w2->v = w2->v * kernel_constant_vec.v * kernel_gamma_inv_dim_vec.v;
+  dw_dx->v =
+      dw_dx->v * kernel_constant_vec.v * kernel_gamma_inv_dim_plus_one_vec.v;
+  dw_dx2->v =
+      dw_dx2->v * kernel_constant_vec.v * kernel_gamma_inv_dim_plus_one_vec.v;
+
+#endif
+}
+
 #endif
 
 /* Some cross-check functions */
diff --git a/src/kick.h b/src/kick.h
index e3fa3bf78c7da514abacf697a9d94212020e5a7b..d6c85b5eab92a288f78f22fce2f03862bc34604f 100644
--- a/src/kick.h
+++ b/src/kick.h
@@ -25,34 +25,31 @@
 /* Local headers. */
 #include "const.h"
 #include "debug.h"
+#include "stars.h"
+#include "timeline.h"
 
 /**
  * @brief Perform the 'kick' operation on a #gpart
  *
  * @param gp The #gpart to kick.
- * @param new_dti The (integer) time-step for this kick.
+ * @param ti_start The starting (integer) time of the kick
+ * @param ti_end The ending (integer) time of the kick
  * @param timeBase The minimal allowed time-step size.
  */
 __attribute__((always_inline)) INLINE static void kick_gpart(
-    struct gpart *restrict gp, int new_dti, double timeBase) {
+    struct gpart *restrict gp, integertime_t ti_start, integertime_t ti_end,
+    double timeBase) {
 
-  /* Compute the time step for this kick */
-  const int ti_start = (gp->ti_begin + gp->ti_end) / 2;
-  const int ti_end = gp->ti_end + new_dti / 2;
+  /* Time interval for this half-kick */
   const float dt = (ti_end - ti_start) * timeBase;
-  const float half_dt = (ti_end - gp->ti_end) * timeBase;
-
-  /* Move particle forward in time */
-  gp->ti_begin = gp->ti_end;
-  gp->ti_end = gp->ti_begin + new_dti;
 
   /* Kick particles in momentum space */
   gp->v_full[0] += gp->a_grav[0] * dt;
   gp->v_full[1] += gp->a_grav[1] * dt;
   gp->v_full[2] += gp->a_grav[2] * dt;
 
-  /* Extra kick work */
-  gravity_kick_extra(gp, dt, half_dt);
+  /* Kick extra variables */
+  gravity_kick_extra(gp, dt);
 }
 
 /**
@@ -60,26 +57,26 @@ __attribute__((always_inline)) INLINE static void kick_gpart(
  *
  * @param p The #part to kick.
  * @param xp The #xpart of the particle.
- * @param new_dti The (integer) time-step for this kick.
+ * @param ti_start The starting (integer) time of the kick
+ * @param ti_end The ending (integer) time of the kick
  * @param timeBase The minimal allowed time-step size.
  */
 __attribute__((always_inline)) INLINE static void kick_part(
-    struct part *restrict p, struct xpart *restrict xp, int new_dti,
-    double timeBase) {
+    struct part *restrict p, struct xpart *restrict xp, integertime_t ti_start,
+    integertime_t ti_end, double timeBase) {
 
-  /* Compute the time step for this kick */
-  const int ti_start = (p->ti_begin + p->ti_end) / 2;
-  const int ti_end = p->ti_end + new_dti / 2;
+  /* Time interval for this half-kick */
   const float dt = (ti_end - ti_start) * timeBase;
-  const float half_dt = (ti_end - p->ti_end) * timeBase;
 
-  /* Move particle forward in time */
-  p->ti_begin = p->ti_end;
-  p->ti_end = p->ti_begin + new_dti;
-  if (p->gpart != NULL) {
-    p->gpart->ti_begin = p->ti_begin;
-    p->gpart->ti_end = p->ti_end;
-  }
+#ifdef SWIFT_DEBUG_CHECKS
+  if (p->ti_kick != ti_start)
+    error(
+        "Particle has not been kicked to the current time p->ti_kick=%lld, "
+        "ti_start=%lld, ti_end=%lld",
+        p->ti_kick, ti_start, ti_end);
+
+  p->ti_kick = ti_end;
+#endif
 
   /* Get the acceleration */
   float a_tot[3] = {p->a_hydro[0], p->a_hydro[1], p->a_hydro[2]};
@@ -99,14 +96,40 @@ __attribute__((always_inline)) INLINE static void kick_part(
     p->gpart->v_full[2] = xp->v_full[2];
   }
 
-  /* Go back by half-step for the hydro velocity */
-  p->v[0] = xp->v_full[0] - half_dt * a_tot[0];
-  p->v[1] = xp->v_full[1] - half_dt * a_tot[1];
-  p->v[2] = xp->v_full[2] - half_dt * a_tot[2];
-
   /* Extra kick work */
-  hydro_kick_extra(p, xp, dt, half_dt);
-  if (p->gpart != NULL) gravity_kick_extra(p->gpart, dt, half_dt);
+  hydro_kick_extra(p, xp, dt);
+  if (p->gpart != NULL) gravity_kick_extra(p->gpart, dt);
+}
+
+/**
+ * @brief Perform the 'kick' operation on a #spart
+ *
+ * @param sp The #spart to kick.
+ * @param ti_start The starting (integer) time of the kick
+ * @param ti_end The ending (integer) time of the kick
+ * @param timeBase The minimal allowed time-step size.
+ */
+__attribute__((always_inline)) INLINE static void kick_spart(
+    struct spart *restrict sp, integertime_t ti_start, integertime_t ti_end,
+    double timeBase) {
+
+  /* Time interval for this half-kick */
+  const float dt = (ti_end - ti_start) * timeBase;
+
+  /* Acceleration from gravity */
+  const float a[3] = {sp->gpart->a_grav[0], sp->gpart->a_grav[1],
+                      sp->gpart->a_grav[2]};
+
+  /* Kick particles in momentum space */
+  sp->v[0] += a[0] * dt;
+  sp->v[1] += a[1] * dt;
+  sp->v[2] += a[2] * dt;
+  sp->gpart->v_full[0] = sp->v[0];
+  sp->gpart->v_full[1] = sp->v[1];
+  sp->gpart->v_full[2] = sp->v[2];
+
+  /* Kick extra variables */
+  star_kick_extra(sp, dt);
 }
 
 #endif /* SWIFT_KICK_H */
diff --git a/src/logger.c b/src/logger.c
new file mode 100644
index 0000000000000000000000000000000000000000..b2acf47aa70cef55f53d296033f6f5c6162fd5bd
--- /dev/null
+++ b/src/logger.c
@@ -0,0 +1,446 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2017 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+/* Config parameters. */
+#include "../config.h"
+
+/* Some standard headers. */
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* This object's header. */
+#include "logger.h"
+
+/* Local headers. */
+#include "atomic.h"
+#include "dump.h"
+#include "error.h"
+#include "part.h"
+
+/**
+ * @brief Compute the size of a message given its mask.
+ *
+ * @param mask The mask that will be used to dump a #part or #gpart.
+ *
+ * @return The size of the logger message in bytes.
+ */
+
+int logger_size(unsigned int mask) {
+
+  /* Start with 8 bytes for the header. */
+  int size = 8;
+
+  /* Is this a particle or a timestep? */
+  if (mask & logger_mask_timestamp) {
+
+    /* The timestamp should not contain any other bits. */
+    if (mask != logger_mask_timestamp)
+      error("Timestamps should not include any other data.");
+
+    /* A timestamp consists of an unsigned long long int. */
+    size += sizeof(unsigned long long int);
+
+  } else {
+
+    /* Particle position as three doubles. */
+    if (mask & logger_mask_x) size += 3 * sizeof(double);
+
+    /* Particle velocity as three floats. */
+    if (mask & logger_mask_v) size += 3 * sizeof(float);
+
+    /* Particle accelleration as three floats. */
+    if (mask & logger_mask_a) size += 3 * sizeof(float);
+
+    /* Particle internal energy as a single float. */
+    if (mask & logger_mask_u) size += sizeof(float);
+
+    /* Particle smoothing length as a single float. */
+    if (mask & logger_mask_h) size += sizeof(float);
+
+    /* Particle density as a single float. */
+    if (mask & logger_mask_rho) size += sizeof(float);
+
+    /* Particle constants, which is a bit more complicated. */
+    if (mask & logger_mask_rho) {
+      size += sizeof(float) +     // mass
+              sizeof(long long);  // id
+    }
+  }
+
+  return size;
+}
+
+/**
+ * @brief Dump a #part to the log.
+ *
+ * @param p The #part to dump.
+ * @param mask The mask of the data to dump.
+ * @param offset Pointer to the offset of the previous log of this particle.
+ * @param dump The #dump in which to log the particle data.
+ */
+
+void logger_log_part(struct part *p, unsigned int mask, size_t *offset,
+                     struct dump *dump) {
+
+  /* Make sure we're not writing a timestamp. */
+  if (mask & logger_mask_timestamp)
+    error("You should not log particles as timestamps.");
+
+  /* Start by computing the size of the message. */
+  const int size = logger_size(mask);
+
+  /* Allocate a chunk of memory in the dump of the right size. */
+  size_t offset_new;
+  char *buff = dump_get(dump, size, &offset_new);
+
+  /* Write the header. */
+  uint64_t temp = (((uint64_t)(offset_new - *offset)) & 0xffffffffffffffULL) |
+                  ((uint64_t)mask << 56);
+  memcpy(buff, &temp, 8);
+  buff += 8;
+
+  /* Particle position as three doubles. */
+  if (mask & logger_mask_x) {
+    memcpy(buff, p->x, 3 * sizeof(double));
+    buff += 3 * sizeof(double);
+  }
+
+  /* Particle velocity as three floats. */
+  if (mask & logger_mask_v) {
+    memcpy(buff, p->v, 3 * sizeof(float));
+    buff += 3 * sizeof(float);
+  }
+
+  /* Particle accelleration as three floats. */
+  if (mask & logger_mask_a) {
+    memcpy(buff, p->a_hydro, 3 * sizeof(float));
+    buff += 3 * sizeof(float);
+  }
+
+#if defined(GADGET2_SPH)
+
+  /* Particle internal energy as a single float. */
+  if (mask & logger_mask_u) {
+    memcpy(buff, &p->entropy, sizeof(float));
+    buff += sizeof(float);
+  }
+
+  /* Particle smoothing length as a single float. */
+  if (mask & logger_mask_h) {
+    memcpy(buff, &p->h, sizeof(float));
+    buff += sizeof(float);
+  }
+
+  /* Particle density as a single float. */
+  if (mask & logger_mask_rho) {
+    memcpy(buff, &p->rho, sizeof(float));
+    buff += sizeof(float);
+  }
+
+  /* Particle constants, which is a bit more complicated. */
+  if (mask & logger_mask_rho) {
+    memcpy(buff, &p->mass, sizeof(float));
+    buff += sizeof(float);
+    memcpy(buff, &p->id, sizeof(long long));
+    buff += sizeof(long long);
+  }
+
+#endif
+
+  /* Update the log message offset. */
+  *offset = offset_new;
+}
+
+/**
+ * @brief Dump a #gpart to the log.
+ *
+ * @param p The #gpart to dump.
+ * @param mask The mask of the data to dump.
+ * @param offset Pointer to the offset of the previous log of this particle.
+ * @param dump The #dump in which to log the particle data.
+ */
+
+void logger_log_gpart(struct gpart *p, unsigned int mask, size_t *offset,
+                      struct dump *dump) {
+
+  /* Make sure we're not writing a timestamp. */
+  if (mask & logger_mask_timestamp)
+    error("You should not log particles as timestamps.");
+
+  /* Make sure we're not looging fields not supported by gparts. */
+  if (mask & (logger_mask_u | logger_mask_rho))
+    error("Can't log SPH quantities for gparts.");
+
+  /* Start by computing the size of the message. */
+  const int size = logger_size(mask);
+
+  /* Allocate a chunk of memory in the dump of the right size. */
+  size_t offset_new;
+  char *buff = dump_get(dump, size, &offset_new);
+
+  /* Write the header. */
+  uint64_t temp = (((uint64_t)(offset_new - *offset)) & 0xffffffffffffffULL) |
+                  ((uint64_t)mask << 56);
+  memcpy(buff, &temp, 8);
+  buff += 8;
+
+  /* Particle position as three doubles. */
+  if (mask & logger_mask_x) {
+    memcpy(buff, p->x, 3 * sizeof(double));
+    buff += 3 * sizeof(double);
+  }
+
+  /* Particle velocity as three floats. */
+  if (mask & logger_mask_v) {
+    memcpy(buff, p->v_full, 3 * sizeof(float));
+    buff += 3 * sizeof(float);
+  }
+
+  /* Particle accelleration as three floats. */
+  if (mask & logger_mask_a) {
+    memcpy(buff, p->a_grav, 3 * sizeof(float));
+    buff += 3 * sizeof(float);
+  }
+
+  /* Particle smoothing length as a single float. */
+  if (mask & logger_mask_h) {
+    memcpy(buff, &p->epsilon, sizeof(float));
+    buff += sizeof(float);
+  }
+
+  /* Particle constants, which is a bit more complicated. */
+  if (mask & logger_mask_rho) {
+    memcpy(buff, &p->mass, sizeof(float));
+    buff += sizeof(float);
+    memcpy(buff, &p->id_or_neg_offset, sizeof(long long));
+    buff += sizeof(long long);
+  }
+
+  /* Update the log message offset. */
+  *offset = offset_new;
+}
+
+void logger_log_timestamp(unsigned long long int timestamp, size_t *offset,
+                          struct dump *dump) {
+
+  /* Start by computing the size of the message. */
+  const int size = logger_size(logger_mask_timestamp);
+
+  /* Allocate a chunk of memory in the dump of the right size. */
+  size_t offset_new;
+  char *buff = dump_get(dump, size, &offset_new);
+
+  /* Write the header. */
+  uint64_t temp = (((uint64_t)(offset_new - *offset)) & 0xffffffffffffffULL) |
+                  ((uint64_t)logger_mask_timestamp << 56);
+  memcpy(buff, &temp, 8);
+  buff += 8;
+
+  /* Store the timestamp. */
+  memcpy(buff, &timestamp, sizeof(unsigned long long int));
+
+  /* Update the log message offset. */
+  *offset = offset_new;
+}
+
+/**
+ * @brief Read a logger message and store the data in a #part.
+ *
+ * @param p The #part in which to store the values.
+ * @param offset Pointer to the offset of the logger message in the buffer,
+ *        will be overwritten with the offset of the previous message.
+ * @param buff Pointer to the start of an encoded logger message.
+ *
+ * @return The mask containing the values read.
+ */
+
+int logger_read_part(struct part *p, size_t *offset, const char *buff) {
+
+  /* Jump to the offset. */
+  buff = &buff[*offset];
+
+  /* Start by reading the logger mask for this entry. */
+  uint64_t temp;
+  memcpy(&temp, buff, 8);
+  const int mask = temp >> 56;
+  *offset -= temp & 0xffffffffffffffULL;
+  buff += 8;
+
+  /* We are only interested in particle data. */
+  if (mask & logger_mask_timestamp)
+    error("Trying to read timestamp as particle.");
+
+  /* Particle position as three doubles. */
+  if (mask & logger_mask_x) {
+    memcpy(p->x, buff, 3 * sizeof(double));
+    buff += 3 * sizeof(double);
+  }
+
+  /* Particle velocity as three floats. */
+  if (mask & logger_mask_v) {
+    memcpy(p->v, buff, 3 * sizeof(float));
+    buff += 3 * sizeof(float);
+  }
+
+  /* Particle accelleration as three floats. */
+  if (mask & logger_mask_a) {
+    memcpy(p->a_hydro, buff, 3 * sizeof(float));
+    buff += 3 * sizeof(float);
+  }
+
+#if defined(GADGET2_SPH)
+
+  /* Particle internal energy as a single float. */
+  if (mask & logger_mask_u) {
+    memcpy(&p->entropy, buff, sizeof(float));
+    buff += sizeof(float);
+  }
+
+  /* Particle smoothing length as a single float. */
+  if (mask & logger_mask_h) {
+    memcpy(&p->h, buff, sizeof(float));
+    buff += sizeof(float);
+  }
+
+  /* Particle density as a single float. */
+  if (mask & logger_mask_rho) {
+    memcpy(&p->rho, buff, sizeof(float));
+    buff += sizeof(float);
+  }
+
+  /* Particle constants, which is a bit more complicated. */
+  if (mask & logger_mask_rho) {
+    memcpy(&p->mass, buff, sizeof(float));
+    buff += sizeof(float);
+    memcpy(&p->id, buff, sizeof(long long));
+    buff += sizeof(long long);
+  }
+
+#endif
+
+  /* Finally, return the mask of the values we just read. */
+  return mask;
+}
+
+/**
+ * @brief Read a logger message and store the data in a #gpart.
+ *
+ * @param p The #gpart in which to store the values.
+ * @param offset Pointer to the offset of the logger message in the buffer,
+ *        will be overwritten with the offset of the previous message.
+ * @param buff Pointer to the start of an encoded logger message.
+ *
+ * @return The mask containing the values read.
+ */
+
+int logger_read_gpart(struct gpart *p, size_t *offset, const char *buff) {
+
+  /* Jump to the offset. */
+  buff = &buff[*offset];
+
+  /* Start by reading the logger mask for this entry. */
+  uint64_t temp;
+  memcpy(&temp, buff, 8);
+  const int mask = temp >> 56;
+  *offset -= temp & 0xffffffffffffffULL;
+  buff += 8;
+
+  /* We are only interested in particle data. */
+  if (mask & logger_mask_timestamp)
+    error("Trying to read timestamp as particle.");
+
+  /* We can't store all part fields in a gpart. */
+  if (mask & (logger_mask_u | logger_mask_rho))
+    error("Trying to read SPH quantities into a gpart.");
+
+  /* Particle position as three doubles. */
+  if (mask & logger_mask_x) {
+    memcpy(p->x, buff, 3 * sizeof(double));
+    buff += 3 * sizeof(double);
+  }
+
+  /* Particle velocity as three floats. */
+  if (mask & logger_mask_v) {
+    memcpy(p->v_full, buff, 3 * sizeof(float));
+    buff += 3 * sizeof(float);
+  }
+
+  /* Particle accelleration as three floats. */
+  if (mask & logger_mask_a) {
+    memcpy(p->a_grav, buff, 3 * sizeof(float));
+    buff += 3 * sizeof(float);
+  }
+
+  /* Particle smoothing length as a single float. */
+  if (mask & logger_mask_h) {
+    memcpy(&p->epsilon, buff, sizeof(float));
+    buff += sizeof(float);
+  }
+
+  /* Particle constants, which is a bit more complicated. */
+  if (mask & logger_mask_rho) {
+    memcpy(&p->mass, buff, sizeof(float));
+    buff += sizeof(float);
+    memcpy(&p->id_or_neg_offset, buff, sizeof(long long));
+    buff += sizeof(long long);
+  }
+
+  /* Finally, return the mask of the values we just read. */
+  return mask;
+}
+
+/**
+ * @brief Read a logger message for a timestamp.
+ *
+ * @param t The timestamp in which to store the value.
+ * @param offset Pointer to the offset of the logger message in the buffer,
+ *        will be overwritten with the offset of the previous message.
+ * @param buff Pointer to the start of an encoded logger message.
+ *
+ * @return The mask containing the values read.
+ */
+
+int logger_read_timestamp(unsigned long long int *t, size_t *offset,
+                          const char *buff) {
+
+  /* Jump to the offset. */
+  buff = &buff[*offset];
+
+  /* Start by reading the logger mask for this entry. */
+  uint64_t temp;
+  memcpy(&temp, buff, 8);
+  const int mask = temp >> 56;
+  *offset -= temp & 0xffffffffffffffULL;
+  buff += 8;
+
+  /* We are only interested in timestamps. */
+  if (!(mask & logger_mask_timestamp))
+    error("Trying to read timestamp from a particle.");
+
+  /* Make sure we don't have extra fields. */
+  if (mask != logger_mask_timestamp)
+    error("Timestamp message contains extra fields.");
+
+  /* Copy the timestamp value from the buffer. */
+  memcpy(t, buff, sizeof(unsigned long long int));
+
+  /* Finally, return the mask of the values we just read. */
+  return mask;
+}
diff --git a/src/logger.h b/src/logger.h
new file mode 100644
index 0000000000000000000000000000000000000000..32fae752c2ae13a143809d9df3030dbc06b0942d
--- /dev/null
+++ b/src/logger.h
@@ -0,0 +1,87 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2017 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+#ifndef SWIFT_LOGGER_H
+#define SWIFT_LOGGER_H
+
+/* Includes. */
+#include "dump.h"
+#include "part.h"
+
+/**
+ * Logger entries contain messages representing the particle data at a given
+ * point in time during the simulation.
+ *
+ * The logger messages always start with an 8-byte header structured as
+ * follows:
+ *
+ *   data: [ mask |                     offset                     ]
+ *   byte: [  01  |  02  |  03  |  04  |  05  |  06  |  07  |  08  ]
+ *
+ * I.e. a first "mask" byte followed by 7 "offset" bytes. The mask contains
+ * information on what kind of data is packed after the header. The mask
+ * bits correspond to the following data:
+ *
+ *   bit | name   | size | comment
+ *   -------------------------------------------------------------------------
+ *   0   | x      | 24   | The particle position, in absolute coordinates,
+ *       |        |      | stored as three doubles.
+ *   1   | v      | 12   | Particle velocity, stored as three floats.
+ *   2   | a      | 12   | Particle acceleration, stored as three floats.
+ *   3   | u      | 4    | Particle internal energy (or entropy, if Gadget-SPH
+ *       |        |      | is used), stored as a single float.
+ *   4   | h      | 4    | Particle smoothing length (or epsilon, if a gpart),
+ *       |        |      | stored as a single float.
+ *   5   | rho    | 4    | Particle density, stored as a single float.
+ *   6   | consts | 12   | Particle constants, i.e. mass and ID.
+ *   7   | time   | 8    | Timestamp, not associated with a particle, just
+ *       |        |      | marks the transitions from one timestep to another.
+ *
+ * There is no distinction between gravity and SPH particles.
+ *
+ * The offset refers to the relative location of the previous message for the
+ * same particle or for the previous timestamp (if mask bit 7 is set). I.e.
+ * the previous log entry will be at the address of the current mask byte minus
+ * the unsigned value stored in the offset. An offset of zero indicates that
+ * this is the first message for the given particle/timestamp.
+ */
+
+/* Some constants. */
+#define logger_mask_x 1
+#define logger_mask_v 2
+#define logger_mask_a 4
+#define logger_mask_u 8
+#define logger_mask_h 16
+#define logger_mask_rho 32
+#define logger_mask_consts 64
+#define logger_mask_timestamp 128
+
+/* Function prototypes. */
+int logger_size(unsigned int mask);
+void logger_log_part(struct part *p, unsigned int mask, size_t *offset,
+                     struct dump *dump);
+void logger_log_gpart(struct gpart *p, unsigned int mask, size_t *offset,
+                      struct dump *dump);
+void logger_log_timestamp(unsigned long long int t, size_t *offset,
+                          struct dump *dump);
+int logger_read_part(struct part *p, size_t *offset, const char *buff);
+int logger_read_gpart(struct gpart *p, size_t *offset, const char *buff);
+int logger_read_timestamp(unsigned long long int *t, size_t *offset,
+                          const char *buff);
+
+#endif /* SWIFT_LOGGER_H */
diff --git a/src/memswap.h b/src/memswap.h
index 4643725535917952d12927d52187bc7306ced5ef..92c902eeb158978d4a606f5f2a9416d4113fae0b 100644
--- a/src/memswap.h
+++ b/src/memswap.h
@@ -32,24 +32,27 @@
 #include <altivec.h>
 #endif
 
-/* Macro for in-place swap of two values a and b of type t. */
-#define swap_loop(t, a, b, c)  \
-  while (c >= sizeof(t)) {     \
-    register t temp = *(t *)a; \
-    *(t *)a = *(t *)b;         \
-    *(t *)b = temp;            \
-    a += sizeof(t);            \
-    b += sizeof(t);            \
-    bytes -= sizeof(t);        \
+/* Macro for in-place swap of two values a and b of type t. a and b are
+   assumed to be of type char* so that the pointer arithmetic works. */
+#define swap_loop(type, a, b, count) \
+  while (count >= sizeof(type)) {    \
+    register type temp = *(type *)a; \
+    *(type *)a = *(type *)b;         \
+    *(type *)b = temp;               \
+    a += sizeof(type);               \
+    b += sizeof(type);               \
+    count -= sizeof(type);           \
   }
 
 /**
  * @brief Swap the contents of two elements in-place.
  *
- * Keep in mind that this function works best when the underlying data
+ * Keep in mind that this function only works when the underlying data
  * is aligned to the vector length, e.g. with the @c
  * __attribute__((aligned(32)))
- * syntax, and the code is compiled with @c -funroll-loops.
+ * syntax!
+ * Furthermore, register re-labeling only seems to work when the code is
+ * compiled with @c -funroll-loops.
  *
  * @param void_a Pointer to the first element.
  * @param void_b Pointer to the second element.
@@ -76,4 +79,63 @@ __attribute__((always_inline)) inline void memswap(void *void_a, void *void_b,
   swap_loop(char, a, b, bytes);
 }
 
+/**
+ * @brief Swap the contents of two elements in-place.
+ *
+ * As opposed to #memswap, this function does not require the parameters
+ * to be aligned in any specific way.
+ * Furthermore, register re-labeling only seems to work when the code is
+ * compiled with @c -funroll-loops.
+ *
+ * @param void_a Pointer to the first element.
+ * @param void_b Pointer to the second element.
+ * @param bytes Size, in bytes, of the data pointed to by @c a and @c b.
+ */
+__attribute__((always_inline)) inline void memswap_unaligned(void *void_a,
+                                                             void *void_b,
+                                                             size_t bytes) {
+  char *a = (char *)void_a, *b = (char *)void_b;
+#ifdef __AVX512F__
+  while (bytes >= sizeof(__m512i)) {
+    register __m512i temp;
+    temp = _mm512_loadu_si512((__m512i *)a);
+    _mm512_storeu_si512((__m512i *)a, _mm512_loadu_si512((__m512i *)b));
+    _mm512_storeu_si512((__m512i *)b, temp);
+    a += sizeof(__m512i);
+    b += sizeof(__m512i);
+    bytes -= sizeof(__m512i);
+  }
+#endif
+#ifdef __AVX__
+  while (bytes >= sizeof(__m256i)) {
+    register __m256i temp;
+    temp = _mm256_loadu_si256((__m256i *)a);
+    _mm256_storeu_si256((__m256i *)a, _mm256_loadu_si256((__m256i *)b));
+    _mm256_storeu_si256((__m256i *)b, temp);
+    a += sizeof(__m256i);
+    b += sizeof(__m256i);
+    bytes -= sizeof(__m256i);
+  }
+#endif
+#ifdef __SSE2__
+  while (bytes >= sizeof(__m128i)) {
+    register __m128i temp;
+    temp = _mm_loadu_si128((__m128i *)a);
+    _mm_storeu_si128((__m128i *)a, _mm_loadu_si128((__m128i *)b));
+    _mm_storeu_si128((__m128i *)b, temp);
+    a += sizeof(__m128i);
+    b += sizeof(__m128i);
+    bytes -= sizeof(__m128i);
+  }
+#endif
+#ifdef __ALTIVEC__
+  // Power8 supports unaligned load/stores, but not sure what it will do here.
+  swap_loop(vector int, a, b, bytes);
+#endif
+  swap_loop(size_t, a, b, bytes);
+  swap_loop(int, a, b, bytes);
+  swap_loop(short, a, b, bytes);
+  swap_loop(char, a, b, bytes);
+}
+
 #endif /* SWIFT_MEMSWAP_H */
diff --git a/src/parallel_io.c b/src/parallel_io.c
index 66c9203e39e56d520eeace8858b0c618b45e6a22..e429ff641961da342187f0c297eba8041cfcc51a 100644
--- a/src/parallel_io.c
+++ b/src/parallel_io.c
@@ -46,6 +46,7 @@
 #include "io_properties.h"
 #include "kernel_hydro.h"
 #include "part.h"
+#include "stars_io.h"
 #include "units.h"
 
 /**
@@ -373,18 +374,22 @@ void writeArray(struct engine* e, hid_t grp, char* fileName, FILE* xmfFile,
  */
 void read_ic_parallel(char* fileName, const struct UnitSystem* internal_units,
                       double dim[3], struct part** parts, struct gpart** gparts,
-                      size_t* Ngas, size_t* Ngparts, int* periodic,
-                      int* flag_entropy, int mpi_rank, int mpi_size,
-                      MPI_Comm comm, MPI_Info info, int dry_run) {
+                      struct spart** sparts, size_t* Ngas, size_t* Ngparts,
+                      size_t* Nstars, int* periodic, int* flag_entropy,
+                      int with_hydro, int with_gravity, int with_stars,
+                      int mpi_rank, int mpi_size, MPI_Comm comm, MPI_Info info,
+                      int dry_run) {
+
   hid_t h_file = 0, h_grp = 0;
   /* GADGET has only cubic boxes (in cosmological mode) */
   double boxSize[3] = {0.0, -1.0, -1.0};
-  int numParticles[NUM_PARTICLE_TYPES] = {0};
-  int numParticles_highWord[NUM_PARTICLE_TYPES] = {0};
+  long long numParticles[NUM_PARTICLE_TYPES] = {0};
+  long long numParticles_highWord[NUM_PARTICLE_TYPES] = {0};
   size_t N[NUM_PARTICLE_TYPES] = {0};
   long long N_total[NUM_PARTICLE_TYPES] = {0};
   long long offset[NUM_PARTICLE_TYPES] = {0};
   int dimension = 3; /* Assume 3D if nothing is specified */
+  size_t Ndm = 0;
 
   /* Open file */
   /* message("Opening file '%s' as IC.", fileName); */
@@ -425,21 +430,21 @@ void read_ic_parallel(char* fileName, const struct UnitSystem* internal_units,
   readAttribute(h_grp, "Flag_Entropy_ICs", INT, flag_entropy_temp);
   *flag_entropy = flag_entropy_temp[0];
   readAttribute(h_grp, "BoxSize", DOUBLE, boxSize);
-  readAttribute(h_grp, "NumPart_Total", UINT, numParticles);
-  readAttribute(h_grp, "NumPart_Total_HighWord", UINT, numParticles_highWord);
+  readAttribute(h_grp, "NumPart_Total", LONGLONG, numParticles);
+  readAttribute(h_grp, "NumPart_Total_HighWord", LONGLONG,
+                numParticles_highWord);
 
   for (int ptype = 0; ptype < NUM_PARTICLE_TYPES; ++ptype)
-    N_total[ptype] = ((long long)numParticles[ptype]) +
-                     ((long long)numParticles_highWord[ptype] << 32);
+    N_total[ptype] =
+        (numParticles[ptype]) + (numParticles_highWord[ptype] << 32);
 
   dim[0] = boxSize[0];
   dim[1] = (boxSize[1] < 0) ? boxSize[0] : boxSize[1];
   dim[2] = (boxSize[2] < 0) ? boxSize[0] : boxSize[2];
 
-  /* message("Found %d particles in a %speriodic box of size
-   * [%f %f %f].",  */
-  /* 	 N_total, (periodic ? "": "non-"), dim[0],
-   * dim[1], dim[2]); */
+  /* message("Found %lld particles in a %speriodic box of size [%f %f %f].", */
+  /* 	  N_total[0], (periodic ? "": "non-"), dim[0], */
+  /* 	  dim[1], dim[2]); */
 
   /* Divide the particles among the tasks. */
   for (int ptype = 0; ptype < NUM_PARTICLE_TYPES; ++ptype) {
@@ -492,29 +497,38 @@ void read_ic_parallel(char* fileName, const struct UnitSystem* internal_units,
         units_conversion_factor(ic_units, internal_units, UNIT_CONV_LENGTH);
 
   /* Allocate memory to store SPH particles */
-  *Ngas = N[0];
-  if (posix_memalign((void*)parts, part_align, (*Ngas) * sizeof(struct part)) !=
-      0)
-    error("Error while allocating memory for particles");
-  bzero(*parts, *Ngas * sizeof(struct part));
-
-  /* Allocate memory to store all particles */
-  const size_t Ndm = N[1];
-  *Ngparts = N[1] + N[0];
-  if (posix_memalign((void*)gparts, gpart_align,
-                     *Ngparts * sizeof(struct gpart)) != 0)
-    error(
-        "Error while allocating memory for gravity "
-        "particles");
-  bzero(*gparts, *Ngparts * sizeof(struct gpart));
-
-  /* message("Allocated %8.2f MB for particles.", *N *
-   * sizeof(struct part) /
+  if (with_hydro) {
+    *Ngas = N[0];
+    if (posix_memalign((void*)parts, part_align,
+                       (*Ngas) * sizeof(struct part)) != 0)
+      error("Error while allocating memory for particles");
+    bzero(*parts, *Ngas * sizeof(struct part));
+  }
+
+  /* Allocate memory to store star particles */
+  if (with_stars) {
+    *Nstars = N[STAR];
+    if (posix_memalign((void*)sparts, spart_align,
+                       *Nstars * sizeof(struct spart)) != 0)
+      error("Error while allocating memory for star particles");
+    bzero(*sparts, *Nstars * sizeof(struct spart));
+  }
+
+  /* Allocate memory to store gravity particles */
+  if (with_gravity) {
+    Ndm = N[1];
+    *Ngparts = (with_hydro ? N[GAS] : 0) + N[DM] + (with_stars ? N[STAR] : 0);
+    if (posix_memalign((void*)gparts, gpart_align,
+                       *Ngparts * sizeof(struct gpart)) != 0)
+      error("Error while allocating memory for gravity particles");
+    bzero(*gparts, *Ngparts * sizeof(struct gpart));
+  }
+
+  /* message("Allocated %8.2f MB for particles.", *N * sizeof(struct part) /
    * (1024.*1024.)); */
 
   /* message("BoxSize = %lf", dim[0]); */
-  /* message("NumPart = [%zd, %zd] Total = %zd", *Ngas, Ndm,
-   * *Ngparts); */
+  /* message("NumPart = [%zd, %zd] Total = %zd", *Ngas, Ndm, *Ngparts); */
 
   /* Loop over all particle types */
   for (int ptype = 0; ptype < NUM_PARTICLE_TYPES; ptype++) {
@@ -539,13 +553,24 @@ void read_ic_parallel(char* fileName, const struct UnitSystem* internal_units,
     switch (ptype) {
 
       case GAS:
-        Nparticles = *Ngas;
-        hydro_read_particles(*parts, list, &num_fields);
+        if (with_hydro) {
+          Nparticles = *Ngas;
+          hydro_read_particles(*parts, list, &num_fields);
+        }
         break;
 
       case DM:
-        Nparticles = Ndm;
-        darkmatter_read_particles(*gparts, list, &num_fields);
+        if (with_gravity) {
+          Nparticles = Ndm;
+          darkmatter_read_particles(*gparts, list, &num_fields);
+        }
+        break;
+
+      case STAR:
+        if (with_stars) {
+          Nparticles = *Nstars;
+          star_read_particles(*sparts, list, &num_fields);
+        }
         break;
 
       default:
@@ -563,10 +588,15 @@ void read_ic_parallel(char* fileName, const struct UnitSystem* internal_units,
   }
 
   /* Prepare the DM particles */
-  if (!dry_run) prepare_dm_gparts(*gparts, Ndm);
+  if (!dry_run && with_gravity) prepare_dm_gparts(*gparts, Ndm);
 
-  /* Now duplicate the hydro particle into gparts */
-  if (!dry_run) duplicate_hydro_gparts(*parts, *gparts, *Ngas, Ndm);
+  /* Duplicate the hydro particles into gparts */
+  if (!dry_run && with_gravity && with_hydro)
+    duplicate_hydro_gparts(*parts, *gparts, *Ngas, Ndm);
+
+  /* Duplicate the star particles into gparts */
+  if (!dry_run && with_gravity && with_stars)
+    duplicate_star_gparts(*sparts, *gparts, *Nstars, Ndm + *Ngas);
 
   /* message("Done Reading particles..."); */
 
@@ -609,17 +639,19 @@ void write_output_parallel(struct engine* e, const char* baseName,
 
   hid_t h_file = 0, h_grp = 0;
   const size_t Ngas = e->s->nr_parts;
+  const size_t Nstars = e->s->nr_sparts;
   const size_t Ntot = e->s->nr_gparts;
   int periodic = e->s->periodic;
   int numFiles = 1;
   struct part* parts = e->s->parts;
   struct gpart* gparts = e->s->gparts;
   struct gpart* dmparts = NULL;
+  struct spart* sparts = e->s->sparts;
   static int outputCount = 0;
   FILE* xmfFile = 0;
 
   /* Number of unassociated gparts */
-  const size_t Ndm = Ntot > 0 ? Ntot - Ngas : 0;
+  const size_t Ndm = Ntot > 0 ? Ntot - (Ngas + Nstars) : 0;
 
   /* File name */
   char fileName[FILENAME_BUFFER_SIZE];
@@ -642,16 +674,16 @@ void write_output_parallel(struct engine* e, const char* baseName,
 
   /* Compute offset in the file and total number of
    * particles */
-  size_t N[NUM_PARTICLE_TYPES] = {Ngas, Ndm, 0};
+  size_t N[NUM_PARTICLE_TYPES] = {Ngas, Ndm, 0, 0, Nstars, 0};
   long long N_total[NUM_PARTICLE_TYPES] = {0};
   long long offset[NUM_PARTICLE_TYPES] = {0};
-  MPI_Exscan(&N, &offset, NUM_PARTICLE_TYPES, MPI_LONG_LONG, MPI_SUM, comm);
+  MPI_Exscan(&N, &offset, NUM_PARTICLE_TYPES, MPI_LONG_LONG_INT, MPI_SUM, comm);
   for (int ptype = 0; ptype < NUM_PARTICLE_TYPES; ++ptype)
     N_total[ptype] = offset[ptype] + N[ptype];
 
   /* The last rank now has the correct N_total. Let's
    * broadcast from there */
-  MPI_Bcast(&N_total, 6, MPI_LONG_LONG, mpi_size - 1, comm);
+  MPI_Bcast(&N_total, 6, MPI_LONG_LONG_INT, mpi_size - 1, comm);
 
   /* Now everybody konws its offset and the total number of
    * particles of each
@@ -816,9 +848,11 @@ void write_output_parallel(struct engine* e, const char* baseName,
         /* Write DM particles */
         Nparticles = Ndm;
         darkmatter_write_particles(dmparts, list, &num_fields);
+        break;
 
-        /* Free temporary array */
-        free(dmparts);
+      case STAR:
+        Nparticles = Nstars;
+        star_write_particles(sparts, list, &num_fields);
         break;
 
       default:
@@ -832,7 +866,10 @@ void write_output_parallel(struct engine* e, const char* baseName,
                  internal_units, snapshot_units);
 
     /* Free temporary array */
-    free(dmparts);
+    if (dmparts) {
+      free(dmparts);
+      dmparts = 0;
+    }
 
     /* Close particle group */
     H5Gclose(h_grp);
diff --git a/src/parallel_io.h b/src/parallel_io.h
index e5b12aa50c30b4d63ccc81835d2d8454e01b3889..e4cb9f5976bc0f5b55207a7422597a05feaa3d5e 100644
--- a/src/parallel_io.h
+++ b/src/parallel_io.h
@@ -36,9 +36,11 @@
 
 void read_ic_parallel(char* fileName, const struct UnitSystem* internal_units,
                       double dim[3], struct part** parts, struct gpart** gparts,
-                      size_t* Ngas, size_t* Ngparts, int* periodic,
-                      int* flag_entropy, int mpi_rank, int mpi_size,
-                      MPI_Comm comm, MPI_Info info, int dry_run);
+                      struct spart** sparts, size_t* Ngas, size_t* Ngparts,
+                      size_t* Nsparts, int* periodic, int* flag_entropy,
+                      int with_hydro, int with_gravity, int with_stars,
+                      int mpi_rank, int mpi_size, MPI_Comm comm, MPI_Info info,
+                      int dry_run);
 
 void write_output_parallel(struct engine* e, const char* baseName,
                            const struct UnitSystem* internal_units,
diff --git a/src/part.c b/src/part.c
index b00eaccaae0e86f7c4e8019a307f0bf455687b7c..ecc5ca977ae0716b395cdd61af97382e6603186e 100644
--- a/src/part.c
+++ b/src/part.c
@@ -36,7 +36,8 @@
  * @param N The number of particles to re-link;
  * @param offset The offset of #part%s relative to the global parts list.
  */
-void part_relink_gparts(struct part *parts, size_t N, ptrdiff_t offset) {
+void part_relink_gparts_to_parts(struct part *parts, size_t N,
+                                 ptrdiff_t offset) {
   for (size_t k = 0; k < N; k++) {
     if (parts[k].gpart) {
       parts[k].gpart->id_or_neg_offset = -(k + offset);
@@ -45,28 +46,194 @@ void part_relink_gparts(struct part *parts, size_t N, ptrdiff_t offset) {
 }
 
 /**
- * @brief Re-link the #gpart%s associated with the list of #part%s.
+ * @brief Re-link the #gpart%s associated with the list of #spart%s.
+ *
+ * @param sparts The list of #spart.
+ * @param N The number of s-particles to re-link;
+ * @param offset The offset of #spart%s relative to the global sparts list.
+ */
+void part_relink_gparts_to_sparts(struct spart *sparts, size_t N,
+                                  ptrdiff_t offset) {
+  for (size_t k = 0; k < N; k++) {
+    if (sparts[k].gpart) {
+      sparts[k].gpart->id_or_neg_offset = -(k + offset);
+    }
+  }
+}
+
+/**
+ * @brief Re-link the #part%s associated with the list of #gpart%s.
  *
  * @param gparts The list of #gpart.
  * @param N The number of particles to re-link;
- * @param parts The global part array in which to find the #gpart offsets.
+ * @param parts The global #part array in which to find the #gpart offsets.
  */
-void part_relink_parts(struct gpart *gparts, size_t N, struct part *parts) {
+void part_relink_parts_to_gparts(struct gpart *gparts, size_t N,
+                                 struct part *parts) {
   for (size_t k = 0; k < N; k++) {
-    if (gparts[k].id_or_neg_offset <= 0) {
+    if (gparts[k].type == swift_type_gas) {
       parts[-gparts[k].id_or_neg_offset].gpart = &gparts[k];
     }
   }
 }
 
+/**
+ * @brief Re-link the #spart%s associated with the list of #gpart%s.
+ *
+ * @param gparts The list of #gpart.
+ * @param N The number of particles to re-link;
+ * @param sparts The global #spart array in which to find the #gpart offsets.
+ */
+void part_relink_sparts_to_gparts(struct gpart *gparts, size_t N,
+                                  struct spart *sparts) {
+  for (size_t k = 0; k < N; k++) {
+    if (gparts[k].type == swift_type_star) {
+      sparts[-gparts[k].id_or_neg_offset].gpart = &gparts[k];
+    }
+  }
+}
+
+/**
+ * @brief Verifies that the #gpart, #part and #spart are correctly linked
+ * together
+ * and that the particle poisitions match.
+ *
+ * This is a debugging function.
+ *
+ * @param parts The #part array.
+ * @param gparts The #gpart array.
+ * @param sparts The #spart array.
+ * @param nr_parts The number of #part in the array.
+ * @param nr_gparts The number of #gpart in the array.
+ * @param nr_sparts The number of #spart in the array.
+ * @param verbose Do we report verbosely in case of success ?
+ */
+void part_verify_links(struct part *parts, struct gpart *gparts,
+                       struct spart *sparts, size_t nr_parts, size_t nr_gparts,
+                       size_t nr_sparts, int verbose) {
+
+  for (size_t k = 0; k < nr_gparts; ++k) {
+
+    /* We have a DM particle */
+    if (gparts[k].type == swift_type_dark_matter) {
+
+      /* Check that it's not linked */
+      if (gparts[k].id_or_neg_offset < 0)
+        error("DM gpart particle linked to something !");
+    }
+
+    /* We have a gas particle */
+    else if (gparts[k].type == swift_type_gas) {
+
+      /* Check that it is linked */
+      if (gparts[k].id_or_neg_offset > 0)
+        error("Gas gpart not linked to anything !");
+
+      /* Find its link */
+      const struct part *part = &parts[-gparts[k].id_or_neg_offset];
+
+      /* Check the reverse link */
+      if (part->gpart != &gparts[k]) error("Linking problem !");
+
+      /* Check that the particles are at the same place */
+      if (gparts[k].x[0] != part->x[0] || gparts[k].x[1] != part->x[1] ||
+          gparts[k].x[2] != part->x[2])
+        error(
+            "Linked particles are not at the same position !\n"
+            "gp->x=[%e %e %e] p->x=[%e %e %e] diff=[%e %e %e]",
+            gparts[k].x[0], gparts[k].x[1], gparts[k].x[2], part->x[0],
+            part->x[1], part->x[2], gparts[k].x[0] - part->x[0],
+            gparts[k].x[1] - part->x[1], gparts[k].x[2] - part->x[2]);
+
+      /* Check that the particles are at the same time */
+      if (gparts[k].time_bin != part->time_bin)
+        error("Linked particles are not at the same time !");
+    }
+
+    else if (gparts[k].type == swift_type_star) {
+
+      /* Check that it is linked */
+      if (gparts[k].id_or_neg_offset > 0)
+        error("Star gpart not linked to anything !");
+
+      /* Find its link */
+      const struct spart *spart = &sparts[-gparts[k].id_or_neg_offset];
+
+      /* Check the reverse link */
+      if (spart->gpart != &gparts[k]) error("Linking problem !");
+
+      /* Check that the particles are at the same place */
+      if (gparts[k].x[0] != spart->x[0] || gparts[k].x[1] != spart->x[1] ||
+          gparts[k].x[2] != spart->x[2])
+        error(
+            "Linked particles are not at the same position !\n"
+            "gp->x=[%e %e %e] sp->x=[%e %e %e] diff=[%e %e %e]",
+            gparts[k].x[0], gparts[k].x[1], gparts[k].x[2], spart->x[0],
+            spart->x[1], spart->x[2], gparts[k].x[0] - spart->x[0],
+            gparts[k].x[1] - spart->x[1], gparts[k].x[2] - spart->x[2]);
+
+      /* Check that the particles are at the same time */
+      if (gparts[k].time_bin != spart->time_bin)
+        error("Linked particles are not at the same time !");
+    }
+  }
+
+  /* Now check that all parts are linked */
+  for (size_t k = 0; k < nr_parts; ++k) {
+
+    /* Ok, there is a link */
+    if (parts[k].gpart != NULL) {
+
+      /* Check the link */
+      if (parts[k].gpart->id_or_neg_offset != -(ptrdiff_t)k) {
+        error("Linking problem !");
+      }
+
+      /* Check that the particles are at the same place */
+      if (parts[k].x[0] != parts[k].gpart->x[0] ||
+          parts[k].x[1] != parts[k].gpart->x[1] ||
+          parts[k].x[2] != parts[k].gpart->x[2])
+        error("Linked particles are not at the same position !");
+
+      /* Check that the particles are at the same time */
+      if (parts[k].time_bin != parts[k].gpart->time_bin)
+        error("Linked particles are not at the same time !");
+    }
+  }
+
+  /* Now check that all sparts are linked */
+  for (size_t k = 0; k < nr_sparts; ++k) {
+
+    /* Ok, there is a link */
+    if (sparts[k].gpart != NULL) {
+
+      /* Check the link */
+      if (sparts[k].gpart->id_or_neg_offset != -(ptrdiff_t)k) {
+        error("Linking problem !");
+
+        /* Check that the particles are at the same place */
+        if (sparts[k].x[0] != sparts[k].gpart->x[0] ||
+            sparts[k].x[1] != sparts[k].gpart->x[1] ||
+            sparts[k].x[2] != sparts[k].gpart->x[2])
+          error("Linked particles are not at the same position !");
+
+        /* Check that the particles are at the same time */
+        if (sparts[k].time_bin != sparts[k].gpart->time_bin)
+          error("Linked particles are not at the same time !");
+      }
+    }
+  }
+
+  if (verbose) message("All links OK");
+}
+
 #ifdef WITH_MPI
 /* MPI data type for the particle transfers */
 MPI_Datatype part_mpi_type;
 MPI_Datatype xpart_mpi_type;
 MPI_Datatype gpart_mpi_type;
-#endif
+MPI_Datatype spart_mpi_type;
 
-#ifdef WITH_MPI
 /**
  * @brief Registers MPI particle types.
  */
@@ -93,5 +260,10 @@ void part_create_mpi_types() {
       MPI_Type_commit(&gpart_mpi_type) != MPI_SUCCESS) {
     error("Failed to create MPI type for gparts.");
   }
+  if (MPI_Type_contiguous(sizeof(struct spart) / sizeof(unsigned char),
+                          MPI_BYTE, &spart_mpi_type) != MPI_SUCCESS ||
+      MPI_Type_commit(&spart_mpi_type) != MPI_SUCCESS) {
+    error("Failed to create MPI type for sparts.");
+  }
 }
 #endif
diff --git a/src/part.h b/src/part.h
index 0bf4359f891619b0900f8aa9f17b2a2a71127579..4ed4b490964b59239faf170218cd099d225f5edd 100644
--- a/src/part.h
+++ b/src/part.h
@@ -32,10 +32,13 @@
 
 /* Local headers. */
 #include "align.h"
+#include "part_type.h"
+#include "timeline.h"
 
 /* Some constants. */
 #define part_align 128
 #define xpart_align 128
+#define spart_align 128
 #define gpart_align 128
 
 /* Import the right hydro particle definition */
@@ -62,13 +65,27 @@
 /* Import the right gravity particle definition */
 #include "./gravity/Default/gravity_part.h"
 
-void part_relink_gparts(struct part *parts, size_t N, ptrdiff_t offset);
-void part_relink_parts(struct gpart *gparts, size_t N, struct part *parts);
+/* Import the right star particle definition */
+#include "./stars/Default/star_part.h"
+
+void part_relink_gparts_to_parts(struct part *parts, size_t N,
+                                 ptrdiff_t offset);
+void part_relink_gparts_to_sparts(struct spart *sparts, size_t N,
+                                  ptrdiff_t offset);
+void part_relink_parts_to_gparts(struct gpart *gparts, size_t N,
+                                 struct part *parts);
+void part_relink_sparts_to_gparts(struct gpart *gparts, size_t N,
+                                  struct spart *sparts);
+void part_verify_links(struct part *parts, struct gpart *gparts,
+                       struct spart *sparts, size_t nr_parts, size_t nr_gparts,
+                       size_t nr_sparts, int verbose);
+
 #ifdef WITH_MPI
 /* MPI data type for the particle transfers */
 extern MPI_Datatype part_mpi_type;
 extern MPI_Datatype xpart_mpi_type;
 extern MPI_Datatype gpart_mpi_type;
+extern MPI_Datatype spart_mpi_type;
 
 void part_create_mpi_types();
 #endif
diff --git a/src/part_type.h b/src/part_type.h
new file mode 100644
index 0000000000000000000000000000000000000000..2c564d6908c8887e8fa8a5197a0a92ed85cbe5bb
--- /dev/null
+++ b/src/part_type.h
@@ -0,0 +1,34 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk).
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+#ifndef SWIFT_PART_TYPES_H
+#define SWIFT_PART_TYPES_H
+
+/**
+ * @brief The different types of particles a #gpart can link to.
+ *
+ * Note we use the historical values from Gadget for these fields.
+ */
+enum part_type {
+  swift_type_gas = 0,
+  swift_type_dark_matter = 1,
+  swift_type_star = 4,
+  swift_type_black_hole = 5
+} __attribute__((packed));
+
+#endif /* SWIFT_PART_TYPES_H */
diff --git a/src/partition.c b/src/partition.c
index 90fbc53c9f5898b8fcb6133f1411e7ee1f0bcb95..3e21c2b51b20501631e080ffc3f9e7d8df5dab20 100644
--- a/src/partition.c
+++ b/src/partition.c
@@ -522,8 +522,9 @@ static void repart_edge_metis(int partweights, int bothweights, int nodeID,
     /* Skip un-interesting tasks. */
     if (t->type != task_type_self && t->type != task_type_pair &&
         t->type != task_type_sub_self && t->type != task_type_sub_self &&
-        t->type != task_type_ghost && t->type != task_type_kick &&
-        t->type != task_type_init)
+        t->type != task_type_ghost && t->type != task_type_kick1 &&
+        t->type != task_type_kick2 && t->type != task_type_timestep &&
+        t->type != task_type_drift && t->type != task_type_init)
       continue;
 
     /* Get the task weight. */
@@ -554,7 +555,9 @@ static void repart_edge_metis(int partweights, int bothweights, int nodeID,
     int cid = ci - cells;
 
     /* Different weights for different tasks. */
-    if (t->type == task_type_ghost || t->type == task_type_kick) {
+    if (t->type == task_type_ghost || t->type == task_type_kick1 ||
+        t->type == task_type_kick2 || t->type == task_type_timestep ||
+        t->type == task_type_drift) {
       /* Particle updates add only to vertex weight. */
       if (taskvweights) weights_v[cid] += w;
 
diff --git a/src/potential.h b/src/potential.h
index c462806e206e0e0455bf7094708ab003b7ca9682..116ea8302e7f706cdb861540a89d562174d73408 100644
--- a/src/potential.h
+++ b/src/potential.h
@@ -34,8 +34,6 @@
 #include "./potential/point_mass/potential.h"
 #elif defined(EXTERNAL_POTENTIAL_ISOTHERMAL)
 #include "./potential/isothermal/potential.h"
-#elif defined(EXTERNAL_POTENTIAL_SOFTENED_ISOTHERMAL)
-#include "./potential/softened_isothermal/potential.h"
 #elif defined(EXTERNAL_POTENTIAL_DISC_PATCH)
 #include "./potential/disc_patch/potential.h"
 #else
diff --git a/src/potential/disc_patch/potential.h b/src/potential/disc_patch/potential.h
index fe1df8796f046edded0c5b1779859a1c6fffffc0..400539a8d02d29a8d383bb1c523d064f733267c5 100644
--- a/src/potential/disc_patch/potential.h
+++ b/src/potential/disc_patch/potential.h
@@ -83,37 +83,37 @@ __attribute__((always_inline)) INLINE static float external_gravity_timestep(
   float dt = dt_dyn;
 
   /* absolute value of height above disc */
-  const float dz = fabs(g->x[2] - potential->z_disc);
+  const float dz = fabsf(g->x[2] - potential->z_disc);
 
-  /* vertical cceleration */
+  /* vertical acceleration */
   const float z_accel = 2.f * M_PI * phys_const->const_newton_G *
                         potential->surface_density *
-                        tanh(dz / potential->scale_height);
+                        tanhf(dz / potential->scale_height);
 
   /* demand that dt * velocity <  fraction of scale height of disc */
   float dt1 = FLT_MAX;
-  if (fabs(g->v_full[2]) > 0) {
-    dt1 = potential->scale_height / fabs(g->v_full[2]);
+  if (g->v_full[2] != 0.f) {
+    dt1 = potential->scale_height / fabsf(g->v_full[2]);
     if (dt1 < dt) dt = dt1;
   }
 
   /* demand that dt^2 * acceleration < fraction of scale height of disc */
   float dt2 = FLT_MAX;
-  if (fabs(z_accel) > 0) {
-    dt2 = potential->scale_height / fabs(z_accel);
-    if (dt2 < dt * dt) dt = sqrt(dt2);
+  if (z_accel != 0.f) {
+    dt2 = potential->scale_height / fabsf(z_accel);
+    if (dt2 < dt * dt) dt = sqrtf(dt2);
   }
 
-  /* demand that dt^3 jerk < fraction of scale height of disc */
+  /* demand that dt^3 * jerk < fraction of scale height of disc */
   float dt3 = FLT_MAX;
-  if (abs(g->v_full[2]) > 0) {
+  if (g->v_full[2] != 0.f) {
     const float dz_accel_over_dt =
         2.f * M_PI * phys_const->const_newton_G * potential->surface_density /
-        potential->scale_height / cosh(dz / potential->scale_height) /
-        cosh(dz / potential->scale_height) * fabs(g->v_full[2]);
+        potential->scale_height / coshf(dz / potential->scale_height) /
+        coshf(dz / potential->scale_height) * fabsf(g->v_full[2]);
 
-    dt3 = potential->scale_height / fabs(dz_accel_over_dt);
-    if (dt3 < dt * dt * dt) dt = pow(dt3, 1. / 3.);
+    dt3 = potential->scale_height / fabsf(dz_accel_over_dt);
+    if (dt3 < dt * dt * dt) dt = cbrtf(dt3);
   }
 
   return potential->timestep_mult * dt;
@@ -123,7 +123,8 @@ __attribute__((always_inline)) INLINE static float external_gravity_timestep(
  * @brief Computes the gravitational acceleration along z due to a hydrostatic
  * disc
  *
- * See Creasey, Theuns & Bower, 2013, MNRAS, Volume 429, Issue 3, p.1922-1948
+ * See Creasey, Theuns & Bower, 2013, MNRAS, Volume 429, Issue 3, p.1922-1948,
+ * equation 17.
  *
  * @param time The current time in internal units.
  * @param potential The properties of the potential.
@@ -144,7 +145,7 @@ __attribute__((always_inline)) INLINE static void external_gravity_acceleration(
   /* Accelerations. Note that they are multiplied by G later on */
   const float z_accel = reduction_factor * 2.f * M_PI *
                         potential->surface_density *
-                        tanh(fabs(dz) / potential->scale_height);
+                        tanhf(fabsf(dz) / potential->scale_height);
 
   if (dz > 0) g->a_grav[2] -= z_accel;
   if (dz < 0) g->a_grav[2] += z_accel;
@@ -153,26 +154,40 @@ __attribute__((always_inline)) INLINE static void external_gravity_acceleration(
 /**
  * @brief Computes the gravitational potential energy of a particle in the
  * disc patch potential.
- * Time evolving system so not sure how to do this
- * Placeholder for now- just returns 0
  *
+ * See Creasey, Theuns & Bower, 2013, MNRAS, Volume 429, Issue 3, p.1922-1948,
+ * equation 24.
+ *
+ * @param time The current time.
  * @param potential The #external_potential used in the run.
  * @param phys_const Physical constants in internal units.
- * @param p Pointer to the particle data.
+ * @param gp Pointer to the particle data.
  */
-
 __attribute__((always_inline)) INLINE static float
 external_gravity_get_potential_energy(
-    const struct external_potential* potential,
-    const struct phys_const* const phys_const, const struct gpart* p) {
+    double time, const struct external_potential* potential,
+    const struct phys_const* const phys_const, const struct gpart* gp) {
+
+  const float dz = gp->x[2] - potential->z_disc;
+  const float t_dyn = potential->dynamical_time;
 
-  return 0.f;
+  float reduction_factor = 1.f;
+  if (time < potential->growth_time * t_dyn)
+    reduction_factor = time / (potential->growth_time * t_dyn);
+
+  /* Accelerations. Note that they are multiplied by G later on */
+  return reduction_factor * 2.f * M_PI * phys_const->const_newton_G *
+         potential->surface_density * potential->scale_height *
+         logf(coshf(dz / potential->scale_height));
 }
 
 /**
  * @brief Initialises the external potential properties in the internal system
  * of units.
  *
+ * See Creasey, Theuns & Bower, 2013, MNRAS, Volume 429, Issue 3, p.1922-1948,
+ * equation 22.
+ *
  * @param parameter_file The parsed parameter file
  * @param phys_const Physical constants in internal units
  * @param us The current internal system of units
diff --git a/src/potential/isothermal/potential.h b/src/potential/isothermal/potential.h
index a582dce17daba0ac9705ef4ae1fc6be9db19315a..9c07f3eb67528a003788ca94bd1e2e52dd985a2c 100644
--- a/src/potential/isothermal/potential.h
+++ b/src/potential/isothermal/potential.h
@@ -1,7 +1,8 @@
 /*******************************************************************************
  * This file is part of SWIFT.
- * Copyright (c) 2016 Tom Theuns (tom.theuns@durham.ac.uk)
- *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ * Copyright (c) 2016  Tom Theuns (tom.theuns@durham.ac.uk)
+ *                     Stefan Arridge (stefan.arridge@durham.ac.uk)
+ *                     Matthieu Schaller (matthieu.schaller@durham.ac.uk)
  *
  * This program is free software: you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published
@@ -35,7 +36,8 @@
 #include "units.h"
 
 /**
- * @brief External Potential Properties - Isothermal sphere case
+ * @brief External Potential Properties - Isothermal sphere case with
+ * central softening
  */
 struct external_potential {
 
@@ -45,9 +47,14 @@ struct external_potential {
   /*! Rotation velocity */
   double vrot;
 
-  /*! Square of vrot divided by G \f$ \frac{v_{rot}^2}{G} \f$ */
+  /*! Square of vrot, the circular velocity which defines the isothermal
+   * potential devided by Newton's constant */
   double vrot2_over_G;
 
+  /*! Square of the softening length. Acceleration tends to zero within this
+   * distance from the origin */
+  double epsilon2;
+
   /*! Time-step condition pre-factor */
   double timestep_mult;
 };
@@ -70,17 +77,18 @@ __attribute__((always_inline)) INLINE static float external_gravity_timestep(
   const float dy = g->x[1] - potential->y;
   const float dz = g->x[2] - potential->z;
 
-  const float rinv2 = 1.f / (dx * dx + dy * dy + dz * dz);
+  const float r2_plus_epsilon2_inv =
+      1.f / (dx * dx + dy * dy + dz * dz + potential->epsilon2);
   const float drdv =
       dx * (g->v_full[0]) + dy * (g->v_full[1]) + dz * (g->v_full[2]);
   const double vrot = potential->vrot;
 
-  const float dota_x =
-      vrot * vrot * rinv2 * (g->v_full[0] - 2.f * drdv * dx * rinv2);
-  const float dota_y =
-      vrot * vrot * rinv2 * (g->v_full[1] - 2.f * drdv * dy * rinv2);
-  const float dota_z =
-      vrot * vrot * rinv2 * (g->v_full[2] - 2.f * drdv * dz * rinv2);
+  const float dota_x = vrot * vrot * r2_plus_epsilon2_inv *
+                       (g->v_full[0] - 2.f * drdv * dx * r2_plus_epsilon2_inv);
+  const float dota_y = vrot * vrot * r2_plus_epsilon2_inv *
+                       (g->v_full[1] - 2.f * drdv * dy * r2_plus_epsilon2_inv);
+  const float dota_z = vrot * vrot * r2_plus_epsilon2_inv *
+                       (g->v_full[2] - 2.f * drdv * dz * r2_plus_epsilon2_inv);
   const float dota_2 = dota_x * dota_x + dota_y * dota_y + dota_z * dota_z;
   const float a_2 = g->a_grav[0] * g->a_grav[0] + g->a_grav[1] * g->a_grav[1] +
                     g->a_grav[2] * g->a_grav[2];
@@ -94,6 +102,10 @@ __attribute__((always_inline)) INLINE static float external_gravity_timestep(
  * Note that the accelerations are multiplied by Newton's G constant
  * later on.
  *
+ * a_x = -(v_rot^2 / G) * x / (r^2 + epsilon^2)
+ * a_y = -(v_rot^2 / G) * y / (r^2 + epsilon^2)
+ * a_z = -(v_rot^2 / G) * z / (r^2 + epsilon^2)
+ *
  * @param time The current time.
  * @param potential The #external_potential used in the run.
  * @param phys_const The physical constants in internal units.
@@ -106,10 +118,10 @@ __attribute__((always_inline)) INLINE static void external_gravity_acceleration(
   const float dx = g->x[0] - potential->x;
   const float dy = g->x[1] - potential->y;
   const float dz = g->x[2] - potential->z;
+  const float r2_plus_epsilon2_inv =
+      1.f / (dx * dx + dy * dy + dz * dz + potential->epsilon2);
 
-  const float rinv2 = 1.f / (dx * dx + dy * dy + dz * dz);
-
-  const double term = -potential->vrot2_over_G * rinv2;
+  const float term = -potential->vrot2_over_G * r2_plus_epsilon2_inv;
 
   g->a_grav[0] += term * dx;
   g->a_grav[1] += term * dy;
@@ -120,21 +132,24 @@ __attribute__((always_inline)) INLINE static void external_gravity_acceleration(
  * @brief Computes the gravitational potential energy of a particle in an
  * isothermal potential.
  *
+ * phi = -0.5 * vrot^2 * ln(r^2 + epsilon^2)
+ *
+ * @param time The current time (unused here).
  * @param potential The #external_potential used in the run.
  * @param phys_const Physical constants in internal units.
  * @param g Pointer to the particle data.
  */
 __attribute__((always_inline)) INLINE static float
 external_gravity_get_potential_energy(
-    const struct external_potential* potential,
+    double time, const struct external_potential* potential,
     const struct phys_const* const phys_const, const struct gpart* g) {
 
   const float dx = g->x[0] - potential->x;
   const float dy = g->x[1] - potential->y;
   const float dz = g->x[2] - potential->z;
 
-  return 0.5f * potential->vrot * potential->vrot *
-         logf(dx * dx + dy * dy * dz * dz);
+  return -0.5f * potential->vrot * potential->vrot *
+         logf(dx * dx + dy * dy + dz * dz + potential->epsilon2);
 }
 
 /**
@@ -164,9 +179,11 @@ static INLINE void potential_init_backend(
       parser_get_param_double(parameter_file, "IsothermalPotential:vrot");
   potential->timestep_mult = parser_get_param_float(
       parameter_file, "IsothermalPotential:timestep_mult");
-
+  const double epsilon =
+      parser_get_param_double(parameter_file, "IsothermalPotential:epsilon");
   potential->vrot2_over_G =
       potential->vrot * potential->vrot / phys_const->const_newton_G;
+  potential->epsilon2 = epsilon * epsilon;
 }
 
 /**
@@ -180,9 +197,9 @@ static INLINE void potential_print_backend(
   message(
       "External potential is 'Isothermal' with properties are (x,y,z) = (%e, "
       "%e, %e), vrot = %e "
-      "timestep multiplier = %e.",
+      "timestep multiplier = %e, epsilon = %e",
       potential->x, potential->y, potential->z, potential->vrot,
-      potential->timestep_mult);
+      potential->timestep_mult, sqrtf(potential->epsilon2));
 }
 
-#endif /* SWIFT_POTENTIAL_ISOTHERMAL_H */
+#endif /* SWIFT_ISOTHERMAL_H */
diff --git a/src/potential/none/potential.h b/src/potential/none/potential.h
index 8248b64678e28e06b9df4aab375cde0b5ed5281b..cb6254b4a23b336637cb3c9f36a2dd01170eabad 100644
--- a/src/potential/none/potential.h
+++ b/src/potential/none/potential.h
@@ -71,13 +71,16 @@ __attribute__((always_inline)) INLINE static void external_gravity_acceleration(
 /**
  * @brief Computes the gravitational potential energy due to nothing.
  *
+ * We return 0.
+ *
+ * @param time The current time.
  * @param potential The #external_potential used in the run.
  * @param phys_const Physical constants in internal units.
  * @param g Pointer to the particle data.
  */
 __attribute__((always_inline)) INLINE static float
 external_gravity_get_potential_energy(
-    const struct external_potential* potential,
+    double time, const struct external_potential* potential,
     const struct phys_const* const phys_const, const struct gpart* g) {
 
   return 0.f;
diff --git a/src/potential/point_mass/potential.h b/src/potential/point_mass/potential.h
index 5f3d1c27b85c4f1353481e6351fba47aff62d66f..81b51ab2009ad599d0201708d78c8c64cac991dc 100644
--- a/src/potential/point_mass/potential.h
+++ b/src/potential/point_mass/potential.h
@@ -120,13 +120,14 @@ __attribute__((always_inline)) INLINE static void external_gravity_acceleration(
  * @brief Computes the gravitational potential energy of a particle in a point
  * mass potential.
  *
+ * @param time The current time (unused here).
  * @param potential The #external_potential used in the run.
  * @param phys_const Physical constants in internal units.
  * @param g Pointer to the particle data.
  */
 __attribute__((always_inline)) INLINE static float
 external_gravity_get_potential_energy(
-    const struct external_potential* potential,
+    double time, const struct external_potential* potential,
     const struct phys_const* const phys_const, const struct gpart* g) {
 
   const float dx = g->x[0] - potential->x;
diff --git a/src/potential/softened_isothermal/potential.h b/src/potential/softened_isothermal/potential.h
deleted file mode 100644
index 24e59b12a5745728fb1189fbbfbc7cc3c06fbfa6..0000000000000000000000000000000000000000
--- a/src/potential/softened_isothermal/potential.h
+++ /dev/null
@@ -1,196 +0,0 @@
-/*******************************************************************************
- * This file is part of SWIFT.
- * Copyright (c) 2016  Stefan Arridge (stefan.arridge@durham.ac.uk)
- *                     Matthieu Schaller (matthieu.schaller@durham.ac.uk)
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published
- * by the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- ******************************************************************************/
-#ifndef SWIFT_POTENTIAL_SOFTENED_ISOTHERMAL_H
-#define SWIFT_POTENTIAL_SOFTENED_ISOTHERMAL_H
-
-/* Config parameters. */
-#include "../config.h"
-
-/* Some standard headers. */
-#include <math.h>
-
-/* Local includes. */
-#include "error.h"
-#include "parser.h"
-#include "part.h"
-#include "physical_constants.h"
-#include "space.h"
-#include "units.h"
-
-/**
- * @brief External Potential Properties - Softened Isothermal sphere case
- */
-struct external_potential {
-
-  /*! Position of the centre of potential */
-  double x, y, z;
-
-  /*! Rotation velocity */
-  double vrot;
-
-  /*! Square of vrot, the circular velocity which defines the isothermal
-   * potential */
-  double vrot2_over_G;
-
-  /*! Square of the softening length. Acceleration tends to zero within this
-   * distance from the origin */
-  double epsilon2;
-
-  /*! Time-step condition pre-factor */
-  double timestep_mult;
-};
-
-/**
- * @brief Computes the time-step due to the acceleration from an isothermal
- * potential.
- *
- * @param time The current time.
- * @param potential The #external_potential used in the run.
- * @param phys_const The physical constants in internal units.
- * @param g Pointer to the g-particle data.
- */
-__attribute__((always_inline)) INLINE static float external_gravity_timestep(
-    double time, const struct external_potential* restrict potential,
-    const struct phys_const* restrict phys_const,
-    const struct gpart* restrict g) {
-
-  const float dx = g->x[0] - potential->x;
-  const float dy = g->x[1] - potential->y;
-  const float dz = g->x[2] - potential->z;
-
-  const float r2_plus_epsilon2_inv =
-      1.f / (dx * dx + dy * dy + dz * dz + potential->epsilon2);
-  const float drdv =
-      dx * (g->v_full[0]) + dy * (g->v_full[1]) + dz * (g->v_full[2]);
-  const double vrot = potential->vrot;
-
-  const float dota_x = vrot * vrot * r2_plus_epsilon2_inv *
-                       (g->v_full[0] - 2.f * drdv * dx * r2_plus_epsilon2_inv);
-  const float dota_y = vrot * vrot * r2_plus_epsilon2_inv *
-                       (g->v_full[1] - 2.f * drdv * dy * r2_plus_epsilon2_inv);
-  const float dota_z = vrot * vrot * r2_plus_epsilon2_inv *
-                       (g->v_full[2] - 2.f * drdv * dz * r2_plus_epsilon2_inv);
-  const float dota_2 = dota_x * dota_x + dota_y * dota_y + dota_z * dota_z;
-  const float a_2 = g->a_grav[0] * g->a_grav[0] + g->a_grav[1] * g->a_grav[1] +
-                    g->a_grav[2] * g->a_grav[2];
-
-  return potential->timestep_mult * sqrtf(a_2 / dota_2);
-}
-
-/**
- * @brief Computes the gravitational acceleration from an isothermal potential.
- *
- * Note that the accelerations are multiplied by Newton's G constant
- * later on.
- *
- * a = v_rot^2 * (x,y,z) / (r^2 + epsilon^2)
- * @param time The current time.
- * @param potential The #external_potential used in the run.
- * @param phys_const The physical constants in internal units.
- * @param g Pointer to the g-particle data.
- */
-__attribute__((always_inline)) INLINE static void external_gravity_acceleration(
-    double time, const struct external_potential* potential,
-    const struct phys_const* const phys_const, struct gpart* g) {
-
-  const float dx = g->x[0] - potential->x;
-  const float dy = g->x[1] - potential->y;
-  const float dz = g->x[2] - potential->z;
-  const float r2_plus_epsilon2_inv =
-      1.f / (dx * dx + dy * dy + dz * dz + potential->epsilon2);
-
-  const double term = -potential->vrot2_over_G * r2_plus_epsilon2_inv;
-
-  g->a_grav[0] += term * dx;
-  g->a_grav[1] += term * dy;
-  g->a_grav[2] += term * dz;
-}
-
-/**
- * @brief Computes the gravitational potential energy of a particle in an
- * isothermal potential.
- *
- * @param potential The #external_potential used in the run.
- * @param phys_const Physical constants in internal units.
- * @param g Pointer to the particle data.
- */
-__attribute__((always_inline)) INLINE static float
-external_gravity_get_potential_energy(
-    const struct external_potential* potential,
-    const struct phys_const* const phys_const, const struct gpart* g) {
-
-  const float dx = g->x[0] - potential->x;
-  const float dy = g->x[1] - potential->y;
-  const float dz = g->x[2] - potential->z;
-
-  return 0.5f * potential->vrot * potential->vrot *
-         logf(dx * dx + dy * dy * dz * dz + potential->epsilon2);
-}
-/**
- * @brief Initialises the external potential properties in the internal system
- * of units.
- *
- * @param parameter_file The parsed parameter file
- * @param phys_const Physical constants in internal units
- * @param us The current internal system of units
- * @param potential The external potential properties to initialize
- */
-static INLINE void potential_init_backend(
-    const struct swift_params* parameter_file,
-    const struct phys_const* phys_const, const struct UnitSystem* us,
-    const struct space* s, struct external_potential* potential) {
-
-  potential->x = s->dim[0] / 2. +
-                 parser_get_param_double(
-                     parameter_file, "SoftenedIsothermalPotential:position_x");
-  potential->y = s->dim[1] / 2. +
-                 parser_get_param_double(
-                     parameter_file, "SoftenedIsothermalPotential:position_y");
-  potential->z = s->dim[2] / 2. +
-                 parser_get_param_double(
-                     parameter_file, "SoftenedIsothermalPotential:position_z");
-  potential->vrot = parser_get_param_double(parameter_file,
-                                            "SoftenedIsothermalPotential:vrot");
-  potential->timestep_mult = parser_get_param_float(
-      parameter_file, "SoftenedIsothermalPotential:timestep_mult");
-  const double epsilon = parser_get_param_float(
-      parameter_file, "SoftenedIsothermalPotential:epsilon");
-  potential->vrot2_over_G =
-      potential->vrot * potential->vrot / phys_const->const_newton_G;
-  potential->epsilon2 = epsilon * epsilon;
-}
-
-/**
- * @brief Prints the properties of the external potential to stdout.
- *
- * @param  potential The external potential properties.
- */
-static INLINE void potential_print_backend(
-    const struct external_potential* potential) {
-
-  message(
-      "External potential is 'Isothermal' with properties are (x,y,z) = (%e, "
-      "%e, %e), vrot = %e "
-      "timestep multiplier = %e, epsilon = %e",
-      potential->x, potential->y, potential->z, potential->vrot,
-      potential->timestep_mult, sqrtf(potential->epsilon2));
-}
-
-#endif /* SWIFT_POTENTIAL_ISOTHERMAL_H */
diff --git a/src/proxy.c b/src/proxy.c
index efe3a3eec108d44d5b9bf8b4718dc025464f8762..dd6faa3055cb17a0a3050d9e62d107d7489a4326 100644
--- a/src/proxy.c
+++ b/src/proxy.c
@@ -45,7 +45,6 @@
  *
  * @param p The #proxy.
  */
-
 void proxy_cells_exch1(struct proxy *p) {
 
 #ifdef WITH_MPI
@@ -65,8 +64,8 @@ void proxy_cells_exch1(struct proxy *p) {
 
   /* Allocate and fill the pcell buffer. */
   if (p->pcells_out != NULL) free(p->pcells_out);
-  if ((p->pcells_out = malloc(sizeof(struct pcell) * p->size_pcells_out)) ==
-      NULL)
+  if (posix_memalign((void **)&p->pcells_out, SWIFT_STRUCT_ALIGNMENT,
+                     sizeof(struct pcell) * p->size_pcells_out) != 0)
     error("Failed to allocate pcell_out buffer.");
   for (int ind = 0, k = 0; k < p->nr_cells_out; k++) {
     memcpy(&p->pcells_out[ind], p->cells_out[k]->pcell,
@@ -102,8 +101,8 @@ void proxy_cells_exch2(struct proxy *p) {
 
   /* Re-allocate the pcell_in buffer. */
   if (p->pcells_in != NULL) free(p->pcells_in);
-  if ((p->pcells_in = (struct pcell *)malloc(sizeof(struct pcell) *
-                                             p->size_pcells_in)) == NULL)
+  if (posix_memalign((void **)&p->pcells_in, SWIFT_STRUCT_ALIGNMENT,
+                     sizeof(struct pcell) * p->size_pcells_in) != 0)
     error("Failed to allocate pcell_in buffer.");
 
   /* Receive the particle buffers. */
@@ -126,7 +125,6 @@ void proxy_cells_exch2(struct proxy *p) {
  * @param p The #proxy.
  * @param c The #cell.
  */
-
 void proxy_addcell_in(struct proxy *p, struct cell *c) {
 
   /* Check if the cell is already registered with the proxy. */
@@ -155,7 +153,6 @@ void proxy_addcell_in(struct proxy *p, struct cell *c) {
  * @param p The #proxy.
  * @param c The #cell.
  */
-
 void proxy_addcell_out(struct proxy *p, struct cell *c) {
 
   /* Check if the cell is already registered with the proxy. */
@@ -183,7 +180,6 @@ void proxy_addcell_out(struct proxy *p, struct cell *c) {
  *
  * @param p The #proxy.
  */
-
 void proxy_parts_exch1(struct proxy *p) {
 
 #ifdef WITH_MPI
@@ -191,7 +187,8 @@ void proxy_parts_exch1(struct proxy *p) {
   /* Send the number of particles. */
   p->buff_out[0] = p->nr_parts_out;
   p->buff_out[1] = p->nr_gparts_out;
-  if (MPI_Isend(p->buff_out, 2, MPI_INT, p->nodeID,
+  p->buff_out[2] = p->nr_sparts_out;
+  if (MPI_Isend(p->buff_out, 3, MPI_INT, p->nodeID,
                 p->mynodeID * proxy_tag_shift + proxy_tag_count, MPI_COMM_WORLD,
                 &p->req_parts_count_out) != MPI_SUCCESS)
     error("Failed to isend nr of parts.");
@@ -218,13 +215,22 @@ void proxy_parts_exch1(struct proxy *p) {
     if (MPI_Isend(p->gparts_out, p->nr_gparts_out, gpart_mpi_type, p->nodeID,
                   p->mynodeID * proxy_tag_shift + proxy_tag_gparts,
                   MPI_COMM_WORLD, &p->req_gparts_out) != MPI_SUCCESS)
-      error("Failed to isend part data.");
+      error("Failed to isend gpart data.");
+    // message( "isent gpart data (%i) to node %i." , p->nr_parts_out ,
+    // p->nodeID ); fflush(stdout);
+  }
+
+  if (p->nr_sparts_out > 0) {
+    if (MPI_Isend(p->sparts_out, p->nr_sparts_out, spart_mpi_type, p->nodeID,
+                  p->mynodeID * proxy_tag_shift + proxy_tag_sparts,
+                  MPI_COMM_WORLD, &p->req_sparts_out) != MPI_SUCCESS)
+      error("Failed to isend spart data.");
     // message( "isent gpart data (%i) to node %i." , p->nr_parts_out ,
     // p->nodeID ); fflush(stdout);
   }
 
   /* Receive the number of particles. */
-  if (MPI_Irecv(p->buff_in, 2, MPI_INT, p->nodeID,
+  if (MPI_Irecv(p->buff_in, 3, MPI_INT, p->nodeID,
                 p->nodeID * proxy_tag_shift + proxy_tag_count, MPI_COMM_WORLD,
                 &p->req_parts_count_in) != MPI_SUCCESS)
     error("Failed to irecv nr of parts.");
@@ -241,8 +247,9 @@ void proxy_parts_exch2(struct proxy *p) {
   /* Unpack the incomming parts counts. */
   p->nr_parts_in = p->buff_in[0];
   p->nr_gparts_in = p->buff_in[1];
+  p->nr_sparts_in = p->buff_in[2];
 
-  /* Is there enough space in the buffer? */
+  /* Is there enough space in the buffers? */
   if (p->nr_parts_in > p->size_parts_in) {
     do {
       p->size_parts_in *= proxy_buffgrow;
@@ -264,6 +271,15 @@ void proxy_parts_exch2(struct proxy *p) {
                                                p->size_gparts_in)) == NULL)
       error("Failed to re-allocate gparts_in buffers.");
   }
+  if (p->nr_sparts_in > p->size_sparts_in) {
+    do {
+      p->size_sparts_in *= proxy_buffgrow;
+    } while (p->nr_sparts_in > p->size_sparts_in);
+    free(p->sparts_in);
+    if ((p->sparts_in = (struct spart *)malloc(sizeof(struct spart) *
+                                               p->size_sparts_in)) == NULL)
+      error("Failed to re-allocate sparts_in buffers.");
+  }
 
   /* Receive the particle buffers. */
   if (p->nr_parts_in > 0) {
@@ -285,6 +301,14 @@ void proxy_parts_exch2(struct proxy *p) {
     // message( "irecv gpart data (%i) from node %i." , p->nr_gparts_in ,
     // p->nodeID ); fflush(stdout);
   }
+  if (p->nr_sparts_in > 0) {
+    if (MPI_Irecv(p->sparts_in, p->nr_sparts_in, spart_mpi_type, p->nodeID,
+                  p->nodeID * proxy_tag_shift + proxy_tag_sparts,
+                  MPI_COMM_WORLD, &p->req_sparts_in) != MPI_SUCCESS)
+      error("Failed to irecv spart data.");
+    // message( "irecv gpart data (%i) from node %i." , p->nr_gparts_in ,
+    // p->nodeID ); fflush(stdout);
+  }
 
 #else
   error("SWIFT was not compiled with MPI support.");
@@ -299,7 +323,6 @@ void proxy_parts_exch2(struct proxy *p) {
  * @param xparts Pointer to an array of #xpart to send.
  * @param N The number of parts.
  */
-
 void proxy_parts_load(struct proxy *p, const struct part *parts,
                       const struct xpart *xparts, int N) {
 
@@ -308,8 +331,8 @@ void proxy_parts_load(struct proxy *p, const struct part *parts,
     do {
       p->size_parts_out *= proxy_buffgrow;
     } while (p->nr_parts_out + N > p->size_parts_out);
-    struct part *tp;
-    struct xpart *txp;
+    struct part *tp = NULL;
+    struct xpart *txp = NULL;
     if ((tp = (struct part *)malloc(sizeof(struct part) * p->size_parts_out)) ==
             NULL ||
         (txp = (struct xpart *)malloc(sizeof(struct xpart) *
@@ -332,13 +355,12 @@ void proxy_parts_load(struct proxy *p, const struct part *parts,
 }
 
 /**
- * @brief Load parts onto a proxy for exchange.
+ * @brief Load gparts onto a proxy for exchange.
  *
  * @param p The #proxy.
  * @param gparts Pointer to an array of #gpart to send.
- * @param N The number of parts.
+ * @param N The number of gparts.
  */
-
 void proxy_gparts_load(struct proxy *p, const struct gpart *gparts, int N) {
 
   /* Is there enough space in the buffer? */
@@ -362,6 +384,36 @@ void proxy_gparts_load(struct proxy *p, const struct gpart *gparts, int N) {
   p->nr_gparts_out += N;
 }
 
+/**
+ * @brief Load sparts onto a proxy for exchange.
+ *
+ * @param p The #proxy.
+ * @param sparts Pointer to an array of #spart to send.
+ * @param N The number of sparts.
+ */
+void proxy_sparts_load(struct proxy *p, const struct spart *sparts, int N) {
+
+  /* Is there enough space in the buffer? */
+  if (p->nr_sparts_out + N > p->size_sparts_out) {
+    do {
+      p->size_sparts_out *= proxy_buffgrow;
+    } while (p->nr_sparts_out + N > p->size_sparts_out);
+    struct spart *tp;
+    if ((tp = (struct spart *)malloc(sizeof(struct spart) *
+                                     p->size_sparts_out)) == NULL)
+      error("Failed to re-allocate sparts_out buffers.");
+    memcpy(tp, p->sparts_out, sizeof(struct spart) * p->nr_sparts_out);
+    free(p->sparts_out);
+    p->sparts_out = tp;
+  }
+
+  /* Copy the parts and xparts data to the buffer. */
+  memcpy(&p->sparts_out[p->nr_sparts_out], sparts, sizeof(struct spart) * N);
+
+  /* Increase the counters. */
+  p->nr_sparts_out += N;
+}
+
 /**
  * @brief Initialize the given proxy.
  *
@@ -369,7 +421,6 @@ void proxy_gparts_load(struct proxy *p, const struct gpart *gparts, int N) {
  * @param mynodeID The node this proxy is running on.
  * @param nodeID The node with which this proxy will communicate.
  */
-
 void proxy_init(struct proxy *p, int mynodeID, int nodeID) {
 
   /* Set the nodeID. */
@@ -427,4 +478,20 @@ void proxy_init(struct proxy *p, int mynodeID, int nodeID) {
       error("Failed to allocate gparts_out buffers.");
   }
   p->nr_gparts_out = 0;
+
+  /* Allocate the spart send and receive buffers, if needed. */
+  if (p->sparts_in == NULL) {
+    p->size_sparts_in = proxy_buffinit;
+    if ((p->sparts_in = (struct spart *)malloc(sizeof(struct spart) *
+                                               p->size_sparts_in)) == NULL)
+      error("Failed to allocate sparts_in buffers.");
+  }
+  p->nr_sparts_in = 0;
+  if (p->sparts_out == NULL) {
+    p->size_sparts_out = proxy_buffinit;
+    if ((p->sparts_out = (struct spart *)malloc(sizeof(struct spart) *
+                                                p->size_sparts_out)) == NULL)
+      error("Failed to allocate sparts_out buffers.");
+  }
+  p->nr_sparts_out = 0;
 }
diff --git a/src/proxy.h b/src/proxy.h
index 5a747187e05a78a109ce4523ebb3c9d5fe2ad717..a245077193878bb669b474944965badceffcee80 100644
--- a/src/proxy.h
+++ b/src/proxy.h
@@ -33,7 +33,8 @@
 #define proxy_tag_parts 1
 #define proxy_tag_xparts 2
 #define proxy_tag_gparts 3
-#define proxy_tag_cells 4
+#define proxy_tag_sparts 4
+#define proxy_tag_cells 5
 
 /* Data structure for the proxy. */
 struct proxy {
@@ -55,13 +56,16 @@ struct proxy {
   struct part *parts_in, *parts_out;
   struct xpart *xparts_in, *xparts_out;
   struct gpart *gparts_in, *gparts_out;
+  struct spart *sparts_in, *sparts_out;
   int size_parts_in, size_parts_out;
   int nr_parts_in, nr_parts_out;
   int size_gparts_in, size_gparts_out;
   int nr_gparts_in, nr_gparts_out;
+  int size_sparts_in, size_sparts_out;
+  int nr_sparts_in, nr_sparts_out;
 
   /* Buffer to hold the incomming/outgoing particle counts. */
-  int buff_out[2], buff_in[2];
+  int buff_out[3], buff_in[3];
 
 /* MPI request handles. */
 #ifdef WITH_MPI
@@ -69,6 +73,7 @@ struct proxy {
   MPI_Request req_parts_out, req_parts_in;
   MPI_Request req_xparts_out, req_xparts_in;
   MPI_Request req_gparts_out, req_gparts_in;
+  MPI_Request req_sparts_out, req_sparts_in;
   MPI_Request req_cells_count_out, req_cells_count_in;
   MPI_Request req_cells_out, req_cells_in;
 #endif
@@ -79,6 +84,7 @@ void proxy_init(struct proxy *p, int mynodeID, int nodeID);
 void proxy_parts_load(struct proxy *p, const struct part *parts,
                       const struct xpart *xparts, int N);
 void proxy_gparts_load(struct proxy *p, const struct gpart *gparts, int N);
+void proxy_sparts_load(struct proxy *p, const struct spart *sparts, int N);
 void proxy_parts_exch1(struct proxy *p);
 void proxy_parts_exch2(struct proxy *p);
 void proxy_addcell_in(struct proxy *p, struct cell *c);
diff --git a/src/queue.h b/src/queue.h
index c0a2fb1da6e6e3cbea813a0ef53841084ab0f933..951a3e5a056d7ad0c3935f98341a0d93c805e3ad 100644
--- a/src/queue.h
+++ b/src/queue.h
@@ -30,6 +30,7 @@
 #define queue_sizegrow 2
 #define queue_search_window 8
 #define queue_incoming_size 1024
+#define queue_struct_align 64
 
 /* Counters. */
 enum {
@@ -57,7 +58,7 @@ struct queue {
   int *tid_incoming;
   volatile unsigned int first_incoming, last_incoming, count_incoming;
 
-} __attribute__((aligned(64)));
+} __attribute__((aligned(queue_struct_align)));
 
 /* Function prototypes. */
 struct task *queue_gettask(struct queue *q, const struct task *prev,
diff --git a/src/runner.c b/src/runner.c
index 2d6da4e4aedc9c40d1dade243e605e9aeda86dbe..64b03732b4f12319ff7713c82cba4546b3c48510 100644
--- a/src/runner.c
+++ b/src/runner.c
@@ -53,9 +53,11 @@
 #include "hydro_properties.h"
 #include "kick.h"
 #include "minmax.h"
+#include "runner_doiact_vec.h"
 #include "scheduler.h"
 #include "sourceterms.h"
 #include "space.h"
+#include "stars.h"
 #include "task.h"
 #include "timers.h"
 #include "timestep.h"
@@ -205,14 +207,17 @@ void runner_do_cooling(struct runner *r, struct cell *c, int timer) {
   struct part *restrict parts = c->parts;
   struct xpart *restrict xparts = c->xparts;
   const int count = c->count;
-  const int ti_current = r->e->ti_current;
-  const struct cooling_function_data *cooling_func = r->e->cooling_func;
-  const struct phys_const *constants = r->e->physical_constants;
-  const struct UnitSystem *us = r->e->internalUnits;
-  const double timeBase = r->e->timeBase;
+  const struct engine *e = r->e;
+  const struct cooling_function_data *cooling_func = e->cooling_func;
+  const struct phys_const *constants = e->physical_constants;
+  const struct UnitSystem *us = e->internalUnits;
+  const double timeBase = e->timeBase;
 
   TIMER_TIC;
 
+  /* Anything to do here? */
+  if (!cell_is_active(c, e)) return;
+
   /* Recurse? */
   if (c->split) {
     for (int k = 0; k < 8; k++)
@@ -226,11 +231,10 @@ void runner_do_cooling(struct runner *r, struct cell *c, int timer) {
       struct part *restrict p = &parts[i];
       struct xpart *restrict xp = &xparts[i];
 
-      /* Kick has already updated ti_end, so need to check ti_begin */
-      if (p->ti_begin == ti_current) {
-
-        const double dt = (p->ti_end - p->ti_begin) * timeBase;
+      if (part_is_active(p, e)) {
 
+        /* Let's cool ! */
+        const double dt = get_timestep(p->time_bin, timeBase);
         cooling_cool_part(constants, us, cooling_func, p, xp, dt);
       }
     }
@@ -589,8 +593,6 @@ void runner_do_ghost(struct runner *r, struct cell *c, int timer) {
   struct xpart *restrict xparts = c->xparts;
   int redo, count = c->count;
   const struct engine *e = r->e;
-  const int ti_current = e->ti_current;
-  const double timeBase = e->timeBase;
   const float target_wcount = e->hydro_properties->target_neighbours;
   const float max_wcount =
       target_wcount + e->hydro_properties->delta_neighbours;
@@ -672,7 +674,7 @@ void runner_do_ghost(struct runner *r, struct cell *c, int timer) {
           /* As of here, particle force variables will be set. */
 
           /* Compute variables required for the force loop */
-          hydro_prepare_force(p, xp, ti_current, timeBase);
+          hydro_prepare_force(p, xp);
 
           /* The particle force values are now set.  Do _NOT_
              try to read any particle density variables! */
@@ -733,8 +735,16 @@ void runner_do_ghost(struct runner *r, struct cell *c, int timer) {
       }
     }
 
+#ifdef SWIFT_DEBUG_CHECKS
+    if (count) {
+      message("Smoothing length failed to converge on %i particles.", count);
+
+      error("Aborting....");
+    }
+#else
     if (count)
       message("Smoothing length failed to converge on %i particles.", count);
+#endif
 
     /* Be clean */
     free(pid);
@@ -744,15 +754,15 @@ void runner_do_ghost(struct runner *r, struct cell *c, int timer) {
 }
 
 /**
- * @brief Drift particles and g-particles in a cell forward in time,
- *              unskipping any tasks associated with active cells.
+ * @brief Unskip any tasks associated with active cells.
  *
  * @param c The cell.
  * @param e The engine.
- * @param drift whether to actually drift the particles, will not be
- *              necessary for non-local cells.
  */
-static void runner_do_drift(struct cell *c, struct engine *e, int drift) {
+static void runner_do_unskip(struct cell *c, struct engine *e) {
+
+  /* Ignore empty cells. */
+  if (c->count == 0 && c->gcount == 0) return;
 
   /* Unskip any active tasks. */
   if (cell_is_active(c, e)) {
@@ -760,239 +770,463 @@ static void runner_do_drift(struct cell *c, struct engine *e, int drift) {
     if (forcerebuild) atomic_inc(&e->forcerebuild);
   }
 
-  /* Do we really need to drift? */
-  if (drift) {
-    if (!e->drift_all && !cell_is_drift_needed(c, e)) return;
-  } else {
-
-    /* Not drifting, but may still need to recurse for task un-skipping. */
-    if (c->split) {
-      for (int k = 0; k < 8; k++) {
-        if (c->progeny[k] != NULL) {
-          struct cell *cp = c->progeny[k];
-          runner_do_drift(cp, e, 0);
-        }
+  /* Recurse */
+  if (c->split) {
+    for (int k = 0; k < 8; k++) {
+      if (c->progeny[k] != NULL) {
+        struct cell *cp = c->progeny[k];
+        runner_do_unskip(cp, e);
       }
     }
-    return;
   }
+}
+
+/**
+ * @brief Mapper function to unskip active tasks.
+ *
+ * @param map_data An array of #cell%s.
+ * @param num_elements Chunk size.
+ * @param extra_data Pointer to an #engine.
+ */
+void runner_do_unskip_mapper(void *map_data, int num_elements,
+                             void *extra_data) {
+
+  struct engine *e = (struct engine *)extra_data;
+  struct cell *cells = (struct cell *)map_data;
+
+  for (int ind = 0; ind < num_elements; ind++) {
+    struct cell *c = &cells[ind];
+    if (c != NULL) runner_do_unskip(c, e);
+  }
+}
+/**
+ * @brief Drift particles in real space.
+ *
+ * @param r The runner thread.
+ * @param c The cell.
+ * @param timer Are we timing this ?
+ */
+void runner_do_drift(struct runner *r, struct cell *c, int timer) {
 
-  /* Now, we can drift */
+  TIMER_TIC;
 
-  /* Get some information first */
-  const double timeBase = e->timeBase;
-  const int ti_old = c->ti_old;
-  const int ti_current = e->ti_current;
-  struct part *const parts = c->parts;
-  struct xpart *const xparts = c->xparts;
-  struct gpart *const gparts = c->gparts;
+  cell_drift(c, r->e);
 
-  /* Drift from the last time the cell was drifted to the current time */
-  const double dt = (ti_current - ti_old) * timeBase;
-  float dx_max = 0.f, dx2_max = 0.f, h_max = 0.f;
+  if (timer) TIMER_TOC(timer_drift);
+}
 
-  /* No children? */
-  if (!c->split) {
+/**
+ * @brief Mapper function to drift ALL particle and g-particles forward in time.
+ *
+ * @param map_data An array of #cell%s.
+ * @param num_elements Chunk size.
+ * @param extra_data Pointer to an #engine.
+ */
+void runner_do_drift_mapper(void *map_data, int num_elements,
+                            void *extra_data) {
 
-    /* Check that we are actually going to move forward. */
-    if (ti_current > ti_old) {
+  struct engine *e = (struct engine *)extra_data;
+  struct cell *cells = (struct cell *)map_data;
 
-      /* Loop over all the g-particles in the cell */
-      const size_t nr_gparts = c->gcount;
-      for (size_t k = 0; k < nr_gparts; k++) {
+  for (int ind = 0; ind < num_elements; ind++) {
+    struct cell *c = &cells[ind];
+    if (c != NULL && c->nodeID == e->nodeID) cell_drift(c, e);
+  }
+}
 
-        /* Get a handle on the gpart. */
-        struct gpart *const gp = &gparts[k];
+/**
+ * @brief Perform the first half-kick on all the active particles in a cell.
+ *
+ * @param r The runner thread.
+ * @param c The cell.
+ * @param timer Are we timing this ?
+ */
+void runner_do_kick1(struct runner *r, struct cell *c, int timer) {
 
-        /* Drift... */
-        drift_gpart(gp, dt, timeBase, ti_old, ti_current);
+  const struct engine *e = r->e;
+  struct part *restrict parts = c->parts;
+  struct xpart *restrict xparts = c->xparts;
+  struct gpart *restrict gparts = c->gparts;
+  struct spart *restrict sparts = c->sparts;
+  const int count = c->count;
+  const int gcount = c->gcount;
+  const int scount = c->scount;
+  const integertime_t ti_current = e->ti_current;
+  const double timeBase = e->timeBase;
 
-        /* Compute (square of) motion since last cell construction */
-        const float dx2 = gp->x_diff[0] * gp->x_diff[0] +
-                          gp->x_diff[1] * gp->x_diff[1] +
-                          gp->x_diff[2] * gp->x_diff[2];
-        dx2_max = (dx2_max > dx2) ? dx2_max : dx2;
-      }
+  TIMER_TIC;
+
+  /* Anything to do here? */
+  if (!cell_is_active(c, e)) return;
 
-      /* Loop over all the particles in the cell */
-      const size_t nr_parts = c->count;
-      for (size_t k = 0; k < nr_parts; k++) {
+  /* Recurse? */
+  if (c->split) {
+    for (int k = 0; k < 8; k++)
+      if (c->progeny[k] != NULL) runner_do_kick1(r, c->progeny[k], 0);
+  } else {
 
-        /* Get a handle on the part. */
-        struct part *const p = &parts[k];
-        struct xpart *const xp = &xparts[k];
+    /* Loop over the parts in this cell. */
+    for (int k = 0; k < count; k++) {
 
-        /* Drift... */
-        drift_part(p, xp, dt, timeBase, ti_old, ti_current);
+      /* Get a handle on the part. */
+      struct part *restrict p = &parts[k];
+      struct xpart *restrict xp = &xparts[k];
 
-        /* Compute (square of) motion since last cell construction */
-        const float dx2 = xp->x_diff[0] * xp->x_diff[0] +
-                          xp->x_diff[1] * xp->x_diff[1] +
-                          xp->x_diff[2] * xp->x_diff[2];
-        dx2_max = (dx2_max > dx2) ? dx2_max : dx2;
+      /* If particle needs to be kicked */
+      if (part_is_active(p, e)) {
 
-        /* Maximal smoothing length */
-        h_max = (h_max > p->h) ? h_max : p->h;
-      }
+        const integertime_t ti_step = get_integer_timestep(p->time_bin);
+        const integertime_t ti_begin =
+            get_integer_time_begin(ti_current, p->time_bin);
 
-      /* Now, get the maximal particle motion from its square */
-      dx_max = sqrtf(dx2_max);
+#ifdef SWIFT_DEBUG_CHECKS
+        const integertime_t ti_end =
+            get_integer_time_end(ti_current, p->time_bin);
 
-    } /* Check that we are actually going to move forward. */
+        if (ti_end - ti_begin != ti_step)
+          error(
+              "Particle in wrong time-bin, ti_end=%lld, ti_begin=%lld, "
+              "ti_step=%lld time_bin=%d ti_current=%lld",
+              ti_end, ti_begin, ti_step, p->time_bin, ti_current);
+#endif
 
-    else {
-      /* ti_old == ti_current, just keep the current cell values. */
-      h_max = c->h_max;
-      dx_max = c->dx_max;
+        /* do the kick */
+        kick_part(p, xp, ti_begin, ti_begin + ti_step / 2, timeBase);
+      }
     }
-  }
 
-  /* Otherwise, aggregate data from children. */
-  else {
+    /* Loop over the gparts in this cell. */
+    for (int k = 0; k < gcount; k++) {
 
-    /* Loop over the progeny and collect their data. */
-    for (int k = 0; k < 8; k++)
-      if (c->progeny[k] != NULL) {
-        struct cell *cp = c->progeny[k];
+      /* Get a handle on the part. */
+      struct gpart *restrict gp = &gparts[k];
 
-        /* Recurse. */
-        runner_do_drift(cp, e, drift);
-        dx_max = max(dx_max, cp->dx_max);
-        h_max = max(h_max, cp->h_max);
+      /* If the g-particle has no counterpart and needs to be kicked */
+      if (gp->type == swift_type_dark_matter && gpart_is_active(gp, e)) {
+
+        const integertime_t ti_step = get_integer_timestep(gp->time_bin);
+        const integertime_t ti_begin =
+            get_integer_time_begin(ti_current, gp->time_bin);
+
+#ifdef SWIFT_DEBUG_CHECKS
+        const integertime_t ti_end =
+            get_integer_time_end(ti_current, gp->time_bin);
+
+        if (ti_end - ti_begin != ti_step) error("Particle in wrong time-bin");
+#endif
+
+        /* do the kick */
+        kick_gpart(gp, ti_begin, ti_begin + ti_step / 2, timeBase);
       }
-  }
+    }
 
-  /* Store the values */
-  c->h_max = h_max;
-  c->dx_max = dx_max;
+    /* Loop over the star particles in this cell. */
+    for (int k = 0; k < scount; k++) {
 
-  /* Update the time of the last drift */
-  c->ti_old = ti_current;
-}
+      /* Get a handle on the s-part. */
+      struct spart *restrict sp = &sparts[k];
 
-/**
- * @brief Mapper function to drift particles and g-particles forward in time.
- *
- * @param map_data An array of #cell%s.
- * @param num_elements Chunk size.
- * @param extra_data Pointer to an #engine.
- */
+      /* If particle needs to be kicked */
+      if (spart_is_active(sp, e)) {
 
-void runner_do_drift_mapper(void *map_data, int num_elements,
-                            void *extra_data) {
+        const integertime_t ti_step = get_integer_timestep(sp->time_bin);
+        const integertime_t ti_begin =
+            get_integer_time_begin(ti_current, sp->time_bin);
 
-  struct engine *e = (struct engine *)extra_data;
-  struct cell *cells = (struct cell *)map_data;
+#ifdef SWIFT_DEBUG_CHECKS
+        const integertime_t ti_end =
+            get_integer_time_end(ti_current, sp->time_bin);
 
-  for (int ind = 0; ind < num_elements; ind++) {
-    struct cell *c = &cells[ind];
-#ifdef WITH_MPI
-    if (c != NULL) runner_do_drift(c, e, (c->nodeID == e->nodeID));
-#else
-    if (c != NULL) runner_do_drift(c, e, 1);
+        if (ti_end - ti_begin != ti_step) error("Particle in wrong time-bin");
 #endif
+
+        /* do the kick */
+        kick_spart(sp, ti_begin, ti_begin + ti_step / 2, timeBase);
+      }
+    }
   }
+
+  if (timer) TIMER_TOC(timer_kick1);
 }
 
 /**
- * @brief Kick particles in momentum space and collect statistics (floating
- * time-step case)
+ * @brief Perform the second half-kick on all the active particles in a cell.
+ *
+ * Also prepares particles to be drifted.
  *
  * @param r The runner thread.
  * @param c The cell.
  * @param timer Are we timing this ?
  */
-void runner_do_kick(struct runner *r, struct cell *c, int timer) {
+void runner_do_kick2(struct runner *r, struct cell *c, int timer) {
 
   const struct engine *e = r->e;
+  const integertime_t ti_current = e->ti_current;
   const double timeBase = e->timeBase;
   const int count = c->count;
   const int gcount = c->gcount;
+  const int scount = c->scount;
   struct part *restrict parts = c->parts;
   struct xpart *restrict xparts = c->xparts;
   struct gpart *restrict gparts = c->gparts;
-  const double const_G = e->physical_constants->const_newton_G;
+  struct spart *restrict sparts = c->sparts;
 
   TIMER_TIC;
 
   /* Anything to do here? */
-  if (!cell_is_active(c, e)) {
-    c->updated = 0;
-    c->g_updated = 0;
-    return;
-  }
+  if (!cell_is_active(c, e)) return;
 
-  int updated = 0, g_updated = 0;
-  int ti_end_min = max_nr_timesteps, ti_end_max = 0;
+  /* Recurse? */
+  if (c->split) {
+    for (int k = 0; k < 8; k++)
+      if (c->progeny[k] != NULL) runner_do_kick2(r, c->progeny[k], 0);
+  } else {
 
-  /* No children? */
-  if (!c->split) {
+    /* Loop over the particles in this cell. */
+    for (int k = 0; k < count; k++) {
+
+      /* Get a handle on the part. */
+      struct part *restrict p = &parts[k];
+      struct xpart *restrict xp = &xparts[k];
+
+      /* If particle needs to be kicked */
+      if (part_is_active(p, e)) {
+
+        const integertime_t ti_step = get_integer_timestep(p->time_bin);
+        const integertime_t ti_begin =
+            get_integer_time_begin(ti_current, p->time_bin);
+
+#ifdef SWIFT_DEBUG_CHECKS
+        if (ti_begin + ti_step != ti_current)
+          error(
+              "Particle in wrong time-bin, ti_begin=%lld, ti_step=%lld "
+              "time_bin=%d ti_current=%lld",
+              ti_begin, ti_step, p->time_bin, ti_current);
+#endif
 
-    /* Loop over the g-particles and kick the active ones. */
+        /* Finish the time-step with a second half-kick */
+        kick_part(p, xp, ti_begin + ti_step / 2, ti_begin + ti_step, timeBase);
+
+        /* Prepare the values to be drifted */
+        hydro_reset_predicted_values(p, xp);
+      }
+    }
+
+    /* Loop over the g-particles in this cell. */
     for (int k = 0; k < gcount; k++) {
 
       /* Get a handle on the part. */
       struct gpart *restrict gp = &gparts[k];
 
       /* If the g-particle has no counterpart and needs to be kicked */
-      if (gp->id_or_neg_offset > 0) {
+      if (gp->type == swift_type_dark_matter && gpart_is_active(gp, e)) {
 
-        if (gpart_is_active(gp, e)) {
+        const integertime_t ti_step = get_integer_timestep(gp->time_bin);
+        const integertime_t ti_begin =
+            get_integer_time_begin(ti_current, gp->time_bin);
 
-          /* First, finish the force calculation */
-          gravity_end_force(gp, const_G);
+#ifdef SWIFT_DEBUG_CHECKS
+        if (ti_begin + ti_step != ti_current)
+          error("Particle in wrong time-bin");
+#endif
 
-          /* Compute the next timestep */
-          const int new_dti = get_gpart_timestep(gp, e);
+        /* Finish the time-step with a second half-kick */
+        kick_gpart(gp, ti_begin + ti_step / 2, ti_begin + ti_step, timeBase);
+      }
+    }
 
-          /* Now we have a time step, proceed with the kick */
-          kick_gpart(gp, new_dti, timeBase);
+    /* Loop over the particles in this cell. */
+    for (int k = 0; k < scount; k++) {
 
-          /* Number of updated g-particles */
-          g_updated++;
-        }
+      /* Get a handle on the part. */
+      struct spart *restrict sp = &sparts[k];
 
-        /* Minimal time for next end of time-step */
-        ti_end_min = min(gp->ti_end, ti_end_min);
-        ti_end_max = max(gp->ti_end, ti_end_max);
+      /* If particle needs to be kicked */
+      if (spart_is_active(sp, e)) {
+
+        const integertime_t ti_step = get_integer_timestep(sp->time_bin);
+        const integertime_t ti_begin =
+            get_integer_time_begin(ti_current, sp->time_bin);
+
+#ifdef SWIFT_DEBUG_CHECKS
+        if (ti_begin + ti_step != ti_current)
+          error("Particle in wrong time-bin");
+#endif
+
+        /* Finish the time-step with a second half-kick */
+        kick_spart(sp, ti_begin + ti_step / 2, ti_begin + ti_step, timeBase);
+
+        /* Prepare the values to be drifted */
+        star_reset_predicted_values(sp);
       }
     }
+  }
+  if (timer) TIMER_TOC(timer_kick2);
+}
 
-    /* Now do the hydro ones... */
+/**
+ * @brief Computes the next time-step of all active particles in this cell
+ * and update the cell's statistics.
+ *
+ * @param r The runner thread.
+ * @param c The cell.
+ * @param timer Are we timing this ?
+ */
+void runner_do_timestep(struct runner *r, struct cell *c, int timer) {
+
+  const struct engine *e = r->e;
+  const integertime_t ti_current = e->ti_current;
+  const int count = c->count;
+  const int gcount = c->gcount;
+  const int scount = c->scount;
+  struct part *restrict parts = c->parts;
+  struct xpart *restrict xparts = c->xparts;
+  struct gpart *restrict gparts = c->gparts;
+  struct spart *restrict sparts = c->sparts;
+
+  TIMER_TIC;
+
+  int updated = 0, g_updated = 0, s_updated = 0;
+  integertime_t ti_end_min = max_nr_timesteps, ti_end_max = 0;
+
+  /* No children? */
+  if (!c->split) {
 
-    /* Loop over the particles and kick the active ones. */
+    /* Loop over the particles in this cell. */
     for (int k = 0; k < count; k++) {
 
       /* Get a handle on the part. */
       struct part *restrict p = &parts[k];
       struct xpart *restrict xp = &xparts[k];
 
-      /* If particle needs to be kicked */
+      /* If particle needs updating */
       if (part_is_active(p, e)) {
 
-        /* First, finish the force loop */
-        hydro_end_force(p);
-        if (p->gpart != NULL) gravity_end_force(p->gpart, const_G);
+#ifdef SWIFT_DEBUG_CHECKS
+        /* Current end of time-step */
+        const integertime_t ti_end =
+            get_integer_time_end(ti_current, p->time_bin);
 
-        /* Compute the next timestep (hydro condition) */
-        const int new_dti = get_part_timestep(p, xp, e);
+        if (ti_end != ti_current)
+          error("Computing time-step of rogue particle.");
+#endif
 
-        /* Now we have a time step, proceed with the kick */
-        kick_part(p, xp, new_dti, timeBase);
+        /* Get new time-step */
+        const integertime_t ti_new_step = get_part_timestep(p, xp, e);
+
+        /* Update particle */
+        p->time_bin = get_time_bin(ti_new_step);
+        if (p->gpart != NULL) p->gpart->time_bin = get_time_bin(ti_new_step);
 
         /* Number of updated particles */
         updated++;
         if (p->gpart != NULL) g_updated++;
+
+        /* What is the next sync-point ? */
+        ti_end_min = min(ti_current + ti_new_step, ti_end_min);
+        ti_end_max = max(ti_current + ti_new_step, ti_end_max);
       }
 
-      /* Minimal time for next end of time-step */
-      ti_end_min = min(p->ti_end, ti_end_min);
-      ti_end_max = max(p->ti_end, ti_end_max);
+      else { /* part is inactive */
+
+        const integertime_t ti_end =
+            get_integer_time_end(ti_current, p->time_bin);
+
+        /* What is the next sync-point ? */
+        ti_end_min = min(ti_end, ti_end_min);
+        ti_end_max = max(ti_end, ti_end_max);
+      }
     }
-  }
 
-  /* Otherwise, aggregate data from children. */
-  else {
+    /* Loop over the g-particles in this cell. */
+    for (int k = 0; k < gcount; k++) {
+
+      /* Get a handle on the part. */
+      struct gpart *restrict gp = &gparts[k];
+
+      /* If the g-particle has no counterpart */
+      if (gp->type == swift_type_dark_matter) {
+
+        /* need to be updated ? */
+        if (gpart_is_active(gp, e)) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+          /* Current end of time-step */
+          const integertime_t ti_end =
+              get_integer_time_end(ti_current, gp->time_bin);
+
+          if (ti_end != ti_current)
+            error("Computing time-step of rogue particle.");
+#endif
+
+          /* Get new time-step */
+          const integertime_t ti_new_step = get_gpart_timestep(gp, e);
+
+          /* Update particle */
+          gp->time_bin = get_time_bin(ti_new_step);
+
+          /* Number of updated g-particles */
+          g_updated++;
+
+          /* What is the next sync-point ? */
+          ti_end_min = min(ti_current + ti_new_step, ti_end_min);
+          ti_end_max = max(ti_current + ti_new_step, ti_end_max);
+
+        } else { /* gpart is inactive */
+
+          const integertime_t ti_end =
+              get_integer_time_end(ti_current, gp->time_bin);
+
+          /* What is the next sync-point ? */
+          ti_end_min = min(ti_end, ti_end_min);
+          ti_end_max = max(ti_end, ti_end_max);
+        }
+      }
+    }
+
+    /* Loop over the star particles in this cell. */
+    for (int k = 0; k < scount; k++) {
+
+      /* Get a handle on the part. */
+      struct spart *restrict sp = &sparts[k];
+
+      /* need to be updated ? */
+      if (spart_is_active(sp, e)) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+        /* Current end of time-step */
+        const integertime_t ti_end =
+            get_integer_time_end(ti_current, sp->time_bin);
+
+        if (ti_end != ti_current)
+          error("Computing time-step of rogue particle.");
+#endif
+        /* Get new time-step */
+        const integertime_t ti_new_step = get_spart_timestep(sp, e);
+
+        /* Update particle */
+        sp->time_bin = get_time_bin(ti_new_step);
+        sp->gpart->time_bin = get_time_bin(ti_new_step);
+
+        /* Number of updated s-particles */
+        s_updated++;
+        g_updated++;
+
+        /* What is the next sync-point ? */
+        ti_end_min = min(ti_current + ti_new_step, ti_end_min);
+        ti_end_max = max(ti_current + ti_new_step, ti_end_max);
+
+      } else { /* star particle is inactive */
+
+        const integertime_t ti_end =
+            get_integer_time_end(ti_current, sp->time_bin);
+
+        /* What is the next sync-point ? */
+        ti_end_min = min(ti_end, ti_end_min);
+        ti_end_max = max(ti_end, ti_end_max);
+      }
+    }
+  } else {
 
     /* Loop over the progeny. */
     for (int k = 0; k < 8; k++)
@@ -1000,11 +1234,12 @@ void runner_do_kick(struct runner *r, struct cell *c, int timer) {
         struct cell *restrict cp = c->progeny[k];
 
         /* Recurse */
-        runner_do_kick(r, cp, 0);
+        runner_do_timestep(r, cp, 0);
 
         /* And aggregate */
         updated += cp->updated;
         g_updated += cp->g_updated;
+        s_updated += cp->s_updated;
         ti_end_min = min(cp->ti_end_min, ti_end_min);
         ti_end_max = max(cp->ti_end_max, ti_end_max);
       }
@@ -1013,31 +1248,105 @@ void runner_do_kick(struct runner *r, struct cell *c, int timer) {
   /* Store the values. */
   c->updated = updated;
   c->g_updated = g_updated;
+  c->s_updated = s_updated;
   c->ti_end_min = ti_end_min;
   c->ti_end_max = ti_end_max;
 
-  if (timer) TIMER_TOC(timer_kick);
+  if (timer) TIMER_TOC(timer_timestep);
+}
+
+/**
+ * @brief End the force calculation of all active particles in a cell
+ * by multiplying the acccelerations by the relevant constants
+ *
+ * @param r The #runner thread.
+ * @param c The #cell.
+ * @param timer Are we timing this ?
+ */
+void runner_do_end_force(struct runner *r, struct cell *c, int timer) {
+
+  const struct engine *e = r->e;
+  const int count = c->count;
+  const int gcount = c->gcount;
+  const int scount = c->scount;
+  struct part *restrict parts = c->parts;
+  struct gpart *restrict gparts = c->gparts;
+  struct spart *restrict sparts = c->sparts;
+  const double const_G = e->physical_constants->const_newton_G;
+
+  TIMER_TIC;
+
+  /* Anything to do here? */
+  if (!cell_is_active(c, e)) return;
+
+  /* Recurse? */
+  if (c->split) {
+    for (int k = 0; k < 8; k++)
+      if (c->progeny[k] != NULL) runner_do_end_force(r, c->progeny[k], 0);
+  } else {
+
+    /* Loop over the gas particles in this cell. */
+    for (int k = 0; k < count; k++) {
+
+      /* Get a handle on the part. */
+      struct part *restrict p = &parts[k];
+
+      if (part_is_active(p, e)) {
+
+        /* First, finish the force loop */
+        hydro_end_force(p);
+        if (p->gpart != NULL) gravity_end_force(p->gpart, const_G);
+      }
+    }
+
+    /* Loop over the g-particles in this cell. */
+    for (int k = 0; k < gcount; k++) {
+
+      /* Get a handle on the gpart. */
+      struct gpart *restrict gp = &gparts[k];
+
+      if (gp->type == swift_type_dark_matter) {
+        if (gpart_is_active(gp, e)) gravity_end_force(gp, const_G);
+      }
+    }
+
+    /* Loop over the star particles in this cell. */
+    for (int k = 0; k < scount; k++) {
+
+      /* Get a handle on the part. */
+      struct spart *restrict sp = &sparts[k];
+
+      if (spart_is_active(sp, e)) {
+
+        /* First, finish the force loop */
+        star_end_force(sp);
+        gravity_end_force(sp->gpart, const_G);
+      }
+    }
+  }
+
+  if (timer) TIMER_TOC(timer_endforce);
 }
 
 /**
- * @brief Construct the cell properties from the received particles
+ * @brief Construct the cell properties from the received #part.
  *
  * @param r The runner thread.
  * @param c The cell.
  * @param timer Are we timing this ?
  */
-void runner_do_recv_cell(struct runner *r, struct cell *c, int timer) {
+void runner_do_recv_part(struct runner *r, struct cell *c, int timer) {
+
+#ifdef WITH_MPI
 
   const struct part *restrict parts = c->parts;
-  const struct gpart *restrict gparts = c->gparts;
   const size_t nr_parts = c->count;
-  const size_t nr_gparts = c->gcount;
-  // const int ti_current = r->e->ti_current;
+  const integertime_t ti_current = r->e->ti_current;
 
   TIMER_TIC;
 
-  int ti_end_min = max_nr_timesteps;
-  int ti_end_max = 0;
+  integertime_t ti_end_min = max_nr_timesteps;
+  integertime_t ti_end_max = 0;
   float h_max = 0.f;
 
   /* If this cell is a leaf, collect the particle data. */
@@ -1045,39 +1354,176 @@ void runner_do_recv_cell(struct runner *r, struct cell *c, int timer) {
 
     /* Collect everything... */
     for (size_t k = 0; k < nr_parts; k++) {
-      const int ti_end = parts[k].ti_end;
-      // if(ti_end < ti_current) error("Received invalid particle !");
+      const integertime_t ti_end =
+          get_integer_time_end(ti_current, parts[k].time_bin);
       ti_end_min = min(ti_end_min, ti_end);
       ti_end_max = max(ti_end_max, ti_end);
       h_max = max(h_max, parts[k].h);
+
+#ifdef SWIFT_DEBUG_CHECKS
+      if (parts[k].ti_drift != ti_current)
+        error("Received un-drifted particle !");
+#endif
     }
+  }
+
+  /* Otherwise, recurse and collect. */
+  else {
+    for (int k = 0; k < 8; k++) {
+      if (c->progeny[k] != NULL) {
+        runner_do_recv_part(r, c->progeny[k], 0);
+        ti_end_min = min(ti_end_min, c->progeny[k]->ti_end_min);
+        ti_end_max = max(ti_end_max, c->progeny[k]->ti_end_max);
+        h_max = max(h_max, c->progeny[k]->h_max);
+      }
+    }
+  }
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (ti_end_min < ti_current)
+    error(
+        "Received a cell at an incorrect time c->ti_end_min=%lld, "
+        "e->ti_current=%lld.",
+        ti_end_min, ti_current);
+#endif
+
+  /* ... and store. */
+  c->ti_end_min = ti_end_min;
+  c->ti_end_max = ti_end_max;
+  c->ti_old = ti_current;
+  c->h_max = h_max;
+
+  if (timer) TIMER_TOC(timer_dorecv_part);
+
+#else
+  error("SWIFT was not compiled with MPI support.");
+#endif
+}
+
+/**
+ * @brief Construct the cell properties from the received #gpart.
+ *
+ * @param r The runner thread.
+ * @param c The cell.
+ * @param timer Are we timing this ?
+ */
+void runner_do_recv_gpart(struct runner *r, struct cell *c, int timer) {
+
+#ifdef WITH_MPI
+
+  const struct gpart *restrict gparts = c->gparts;
+  const size_t nr_gparts = c->gcount;
+  const integertime_t ti_current = r->e->ti_current;
+
+  TIMER_TIC;
+
+  integertime_t ti_end_min = max_nr_timesteps;
+  integertime_t ti_end_max = 0;
+
+  /* If this cell is a leaf, collect the particle data. */
+  if (!c->split) {
+
+    /* Collect everything... */
     for (size_t k = 0; k < nr_gparts; k++) {
-      const int ti_end = gparts[k].ti_end;
-      // if(ti_end < ti_current) error("Received invalid particle !");
+      const integertime_t ti_end =
+          get_integer_time_end(ti_current, gparts[k].time_bin);
       ti_end_min = min(ti_end_min, ti_end);
       ti_end_max = max(ti_end_max, ti_end);
     }
+  }
 
+  /* Otherwise, recurse and collect. */
+  else {
+    for (int k = 0; k < 8; k++) {
+      if (c->progeny[k] != NULL) {
+        runner_do_recv_gpart(r, c->progeny[k], 0);
+        ti_end_min = min(ti_end_min, c->progeny[k]->ti_end_min);
+        ti_end_max = max(ti_end_max, c->progeny[k]->ti_end_max);
+      }
+    }
+  }
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (ti_end_min < ti_current)
+    error(
+        "Received a cell at an incorrect time c->ti_end_min=%lld, "
+        "e->ti_current=%lld.",
+        ti_end_min, ti_current);
+#endif
+
+  /* ... and store. */
+  c->ti_end_min = ti_end_min;
+  c->ti_end_max = ti_end_max;
+  c->ti_old = ti_current;
+
+  if (timer) TIMER_TOC(timer_dorecv_gpart);
+
+#else
+  error("SWIFT was not compiled with MPI support.");
+#endif
+}
+
+/**
+ * @brief Construct the cell properties from the received #spart.
+ *
+ * @param r The runner thread.
+ * @param c The cell.
+ * @param timer Are we timing this ?
+ */
+void runner_do_recv_spart(struct runner *r, struct cell *c, int timer) {
+
+#ifdef WITH_MPI
+
+  const struct spart *restrict sparts = c->sparts;
+  const size_t nr_sparts = c->scount;
+  const integertime_t ti_current = r->e->ti_current;
+
+  TIMER_TIC;
+
+  integertime_t ti_end_min = max_nr_timesteps;
+  integertime_t ti_end_max = 0;
+
+  /* If this cell is a leaf, collect the particle data. */
+  if (!c->split) {
+
+    /* Collect everything... */
+    for (size_t k = 0; k < nr_sparts; k++) {
+      const integertime_t ti_end =
+          get_integer_time_end(ti_current, sparts[k].time_bin);
+      ti_end_min = min(ti_end_min, ti_end);
+      ti_end_max = max(ti_end_max, ti_end);
+    }
   }
 
   /* Otherwise, recurse and collect. */
   else {
     for (int k = 0; k < 8; k++) {
       if (c->progeny[k] != NULL) {
-        runner_do_recv_cell(r, c->progeny[k], 0);
+        runner_do_recv_spart(r, c->progeny[k], 0);
         ti_end_min = min(ti_end_min, c->progeny[k]->ti_end_min);
         ti_end_max = max(ti_end_max, c->progeny[k]->ti_end_max);
-        h_max = max(h_max, c->progeny[k]->h_max);
       }
     }
   }
 
+#ifdef SWIFT_DEBUG_CHECKS
+  if (ti_end_min < ti_current)
+    error(
+        "Received a cell at an incorrect time c->ti_end_min=%lld, "
+        "e->ti_current=%lld.",
+        ti_end_min, ti_current);
+#endif
+
   /* ... and store. */
   c->ti_end_min = ti_end_min;
   c->ti_end_max = ti_end_max;
-  c->h_max = h_max;
+  c->ti_old = ti_current;
+
+  if (timer) TIMER_TOC(timer_dorecv_spart);
 
-  if (timer) TIMER_TOC(timer_dorecv_cell);
+#else
+  error("SWIFT was not compiled with MPI support.");
+#endif
 }
 
 /**
@@ -1125,11 +1571,20 @@ void *runner_main(void *data) {
 
 /* Check that we haven't scheduled an inactive task */
 #ifdef SWIFT_DEBUG_CHECKS
-      if (cj == NULL) { /* self */
-        if (!cell_is_active(ci, e) && t->type != task_type_sort)
+      t->ti_run = e->ti_current;
+#ifndef WITH_MPI
+      if (ci == NULL && cj == NULL) {
+
+        if (t->type != task_type_grav_gather_m && t->type != task_type_grav_fft)
+          error("Task not associated with cells!");
+
+      } else if (cj == NULL) { /* self */
+
+        if (!cell_is_active(ci, e) && t->type != task_type_sort &&
+            t->type != task_type_send && t->type != task_type_recv)
           error(
-              "Task (type='%s/%s') should have been skipped ti_current=%d "
-              "c->ti_end_min=%d",
+              "Task (type='%s/%s') should have been skipped ti_current=%lld "
+              "c->ti_end_min=%lld",
               taskID_names[t->type], subtaskID_names[t->subtype], e->ti_current,
               ci->ti_end_min);
 
@@ -1137,25 +1592,34 @@ void *runner_main(void *data) {
         if (!cell_is_active(ci, e) && t->type == task_type_sort &&
             t->flags == 0)
           error(
-              "Task (type='%s/%s') should have been skipped ti_current=%d "
-              "c->ti_end_min=%d t->flags=%d",
+              "Task (type='%s/%s') should have been skipped ti_current=%lld "
+              "c->ti_end_min=%lld t->flags=%d",
               taskID_names[t->type], subtaskID_names[t->subtype], e->ti_current,
               ci->ti_end_min, t->flags);
 
       } else { /* pair */
         if (!cell_is_active(ci, e) && !cell_is_active(cj, e))
-          error(
-              "Task (type='%s/%s') should have been skipped ti_current=%d "
-              "ci->ti_end_min=%d cj->ti_end_min=%d",
-              taskID_names[t->type], subtaskID_names[t->subtype], e->ti_current,
-              ci->ti_end_min, cj->ti_end_min);
+
+          if (t->type != task_type_send && t->type != task_type_recv)
+            error(
+                "Task (type='%s/%s') should have been skipped ti_current=%lld "
+                "ci->ti_end_min=%lld cj->ti_end_min=%lld",
+                taskID_names[t->type], subtaskID_names[t->subtype],
+                e->ti_current, ci->ti_end_min, cj->ti_end_min);
       }
+#endif
 #endif
 
       /* Different types of tasks... */
       switch (t->type) {
         case task_type_self:
-          if (t->subtype == task_subtype_density) runner_doself1_density(r, ci);
+          if (t->subtype == task_subtype_density) {
+#if defined(WITH_VECTORIZATION) && defined(GADGET2_SPH)
+            runner_doself1_density_vec(r, ci);
+#else
+            runner_doself1_density(r, ci);
+#endif
+          }
 #ifdef EXTRA_HYDRO_LOOP
           else if (t->subtype == task_subtype_gradient)
             runner_doself1_gradient(r, ci);
@@ -1231,8 +1695,19 @@ void *runner_main(void *data) {
           runner_do_extra_ghost(r, ci, 1);
           break;
 #endif
-        case task_type_kick:
-          runner_do_kick(r, ci, 1);
+        case task_type_drift:
+          runner_do_drift(r, ci, 1);
+          break;
+        case task_type_kick1:
+          runner_do_kick1(r, ci, 1);
+          break;
+        case task_type_kick2:
+          if (!(e->policy & engine_policy_cooling))
+            runner_do_end_force(r, ci, 1);
+          runner_do_kick2(r, ci, 1);
+          break;
+        case task_type_timestep:
+          runner_do_timestep(r, ci, 1);
           break;
 #ifdef WITH_MPI
         case task_type_send:
@@ -1244,8 +1719,15 @@ void *runner_main(void *data) {
           if (t->subtype == task_subtype_tend) {
             cell_unpack_ti_ends(ci, t->buff);
             free(t->buff);
+          } else if (t->subtype == task_subtype_xv ||
+                     t->subtype == task_subtype_rho) {
+            runner_do_recv_part(r, ci, 1);
+          } else if (t->subtype == task_subtype_gpart) {
+            runner_do_recv_gpart(r, ci, 1);
+          } else if (t->subtype == task_subtype_spart) {
+            runner_do_recv_spart(r, ci, 1);
           } else {
-            runner_do_recv_cell(r, ci, 1);
+            error("Unknown/invalid task subtype (%d).", t->subtype);
           }
           break;
 #endif
@@ -1261,6 +1743,7 @@ void *runner_main(void *data) {
           runner_do_grav_fft(r);
           break;
         case task_type_cooling:
+          if (e->policy & engine_policy_cooling) runner_do_end_force(r, ci, 1);
           runner_do_cooling(r, t->ci, 1);
           break;
         case task_type_sourceterms:
diff --git a/src/runner.h b/src/runner.h
index a8caf24248c99438f16729e2cac3e1031535f62b..53e78b00657385c7185e0730d421707c87ccf382 100644
--- a/src/runner.h
+++ b/src/runner.h
@@ -23,6 +23,8 @@
 #ifndef SWIFT_RUNNER_H
 #define SWIFT_RUNNER_H
 
+#include "cache.h"
+
 extern const double runner_shift[13][3];
 extern const char runner_flip[27];
 
@@ -45,17 +47,25 @@ struct runner {
 
   /*! The engine owing this runner. */
   struct engine *e;
+
+  /*! The particle cache of this runner. */
+  struct cache par_cache;
 };
 
 /* Function prototypes. */
 void runner_do_ghost(struct runner *r, struct cell *c, int timer);
 void runner_do_extra_ghost(struct runner *r, struct cell *c, int timer);
 void runner_do_sort(struct runner *r, struct cell *c, int flag, int clock);
-void runner_do_kick(struct runner *r, struct cell *c, int timer);
+void runner_do_drift(struct runner *r, struct cell *c, int timer);
+void runner_do_kick1(struct runner *r, struct cell *c, int timer);
+void runner_do_kick2(struct runner *r, struct cell *c, int timer);
+void runner_do_end_force(struct runner *r, struct cell *c, int timer);
 void runner_do_init(struct runner *r, struct cell *c, int timer);
 void runner_do_cooling(struct runner *r, struct cell *c, int timer);
 void runner_do_grav_external(struct runner *r, struct cell *c, int timer);
 void *runner_main(void *data);
+void runner_do_unskip_mapper(void *map_data, int num_elements,
+                             void *extra_data);
 void runner_do_drift_mapper(void *map_data, int num_elements, void *extra_data);
 
 #endif /* SWIFT_RUNNER_H */
diff --git a/src/runner_doiact.h b/src/runner_doiact.h
index 6bc8f2da808cc2d953482b90e9441b833384bc75..6fa04018088a05ed0319489e88677c3ebcabd0f2 100644
--- a/src/runner_doiact.h
+++ b/src/runner_doiact.h
@@ -32,9 +32,18 @@
 #define _DOPAIR2(f) PASTE(runner_dopair2, f)
 #define DOPAIR2 _DOPAIR2(FUNCTION)
 
+#define _DOPAIR1_NOSORT(f) PASTE(runner_dopair1_nosort, f)
+#define DOPAIR1_NOSORT _DOPAIR1_NOSORT(FUNCTION)
+
+#define _DOPAIR2_NOSORT(f) PASTE(runner_dopair2_nosort, f)
+#define DOPAIR2_NOSORT _DOPAIR2_NOSORT(FUNCTION)
+
 #define _DOPAIR_SUBSET(f) PASTE(runner_dopair_subset, f)
 #define DOPAIR_SUBSET _DOPAIR_SUBSET(FUNCTION)
 
+#define _DOPAIR_SUBSET_NOSORT(f) PASTE(runner_dopair_subset_nosort, f)
+#define DOPAIR_SUBSET_NOSORT _DOPAIR_SUBSET_NOSORT(FUNCTION)
+
 #define _DOPAIR_SUBSET_NAIVE(f) PASTE(runner_dopair_subset_naive, f)
 #define DOPAIR_SUBSET_NAIVE _DOPAIR_SUBSET_NAIVE(FUNCTION)
 
@@ -98,6 +107,8 @@
 #define _TIMER_DOPAIR_SUBSET(f) PASTE(timer_dopair_subset, f)
 #define TIMER_DOPAIR_SUBSET _TIMER_DOPAIR_SUBSET(FUNCTION)
 
+#include "runner_doiact_nosort.h"
+
 /**
  * @brief Compute the interactions between a cell pair.
  *
@@ -112,7 +123,7 @@ void DOPAIR_NAIVE(struct runner *r, struct cell *restrict ci,
 
   error("Don't use in actual runs ! Slow code !");
 
-#ifdef WITH_VECTORIZATION
+#ifdef WITH_OLD_VECTORIZATION
   int icount = 0;
   float r2q[VEC_SIZE] __attribute__((aligned(16)));
   float hiq[VEC_SIZE] __attribute__((aligned(16)));
@@ -167,7 +178,7 @@ void DOPAIR_NAIVE(struct runner *r, struct cell *restrict ci,
       /* Hit or miss? */
       if (r2 < hig2 || r2 < pj->h * pj->h * kernel_gamma2) {
 
-#ifndef WITH_VECTORIZATION
+#ifndef WITH_OLD_VECTORIZATION
 
         IACT(r2, dx, hi, pj->h, pi, pj);
 
@@ -197,7 +208,7 @@ void DOPAIR_NAIVE(struct runner *r, struct cell *restrict ci,
 
   } /* loop over the parts in ci. */
 
-#ifdef WITH_VECTORIZATION
+#ifdef WITH_OLD_VECTORIZATION
   /* Pick up any leftovers. */
   if (icount > 0)
     for (int k = 0; k < icount; k++)
@@ -213,7 +224,7 @@ void DOSELF_NAIVE(struct runner *r, struct cell *restrict c) {
 
   error("Don't use in actual runs ! Slow code !");
 
-#ifdef WITH_VECTORIZATION
+#ifdef WITH_OLD_VECTORIZATION
   int icount = 0;
   float r2q[VEC_SIZE] __attribute__((aligned(16)));
   float hiq[VEC_SIZE] __attribute__((aligned(16)));
@@ -256,7 +267,7 @@ void DOSELF_NAIVE(struct runner *r, struct cell *restrict c) {
       /* Hit or miss? */
       if (r2 < hig2 || r2 < pj->h * pj->h * kernel_gamma2) {
 
-#ifndef WITH_VECTORIZATION
+#ifndef WITH_OLD_VECTORIZATION
 
         IACT(r2, dx, hi, pj->h, pi, pj);
 
@@ -286,7 +297,7 @@ void DOSELF_NAIVE(struct runner *r, struct cell *restrict c) {
 
   } /* loop over the parts in ci. */
 
-#ifdef WITH_VECTORIZATION
+#ifdef WITH_OLD_VECTORIZATION
   /* Pick up any leftovers. */
   if (icount > 0)
     for (int k = 0; k < icount; k++)
@@ -315,7 +326,7 @@ void DOPAIR_SUBSET_NAIVE(struct runner *r, struct cell *restrict ci,
 
   error("Don't use in actual runs ! Slow code !");
 
-#ifdef WITH_VECTORIZATION
+#ifdef WITH_OLD_VECTORIZATION
   int icount = 0;
   float r2q[VEC_SIZE] __attribute__((aligned(16)));
   float hiq[VEC_SIZE] __attribute__((aligned(16)));
@@ -365,7 +376,7 @@ void DOPAIR_SUBSET_NAIVE(struct runner *r, struct cell *restrict ci,
       /* Hit or miss? */
       if (r2 < hig2) {
 
-#ifndef WITH_VECTORIZATION
+#ifndef WITH_OLD_VECTORIZATION
 
         IACT_NONSYM(r2, dx, hi, pj->h, pi, pj);
 
@@ -395,7 +406,7 @@ void DOPAIR_SUBSET_NAIVE(struct runner *r, struct cell *restrict ci,
 
   } /* loop over the parts in ci. */
 
-#ifdef WITH_VECTORIZATION
+#ifdef WITH_OLD_VECTORIZATION
   /* Pick up any leftovers. */
   if (icount > 0)
     for (int k = 0; k < icount; k++)
@@ -422,7 +433,14 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci,
 
   struct engine *e = r->e;
 
-#ifdef WITH_VECTORIZATION
+#ifdef WITH_MPI
+  if (ci->nodeID != cj->nodeID) {
+    DOPAIR_SUBSET_NOSORT(r, ci, parts_i, ind, count, cj);
+    return;
+  }
+#endif
+
+#ifdef WITH_OLD_VECTORIZATION
   int icount = 0;
   float r2q[VEC_SIZE] __attribute__((aligned(16)));
   float hiq[VEC_SIZE] __attribute__((aligned(16)));
@@ -497,7 +515,7 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci,
         /* Hit or miss? */
         if (r2 < hig2) {
 
-#ifndef WITH_VECTORIZATION
+#ifndef WITH_OLD_VECTORIZATION
 
           IACT_NONSYM(r2, dx, hi, pj->h, pi, pj);
 
@@ -562,7 +580,7 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci,
         /* Hit or miss? */
         if (r2 < hig2) {
 
-#ifndef WITH_VECTORIZATION
+#ifndef WITH_OLD_VECTORIZATION
 
           IACT_NONSYM(r2, dx, hi, pj->h, pi, pj);
 
@@ -593,7 +611,7 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci,
     } /* loop over the parts in ci. */
   }
 
-#ifdef WITH_VECTORIZATION
+#ifdef WITH_OLD_VECTORIZATION
   /* Pick up any leftovers. */
   if (icount > 0)
     for (int k = 0; k < icount; k++)
@@ -616,7 +634,7 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci,
 void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci,
                    struct part *restrict parts, int *restrict ind, int count) {
 
-#ifdef WITH_VECTORIZATION
+#ifdef WITH_OLD_VECTORIZATION
   int icount = 0;
   float r2q[VEC_SIZE] __attribute__((aligned(16)));
   float hiq[VEC_SIZE] __attribute__((aligned(16)));
@@ -656,7 +674,7 @@ void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci,
       /* Hit or miss? */
       if (r2 > 0.0f && r2 < hig2) {
 
-#ifndef WITH_VECTORIZATION
+#ifndef WITH_OLD_VECTORIZATION
 
         IACT_NONSYM(r2, dx, hi, pj->h, pi, pj);
 
@@ -686,7 +704,7 @@ void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci,
 
   } /* loop over the parts in ci. */
 
-#ifdef WITH_VECTORIZATION
+#ifdef WITH_OLD_VECTORIZATION
   /* Pick up any leftovers. */
   if (icount > 0)
     for (int k = 0; k < icount; k++)
@@ -707,7 +725,14 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) {
 
   const struct engine *restrict e = r->e;
 
-#ifdef WITH_VECTORIZATION
+#ifdef WITH_MPI
+  if (ci->nodeID != cj->nodeID) {
+    DOPAIR1_NOSORT(r, ci, cj);
+    return;
+  }
+#endif
+
+#ifdef WITH_OLD_VECTORIZATION
   int icount = 0;
   float r2q[VEC_SIZE] __attribute__((aligned(16)));
   float hiq[VEC_SIZE] __attribute__((aligned(16)));
@@ -721,10 +746,8 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) {
   /* Anything to do here? */
   if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return;
 
-#ifdef SWIFT_DEBUG_CHECKS
-  cell_is_drifted(ci, e);
-  cell_is_drifted(cj, e);
-#endif
+  if (!cell_is_drifted(ci, e)) cell_drift(ci, e);
+  if (!cell_is_drifted(cj, e)) cell_drift(cj, e);
 
   /* Get the sort ID. */
   double shift[3] = {0.0, 0.0, 0.0};
@@ -782,10 +805,18 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) {
         r2 += dx[k] * dx[k];
       }
 
+#ifdef SWIFT_DEBUG_CHECKS
+      /* Check that particles have been drifted to the current time */
+      if (pi->ti_drift != e->ti_current)
+        error("Particle pi not drifted to current time");
+      if (pj->ti_drift != e->ti_current)
+        error("Particle pj not drifted to current time");
+#endif
+
       /* Hit or miss? */
       if (r2 < hig2) {
 
-#ifndef WITH_VECTORIZATION
+#ifndef WITH_OLD_VECTORIZATION
 
         IACT_NONSYM(r2, dx, hi, pj->h, pi, pj);
 
@@ -844,10 +875,18 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) {
         r2 += dx[k] * dx[k];
       }
 
+#ifdef SWIFT_DEBUG_CHECKS
+      /* Check that particles have been drifted to the current time */
+      if (pi->ti_drift != e->ti_current)
+        error("Particle pi not drifted to current time");
+      if (pj->ti_drift != e->ti_current)
+        error("Particle pj not drifted to current time");
+#endif
+
       /* Hit or miss? */
       if (r2 < hjg2) {
 
-#ifndef WITH_VECTORIZATION
+#ifndef WITH_OLD_VECTORIZATION
 
         IACT_NONSYM(r2, dx, hj, pi->h, pj, pi);
 
@@ -877,7 +916,7 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) {
 
   } /* loop over the parts in ci. */
 
-#ifdef WITH_VECTORIZATION
+#ifdef WITH_OLD_VECTORIZATION
   /* Pick up any leftovers. */
   if (icount > 0)
     for (int k = 0; k < icount; k++)
@@ -898,7 +937,14 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) {
 
   struct engine *restrict e = r->e;
 
-#ifdef WITH_VECTORIZATION
+#ifdef WITH_MPI
+  if (ci->nodeID != cj->nodeID) {
+    DOPAIR2_NOSORT(r, ci, cj);
+    return;
+  }
+#endif
+
+#ifdef WITH_OLD_VECTORIZATION
   int icount1 = 0;
   float r2q1[VEC_SIZE] __attribute__((aligned(16)));
   float hiq1[VEC_SIZE] __attribute__((aligned(16)));
@@ -918,10 +964,8 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) {
   /* Anything to do here? */
   if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return;
 
-#ifdef SWIFT_DEBUG_CHECKS
-  cell_is_drifted(ci, e);
-  cell_is_drifted(cj, e);
-#endif
+  if (!cell_is_drifted(ci, e)) error("Cell ci not drifted");
+  if (!cell_is_drifted(cj, e)) error("Cell cj not drifted");
 
   /* Get the shift ID. */
   double shift[3] = {0.0, 0.0, 0.0};
@@ -1012,10 +1056,18 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) {
           r2 += dx[k] * dx[k];
         }
 
+#ifdef SWIFT_DEBUG_CHECKS
+        /* Check that particles have been drifted to the current time */
+        if (pi->ti_drift != e->ti_current)
+          error("Particle pi not drifted to current time");
+        if (pj->ti_drift != e->ti_current)
+          error("Particle pj not drifted to current time");
+#endif
+
         /* Hit or miss? */
         if (r2 < hig2) {
 
-#ifndef WITH_VECTORIZATION
+#ifndef WITH_OLD_VECTORIZATION
 
           IACT_NONSYM(r2, dx, hj, hi, pj, pi);
 
@@ -1063,10 +1115,18 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) {
           r2 += dx[k] * dx[k];
         }
 
+#ifdef SWIFT_DEBUG_CHECKS
+        /* Check that particles have been drifted to the current time */
+        if (pi->ti_drift != e->ti_current)
+          error("Particle pi not drifted to current time");
+        if (pj->ti_drift != e->ti_current)
+          error("Particle pj not drifted to current time");
+#endif
+
         /* Hit or miss? */
         if (r2 < hig2) {
 
-#ifndef WITH_VECTORIZATION
+#ifndef WITH_OLD_VECTORIZATION
 
           /* Does pj need to be updated too? */
           if (part_is_active(pj, e))
@@ -1156,10 +1216,18 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) {
           r2 += dx[k] * dx[k];
         }
 
+#ifdef SWIFT_DEBUG_CHECKS
+        /* Check that particles have been drifted to the current time */
+        if (pi->ti_drift != e->ti_current)
+          error("Particle pi not drifted to current time");
+        if (pj->ti_drift != e->ti_current)
+          error("Particle pj not drifted to current time");
+#endif
+
         /* Hit or miss? */
         if (r2 < hjg2 && r2 > hi * hi * kernel_gamma2) {
 
-#ifndef WITH_VECTORIZATION
+#ifndef WITH_OLD_VECTORIZATION
 
           IACT_NONSYM(r2, dx, hi, hj, pi, pj);
 
@@ -1206,10 +1274,18 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) {
           r2 += dx[k] * dx[k];
         }
 
+#ifdef SWIFT_DEBUG_CHECKS
+        /* Check that particles have been drifted to the current time */
+        if (pi->ti_drift != e->ti_current)
+          error("Particle pi not drifted to current time");
+        if (pj->ti_drift != e->ti_current)
+          error("Particle pj not drifted to current time");
+#endif
+
         /* Hit or miss? */
         if (r2 < hjg2 && r2 > hi * hi * kernel_gamma2) {
 
-#ifndef WITH_VECTORIZATION
+#ifndef WITH_OLD_VECTORIZATION
 
           /* Does pi need to be updated too? */
           if (part_is_active(pi, e))
@@ -1267,7 +1343,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) {
 
   } /* loop over the parts in ci. */
 
-#ifdef WITH_VECTORIZATION
+#ifdef WITH_OLD_VECTORIZATION
   /* Pick up any leftovers. */
   if (icount1 > 0)
     for (int k = 0; k < icount1; k++)
@@ -1294,7 +1370,7 @@ void DOSELF1(struct runner *r, struct cell *restrict c) {
 
   const struct engine *e = r->e;
 
-#ifdef WITH_VECTORIZATION
+#ifdef WITH_OLD_VECTORIZATION
   int icount1 = 0;
   float r2q1[VEC_SIZE] __attribute__((aligned(16)));
   float hiq1[VEC_SIZE] __attribute__((aligned(16)));
@@ -1313,9 +1389,7 @@ void DOSELF1(struct runner *r, struct cell *restrict c) {
 
   if (!cell_is_active(c, e)) return;
 
-#ifdef SWIFT_DEBUG_CHECKS
-  cell_is_drifted(c, e);
-#endif
+  if (!cell_is_drifted(c, e)) cell_drift(c, e);
 
   struct part *restrict parts = c->parts;
   const int count = c->count;
@@ -1354,6 +1428,14 @@ void DOSELF1(struct runner *r, struct cell *restrict c) {
         struct part *restrict pj = &parts[indt[pjd]];
         const float hj = pj->h;
 
+#ifdef SWIFT_DEBUG_CHECKS
+        /* Check that particles have been drifted to the current time */
+        if (pi->ti_drift != e->ti_current)
+          error("Particle pi not drifted to current time");
+        if (pj->ti_drift != e->ti_current)
+          error("Particle pj not drifted to current time");
+#endif
+
         /* Compute the pairwise distance. */
         float r2 = 0.0f;
         float dx[3];
@@ -1365,7 +1447,7 @@ void DOSELF1(struct runner *r, struct cell *restrict c) {
         /* Hit or miss? */
         if (r2 < hj * hj * kernel_gamma2) {
 
-#ifndef WITH_VECTORIZATION
+#ifndef WITH_OLD_VECTORIZATION
 
           IACT_NONSYM(r2, dx, hj, hi, pj, pi);
 
@@ -1418,10 +1500,18 @@ void DOSELF1(struct runner *r, struct cell *restrict c) {
         const int doj =
             (part_is_active(pj, e)) && (r2 < hj * hj * kernel_gamma2);
 
+#ifdef SWIFT_DEBUG_CHECKS
+        /* Check that particles have been drifted to the current time */
+        if (pi->ti_drift != e->ti_current)
+          error("Particle pi not drifted to current time");
+        if (pj->ti_drift != e->ti_current)
+          error("Particle pj not drifted to current time");
+#endif
+
         /* Hit or miss? */
         if (r2 < hig2 || doj) {
 
-#ifndef WITH_VECTORIZATION
+#ifndef WITH_OLD_VECTORIZATION
 
           /* Which parts need to be updated? */
           if (r2 < hig2 && doj)
@@ -1504,7 +1594,7 @@ void DOSELF1(struct runner *r, struct cell *restrict c) {
 
   } /* loop over all particles. */
 
-#ifdef WITH_VECTORIZATION
+#ifdef WITH_OLD_VECTORIZATION
   /* Pick up any leftovers. */
   if (icount1 > 0)
     for (int k = 0; k < icount1; k++)
@@ -1529,7 +1619,7 @@ void DOSELF2(struct runner *r, struct cell *restrict c) {
 
   const struct engine *e = r->e;
 
-#ifdef WITH_VECTORIZATION
+#ifdef WITH_OLD_VECTORIZATION
   int icount1 = 0;
   float r2q1[VEC_SIZE] __attribute__((aligned(16)));
   float hiq1[VEC_SIZE] __attribute__((aligned(16)));
@@ -1548,9 +1638,7 @@ void DOSELF2(struct runner *r, struct cell *restrict c) {
 
   if (!cell_is_active(c, e)) return;
 
-#ifdef SWIFT_DEBUG_CHECKS
-  cell_is_drifted(c, e);
-#endif
+  if (!cell_is_drifted(c, e)) error("Cell is not drifted");
 
   struct part *restrict parts = c->parts;
   const int count = c->count;
@@ -1597,10 +1685,18 @@ void DOSELF2(struct runner *r, struct cell *restrict c) {
           r2 += dx[k] * dx[k];
         }
 
+#ifdef SWIFT_DEBUG_CHECKS
+        /* Check that particles have been drifted to the current time */
+        if (pi->ti_drift != e->ti_current)
+          error("Particle pi not drifted to current time");
+        if (pj->ti_drift != e->ti_current)
+          error("Particle pj not drifted to current time");
+#endif
+
         /* Hit or miss? */
         if (r2 < hig2 || r2 < hj * hj * kernel_gamma2) {
 
-#ifndef WITH_VECTORIZATION
+#ifndef WITH_OLD_VECTORIZATION
 
           IACT_NONSYM(r2, dx, hj, hi, pj, pi);
 
@@ -1651,10 +1747,18 @@ void DOSELF2(struct runner *r, struct cell *restrict c) {
           r2 += dx[k] * dx[k];
         }
 
+#ifdef SWIFT_DEBUG_CHECKS
+        /* Check that particles have been drifted to the current time */
+        if (pi->ti_drift != e->ti_current)
+          error("Particle pi not drifted to current time");
+        if (pj->ti_drift != e->ti_current)
+          error("Particle pj not drifted to current time");
+#endif
+
         /* Hit or miss? */
         if (r2 < hig2 || r2 < hj * hj * kernel_gamma2) {
 
-#ifndef WITH_VECTORIZATION
+#ifndef WITH_OLD_VECTORIZATION
 
           /* Does pj need to be updated too? */
           if (part_is_active(pj, e))
@@ -1712,7 +1816,7 @@ void DOSELF2(struct runner *r, struct cell *restrict c) {
 
   } /* loop over all particles. */
 
-#ifdef WITH_VECTORIZATION
+#ifdef WITH_OLD_VECTORIZATION
   /* Pick up any leftovers. */
   if (icount1 > 0)
     for (int k = 0; k < icount1; k++)
@@ -2007,8 +2111,14 @@ void DOSUB_SELF1(struct runner *r, struct cell *ci, int gettimer) {
   }
 
   /* Otherwise, compute self-interaction. */
-  else
+  else {
+#if (DOSELF1 == runner_doself1_density) && defined(WITH_VECTORIZATION) && \
+    defined(GADGET2_SPH)
+    runner_doself1_density_vec(r, ci);
+#else
     DOSELF1(r, ci);
+#endif
+  }
 
   if (gettimer) TIMER_TOC(TIMER_DOSUB_SELF);
 }
diff --git a/src/runner_doiact_grav.h b/src/runner_doiact_grav.h
index 59a5ae496680390c23458bde65b4bba321ffe7a1..9d2606ceb06fd6d32592010376e867a6ae582bf0 100644
--- a/src/runner_doiact_grav.h
+++ b/src/runner_doiact_grav.h
@@ -25,8 +25,6 @@
 #include "gravity.h"
 #include "part.h"
 
-#define ICHECK -1000
-
 /**
  * @brief Compute the recursive upward sweep, i.e. construct the
  *        multipoles in a cell hierarchy.
diff --git a/src/runner_doiact_nosort.h b/src/runner_doiact_nosort.h
new file mode 100644
index 0000000000000000000000000000000000000000..d38f01c6955e2ee9848698d2b46d3f4a14ad0873
--- /dev/null
+++ b/src/runner_doiact_nosort.h
@@ -0,0 +1,305 @@
+
+/**
+ * @brief Compute the interactions between a cell pair.
+ *
+ * @param r The #runner.
+ * @param ci The first #cell.
+ * @param cj The second #cell.
+ */
+void DOPAIR1_NOSORT(struct runner *r, struct cell *ci, struct cell *cj) {
+
+  const struct engine *e = r->e;
+
+  TIMER_TIC;
+
+  /* Anything to do here? */
+  if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return;
+
+  if (!cell_is_drifted(ci, e)) cell_drift(ci, e);
+  if (!cell_is_drifted(cj, e)) cell_drift(cj, e);
+
+  /* Get the relative distance between the pairs, wrapping. */
+  double shift[3] = {0.0, 0.0, 0.0};
+  space_getsid(e->s, &ci, &cj, shift);
+
+  const int count_i = ci->count;
+  const int count_j = cj->count;
+  struct part *restrict parts_i = ci->parts;
+  struct part *restrict parts_j = cj->parts;
+
+  /* Loop over the parts in ci. */
+  for (int pid = 0; pid < count_i; pid++) {
+
+    /* Get a hold of the ith part in ci. */
+    struct part *restrict pi = &parts_i[pid];
+    if (!part_is_active(pi, e)) continue;
+    const float hi = pi->h;
+
+    double pix[3];
+    for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k];
+    const float hig2 = hi * hi * kernel_gamma2;
+
+    /* Loop over the parts in cj. */
+    for (int pjd = 0; pjd < count_j; pjd++) {
+
+      /* Get a pointer to the jth particle. */
+      struct part *restrict pj = &parts_j[pjd];
+
+      /* Compute the pairwise distance. */
+      float r2 = 0.0f;
+      float dx[3];
+      for (int k = 0; k < 3; k++) {
+        dx[k] = pix[k] - pj->x[k];
+        r2 += dx[k] * dx[k];
+      }
+
+#ifdef SWIFT_DEBUG_CHECKS
+      /* Check that particles have been drifted to the current time */
+      if (pi->ti_drift != e->ti_current)
+        error("Particle pi not drifted to current time");
+      if (pj->ti_drift != e->ti_current)
+        error("Particle pj not drifted to current time");
+#endif
+
+      /* Hit or miss? */
+      if (r2 < hig2) {
+        IACT_NONSYM(r2, dx, hi, pj->h, pi, pj);
+      }
+
+    } /* loop over the parts in cj. */
+
+  } /* loop over the parts in ci. */
+
+  /* Loop over the parts in cj. */
+  for (int pjd = 0; pjd < count_j; pjd++) {
+
+    /* Get a hold of the ith part in ci. */
+    struct part *restrict pj = &parts_j[pjd];
+    if (!part_is_active(pj, e)) continue;
+    const float hj = pj->h;
+
+    double pjx[3];
+    for (int k = 0; k < 3; k++) pjx[k] = pj->x[k] + shift[k];
+    const float hjg2 = hj * hj * kernel_gamma2;
+
+    /* Loop over the parts in ci. */
+    for (int pid = 0; pid < count_i; pid++) {
+
+      /* Get a pointer to the jth particle. */
+      struct part *restrict pi = &parts_i[pid];
+
+      /* Compute the pairwise distance. */
+      float r2 = 0.0f;
+      float dx[3];
+      for (int k = 0; k < 3; k++) {
+        dx[k] = pjx[k] - pi->x[k];
+        r2 += dx[k] * dx[k];
+      }
+
+#ifdef SWIFT_DEBUG_CHECKS
+      /* Check that particles have been drifted to the current time */
+      if (pj->ti_drift != e->ti_current)
+        error("Particle pj not drifted to current time");
+      if (pi->ti_drift != e->ti_current)
+        error("Particle pi not drifted to current time");
+#endif
+
+      /* Hit or miss? */
+      if (r2 < hjg2) {
+        IACT_NONSYM(r2, dx, hj, pi->h, pj, pi);
+      }
+
+    } /* loop over the parts in ci. */
+
+  } /* loop over the parts in cj. */
+
+  TIMER_TOC(TIMER_DOPAIR);
+}
+
+/**
+ * @brief Compute the interactions between a cell pair.
+ *
+ * @param r The #runner.
+ * @param ci The first #cell.
+ * @param cj The second #cell.
+ */
+void DOPAIR2_NOSORT(struct runner *r, struct cell *ci, struct cell *cj) {
+
+  const struct engine *e = r->e;
+
+  TIMER_TIC;
+
+  /* Anything to do here? */
+  if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return;
+
+  if (!cell_is_drifted(ci, e)) cell_drift(ci, e);
+  if (!cell_is_drifted(cj, e)) cell_drift(cj, e);
+
+  /* Get the relative distance between the pairs, wrapping. */
+  double shift[3] = {0.0, 0.0, 0.0};
+  space_getsid(e->s, &ci, &cj, shift);
+
+  const int count_i = ci->count;
+  const int count_j = cj->count;
+  struct part *restrict parts_i = ci->parts;
+  struct part *restrict parts_j = cj->parts;
+
+  /* Loop over the parts in ci. */
+  for (int pid = 0; pid < count_i; pid++) {
+
+    /* Get a hold of the ith part in ci. */
+    struct part *restrict pi = &parts_i[pid];
+    if (!part_is_active(pi, e)) continue;
+    const float hi = pi->h;
+
+    double pix[3];
+    for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k];
+    const float hig2 = hi * hi * kernel_gamma2;
+
+    /* Loop over the parts in cj. */
+    for (int pjd = 0; pjd < count_j; pjd++) {
+
+      /* Get a pointer to the jth particle. */
+      struct part *restrict pj = &parts_j[pjd];
+      const float hjg2 = pj->h * pj->h * kernel_gamma2;
+
+      /* Compute the pairwise distance. */
+      float r2 = 0.0f;
+      float dx[3];
+      for (int k = 0; k < 3; k++) {
+        dx[k] = pix[k] - pj->x[k];
+        r2 += dx[k] * dx[k];
+      }
+
+#ifdef SWIFT_DEBUG_CHECKS
+      /* Check that particles have been drifted to the current time */
+      if (pi->ti_drift != e->ti_current)
+        error("Particle pi not drifted to current time");
+      if (pj->ti_drift != e->ti_current)
+        error("Particle pj not drifted to current time");
+#endif
+
+      /* Hit or miss? */
+      if (r2 < hig2 || r2 < hjg2) {
+        IACT_NONSYM(r2, dx, hi, pj->h, pi, pj);
+      }
+
+    } /* loop over the parts in cj. */
+
+  } /* loop over the parts in ci. */
+
+  /* Loop over the parts in cj. */
+  for (int pjd = 0; pjd < count_j; pjd++) {
+
+    /* Get a hold of the ith part in ci. */
+    struct part *restrict pj = &parts_j[pjd];
+    if (!part_is_active(pj, e)) continue;
+    const float hj = pj->h;
+
+    double pjx[3];
+    for (int k = 0; k < 3; k++) pjx[k] = pj->x[k] + shift[k];
+    const float hjg2 = hj * hj * kernel_gamma2;
+
+    /* Loop over the parts in ci. */
+    for (int pid = 0; pid < count_i; pid++) {
+
+      /* Get a pointer to the jth particle. */
+      struct part *restrict pi = &parts_i[pid];
+      const float hig2 = pi->h * pi->h * kernel_gamma2;
+
+      /* Compute the pairwise distance. */
+      float r2 = 0.0f;
+      float dx[3];
+      for (int k = 0; k < 3; k++) {
+        dx[k] = pjx[k] - pi->x[k];
+        r2 += dx[k] * dx[k];
+      }
+
+#ifdef SWIFT_DEBUG_CHECKS
+      /* Check that particles have been drifted to the current time */
+      if (pj->ti_drift != e->ti_current)
+        error("Particle pj not drifted to current time");
+      if (pi->ti_drift != e->ti_current)
+        error("Particle pi not drifted to current time");
+#endif
+
+      /* Hit or miss? */
+      if (r2 < hjg2 || r2 < hig2) {
+        IACT_NONSYM(r2, dx, hj, pi->h, pj, pi);
+      }
+
+    } /* loop over the parts in ci. */
+
+  } /* loop over the parts in cj. */
+
+  TIMER_TOC(TIMER_DOPAIR);
+}
+
+/**
+ * @brief Compute the interactions between a cell pair, but only for the
+ *      given indices in ci.
+ *
+ * @param r The #runner.
+ * @param ci The first #cell.
+ * @param parts_i The #part to interact with @c cj.
+ * @param ind The list of indices of particles in @c ci to interact with.
+ * @param count The number of particles in @c ind.
+ * @param cj The second #cell.
+ */
+void DOPAIR_SUBSET_NOSORT(struct runner *r, struct cell *restrict ci,
+                          struct part *restrict parts_i, int *restrict ind,
+                          int count, struct cell *restrict cj) {
+
+  struct engine *e = r->e;
+
+  TIMER_TIC;
+
+  const int count_j = cj->count;
+  struct part *restrict parts_j = cj->parts;
+
+  /* Get the relative distance between the pairs, wrapping. */
+  double shift[3] = {0.0, 0.0, 0.0};
+  for (int k = 0; k < 3; k++) {
+    if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2)
+      shift[k] = e->s->dim[k];
+    else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2)
+      shift[k] = -e->s->dim[k];
+  }
+
+  /* Loop over the parts_i. */
+  for (int pid = 0; pid < count; pid++) {
+
+    /* Get a hold of the ith part in ci. */
+    struct part *restrict pi = &parts_i[ind[pid]];
+    double pix[3];
+    for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k];
+    const float hi = pi->h;
+    const float hig2 = hi * hi * kernel_gamma2;
+
+    if (!part_is_active(pi, e))
+      error("Trying to correct smoothing length of inactive particle !");
+
+    /* Loop over the parts in cj. */
+    for (int pjd = 0; pjd < count_j; pjd++) {
+
+      /* Get a pointer to the jth particle. */
+      struct part *restrict pj = &parts_j[pjd];
+
+      /* Compute the pairwise distance. */
+      float r2 = 0.0f;
+      float dx[3];
+      for (int k = 0; k < 3; k++) {
+        dx[k] = pix[k] - pj->x[k];
+        r2 += dx[k] * dx[k];
+      }
+
+      /* Hit or miss? */
+      if (r2 < hig2) {
+
+        IACT_NONSYM(r2, dx, hi, pj->h, pi, pj);
+      }
+    } /* loop over the parts in cj. */
+  }   /* loop over the parts in ci. */
+
+  TIMER_TOC(timer_dopair_subset);
+}
diff --git a/src/runner_doiact_vec.c b/src/runner_doiact_vec.c
new file mode 100644
index 0000000000000000000000000000000000000000..b91d288529c0706693d74b0c54d688ee0944aa29
--- /dev/null
+++ b/src/runner_doiact_vec.c
@@ -0,0 +1,874 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2016 James Willis (james.s.willis@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+/* Config parameters. */
+#include "../config.h"
+
+#include "active.h"
+
+/* This object's header. */
+#include "runner_doiact_vec.h"
+
+#ifdef WITH_VECTORIZATION
+/**
+ * @brief Compute the vector remainder interactions from the secondary cache.
+ *
+ * @param int_cache (return) secondary #cache of interactions between two
+ * particles.
+ * @param icount Interaction count.
+ * @param rhoSum (return) #vector holding the cumulative sum of the density
+ * update on pi.
+ * @param rho_dhSum (return) #vector holding the cumulative sum of the density
+ * gradient update on pi.
+ * @param wcountSum (return) #vector holding the cumulative sum of the wcount
+ * update on pi.
+ * @param wcount_dhSum (return) #vector holding the cumulative sum of the wcount
+ * gradient update on pi.
+ * @param div_vSum (return) #vector holding the cumulative sum of the divergence
+ * update on pi.
+ * @param curlvxSum (return) #vector holding the cumulative sum of the curl of
+ * vx update on pi.
+ * @param curlvySum (return) #vector holding the cumulative sum of the curl of
+ * vy update on pi.
+ * @param curlvzSum (return) #vector holding the cumulative sum of the curl of
+ * vz update on pi.
+ * @param v_hi_inv #vector of 1/h for pi.
+ * @param v_vix #vector of x velocity of pi.
+ * @param v_viy #vector of y velocity of pi.
+ * @param v_viz #vector of z velocity of pi.
+ * @param icount_align (return) Interaction count after the remainder
+ * interactions have been performed, should be a multiple of the vector length.
+ */
+__attribute__((always_inline)) INLINE static void calcRemInteractions(
+    struct c2_cache *const int_cache, const int icount, vector *rhoSum,
+    vector *rho_dhSum, vector *wcountSum, vector *wcount_dhSum,
+    vector *div_vSum, vector *curlvxSum, vector *curlvySum, vector *curlvzSum,
+    vector v_hi_inv, vector v_vix, vector v_viy, vector v_viz,
+    int *icount_align) {
+
+#ifdef HAVE_AVX512_F
+  KNL_MASK_16 knl_mask, knl_mask2;
+#endif
+  vector int_mask, int_mask2;
+
+  /* Work out the number of remainder interactions and pad secondary cache. */
+  *icount_align = icount;
+  int rem = icount % (NUM_VEC_PROC * VEC_SIZE);
+  if (rem != 0) {
+    int pad = (NUM_VEC_PROC * VEC_SIZE) - rem;
+    *icount_align += pad;
+
+/* Initialise masks to true. */
+#ifdef HAVE_AVX512_F
+    knl_mask = 0xFFFF;
+    knl_mask2 = 0xFFFF;
+    int_mask.m = vec_setint1(0xFFFFFFFF);
+    int_mask2.m = vec_setint1(0xFFFFFFFF);
+#else
+    int_mask.m = vec_setint1(0xFFFFFFFF);
+    int_mask2.m = vec_setint1(0xFFFFFFFF);
+#endif
+    /* Pad secondary cache so that there are no contributions in the interaction
+     * function. */
+    for (int i = icount; i < *icount_align; i++) {
+      int_cache->mq[i] = 0.f;
+      int_cache->r2q[i] = 1.f;
+      int_cache->dxq[i] = 0.f;
+      int_cache->dyq[i] = 0.f;
+      int_cache->dzq[i] = 0.f;
+      int_cache->vxq[i] = 0.f;
+      int_cache->vyq[i] = 0.f;
+      int_cache->vzq[i] = 0.f;
+    }
+
+    /* Zero parts of mask that represent the padded values.*/
+    if (pad < VEC_SIZE) {
+#ifdef HAVE_AVX512_F
+      knl_mask2 = knl_mask2 >> pad;
+#else
+      for (int i = VEC_SIZE - pad; i < VEC_SIZE; i++) int_mask2.i[i] = 0;
+#endif
+    } else {
+#ifdef HAVE_AVX512_F
+      knl_mask = knl_mask >> (VEC_SIZE - rem);
+      knl_mask2 = 0;
+#else
+      for (int i = rem; i < VEC_SIZE; i++) int_mask.i[i] = 0;
+      int_mask2.v = vec_setzero();
+#endif
+    }
+
+    /* Perform remainder interaction and remove remainder from aligned
+     * interaction count. */
+    *icount_align = icount - rem;
+    runner_iact_nonsym_2_vec_density(
+        &int_cache->r2q[*icount_align], &int_cache->dxq[*icount_align],
+        &int_cache->dyq[*icount_align], &int_cache->dzq[*icount_align],
+        v_hi_inv, v_vix, v_viy, v_viz, &int_cache->vxq[*icount_align],
+        &int_cache->vyq[*icount_align], &int_cache->vzq[*icount_align],
+        &int_cache->mq[*icount_align], rhoSum, rho_dhSum, wcountSum,
+        wcount_dhSum, div_vSum, curlvxSum, curlvySum, curlvzSum, int_mask,
+        int_mask2,
+#ifdef HAVE_AVX512_F
+        knl_mask, knl_mask2);
+#else
+        0, 0);
+#endif
+  }
+}
+
+/**
+ * @brief Left-packs the values needed by an interaction into the secondary
+ * cache (Supports AVX, AVX2 and AVX512 instruction sets).
+ *
+ * @param mask Contains which particles need to interact.
+ * @param pjd Index of the particle to store into.
+ * @param v_r2 #vector of the separation between two particles squared.
+ * @param v_dx #vector of the x separation between two particles.
+ * @param v_dy #vector of the y separation between two particles.
+ * @param v_dz #vector of the z separation between two particles.
+ * @param v_mj #vector of the mass of particle pj.
+ * @param v_vjx #vector of x velocity of pj.
+ * @param v_vjy #vector of y velocity of pj.
+ * @param v_vjz #vector of z velocity of pj.
+ * @param cell_cache #cache of all particles in the cell.
+ * @param int_cache (return) secondary #cache of interactions between two
+ * particles.
+ * @param icount Interaction count.
+ * @param rhoSum #vector holding the cumulative sum of the density update on pi.
+ * @param rho_dhSum #vector holding the cumulative sum of the density gradient
+ * update on pi.
+ * @param wcountSum #vector holding the cumulative sum of the wcount update on
+ * pi.
+ * @param wcount_dhSum #vector holding the cumulative sum of the wcount gradient
+ * update on pi.
+ * @param div_vSum #vector holding the cumulative sum of the divergence update
+ * on pi.
+ * @param curlvxSum #vector holding the cumulative sum of the curl of vx update
+ * on pi.
+ * @param curlvySum #vector holding the cumulative sum of the curl of vy update
+ * on pi.
+ * @param curlvzSum #vector holding the cumulative sum of the curl of vz update
+ * on pi.
+ * @param v_hi_inv #vector of 1/h for pi.
+ * @param v_vix #vector of x velocity of pi.
+ * @param v_viy #vector of y velocity of pi.
+ * @param v_viz #vector of z velocity of pi.
+ */
+__attribute__((always_inline)) INLINE static void storeInteractions(
+    const int mask, const int pjd, vector *v_r2, vector *v_dx, vector *v_dy,
+    vector *v_dz, vector *v_mj, vector *v_vjx, vector *v_vjy, vector *v_vjz,
+    const struct cache *const cell_cache, struct c2_cache *const int_cache,
+    int *icount, vector *rhoSum, vector *rho_dhSum, vector *wcountSum,
+    vector *wcount_dhSum, vector *div_vSum, vector *curlvxSum,
+    vector *curlvySum, vector *curlvzSum, vector v_hi_inv, vector v_vix,
+    vector v_viy, vector v_viz) {
+
+/* Left-pack values needed into the secondary cache using the interaction mask.
+ */
+#if defined(HAVE_AVX2) || defined(HAVE_AVX512_F)
+  int pack = 0;
+
+#ifdef HAVE_AVX512_F
+  pack += __builtin_popcount(mask);
+  VEC_LEFT_PACK(v_r2->v, mask, &int_cache->r2q[*icount]);
+  VEC_LEFT_PACK(v_dx->v, mask, &int_cache->dxq[*icount]);
+  VEC_LEFT_PACK(v_dy->v, mask, &int_cache->dyq[*icount]);
+  VEC_LEFT_PACK(v_dz->v, mask, &int_cache->dzq[*icount]);
+  VEC_LEFT_PACK(v_mj->v, mask, &int_cache->mq[*icount]);
+  VEC_LEFT_PACK(v_vjx->v, mask, &int_cache->vxq[*icount]);
+  VEC_LEFT_PACK(v_vjy->v, mask, &int_cache->vyq[*icount]);
+  VEC_LEFT_PACK(v_vjz->v, mask, &int_cache->vzq[*icount]);
+#else
+  vector v_mask;
+  VEC_FORM_PACKED_MASK(mask, v_mask.m, pack);
+
+  VEC_LEFT_PACK(v_r2->v, v_mask.m, &int_cache->r2q[*icount]);
+  VEC_LEFT_PACK(v_dx->v, v_mask.m, &int_cache->dxq[*icount]);
+  VEC_LEFT_PACK(v_dy->v, v_mask.m, &int_cache->dyq[*icount]);
+  VEC_LEFT_PACK(v_dz->v, v_mask.m, &int_cache->dzq[*icount]);
+  VEC_LEFT_PACK(v_mj->v, v_mask.m, &int_cache->mq[*icount]);
+  VEC_LEFT_PACK(v_vjx->v, v_mask.m, &int_cache->vxq[*icount]);
+  VEC_LEFT_PACK(v_vjy->v, v_mask.m, &int_cache->vyq[*icount]);
+  VEC_LEFT_PACK(v_vjz->v, v_mask.m, &int_cache->vzq[*icount]);
+
+#endif /* HAVE_AVX512_F */
+
+  (*icount) += pack;
+#else
+  /* Quicker to do it serially in AVX rather than use intrinsics. */
+  for (int bit_index = 0; bit_index < VEC_SIZE; bit_index++) {
+    if (mask & (1 << bit_index)) {
+      /* Add this interaction to the queue. */
+      int_cache->r2q[*icount] = v_r2->f[bit_index];
+      int_cache->dxq[*icount] = v_dx->f[bit_index];
+      int_cache->dyq[*icount] = v_dy->f[bit_index];
+      int_cache->dzq[*icount] = v_dz->f[bit_index];
+      int_cache->mq[*icount] = cell_cache->m[pjd + bit_index];
+      int_cache->vxq[*icount] = cell_cache->vx[pjd + bit_index];
+      int_cache->vyq[*icount] = cell_cache->vy[pjd + bit_index];
+      int_cache->vzq[*icount] = cell_cache->vz[pjd + bit_index];
+
+      (*icount)++;
+    }
+  }
+
+#endif /* defined(HAVE_AVX2) || defined(HAVE_AVX512_F) */
+
+  /* Flush the c2 cache if it has reached capacity. */
+  if (*icount >= (C2_CACHE_SIZE - (NUM_VEC_PROC * VEC_SIZE))) {
+
+    int icount_align = *icount;
+
+    /* Peform remainder interactions. */
+    calcRemInteractions(int_cache, *icount, rhoSum, rho_dhSum, wcountSum,
+                        wcount_dhSum, div_vSum, curlvxSum, curlvySum, curlvzSum,
+                        v_hi_inv, v_vix, v_viy, v_viz, &icount_align);
+
+    vector int_mask, int_mask2;
+    int_mask.m = vec_setint1(0xFFFFFFFF);
+    int_mask2.m = vec_setint1(0xFFFFFFFF);
+
+    /* Perform interactions. */
+    for (int pjd = 0; pjd < icount_align; pjd += (NUM_VEC_PROC * VEC_SIZE)) {
+      runner_iact_nonsym_2_vec_density(
+          &int_cache->r2q[pjd], &int_cache->dxq[pjd], &int_cache->dyq[pjd],
+          &int_cache->dzq[pjd], v_hi_inv, v_vix, v_viy, v_viz,
+          &int_cache->vxq[pjd], &int_cache->vyq[pjd], &int_cache->vzq[pjd],
+          &int_cache->mq[pjd], rhoSum, rho_dhSum, wcountSum, wcount_dhSum,
+          div_vSum, curlvxSum, curlvySum, curlvzSum, int_mask, int_mask2, 0, 0);
+    }
+
+    /* Reset interaction count. */
+    *icount = 0;
+  }
+}
+#endif /* WITH_VECTORIZATION */
+
+/**
+ * @brief Compute the cell self-interaction (non-symmetric) using vector
+ * intrinsics with one particle pi at a time.
+ *
+ * @param r The #runner.
+ * @param c The #cell.
+ */
+__attribute__((always_inline)) INLINE void runner_doself1_density_vec(
+    struct runner *r, struct cell *restrict c) {
+
+#ifdef WITH_VECTORIZATION
+  const struct engine *e = r->e;
+  int doi_mask;
+  struct part *restrict pi;
+  int count_align;
+  int num_vec_proc = NUM_VEC_PROC;
+
+  struct part *restrict parts = c->parts;
+  const int count = c->count;
+
+  vector v_hi, v_vix, v_viy, v_viz, v_hig2, v_r2;
+
+  TIMER_TIC
+
+  if (!cell_is_active(c, e)) return;
+
+  if (!cell_is_drifted(c, e)) cell_drift(c, e);
+
+  /* Get the particle cache from the runner and re-allocate
+   * the cache if it is not big enough for the cell. */
+  struct cache *restrict cell_cache = &r->par_cache;
+
+  if (cell_cache->count < count) {
+    cache_init(cell_cache, count);
+  }
+
+  /* Read the particles from the cell and store them locally in the cache. */
+  cache_read_particles(c, cell_cache);
+
+  /* Create secondary cache to store particle interactions. */
+  struct c2_cache int_cache;
+  int icount = 0, icount_align = 0;
+
+  /* Loop over the particles in the cell. */
+  for (int pid = 0; pid < count; pid++) {
+
+    /* Get a pointer to the ith particle. */
+    pi = &parts[pid];
+
+    /* Is the ith particle active? */
+    if (!part_is_active(pi, e)) continue;
+
+    vector pix, piy, piz;
+
+    const float hi = cell_cache->h[pid];
+
+    /* Fill particle pi vectors. */
+    pix.v = vec_set1(cell_cache->x[pid]);
+    piy.v = vec_set1(cell_cache->y[pid]);
+    piz.v = vec_set1(cell_cache->z[pid]);
+    v_hi.v = vec_set1(hi);
+    v_vix.v = vec_set1(cell_cache->vx[pid]);
+    v_viy.v = vec_set1(cell_cache->vy[pid]);
+    v_viz.v = vec_set1(cell_cache->vz[pid]);
+
+    const float hig2 = hi * hi * kernel_gamma2;
+    v_hig2.v = vec_set1(hig2);
+
+    /* Reset cumulative sums of update vectors. */
+    vector rhoSum, rho_dhSum, wcountSum, wcount_dhSum, div_vSum, curlvxSum,
+        curlvySum, curlvzSum;
+
+    /* Get the inverse of hi. */
+    vector v_hi_inv;
+
+    v_hi_inv = vec_reciprocal(v_hi);
+
+    rhoSum.v = vec_setzero();
+    rho_dhSum.v = vec_setzero();
+    wcountSum.v = vec_setzero();
+    wcount_dhSum.v = vec_setzero();
+    div_vSum.v = vec_setzero();
+    curlvxSum.v = vec_setzero();
+    curlvySum.v = vec_setzero();
+    curlvzSum.v = vec_setzero();
+
+    /* Pad cache if there is a serial remainder. */
+    count_align = count;
+    int rem = count % (num_vec_proc * VEC_SIZE);
+    if (rem != 0) {
+      int pad = (num_vec_proc * VEC_SIZE) - rem;
+
+      count_align += pad;
+      /* Set positions to the same as particle pi so when the r2 > 0 mask is
+       * applied these extra contributions are masked out.*/
+      for (int i = count; i < count_align; i++) {
+        cell_cache->x[i] = pix.f[0];
+        cell_cache->y[i] = piy.f[0];
+        cell_cache->z[i] = piz.f[0];
+      }
+    }
+
+    vector pjx, pjy, pjz;
+    vector pjvx, pjvy, pjvz, mj;
+    vector pjx2, pjy2, pjz2;
+    vector pjvx2, pjvy2, pjvz2, mj2;
+
+    /* Find all of particle pi's interacions and store needed values in the
+     * secondary cache.*/
+    for (int pjd = 0; pjd < count_align; pjd += (num_vec_proc * VEC_SIZE)) {
+
+      /* Load 2 sets of vectors from the particle cache. */
+      pjx.v = vec_load(&cell_cache->x[pjd]);
+      pjy.v = vec_load(&cell_cache->y[pjd]);
+      pjz.v = vec_load(&cell_cache->z[pjd]);
+      pjvx.v = vec_load(&cell_cache->vx[pjd]);
+      pjvy.v = vec_load(&cell_cache->vy[pjd]);
+      pjvz.v = vec_load(&cell_cache->vz[pjd]);
+      mj.v = vec_load(&cell_cache->m[pjd]);
+
+      pjx2.v = vec_load(&cell_cache->x[pjd + VEC_SIZE]);
+      pjy2.v = vec_load(&cell_cache->y[pjd + VEC_SIZE]);
+      pjz2.v = vec_load(&cell_cache->z[pjd + VEC_SIZE]);
+      pjvx2.v = vec_load(&cell_cache->vx[pjd + VEC_SIZE]);
+      pjvy2.v = vec_load(&cell_cache->vy[pjd + VEC_SIZE]);
+      pjvz2.v = vec_load(&cell_cache->vz[pjd + VEC_SIZE]);
+      mj2.v = vec_load(&cell_cache->m[pjd + VEC_SIZE]);
+
+      /* Compute the pairwise distance. */
+      vector v_dx_tmp, v_dy_tmp, v_dz_tmp;
+      vector v_dx_tmp2, v_dy_tmp2, v_dz_tmp2, v_r2_2;
+
+      v_dx_tmp.v = vec_sub(pix.v, pjx.v);
+      v_dy_tmp.v = vec_sub(piy.v, pjy.v);
+      v_dz_tmp.v = vec_sub(piz.v, pjz.v);
+      v_dx_tmp2.v = vec_sub(pix.v, pjx2.v);
+      v_dy_tmp2.v = vec_sub(piy.v, pjy2.v);
+      v_dz_tmp2.v = vec_sub(piz.v, pjz2.v);
+
+      v_r2.v = vec_mul(v_dx_tmp.v, v_dx_tmp.v);
+      v_r2.v = vec_fma(v_dy_tmp.v, v_dy_tmp.v, v_r2.v);
+      v_r2.v = vec_fma(v_dz_tmp.v, v_dz_tmp.v, v_r2.v);
+      v_r2_2.v = vec_mul(v_dx_tmp2.v, v_dx_tmp2.v);
+      v_r2_2.v = vec_fma(v_dy_tmp2.v, v_dy_tmp2.v, v_r2_2.v);
+      v_r2_2.v = vec_fma(v_dz_tmp2.v, v_dz_tmp2.v, v_r2_2.v);
+
+/* Form a mask from r2 < hig2 and r2 > 0.*/
+#ifdef HAVE_AVX512_F
+      // KNL_MASK_16 doi_mask, doi_mask_check, doi_mask2, doi_mask2_check;
+      KNL_MASK_16 doi_mask_check, doi_mask2, doi_mask2_check;
+
+      doi_mask_check = vec_cmp_gt(v_r2.v, vec_setzero());
+      doi_mask = vec_cmp_lt(v_r2.v, v_hig2.v);
+
+      doi_mask2_check = vec_cmp_gt(v_r2_2.v, vec_setzero());
+      doi_mask2 = vec_cmp_lt(v_r2_2.v, v_hig2.v);
+
+      doi_mask = doi_mask & doi_mask_check;
+      doi_mask2 = doi_mask2 & doi_mask2_check;
+
+#else
+      vector v_doi_mask, v_doi_mask_check, v_doi_mask2, v_doi_mask2_check;
+      int doi_mask2;
+
+      /* Form r2 > 0 mask and r2 < hig2 mask. */
+      v_doi_mask_check.v = vec_cmp_gt(v_r2.v, vec_setzero());
+      v_doi_mask.v = vec_cmp_lt(v_r2.v, v_hig2.v);
+
+      /* Form r2 > 0 mask and r2 < hig2 mask. */
+      v_doi_mask2_check.v = vec_cmp_gt(v_r2_2.v, vec_setzero());
+      v_doi_mask2.v = vec_cmp_lt(v_r2_2.v, v_hig2.v);
+
+      /* Combine two masks and form integer mask. */
+      doi_mask = vec_cmp_result(vec_and(v_doi_mask.v, v_doi_mask_check.v));
+      doi_mask2 = vec_cmp_result(vec_and(v_doi_mask2.v, v_doi_mask2_check.v));
+#endif /* HAVE_AVX512_F */
+
+      /* If there are any interactions left pack interaction values into c2
+       * cache. */
+      if (doi_mask) {
+        storeInteractions(doi_mask, pjd, &v_r2, &v_dx_tmp, &v_dy_tmp, &v_dz_tmp,
+                          &mj, &pjvx, &pjvy, &pjvz, cell_cache, &int_cache,
+                          &icount, &rhoSum, &rho_dhSum, &wcountSum,
+                          &wcount_dhSum, &div_vSum, &curlvxSum, &curlvySum,
+                          &curlvzSum, v_hi_inv, v_vix, v_viy, v_viz);
+      }
+      if (doi_mask2) {
+        storeInteractions(
+            doi_mask2, pjd + VEC_SIZE, &v_r2_2, &v_dx_tmp2, &v_dy_tmp2,
+            &v_dz_tmp2, &mj2, &pjvx2, &pjvy2, &pjvz2, cell_cache, &int_cache,
+            &icount, &rhoSum, &rho_dhSum, &wcountSum, &wcount_dhSum, &div_vSum,
+            &curlvxSum, &curlvySum, &curlvzSum, v_hi_inv, v_vix, v_viy, v_viz);
+      }
+    }
+
+    /* Perform padded vector remainder interactions if any are present. */
+    calcRemInteractions(&int_cache, icount, &rhoSum, &rho_dhSum, &wcountSum,
+                        &wcount_dhSum, &div_vSum, &curlvxSum, &curlvySum,
+                        &curlvzSum, v_hi_inv, v_vix, v_viy, v_viz,
+                        &icount_align);
+
+    /* Initialise masks to true in case remainder interactions have been
+     * performed. */
+    vector int_mask, int_mask2;
+#ifdef HAVE_AVX512_F
+    KNL_MASK_16 knl_mask = 0xFFFF;
+    KNL_MASK_16 knl_mask2 = 0xFFFF;
+    int_mask.m = vec_setint1(0xFFFFFFFF);
+    int_mask2.m = vec_setint1(0xFFFFFFFF);
+#else
+    int_mask.m = vec_setint1(0xFFFFFFFF);
+    int_mask2.m = vec_setint1(0xFFFFFFFF);
+#endif
+
+    /* Perform interaction with 2 vectors. */
+    for (int pjd = 0; pjd < icount_align; pjd += (num_vec_proc * VEC_SIZE)) {
+      runner_iact_nonsym_2_vec_density(
+          &int_cache.r2q[pjd], &int_cache.dxq[pjd], &int_cache.dyq[pjd],
+          &int_cache.dzq[pjd], v_hi_inv, v_vix, v_viy, v_viz,
+          &int_cache.vxq[pjd], &int_cache.vyq[pjd], &int_cache.vzq[pjd],
+          &int_cache.mq[pjd], &rhoSum, &rho_dhSum, &wcountSum, &wcount_dhSum,
+          &div_vSum, &curlvxSum, &curlvySum, &curlvzSum, int_mask, int_mask2,
+#ifdef HAVE_AVX512_F
+          knl_mask, knl_mask2);
+#else
+          0, 0);
+#endif
+    }
+
+    /* Perform horizontal adds on vector sums and store result in particle pi.
+     */
+    VEC_HADD(rhoSum, pi->rho);
+    VEC_HADD(rho_dhSum, pi->density.rho_dh);
+    VEC_HADD(wcountSum, pi->density.wcount);
+    VEC_HADD(wcount_dhSum, pi->density.wcount_dh);
+    VEC_HADD(div_vSum, pi->density.div_v);
+    VEC_HADD(curlvxSum, pi->density.rot_v[0]);
+    VEC_HADD(curlvySum, pi->density.rot_v[1]);
+    VEC_HADD(curlvzSum, pi->density.rot_v[2]);
+
+    /* Reset interaction count. */
+    icount = 0;
+  } /* loop over all particles. */
+
+  TIMER_TOC(timer_doself_density);
+#endif /* WITH_VECTORIZATION */
+}
+
+/**
+ * @brief Compute the cell self-interaction (non-symmetric) using vector
+ * intrinsics with two particle pis at a time.
+ *
+ * CURRENTLY BROKEN DO NOT USE.
+ *
+ * @param r The #runner.
+ * @param c The #cell.
+ */
+__attribute__((always_inline)) INLINE void runner_doself1_density_vec_2(
+    struct runner *r, struct cell *restrict c) {
+
+#ifdef WITH_VECTORIZATION
+  const struct engine *e = r->e;
+  int doi_mask;
+  int doi2_mask;
+  struct part *restrict pi;
+  struct part *restrict pi2;
+  int count_align;
+
+  vector v_hi, v_vix, v_viy, v_viz, v_hig2, v_r2;
+  vector v_hi2, v_vix2, v_viy2, v_viz2, v_hig2_2, v2_r2;
+
+  TIMER_TIC
+
+  if (!cell_is_active(c, e)) return;
+
+  if (!cell_is_drifted(c, e)) cell_drift(c, e);
+
+  /* TODO: Need to find two active particles, not just one. */
+
+  struct part *restrict parts = c->parts;
+  const int count = c->count;
+
+  /* Get the particle cache from the runner and re-allocate
+   * the cache if it is not big enough for the cell. */
+  struct cache *restrict cell_cache = &r->par_cache;
+
+  if (cell_cache->count < count) {
+    cache_init(cell_cache, count);
+  }
+
+  /* Read the particles from the cell and store them locally in the cache. */
+  cache_read_particles(c, &r->par_cache);
+
+  /* Create two secondary caches. */
+  int icount = 0, icount_align = 0;
+  struct c2_cache int_cache;
+
+  int icount2 = 0, icount_align2 = 0;
+  struct c2_cache int_cache2;
+
+  /* Loop over the particles in the cell. */
+  for (int pid = 0; pid < count; pid += 2) {
+
+    /* Get a pointer to the ith particle and next i particle. */
+    pi = &parts[pid];
+    pi2 = &parts[pid + 1];
+
+    /* Is the ith particle active? */
+    if (!part_is_active(pi, e)) continue;
+
+    vector pix, piy, piz;
+    vector pix2, piy2, piz2;
+
+    const float hi = cell_cache->h[pid];
+    const float hi2 = cell_cache->h[pid + 1];
+
+    /* Fill pi position vector. */
+    pix.v = vec_set1(cell_cache->x[pid]);
+    piy.v = vec_set1(cell_cache->y[pid]);
+    piz.v = vec_set1(cell_cache->z[pid]);
+    v_hi.v = vec_set1(hi);
+    v_vix.v = vec_set1(cell_cache->vx[pid]);
+    v_viy.v = vec_set1(cell_cache->vy[pid]);
+    v_viz.v = vec_set1(cell_cache->vz[pid]);
+
+    pix2.v = vec_set1(cell_cache->x[pid + 1]);
+    piy2.v = vec_set1(cell_cache->y[pid + 1]);
+    piz2.v = vec_set1(cell_cache->z[pid + 1]);
+    v_hi2.v = vec_set1(hi2);
+    v_vix2.v = vec_set1(cell_cache->vx[pid + 1]);
+    v_viy2.v = vec_set1(cell_cache->vy[pid + 1]);
+    v_viz2.v = vec_set1(cell_cache->vz[pid + 1]);
+
+    const float hig2 = hi * hi * kernel_gamma2;
+    const float hig2_2 = hi2 * hi2 * kernel_gamma2;
+    v_hig2.v = vec_set1(hig2);
+    v_hig2_2.v = vec_set1(hig2_2);
+
+    vector rhoSum, rho_dhSum, wcountSum, wcount_dhSum, div_vSum, curlvxSum,
+        curlvySum, curlvzSum;
+    vector rhoSum2, rho_dhSum2, wcountSum2, wcount_dhSum2, div_vSum2,
+        curlvxSum2, curlvySum2, curlvzSum2;
+
+    vector v_hi_inv, v_hi_inv2;
+
+    v_hi_inv = vec_reciprocal(v_hi);
+    v_hi_inv2 = vec_reciprocal(v_hi2);
+
+    rhoSum.v = vec_setzero();
+    rho_dhSum.v = vec_setzero();
+    wcountSum.v = vec_setzero();
+    wcount_dhSum.v = vec_setzero();
+    div_vSum.v = vec_setzero();
+    curlvxSum.v = vec_setzero();
+    curlvySum.v = vec_setzero();
+    curlvzSum.v = vec_setzero();
+
+    rhoSum2.v = vec_setzero();
+    rho_dhSum2.v = vec_setzero();
+    wcountSum2.v = vec_setzero();
+    wcount_dhSum2.v = vec_setzero();
+    div_vSum2.v = vec_setzero();
+    curlvxSum2.v = vec_setzero();
+    curlvySum2.v = vec_setzero();
+    curlvzSum2.v = vec_setzero();
+
+    /* Pad cache if there is a serial remainder. */
+    count_align = count;
+    int rem = count % (NUM_VEC_PROC * VEC_SIZE);
+    if (rem != 0) {
+      int pad = (NUM_VEC_PROC * VEC_SIZE) - rem;
+
+      count_align += pad;
+      /* Set positions to the same as particle pi so when the r2 > 0 mask is
+       * applied these extra contributions are masked out.*/
+      for (int i = count; i < count_align; i++) {
+        cell_cache->x[i] = pix.f[0];
+        cell_cache->y[i] = piy.f[0];
+        cell_cache->z[i] = piz.f[0];
+      }
+    }
+
+    vector pjx, pjy, pjz;
+    vector pjvx, pjvy, pjvz, mj;
+    vector pjx2, pjy2, pjz2;
+    vector pjvx2, pjvy2, pjvz2, mj2;
+
+    /* Find all of particle pi's interacions and store needed values in
+     * secondary cache.*/
+    for (int pjd = 0; pjd < count_align; pjd += (NUM_VEC_PROC * VEC_SIZE)) {
+
+      /* Load 2 sets of vectors from the particle cache. */
+      pjx.v = vec_load(&cell_cache->x[pjd]);
+      pjy.v = vec_load(&cell_cache->y[pjd]);
+      pjz.v = vec_load(&cell_cache->z[pjd]);
+      pjvx.v = vec_load(&cell_cache->vx[pjd]);
+      pjvy.v = vec_load(&cell_cache->vy[pjd]);
+      pjvz.v = vec_load(&cell_cache->vz[pjd]);
+      mj.v = vec_load(&cell_cache->m[pjd]);
+
+      pjx2.v = vec_load(&cell_cache->x[pjd + VEC_SIZE]);
+      pjy2.v = vec_load(&cell_cache->y[pjd + VEC_SIZE]);
+      pjz2.v = vec_load(&cell_cache->z[pjd + VEC_SIZE]);
+      pjvx2.v = vec_load(&cell_cache->vx[pjd + VEC_SIZE]);
+      pjvy2.v = vec_load(&cell_cache->vy[pjd + VEC_SIZE]);
+      pjvz2.v = vec_load(&cell_cache->vz[pjd + VEC_SIZE]);
+      mj2.v = vec_load(&cell_cache->m[pjd + VEC_SIZE]);
+
+      /* Compute the pairwise distance. */
+      vector v_dx_tmp, v_dy_tmp, v_dz_tmp;
+      vector v_dx_tmp2, v_dy_tmp2, v_dz_tmp2, v_r2_2;
+      vector v_dx2_tmp, v_dy2_tmp, v_dz2_tmp;
+      vector v_dx2_tmp2, v_dy2_tmp2, v_dz2_tmp2, v2_r2_2;
+
+      v_dx_tmp.v = vec_sub(pix.v, pjx.v);
+      v_dy_tmp.v = vec_sub(piy.v, pjy.v);
+      v_dz_tmp.v = vec_sub(piz.v, pjz.v);
+      v_dx_tmp2.v = vec_sub(pix.v, pjx2.v);
+      v_dy_tmp2.v = vec_sub(piy.v, pjy2.v);
+      v_dz_tmp2.v = vec_sub(piz.v, pjz2.v);
+
+      v_dx2_tmp.v = vec_sub(pix2.v, pjx.v);
+      v_dy2_tmp.v = vec_sub(piy2.v, pjy.v);
+      v_dz2_tmp.v = vec_sub(piz2.v, pjz.v);
+      v_dx2_tmp2.v = vec_sub(pix2.v, pjx2.v);
+      v_dy2_tmp2.v = vec_sub(piy2.v, pjy2.v);
+      v_dz2_tmp2.v = vec_sub(piz2.v, pjz2.v);
+
+      v_r2.v = vec_mul(v_dx_tmp.v, v_dx_tmp.v);
+      v_r2.v = vec_fma(v_dy_tmp.v, v_dy_tmp.v, v_r2.v);
+      v_r2.v = vec_fma(v_dz_tmp.v, v_dz_tmp.v, v_r2.v);
+      v_r2_2.v = vec_mul(v_dx_tmp2.v, v_dx_tmp2.v);
+      v_r2_2.v = vec_fma(v_dy_tmp2.v, v_dy_tmp2.v, v_r2_2.v);
+      v_r2_2.v = vec_fma(v_dz_tmp2.v, v_dz_tmp2.v, v_r2_2.v);
+
+      v2_r2.v = vec_mul(v_dx2_tmp.v, v_dx2_tmp.v);
+      v2_r2.v = vec_fma(v_dy2_tmp.v, v_dy2_tmp.v, v2_r2.v);
+      v2_r2.v = vec_fma(v_dz2_tmp.v, v_dz2_tmp.v, v2_r2.v);
+      v2_r2_2.v = vec_mul(v_dx2_tmp2.v, v_dx2_tmp2.v);
+      v2_r2_2.v = vec_fma(v_dy2_tmp2.v, v_dy2_tmp2.v, v2_r2_2.v);
+      v2_r2_2.v = vec_fma(v_dz2_tmp2.v, v_dz2_tmp2.v, v2_r2_2.v);
+
+/* Form a mask from r2 < hig2 and r2 > 0.*/
+#ifdef HAVE_AVX512_F
+      // KNL_MASK_16 doi_mask, doi_mask_check, doi_mask2, doi_mask2_check;
+      KNL_MASK_16 doi_mask_check, doi_mask2, doi_mask2_check;
+      KNL_MASK_16 doi2_mask_check, doi2_mask2, doi2_mask2_check;
+
+      doi_mask_check = vec_cmp_gt(v_r2.v, vec_setzero());
+      doi_mask = vec_cmp_lt(v_r2.v, v_hig2.v);
+
+      doi2_mask_check = vec_cmp_gt(v2_r2.v, vec_setzero());
+      doi2_mask = vec_cmp_lt(v2_r2.v, v_hig2_2.v);
+
+      doi_mask2_check = vec_cmp_gt(v_r2_2.v, vec_setzero());
+      doi_mask2 = vec_cmp_lt(v_r2_2.v, v_hig2.v);
+
+      doi2_mask2_check = vec_cmp_gt(v2_r2_2.v, vec_setzero());
+      doi2_mask2 = vec_cmp_lt(v2_r2_2.v, v_hig2_2.v);
+
+      doi_mask = doi_mask & doi_mask_check;
+      doi_mask2 = doi_mask2 & doi_mask2_check;
+
+      doi2_mask = doi2_mask & doi2_mask_check;
+      doi2_mask2 = doi2_mask2 & doi2_mask2_check;
+#else
+      vector v_doi_mask, v_doi_mask_check, v_doi_mask2, v_doi_mask2_check;
+      int doi_mask2;
+
+      vector v_doi2_mask, v_doi2_mask_check, v_doi2_mask2, v_doi2_mask2_check;
+      int doi2_mask2;
+
+      v_doi_mask_check.v = vec_cmp_gt(v_r2.v, vec_setzero());
+      v_doi_mask.v = vec_cmp_lt(v_r2.v, v_hig2.v);
+
+      v_doi2_mask_check.v = vec_cmp_gt(v2_r2.v, vec_setzero());
+      v_doi2_mask.v = vec_cmp_lt(v2_r2.v, v_hig2_2.v);
+
+      v_doi_mask2_check.v = vec_cmp_gt(v_r2_2.v, vec_setzero());
+      v_doi_mask2.v = vec_cmp_lt(v_r2_2.v, v_hig2.v);
+
+      v_doi2_mask2_check.v = vec_cmp_gt(v2_r2_2.v, vec_setzero());
+      v_doi2_mask2.v = vec_cmp_lt(v2_r2_2.v, v_hig2_2.v);
+
+      doi_mask = vec_cmp_result(vec_and(v_doi_mask.v, v_doi_mask_check.v));
+      doi_mask2 = vec_cmp_result(vec_and(v_doi_mask2.v, v_doi_mask2_check.v));
+      doi2_mask = vec_cmp_result(vec_and(v_doi2_mask.v, v_doi2_mask_check.v));
+      doi2_mask2 =
+          vec_cmp_result(vec_and(v_doi2_mask2.v, v_doi2_mask2_check.v));
+#endif /* HAVE_AVX512_F */
+
+      /* Hit or miss? */
+      // if (doi_mask) {
+      storeInteractions(doi_mask, pjd, &v_r2, &v_dx_tmp, &v_dy_tmp, &v_dz_tmp,
+                        &mj, &pjvx, &pjvy, &pjvz, cell_cache, &int_cache,
+                        &icount, &rhoSum, &rho_dhSum, &wcountSum, &wcount_dhSum,
+                        &div_vSum, &curlvxSum, &curlvySum, &curlvzSum, v_hi_inv,
+                        v_vix, v_viy, v_viz);
+      //}
+      // if (doi2_mask) {
+      storeInteractions(
+          doi2_mask, pjd, &v2_r2, &v_dx2_tmp, &v_dy2_tmp, &v_dz2_tmp, &mj,
+          &pjvx, &pjvy, &pjvz, cell_cache, &int_cache2, &icount2, &rhoSum2,
+          &rho_dhSum2, &wcountSum2, &wcount_dhSum2, &div_vSum2, &curlvxSum2,
+          &curlvySum2, &curlvzSum2, v_hi_inv2, v_vix2, v_viy2, v_viz2);
+      //}
+      /* Hit or miss? */
+      // if (doi_mask2) {
+      storeInteractions(doi_mask2, pjd + VEC_SIZE, &v_r2_2, &v_dx_tmp2,
+                        &v_dy_tmp2, &v_dz_tmp2, &mj2, &pjvx2, &pjvy2, &pjvz2,
+                        cell_cache, &int_cache, &icount, &rhoSum, &rho_dhSum,
+                        &wcountSum, &wcount_dhSum, &div_vSum, &curlvxSum,
+                        &curlvySum, &curlvzSum, v_hi_inv, v_vix, v_viy, v_viz);
+      //}
+      // if (doi2_mask2) {
+      storeInteractions(doi2_mask2, pjd + VEC_SIZE, &v2_r2_2, &v_dx2_tmp2,
+                        &v_dy2_tmp2, &v_dz2_tmp2, &mj2, &pjvx2, &pjvy2, &pjvz2,
+                        cell_cache, &int_cache2, &icount2, &rhoSum2,
+                        &rho_dhSum2, &wcountSum2, &wcount_dhSum2, &div_vSum2,
+                        &curlvxSum2, &curlvySum2, &curlvzSum2, v_hi_inv2,
+                        v_vix2, v_viy2, v_viz2);
+      //}
+    }
+
+    /* Perform padded vector remainder interactions if any are present. */
+    calcRemInteractions(&int_cache, icount, &rhoSum, &rho_dhSum, &wcountSum,
+                        &wcount_dhSum, &div_vSum, &curlvxSum, &curlvySum,
+                        &curlvzSum, v_hi_inv, v_vix, v_viy, v_viz,
+                        &icount_align);
+
+    calcRemInteractions(&int_cache2, icount2, &rhoSum2, &rho_dhSum2,
+                        &wcountSum2, &wcount_dhSum2, &div_vSum2, &curlvxSum2,
+                        &curlvySum2, &curlvzSum2, v_hi_inv2, v_vix2, v_viy2,
+                        v_viz2, &icount_align2);
+
+    /* Initialise masks to true incase remainder interactions have been
+     * performed. */
+    vector int_mask, int_mask2;
+    vector int2_mask, int2_mask2;
+#ifdef HAVE_AVX512_F
+    KNL_MASK_16 knl_mask = 0xFFFF;
+    KNL_MASK_16 knl_mask2 = 0xFFFF;
+    int_mask.m = vec_setint1(0xFFFFFFFF);
+    int_mask2.m = vec_setint1(0xFFFFFFFF);
+    int2_mask.m = vec_setint1(0xFFFFFFFF);
+    int2_mask2.m = vec_setint1(0xFFFFFFFF);
+#else
+    int_mask.m = vec_setint1(0xFFFFFFFF);
+    int_mask2.m = vec_setint1(0xFFFFFFFF);
+
+    int2_mask.m = vec_setint1(0xFFFFFFFF);
+    int2_mask2.m = vec_setint1(0xFFFFFFFF);
+#endif
+
+    /* Perform interaction with 2 vectors. */
+    for (int pjd = 0; pjd < icount_align; pjd += (NUM_VEC_PROC * VEC_SIZE)) {
+      runner_iact_nonsym_2_vec_density(
+          &int_cache.r2q[pjd], &int_cache.dxq[pjd], &int_cache.dyq[pjd],
+          &int_cache.dzq[pjd], v_hi_inv, v_vix, v_viy, v_viz,
+          &int_cache.vxq[pjd], &int_cache.vyq[pjd], &int_cache.vzq[pjd],
+          &int_cache.mq[pjd], &rhoSum, &rho_dhSum, &wcountSum, &wcount_dhSum,
+          &div_vSum, &curlvxSum, &curlvySum, &curlvzSum, int_mask, int_mask2,
+#ifdef HAVE_AVX512_F
+          knl_mask, knl_mask2);
+#else
+          0, 0);
+#endif
+    }
+
+    for (int pjd = 0; pjd < icount_align2; pjd += (NUM_VEC_PROC * VEC_SIZE)) {
+      runner_iact_nonsym_2_vec_density(
+          &int_cache2.r2q[pjd], &int_cache2.dxq[pjd], &int_cache2.dyq[pjd],
+          &int_cache2.dzq[pjd], v_hi_inv2, v_vix2, v_viy2, v_viz2,
+          &int_cache2.vxq[pjd], &int_cache2.vyq[pjd], &int_cache2.vzq[pjd],
+          &int_cache2.mq[pjd], &rhoSum2, &rho_dhSum2, &wcountSum2,
+          &wcount_dhSum2, &div_vSum2, &curlvxSum2, &curlvySum2, &curlvzSum2,
+          int2_mask, int2_mask2,
+#ifdef HAVE_AVX512_F
+          knl_mask, knl_mask2);
+#else
+          0, 0);
+#endif
+    }
+    /* Perform horizontal adds on vector sums and store result in particle pi.
+     */
+    VEC_HADD(rhoSum, pi->rho);
+    VEC_HADD(rho_dhSum, pi->density.rho_dh);
+    VEC_HADD(wcountSum, pi->density.wcount);
+    VEC_HADD(wcount_dhSum, pi->density.wcount_dh);
+    VEC_HADD(div_vSum, pi->density.div_v);
+    VEC_HADD(curlvxSum, pi->density.rot_v[0]);
+    VEC_HADD(curlvySum, pi->density.rot_v[1]);
+    VEC_HADD(curlvzSum, pi->density.rot_v[2]);
+
+    VEC_HADD(rhoSum2, pi2->rho);
+    VEC_HADD(rho_dhSum2, pi2->density.rho_dh);
+    VEC_HADD(wcountSum2, pi2->density.wcount);
+    VEC_HADD(wcount_dhSum2, pi2->density.wcount_dh);
+    VEC_HADD(div_vSum2, pi2->density.div_v);
+    VEC_HADD(curlvxSum2, pi2->density.rot_v[0]);
+    VEC_HADD(curlvySum2, pi2->density.rot_v[1]);
+    VEC_HADD(curlvzSum2, pi2->density.rot_v[2]);
+
+    /* Reset interaction count. */
+    icount = 0;
+    icount2 = 0;
+  } /* loop over all particles. */
+
+  TIMER_TOC(timer_doself_density);
+#endif /* WITH_VECTORIZATION */
+}
diff --git a/src/runner_doiact_vec.h b/src/runner_doiact_vec.h
new file mode 100644
index 0000000000000000000000000000000000000000..9bb24f12cedf03ec49a5a03f92d308f92d49aa54
--- /dev/null
+++ b/src/runner_doiact_vec.h
@@ -0,0 +1,39 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2016 James Willis (james.s.willis@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+#ifndef SWIFT_RUNNER_VEC_H
+#define SWIFT_RUNNER_VEC_H
+
+/* Config parameters. */
+#include "../config.h"
+
+/* Local headers */
+#include "cell.h"
+#include "engine.h"
+#include "hydro.h"
+#include "part.h"
+#include "runner.h"
+#include "timers.h"
+#include "vector.h"
+
+/* Function prototypes. */
+void runner_doself1_density_vec(struct runner *r, struct cell *restrict c);
+void runner_doself1_density_vec_2(struct runner *r, struct cell *restrict c);
+
+#endif /* SWIFT_RUNNER_VEC_H */
diff --git a/src/scheduler.c b/src/scheduler.c
index 0d7c8c4754bac931c7886200176e3e9441c63c53..f98c1082afbf7ec029a7556e36eb9d18ed37bd0a 100644
--- a/src/scheduler.c
+++ b/src/scheduler.c
@@ -132,9 +132,14 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) {
 
     /* Non-splittable task? */
     if ((t->ci == NULL || (t->type == task_type_pair && t->cj == NULL)) ||
-        ((t->type == task_type_kick) && t->ci->nodeID != s->nodeID) ||
+        ((t->type == task_type_kick1) && t->ci->nodeID != s->nodeID) ||
+        ((t->type == task_type_kick2) && t->ci->nodeID != s->nodeID) ||
+        ((t->type == task_type_drift) && t->ci->nodeID != s->nodeID) ||
+        ((t->type == task_type_timestep) && t->ci->nodeID != s->nodeID) ||
         ((t->type == task_type_init) && t->ci->nodeID != s->nodeID)) {
       t->type = task_type_none;
+      t->subtype = task_subtype_none;
+      t->cj = NULL;
       t->skip = 1;
       break;
     }
@@ -214,7 +219,7 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) {
       /* Get the sort ID, use space_getsid and not t->flags
          to make sure we get ci and cj swapped if needed. */
       double shift[3];
-      int sid = space_getsid(s->space, &ci, &cj, shift);
+      const int sid = space_getsid(s->space, &ci, &cj, shift);
 
       /* Should this task be split-up? */
       if (ci->split && cj->split &&
@@ -690,6 +695,12 @@ struct task *scheduler_addtask(struct scheduler *s, enum task_types type,
                                enum task_subtypes subtype, int flags, int wait,
                                struct cell *ci, struct cell *cj, int tight) {
 
+#ifdef SWIFT_DEBUG_CHECKS
+  if (ci == NULL && cj != NULL)
+    error("Added a task with ci==NULL and cj!=NULL type=%s/%s",
+          taskID_names[type], subtaskID_names[subtype]);
+#endif
+
   /* Get the next free task. */
   const int ind = atomic_inc(&s->tasks_next);
 
@@ -782,7 +793,10 @@ void scheduler_set_unlocks(struct scheduler *s) {
     for (int i = 0; i < t->nr_unlock_tasks; i++) {
       for (int j = i + 1; j < t->nr_unlock_tasks; j++) {
         if (t->unlock_tasks[i] == t->unlock_tasks[j])
-          error("duplicate unlock!");
+          error("duplicate unlock! t->type=%s/%s unlocking type=%s/%s",
+                taskID_names[t->type], subtaskID_names[t->subtype],
+                taskID_names[t->unlock_tasks[i]->type],
+                subtaskID_names[t->unlock_tasks[i]->subtype]);
       }
     }
   }
@@ -959,7 +973,16 @@ void scheduler_reweight(struct scheduler *s, int verbose) {
       case task_type_ghost:
         if (t->ci == t->ci->super) cost = wscale * t->ci->count;
         break;
-      case task_type_kick:
+      case task_type_drift:
+        cost = wscale * t->ci->count;
+        break;
+      case task_type_kick1:
+        cost = wscale * t->ci->count;
+        break;
+      case task_type_kick2:
+        cost = wscale * t->ci->count;
+        break;
+      case task_type_timestep:
         cost = wscale * t->ci->count;
         break;
       case task_type_init:
@@ -1052,7 +1075,7 @@ void scheduler_start(struct scheduler *s) {
 /* Check we have not missed an active task */
 #ifdef SWIFT_DEBUG_CHECKS
 
-  const int ti_current = s->space->e->ti_current;
+  const integertime_t ti_current = s->space->e->ti_current;
 
   if (ti_current > 0) {
 
@@ -1062,13 +1085,24 @@ void scheduler_start(struct scheduler *s) {
       struct cell *ci = t->ci;
       struct cell *cj = t->cj;
 
-      if (cj == NULL) { /* self */
+      if (t->type == task_type_none) continue;
+
+      /* Don't check MPI stuff */
+      if (t->type == task_type_send || t->type == task_type_recv) continue;
+
+      if (ci == NULL && cj == NULL) {
+
+        if (t->type != task_type_grav_gather_m && t->type != task_type_grav_fft)
+          error("Task not associated with cells!");
+
+      } else if (cj == NULL) { /* self */
 
         if (ci->ti_end_min == ti_current && t->skip &&
-            t->type != task_type_sort)
+            t->type != task_type_sort && t->type)
           error(
-              "Task (type='%s/%s') should not have been skipped ti_current=%d "
-              "c->ti_end_min=%d",
+              "Task (type='%s/%s') should not have been skipped "
+              "ti_current=%lld "
+              "c->ti_end_min=%lld",
               taskID_names[t->type], subtaskID_names[t->subtype], ti_current,
               ci->ti_end_min);
 
@@ -1076,20 +1110,26 @@ void scheduler_start(struct scheduler *s) {
         if (ci->ti_end_min == ti_current && t->skip &&
             t->type == task_type_sort && t->flags == 0)
           error(
-              "Task (type='%s/%s') should not have been skipped ti_current=%d "
-              "c->ti_end_min=%d t->flags=%d",
+              "Task (type='%s/%s') should not have been skipped "
+              "ti_current=%lld "
+              "c->ti_end_min=%lld t->flags=%d",
               taskID_names[t->type], subtaskID_names[t->subtype], ti_current,
               ci->ti_end_min, t->flags);
 
       } else { /* pair */
 
-        if ((ci->ti_end_min == ti_current || cj->ti_end_min == ti_current) &&
-            t->skip)
-          error(
-              "Task (type='%s/%s') should not have been skipped ti_current=%d "
-              "ci->ti_end_min=%d cj->ti_end_min=%d",
-              taskID_names[t->type], subtaskID_names[t->subtype], ti_current,
-              ci->ti_end_min, cj->ti_end_min);
+        if (t->skip) {
+
+          /* Check that the pair is active if the local cell is active */
+          if ((ci->ti_end_min == ti_current && ci->nodeID == engine_rank) ||
+              (cj->ti_end_min == ti_current && cj->nodeID == engine_rank))
+            error(
+                "Task (type='%s/%s') should not have been skipped "
+                "ti_current=%lld "
+                "ci->ti_end_min=%lld cj->ti_end_min=%lld",
+                taskID_names[t->type], subtaskID_names[t->subtype], ti_current,
+                ci->ti_end_min, cj->ti_end_min);
+        }
       }
     }
   }
@@ -1137,7 +1177,7 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) {
   /* Otherwise, look for a suitable queue. */
   else {
 #ifdef WITH_MPI
-    int err;
+    int err = MPI_SUCCESS;
 #endif
 
     /* Find the previous owner for each task type, and do
@@ -1147,7 +1187,10 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) {
       case task_type_sub_self:
       case task_type_sort:
       case task_type_ghost:
-      case task_type_kick:
+      case task_type_kick1:
+      case task_type_kick2:
+      case task_type_drift:
+      case task_type_timestep:
       case task_type_init:
         qid = t->ci->super->owner;
         break;
@@ -1161,19 +1204,29 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) {
       case task_type_recv:
 #ifdef WITH_MPI
         if (t->subtype == task_subtype_tend) {
-          t->buff = malloc(sizeof(int) * t->ci->pcell_size);
-          err = MPI_Irecv(t->buff, t->ci->pcell_size, MPI_INT, t->ci->nodeID,
-                          t->flags, MPI_COMM_WORLD, &t->req);
-        } else {
+          t->buff = malloc(sizeof(integertime_t) * t->ci->pcell_size);
+          err = MPI_Irecv(t->buff, t->ci->pcell_size * sizeof(integertime_t),
+                          MPI_BYTE, t->ci->nodeID, t->flags, MPI_COMM_WORLD,
+                          &t->req);
+        } else if (t->subtype == task_subtype_xv ||
+                   t->subtype == task_subtype_rho) {
           err = MPI_Irecv(t->ci->parts, t->ci->count, part_mpi_type,
                           t->ci->nodeID, t->flags, MPI_COMM_WORLD, &t->req);
+          // message( "receiving %i parts with tag=%i from %i to %i." ,
+          //     t->ci->count , t->flags , t->ci->nodeID , s->nodeID );
+          // fflush(stdout);
+        } else if (t->subtype == task_subtype_gpart) {
+          err = MPI_Irecv(t->ci->gparts, t->ci->gcount, gpart_mpi_type,
+                          t->ci->nodeID, t->flags, MPI_COMM_WORLD, &t->req);
+        } else if (t->subtype == task_subtype_spart) {
+          err = MPI_Irecv(t->ci->sparts, t->ci->scount, spart_mpi_type,
+                          t->ci->nodeID, t->flags, MPI_COMM_WORLD, &t->req);
+        } else {
+          error("Unknown communication sub-type");
         }
         if (err != MPI_SUCCESS) {
           mpi_error(err, "Failed to emit irecv for particle data.");
         }
-        // message( "receiving %i parts with tag=%i from %i to %i." ,
-        //     t->ci->count , t->flags , t->ci->nodeID , s->nodeID );
-        // fflush(stdout);
         qid = 1 % s->nr_queues;
 #else
         error("SWIFT was not compiled with MPI support.");
@@ -1182,20 +1235,35 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) {
       case task_type_send:
 #ifdef WITH_MPI
         if (t->subtype == task_subtype_tend) {
-          t->buff = malloc(sizeof(int) * t->ci->pcell_size);
+          t->buff = malloc(sizeof(integertime_t) * t->ci->pcell_size);
           cell_pack_ti_ends(t->ci, t->buff);
-          err = MPI_Isend(t->buff, t->ci->pcell_size, MPI_INT, t->cj->nodeID,
-                          t->flags, MPI_COMM_WORLD, &t->req);
-        } else {
+          err = MPI_Isend(t->buff, t->ci->pcell_size * sizeof(integertime_t),
+                          MPI_BYTE, t->cj->nodeID, t->flags, MPI_COMM_WORLD,
+                          &t->req);
+        } else if (t->subtype == task_subtype_xv ||
+                   t->subtype == task_subtype_rho) {
+#ifdef SWIFT_DEBUG_CHECKS
+          for (int k = 0; k < t->ci->count; k++)
+            if (t->ci->parts[k].ti_drift != s->space->e->ti_current)
+              error("Sending un-drifted particle !");
+#endif
           err = MPI_Isend(t->ci->parts, t->ci->count, part_mpi_type,
                           t->cj->nodeID, t->flags, MPI_COMM_WORLD, &t->req);
+          // message( "sending %i parts with tag=%i from %i to %i." ,
+          //     t->ci->count , t->flags , s->nodeID , t->cj->nodeID );
+          // fflush(stdout);
+        } else if (t->subtype == task_subtype_gpart) {
+          err = MPI_Isend(t->ci->gparts, t->ci->gcount, gpart_mpi_type,
+                          t->cj->nodeID, t->flags, MPI_COMM_WORLD, &t->req);
+        } else if (t->subtype == task_subtype_spart) {
+          err = MPI_Isend(t->ci->sparts, t->ci->scount, spart_mpi_type,
+                          t->cj->nodeID, t->flags, MPI_COMM_WORLD, &t->req);
+        } else {
+          error("Unknown communication sub-type");
         }
         if (err != MPI_SUCCESS) {
           mpi_error(err, "Failed to emit isend for particle data.");
         }
-        // message( "sending %i parts with tag=%i from %i to %i." ,
-        //     t->ci->count , t->flags , s->nodeID , t->cj->nodeID );
-        // fflush(stdout);
         qid = 0;
 #else
         error("SWIFT was not compiled with MPI support.");
@@ -1408,8 +1476,8 @@ void scheduler_init(struct scheduler *s, struct space *space, int nr_tasks,
   lock_init(&s->lock);
 
   /* Allocate the queues. */
-  if ((s->queues = (struct queue *)malloc(sizeof(struct queue) * nr_queues)) ==
-      NULL)
+  if (posix_memalign((void **)&s->queues, queue_struct_align,
+                     sizeof(struct queue) * nr_queues) != 0)
     error("Failed to allocate queues.");
 
   /* Initialize each queue. */
diff --git a/src/serial_io.c b/src/serial_io.c
index b9ad0fbaa856a889d3f84bb42013282f3640fd5e..eaf5541992981463213db9685290fa9f624a4130 100644
--- a/src/serial_io.c
+++ b/src/serial_io.c
@@ -46,6 +46,7 @@
 #include "io_properties.h"
 #include "kernel_hydro.h"
 #include "part.h"
+#include "stars_io.h"
 #include "units.h"
 
 /*-----------------------------------------------------------------------------
@@ -397,11 +398,16 @@ void writeArray(struct engine* e, hid_t grp, char* fileName, FILE* xmfFile,
  * @param dim (output) The dimension of the volume read from the file.
  * @param parts (output) The array of #part (gas particles) read from the file.
  * @param gparts (output) The array of #gpart read from the file.
+ * @param sparts (output) Array of #spart particles.
  * @param Ngas (output) The number of #part read from the file on that node.
  * @param Ngparts (output) The number of #gpart read from the file on that node.
+ * @param Nstars (output) The number of #spart read from the file on that node.
  * @param periodic (output) 1 if the volume is periodic, 0 if not.
  * @param flag_entropy (output) 1 if the ICs contained Entropy in the
  * InternalEnergy field
+ * @param with_hydro Are we reading gas particles ?
+ * @param with_gravity Are we reading/creating #gpart arrays ?
+ * @param with_stars Are we reading star particles ?
  * @param mpi_rank The MPI rank of this node
  * @param mpi_size The number of MPI ranks
  * @param comm The MPI communicator
@@ -418,19 +424,23 @@ void writeArray(struct engine* e, hid_t grp, char* fileName, FILE* xmfFile,
  */
 void read_ic_serial(char* fileName, const struct UnitSystem* internal_units,
                     double dim[3], struct part** parts, struct gpart** gparts,
-                    size_t* Ngas, size_t* Ngparts, int* periodic,
-                    int* flag_entropy, int mpi_rank, int mpi_size,
-                    MPI_Comm comm, MPI_Info info, int dry_run) {
+                    struct spart** sparts, size_t* Ngas, size_t* Ngparts,
+                    size_t* Nstars, int* periodic, int* flag_entropy,
+                    int with_hydro, int with_gravity, int with_stars,
+                    int mpi_rank, int mpi_size, MPI_Comm comm, MPI_Info info,
+                    int dry_run) {
+
   hid_t h_file = 0, h_grp = 0;
   /* GADGET has only cubic boxes (in cosmological mode) */
   double boxSize[3] = {0.0, -1.0, -1.0};
   /* GADGET has 6 particle types. We only keep the type 0 & 1 for now*/
-  int numParticles[NUM_PARTICLE_TYPES] = {0};
-  int numParticles_highWord[NUM_PARTICLE_TYPES] = {0};
+  long long numParticles[NUM_PARTICLE_TYPES] = {0};
+  long long numParticles_highWord[NUM_PARTICLE_TYPES] = {0};
   size_t N[NUM_PARTICLE_TYPES] = {0};
   long long N_total[NUM_PARTICLE_TYPES] = {0};
   long long offset[NUM_PARTICLE_TYPES] = {0};
   int dimension = 3; /* Assume 3D if nothing is specified */
+  size_t Ndm = 0;
   struct UnitSystem* ic_units = malloc(sizeof(struct UnitSystem));
 
   /* First read some information about the content */
@@ -472,12 +482,13 @@ void read_ic_serial(char* fileName, const struct UnitSystem* internal_units,
     readAttribute(h_grp, "Flag_Entropy_ICs", INT, flag_entropy_temp);
     *flag_entropy = flag_entropy_temp[0];
     readAttribute(h_grp, "BoxSize", DOUBLE, boxSize);
-    readAttribute(h_grp, "NumPart_Total", UINT, numParticles);
-    readAttribute(h_grp, "NumPart_Total_HighWord", UINT, numParticles_highWord);
+    readAttribute(h_grp, "NumPart_Total", LONGLONG, numParticles);
+    readAttribute(h_grp, "NumPart_Total_HighWord", LONGLONG,
+                  numParticles_highWord);
 
     for (int ptype = 0; ptype < NUM_PARTICLE_TYPES; ++ptype)
-      N_total[ptype] = ((long long)numParticles[ptype]) +
-                       ((long long)numParticles_highWord[ptype] << 32);
+      N_total[ptype] =
+          (numParticles[ptype]) + (numParticles_highWord[ptype] << 32);
 
     dim[0] = boxSize[0];
     dim[1] = (boxSize[1] < 0) ? boxSize[0] : boxSize[1];
@@ -536,7 +547,7 @@ void read_ic_serial(char* fileName, const struct UnitSystem* internal_units,
   /* Now need to broadcast that information to all ranks. */
   MPI_Bcast(flag_entropy, 1, MPI_INT, 0, comm);
   MPI_Bcast(periodic, 1, MPI_INT, 0, comm);
-  MPI_Bcast(&N_total, NUM_PARTICLE_TYPES, MPI_LONG_LONG, 0, comm);
+  MPI_Bcast(&N_total, NUM_PARTICLE_TYPES, MPI_LONG_LONG_INT, 0, comm);
   MPI_Bcast(dim, 3, MPI_DOUBLE, 0, comm);
   MPI_Bcast(ic_units, sizeof(struct UnitSystem), MPI_BYTE, 0, comm);
 
@@ -547,19 +558,32 @@ void read_ic_serial(char* fileName, const struct UnitSystem* internal_units,
   }
 
   /* Allocate memory to store SPH particles */
-  *Ngas = N[0];
-  if (posix_memalign((void*)parts, part_align, (*Ngas) * sizeof(struct part)) !=
-      0)
-    error("Error while allocating memory for particles");
-  bzero(*parts, *Ngas * sizeof(struct part));
-
-  /* Allocate memory to store all particles */
-  const size_t Ndm = N[1];
-  *Ngparts = N[1] + N[0];
-  if (posix_memalign((void*)gparts, gpart_align,
-                     *Ngparts * sizeof(struct gpart)) != 0)
-    error("Error while allocating memory for gravity particles");
-  bzero(*gparts, *Ngparts * sizeof(struct gpart));
+  if (with_hydro) {
+    *Ngas = N[0];
+    if (posix_memalign((void*)parts, part_align, *Ngas * sizeof(struct part)) !=
+        0)
+      error("Error while allocating memory for SPH particles");
+    bzero(*parts, *Ngas * sizeof(struct part));
+  }
+
+  /* Allocate memory to store star particles */
+  if (with_stars) {
+    *Nstars = N[STAR];
+    if (posix_memalign((void*)sparts, spart_align,
+                       *Nstars * sizeof(struct spart)) != 0)
+      error("Error while allocating memory for star particles");
+    bzero(*sparts, *Nstars * sizeof(struct spart));
+  }
+
+  /* Allocate memory to store all gravity  particles */
+  if (with_gravity) {
+    Ndm = N[1];
+    *Ngparts = (with_hydro ? N[GAS] : 0) + N[DM] + (with_stars ? N[STAR] : 0);
+    if (posix_memalign((void*)gparts, gpart_align,
+                       *Ngparts * sizeof(struct gpart)) != 0)
+      error("Error while allocating memory for gravity particles");
+    bzero(*gparts, *Ngparts * sizeof(struct gpart));
+  }
 
   /* message("Allocated %8.2f MB for particles.", *N * sizeof(struct part) / */
   /* 	  (1024.*1024.)); */
@@ -602,13 +626,24 @@ void read_ic_serial(char* fileName, const struct UnitSystem* internal_units,
         switch (ptype) {
 
           case GAS:
-            Nparticles = *Ngas;
-            hydro_read_particles(*parts, list, &num_fields);
+            if (with_hydro) {
+              Nparticles = *Ngas;
+              hydro_read_particles(*parts, list, &num_fields);
+            }
             break;
 
           case DM:
-            Nparticles = Ndm;
-            darkmatter_read_particles(*gparts, list, &num_fields);
+            if (with_gravity) {
+              Nparticles = Ndm;
+              darkmatter_read_particles(*gparts, list, &num_fields);
+            }
+            break;
+
+          case STAR:
+            if (with_stars) {
+              Nparticles = *Nstars;
+              star_read_particles(*sparts, list, &num_fields);
+            }
             break;
 
           default:
@@ -634,16 +669,21 @@ void read_ic_serial(char* fileName, const struct UnitSystem* internal_units,
     MPI_Barrier(comm);
   }
 
-  /* Clean up */
-  free(ic_units);
-
   /* Prepare the DM particles */
-  if (!dry_run) prepare_dm_gparts(*gparts, Ndm);
+  if (!dry_run && with_gravity) prepare_dm_gparts(*gparts, Ndm);
 
-  /* Now duplicate the hydro particle into gparts */
-  if (!dry_run) duplicate_hydro_gparts(*parts, *gparts, *Ngas, Ndm);
+  /* Duplicate the hydro particles into gparts */
+  if (!dry_run && with_gravity && with_hydro)
+    duplicate_hydro_gparts(*parts, *gparts, *Ngas, Ndm);
+
+  /* Duplicate the star particles into gparts */
+  if (!dry_run && with_gravity && with_stars)
+    duplicate_star_gparts(*sparts, *gparts, *Nstars, Ndm + *Ngas);
 
   /* message("Done Reading particles..."); */
+
+  /* Clean up */
+  free(ic_units);
 }
 
 /**
@@ -673,17 +713,19 @@ void write_output_serial(struct engine* e, const char* baseName,
 
   hid_t h_file = 0, h_grp = 0;
   const size_t Ngas = e->s->nr_parts;
+  const size_t Nstars = e->s->nr_sparts;
   const size_t Ntot = e->s->nr_gparts;
   int periodic = e->s->periodic;
   int numFiles = 1;
   struct part* parts = e->s->parts;
   struct gpart* gparts = e->s->gparts;
   struct gpart* dmparts = NULL;
+  struct spart* sparts = e->s->sparts;
   static int outputCount = 0;
   FILE* xmfFile = 0;
 
   /* Number of unassociated gparts */
-  const size_t Ndm = Ntot > 0 ? Ntot - Ngas : 0;
+  const size_t Ndm = Ntot > 0 ? Ntot - (Ngas + Nstars) : 0;
 
   /* File name */
   char fileName[FILENAME_BUFFER_SIZE];
@@ -691,15 +733,15 @@ void write_output_serial(struct engine* e, const char* baseName,
            outputCount);
 
   /* Compute offset in the file and total number of particles */
-  size_t N[NUM_PARTICLE_TYPES] = {Ngas, Ndm, 0};
+  size_t N[NUM_PARTICLE_TYPES] = {Ngas, Ndm, 0, 0, Nstars, 0};
   long long N_total[NUM_PARTICLE_TYPES] = {0};
   long long offset[NUM_PARTICLE_TYPES] = {0};
-  MPI_Exscan(&N, &offset, NUM_PARTICLE_TYPES, MPI_LONG_LONG, MPI_SUM, comm);
+  MPI_Exscan(&N, &offset, NUM_PARTICLE_TYPES, MPI_LONG_LONG_INT, MPI_SUM, comm);
   for (int ptype = 0; ptype < NUM_PARTICLE_TYPES; ++ptype)
     N_total[ptype] = offset[ptype] + N[ptype];
 
   /* The last rank now has the correct N_total. Let's broadcast from there */
-  MPI_Bcast(&N_total, 6, MPI_LONG_LONG, mpi_size - 1, comm);
+  MPI_Bcast(&N_total, 6, MPI_LONG_LONG_INT, mpi_size - 1, comm);
 
   /* Now everybody konws its offset and the total number of particles of each
    * type */
@@ -909,7 +951,11 @@ void write_output_serial(struct engine* e, const char* baseName,
             /* Write DM particles */
             Nparticles = Ndm;
             darkmatter_write_particles(dmparts, list, &num_fields);
+            break;
 
+          case STAR:
+            Nparticles = Nstars;
+            star_write_particles(sparts, list, &num_fields);
             break;
 
           default:
@@ -923,7 +969,10 @@ void write_output_serial(struct engine* e, const char* baseName,
                      internal_units, snapshot_units);
 
         /* Free temporary array */
-        free(dmparts);
+        if (dmparts) {
+          free(dmparts);
+          dmparts = 0;
+        }
 
         /* Close particle group */
         H5Gclose(h_grp);
diff --git a/src/serial_io.h b/src/serial_io.h
index a2226e5cd9848ff2515b15111af43ccc67275a28..94dd68b93626411ec7dc314d783d80c9e0e967b6 100644
--- a/src/serial_io.h
+++ b/src/serial_io.h
@@ -36,9 +36,11 @@
 
 void read_ic_serial(char* fileName, const struct UnitSystem* internal_units,
                     double dim[3], struct part** parts, struct gpart** gparts,
-                    size_t* Ngas, size_t* Ngparts, int* periodic,
-                    int* flag_entropy, int mpi_rank, int mpi_size,
-                    MPI_Comm comm, MPI_Info info, int dry_run);
+                    struct spart** sparts, size_t* Ngas, size_t* Ngparts,
+                    size_t* Nstars, int* periodic, int* flag_entropy,
+                    int with_hydro, int with_gravity, int with_stars,
+                    int mpi_rank, int mpi_size, MPI_Comm comm, MPI_Info info,
+                    int dry_run);
 
 void write_output_serial(struct engine* e, const char* baseName,
                          const struct UnitSystem* internal_units,
diff --git a/src/single_io.c b/src/single_io.c
index ceeba4eb80a47c3feed7e898deb5e1fe7e427c0c..b279f22086833bc689919f41a8904232e234a394 100644
--- a/src/single_io.c
+++ b/src/single_io.c
@@ -45,6 +45,7 @@
 #include "io_properties.h"
 #include "kernel_hydro.h"
 #include "part.h"
+#include "stars_io.h"
 #include "units.h"
 
 /*-----------------------------------------------------------------------------
@@ -312,14 +313,18 @@ void writeArray(struct engine* e, hid_t grp, char* fileName, FILE* xmfFile,
  * @param fileName The file to read.
  * @param internal_units The system units used internally
  * @param dim (output) The dimension of the volume.
- * @param parts (output) Array of Gas particles.
+ * @param parts (output) Array of #part particles.
  * @param gparts (output) Array of #gpart particles.
+ * @param sparts (output) Array of #spart particles.
  * @param Ngas (output) number of Gas particles read.
  * @param Ngparts (output) The number of #gpart read.
+ * @param Nstars (output) The number of #spart read.
  * @param periodic (output) 1 if the volume is periodic, 0 if not.
  * @param flag_entropy (output) 1 if the ICs contained Entropy in the
- * InternalEnergy
- * field
+ * InternalEnergy field
+ * @param with_hydro Are we reading gas particles ?
+ * @param with_gravity Are we reading/creating #gpart arrays ?
+ * @param with_stars Are we reading star particles ?
  * @param dry_run If 1, don't read the particle. Only allocates the arrays.
  *
  * Opens the HDF5 file fileName and reads the particles contained
@@ -332,8 +337,10 @@ void writeArray(struct engine* e, hid_t grp, char* fileName, FILE* xmfFile,
  */
 void read_ic_single(char* fileName, const struct UnitSystem* internal_units,
                     double dim[3], struct part** parts, struct gpart** gparts,
-                    size_t* Ngas, size_t* Ngparts, int* periodic,
-                    int* flag_entropy, int dry_run) {
+                    struct spart** sparts, size_t* Ngas, size_t* Ngparts,
+                    size_t* Nstars, int* periodic, int* flag_entropy,
+                    int with_hydro, int with_gravity, int with_stars,
+                    int dry_run) {
 
   hid_t h_file = 0, h_grp = 0;
   /* GADGET has only cubic boxes (in cosmological mode) */
@@ -343,7 +350,7 @@ void read_ic_single(char* fileName, const struct UnitSystem* internal_units,
   int numParticles_highWord[NUM_PARTICLE_TYPES] = {0};
   size_t N[NUM_PARTICLE_TYPES] = {0};
   int dimension = 3; /* Assume 3D if nothing is specified */
-  size_t Ndm;
+  size_t Ndm = 0;
 
   /* Open file */
   /* message("Opening file '%s' as IC.", fileName); */
@@ -439,19 +446,32 @@ void read_ic_single(char* fileName, const struct UnitSystem* internal_units,
         units_conversion_factor(ic_units, internal_units, UNIT_CONV_LENGTH);
 
   /* Allocate memory to store SPH particles */
-  *Ngas = N[0];
-  if (posix_memalign((void*)parts, part_align, *Ngas * sizeof(struct part)) !=
-      0)
-    error("Error while allocating memory for SPH particles");
-  bzero(*parts, *Ngas * sizeof(struct part));
-
-  /* Allocate memory to store all particles */
-  Ndm = N[1];
-  *Ngparts = N[1] + N[0];
-  if (posix_memalign((void*)gparts, gpart_align,
-                     *Ngparts * sizeof(struct gpart)) != 0)
-    error("Error while allocating memory for gravity particles");
-  bzero(*gparts, *Ngparts * sizeof(struct gpart));
+  if (with_hydro) {
+    *Ngas = N[GAS];
+    if (posix_memalign((void*)parts, part_align, *Ngas * sizeof(struct part)) !=
+        0)
+      error("Error while allocating memory for SPH particles");
+    bzero(*parts, *Ngas * sizeof(struct part));
+  }
+
+  /* Allocate memory to store star particles */
+  if (with_stars) {
+    *Nstars = N[STAR];
+    if (posix_memalign((void*)sparts, spart_align,
+                       *Nstars * sizeof(struct spart)) != 0)
+      error("Error while allocating memory for star particles");
+    bzero(*sparts, *Nstars * sizeof(struct spart));
+  }
+
+  /* Allocate memory to store all gravity particles */
+  if (with_gravity) {
+    Ndm = N[DM];
+    *Ngparts = (with_hydro ? N[GAS] : 0) + N[DM] + (with_stars ? N[STAR] : 0);
+    if (posix_memalign((void*)gparts, gpart_align,
+                       *Ngparts * sizeof(struct gpart)) != 0)
+      error("Error while allocating memory for gravity particles");
+    bzero(*gparts, *Ngparts * sizeof(struct gpart));
+  }
 
   /* message("Allocated %8.2f MB for particles.", *N * sizeof(struct part) /
    * (1024.*1024.)); */
@@ -482,13 +502,24 @@ void read_ic_single(char* fileName, const struct UnitSystem* internal_units,
     switch (ptype) {
 
       case GAS:
-        Nparticles = *Ngas;
-        hydro_read_particles(*parts, list, &num_fields);
+        if (with_hydro) {
+          Nparticles = *Ngas;
+          hydro_read_particles(*parts, list, &num_fields);
+        }
         break;
 
       case DM:
-        Nparticles = Ndm;
-        darkmatter_read_particles(*gparts, list, &num_fields);
+        if (with_gravity) {
+          Nparticles = Ndm;
+          darkmatter_read_particles(*gparts, list, &num_fields);
+        }
+        break;
+
+      case STAR:
+        if (with_stars) {
+          Nparticles = *Nstars;
+          star_read_particles(*sparts, list, &num_fields);
+        }
         break;
 
       default:
@@ -505,10 +536,15 @@ void read_ic_single(char* fileName, const struct UnitSystem* internal_units,
   }
 
   /* Prepare the DM particles */
-  if (!dry_run) prepare_dm_gparts(*gparts, Ndm);
+  if (!dry_run && with_gravity) prepare_dm_gparts(*gparts, Ndm);
+
+  /* Duplicate the hydro particles into gparts */
+  if (!dry_run && with_gravity && with_hydro)
+    duplicate_hydro_gparts(*parts, *gparts, *Ngas, Ndm);
 
-  /* Now duplicate the hydro particle into gparts */
-  if (!dry_run) duplicate_hydro_gparts(*parts, *gparts, *Ngas, Ndm);
+  /* Duplicate the star particles into gparts */
+  if (!dry_run && with_gravity && with_stars)
+    duplicate_star_gparts(*sparts, *gparts, *Nstars, Ndm + *Ngas);
 
   /* message("Done Reading particles..."); */
 
@@ -541,18 +577,20 @@ void write_output_single(struct engine* e, const char* baseName,
 
   hid_t h_file = 0, h_grp = 0;
   const size_t Ngas = e->s->nr_parts;
+  const size_t Nstars = e->s->nr_sparts;
   const size_t Ntot = e->s->nr_gparts;
   int periodic = e->s->periodic;
   int numFiles = 1;
   struct part* parts = e->s->parts;
   struct gpart* gparts = e->s->gparts;
   struct gpart* dmparts = NULL;
+  struct spart* sparts = e->s->sparts;
   static int outputCount = 0;
 
   /* Number of unassociated gparts */
-  const size_t Ndm = Ntot > 0 ? Ntot - Ngas : 0;
+  const size_t Ndm = Ntot > 0 ? Ntot - (Ngas + Nstars) : 0;
 
-  long long N_total[NUM_PARTICLE_TYPES] = {Ngas, Ndm, 0};
+  long long N_total[NUM_PARTICLE_TYPES] = {Ngas, Ndm, 0, 0, Nstars, 0};
 
   /* File name */
   char fileName[FILENAME_BUFFER_SIZE];
@@ -729,6 +767,11 @@ void write_output_single(struct engine* e, const char* baseName,
         darkmatter_write_particles(dmparts, list, &num_fields);
         break;
 
+      case STAR:
+        N = Nstars;
+        star_write_particles(sparts, list, &num_fields);
+        break;
+
       default:
         error("Particle Type %d not yet supported. Aborting", ptype);
     }
@@ -739,7 +782,10 @@ void write_output_single(struct engine* e, const char* baseName,
                  internal_units, snapshot_units);
 
     /* Free temporary array */
-    free(dmparts);
+    if (dmparts) {
+      free(dmparts);
+      dmparts = NULL;
+    }
 
     /* Close particle group */
     H5Gclose(h_grp);
diff --git a/src/single_io.h b/src/single_io.h
index 51a30a7bc6af7f3aaf5708a3d2df14982e026e3e..bc803b262f70f72ea93090d56112f5a70737c840 100644
--- a/src/single_io.h
+++ b/src/single_io.h
@@ -31,7 +31,9 @@
 
 void read_ic_single(char* fileName, const struct UnitSystem* internal_units,
                     double dim[3], struct part** parts, struct gpart** gparts,
-                    size_t* Ngas, size_t* Ndm, int* periodic, int* flag_entropy,
+                    struct spart** sparts, size_t* Ngas, size_t* Ndm,
+                    size_t* Nstars, int* periodic, int* flag_entropy,
+                    int with_hydro, int with_gravity, int with_stars,
                     int dry_run);
 
 void write_output_single(struct engine* e, const char* baseName,
diff --git a/src/space.c b/src/space.c
index 6e6a0768ff6a3a2982fd23edd84d61ac9afd5515..802dc30d1bcd44d4cce46b2a803afade07f5d685 100644
--- a/src/space.c
+++ b/src/space.c
@@ -52,6 +52,7 @@
 #include "memswap.h"
 #include "minmax.h"
 #include "runner.h"
+#include "stars.h"
 #include "threadpool.h"
 #include "tools.h"
 
@@ -107,6 +108,7 @@ struct parallel_sort {
   struct part *parts;
   struct gpart *gparts;
   struct xpart *xparts;
+  struct spart *sparts;
   int *ind;
   struct qstack *stack;
   unsigned int stack_size;
@@ -173,6 +175,8 @@ int space_getsid(struct space *s, struct cell **ci, struct cell **cj,
  *
  * @param s The #space.
  * @param c The #cell to recycle.
+ * @param rec_begin Pointer to the start of the list of cells to recycle.
+ * @param rec_end Pointer to the end of the list of cells to recycle.
  */
 void space_rebuild_recycle_rec(struct space *s, struct cell *c,
                                struct cell **rec_begin, struct cell **rec_end) {
@@ -208,10 +212,14 @@ void space_rebuild_recycle_mapper(void *map_data, int num_elements,
     c->sorted = 0;
     c->count = 0;
     c->gcount = 0;
+    c->scount = 0;
     c->init = NULL;
     c->extra_ghost = NULL;
     c->ghost = NULL;
-    c->kick = NULL;
+    c->kick1 = NULL;
+    c->kick2 = NULL;
+    c->timestep = NULL;
+    c->drift = NULL;
     c->cooling = NULL;
     c->sourceterms = NULL;
     c->super = c;
@@ -243,7 +251,7 @@ void space_regrid(struct space *s, int verbose) {
 
   const size_t nr_parts = s->nr_parts;
   const ticks tic = getticks();
-  const int ti_current = (s->e != NULL) ? s->e->ti_current : 0;
+  const integertime_t ti_current = (s->e != NULL) ? s->e->ti_current : 0;
 
   /* Run through the cells and get the current h_max. */
   // tic = getticks();
@@ -343,6 +351,12 @@ void space_regrid(struct space *s, int verbose) {
   if (s->cells_top == NULL || cdim[0] < s->cdim[0] || cdim[1] < s->cdim[1] ||
       cdim[2] < s->cdim[2]) {
 
+/* Be verbose about this. */
+#ifdef SWIFT_DEBUG_CHECKS
+    message("re)griding space cdim=(%d %d %d)", cdim[0], cdim[1], cdim[2]);
+    fflush(stdout);
+#endif
+
     /* Free the old cells, if they were allocated. */
     if (s->cells_top != NULL) {
       threadpool_map(&s->e->threadpool, space_rebuild_recycle_mapper,
@@ -386,6 +400,7 @@ void space_regrid(struct space *s, int verbose) {
           c->depth = 0;
           c->count = 0;
           c->gcount = 0;
+          c->scount = 0;
           c->super = c;
           c->ti_old = ti_current;
           lock_init(&c->lock);
@@ -459,16 +474,20 @@ void space_rebuild(struct space *s, int verbose) {
 
   const ticks tic = getticks();
 
-  /* Be verbose about this. */
-  // message("re)building space..."); fflush(stdout);
+/* Be verbose about this. */
+#ifdef SWIFT_DEBUG_CHECKS
+  if (s->e->nodeID == 0 || verbose) message("re)building space");
+  fflush(stdout);
+#endif
 
   /* Re-grid if necessary, or just re-set the cell data. */
   space_regrid(s, verbose);
 
   size_t nr_parts = s->nr_parts;
   size_t nr_gparts = s->nr_gparts;
+  size_t nr_sparts = s->nr_sparts;
   struct cell *restrict cells_top = s->cells_top;
-  const int ti_current = (s->e != NULL) ? s->e->ti_current : 0;
+  const integertime_t ti_current = (s->e != NULL) ? s->e->ti_current : 0;
 
   /* Run through the particles and get their cell index. Allocates
      an index that is larger than the number of particles to avoid
@@ -487,25 +506,37 @@ void space_rebuild(struct space *s, int verbose) {
   if (s->size_gparts > 0)
     space_gparts_get_cell_index(s, gind, cells_top, verbose);
 
+  /* Run through the star particles and get their cell index. */
+  const size_t sind_size = s->size_sparts + 100;
+  int *sind;
+  if ((sind = (int *)malloc(sizeof(int) * sind_size)) == NULL)
+    error("Failed to allocate temporary s-particle indices.");
+  if (s->size_sparts > 0)
+    space_sparts_get_cell_index(s, sind, cells_top, verbose);
+
 #ifdef WITH_MPI
+  const int local_nodeID = s->e->nodeID;
 
   /* Move non-local parts to the end of the list. */
-  const int local_nodeID = s->e->nodeID;
   for (size_t k = 0; k < nr_parts;) {
     if (cells_top[ind[k]].nodeID != local_nodeID) {
       nr_parts -= 1;
+      /* Swap the particle */
       const struct part tp = s->parts[k];
       s->parts[k] = s->parts[nr_parts];
       s->parts[nr_parts] = tp;
+      /* Swap the link with the gpart */
       if (s->parts[k].gpart != NULL) {
         s->parts[k].gpart->id_or_neg_offset = -k;
       }
       if (s->parts[nr_parts].gpart != NULL) {
         s->parts[nr_parts].gpart->id_or_neg_offset = -nr_parts;
       }
+      /* Swap the xpart */
       const struct xpart txp = s->xparts[k];
       s->xparts[k] = s->xparts[nr_parts];
       s->xparts[nr_parts] = txp;
+      /* Swap the index */
       const int t = ind[k];
       ind[k] = ind[nr_parts];
       ind[nr_parts] = t;
@@ -529,20 +560,67 @@ void space_rebuild(struct space *s, int verbose) {
   }
 #endif
 
+  /* Move non-local sparts to the end of the list. */
+  for (size_t k = 0; k < nr_sparts;) {
+    if (cells_top[sind[k]].nodeID != local_nodeID) {
+      nr_sparts -= 1;
+      /* Swap the particle */
+      const struct spart tp = s->sparts[k];
+      s->sparts[k] = s->sparts[nr_sparts];
+      s->sparts[nr_sparts] = tp;
+      /* Swap the link with the gpart */
+      if (s->sparts[k].gpart != NULL) {
+        s->sparts[k].gpart->id_or_neg_offset = -k;
+      }
+      if (s->sparts[nr_sparts].gpart != NULL) {
+        s->sparts[nr_sparts].gpart->id_or_neg_offset = -nr_sparts;
+      }
+      /* Swap the index */
+      const int t = sind[k];
+      sind[k] = sind[nr_sparts];
+      sind[nr_sparts] = t;
+    } else {
+      /* Increment when not exchanging otherwise we need to retest "k".*/
+      k++;
+    }
+  }
+
+#ifdef SWIFT_DEBUG_CHECKS
+  /* Check that all sparts are in the correct place (untested). */
+  for (size_t k = 0; k < nr_sparts; k++) {
+    if (cells_top[sind[k]].nodeID != local_nodeID) {
+      error("Failed to move all non-local sparts to send list");
+    }
+  }
+  for (size_t k = nr_sparts; k < s->nr_sparts; k++) {
+    if (cells_top[sind[k]].nodeID == local_nodeID) {
+      error("Failed to remove local sparts from send list");
+    }
+  }
+#endif
+
   /* Move non-local gparts to the end of the list. */
   for (size_t k = 0; k < nr_gparts;) {
     if (cells_top[gind[k]].nodeID != local_nodeID) {
       nr_gparts -= 1;
+      /* Swap the particle */
       const struct gpart tp = s->gparts[k];
       s->gparts[k] = s->gparts[nr_gparts];
       s->gparts[nr_gparts] = tp;
-      if (s->gparts[k].id_or_neg_offset <= 0) {
+      /* Swap the link with part/spart */
+      if (s->gparts[k].type == swift_type_gas) {
         s->parts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k];
+      } else if (s->gparts[k].type == swift_type_star) {
+        s->sparts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k];
       }
-      if (s->gparts[nr_gparts].id_or_neg_offset <= 0) {
+      if (s->gparts[nr_gparts].type == swift_type_gas) {
         s->parts[-s->gparts[nr_gparts].id_or_neg_offset].gpart =
             &s->gparts[nr_gparts];
+      } else if (s->gparts[nr_gparts].type == swift_type_star) {
+        s->sparts[-s->gparts[nr_gparts].id_or_neg_offset].gpart =
+            &s->gparts[nr_gparts];
       }
+      /* Swap the index */
       const int t = gind[k];
       gind[k] = gind[nr_gparts];
       gind[nr_gparts] = t;
@@ -570,14 +648,17 @@ void space_rebuild(struct space *s, int verbose) {
      the parts arrays. */
   size_t nr_parts_exchanged = s->nr_parts - nr_parts;
   size_t nr_gparts_exchanged = s->nr_gparts - nr_gparts;
+  size_t nr_sparts_exchanged = s->nr_sparts - nr_sparts;
   engine_exchange_strays(s->e, nr_parts, &ind[nr_parts], &nr_parts_exchanged,
-                         nr_gparts, &gind[nr_gparts], &nr_gparts_exchanged);
+                         nr_gparts, &gind[nr_gparts], &nr_gparts_exchanged,
+                         nr_sparts, &sind[nr_sparts], &nr_sparts_exchanged);
 
   /* Set the new particle counts. */
   s->nr_parts = nr_parts + nr_parts_exchanged;
   s->nr_gparts = nr_gparts + nr_gparts_exchanged;
+  s->nr_sparts = nr_sparts + nr_sparts_exchanged;
 
-  /* Re-allocate the index array if needed.. */
+  /* Re-allocate the index array for the parts if needed.. */
   if (s->nr_parts + 1 > ind_size) {
     int *ind_new;
     if ((ind_new = (int *)malloc(sizeof(int) * (s->nr_parts + 1))) == NULL)
@@ -587,10 +668,20 @@ void space_rebuild(struct space *s, int verbose) {
     ind = ind_new;
   }
 
+  /* Re-allocate the index array for the sparts if needed.. */
+  if (s->nr_sparts + 1 > sind_size) {
+    int *sind_new;
+    if ((sind_new = (int *)malloc(sizeof(int) * (s->nr_sparts + 1))) == NULL)
+      error("Failed to allocate temporary s-particle indices.");
+    memcpy(sind_new, sind, sizeof(int) * nr_sparts);
+    free(sind);
+    sind = sind_new;
+  }
+
   const int cdim[3] = {s->cdim[0], s->cdim[1], s->cdim[2]};
   const double ih[3] = {s->iwidth[0], s->iwidth[1], s->iwidth[2]};
 
-  /* Assign each particle to its cell. */
+  /* Assign each received part to its cell. */
   for (size_t k = nr_parts; k < s->nr_parts; k++) {
     const struct part *const p = &s->parts[k];
     ind[k] =
@@ -603,28 +694,81 @@ void space_rebuild(struct space *s, int verbose) {
   }
   nr_parts = s->nr_parts;
 
+  /* Assign each received spart to its cell. */
+  for (size_t k = nr_sparts; k < s->nr_sparts; k++) {
+    const struct spart *const sp = &s->sparts[k];
+    sind[k] =
+        cell_getid(cdim, sp->x[0] * ih[0], sp->x[1] * ih[1], sp->x[2] * ih[2]);
+#ifdef SWIFT_DEBUG_CHECKS
+    if (cells_top[sind[k]].nodeID != local_nodeID)
+      error("Received s-part that does not belong to me (nodeID=%i).",
+            cells_top[sind[k]].nodeID);
+#endif
+  }
+  nr_sparts = s->nr_sparts;
+
 #endif /* WITH_MPI */
 
   /* Sort the parts according to their cells. */
   if (nr_parts > 0)
     space_parts_sort(s, ind, nr_parts, 0, s->nr_cells - 1, verbose);
 
-  /* Re-link the gparts. */
-  if (nr_parts > 0 && nr_gparts > 0) part_relink_gparts(s->parts, nr_parts, 0);
+#ifdef SWIFT_DEBUG_CHECKS
+  /* Verify that the part have been sorted correctly. */
+  for (size_t k = 0; k < nr_parts; k++) {
+    const struct part *p = &s->parts[k];
+
+    /* New cell index */
+    const int new_ind =
+        cell_getid(s->cdim, p->x[0] * s->iwidth[0], p->x[1] * s->iwidth[1],
+                   p->x[2] * s->iwidth[2]);
+
+    /* New cell of this part */
+    const struct cell *c = &s->cells_top[new_ind];
+
+    if (ind[k] != new_ind)
+      error("part's new cell index not matching sorted index.");
+
+    if (p->x[0] < c->loc[0] || p->x[0] > c->loc[0] + c->width[0] ||
+        p->x[1] < c->loc[1] || p->x[1] > c->loc[1] + c->width[1] ||
+        p->x[2] < c->loc[2] || p->x[2] > c->loc[2] + c->width[2])
+      error("part not sorted into the right top-level cell!");
+  }
+#endif
+
+  /* Sort the sparts according to their cells. */
+  if (nr_sparts > 0)
+    space_sparts_sort(s, sind, nr_sparts, 0, s->nr_cells - 1, verbose);
 
 #ifdef SWIFT_DEBUG_CHECKS
-  /* Verify space_sort_struct. */
-  for (size_t k = 1; k < nr_parts; k++) {
-    if (ind[k - 1] > ind[k]) {
-      error("Sort failed!");
-    } else if (ind[k] != cell_getid(s->cdim, s->parts[k].x[0] * s->iwidth[0],
-                                    s->parts[k].x[1] * s->iwidth[1],
-                                    s->parts[k].x[2] * s->iwidth[2])) {
-      error("Incorrect indices!");
-    }
+  /* Verify that the spart have been sorted correctly. */
+  for (size_t k = 0; k < nr_sparts; k++) {
+    const struct spart *sp = &s->sparts[k];
+
+    /* New cell index */
+    const int new_sind =
+        cell_getid(s->cdim, sp->x[0] * s->iwidth[0], sp->x[1] * s->iwidth[1],
+                   sp->x[2] * s->iwidth[2]);
+
+    /* New cell of this spart */
+    const struct cell *c = &s->cells_top[new_sind];
+
+    if (sind[k] != new_sind)
+      error("spart's new cell index not matching sorted index.");
+
+    if (sp->x[0] < c->loc[0] || sp->x[0] > c->loc[0] + c->width[0] ||
+        sp->x[1] < c->loc[1] || sp->x[1] > c->loc[1] + c->width[1] ||
+        sp->x[2] < c->loc[2] || sp->x[2] > c->loc[2] + c->width[2])
+      error("spart not sorted into the right top-level cell!");
   }
 #endif
 
+  /* Re-link the gparts to their (s-)particles. */
+  if (nr_parts > 0 && nr_gparts > 0)
+    part_relink_gparts_to_parts(s->parts, nr_parts, 0);
+  if (nr_sparts > 0 && nr_gparts > 0)
+    part_relink_gparts_to_sparts(s->sparts, nr_sparts, 0);
+
   /* Extract the cell counts from the sorted indices. */
   size_t last_index = 0;
   ind[nr_parts] = s->nr_cells;  // sentinel.
@@ -635,12 +779,23 @@ void space_rebuild(struct space *s, int verbose) {
     }
   }
 
+  /* Extract the cell counts from the sorted indices. */
+  size_t last_sindex = 0;
+  sind[nr_sparts] = s->nr_cells;  // sentinel.
+  for (size_t k = 0; k < nr_sparts; k++) {
+    if (sind[k] < sind[k + 1]) {
+      cells_top[sind[k]].scount = k - last_sindex + 1;
+      last_sindex = k + 1;
+    }
+  }
+
   /* We no longer need the indices as of here. */
   free(ind);
+  free(sind);
 
 #ifdef WITH_MPI
 
-  /* Re-allocate the index array if needed.. */
+  /* Re-allocate the index array for the gparts if needed.. */
   if (s->nr_gparts + 1 > gind_size) {
     int *gind_new;
     if ((gind_new = (int *)malloc(sizeof(int) * (s->nr_gparts + 1))) == NULL)
@@ -650,7 +805,7 @@ void space_rebuild(struct space *s, int verbose) {
     gind = gind_new;
   }
 
-  /* Assign each particle to its cell. */
+  /* Assign each received gpart to its cell. */
   for (size_t k = nr_gparts; k < s->nr_gparts; k++) {
     const struct gpart *const p = &s->gparts[k];
     gind[k] =
@@ -658,21 +813,48 @@ void space_rebuild(struct space *s, int verbose) {
 
 #ifdef SWIFT_DEBUG_CHECKS
     if (cells_top[gind[k]].nodeID != s->e->nodeID)
-      error("Received part that does not belong to me (nodeID=%i).",
+      error("Received g-part that does not belong to me (nodeID=%i).",
             cells_top[gind[k]].nodeID);
 #endif
   }
   nr_gparts = s->nr_gparts;
 
-#endif
+#endif /* WITH_MPI */
 
   /* Sort the gparts according to their cells. */
   if (nr_gparts > 0)
     space_gparts_sort(s, gind, nr_gparts, 0, s->nr_cells - 1, verbose);
 
+#ifdef SWIFT_DEBUG_CHECKS
+  /* Verify that the gpart have been sorted correctly. */
+  for (size_t k = 0; k < nr_gparts; k++) {
+    const struct gpart *gp = &s->gparts[k];
+
+    /* New cell index */
+    const int new_gind =
+        cell_getid(s->cdim, gp->x[0] * s->iwidth[0], gp->x[1] * s->iwidth[1],
+                   gp->x[2] * s->iwidth[2]);
+
+    /* New cell of this gpart */
+    const struct cell *c = &s->cells_top[new_gind];
+
+    if (gind[k] != new_gind)
+      error("gpart's new cell index not matching sorted index.");
+
+    if (gp->x[0] < c->loc[0] || gp->x[0] > c->loc[0] + c->width[0] ||
+        gp->x[1] < c->loc[1] || gp->x[1] > c->loc[1] + c->width[1] ||
+        gp->x[2] < c->loc[2] || gp->x[2] > c->loc[2] + c->width[2])
+      error("gpart not sorted into the right top-level cell!");
+  }
+#endif
+
   /* Re-link the parts. */
   if (nr_parts > 0 && nr_gparts > 0)
-    part_relink_parts(s->gparts, nr_gparts, s->parts);
+    part_relink_parts_to_gparts(s->gparts, nr_gparts, s->parts);
+
+  /* Re-link the sparts. */
+  if (nr_sparts > 0 && nr_gparts > 0)
+    part_relink_sparts_to_gparts(s->gparts, nr_gparts, s->sparts);
 
   /* Extract the cell counts from the sorted indices. */
   size_t last_gindex = 0;
@@ -689,26 +871,8 @@ void space_rebuild(struct space *s, int verbose) {
 
 #ifdef SWIFT_DEBUG_CHECKS
   /* Verify that the links are correct */
-  for (size_t k = 0; k < nr_gparts; ++k) {
-
-    if (s->gparts[k].id_or_neg_offset < 0) {
-
-      const struct part *part = &s->parts[-s->gparts[k].id_or_neg_offset];
-
-      if (part->gpart != &s->gparts[k]) error("Linking problem !");
-
-      if (s->gparts[k].x[0] != part->x[0] || s->gparts[k].x[1] != part->x[1] ||
-          s->gparts[k].x[2] != part->x[2])
-        error("Linked particles are not at the same position !");
-    }
-  }
-  for (size_t k = 0; k < nr_parts; ++k) {
-
-    if (s->parts[k].gpart != NULL &&
-        s->parts[k].gpart->id_or_neg_offset != -(ptrdiff_t)k) {
-      error("Linking problem !");
-    }
-  }
+  part_verify_links(s->parts, s->gparts, s->sparts, nr_parts, nr_gparts,
+                    nr_sparts, verbose);
 #endif
 
   /* Hook the cells up to the parts. */
@@ -716,15 +880,18 @@ void space_rebuild(struct space *s, int verbose) {
   struct part *finger = s->parts;
   struct xpart *xfinger = s->xparts;
   struct gpart *gfinger = s->gparts;
+  struct spart *sfinger = s->sparts;
   for (int k = 0; k < s->nr_cells; k++) {
     struct cell *restrict c = &cells_top[k];
     c->ti_old = ti_current;
     c->parts = finger;
     c->xparts = xfinger;
     c->gparts = gfinger;
+    c->sparts = sfinger;
     finger = &finger[c->count];
     xfinger = &xfinger[c->count];
     gfinger = &gfinger[c->gcount];
+    sfinger = &sfinger[c->scount];
   }
   // message( "hooking up cells took %.3f %s." ,
   // clocks_from_ticks(getticks() - tic), clocks_getunit());
@@ -830,6 +997,13 @@ void space_parts_get_cell_index_mapper(void *map_data, int nr_parts,
         cell_getid(cdim, pos_x * ih_x, pos_y * ih_y, pos_z * ih_z);
     ind[k] = index;
 
+#ifdef SWIFT_DEBUG_CHECKS
+    if (pos_x > dim_x || pos_y > dim_y || pos_z > pos_z || pos_x < 0. ||
+        pos_y < 0. || pos_z < 0.)
+      error("Particle outside of simulation box. p->x=[%e %e %e]", pos_x, pos_y,
+            pos_z);
+#endif
+
     /* Update the position */
     p->x[0] = pos_x;
     p->x[1] = pos_y;
@@ -889,8 +1063,58 @@ void space_gparts_get_cell_index_mapper(void *map_data, int nr_gparts,
 }
 
 /**
- * @brief Computes the cell index of all the particles and update the cell
- * count.
+ * @brief #threadpool mapper function to compute the s-particle cell indices.
+ *
+ * @param map_data Pointer towards the s-particles.
+ * @param nr_sparts The number of s-particles to treat.
+ * @param extra_data Pointers to the space and index list
+ */
+void space_sparts_get_cell_index_mapper(void *map_data, int nr_sparts,
+                                        void *extra_data) {
+
+  /* Unpack the data */
+  struct spart *restrict sparts = (struct spart *)map_data;
+  struct index_data *data = (struct index_data *)extra_data;
+  struct space *s = data->s;
+  int *const ind = data->ind + (ptrdiff_t)(sparts - s->sparts);
+
+  /* Get some constants */
+  const double dim_x = s->dim[0];
+  const double dim_y = s->dim[1];
+  const double dim_z = s->dim[2];
+  const int cdim[3] = {s->cdim[0], s->cdim[1], s->cdim[2]};
+  const double ih_x = s->iwidth[0];
+  const double ih_y = s->iwidth[1];
+  const double ih_z = s->iwidth[2];
+
+  for (int k = 0; k < nr_sparts; k++) {
+
+    /* Get the particle */
+    struct spart *restrict sp = &sparts[k];
+
+    const double old_pos_x = sp->x[0];
+    const double old_pos_y = sp->x[1];
+    const double old_pos_z = sp->x[2];
+
+    /* Put it back into the simulation volume */
+    const double pos_x = box_wrap(old_pos_x, 0.0, dim_x);
+    const double pos_y = box_wrap(old_pos_y, 0.0, dim_y);
+    const double pos_z = box_wrap(old_pos_z, 0.0, dim_z);
+
+    /* Get its cell index */
+    const int index =
+        cell_getid(cdim, pos_x * ih_x, pos_y * ih_y, pos_z * ih_z);
+    ind[k] = index;
+
+    /* Update the position */
+    sp->x[0] = pos_x;
+    sp->x[1] = pos_y;
+    sp->x[2] = pos_z;
+  }
+}
+
+/**
+ * @brief Computes the cell index of all the particles.
  *
  * @param s The #space.
  * @param ind The array of indices to fill.
@@ -917,8 +1141,7 @@ void space_parts_get_cell_index(struct space *s, int *ind, struct cell *cells,
 }
 
 /**
- * @brief Computes the cell index of all the g-particles and update the cell
- * gcount.
+ * @brief Computes the cell index of all the g-particles.
  *
  * @param s The #space.
  * @param gind The array of indices to fill.
@@ -944,6 +1167,33 @@ void space_gparts_get_cell_index(struct space *s, int *gind, struct cell *cells,
             clocks_getunit());
 }
 
+/**
+ * @brief Computes the cell index of all the s-particles.
+ *
+ * @param s The #space.
+ * @param sind The array of indices to fill.
+ * @param cells The array of #cell to update.
+ * @param verbose Are we talkative ?
+ */
+void space_sparts_get_cell_index(struct space *s, int *sind, struct cell *cells,
+                                 int verbose) {
+
+  const ticks tic = getticks();
+
+  /* Pack the extra information */
+  struct index_data data;
+  data.s = s;
+  data.cells = cells;
+  data.ind = sind;
+
+  threadpool_map(&s->e->threadpool, space_sparts_get_cell_index_mapper,
+                 s->sparts, s->nr_sparts, sizeof(struct spart), 1000, &data);
+
+  if (verbose)
+    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
+            clocks_getunit());
+}
+
 /**
  * @brief Sort the particles and condensed particles according to the given
  * indices.
@@ -993,7 +1243,7 @@ void space_parts_sort(struct space *s, int *ind, size_t N, int min, int max,
     if (ind[i - 1] > ind[i])
       error("Sorting failed (ind[%zu]=%i,ind[%zu]=%i), min=%i, max=%i.", i - 1,
             ind[i - 1], i, ind[i], min, max);
-  message("Sorting succeeded.");
+  if (s->e->nodeID == 0 || verbose) message("Sorting succeeded.");
 #endif
 
   /* Clean up. */
@@ -1055,18 +1305,207 @@ void space_parts_sort_mapper(void *map_data, int num_elements,
 
 #ifdef SWIFT_DEBUG_CHECKS
       /* Verify space_sort_struct. */
-      for (int k = i; k <= jj; k++)
-        if (ind[k] > pivot) {
-          message("sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%li, j=%li.",
-                  k, ind[k], pivot, i, j);
-          error("Partition failed (<=pivot).");
+      if (i != j) {
+        for (int k = i; k <= jj; k++) {
+          if (ind[k] > pivot) {
+            message(
+                "sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%li, j=%li.", k,
+                ind[k], pivot, i, j);
+            error("Partition failed (<=pivot).");
+          }
         }
-      for (int k = jj + 1; k <= j; k++)
-        if (ind[k] <= pivot) {
-          message("sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%li, j=%li.",
-                  k, ind[k], pivot, i, j);
-          error("Partition failed (>pivot).");
+        for (int k = jj + 1; k <= j; k++) {
+          if (ind[k] <= pivot) {
+            message(
+                "sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%li, j=%li.", k,
+                ind[k], pivot, i, j);
+            error("Partition failed (>pivot).");
+          }
         }
+      }
+#endif
+
+      /* Split-off largest interval. */
+      if (jj - i > j - jj + 1) {
+
+        /* Recurse on the left? */
+        if (jj > i && pivot > min) {
+          qid = atomic_inc(&sort_struct->last) % sort_struct->stack_size;
+          while (sort_struct->stack[qid].ready)
+            ;
+          sort_struct->stack[qid].i = i;
+          sort_struct->stack[qid].j = jj;
+          sort_struct->stack[qid].min = min;
+          sort_struct->stack[qid].max = pivot;
+          if (atomic_inc(&sort_struct->waiting) >= sort_struct->stack_size)
+            error("Qstack overflow.");
+          sort_struct->stack[qid].ready = 1;
+        }
+
+        /* Recurse on the right? */
+        if (jj + 1 < j && pivot + 1 < max) {
+          i = jj + 1;
+          min = pivot + 1;
+        } else
+          break;
+
+      } else {
+
+        /* Recurse on the right? */
+        if (pivot + 1 < max) {
+          qid = atomic_inc(&sort_struct->last) % sort_struct->stack_size;
+          while (sort_struct->stack[qid].ready)
+            ;
+          sort_struct->stack[qid].i = jj + 1;
+          sort_struct->stack[qid].j = j;
+          sort_struct->stack[qid].min = pivot + 1;
+          sort_struct->stack[qid].max = max;
+          if (atomic_inc(&sort_struct->waiting) >= sort_struct->stack_size)
+            error("Qstack overflow.");
+          sort_struct->stack[qid].ready = 1;
+        }
+
+        /* Recurse on the left? */
+        if (jj > i && pivot > min) {
+          j = jj;
+          max = pivot;
+        } else
+          break;
+      }
+
+    } /* loop over sub-intervals. */
+
+    atomic_dec(&sort_struct->waiting);
+
+  } /* main loop. */
+}
+
+/**
+ * @brief Sort the s-particles according to the given indices.
+ *
+ * @param s The #space.
+ * @param ind The indices with respect to which the #spart are sorted.
+ * @param N The number of parts
+ * @param min Lowest index.
+ * @param max highest index.
+ * @param verbose Are we talkative ?
+ */
+void space_sparts_sort(struct space *s, int *ind, size_t N, int min, int max,
+                       int verbose) {
+
+  const ticks tic = getticks();
+
+  /* Populate a parallel_sort structure with the input data */
+  struct parallel_sort sort_struct;
+  sort_struct.sparts = s->sparts;
+  sort_struct.ind = ind;
+  sort_struct.stack_size = 2 * (max - min + 1) + 10 + s->e->nr_threads;
+  if ((sort_struct.stack =
+           malloc(sizeof(struct qstack) * sort_struct.stack_size)) == NULL)
+    error("Failed to allocate sorting stack.");
+  for (unsigned int i = 0; i < sort_struct.stack_size; i++)
+    sort_struct.stack[i].ready = 0;
+
+  /* Add the first interval. */
+  sort_struct.stack[0].i = 0;
+  sort_struct.stack[0].j = N - 1;
+  sort_struct.stack[0].min = min;
+  sort_struct.stack[0].max = max;
+  sort_struct.stack[0].ready = 1;
+  sort_struct.first = 0;
+  sort_struct.last = 1;
+  sort_struct.waiting = 1;
+
+  /* Launch the sorting tasks with a stride of zero such that the same
+     map data is passed to each thread. */
+  threadpool_map(&s->e->threadpool, space_sparts_sort_mapper, &sort_struct,
+                 s->e->threadpool.num_threads, 0, 1, NULL);
+
+#ifdef SWIFT_DEBUG_CHECKS
+  /* Verify space_sort_struct. */
+  for (size_t i = 1; i < N; i++)
+    if (ind[i - 1] > ind[i])
+      error("Sorting failed (ind[%zu]=%i,ind[%zu]=%i), min=%i, max=%i.", i - 1,
+            ind[i - 1], i, ind[i], min, max);
+  if (s->e->nodeID == 0 || verbose) message("Sorting succeeded.");
+#endif
+
+  /* Clean up. */
+  free(sort_struct.stack);
+
+  if (verbose)
+    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
+            clocks_getunit());
+}
+
+void space_sparts_sort_mapper(void *map_data, int num_elements,
+                              void *extra_data) {
+
+  /* Unpack the mapping data. */
+  struct parallel_sort *sort_struct = (struct parallel_sort *)map_data;
+
+  /* Pointers to the sorting data. */
+  int *ind = sort_struct->ind;
+  struct spart *sparts = sort_struct->sparts;
+
+  /* Main loop. */
+  while (sort_struct->waiting) {
+
+    /* Grab an interval off the queue. */
+    int qid = atomic_inc(&sort_struct->first) % sort_struct->stack_size;
+
+    /* Wait for the entry to be ready, or for the sorting do be done. */
+    while (!sort_struct->stack[qid].ready)
+      if (!sort_struct->waiting) return;
+
+    /* Get the stack entry. */
+    ptrdiff_t i = sort_struct->stack[qid].i;
+    ptrdiff_t j = sort_struct->stack[qid].j;
+    int min = sort_struct->stack[qid].min;
+    int max = sort_struct->stack[qid].max;
+    sort_struct->stack[qid].ready = 0;
+
+    /* Loop over sub-intervals. */
+    while (1) {
+
+      /* Bring beer. */
+      const int pivot = (min + max) / 2;
+      /* message("Working on interval [%i,%i] with min=%i, max=%i, pivot=%i.",
+              i, j, min, max, pivot); */
+
+      /* One pass of QuickSort's partitioning. */
+      ptrdiff_t ii = i;
+      ptrdiff_t jj = j;
+      while (ii < jj) {
+        while (ii <= j && ind[ii] <= pivot) ii++;
+        while (jj >= i && ind[jj] > pivot) jj--;
+        if (ii < jj) {
+          memswap(&ind[ii], &ind[jj], sizeof(int));
+          memswap(&sparts[ii], &sparts[jj], sizeof(struct spart));
+        }
+      }
+
+#ifdef SWIFT_DEBUG_CHECKS
+      /* Verify space_sort_struct. */
+      if (i != j) {
+        for (int k = i; k <= jj; k++) {
+          if (ind[k] > pivot) {
+            message(
+                "sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%li, j=%li "
+                "min=%i max=%i.",
+                k, ind[k], pivot, i, j, min, max);
+            error("Partition failed (<=pivot).");
+          }
+        }
+        for (int k = jj + 1; k <= j; k++) {
+          if (ind[k] <= pivot) {
+            message(
+                "sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%li, j=%li.", k,
+                ind[k], pivot, i, j);
+            error("Partition failed (>pivot).");
+          }
+        }
+      }
 #endif
 
       /* Split-off largest interval. */
@@ -1171,7 +1610,7 @@ void space_gparts_sort(struct space *s, int *ind, size_t N, int min, int max,
     if (ind[i - 1] > ind[i])
       error("Sorting failed (ind[%zu]=%i,ind[%zu]=%i), min=%i, max=%i.", i - 1,
             ind[i - 1], i, ind[i], min, max);
-  message("Sorting succeeded.");
+  if (s->e->nodeID == 0 || verbose) message("Sorting succeeded.");
 #endif
 
   /* Clean up. */
@@ -1231,18 +1670,24 @@ void space_gparts_sort_mapper(void *map_data, int num_elements,
 
 #ifdef SWIFT_DEBUG_CHECKS
       /* Verify space_sort_struct. */
-      for (int k = i; k <= jj; k++)
-        if (ind[k] > pivot) {
-          message("sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%li, j=%li.",
-                  k, ind[k], pivot, i, j);
-          error("Partition failed (<=pivot).");
+      if (i != j) {
+        for (int k = i; k <= jj; k++) {
+          if (ind[k] > pivot) {
+            message(
+                "sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%li, j=%li.", k,
+                ind[k], pivot, i, j);
+            error("Partition failed (<=pivot).");
+          }
         }
-      for (int k = jj + 1; k <= j; k++)
-        if (ind[k] <= pivot) {
-          message("sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%li, j=%li.",
-                  k, ind[k], pivot, i, j);
-          error("Partition failed (>pivot).");
+        for (int k = jj + 1; k <= j; k++) {
+          if (ind[k] <= pivot) {
+            message(
+                "sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%li, j=%li.", k,
+                ind[k], pivot, i, j);
+            error("Partition failed (>pivot).");
+          }
         }
+      }
 #endif
 
       /* Split-off largest interval. */
@@ -1456,27 +1901,63 @@ void space_map_cells_pre(struct space *s, int full,
  * @param s The #space in which the cell lives.
  * @param c The #cell to split recursively.
  * @param buff A buffer for particle sorting, should be of size at least
- *        max(c->count, c->gount) or @c NULL.
+ *        c->count or @c NULL.
+ * @param sbuff A buffer for particle sorting, should be of size at least
+ *        c->scount or @c NULL.
+ * @param gbuff A buffer for particle sorting, should be of size at least
+ *        c->gcount or @c NULL.
  */
-void space_split_recursive(struct space *s, struct cell *c, int *buff) {
+void space_split_recursive(struct space *s, struct cell *c,
+                           struct cell_buff *buff, struct cell_buff *sbuff,
+                           struct cell_buff *gbuff) {
 
   const int count = c->count;
   const int gcount = c->gcount;
+  const int scount = c->scount;
   const int depth = c->depth;
   int maxdepth = 0;
   float h_max = 0.0f;
-  int ti_end_min = max_nr_timesteps, ti_end_max = 0;
-  struct cell *temp;
+  integertime_t ti_end_min = max_nr_timesteps, ti_end_max = 0;
   struct part *parts = c->parts;
   struct gpart *gparts = c->gparts;
+  struct spart *sparts = c->sparts;
   struct xpart *xparts = c->xparts;
   struct engine *e = s->e;
 
   /* If the buff is NULL, allocate it, and remember to free it. */
-  const int allocate_buffer = (buff == NULL);
-  if (allocate_buffer &&
-      (buff = (int *)malloc(sizeof(int) * max(count, gcount))) == NULL)
-    error("Failed to allocate temporary indices.");
+  const int allocate_buffer = (buff == NULL && gbuff == NULL && sbuff == NULL);
+  if (allocate_buffer) {
+    if (count > 0) {
+      if (posix_memalign((void *)&buff, SWIFT_STRUCT_ALIGNMENT,
+                         sizeof(struct cell_buff) * count) != 0)
+        error("Failed to allocate temporary indices.");
+      for (int k = 0; k < count; k++) {
+        buff[k].x[0] = parts[k].x[0];
+        buff[k].x[1] = parts[k].x[1];
+        buff[k].x[2] = parts[k].x[2];
+      }
+    }
+    if (gcount > 0) {
+      if (posix_memalign((void *)&gbuff, SWIFT_STRUCT_ALIGNMENT,
+                         sizeof(struct cell_buff) * gcount) != 0)
+        error("Failed to allocate temporary indices.");
+      for (int k = 0; k < gcount; k++) {
+        gbuff[k].x[0] = gparts[k].x[0];
+        gbuff[k].x[1] = gparts[k].x[1];
+        gbuff[k].x[2] = gparts[k].x[2];
+      }
+    }
+    if (scount > 0) {
+      if (posix_memalign((void *)&sbuff, SWIFT_STRUCT_ALIGNMENT,
+                         sizeof(struct cell_buff) * scount) != 0)
+        error("Failed to allocate temporary indices.");
+      for (int k = 0; k < scount; k++) {
+        sbuff[k].x[0] = sparts[k].x[0];
+        sbuff[k].x[1] = sparts[k].x[1];
+        sbuff[k].x[2] = sparts[k].x[2];
+      }
+    }
+  }
 
   /* Check the depth. */
   while (depth > (maxdepth = s->maxdepth)) {
@@ -1490,47 +1971,57 @@ void space_split_recursive(struct space *s, struct cell *c, int *buff) {
   }
 
   /* Split or let it be? */
-  if (count > space_splitsize || gcount > space_splitsize) {
+  if (count > space_splitsize || gcount > space_splitsize ||
+      scount > space_splitsize) {
 
     /* No longer just a leaf. */
     c->split = 1;
 
     /* Create the cell's progeny. */
+    space_getcells(s, 8, c->progeny);
     for (int k = 0; k < 8; k++) {
-      temp = space_getcell(s);
-      temp->count = 0;
-      temp->gcount = 0;
-      temp->ti_old = e->ti_current;
-      temp->loc[0] = c->loc[0];
-      temp->loc[1] = c->loc[1];
-      temp->loc[2] = c->loc[2];
-      temp->width[0] = c->width[0] / 2;
-      temp->width[1] = c->width[1] / 2;
-      temp->width[2] = c->width[2] / 2;
-      temp->dmin = c->dmin / 2;
-      if (k & 4) temp->loc[0] += temp->width[0];
-      if (k & 2) temp->loc[1] += temp->width[1];
-      if (k & 1) temp->loc[2] += temp->width[2];
-      temp->depth = c->depth + 1;
-      temp->split = 0;
-      temp->h_max = 0.0;
-      temp->dx_max = 0.f;
-      temp->nodeID = c->nodeID;
-      temp->parent = c;
-      temp->super = NULL;
-      c->progeny[k] = temp;
+      struct cell *cp = c->progeny[k];
+      cp->count = 0;
+      cp->gcount = 0;
+      cp->scount = 0;
+      cp->ti_old = c->ti_old;
+      cp->loc[0] = c->loc[0];
+      cp->loc[1] = c->loc[1];
+      cp->loc[2] = c->loc[2];
+      cp->width[0] = c->width[0] / 2;
+      cp->width[1] = c->width[1] / 2;
+      cp->width[2] = c->width[2] / 2;
+      cp->dmin = c->dmin / 2;
+      if (k & 4) cp->loc[0] += cp->width[0];
+      if (k & 2) cp->loc[1] += cp->width[1];
+      if (k & 1) cp->loc[2] += cp->width[2];
+      cp->depth = c->depth + 1;
+      cp->split = 0;
+      cp->h_max = 0.0;
+      cp->dx_max = 0.f;
+      cp->nodeID = c->nodeID;
+      cp->parent = c;
+      cp->super = NULL;
     }
 
     /* Split the cell data. */
-    cell_split(c, c->parts - s->parts, buff);
+    cell_split(c, c->parts - s->parts, c->sparts - s->sparts, buff, sbuff,
+               gbuff);
 
     /* Remove any progeny with zero parts. */
+    struct cell_buff *progeny_buff = buff, *progeny_gbuff = gbuff,
+                     *progeny_sbuff = sbuff;
     for (int k = 0; k < 8; k++)
-      if (c->progeny[k]->count == 0 && c->progeny[k]->gcount == 0) {
+      if (c->progeny[k]->count == 0 && c->progeny[k]->gcount == 0 &&
+          c->progeny[k]->scount == 0) {
         space_recycle(s, c->progeny[k]);
         c->progeny[k] = NULL;
       } else {
-        space_split_recursive(s, c->progeny[k], buff);
+        space_split_recursive(s, c->progeny[k], progeny_buff, progeny_sbuff,
+                              progeny_gbuff);
+        progeny_buff += c->progeny[k]->count;
+        progeny_gbuff += c->progeny[k]->gcount;
+        progeny_sbuff += c->progeny[k]->scount;
         h_max = max(h_max, c->progeny[k]->h_max);
         ti_end_min = min(ti_end_min, c->progeny[k]->ti_end_min);
         ti_end_max = max(ti_end_max, c->progeny[k]->ti_end_max);
@@ -1553,7 +2044,8 @@ void space_split_recursive(struct space *s, struct cell *c, int *buff) {
       struct part *p = &parts[k];
       struct xpart *xp = &xparts[k];
       const float h = p->h;
-      const int ti_end = p->ti_end;
+      const integertime_t ti_end =
+          get_integer_time_end(e->ti_current, p->time_bin);
       xp->x_diff[0] = 0.f;
       xp->x_diff[1] = 0.f;
       xp->x_diff[2] = 0.f;
@@ -1563,13 +2055,21 @@ void space_split_recursive(struct space *s, struct cell *c, int *buff) {
     }
     for (int k = 0; k < gcount; k++) {
       struct gpart *gp = &gparts[k];
-      const int ti_end = gp->ti_end;
+      const integertime_t ti_end =
+          get_integer_time_end(e->ti_current, gp->time_bin);
       gp->x_diff[0] = 0.f;
       gp->x_diff[1] = 0.f;
       gp->x_diff[2] = 0.f;
       if (ti_end < ti_end_min) ti_end_min = ti_end;
       if (ti_end > ti_end_max) ti_end_max = ti_end;
     }
+    for (int k = 0; k < scount; k++) {
+      struct spart *sp = &sparts[k];
+      const integertime_t ti_end =
+          get_integer_time_end(e->ti_current, sp->time_bin);
+      if (ti_end < ti_end_min) ti_end_min = ti_end;
+      if (ti_end > ti_end_max) ti_end_max = ti_end;
+    }
   }
 
   /* Set the values for this cell. */
@@ -1582,6 +2082,9 @@ void space_split_recursive(struct space *s, struct cell *c, int *buff) {
   if (s->nr_parts > 0)
     c->owner =
         ((c->parts - s->parts) % s->nr_parts) * s->nr_queues / s->nr_parts;
+  else if (s->nr_sparts > 0)
+    c->owner =
+        ((c->sparts - s->sparts) % s->nr_sparts) * s->nr_queues / s->nr_sparts;
   else if (s->nr_gparts > 0)
     c->owner =
         ((c->gparts - s->gparts) % s->nr_gparts) * s->nr_queues / s->nr_gparts;
@@ -1589,7 +2092,11 @@ void space_split_recursive(struct space *s, struct cell *c, int *buff) {
     c->owner = 0; /* Ok, there is really nothing on this rank... */
 
   /* Clean up. */
-  if (allocate_buffer) free(buff);
+  if (allocate_buffer) {
+    if (buff != NULL) free(buff);
+    if (gbuff != NULL) free(gbuff);
+    if (sbuff != NULL) free(sbuff);
+  }
 }
 
 /**
@@ -1608,7 +2115,7 @@ void space_split_mapper(void *map_data, int num_cells, void *extra_data) {
 
   for (int ind = 0; ind < num_cells; ind++) {
     struct cell *c = &cells_top[ind];
-    space_split_recursive(s, c, NULL);
+    space_split_recursive(s, c, NULL, NULL, NULL);
   }
 
 #ifdef SWIFT_DEBUG_CHECKS
@@ -1654,7 +2161,7 @@ void space_recycle(struct space *s, struct cell *c) {
  * @param s The #space.
  * @param list_begin Pointer to the first #cell in the linked list of
  *        cells joined by their @c next pointers.
- * @param list_begin Pointer to the last #cell in the linked list of
+ * @param list_end Pointer to the last #cell in the linked list of
  *        cells joined by their @c next pointers. It is assumed that this
  *        cell's @c next pointer is @c NULL.
  */
@@ -1695,39 +2202,46 @@ void space_recycle_list(struct space *s, struct cell *list_begin,
  * If we have no cells, allocate a new chunk of memory and pick one from there.
  *
  * @param s The #space.
+ * @param nr_cells Number of #cell to pick up.
+ * @param cells Array of @c nr_cells #cell pointers in which to store the
+ *        new cells.
  */
-struct cell *space_getcell(struct space *s) {
+void space_getcells(struct space *s, int nr_cells, struct cell **cells) {
 
   /* Lock the space. */
   lock_lock(&s->lock);
 
-  /* Is the buffer empty? */
-  if (s->cells_sub == NULL) {
-    if (posix_memalign((void *)&s->cells_sub, cell_align,
-                       space_cellallocchunk * sizeof(struct cell)) != 0)
-      error("Failed to allocate more cells.");
+  /* For each requested cell... */
+  for (int j = 0; j < nr_cells; j++) {
 
-    /* Constructed a linked list */
-    for (int k = 0; k < space_cellallocchunk - 1; k++)
-      s->cells_sub[k].next = &s->cells_sub[k + 1];
-    s->cells_sub[space_cellallocchunk - 1].next = NULL;
-  }
+    /* Is the buffer empty? */
+    if (s->cells_sub == NULL) {
+      if (posix_memalign((void *)&s->cells_sub, cell_align,
+                         space_cellallocchunk * sizeof(struct cell)) != 0)
+        error("Failed to allocate more cells.");
 
-  /* Pick off the next cell. */
-  struct cell *c = s->cells_sub;
-  s->cells_sub = c->next;
-  s->tot_cells += 1;
+      /* Constructed a linked list */
+      for (int k = 0; k < space_cellallocchunk - 1; k++)
+        s->cells_sub[k].next = &s->cells_sub[k + 1];
+      s->cells_sub[space_cellallocchunk - 1].next = NULL;
+    }
+
+    /* Pick off the next cell. */
+    cells[j] = s->cells_sub;
+    s->cells_sub = cells[j]->next;
+    s->tot_cells += 1;
+  }
 
   /* Unlock the space. */
   lock_unlock_blind(&s->lock);
 
   /* Init some things in the cell we just got. */
-  bzero(c, sizeof(struct cell));
-  c->nodeID = -1;
-  if (lock_init(&c->lock) != 0 || lock_init(&c->glock) != 0)
-    error("Failed to initialize cell spinlocks.");
-
-  return c;
+  for (int j = 0; j < nr_cells; j++) {
+    bzero(cells[j], sizeof(struct cell));
+    cells[j]->nodeID = -1;
+    if (lock_init(&cells[j]->lock) != 0 || lock_init(&cells[j]->glock) != 0)
+      error("Failed to initialize cell spinlocks.");
+  }
 }
 
 /**
@@ -1754,6 +2268,11 @@ void space_init_parts(struct space *s) {
 #endif
 
     hydro_first_init_part(&p[i], &xp[i]);
+
+#ifdef SWIFT_DEBUG_CHECKS
+    p->ti_drift = 0;
+    p->ti_kick = 0;
+#endif
   }
 }
 
@@ -1800,6 +2319,32 @@ void space_init_gparts(struct space *s) {
   }
 }
 
+/**
+ * @brief Initialises all the s-particles by setting them into a valid state
+ *
+ * Calls star_first_init_spart() on all the particles
+ */
+void space_init_sparts(struct space *s) {
+
+  const size_t nr_sparts = s->nr_sparts;
+  struct spart *restrict sp = s->sparts;
+
+  for (size_t i = 0; i < nr_sparts; ++i) {
+
+#ifdef HYDRO_DIMENSION_2D
+    sp[i].x[2] = 0.f;
+    sp[i].v[2] = 0.f;
+#endif
+
+#ifdef HYDRO_DIMENSION_1D
+    sp[i].x[1] = sp[i].x[2] = 0.f;
+    sp[i].v[1] = sp[i].v[2] = 0.f;
+#endif
+
+    star_first_init_spart(&sp[i]);
+  }
+}
+
 /**
  * @brief Split the space into cells given the array of particles.
  *
@@ -1808,8 +2353,10 @@ void space_init_gparts(struct space *s) {
  * @param dim Spatial dimensions of the domain.
  * @param parts Array of Gas particles.
  * @param gparts Array of Gravity particles.
+ * @param sparts Array of star particles.
  * @param Npart The number of Gas particles in the space.
  * @param Ngpart The number of Gravity particles in the space.
+ * @param Nspart The number of star particles in the space.
  * @param periodic flag whether the domain is periodic or not.
  * @param gravity flag whether we are doing gravity or not.
  * @param verbose Print messages to stdout or not.
@@ -1822,8 +2369,9 @@ void space_init_gparts(struct space *s) {
  */
 void space_init(struct space *s, const struct swift_params *params,
                 double dim[3], struct part *parts, struct gpart *gparts,
-                size_t Npart, size_t Ngpart, int periodic, int gravity,
-                int verbose, int dry_run) {
+                struct spart *sparts, size_t Npart, size_t Ngpart,
+                size_t Nspart, int periodic, int gravity, int verbose,
+                int dry_run) {
 
   /* Clean-up everything */
   bzero(s, sizeof(struct space));
@@ -1841,6 +2389,9 @@ void space_init(struct space *s, const struct swift_params *params,
   s->nr_gparts = Ngpart;
   s->size_gparts = Ngpart;
   s->gparts = gparts;
+  s->nr_sparts = Nspart;
+  s->size_sparts = Nspart;
+  s->sparts = sparts;
   s->nr_queues = 1; /* Temporary value until engine construction */
 
   /* Decide on the minimal top-level cell size */
@@ -1900,6 +2451,11 @@ void space_init(struct space *s, const struct swift_params *params,
       gparts[k].x[1] += shift[1];
       gparts[k].x[2] += shift[2];
     }
+    for (size_t k = 0; k < Nspart; k++) {
+      sparts[k].x[0] += shift[0];
+      sparts[k].x[1] += shift[1];
+      sparts[k].x[2] += shift[2];
+    }
   }
 
   if (!dry_run) {
@@ -1931,9 +2487,23 @@ void space_init(struct space *s, const struct swift_params *params,
           if (gparts[k].x[j] < 0 || gparts[k].x[j] >= dim[j])
             error("Not all g-particles are within the specified domain.");
     }
+
+    /* Same for the sparts */
+    if (periodic) {
+      for (size_t k = 0; k < Nspart; k++)
+        for (int j = 0; j < 3; j++) {
+          while (sparts[k].x[j] < 0) sparts[k].x[j] += dim[j];
+          while (sparts[k].x[j] >= dim[j]) sparts[k].x[j] -= dim[j];
+        }
+    } else {
+      for (size_t k = 0; k < Nspart; k++)
+        for (int j = 0; j < 3; j++)
+          if (sparts[k].x[j] < 0 || sparts[k].x[j] >= dim[j])
+            error("Not all s-particles are within the specified domain.");
+    }
   }
 
-  /* Allocate the extra parts array. */
+  /* Allocate the extra parts array for the gas particles. */
   if (Npart > 0) {
     if (posix_memalign((void *)&s->xparts, xpart_align,
                        Npart * sizeof(struct xpart)) != 0)
@@ -1945,6 +2515,7 @@ void space_init(struct space *s, const struct swift_params *params,
   space_init_parts(s);
   space_init_xparts(s);
   space_init_gparts(s);
+  space_init_sparts(s);
 
   /* Init the space lock. */
   if (lock_init(&s->lock) != 0) error("Failed to create space spin-lock.");
@@ -1974,12 +2545,25 @@ void space_link_cleanup(struct space *s) {
  * @param s The #space to check.
  * @param ti_current The (integer) time.
  */
-void space_check_drift_point(struct space *s, int ti_current) {
+void space_check_drift_point(struct space *s, integertime_t ti_current) {
 
   /* Recursively check all cells */
   space_map_cells_pre(s, 1, cell_check_drift_point, &ti_current);
 }
 
+/**
+ * @brief Checks that all particles and local cells have a non-zero time-step.
+ */
+void space_check_timesteps(struct space *s) {
+#ifdef SWIFT_DEBUG_CHECKS
+
+  for (int i = 0; i < s->nr_cells; ++i) {
+    cell_check_timesteps(&s->cells_top[i]);
+  }
+
+#endif
+}
+
 /**
  * @brief Frees up the memory allocated for this #space
  */
@@ -1990,4 +2574,5 @@ void space_clean(struct space *s) {
   free(s->parts);
   free(s->xparts);
   free(s->gparts);
+  free(s->sparts);
 }
diff --git a/src/space.h b/src/space.h
index 4aea2a07560865c8d8a474f069b370748e12e65e..a25149e8fe6971b24856a2a60cae23747fbc56ac 100644
--- a/src/space.h
+++ b/src/space.h
@@ -108,6 +108,9 @@ struct space {
   /*! The total number of g-parts in the space. */
   size_t nr_gparts, size_gparts;
 
+  /*! The total number of g-parts in the space. */
+  size_t nr_sparts, size_sparts;
+
   /*! The particle data (cells have pointers to this). */
   struct part *parts;
 
@@ -117,6 +120,9 @@ struct space {
   /*! The g-particle data (cells have pointers to this). */
   struct gpart *gparts;
 
+  /*! The s-particle data (cells have pointers to this). */
+  struct spart *sparts;
+
   /*! General-purpose lock for this space. */
   swift_lock_type lock;
 
@@ -139,6 +145,10 @@ struct space {
   struct gpart *gparts_foreign;
   size_t nr_gparts_foreign, size_gparts_foreign;
 
+  /*! Buffers for g-parts that we will receive from foreign cells. */
+  struct spart *sparts_foreign;
+  size_t nr_sparts_foreign, size_sparts_foreign;
+
 #endif
 };
 
@@ -147,13 +157,16 @@ void space_parts_sort(struct space *s, int *ind, size_t N, int min, int max,
                       int verbose);
 void space_gparts_sort(struct space *s, int *ind, size_t N, int min, int max,
                        int verbose);
-struct cell *space_getcell(struct space *s);
+void space_sparts_sort(struct space *s, int *ind, size_t N, int min, int max,
+                       int verbose);
+void space_getcells(struct space *s, int nr_cells, struct cell **cells);
 int space_getsid(struct space *s, struct cell **ci, struct cell **cj,
                  double *shift);
 void space_init(struct space *s, const struct swift_params *params,
                 double dim[3], struct part *parts, struct gpart *gparts,
-                size_t Npart, size_t Ngpart, int periodic, int gravity,
-                int verbose, int dry_run);
+                struct spart *sparts, size_t Npart, size_t Ngpart,
+                size_t Nspart, int periodic, int gravity, int verbose,
+                int dry_run);
 void space_sanitize(struct space *s);
 void space_map_cells_pre(struct space *s, int full,
                          void (*fun)(struct cell *c, void *data), void *data);
@@ -169,6 +182,8 @@ void space_parts_sort_mapper(void *map_data, int num_elements,
                              void *extra_data);
 void space_gparts_sort_mapper(void *map_data, int num_elements,
                               void *extra_data);
+void space_sparts_sort_mapper(void *map_data, int num_elements,
+                              void *extra_data);
 void space_rebuild(struct space *s, int verbose);
 void space_recycle(struct space *s, struct cell *c);
 void space_recycle_list(struct space *s, struct cell *list_begin,
@@ -180,12 +195,17 @@ void space_parts_get_cell_index(struct space *s, int *ind, struct cell *cells,
                                 int verbose);
 void space_gparts_get_cell_index(struct space *s, int *gind, struct cell *cells,
                                  int verbose);
+void space_sparts_get_cell_index(struct space *s, int *sind, struct cell *cells,
+                                 int verbose);
 void space_do_parts_sort();
 void space_do_gparts_sort();
+void space_do_sparts_sort();
 void space_init_parts(struct space *s);
 void space_init_gparts(struct space *s);
+void space_init_sparts(struct space *s);
 void space_link_cleanup(struct space *s);
-void space_check_drift_point(struct space *s, int ti_current);
+void space_check_drift_point(struct space *s, integertime_t ti_current);
+void space_check_timesteps(struct space *s);
 void space_clean(struct space *s);
 
 #endif /* SWIFT_SPACE_H */
diff --git a/src/stars.h b/src/stars.h
new file mode 100644
index 0000000000000000000000000000000000000000..ade47ff57298c13bf205e991548945576a802293
--- /dev/null
+++ b/src/stars.h
@@ -0,0 +1,30 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Coypright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+#ifndef SWIFT_STAR_H
+#define SWIFT_STAR_H
+
+/* Config parameters. */
+#include "../config.h"
+
+/* So far only one model here */
+/* Straight-forward import */
+#include "./stars/Default/star.h"
+#include "./stars/Default/star_iact.h"
+
+#endif
diff --git a/src/stars/Default/star.h b/src/stars/Default/star.h
new file mode 100644
index 0000000000000000000000000000000000000000..61ae4aeb5c51e18e39c3f4c6855d7c6ddfe05abb
--- /dev/null
+++ b/src/stars/Default/star.h
@@ -0,0 +1,86 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Coypright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+#ifndef SWIFT_DEFAULT_STAR_H
+#define SWIFT_DEFAULT_STAR_H
+
+#include <float.h>
+#include "minmax.h"
+
+/**
+ * @brief Computes the gravity time-step of a given star particle.
+ *
+ * @param sp Pointer to the s-particle data.
+ */
+__attribute__((always_inline)) INLINE static float star_compute_timestep(
+    const struct spart* const sp) {
+
+  return FLT_MAX;
+}
+
+/**
+ * @brief Initialises the s-particles for the first time
+ *
+ * This function is called only once just after the ICs have been
+ * read in to do some conversions.
+ *
+ * @param sp The particle to act upon
+ */
+__attribute__((always_inline)) INLINE static void star_first_init_spart(
+    struct spart* sp) {
+
+  sp->time_bin = 0;
+}
+
+/**
+ * @brief Prepares a s-particle for its interactions
+ *
+ * @param sp The particle to act upon
+ */
+__attribute__((always_inline)) INLINE static void star_init_spart(
+    struct spart* sp) {}
+
+/**
+ * @brief Sets the values to be predicted in the drifts to their values at a
+ * kick time
+ *
+ * @param sp The particle.
+ */
+__attribute__((always_inline)) INLINE static void star_reset_predicted_values(
+    struct spart* restrict sp) {}
+
+/**
+ * @brief Finishes the calculation of (non-gravity) forces acting on stars
+ *
+ * Multiplies the forces and accelerations by the appropiate constants
+ *
+ * @param sp The particle to act upon
+ */
+__attribute__((always_inline)) INLINE static void star_end_force(
+    struct spart* sp) {}
+
+/**
+ * @brief Kick the additional variables
+ *
+ * @param sp The particle to act upon
+ * @param dt The time-step for this kick
+ */
+__attribute__((always_inline)) INLINE static void star_kick_extra(
+    struct spart* sp, float dt) {}
+
+#endif /* SWIFT_DEFAULT_STAR_H */
diff --git a/src/stars/Default/star_debug.h b/src/stars/Default/star_debug.h
new file mode 100644
index 0000000000000000000000000000000000000000..d940afac2eb67c97481f48a4bda6fa56085166d5
--- /dev/null
+++ b/src/stars/Default/star_debug.h
@@ -0,0 +1,31 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Coypright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+#ifndef SWIFT_DEFAULT_STAR_DEBUG_H
+#define SWIFT_DEFAULT_STAR_DEBUG_H
+
+__attribute__((always_inline)) INLINE static void star_debug_particle(
+    const struct spart* p) {
+  printf(
+      "x=[%.3e,%.3e,%.3e], "
+      "v_full=[%.3e,%.3e,%.3e] p->mass=%.3e \n t_begin=%d, t_end=%d\n",
+      p->x[0], p->x[1], p->x[2], p->v_full[0], p->v_full[1], p->v_full[2],
+      p->mass, p->ti_begin, p->ti_end);
+}
+
+#endif /* SWIFT_DEFAULT_STAR_DEBUG_H */
diff --git a/src/stars/Default/star_iact.h b/src/stars/Default/star_iact.h
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/src/stars/Default/star_io.h b/src/stars/Default/star_io.h
new file mode 100644
index 0000000000000000000000000000000000000000..96bbdce6d83dc241d05e7dd1754f476dc0b8e5f9
--- /dev/null
+++ b/src/stars/Default/star_io.h
@@ -0,0 +1,72 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Coypright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+#ifndef SWIFT_DEFAULT_STAR_IO_H
+#define SWIFT_DEFAULT_STAR_IO_H
+
+#include "io_properties.h"
+
+/**
+ * @brief Specifies which s-particle fields to read from a dataset
+ *
+ * @param sparts The s-particle array.
+ * @param list The list of i/o properties to read.
+ * @param num_fields The number of i/o fields to read.
+ */
+void star_read_particles(struct spart* sparts, struct io_props* list,
+                         int* num_fields) {
+
+  /* Say how much we want to read */
+  *num_fields = 4;
+
+  /* List what we want to read */
+  list[0] = io_make_input_field("Coordinates", DOUBLE, 3, COMPULSORY,
+                                UNIT_CONV_LENGTH, sparts, x);
+  list[1] = io_make_input_field("Velocities", FLOAT, 3, COMPULSORY,
+                                UNIT_CONV_SPEED, sparts, v);
+  list[2] = io_make_input_field("Masses", FLOAT, 1, COMPULSORY, UNIT_CONV_MASS,
+                                sparts, mass);
+  list[3] = io_make_input_field("ParticleIDs", LONGLONG, 1, COMPULSORY,
+                                UNIT_CONV_NO_UNITS, sparts, id);
+}
+
+/**
+ * @brief Specifies which s-particle fields to write to a dataset
+ *
+ * @param sparts The s-particle array.
+ * @param list The list of i/o properties to write.
+ * @param num_fields The number of i/o fields to write.
+ */
+void star_write_particles(struct spart* sparts, struct io_props* list,
+                          int* num_fields) {
+
+  /* Say how much we want to read */
+  *num_fields = 4;
+
+  /* List what we want to read */
+  list[0] = io_make_output_field("Coordinates", DOUBLE, 3, UNIT_CONV_LENGTH,
+                                 sparts, x);
+  list[1] =
+      io_make_output_field("Velocities", FLOAT, 3, UNIT_CONV_SPEED, sparts, v);
+  list[2] =
+      io_make_output_field("Masses", FLOAT, 1, UNIT_CONV_MASS, sparts, mass);
+  list[3] = io_make_output_field("ParticleIDs", LONGLONG, 1, UNIT_CONV_NO_UNITS,
+                                 sparts, id);
+}
+
+#endif /* SWIFT_DEFAULT_STAR_IO_H */
diff --git a/src/stars/Default/star_part.h b/src/stars/Default/star_part.h
new file mode 100644
index 0000000000000000000000000000000000000000..e958e3d68bc58855a4f57f24d876cfaf73362bd6
--- /dev/null
+++ b/src/stars/Default/star_part.h
@@ -0,0 +1,52 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+#ifndef SWIFT_DEFAULT_STAR_PART_H
+#define SWIFT_DEFAULT_STAR_PART_H
+
+/* Some standard headers. */
+#include <stdlib.h>
+
+/**
+ * @brief Particle fields for the star particles.
+ *
+ * All quantities related to gravity are stored in the associate #gpart.
+ */
+struct spart {
+
+  /*! Particle ID. */
+  long long id;
+
+  /*! Pointer to corresponding gravity part. */
+  struct gpart* gpart;
+
+  /*! Particle position. */
+  double x[3];
+
+  /*! Particle velocity. */
+  float v[3];
+
+  /*! Star mass */
+  float mass;
+
+  /*! Particle time bin */
+  timebin_t time_bin;
+
+} SWIFT_STRUCT_ALIGN;
+
+#endif /* SWIFT_DEFAULT_STAR_PART_H */
diff --git a/src/stars_io.h b/src/stars_io.h
new file mode 100644
index 0000000000000000000000000000000000000000..18a13ec19163008f1c8e9f64cf544ddf812db655
--- /dev/null
+++ b/src/stars_io.h
@@ -0,0 +1,26 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Coypright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+#ifndef SWIFT_STAR_IO_H
+#define SWIFT_STAR_IO_H
+
+#include "./const.h"
+
+#include "./stars/Default/star_io.h"
+
+#endif /* SWIFT_STAR_IO_H */
diff --git a/src/statistics.c b/src/statistics.c
index 7a567a447a7514634435823e03bec5e4ac157d4e..297d88c1f25c1b5b42be8edcd1282fd437964894 100644
--- a/src/statistics.c
+++ b/src/statistics.c
@@ -104,8 +104,9 @@ void stats_collect_part_mapper(void *map_data, int nr_parts, void *extra_data) {
   const struct part *restrict parts = (struct part *)map_data;
   const struct xpart *restrict xparts =
       s->xparts + (ptrdiff_t)(parts - s->parts);
-  const int ti_current = s->e->ti_current;
+  const integertime_t ti_current = s->e->ti_current;
   const double timeBase = s->e->timeBase;
+  const double time = s->e->time;
   struct statistics *const global_stats = data->stats;
 
   /* Required for external potential energy */
@@ -124,20 +125,27 @@ void stats_collect_part_mapper(void *map_data, int nr_parts, void *extra_data) {
     const struct xpart *xp = &xparts[k];
     const struct gpart *gp = (p->gpart != NULL) ? gp = p->gpart : NULL;
 
-    /* Get useful variables */
-    const float dt = (ti_current - (p->ti_begin + p->ti_end) / 2) * timeBase;
-    const double x[3] = {p->x[0], p->x[1], p->x[2]};
+    /* Get useful time variables */
+    const integertime_t ti_begin =
+        get_integer_time_begin(ti_current, p->time_bin);
+    const integertime_t ti_step = get_integer_timestep(p->time_bin);
+    const float dt = (ti_current - (ti_begin + ti_step / 2)) * timeBase;
+
+    /* Get the total acceleration */
     float a_tot[3] = {p->a_hydro[0], p->a_hydro[1], p->a_hydro[2]};
     if (gp != NULL) {
       a_tot[0] += gp->a_grav[0];
       a_tot[1] += gp->a_grav[1];
       a_tot[2] += gp->a_grav[2];
     }
+
+    /* Extrapolate velocities to current time */
     const float v[3] = {xp->v_full[0] + a_tot[0] * dt,
                         xp->v_full[1] + a_tot[1] * dt,
                         xp->v_full[2] + a_tot[2] * dt};
 
     const float m = hydro_get_mass(p);
+    const double x[3] = {p->x[0], p->x[1], p->x[2]};
 
     /* Collect mass */
     stats.mass += m;
@@ -154,15 +162,14 @@ void stats_collect_part_mapper(void *map_data, int nr_parts, void *extra_data) {
 
     /* Collect energies. */
     stats.E_kin += 0.5f * m * (v[0] * v[0] + v[1] * v[1] + v[2] * v[2]);
+    stats.E_int += m * hydro_get_internal_energy(p);
+    stats.E_rad += cooling_get_radiated_energy(xp);
     stats.E_pot_self += 0.f;
     if (gp != NULL)
-      stats.E_pot_ext +=
-          m * external_gravity_get_potential_energy(potential, phys_const, gp);
-    stats.E_int += m * hydro_get_internal_energy(p, dt);
-    stats.E_rad += cooling_get_radiated_energy(xp);
-
+      stats.E_pot_ext += m * external_gravity_get_potential_energy(
+                                 time, potential, phys_const, gp);
     /* Collect entropy */
-    stats.entropy += m * hydro_get_entropy(p, dt);
+    stats.entropy += m * hydro_get_entropy(p);
   }
 
   /* Now write back to memory */
@@ -184,8 +191,9 @@ void stats_collect_gpart_mapper(void *map_data, int nr_gparts,
   const struct index_data *data = (struct index_data *)extra_data;
   const struct space *s = data->s;
   const struct gpart *restrict gparts = (struct gpart *)map_data;
-  const int ti_current = s->e->ti_current;
+  const integertime_t ti_current = s->e->ti_current;
   const double timeBase = s->e->timeBase;
+  const double time = s->e->time;
   struct statistics *const global_stats = data->stats;
 
   /* Required for external potential energy */
@@ -206,13 +214,18 @@ void stats_collect_gpart_mapper(void *map_data, int nr_gparts,
     if (gp->id_or_neg_offset < 0) continue;
 
     /* Get useful variables */
-    const float dt = (ti_current - (gp->ti_begin + gp->ti_end) / 2) * timeBase;
-    const double x[3] = {gp->x[0], gp->x[1], gp->x[2]};
+    const integertime_t ti_begin =
+        get_integer_time_begin(ti_current, gp->time_bin);
+    const integertime_t ti_step = get_integer_timestep(gp->time_bin);
+    const float dt = (ti_current - (ti_begin + ti_step / 2)) * timeBase;
+
+    /* Extrapolate velocities */
     const float v[3] = {gp->v_full[0] + gp->a_grav[0] * dt,
                         gp->v_full[1] + gp->a_grav[1] * dt,
                         gp->v_full[2] + gp->a_grav[2] * dt};
 
     const float m = gp->mass;
+    const double x[3] = {gp->x[0], gp->x[1], gp->x[2]};
 
     /* Collect mass */
     stats.mass += m;
@@ -230,8 +243,8 @@ void stats_collect_gpart_mapper(void *map_data, int nr_gparts,
     /* Collect energies. */
     stats.E_kin += 0.5f * m * (v[0] * v[0] + v[1] * v[1] + v[2] * v[2]);
     stats.E_pot_self += 0.f;
-    stats.E_pot_ext +=
-        m * external_gravity_get_potential_energy(potential, phys_const, gp);
+    stats.E_pot_ext += m * external_gravity_get_potential_energy(
+                               time, potential, phys_const, gp);
   }
 
   /* Now write back to memory */
diff --git a/src/swift.h b/src/swift.h
index 2928c263525f57a7ee999b50547aa374b456f556..c08a4f3209d9eea0fe02ad9112179a0ed7ccae1e 100644
--- a/src/swift.h
+++ b/src/swift.h
@@ -23,7 +23,9 @@
 #include "../config.h"
 
 /* Local headers. */
+#include "active.h"
 #include "atomic.h"
+#include "cache.h"
 #include "cell.h"
 #include "clocks.h"
 #include "const.h"
@@ -53,6 +55,7 @@
 #include "sourceterms.h"
 #include "space.h"
 #include "task.h"
+#include "timeline.h"
 #include "timers.h"
 #include "tools.h"
 #include "units.h"
diff --git a/src/task.c b/src/task.c
index ea97fdd1bb930d005889fa7c73a3f2cb7b5f054a..b05d782af305b25bf95b25279c6abc2e1f4037c2 100644
--- a/src/task.c
+++ b/src/task.c
@@ -48,13 +48,15 @@
 
 /* Task type names. */
 const char *taskID_names[task_type_count] = {
-    "none",     "sort",    "self",          "pair",        "sub_self",
-    "sub_pair", "init",    "ghost",         "extra_ghost", "kick",
-    "send",     "recv",    "grav_gather_m", "grav_fft",    "grav_mm",
-    "grav_up",  "cooling", "sourceterms"};
+    "none",          "sort",     "self",     "pair",        "sub_self",
+    "sub_pair",      "init",     "ghost",    "extra_ghost", "drift",
+    "kick1",         "kick2",    "timestep", "send",        "recv",
+    "grav_gather_m", "grav_fft", "grav_mm",  "grav_up",     "cooling",
+    "sourceterms"};
 
 const char *subtaskID_names[task_subtype_count] = {
-    "none", "density", "gradient", "force", "grav", "external_grav", "tend"};
+    "none", "density", "gradient", "force", "grav", "external_grav",
+    "tend", "xv",      "rho",      "gpart", "spart"};
 
 /**
  * @brief Computes the overlap between the parts array of two given cells.
@@ -147,9 +149,12 @@ __attribute__((always_inline)) INLINE static enum task_actions task_acts_on(
       break;
 
     case task_type_init:
-    case task_type_kick:
+    case task_type_kick1:
+    case task_type_kick2:
+    case task_type_timestep:
     case task_type_send:
     case task_type_recv:
+    case task_type_drift:
       if (t->ci->count > 0 && t->ci->gcount > 0)
         return task_action_all;
       else if (t->ci->count > 0)
@@ -260,6 +265,11 @@ void task_unlock(struct task *t) {
   /* Act based on task type. */
   switch (type) {
 
+    case task_type_drift:
+      cell_unlocktree(ci);
+      cell_gunlocktree(ci);
+      break;
+
     case task_type_sort:
       cell_unlocktree(ci);
       break;
@@ -327,6 +337,15 @@ int task_lock(struct task *t) {
 #endif
       break;
 
+    case task_type_drift:
+      if (ci->hold || ci->ghold) return 0;
+      if (cell_locktree(ci) != 0) return 0;
+      if (cell_glocktree(ci) != 0) {
+        cell_unlocktree(ci);
+        return 0;
+      }
+      break;
+
     case task_type_sort:
       if (cell_locktree(ci) != 0) return 0;
       break;
diff --git a/src/task.h b/src/task.h
index c9425fdd137e2c1708dbd05436d1db685bdd3bfd..f2733318a34421fa39f3130f9e76f1ed09246d55 100644
--- a/src/task.h
+++ b/src/task.h
@@ -45,7 +45,10 @@ enum task_types {
   task_type_init,
   task_type_ghost,
   task_type_extra_ghost,
-  task_type_kick,
+  task_type_drift,
+  task_type_kick1,
+  task_type_kick2,
+  task_type_timestep,
   task_type_send,
   task_type_recv,
   task_type_grav_gather_m,
@@ -68,6 +71,10 @@ enum task_subtypes {
   task_subtype_grav,
   task_subtype_external_grav,
   task_subtype_tend,
+  task_subtype_xv,
+  task_subtype_rho,
+  task_subtype_gpart,
+  task_subtype_spart,
   task_subtype_count
 } __attribute__((packed));
 
@@ -157,6 +164,10 @@ struct task {
   ticks tic, toc;
 #endif
 
+#ifdef SWIFT_DEBUG_CHECKS
+  int ti_run;
+#endif
+
 } SWIFT_STRUCT_ALIGN;
 
 /* Function prototypes. */
diff --git a/src/threadpool.c b/src/threadpool.c
index 35e5f2139de0689d9761d0d8f19030a076329cba..c11fd8121bb02f36fce1796d79a7eb55a38102c4 100644
--- a/src/threadpool.c
+++ b/src/threadpool.c
@@ -90,7 +90,7 @@ void threadpool_init(struct threadpool *tp, int num_threads) {
   /* Initialize the thread counters. */
   tp->num_threads = num_threads;
   tp->num_threads_waiting = 0;
-  
+
   /* If there is only a single thread, do nothing more as of here as
      we will just do work in the (blocked) calling thread. */
   if (num_threads == 1) return;
@@ -147,7 +147,7 @@ void threadpool_init(struct threadpool *tp, int num_threads) {
 void threadpool_map(struct threadpool *tp, threadpool_map_function map_function,
                     void *map_data, size_t N, int stride, int chunk,
                     void *extra_data) {
-                    
+
   /* If we just have a single thread, call the map function directly. */
   if (tp->num_threads == 1) {
     map_function(map_data, N, extra_data);
diff --git a/src/threadpool.h b/src/threadpool.h
index 76aa0c119610c4d540e117f046b286095a9c676d..f9c7eeffb700adc579ec05902193b888cdd6363d 100644
--- a/src/threadpool.h
+++ b/src/threadpool.h
@@ -32,9 +32,6 @@ typedef void (*threadpool_map_function)(void *map_data, int num_elements,
 /* Data of a threadpool. */
 struct threadpool {
 
-  /* Number of threads in this pool. */
-  int num_threads;
-
   /* The threads themselves. */
   pthread_t *threads;
 
@@ -48,6 +45,9 @@ struct threadpool {
       map_data_chunk;
   volatile threadpool_map_function map_function;
 
+  /* Number of threads in this pool. */
+  int num_threads;
+
   /* Counter for the number of threads that are done. */
   volatile int num_threads_waiting, num_threads_running;
 };
diff --git a/src/timeline.h b/src/timeline.h
new file mode 100644
index 0000000000000000000000000000000000000000..c73b2432b219a8ab0254d21c59102841557a57b9
--- /dev/null
+++ b/src/timeline.h
@@ -0,0 +1,122 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+#ifndef SWIFT_TIMELINE_H
+#define SWIFT_TIMELINE_H
+
+/* Config parameters. */
+#include "../config.h"
+
+/* Local headers. */
+#include "inline.h"
+#include "intrinsics.h"
+
+#include <math.h>
+
+typedef long long integertime_t;
+typedef char timebin_t;
+
+/*! The number of time bins */
+#define num_time_bins 56
+
+/*! The maximal number of timesteps in a simulation */
+#define max_nr_timesteps (1LL << (num_time_bins + 1))
+
+/**
+ * @brief Returns the integer time interval corresponding to a time bin
+ *
+ * @param bin The time bin of interest.
+ */
+static INLINE integertime_t get_integer_timestep(timebin_t bin) {
+
+  if (bin <= 0) return 0;
+  return 1LL << (bin + 1);
+}
+
+/**
+ * @brief Returns the time bin corresponding to a given time_step size.
+ *
+ * Assumes that integertime_t maps to an unsigned long long.
+ */
+static INLINE timebin_t get_time_bin(integertime_t time_step) {
+
+  /* ((int) log_2(time_step)) - 1 */
+  return (timebin_t)(62 - intrinsics_clzll(time_step));
+}
+
+/**
+ * @brief Returns the physical time interval corresponding to a time bin.
+ *
+ * @param bin The time bin of interest.
+ * @param timeBase the minimal time-step size of the simulation.
+ */
+static INLINE double get_timestep(timebin_t bin, double timeBase) {
+
+  return get_integer_timestep(bin) * timeBase;
+}
+
+/**
+ * @brief Returns the integer time corresponding to the start of the time-step
+ * given by a time-bin.
+ *
+ * @param ti_current The current time on the integer time line.
+ * @param bin The time bin of interest.
+ */
+static INLINE integertime_t get_integer_time_begin(integertime_t ti_current,
+                                                   timebin_t bin) {
+
+  const integertime_t dti = get_integer_timestep(bin);
+  if (dti == 0)
+    return 0;
+  else
+    return dti * ((ti_current - 1) / dti);
+}
+
+/**
+ * @brief Returns the integer time corresponding to the start of the time-step
+ * given by a time-bin.
+ *
+ * @param ti_current The current time on the integer time line.
+ * @param bin The time bin of interest.
+ */
+static INLINE integertime_t get_integer_time_end(integertime_t ti_current,
+                                                 timebin_t bin) {
+
+  const integertime_t dti = get_integer_timestep(bin);
+  if (dti == 0)
+    return 0;
+  else
+    return dti * ceil((double)ti_current / (double)dti);
+}
+
+/**
+ * @brief Returns the highest active time bin at a given point on the time line.
+ *
+ * @param time The current point on the time line.
+ */
+static INLINE timebin_t get_max_active_bin(integertime_t time) {
+
+  if (time == 0) return num_time_bins;
+
+  timebin_t bin = 1;
+  while (!((1LL << (bin + 1)) & time)) ++bin;
+
+  return bin;
+}
+
+#endif /* SWIFT_TIMELINE_H */
diff --git a/src/timers.h b/src/timers.h
index bc877d4094425a4948290d2c7c099f49cbd44280..50f630e7fc355808596967d8d7d887583674d24a 100644
--- a/src/timers.h
+++ b/src/timers.h
@@ -33,7 +33,10 @@ enum {
   timer_prepare,
   timer_init,
   timer_drift,
-  timer_kick,
+  timer_kick1,
+  timer_kick2,
+  timer_timestep,
+  timer_endforce,
   timer_dosort,
   timer_doself_density,
   timer_doself_gradient,
@@ -57,7 +60,9 @@ enum {
   timer_dopair_subset,
   timer_do_ghost,
   timer_do_extra_ghost,
-  timer_dorecv_cell,
+  timer_dorecv_part,
+  timer_dorecv_gpart,
+  timer_dorecv_spart,
   timer_gettask,
   timer_qget,
   timer_qsteal,
diff --git a/src/timestep.h b/src/timestep.h
index db52911ec1e8fbf31f35e8877e0a7ae7ba5ee478..432f0fd2c4eb713e11272546cfe84e8f6c342cbd 100644
--- a/src/timestep.h
+++ b/src/timestep.h
@@ -23,39 +23,41 @@
 #include "../config.h"
 
 /* Local headers. */
-#include "const.h"
 #include "cooling.h"
 #include "debug.h"
+#include "timeline.h"
+
 /**
  * @brief Compute a valid integer time-step form a given time-step
  *
  * @param new_dt The time-step to convert.
- * @param ti_begin The (integer) start of the previous time-step.
- * @param ti_end The (integer) end of the previous time-step.
+ * @param old_bin The old time bin.
+ * @param ti_current The current time on the integer time-line.
  * @param timeBase_inv The inverse of the system's minimal time-step.
  */
-__attribute__((always_inline)) INLINE static int get_integer_timestep(
-    float new_dt, int ti_begin, int ti_end, double timeBase_inv) {
+__attribute__((always_inline)) INLINE static integertime_t
+make_integer_timestep(float new_dt, timebin_t old_bin, integertime_t ti_current,
+                      double timeBase_inv) {
 
   /* Convert to integer time */
-  int new_dti = (int)(new_dt * timeBase_inv);
+  integertime_t new_dti = (integertime_t)(new_dt * timeBase_inv);
 
-  /* Recover the current timestep */
-  const int current_dti = ti_end - ti_begin;
+  /* Current time-step */
+  integertime_t current_dti = get_integer_timestep(old_bin);
+  integertime_t ti_end = get_integer_time_end(ti_current, old_bin);
 
   /* Limit timestep increase */
-  if (current_dti > 0) new_dti = min(new_dti, 2 * current_dti);
+  if (old_bin > 0) new_dti = min(new_dti, 2 * current_dti);
 
   /* Put this timestep on the time line */
-  int dti_timeline = max_nr_timesteps;
-  while (new_dti < dti_timeline) dti_timeline /= 2;
+  integertime_t dti_timeline = max_nr_timesteps;
+  while (new_dti < dti_timeline) dti_timeline /= 2LL;
   new_dti = dti_timeline;
 
   /* Make sure we are allowed to increase the timestep size */
   if (new_dti > current_dti) {
     if ((max_nr_timesteps - ti_end) % new_dti > 0) new_dti = current_dti;
   }
-
   return new_dti;
 }
 
@@ -65,25 +67,26 @@ __attribute__((always_inline)) INLINE static int get_integer_timestep(
  * @param gp The #gpart.
  * @param e The #engine (used to get some constants).
  */
-__attribute__((always_inline)) INLINE static int get_gpart_timestep(
+__attribute__((always_inline)) INLINE static integertime_t get_gpart_timestep(
     const struct gpart *restrict gp, const struct engine *restrict e) {
 
-  const float new_dt_external = external_gravity_timestep(
-      e->time, e->external_potential, e->physical_constants, gp);
+  float new_dt = FLT_MAX;
 
-  /* const float new_dt_self = */
-  /*     gravity_compute_timestep_self(e->physical_constants, gp); */
-  const float new_dt_self = FLT_MAX;  // MATTHIEU
+  if (e->policy & engine_policy_external_gravity)
+    new_dt =
+        min(new_dt, external_gravity_timestep(e->time, e->external_potential,
+                                              e->physical_constants, gp));
 
-  float new_dt = min(new_dt_external, new_dt_self);
+  if (e->policy & engine_policy_self_gravity)
+    new_dt = min(new_dt, gravity_compute_timestep_self(gp));
 
   /* Limit timestep within the allowed range */
   new_dt = min(new_dt, e->dt_max);
   new_dt = max(new_dt, e->dt_min);
 
   /* Convert to integer time */
-  const int new_dti =
-      get_integer_timestep(new_dt, gp->ti_begin, gp->ti_end, e->timeBase_inv);
+  const integertime_t new_dti = make_integer_timestep(
+      new_dt, gp->time_bin, e->ti_current, e->timeBase_inv);
 
   return new_dti;
 }
@@ -95,7 +98,7 @@ __attribute__((always_inline)) INLINE static int get_gpart_timestep(
  * @param xp The #xpart partner of p.
  * @param e The #engine (used to get some constants).
  */
-__attribute__((always_inline)) INLINE static int get_part_timestep(
+__attribute__((always_inline)) INLINE static integertime_t get_part_timestep(
     const struct part *restrict p, const struct xpart *restrict xp,
     const struct engine *restrict e) {
 
@@ -112,14 +115,13 @@ __attribute__((always_inline)) INLINE static int get_part_timestep(
   float new_dt_grav = FLT_MAX;
   if (p->gpart != NULL) {
 
-    const float new_dt_external = external_gravity_timestep(
-        e->time, e->external_potential, e->physical_constants, p->gpart);
-
-    /* const float new_dt_self = */
-    /*     gravity_compute_timestep_self(e->physical_constants, p->gpart); */
-    const float new_dt_self = FLT_MAX;  // MATTHIEU
+    if (e->policy & engine_policy_external_gravity)
+      new_dt_grav = min(new_dt_grav, external_gravity_timestep(
+                                         e->time, e->external_potential,
+                                         e->physical_constants, p->gpart));
 
-    new_dt_grav = min(new_dt_external, new_dt_self);
+    if (e->policy & engine_policy_self_gravity)
+      new_dt_grav = min(new_dt_grav, gravity_compute_timestep_self(p->gpart));
   }
 
   /* Final time-step is minimum of hydro and gravity */
@@ -138,8 +140,38 @@ __attribute__((always_inline)) INLINE static int get_part_timestep(
   new_dt = max(new_dt, e->dt_min);
 
   /* Convert to integer time */
-  const int new_dti =
-      get_integer_timestep(new_dt, p->ti_begin, p->ti_end, e->timeBase_inv);
+  const integertime_t new_dti = make_integer_timestep(
+      new_dt, p->time_bin, e->ti_current, e->timeBase_inv);
+
+  return new_dti;
+}
+
+/**
+ * @brief Compute the new (integer) time-step of a given #spart
+ *
+ * @param sp The #spart.
+ * @param e The #engine (used to get some constants).
+ */
+__attribute__((always_inline)) INLINE static integertime_t get_spart_timestep(
+    const struct spart *restrict sp, const struct engine *restrict e) {
+
+  float new_dt = star_compute_timestep(sp);
+
+  if (e->policy & engine_policy_external_gravity)
+    new_dt = min(new_dt,
+                 external_gravity_timestep(e->time, e->external_potential,
+                                           e->physical_constants, sp->gpart));
+
+  if (e->policy & engine_policy_self_gravity)
+    new_dt = min(new_dt, gravity_compute_timestep_self(sp->gpart));
+
+  /* Limit timestep within the allowed range */
+  new_dt = min(new_dt, e->dt_max);
+  new_dt = max(new_dt, e->dt_min);
+
+  /* Convert to integer time */
+  const integertime_t new_dti = make_integer_timestep(
+      new_dt, sp->time_bin, e->ti_current, e->timeBase_inv);
 
   return new_dti;
 }
diff --git a/src/tools.c b/src/tools.c
index e526bb1b838f6d97b72eadb4070f3f2a94938c04..ab11d1f5930cf5319aaf6424f1559f144718e154 100644
--- a/src/tools.c
+++ b/src/tools.c
@@ -558,7 +558,181 @@ void shuffle_particles(struct part *parts, const int count) {
 }
 
 /**
- * @brief Computes the forces between all g-particles using the N^2 algorithm
+ * @brief Compares two values based on their relative difference: |a - b|/|a +
+ * b|
+ *
+ * @param a Value a
+ * @param b Value b
+ * @param threshold The limit on the relative difference between the two values
+ * @param absDiff Absolute difference: |a - b|
+ * @param absSum Absolute sum: |a + b|
+ * @param relDiff Relative difference: |a - b|/|a + b|
+ *
+ * @return 1 if difference found, 0 otherwise
+ */
+int compare_values(double a, double b, double threshold, double *absDiff,
+                   double *absSum, double *relDiff) {
+
+  int result = 0;
+  *absDiff = 0.0, *absSum = 0.0, *relDiff = 0.0;
+
+  *absDiff = fabs(a - b);
+  *absSum = fabs(a + b);
+  if (*absSum > 0.f) {
+    *relDiff = *absDiff / *absSum;
+  }
+
+  if (*relDiff > threshold) {
+    result = 1;
+  }
+
+  return result;
+}
+
+/**
+ * @brief Compares two particles' properties using the relative difference and a
+ * threshold.
+ *
+ * @param a Particle A
+ * @param b Particle B
+ * @param threshold The limit on the relative difference between the two values
+ *
+ * @return 1 if difference found, 0 otherwise
+ */
+int compare_particles(struct part a, struct part b, double threshold) {
+
+#ifdef GADGET2_SPH
+
+  int result = 0;
+  double absDiff = 0.0, absSum = 0.0, relDiff = 0.0;
+
+  for (int k = 0; k < 3; k++) {
+    if (compare_values(a.x[k], b.x[k], threshold, &absDiff, &absSum,
+                       &relDiff)) {
+      message(
+          "Relative difference (%e) larger than tolerance (%e) for x[%d] of "
+          "particle %lld.",
+          relDiff, threshold, k, a.id);
+      message("a = %e, b = %e", a.x[k], b.x[k]);
+      result = 1;
+    }
+  }
+  for (int k = 0; k < 3; k++) {
+    if (compare_values(a.v[k], b.v[k], threshold, &absDiff, &absSum,
+                       &relDiff)) {
+      message(
+          "Relative difference (%e) larger than tolerance (%e) for v[%d] of "
+          "particle %lld.",
+          relDiff, threshold, k, a.id);
+      message("a = %e, b = %e", a.v[k], b.v[k]);
+      result = 1;
+    }
+  }
+  for (int k = 0; k < 3; k++) {
+    if (compare_values(a.a_hydro[k], b.a_hydro[k], threshold, &absDiff, &absSum,
+                       &relDiff)) {
+      message(
+          "Relative difference (%e) larger than tolerance (%e) for a_hydro[%d] "
+          "of particle %lld.",
+          relDiff, threshold, k, a.id);
+      message("a = %e, b = %e", a.a_hydro[k], b.a_hydro[k]);
+      result = 1;
+    }
+  }
+  if (compare_values(a.rho, b.rho, threshold, &absDiff, &absSum, &relDiff)) {
+    message(
+        "Relative difference (%e) larger than tolerance (%e) for rho of "
+        "particle %lld.",
+        relDiff, threshold, a.id);
+    message("a = %e, b = %e", a.rho, b.rho);
+    result = 1;
+  }
+  if (compare_values(a.density.rho_dh, b.density.rho_dh, threshold, &absDiff,
+                     &absSum, &relDiff)) {
+    message(
+        "Relative difference (%e) larger than tolerance (%e) for rho_dh of "
+        "particle %lld.",
+        relDiff, threshold, a.id);
+    message("a = %e, b = %e", a.density.rho_dh, b.density.rho_dh);
+    result = 1;
+  }
+  if (compare_values(a.density.wcount, b.density.wcount, threshold, &absDiff,
+                     &absSum, &relDiff)) {
+    message(
+        "Relative difference (%e) larger than tolerance (%e) for wcount of "
+        "particle %lld.",
+        relDiff, threshold, a.id);
+    message("a = %e, b = %e", a.density.wcount, b.density.wcount);
+    result = 1;
+  }
+  if (compare_values(a.density.wcount_dh, b.density.wcount_dh, threshold,
+                     &absDiff, &absSum, &relDiff)) {
+    message(
+        "Relative difference (%e) larger than tolerance (%e) for wcount_dh of "
+        "particle %lld.",
+        relDiff, threshold, a.id);
+    message("a = %e, b = %e", a.density.wcount_dh, b.density.wcount_dh);
+    result = 1;
+  }
+  if (compare_values(a.force.h_dt, b.force.h_dt, threshold, &absDiff, &absSum,
+                     &relDiff)) {
+    message(
+        "Relative difference (%e) larger than tolerance (%e) for h_dt of "
+        "particle %lld.",
+        relDiff, threshold, a.id);
+    message("a = %e, b = %e", a.force.h_dt, b.force.h_dt);
+    result = 1;
+  }
+  if (compare_values(a.force.v_sig, b.force.v_sig, threshold, &absDiff, &absSum,
+                     &relDiff)) {
+    message(
+        "Relative difference (%e) larger than tolerance (%e) for v_sig of "
+        "particle %lld.",
+        relDiff, threshold, a.id);
+    message("a = %e, b = %e", a.force.v_sig, b.force.v_sig);
+    result = 1;
+  }
+  if (compare_values(a.entropy_dt, b.entropy_dt, threshold, &absDiff, &absSum,
+                     &relDiff)) {
+    message(
+        "Relative difference (%e) larger than tolerance (%e) for entropy_dt of "
+        "particle %lld.",
+        relDiff, threshold, a.id);
+    message("a = %e, b = %e", a.entropy_dt, b.entropy_dt);
+    result = 1;
+  }
+  if (compare_values(a.density.div_v, b.density.div_v, threshold, &absDiff,
+                     &absSum, &relDiff)) {
+    message(
+        "Relative difference (%e) larger than tolerance (%e) for div_v of "
+        "particle %lld.",
+        relDiff, threshold, a.id);
+    message("a = %e, b = %e", a.density.div_v, b.density.div_v);
+    result = 1;
+  }
+  for (int k = 0; k < 3; k++) {
+    if (compare_values(a.density.rot_v[k], b.density.rot_v[k], threshold,
+                       &absDiff, &absSum, &relDiff)) {
+      message(
+          "Relative difference (%e) larger than tolerance (%e) for rot_v[%d] "
+          "of particle %lld.",
+          relDiff, threshold, k, a.id);
+      message("a = %e, b = %e", a.density.rot_v[k], b.density.rot_v[k]);
+      result = 1;
+    }
+  }
+
+  return result;
+
+#else
+
+  error("Function not supported for this flavour of SPH");
+  return 0;
+
+#endif
+}
+
+/** @brief Computes the forces between all g-particles using the N^2 algorithm
  *
  * Overwrites the accelerations of the gparts with the values.
  * Do not use for actual runs.
diff --git a/src/tools.h b/src/tools.h
index 43ddd946c3e8cdf53139bb917135dffd8a8acd12..ece3078dce7cc8ab4b15538a1e5d9a990d81b36d 100644
--- a/src/tools.h
+++ b/src/tools.h
@@ -47,4 +47,8 @@ void shuffle_particles(struct part *parts, const int count);
 void gravity_n2(struct gpart *gparts, const int gcount,
                 const struct phys_const *constants, float rlr);
 
+int compare_values(double a, double b, double threshold, double *absDiff,
+                   double *absSum, double *relDiff);
+int compare_particles(struct part a, struct part b, double threshold);
+
 #endif /* SWIFT_TOOL_H */
diff --git a/src/vector.h b/src/vector.h
index 53869fd2594227d3332d7435f47cdff7cded224b..5e7c978ce6c3df9b1fbc47be2a43ee76c85a352a 100644
--- a/src/vector.h
+++ b/src/vector.h
@@ -46,19 +46,33 @@
 #define VEC_FLOAT __m512
 #define VEC_DBL __m512d
 #define VEC_INT __m512i
+#define KNL_MASK_16 __mmask16
 #define vec_load(a) _mm512_load_ps(a)
+#define vec_store(a, addr) _mm512_store_ps(addr, a)
+#define vec_setzero() _mm512_setzero_ps()
+#define vec_setintzero() _mm512_setzero_epi32()
 #define vec_set1(a) _mm512_set1_ps(a)
+#define vec_setint1(a) _mm512_set1_epi32(a)
 #define vec_set(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p) \
   _mm512_set_ps(p, o, n, m, l, k, j, i, h, g, f, e, d, c, b, a)
 #define vec_dbl_set(a, b, c, d, e, f, g, h) \
   _mm512_set_pd(h, g, f, e, d, c, b, a)
+#define vec_add(a, b) _mm512_add_ps(a, b)
+#define vec_sub(a, b) _mm512_sub_ps(a, b)
+#define vec_mul(a, b) _mm512_mul_ps(a, b)
+#define vec_fma(a, b, c) _mm512_fmadd_ps(a, b, c)
 #define vec_sqrt(a) _mm512_sqrt_ps(a)
-#define vec_rcp(a) _mm512_rcp_ps(a)
-#define vec_rsqrt(a) _mm512_rsqrt_ps(a)
+#define vec_rcp(a) _mm512_rcp14_ps(a)
+#define vec_rsqrt(a) _mm512_rsqrt14_ps(a)
 #define vec_ftoi(a) _mm512_cvttps_epi32(a)
 #define vec_fmin(a, b) _mm512_min_ps(a, b)
 #define vec_fmax(a, b) _mm512_max_ps(a, b)
 #define vec_fabs(a) _mm512_andnot_ps(_mm512_set1_ps(-0.f), a)
+#define vec_floor(a) _mm512_floor_ps(a)
+#define vec_cmp_gt(a, b) _mm512_cmp_ps_mask(a, b, _CMP_GT_OQ)
+#define vec_cmp_lt(a, b) _mm512_cmp_ps_mask(a, b, _CMP_LT_OQ)
+#define vec_cmp_lte(a, b) _mm512_cmp_ps_mask(a, b, _CMP_LE_OQ)
+#define vec_and(a, b) _mm512_and_ps(a, b)
 #define vec_todbl_lo(a) _mm512_cvtps_pd(_mm512_extract128_ps(a, 0))
 #define vec_todbl_hi(a) _mm512_cvtps_pd(_mm512_extract128_ps(a, 1))
 #define vec_dbl_tofloat(a, b) _mm512_insertf128(_mm512_castps128_ps512(a), b, 1)
@@ -86,15 +100,28 @@
     .f[6] = a, .f[7] = a, .f[8] = a, .f[9] = a, .f[10] = a, .f[11] = a, \
     .f[12] = a, .f[13] = a, .f[14] = a, .f[15] = a                      \
   }
+#define VEC_HADD(a, b) b += _mm512_reduce_add_ps(a.v)
+#define VEC_FORM_PACKED_MASK(mask, v_mask, pack) \
+  pack += __builtin_popcount(mask);
+#define VEC_LEFT_PACK(a, mask, result) \
+  _mm512_mask_compressstoreu_ps(result, mask, a)
 #elif defined(HAVE_AVX)
 #define VEC_SIZE 8
 #define VEC_FLOAT __m256
 #define VEC_DBL __m256d
 #define VEC_INT __m256i
 #define vec_load(a) _mm256_load_ps(a)
+#define vec_store(a, addr) _mm256_store_ps(addr, a)
+#define vec_unaligned_store(a, addr) _mm256_storeu_ps(addr, a)
+#define vec_setzero() _mm256_setzero_ps()
+#define vec_setintzero() _mm256_setzero_si256()
 #define vec_set1(a) _mm256_set1_ps(a)
+#define vec_setint1(a) _mm256_set1_epi32(a)
 #define vec_set(a, b, c, d, e, f, g, h) _mm256_set_ps(h, g, f, e, d, c, b, a)
 #define vec_dbl_set(a, b, c, d) _mm256_set_pd(d, c, b, a)
+#define vec_add(a, b) _mm256_add_ps(a, b)
+#define vec_sub(a, b) _mm256_sub_ps(a, b)
+#define vec_mul(a, b) _mm256_mul_ps(a, b)
 #define vec_sqrt(a) _mm256_sqrt_ps(a)
 #define vec_rcp(a) _mm256_rcp_ps(a)
 #define vec_rsqrt(a) _mm256_rsqrt_ps(a)
@@ -102,6 +129,12 @@
 #define vec_fmin(a, b) _mm256_min_ps(a, b)
 #define vec_fmax(a, b) _mm256_max_ps(a, b)
 #define vec_fabs(a) _mm256_andnot_ps(_mm256_set1_ps(-0.f), a)
+#define vec_floor(a) _mm256_floor_ps(a)
+#define vec_cmp_lt(a, b) _mm256_cmp_ps(a, b, _CMP_LT_OQ)
+#define vec_cmp_gt(a, b) _mm256_cmp_ps(a, b, _CMP_GT_OQ)
+#define vec_cmp_lte(a, b) _mm256_cmp_ps(a, b, _CMP_LE_OQ)
+#define vec_cmp_result(a) _mm256_movemask_ps(a)
+#define vec_and(a, b) _mm256_and_ps(a, b)
 #define vec_todbl_lo(a) _mm256_cvtps_pd(_mm256_extract128_ps(a, 0))
 #define vec_todbl_hi(a) _mm256_cvtps_pd(_mm256_extract128_ps(a, 1))
 #define vec_dbl_tofloat(a, b) _mm256_insertf128(_mm256_castps128_ps256(a), b, 1)
@@ -118,9 +151,63 @@
     .f[0] = a, .f[1] = a, .f[2] = a, .f[3] = a, .f[4] = a, .f[5] = a, \
     .f[6] = a, .f[7] = a                                              \
   }
+#define VEC_HADD(a, b)            \
+  a.v = _mm256_hadd_ps(a.v, a.v); \
+  a.v = _mm256_hadd_ps(a.v, a.v); \
+  b += a.f[0] + a.f[4];
+#define VEC_GET_LOW(a) _mm256_castps256_ps128(a)
+#define VEC_GET_HIGH(a) _mm256_extractf128_ps(a, 1)
 #ifdef HAVE_AVX2
+#define vec_fma(a, b, c) _mm256_fmadd_ps(a, b, c)
+#define identity_indices 0x0706050403020100
 #define VEC_HAVE_GATHER
 #define vec_gather(base, offsets) _mm256_i32gather_ps(base, offsets.m, 1)
+#define VEC_FORM_PACKED_MASK(mask, v_mask, pack)                               \
+  {                                                                            \
+    unsigned long expanded_mask = _pdep_u64(mask, 0x0101010101010101);         \
+    expanded_mask *= 0xFF;                                                     \
+    unsigned long wanted_indices = _pext_u64(identity_indices, expanded_mask); \
+    __m128i bytevec = _mm_cvtsi64_si128(wanted_indices);                       \
+    v_mask = _mm256_cvtepu8_epi32(bytevec);                                    \
+    pack += __builtin_popcount(mask);                                          \
+  }
+#define VEC_LEFT_PACK(a, mask, result) \
+  vec_unaligned_store(_mm256_permutevar8x32_ps(a, mask), result)
+#endif
+#ifndef vec_fma
+#define vec_fma(a, b, c) vec_add(vec_mul(a, b), c)
+#endif
+#ifndef VEC_FORM_PACKED_MASK
+#define VEC_FORM_PACKED_MASK(mask, v_mask, pack)   \
+  {                                                \
+    for (int i = 0; i < VEC_SIZE; i++)             \
+      if ((mask & (1 << i))) v_mask.i[pack++] = i; \
+  }
+#define VEC_FORM_PACKED_MASK_2(mask, v_mask, pack, mask2, v_mask2, pack2) \
+  {                                                                       \
+    for (int i = 0; i < VEC_SIZE; i++) {                                  \
+      if ((mask & (1 << i))) v_mask.i[pack++] = i;                        \
+      if ((mask2 & (1 << i))) v_mask2.i[pack2++] = i;                     \
+    }                                                                     \
+  }
+#endif
+#ifndef VEC_LEFT_PACK
+#define VEC_LEFT_PACK(a, mask, result)                                     \
+  {                                                                        \
+    __m256 t1 = _mm256_castps128_ps256(_mm256_extractf128_ps(a, 1));       \
+    __m256 t2 = _mm256_insertf128_ps(t1, _mm256_castps256_ps128(a), 1);    \
+    __m256 r0 = _mm256_permutevar_ps(a, mask);                             \
+    __m256 r1 = _mm256_permutevar_ps(t2, mask);                            \
+    __m128i k1 = _mm_slli_epi32(                                           \
+        (__m128i)(_mm_xor_si128((__m128i)VEC_GET_HIGH((__m256)mask),       \
+                                (__m128i)_mm_set1_epi32(4))),              \
+        29);                                                               \
+    __m128i k0 = _mm_slli_epi32((__m128i)(VEC_GET_LOW((__m256)mask)), 29); \
+    __m256 kk =                                                            \
+        _mm256_insertf128_ps(_mm256_castps128_ps256(_mm_castsi128_ps(k0)), \
+                             _mm_castsi128_ps(k1), 1);                     \
+    *((__m256 *)(result)) = _mm256_blendv_ps(r0, r1, kk);                  \
+  }
 #endif
 #elif defined(HAVE_SSE2)
 #define VEC_SIZE 4
@@ -128,9 +215,16 @@
 #define VEC_DBL __m128d
 #define VEC_INT __m128i
 #define vec_load(a) _mm_load_ps(a)
+#define vec_store(a, addr) _mm_store_ps(addr, a)
+#define vec_setzero() _mm_setzero_ps()
+#define vec_setintzero() _mm_setzero_si256()
 #define vec_set1(a) _mm_set1_ps(a)
+#define vec_setint1(a) _mm_set1_epi32(a)
 #define vec_set(a, b, c, d) _mm_set_ps(d, c, b, a)
 #define vec_dbl_set(a, b) _mm_set_pd(b, a)
+#define vec_add(a, b) _mm_add_ps(a, b)
+#define vec_sub(a, b) _mm_sub_ps(a, b)
+#define vec_mul(a, b) _mm_mul_ps(a, b)
 #define vec_sqrt(a) _mm_sqrt_ps(a)
 #define vec_rcp(a) _mm_rcp_ps(a)
 #define vec_rsqrt(a) _mm_rsqrt_ps(a)
@@ -138,6 +232,12 @@
 #define vec_fmin(a, b) _mm_min_ps(a, b)
 #define vec_fmax(a, b) _mm_max_ps(a, b)
 #define vec_fabs(a) _mm_andnot_ps(_mm_set1_ps(-0.f), a)
+#define vec_floor(a) _mm_floor_ps(a)
+#define vec_cmp_gt(a, b) _mm_cmpgt_ps(a, b)
+#define vec_cmp_lt(a, b) _mm_cmplt_ps(a, b)
+#define vec_cmp_lte(a, b) _mm_cmp_ps(a, b, _CMP_LE_OQ)
+#define vec_cmp_result(a) _mm_movemask_ps(a)
+#define vec_and(a, b) _mm_and_ps(a, b)
 #define vec_todbl_lo(a) _mm_cvtps_pd(a)
 #define vec_todbl_hi(a) _mm_cvtps_pd(_mm_movehl_ps(a, a))
 #define vec_dbl_tofloat(a, b) _mm_movelh_ps(_mm_cvtpd_ps(a), _mm_cvtpd_ps(b))
@@ -151,6 +251,12 @@
 #define vec_dbl_fmax(a, b) _mm_max_pd(a, b)
 #define FILL_VEC(a) \
   { .f[0] = a, .f[1] = a, .f[2] = a, .f[3] = a }
+#define VEC_HADD(a, b)         \
+  a.v = _mm_hadd_ps(a.v, a.v); \
+  b += a.f[0] + a.f[1];
+#ifndef vec_fma
+#define vec_fma(a, b, c) vec_add(vec_mul(a, b), c)
+#endif
 #else
 #define VEC_SIZE 4
 #endif
@@ -165,6 +271,45 @@ typedef union {
   int i[VEC_SIZE];
 } vector;
 
+/**
+ * @brief Calculates the inverse ($1/x$) of a vector using intrinsics and a
+ * Newton iteration to obtain the correct level of accuracy.
+ *
+ * @param x #vector to be inverted.
+ * @return x_inv #vector inverted x.
+ */
+__attribute__((always_inline)) INLINE vector vec_reciprocal(vector x) {
+
+  vector x_inv;
+
+  x_inv.v = vec_rcp(x.v);
+  x_inv.v = vec_sub(x_inv.v,
+                    vec_mul(x_inv.v, (vec_fma(x.v, x_inv.v, vec_set1(-1.0f)))));
+
+  return x_inv;
+}
+
+/**
+ * @brief Calculates the inverse and square root (\f$1/\sqrt{x}\f$) of a vector
+ * using intrinsics and a Newton iteration to obtain the correct level of
+ * accuracy.
+ *
+ * @param x #vector to be inverted.
+ * @return x_inv #vector inverted x.
+ */
+__attribute__((always_inline)) INLINE vector vec_reciprocal_sqrt(vector x) {
+
+  vector x_inv;
+
+  x_inv.v = vec_rsqrt(x.v);
+  x_inv.v = vec_sub(
+      x_inv.v,
+      vec_mul(vec_mul(vec_set1(0.5f), x_inv.v),
+              (vec_fma(x.v, vec_mul(x_inv.v, x_inv.v), vec_set1(-1.0f)))));
+
+  return x_inv;
+}
+
 #else
 /* Needed for cache alignment. */
 #define VEC_SIZE 16
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 136b7ad231947574a5459298e7fb85902028a3f4..0db5c2544433012dcd7f451f535391aa81b1f802 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -25,15 +25,15 @@ TESTS = testGreetings testMaths testReading.sh testSingle testKernel testSymmetr
         testPair.sh testPairPerturbed.sh test27cells.sh test27cellsPerturbed.sh  \
         testParser.sh testSPHStep test125cells.sh testKernelGrav testFFT \
         testAdiabaticIndex testRiemannExact testRiemannTRRS testRiemannHLLC \
-        testMatrixInversion testThreadpool
+        testMatrixInversion testThreadpool testDump testLogger
 
 # List of test programs to compile
 check_PROGRAMS = testGreetings testReading testSingle testTimeIntegration \
 		 testSPHStep testPair test27cells test125cells testParser \
                  testKernel testKernelGrav testFFT testInteractions testMaths \
-                 testSymmetry testThreadpool \
+                 testSymmetry testThreadpool benchmarkInteractions \
                  testAdiabaticIndex testRiemannExact testRiemannTRRS \
-                 testRiemannHLLC testMatrixInversion
+                 testRiemannHLLC testMatrixInversion testDump testLogger
 
 # Sources for the individual programs
 testGreetings_SOURCES = testGreetings.c
@@ -66,6 +66,8 @@ testFFT_SOURCES = testFFT.c
 
 testInteractions_SOURCES = testInteractions.c
 
+benchmarkInteractions_SOURCES = benchmarkInteractions.c
+
 testAdiabaticIndex_SOURCES = testAdiabaticIndex.c
 
 testRiemannExact_SOURCES = testRiemannExact.c
@@ -78,6 +80,10 @@ testMatrixInversion_SOURCES = testMatrixInversion.c
 
 testThreadpool_SOURCES = testThreadpool.c
 
+testDump_SOURCES = testDump.c
+
+testLogger_SOURCES = testLogger.c
+
 # Files necessary for distribution
 EXTRA_DIST = testReading.sh makeInput.py testPair.sh testPairPerturbed.sh \
 	     test27cells.sh test27cellsPerturbed.sh testParser.sh \
diff --git a/tests/benchmarkInteractions.c b/tests/benchmarkInteractions.c
new file mode 100644
index 0000000000000000000000000000000000000000..6d6d345bee743d28fb4bdda911bd4bcc4c78205f
--- /dev/null
+++ b/tests/benchmarkInteractions.c
@@ -0,0 +1,500 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (C) 2015 Matthieu Schaller (matthieu.schaller@durham.ac.uk).
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+#include <fenv.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include "swift.h"
+
+#define array_align sizeof(float) * VEC_SIZE
+#define ACC_THRESHOLD 1e-5
+
+#ifdef NONSYM_DENSITY
+#define IACT runner_iact_nonsym_density
+#define IACT_VEC runner_iact_nonsym_2_vec_density
+#define IACT_NAME "test_nonsym_density"
+#endif
+
+#ifdef SYM_DENSITY
+#define IACT runner_iact_density
+#define IACT_VEC runner_iact_vec_density
+#define IACT_NAME "test_sym_density"
+#endif
+
+#ifdef NONSYM_FORCE
+#define IACT runner_iact_nonsym_force
+#define IACT_VEC runner_iact_nonsym_vec_force
+#define IACT_NAME "test_nonsym_force"
+#endif
+
+#ifdef SYM_FORCE
+#define IACT runner_iact_force
+#define IACT_VEC runner_iact_vec_force
+#define IACT_NAME "test_sym_force"
+#endif
+
+#ifndef IACT
+#define IACT runner_iact_nonsym_density
+#define IACT_VEC runner_iact_nonsym_2_vec_density
+#define IACT_NAME "test_nonsym_density"
+#endif
+
+/**
+ * @brief Constructs an array of particles in a valid state prior to
+ * a IACT_NONSYM and IACT_NONSYM_VEC call.
+ *
+ * @param count No. of particles to create
+ * @param offset The position of the particle offset from (0,0,0).
+ * @param spacing Particle spacing.
+ * @param h The smoothing length of the particles in units of the inter-particle
+ *separation.
+ * @param partId The running counter of IDs.
+ */
+struct part *make_particles(size_t count, double *offset, double spacing,
+                            double h, long long *partId) {
+
+  struct part *particles;
+  if (posix_memalign((void **)&particles, part_align,
+                     count * sizeof(struct part)) != 0) {
+    error("couldn't allocate particles, no. of particles: %d", (int)count);
+  }
+  bzero(particles, count * sizeof(struct part));
+
+  /* Construct the particles */
+  struct part *p;
+
+  /* Set test particle at centre of unit sphere. */
+  p = &particles[0];
+
+  /* Place the test particle at the centre of a unit sphere. */
+  p->x[0] = 0.0f;
+  p->x[1] = 0.0f;
+  p->x[2] = 0.0f;
+
+  p->h = h;
+  p->id = ++(*partId);
+  p->mass = 1.0f;
+
+  /* Place rest of particles around the test particle
+   * with random position within a unit sphere. */
+  for (size_t i = 1; i < count; ++i) {
+    p = &particles[i];
+
+    /* Randomise positions within a unit sphere. */
+    p->x[0] = random_uniform(-1.0, 1.0);
+    p->x[1] = random_uniform(-1.0, 1.0);
+    p->x[2] = random_uniform(-1.0, 1.0);
+
+    /* Randomise velocities. */
+    p->v[0] = random_uniform(-0.05, 0.05);
+    p->v[1] = random_uniform(-0.05, 0.05);
+    p->v[2] = random_uniform(-0.05, 0.05);
+
+    p->h = h;
+    p->id = ++(*partId);
+    p->mass = 1.0f;
+  }
+  return particles;
+}
+
+/**
+ * @brief Populates particle properties needed for the force calculation.
+ */
+void prepare_force(struct part *parts, size_t count) {
+
+  struct part *p;
+  for (size_t i = 0; i < count; ++i) {
+    p = &parts[i];
+    p->rho = i + 1;
+    p->force.balsara = random_uniform(0.0, 1.0);
+    p->force.P_over_rho2 = i + 1;
+    p->force.soundspeed = random_uniform(2.0, 3.0);
+    p->force.v_sig = 0.0f;
+    p->force.h_dt = 0.0f;
+  }
+}
+
+/**
+ * @brief Dumps all particle information to a file
+ */
+void dump_indv_particle_fields(char *fileName, struct part *p) {
+
+  FILE *file = fopen(fileName, "a");
+
+  fprintf(file,
+          "%6llu %10f %10f %10f %10f %10f %10f %10e %10e %10e %13e %13e %13e "
+          "%13e %13e %13e %13e "
+          "%13e %13e %13e\n",
+          p->id, p->x[0], p->x[1], p->x[2], p->v[0], p->v[1], p->v[2],
+          p->a_hydro[0], p->a_hydro[1], p->a_hydro[2], p->rho,
+          p->density.rho_dh, p->density.wcount, p->density.wcount_dh,
+          p->force.h_dt, p->force.v_sig,
+#if defined(MINIMAL_SPH)
+          0., 0., 0., 0.
+#else
+          p->density.div_v, p->density.rot_v[0], p->density.rot_v[1],
+          p->density.rot_v[2]
+#endif
+          );
+  fclose(file);
+}
+
+/**
+ * @brief Creates a header for the output file
+ */
+void write_header(char *fileName) {
+
+  FILE *file = fopen(fileName, "w");
+  /* Write header */
+  fprintf(file,
+          "# %4s %10s %10s %10s %10s %10s %10s %10s %10s %10s %13s %13s %13s "
+          "%13s %13s %13s %13s"
+          "%13s %13s %13s %13s\n",
+          "ID", "pos_x", "pos_y", "pos_z", "v_x", "v_y", "v_z", "a_x", "a_y",
+          "a_z", "rho", "rho_dh", "wcount", "wcount_dh", "dh/dt", "v_sig",
+          "div_v", "curl_vx", "curl_vy", "curl_vz", "dS/dt");
+  fprintf(file, "\n# PARTICLES BEFORE INTERACTION:\n");
+  fclose(file);
+}
+
+/**
+ * @brief Compares the vectorised result against
+ * the serial result of the interaction.
+ *
+ * @param serial_test_part Particle that has been updated serially
+ * @param serial_parts Particle array that has been interacted serially
+ * @param vec_test_part Particle that has been updated using vectors
+ * @param vec_parts Particle array to be interacted using vectors
+ * @param count No. of particles that have been interacted
+ *
+ * @return Non-zero value if difference found, 0 otherwise
+ */
+int check_results(struct part serial_test_part, struct part *serial_parts,
+                  struct part vec_test_part, struct part *vec_parts,
+                  int count) {
+  int result = 0;
+  result += compare_particles(serial_test_part, vec_test_part, ACC_THRESHOLD);
+
+  for (int i = 0; i < count; i++)
+    result += compare_particles(serial_parts[i], vec_parts[i], ACC_THRESHOLD);
+
+  return result;
+}
+
+/*
+ * @brief Calls the serial and vectorised version of the non-symmetrical density
+ * interaction.
+ *
+ * @param test_part Particle that will be updated
+ * @param parts Particle array to be interacted
+ * @param count No. of particles to be interacted
+ * @param serial_inter_func Serial interaction function to be called
+ * @param vec_inter_func Vectorised interaction function to be called
+ * @param runs No. of times to call interactions
+ *
+ */
+void test_interactions(struct part test_part, struct part *parts, size_t count,
+                       char *filePrefix, int runs) {
+
+  ticks serial_time = 0;
+#ifdef WITH_VECTORIZATION
+  ticks vec_time = 0;
+#endif
+
+  FILE *file;
+  char serial_filename[200] = "";
+  char vec_filename[200] = "";
+
+  strcpy(serial_filename, filePrefix);
+  strcpy(vec_filename, filePrefix);
+  sprintf(serial_filename + strlen(serial_filename), "_serial.dat");
+  sprintf(vec_filename + strlen(vec_filename), "_vec.dat");
+
+  write_header(serial_filename);
+  write_header(vec_filename);
+
+  struct part pi_serial, pi_vec;
+  struct part pj_serial[count], pj_vec[count];
+
+  float r2[count] __attribute__((aligned(array_align)));
+  float dx[3 * count] __attribute__((aligned(array_align)));
+
+#ifdef WITH_VECTORIZATION
+  struct part *piq[count], *pjq[count];
+  for (size_t k = 0; k < count; k++) {
+    piq[k] = NULL;
+    pjq[k] = NULL;
+  }
+
+  float r2q[count] __attribute__((aligned(array_align)));
+  float hiq[count] __attribute__((aligned(array_align)));
+  float dxq[count] __attribute__((aligned(array_align)));
+
+  float dyq[count] __attribute__((aligned(array_align)));
+  float dzq[count] __attribute__((aligned(array_align)));
+  float mjq[count] __attribute__((aligned(array_align)));
+  float vixq[count] __attribute__((aligned(array_align)));
+  float viyq[count] __attribute__((aligned(array_align)));
+  float vizq[count] __attribute__((aligned(array_align)));
+  float vjxq[count] __attribute__((aligned(array_align)));
+  float vjyq[count] __attribute__((aligned(array_align)));
+  float vjzq[count] __attribute__((aligned(array_align)));
+#endif
+
+  /* Call serial interaction a set number of times. */
+  for (int k = 0; k < runs; k++) {
+    /* Reset particle to initial setup */
+    pi_serial = test_part;
+    for (size_t i = 0; i < count; i++) pj_serial[i] = parts[i];
+
+    /* Only dump data on first run. */
+    if (k == 0) {
+      /* Dump state of particles before serial interaction. */
+      dump_indv_particle_fields(serial_filename, &pi_serial);
+      for (size_t i = 0; i < count; i++)
+        dump_indv_particle_fields(serial_filename, &pj_serial[i]);
+    }
+
+    /* Perform serial interaction */
+    for (size_t i = 0; i < count; i++) {
+      /* Compute the pairwise distance. */
+      r2[i] = 0.0f;
+      for (int k = 0; k < 3; k++) {
+        int ind = (3 * i) + k;
+        dx[ind] = pi_serial.x[k] - pj_serial[i].x[k];
+        r2[i] += dx[ind] * dx[ind];
+      }
+    }
+
+    const ticks tic = getticks();
+/* Perform serial interaction */
+#ifdef __ICC
+#pragma novector
+#endif
+    for (size_t i = 0; i < count; i++) {
+      IACT(r2[i], &(dx[3 * i]), pi_serial.h, pj_serial[i].h, &pi_serial,
+           &pj_serial[i]);
+    }
+    serial_time += getticks() - tic;
+  }
+
+  file = fopen(serial_filename, "a");
+  fprintf(file, "\n# PARTICLES AFTER INTERACTION:\n");
+  fclose(file);
+
+  /* Dump result of serial interaction. */
+  dump_indv_particle_fields(serial_filename, &pi_serial);
+  for (size_t i = 0; i < count; i++)
+    dump_indv_particle_fields(serial_filename, &pj_serial[i]);
+
+  /* Call vector interaction a set number of times. */
+  for (int k = 0; k < runs; k++) {
+    /* Reset particle to initial setup */
+    pi_vec = test_part;
+    for (size_t i = 0; i < count; i++) pj_vec[i] = parts[i];
+
+    /* Setup arrays for vector interaction. */
+    for (size_t i = 0; i < count; i++) {
+      /* Compute the pairwise distance. */
+      float r2 = 0.0f;
+      float dx[3];
+      for (int k = 0; k < 3; k++) {
+        dx[k] = pi_vec.x[k] - pj_vec[i].x[k];
+        r2 += dx[k] * dx[k];
+      }
+
+#ifdef WITH_VECTORIZATION
+      r2q[i] = r2;
+      dxq[i] = dx[0];
+      hiq[i] = pi_vec.h;
+      piq[i] = &pi_vec;
+      pjq[i] = &pj_vec[i];
+
+      dyq[i] = dx[1];
+      dzq[i] = dx[2];
+      mjq[i] = pj_vec[i].mass;
+      vixq[i] = pi_vec.v[0];
+      viyq[i] = pi_vec.v[1];
+      vizq[i] = pi_vec.v[2];
+      vjxq[i] = pj_vec[i].v[0];
+      vjyq[i] = pj_vec[i].v[1];
+      vjzq[i] = pj_vec[i].v[2];
+#endif
+    }
+
+    /* Only dump data on first run. */
+    if (k == 0) {
+#ifdef WITH_VECTORIZATION
+      /* Dump state of particles before vector interaction. */
+      dump_indv_particle_fields(vec_filename, piq[0]);
+      for (size_t i = 0; i < count; i++)
+        dump_indv_particle_fields(vec_filename, pjq[i]);
+#endif
+    }
+
+/* Perform vector interaction. */
+#ifdef WITH_VECTORIZATION
+    vector hi_vec, hi_inv_vec, vix_vec, viy_vec, viz_vec, mask, mask2;
+    vector rhoSum, rho_dhSum, wcountSum, wcount_dhSum, div_vSum, curlvxSum,
+        curlvySum, curlvzSum;
+
+    rhoSum.v = vec_set1(0.f);
+    rho_dhSum.v = vec_set1(0.f);
+    wcountSum.v = vec_set1(0.f);
+    wcount_dhSum.v = vec_set1(0.f);
+    div_vSum.v = vec_set1(0.f);
+    curlvxSum.v = vec_set1(0.f);
+    curlvySum.v = vec_set1(0.f);
+    curlvzSum.v = vec_set1(0.f);
+
+    hi_vec.v = vec_load(&hiq[0]);
+    vix_vec.v = vec_load(&vixq[0]);
+    viy_vec.v = vec_load(&viyq[0]);
+    viz_vec.v = vec_load(&vizq[0]);
+
+    hi_inv_vec = vec_reciprocal(hi_vec);
+    mask.m = vec_setint1(0xFFFFFFFF);
+    mask2.m = vec_setint1(0xFFFFFFFF);
+
+#ifdef HAVE_AVX512_F
+    KNL_MASK_16 knl_mask, knl_mask2;
+    knl_mask = 0xFFFF;
+    knl_mask2 = 0xFFFF;
+#endif
+
+    const ticks vec_tic = getticks();
+
+    for (size_t i = 0; i < count; i += 2 * VEC_SIZE) {
+
+      IACT_VEC(&(r2q[i]), &(dxq[i]), &(dyq[i]), &(dzq[i]), (hi_inv_vec),
+               (vix_vec), (viy_vec), (viz_vec), &(vjxq[i]), &(vjyq[i]),
+               &(vjzq[i]), &(mjq[i]), &rhoSum, &rho_dhSum, &wcountSum,
+               &wcount_dhSum, &div_vSum, &curlvxSum, &curlvySum, &curlvzSum,
+               mask, mask2,
+#ifdef HAVE_AVX512_F
+               knl_mask, knl_mask2);
+#else
+               0, 0);
+#endif
+    }
+
+    VEC_HADD(rhoSum, piq[0]->rho);
+    VEC_HADD(rho_dhSum, piq[0]->density.rho_dh);
+    VEC_HADD(wcountSum, piq[0]->density.wcount);
+    VEC_HADD(wcount_dhSum, piq[0]->density.wcount_dh);
+    VEC_HADD(div_vSum, piq[0]->density.div_v);
+    VEC_HADD(curlvxSum, piq[0]->density.rot_v[0]);
+    VEC_HADD(curlvySum, piq[0]->density.rot_v[1]);
+    VEC_HADD(curlvzSum, piq[0]->density.rot_v[2]);
+
+    vec_time += getticks() - vec_tic;
+#endif
+  }
+
+  file = fopen(vec_filename, "a");
+  fprintf(file, "\n# PARTICLES AFTER INTERACTION:\n");
+  fclose(file);
+
+#ifdef WITH_VECTORIZATION
+  /* Dump result of serial interaction. */
+  dump_indv_particle_fields(vec_filename, piq[0]);
+  for (size_t i = 0; i < count; i++)
+    dump_indv_particle_fields(vec_filename, pjq[i]);
+#endif
+
+#ifdef WITH_VECTORIZATION
+  /* Check serial results against the vectorised results. */
+  if (check_results(pi_serial, pj_serial, pi_vec, pj_vec, count))
+    message("Differences found...");
+#endif
+
+  message("The serial interactions took     : %15lli ticks.",
+          serial_time / runs);
+#ifdef WITH_VECTORIZATION
+  message("The vectorised interactions took : %15lli ticks.", vec_time / runs);
+  message("Speed up: %15fx.", (double)(serial_time) / vec_time);
+#endif
+}
+
+/* And go... */
+int main(int argc, char *argv[]) {
+  size_t runs = 10000;
+  double h = 1.0, spacing = 0.5;
+  double offset[3] = {0.0, 0.0, 0.0};
+  size_t count = 256;
+
+  /* Get some randomness going */
+  srand(0);
+
+  char c;
+  while ((c = getopt(argc, argv, "h:s:n:r:")) != -1) {
+    switch (c) {
+      case 'h':
+        sscanf(optarg, "%lf", &h);
+        break;
+      case 's':
+        sscanf(optarg, "%lf", &spacing);
+      case 'n':
+        sscanf(optarg, "%zu", &count);
+        break;
+      case 'r':
+        sscanf(optarg, "%zu", &runs);
+        break;
+      case '?':
+        error("Unknown option.");
+        break;
+    }
+  }
+
+  if (h < 0 || spacing < 0) {
+    printf(
+        "\nUsage: %s [OPTIONS...]\n"
+        "\nGenerates a particle array with equal particle separation."
+        "\nThese are then interacted using runner_iact_density and "
+        "runner_iact_vec_density."
+        "\n\nOptions:"
+        "\n-h DISTANCE=1.2348 - Smoothing length in units of <x>"
+        "\n-s SPACING=0.5     - Spacing between particles"
+        "\n-n NUMBER=9        - No. of particles",
+        argv[0]);
+    exit(1);
+  }
+
+  /* Correct count so that VEC_SIZE of particles interact with the test
+   * particle. */
+  count = count - (count % VEC_SIZE) + 1;
+
+  /* Build the infrastructure */
+  static long long partId = 0;
+  struct part test_particle;
+  struct part *particles = make_particles(count, offset, spacing, h, &partId);
+
+#if defined(NONSYM_FORCE) || defined(SYM_FORCE)
+  prepare_force(particles, count);
+#endif
+
+  test_particle = particles[0];
+  /* Call the non-sym density test. */
+  message("Testing %s interaction...", IACT_NAME);
+  test_interactions(test_particle, &particles[1], count - 1, IACT_NAME, runs);
+
+  return 0;
+}
diff --git a/tests/test125cells.c b/tests/test125cells.c
index 3ae80d952f78e8f50235cf38af493501c1c97634..91b1cf6dc3b321643aae1f4eec6bd3d7abb48350 100644
--- a/tests/test125cells.c
+++ b/tests/test125cells.c
@@ -272,8 +272,7 @@ struct cell *make_cell(size_t n, const double offset[3], double size, double h,
         hydro_first_init_part(part, xpart);
 
         part->id = ++(*partId);
-        part->ti_begin = 0;
-        part->ti_end = 1;
+        part->time_bin = 1;
 
 #if defined(GIZMO_SPH)
         part->geometry.volume = part->conserved.mass / density;
@@ -292,6 +291,11 @@ struct cell *make_cell(size_t n, const double offset[3], double size, double h,
                 part->conserved.mass;
 #endif
 
+#ifdef SWIFT_DEBUG_CHECKS
+        part->ti_drift = 8;
+        part->ti_kick = 8;
+#endif
+
         ++part;
         ++xpart;
       }
@@ -311,9 +315,9 @@ struct cell *make_cell(size_t n, const double offset[3], double size, double h,
   cell->loc[1] = offset[1];
   cell->loc[2] = offset[2];
 
-  cell->ti_old = 1;
-  cell->ti_end_min = 1;
-  cell->ti_end_max = 1;
+  cell->ti_old = 8;
+  cell->ti_end_min = 8;
+  cell->ti_end_max = 8;
 
   // shuffle_particles(cell->parts, cell->count);
 
@@ -364,10 +368,10 @@ void dump_particle_fields(char *fileName, struct cell *main_cell,
 #else
             main_cell->parts[pid].density.div_v,
 #endif
-            hydro_get_entropy(&main_cell->parts[pid], 0.f),
-            hydro_get_internal_energy(&main_cell->parts[pid], 0.f),
-            hydro_get_pressure(&main_cell->parts[pid], 0.f),
-            hydro_get_soundspeed(&main_cell->parts[pid], 0.f),
+            hydro_get_entropy(&main_cell->parts[pid]),
+            hydro_get_internal_energy(&main_cell->parts[pid]),
+            hydro_get_pressure(&main_cell->parts[pid]),
+            hydro_get_soundspeed(&main_cell->parts[pid]),
             main_cell->parts[pid].a_hydro[0], main_cell->parts[pid].a_hydro[1],
             main_cell->parts[pid].a_hydro[2], main_cell->parts[pid].force.h_dt,
 #if defined(GADGET2_SPH)
@@ -527,7 +531,7 @@ int main(int argc, char *argv[]) {
   engine.physical_constants = &prog_const;
   engine.s = &space;
   engine.time = 0.1f;
-  engine.ti_current = 1;
+  engine.ti_current = 8;
 
   struct runner runner;
   runner.e = &engine;
@@ -572,6 +576,12 @@ int main(int argc, char *argv[]) {
 
     const ticks tic = getticks();
 
+    /* Start with a gentle kick */
+    // runner_do_kick1(&runner, main_cell, 0);
+
+    /* And a gentle drift */
+    // runner_do_drift(&runner, main_cell, 0);
+
     /* First, sort stuff */
     for (int j = 0; j < 125; ++j) runner_do_sort(&runner, cells[j], 0x1FFF, 0);
 
@@ -640,7 +650,8 @@ int main(int argc, char *argv[]) {
 #endif
 
     /* Finally, give a gentle kick */
-    runner_do_kick(&runner, main_cell, 0);
+    runner_do_end_force(&runner, main_cell, 0);
+    // runner_do_kick2(&runner, main_cell, 0);
 
     const ticks toc = getticks();
     time += toc - tic;
@@ -663,6 +674,12 @@ int main(int argc, char *argv[]) {
 
   const ticks tic = getticks();
 
+  /* Kick the central cell */
+  // runner_do_kick1(&runner, main_cell, 0);
+
+  /* And drift it */
+  runner_do_drift(&runner, main_cell, 0);
+
   /* Initialise the particles */
   for (int j = 0; j < 125; ++j) runner_do_init(&runner, cells[j], 0);
 
@@ -728,7 +745,8 @@ int main(int argc, char *argv[]) {
 #endif
 
   /* Finally, give a gentle kick */
-  runner_do_kick(&runner, main_cell, 0);
+  runner_do_end_force(&runner, main_cell, 0);
+  // runner_do_kick2(&runner, main_cell, 0);
 
   const ticks toc = getticks();
 
diff --git a/tests/test27cells.c b/tests/test27cells.c
index f58b4dc410637f3d91369dab1b442de0b7044c08..929a148d1f5730b63de79e9a1ab7e25f1ca7311e 100644
--- a/tests/test27cells.c
+++ b/tests/test27cells.c
@@ -30,6 +30,18 @@
 /* Local headers. */
 #include "swift.h"
 
+#define ACC_THRESHOLD 1e-5
+
+#if defined(WITH_VECTORIZATION)
+#define DOSELF1 runner_doself1_density_vec
+#define DOSELF1_NAME "runner_doself1_density_vec"
+#endif
+
+#ifndef DOSELF1
+#define DOSELF1 runner_doself1_density
+#define DOSELF1_NAME "runner_doself1_density"
+#endif
+
 enum velocity_types {
   velocity_zero,
   velocity_random,
@@ -116,8 +128,13 @@ struct cell *make_cell(size_t n, double *offset, double size, double h,
         part->entropy_one_over_gamma = 1.f;
 #endif
 
-        part->ti_begin = 0;
-        part->ti_end = 1;
+        part->time_bin = 1;
+
+#ifdef SWIFT_DEBUG_CHECKS
+        part->ti_drift = 8;
+        part->ti_kick = 8;
+#endif
+
         ++part;
       }
     }
@@ -135,8 +152,9 @@ struct cell *make_cell(size_t n, double *offset, double size, double h,
   cell->loc[1] = offset[1];
   cell->loc[2] = offset[2];
 
-  cell->ti_end_min = 1;
-  cell->ti_end_max = 1;
+  cell->ti_old = 8;
+  cell->ti_end_min = 8;
+  cell->ti_end_max = 8;
 
   shuffle_particles(cell->parts, cell->count);
 
@@ -254,15 +272,40 @@ void dump_particle_fields(char *fileName, struct cell *main_cell,
   fclose(file);
 }
 
+/**
+ * @brief Compares the vectorised result against
+ * the serial result of the interaction.
+ *
+ * @param serial_parts Particle array that has been interacted serially
+ * @param vec_parts Particle array to be interacted using vectors
+ * @param count No. of particles that have been interacted
+ * @param threshold Level of accuracy needed
+ *
+ * @return Non-zero value if difference found, 0 otherwise
+ */
+int check_results(struct part *serial_parts, struct part *vec_parts, int count,
+                  double threshold) {
+  int result = 0;
+
+  for (int i = 0; i < count; i++)
+    result += compare_particles(serial_parts[i], vec_parts[i], threshold);
+
+  return result;
+}
+
 /* Just a forward declaration... */
 void runner_dopair1_density(struct runner *r, struct cell *ci, struct cell *cj);
 void runner_doself1_density(struct runner *r, struct cell *ci);
+void runner_doself1_density_vec(struct runner *r, struct cell *ci);
 
 /* And go... */
 int main(int argc, char *argv[]) {
+
+  engine_pin();
   size_t runs = 0, particles = 0;
   double h = 1.23485, size = 1., rho = 1.;
   double perturbation = 0.;
+  double threshold = ACC_THRESHOLD;
   char outputFileNameExtension[200] = "";
   char outputFileName[200] = "";
   enum velocity_types vel = velocity_zero;
@@ -278,7 +321,7 @@ int main(int argc, char *argv[]) {
   srand(0);
 
   char c;
-  while ((c = getopt(argc, argv, "m:s:h:n:r:t:d:f:v:")) != -1) {
+  while ((c = getopt(argc, argv, "m:s:h:n:r:t:d:f:v:a:")) != -1) {
     switch (c) {
       case 'h':
         sscanf(optarg, "%lf", &h);
@@ -304,6 +347,9 @@ int main(int argc, char *argv[]) {
       case 'v':
         sscanf(optarg, "%d", (int *)&vel);
         break;
+      case 'a':
+        sscanf(optarg, "%lf", &threshold);
+        break;
       case '?':
         error("Unknown option.");
         break;
@@ -329,6 +375,8 @@ int main(int argc, char *argv[]) {
   }
 
   /* Help users... */
+  message("Function called: %s", DOSELF1_NAME);
+  message("Vector size: %d", VEC_SIZE);
   message("Adiabatic index: ga = %f", hydro_gamma);
   message("Hydro implementation: %s", SPH_IMPLEMENTATION);
   message("Smoothing length: h = %f", h * size);
@@ -347,7 +395,7 @@ int main(int argc, char *argv[]) {
   struct engine engine;
   engine.s = &space;
   engine.time = 0.1f;
-  engine.ti_current = 1;
+  engine.ti_current = 8;
 
   struct runner runner;
   runner.e = &engine;
@@ -371,6 +419,9 @@ int main(int argc, char *argv[]) {
   /* Store the main cell for future use */
   main_cell = cells[13];
 
+  ticks timings[27];
+  for (int i = 0; i < 27; i++) timings[i] = 0;
+
   ticks time = 0;
   for (size_t i = 0; i < runs; ++i) {
     /* Zero the fields */
@@ -381,12 +432,30 @@ int main(int argc, char *argv[]) {
 #if !(defined(MINIMAL_SPH) && defined(WITH_VECTORIZATION))
 
     /* Run all the pairs */
-    for (int j = 0; j < 27; ++j)
-      if (cells[j] != main_cell)
+    for (int j = 0; j < 27; ++j) {
+      if (cells[j] != main_cell) {
+        const ticks sub_tic = getticks();
+
         runner_dopair1_density(&runner, main_cell, cells[j]);
 
-    /* And now the self-interaction */
-    runner_doself1_density(&runner, main_cell);
+        const ticks sub_toc = getticks();
+        timings[j] += sub_toc - sub_tic;
+      }
+    }
+
+/* And now the self-interaction */
+#ifdef WITH_VECTORIZATION
+    runner.par_cache.count = 0;
+    cache_init(&runner.par_cache, 512);
+#endif
+
+    const ticks self_tic = getticks();
+
+    DOSELF1(&runner, main_cell);
+
+    const ticks self_toc = getticks();
+
+    timings[13] += self_toc - self_tic;
 
 #endif
 
@@ -404,8 +473,26 @@ int main(int argc, char *argv[]) {
     }
   }
 
+  /* Store the vectorised particle results. */
+  struct part vec_parts[main_cell->count];
+  for (int i = 0; i < main_cell->count; i++) vec_parts[i] = main_cell->parts[i];
+
   /* Output timing */
-  message("SWIFT calculation took       : %15lli ticks.", time / runs);
+  ticks corner_time = timings[0] + timings[2] + timings[6] + timings[8] +
+                      timings[18] + timings[20] + timings[24] + timings[26];
+
+  ticks edge_time = timings[1] + timings[3] + timings[5] + timings[7] +
+                    timings[9] + timings[11] + timings[15] + timings[17] +
+                    timings[19] + timings[21] + timings[23] + timings[25];
+
+  ticks face_time = timings[4] + timings[10] + timings[12] + timings[14] +
+                    timings[16] + timings[22];
+
+  message("Corner calculations took       : %15lli ticks.", corner_time / runs);
+  message("Edge calculations took         : %15lli ticks.", edge_time / runs);
+  message("Face calculations took         : %15lli ticks.", face_time / runs);
+  message("Self calculations took         : %15lli ticks.", timings[13] / runs);
+  message("SWIFT calculation took         : %15lli ticks.", time / runs);
 
   /* Now perform a brute-force version for accuracy tests */
 
@@ -434,6 +521,10 @@ int main(int argc, char *argv[]) {
   sprintf(outputFileName, "brute_force_27_%s.dat", outputFileNameExtension);
   dump_particle_fields(outputFileName, main_cell, cells);
 
+  /* Check serial results against the vectorised results. */
+  if (check_results(main_cell->parts, vec_parts, main_cell->count, threshold))
+    message("Differences found...");
+
   /* Output timing */
   message("Brute force calculation took : %15lli ticks.", toc - tic);
 
diff --git a/tests/test27cells.sh.in b/tests/test27cells.sh.in
index bf9cfeaf9a70790a321fa7ec4c63983d8cfd866c..07b6b92a82cee2bbe9c593f8f62e750d4406f84e 100755
--- a/tests/test27cells.sh.in
+++ b/tests/test27cells.sh.in
@@ -6,7 +6,7 @@ do
 
     rm -f brute_force_27_standard.dat swift_dopair_27_standard.dat
 
-    ./test27cells -n 6 -r 1 -d 0 -f standard -v $v
+    ./test27cells -n 6 -r 1 -d 0 -f standard -v $v -a 1e-4
 
     if [ -e brute_force_27_standard.dat ]
     then
diff --git a/tests/test27cellsPerturbed.sh.in b/tests/test27cellsPerturbed.sh.in
index 3cdaf79ab17e705ec69a0b646949cc5a71109796..30498594b659101216b51dfea2346fa9230dbc97 100755
--- a/tests/test27cellsPerturbed.sh.in
+++ b/tests/test27cellsPerturbed.sh.in
@@ -6,7 +6,7 @@ do
 
     rm -f brute_force_27_perturbed.dat swift_dopair_27_perturbed.dat
 
-    ./test27cells -n 6 -r 1 -d 0.1 -f perturbed -v $v
+    ./test27cells -n 6 -r 1 -d 0.1 -f perturbed -v $v -a 5e-4
 
     if [ -e brute_force_27_perturbed.dat ]
     then
diff --git a/tests/testDump.c b/tests/testDump.c
new file mode 100644
index 0000000000000000000000000000000000000000..ab74a1b1f022761efedf5258a20c525fcef47bd6
--- /dev/null
+++ b/tests/testDump.c
@@ -0,0 +1,84 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2016 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+/* Config parameters. */
+#include "../config.h"
+
+/* Some standard headers. */
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+/* This object's header. */
+#include "../src/dump.h"
+
+/* Local headers. */
+#include "../src/threadpool.h"
+
+void dump_mapper(void *map_data, int num_elements, void *extra_data) {
+  struct dump *d = (struct dump *)extra_data;
+  size_t offset;
+  char *out_string = dump_get(d, 7, &offset);
+  char out_buff[8];
+  snprintf(out_buff, 8, "%06zi\n", offset / 7);
+  memcpy(out_string, out_buff, 7);
+}
+
+int main(int argc, char *argv[]) {
+
+  /* Some constants. */
+  const int num_threads = 4;
+  const char *filename = "/tmp/dump_test.out";
+  const int num_runs = 20;
+  const int chunk_size = 1000;
+
+  /* Prepare a threadpool to write to the dump. */
+  struct threadpool t;
+  threadpool_init(&t, num_threads);
+
+  /* Prepare a dump. */
+  struct dump d;
+  dump_init(&d, filename, 1024);
+
+  /* Dump numbers in chunks. */
+  for (int run = 0; run < num_runs; run++) {
+
+    /* Ensure capacity. */
+    dump_ensure(&d, 7 * chunk_size);
+
+    /* Dump a few numbers. */
+    printf("dumping %i chunks...\n", chunk_size);
+    fflush(stdout);
+    threadpool_map(&t, dump_mapper, NULL, chunk_size, 0, 1, &d);
+  }
+
+  /* Sync the file, not necessary before dump_close, but just to test this. */
+  dump_sync(&d);
+
+  /* Finalize the dump. */
+  dump_close(&d);
+
+  /* Return a happy number. */
+  return 0;
+}
diff --git a/tests/testInteractions.c b/tests/testInteractions.c
index d14c840ec77819bbef5750b897c72139f4d7b2b4..4ce7fe40554d24551750629fa47c0bee7acdb6da 100644
--- a/tests/testInteractions.c
+++ b/tests/testInteractions.c
@@ -30,6 +30,9 @@ int main() { return 0; }
 #include <unistd.h>
 #include "swift.h"
 
+#define array_align sizeof(float) * VEC_SIZE
+#define ACC_THRESHOLD 1e-5
+
 /* Typdef function pointers for serial and vectorised versions of the
  * interaction functions. */
 typedef void (*serial_interaction)(float, float *, float, float, struct part *,
@@ -48,8 +51,8 @@ typedef void (*vec_interaction)(float *, float *, float *, float *,
  *separation.
  * @param partId The running counter of IDs.
  */
-struct part *make_particles(int count, double *offset, double spacing, double h,
-                            long long *partId) {
+struct part *make_particles(size_t count, double *offset, double spacing,
+                            double h, long long *partId) {
 
   struct part *particles;
   if (posix_memalign((void **)&particles, part_align,
@@ -60,11 +63,28 @@ struct part *make_particles(int count, double *offset, double spacing, double h,
 
   /* Construct the particles */
   struct part *p;
-  for (size_t i = 0; i < VEC_SIZE + 1; ++i) {
+
+  /* Set test particle at centre of unit sphere. */
+  p = &particles[0];
+
+  /* Place the test particle at the centre of a unit sphere. */
+  p->x[0] = 0.0f;
+  p->x[1] = 0.0f;
+  p->x[2] = 0.0f;
+
+  p->h = h;
+  p->id = ++(*partId);
+  p->mass = 1.0f;
+
+  /* Place rest of particles around the test particle
+   * with random position within a unit sphere. */
+  for (size_t i = 1; i < count; ++i) {
     p = &particles[i];
-    p->x[0] = offset[0] + spacing * i;
-    p->x[1] = offset[1] + spacing * i;
-    p->x[2] = offset[2] + spacing * i;
+
+    /* Randomise positions within a unit sphere. */
+    p->x[0] = random_uniform(-1.0, 1.0);
+    p->x[1] = random_uniform(-1.0, 1.0);
+    p->x[2] = random_uniform(-1.0, 1.0);
 
     /* Randomise velocities. */
     p->v[0] = random_uniform(-0.05, 0.05);
@@ -81,20 +101,17 @@ struct part *make_particles(int count, double *offset, double spacing, double h,
 /**
  * @brief Populates particle properties needed for the force calculation.
  */
-void prepare_force(struct part *parts) {
+void prepare_force(struct part *parts, size_t count) {
 
   struct part *p;
-  for (size_t i = 0; i < VEC_SIZE + 1; ++i) {
+  for (size_t i = 0; i < count; ++i) {
     p = &parts[i];
     p->rho = i + 1;
-#if defined(GADGET2_SPH)
-    p->force.balsara = i + 1;
-    p->force.P_over_rho2 = i + 1;
-#elif defined(DEFAULT_SPH)
-    p->force.balsara = i + 1;
+    p->force.balsara = random_uniform(0.0, 1.0);
     p->force.P_over_rho2 = i + 1;
-#else
-#endif
+    p->force.soundspeed = random_uniform(2.0, 3.0);
+    p->force.v_sig = 0.0f;
+    p->force.h_dt = 0.0f;
   }
 }
 
@@ -106,7 +123,7 @@ void dump_indv_particle_fields(char *fileName, struct part *p) {
   FILE *file = fopen(fileName, "a");
 
   fprintf(file,
-          "%6llu %10f %10f %10f %10f %10f %10f %10f %10f %10f %13e %13e %13e "
+          "%6llu %10f %10f %10f %10f %10f %10f %10e %10e %10e %13e %13e %13e "
           "%13e %13e %13e %13e "
           "%13e %13e %13e %10f\n",
           p->id, p->x[0], p->x[1], p->x[2], p->v[0], p->v[1], p->v[2],
@@ -120,7 +137,8 @@ void dump_indv_particle_fields(char *fileName, struct part *p) {
           p->density.div_v, p->density.rot_v[0], p->density.rot_v[1],
           p->density.rot_v[2], 0.
 #else
-          0., 0., 0., 0., 0.
+          p->density.div_v, p->density.rot_v[0], p->density.rot_v[1],
+          p->density.rot_v[2]
 #endif
           );
   fclose(file);
@@ -140,24 +158,52 @@ void write_header(char *fileName) {
           "ID", "pos_x", "pos_y", "pos_z", "v_x", "v_y", "v_z", "a_x", "a_y",
           "a_z", "rho", "rho_dh", "wcount", "wcount_dh", "dh/dt", "v_sig",
           "div_v", "curl_vx", "curl_vy", "curl_vz", "dS/dt");
-  fprintf(file, "\nPARTICLES BEFORE INTERACTION:\n");
+  fprintf(file, "\n# PARTICLES BEFORE INTERACTION:\n");
   fclose(file);
 }
 
 /**
- * @brief Calls the serial and vectorised version of the non-symmetrical density
- * interaction.
+ * @brief Compares the vectorised result against
+ * the serial result of the interaction.
+ *
+ * @param serial_test_part Particle that has been updated serially
+ * @param serial_parts Particle array that has been interacted serially
+ * @param vec_test_part Particle that has been updated using vectors
+ * @param vec_parts Particle array to be interacted using vectors
+ * @param count No. of particles that have been interacted
+ *
+ * @return Non-zero value if difference found, 0 otherwise
+ */
+int check_results(struct part serial_test_part, struct part *serial_parts,
+                  struct part vec_test_part, struct part *vec_parts,
+                  int count) {
+  int result = 0;
+  result += compare_particles(serial_test_part, vec_test_part, ACC_THRESHOLD);
+
+  for (int i = 0; i < count; i++)
+    result += compare_particles(serial_parts[i], vec_parts[i], ACC_THRESHOLD);
+
+  return result;
+}
+
+/*
+ * @brief Calls the serial and vectorised version of an interaction
+ * function given by the function pointers.
  *
+ * @param test_part Particle that will be updated
  * @param parts Particle array to be interacted
  * @param count No. of particles to be interacted
+ * @param serial_inter_func Serial interaction function to be called
+ * @param vec_inter_func Vectorised interaction function to be called
+ * @param runs No. of times to call interactions
  *
  */
-void test_interactions(struct part *parts, int count,
+void test_interactions(struct part test_part, struct part *parts, size_t count,
                        serial_interaction serial_inter_func,
-                       vec_interaction vec_inter_func, char *filePrefix) {
+                       vec_interaction vec_inter_func, char *filePrefix,
+                       size_t runs) {
 
-  /* Use the first particle in the array as the one that gets updated. */
-  struct part pi = parts[0];
+  ticks serial_time = 0, vec_time = 0;
 
   FILE *file;
   char serial_filename[200] = "";
@@ -171,98 +217,148 @@ void test_interactions(struct part *parts, int count,
   write_header(serial_filename);
   write_header(vec_filename);
 
-  /* Dump state of particles before serial interaction. */
-  dump_indv_particle_fields(serial_filename, &pi);
-  for (int i = 1; i < count; i++)
-    dump_indv_particle_fields(serial_filename, &parts[i]);
-
-  /* Make copy of pi to be used in vectorised version. */
-  struct part pi_vec = pi;
-  struct part pj_vec[VEC_SIZE];
-  for (int i = 0; i < VEC_SIZE; i++) pj_vec[i] = parts[i + 1];
-
-  float r2q[VEC_SIZE] __attribute__((aligned(sizeof(float) * VEC_SIZE)));
-  float hiq[VEC_SIZE] __attribute__((aligned(sizeof(float) * VEC_SIZE)));
-  float hjq[VEC_SIZE] __attribute__((aligned(sizeof(float) * VEC_SIZE)));
-  float dxq[3 * VEC_SIZE] __attribute__((aligned(sizeof(float) * VEC_SIZE)));
-  struct part *piq[VEC_SIZE], *pjq[VEC_SIZE];
-
-  /* Perform serial interaction */
-  for (int i = 1; i < count; i++) {
-    /* Compute the pairwise distance. */
-    float r2 = 0.0f;
-    float dx[3];
-    for (int k = 0; k < 3; k++) {
-      dx[k] = pi.x[k] - parts[i].x[k];
-      r2 += dx[k] * dx[k];
+  /* Test particle at the center of a unit sphere. */
+  struct part pi_serial, pi_vec;
+
+  /* Remaining particles in the sphere that will interact with test particle. */
+  struct part pj_serial[count], pj_vec[count];
+
+  /* Stores the separation, smoothing length and pointers to particles
+   * needed for the vectorised interaction. */
+  float r2q[count] __attribute__((aligned(array_align)));
+  float hiq[count] __attribute__((aligned(array_align)));
+  float hjq[count] __attribute__((aligned(array_align)));
+  float dxq[3 * count] __attribute__((aligned(array_align)));
+  struct part *piq[count], *pjq[count];
+
+  /* Call serial interaction a set number of times. */
+  for (size_t k = 0; k < runs; k++) {
+    /* Reset particle to initial setup */
+    pi_serial = test_part;
+    for (size_t i = 0; i < count; i++) pj_serial[i] = parts[i];
+
+    /* Only dump data on first run. */
+    if (k == 0) {
+      /* Dump state of particles before serial interaction. */
+      dump_indv_particle_fields(serial_filename, &pi_serial);
+      for (size_t i = 0; i < count; i++)
+        dump_indv_particle_fields(serial_filename, &pj_serial[i]);
     }
 
-    serial_inter_func(r2, dx, pi.h, parts[i].h, &pi, &parts[i]);
+    /* Perform serial interaction */
+    for (size_t i = 0; i < count; i++) {
+      /* Compute the pairwise distance. */
+      float r2 = 0.0f;
+      float dx[3];
+      for (size_t k = 0; k < 3; k++) {
+        dx[k] = pi_serial.x[k] - pj_serial[i].x[k];
+        r2 += dx[k] * dx[k];
+      }
+
+      const ticks tic = getticks();
+
+      serial_inter_func(r2, dx, pi_serial.h, pj_serial[i].h, &pi_serial,
+                        &pj_serial[i]);
+
+      serial_time += getticks() - tic;
+    }
   }
 
   file = fopen(serial_filename, "a");
-  fprintf(file, "\nPARTICLES AFTER INTERACTION:\n");
+  fprintf(file, "\n# PARTICLES AFTER INTERACTION:\n");
   fclose(file);
 
   /* Dump result of serial interaction. */
-  dump_indv_particle_fields(serial_filename, &pi);
-  for (int i = 1; i < count; i++)
-    dump_indv_particle_fields(serial_filename, &parts[i]);
-
-  /* Setup arrays for vector interaction. */
-  for (int i = 0; i < VEC_SIZE; i++) {
-    /* Compute the pairwise distance. */
-    float r2 = 0.0f;
-    float dx[3];
-    for (int k = 0; k < 3; k++) {
-      dx[k] = pi_vec.x[k] - pj_vec[i].x[k];
-      r2 += dx[k] * dx[k];
+  dump_indv_particle_fields(serial_filename, &pi_serial);
+  for (size_t i = 0; i < count; i++)
+    dump_indv_particle_fields(serial_filename, &pj_serial[i]);
+
+  /* Call vector interaction a set number of times. */
+  for (size_t k = 0; k < runs; k++) {
+    /* Reset particle to initial setup */
+    pi_vec = test_part;
+    for (size_t i = 0; i < count; i++) pj_vec[i] = parts[i];
+
+    /* Setup arrays for vector interaction. */
+    for (size_t i = 0; i < count; i++) {
+      /* Compute the pairwise distance. */
+      float r2 = 0.0f;
+      float dx[3];
+      for (size_t k = 0; k < 3; k++) {
+        dx[k] = pi_vec.x[k] - pj_vec[i].x[k];
+        r2 += dx[k] * dx[k];
+      }
+
+      r2q[i] = r2;
+      dxq[3 * i + 0] = dx[0];
+      dxq[3 * i + 1] = dx[1];
+      dxq[3 * i + 2] = dx[2];
+      hiq[i] = pi_vec.h;
+      hjq[i] = pj_vec[i].h;
+      piq[i] = &pi_vec;
+      pjq[i] = &pj_vec[i];
     }
-    r2q[i] = r2;
-    dxq[3 * i + 0] = dx[0];
-    dxq[3 * i + 1] = dx[1];
-    dxq[3 * i + 2] = dx[2];
-    hiq[i] = pi_vec.h;
-    hjq[i] = pj_vec[i].h;
-    piq[i] = &pi_vec;
-    pjq[i] = &pj_vec[i];
-  }
 
-  /* Dump state of particles before vector interaction. */
-  dump_indv_particle_fields(vec_filename, piq[0]);
-  for (size_t i = 0; i < VEC_SIZE; i++)
-    dump_indv_particle_fields(vec_filename, pjq[i]);
+    /* Only dump data on first run. */
+    if (k == 0) {
+      /* Dump state of particles before vector interaction. */
+      dump_indv_particle_fields(vec_filename, piq[0]);
+      for (size_t i = 0; i < count; i++)
+        dump_indv_particle_fields(vec_filename, pjq[i]);
+    }
 
-  /* Perform vector interaction. */
-  vec_inter_func(r2q, dxq, hiq, hjq, piq, pjq);
+    const ticks vec_tic = getticks();
+
+    /* Perform vector interaction. */
+    for (size_t i = 0; i < count; i += VEC_SIZE) {
+      vec_inter_func(&(r2q[i]), &(dxq[3 * i]), &(hiq[i]), &(hjq[i]), &(piq[i]),
+                     &(pjq[i]));
+    }
+
+    vec_time += getticks() - vec_tic;
+  }
 
   file = fopen(vec_filename, "a");
-  fprintf(file, "\nPARTICLES AFTER INTERACTION:\n");
+  fprintf(file, "\n# PARTICLES AFTER INTERACTION:\n");
   fclose(file);
 
-  /* Dump result of serial interaction. */
+  /* Dump result of vector interaction. */
   dump_indv_particle_fields(vec_filename, piq[0]);
-  for (size_t i = 0; i < VEC_SIZE; i++)
+  for (size_t i = 0; i < count; i++)
     dump_indv_particle_fields(vec_filename, pjq[i]);
+
+  /* Check serial results against the vectorised results. */
+  if (check_results(pi_serial, pj_serial, pi_vec, pj_vec, count))
+    message("Differences found...");
+
+  message("The serial interactions took     : %15lli ticks.",
+          serial_time / runs);
+  message("The vectorised interactions took : %15lli ticks.", vec_time / runs);
 }
 
 /* And go... */
 int main(int argc, char *argv[]) {
-  double h = 1.2348, spacing = 0.5;
+  size_t runs = 10000;
+  double h = 1.0, spacing = 0.5;
   double offset[3] = {0.0, 0.0, 0.0};
-  int count = VEC_SIZE + 1;
+  size_t count = 256;
 
   /* Get some randomness going */
   srand(0);
 
   char c;
-  while ((c = getopt(argc, argv, "s:h:")) != -1) {
+  while ((c = getopt(argc, argv, "h:s:n:r:")) != -1) {
     switch (c) {
       case 'h':
         sscanf(optarg, "%lf", &h);
         break;
       case 's':
         sscanf(optarg, "%lf", &spacing);
+      case 'n':
+        sscanf(optarg, "%zu", &count);
+        break;
+      case 'r':
+        sscanf(optarg, "%zu", &runs);
         break;
       case '?':
         error("Unknown option.");
@@ -278,26 +374,35 @@ int main(int argc, char *argv[]) {
         "runner_iact_vec_density."
         "\n\nOptions:"
         "\n-h DISTANCE=1.2348 - Smoothing length in units of <x>"
-        "\n-s spacing         - Spacing between particles",
+        "\n-s SPACING=0.5     - Spacing between particles"
+        "\n-n NUMBER=9        - No. of particles",
         argv[0]);
     exit(1);
   }
 
+  /* Correct count so that VEC_SIZE of particles interact with the test
+   * particle. */
+  count = count - (count % VEC_SIZE) + 1;
+
   /* Build the infrastructure */
   static long long partId = 0;
+  struct part density_test_particle, force_test_particle;
   struct part *density_particles =
       make_particles(count, offset, spacing, h, &partId);
   struct part *force_particles =
       make_particles(count, offset, spacing, h, &partId);
-  prepare_force(force_particles);
+  prepare_force(force_particles, count);
 
   /* Define which interactions to call */
   serial_interaction serial_inter_func = &runner_iact_nonsym_density;
   vec_interaction vec_inter_func = &runner_iact_nonsym_vec_density;
 
+  density_test_particle = density_particles[0];
   /* Call the non-sym density test. */
-  test_interactions(density_particles, count, serial_inter_func, vec_inter_func,
-                    "test_nonsym_density");
+  message("Testing non-symmetrical density interaction...");
+  test_interactions(density_test_particle, &density_particles[1], count - 1,
+                    serial_inter_func, vec_inter_func, "test_nonsym_density",
+                    runs);
 
   density_particles = make_particles(count, offset, spacing, h, &partId);
 
@@ -305,28 +410,36 @@ int main(int argc, char *argv[]) {
   serial_inter_func = &runner_iact_density;
   vec_inter_func = &runner_iact_vec_density;
 
+  density_test_particle = density_particles[0];
   /* Call the symmetrical density test. */
-  test_interactions(density_particles, count, serial_inter_func, vec_inter_func,
-                    "test_sym_density");
+  message("Testing symmetrical density interaction...");
+  test_interactions(density_test_particle, &density_particles[1], count - 1,
+                    serial_inter_func, vec_inter_func, "test_sym_density",
+                    runs);
 
   /* Re-assign function pointers. */
   serial_inter_func = &runner_iact_nonsym_force;
   vec_inter_func = &runner_iact_nonsym_vec_force;
 
+  force_test_particle = force_particles[0];
   /* Call the test non-sym force test. */
-  test_interactions(force_particles, count, serial_inter_func, vec_inter_func,
-                    "test_nonsym_force");
+  message("Testing non-symmetrical force interaction...");
+  test_interactions(force_test_particle, &force_particles[1], count - 1,
+                    serial_inter_func, vec_inter_func, "test_nonsym_force",
+                    runs);
 
   force_particles = make_particles(count, offset, spacing, h, &partId);
-  prepare_force(force_particles);
+  prepare_force(force_particles, count);
 
   /* Re-assign function pointers. */
   serial_inter_func = &runner_iact_force;
   vec_inter_func = &runner_iact_vec_force;
 
+  force_test_particle = force_particles[0];
   /* Call the test symmetrical force test. */
-  test_interactions(force_particles, count, serial_inter_func, vec_inter_func,
-                    "test_sym_force");
+  message("Testing symmetrical force interaction...");
+  test_interactions(force_test_particle, &force_particles[1], count - 1,
+                    serial_inter_func, vec_inter_func, "test_sym_force", runs);
 
   return 0;
 }
diff --git a/tests/testLogger.c b/tests/testLogger.c
new file mode 100644
index 0000000000000000000000000000000000000000..ec3b33b6a9e38741e41b4678681e7afe9b9a7950
--- /dev/null
+++ b/tests/testLogger.c
@@ -0,0 +1,247 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2017 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+/* Config parameters. */
+#include "../config.h"
+
+/* Some standard headers. */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+/* This object's header. */
+#include "../src/logger.h"
+
+/* Local headers. */
+#include "../src/dump.h"
+#include "../src/part.h"
+
+void test_log_parts(struct dump *d) {
+
+  /* Write several copies of a part to the dump. */
+  struct part p;
+  bzero(&p, sizeof(struct part));
+  p.x[0] = 1.0;
+  p.v[0] = 0.1;
+
+  /* Start with an offset at the end of the dump. */
+  size_t offset = d->count;
+
+  /* Write the full part. */
+  logger_log_part(&p, logger_mask_x | logger_mask_v | logger_mask_a |
+                          logger_mask_u | logger_mask_h | logger_mask_rho |
+                          logger_mask_consts,
+                  &offset, d);
+  printf("Wrote part at offset %#016zx.\n", offset);
+
+  /* Write only the position. */
+  p.x[0] = 2.0;
+  logger_log_part(&p, logger_mask_x, &offset, d);
+  printf("Wrote part at offset %#016zx.\n", offset);
+
+  /* Write the position and velocity. */
+  p.x[0] = 3.0;
+  p.v[0] = 0.3;
+  logger_log_part(&p, logger_mask_x | logger_mask_v, &offset, d);
+  printf("Wrote part at offset %#016zx.\n", offset);
+
+  /* Recover the last part from the dump. */
+  bzero(&p, sizeof(struct part));
+  size_t offset_old = offset;
+  int mask = logger_read_part(&p, &offset, d->data);
+  printf(
+      "Recovered part at offset %#016zx with mask %#04x: p.x[0]=%e, "
+      "p.v[0]=%e.\n",
+      offset_old, mask, p.x[0], p.v[0]);
+  if (p.x[0] != 3.0 || p.v[0] != 0.3f) {
+    printf("FAIL: could not read position and velocity of stored particle.\n");
+    abort();
+  }
+
+  /* Recover the second part from the dump (only position). */
+  bzero(&p, sizeof(struct part));
+  offset_old = offset;
+  mask = logger_read_part(&p, &offset, d->data);
+  printf(
+      "Recovered part at offset %#016zx with mask %#04x: p.x[0]=%e, "
+      "p.v[0]=%e.\n",
+      offset_old, mask, p.x[0], p.v[0]);
+  if (p.x[0] != 2.0 || p.v[0] != 0.0) {
+    printf("FAIL: could not read position and velocity of stored particle.\n");
+    abort();
+  }
+
+  /* Recover the first part from the dump. */
+  bzero(&p, sizeof(struct part));
+  offset_old = offset;
+  mask = logger_read_part(&p, &offset, d->data);
+  printf(
+      "Recovered part at offset %#016zx with mask %#04x: p.x[0]=%e, "
+      "p.v[0]=%e.\n",
+      offset_old, mask, p.x[0], p.v[0]);
+  if (p.x[0] != 1.0 || p.v[0] != 0.1f) {
+    printf("FAIL: could not read position and velocity of stored particle.\n");
+    abort();
+  }
+}
+
+void test_log_gparts(struct dump *d) {
+
+  /* Write several copies of a part to the dump. */
+  struct gpart p;
+  bzero(&p, sizeof(struct gpart));
+  p.x[0] = 1.0;
+  p.v_full[0] = 0.1;
+
+  /* Start with an offset at the end of the dump. */
+  size_t offset = d->count;
+
+  /* Write the full part. */
+  logger_log_gpart(&p, logger_mask_x | logger_mask_v | logger_mask_a |
+                           logger_mask_h | logger_mask_consts,
+                   &offset, d);
+  printf("Wrote gpart at offset %#016zx.\n", offset);
+
+  /* Write only the position. */
+  p.x[0] = 2.0;
+  logger_log_gpart(&p, logger_mask_x, &offset, d);
+  printf("Wrote gpart at offset %#016zx.\n", offset);
+
+  /* Write the position and velocity. */
+  p.x[0] = 3.0;
+  p.v_full[0] = 0.3;
+  logger_log_gpart(&p, logger_mask_x | logger_mask_v, &offset, d);
+  printf("Wrote gpart at offset %#016zx.\n", offset);
+
+  /* Recover the last part from the dump. */
+  bzero(&p, sizeof(struct gpart));
+  size_t offset_old = offset;
+  int mask = logger_read_gpart(&p, &offset, d->data);
+  printf(
+      "Recovered gpart at offset %#016zx with mask %#04x: p.x[0]=%e, "
+      "p.v[0]=%e.\n",
+      offset_old, mask, p.x[0], p.v_full[0]);
+  if (p.x[0] != 3.0 || p.v_full[0] != 0.3f) {
+    printf("FAIL: could not read position and velocity of stored gpart.\n");
+    abort();
+  }
+
+  /* Recover the second part from the dump. */
+  bzero(&p, sizeof(struct gpart));
+  offset_old = offset;
+  mask = logger_read_gpart(&p, &offset, d->data);
+  printf(
+      "Recovered gpart at offset %#016zx with mask %#04x: p.x[0]=%e, "
+      "p.v[0]=%e.\n",
+      offset_old, mask, p.x[0], p.v_full[0]);
+  if (p.x[0] != 2.0 || p.v_full[0] != 0.0) {
+    printf("FAIL: could not read position and velocity of stored gpart.\n");
+    abort();
+  }
+
+  /* Recover the first part from the dump. */
+  bzero(&p, sizeof(struct gpart));
+  offset_old = offset;
+  mask = logger_read_gpart(&p, &offset, d->data);
+  printf(
+      "Recovered gpart at offset %#016zx with mask %#04x: p.x[0]=%e, "
+      "p.v[0]=%e.\n",
+      offset_old, mask, p.x[0], p.v_full[0]);
+  if (p.x[0] != 1.0 || p.v_full[0] != 0.1f) {
+    printf("FAIL: could not read position and velocity of stored gpart.\n");
+    abort();
+  }
+}
+
+void test_log_timestamps(struct dump *d) {
+
+  /* The timestamp to log. */
+  unsigned long long int t = 10;
+
+  /* Start with an offset at the end of the dump. */
+  size_t offset = d->count;
+
+  /* Log three consecutive timestamps. */
+  logger_log_timestamp(t, &offset, d);
+  printf("Logged timestamp %020llu at offset %#016zx.\n", t, offset);
+  t += 10;
+  logger_log_timestamp(t, &offset, d);
+  printf("Logged timestamp %020llu at offset %#016zx.\n", t, offset);
+  t += 10;
+  logger_log_timestamp(t, &offset, d);
+  printf("Logged timestamp %020llu at offset %#016zx.\n", t, offset);
+
+  /* Recover the three timestamps. */
+  size_t offset_old = offset;
+  t = 0;
+  int mask = logger_read_timestamp(&t, &offset, d->data);
+  printf("Recovered timestamp %020llu at offset %#016zx with mask %#04x.\n", t,
+         offset_old, mask);
+  if (t != 30) {
+    printf("FAIL: could not recover correct timestamp.\n");
+    abort();
+  }
+
+  offset_old = offset;
+  t = 0;
+  mask = logger_read_timestamp(&t, &offset, d->data);
+  printf("Recovered timestamp %020llu at offset %#016zx with mask %#04x.\n", t,
+         offset_old, mask);
+  if (t != 20) {
+    printf("FAIL: could not recover correct timestamp.\n");
+    abort();
+  }
+
+  offset_old = offset;
+  t = 0;
+  mask = logger_read_timestamp(&t, &offset, d->data);
+  printf("Recovered timestamp %020llu at offset %#016zx with mask %#04x.\n", t,
+         offset_old, mask);
+  if (t != 10) {
+    printf("FAIL: could not recover correct timestamp.\n");
+    abort();
+  }
+}
+
+int main(int argc, char *argv[]) {
+
+  /* Some constants. */
+  const char *filename = "/tmp/dump_test.out";
+
+  /* Prepare a dump. */
+  struct dump d;
+  dump_init(&d, filename, 1024 * 1024);
+
+  /* Test writing/reading parts. */
+  test_log_parts(&d);
+
+  /* Test writing/reading gparts. */
+  test_log_gparts(&d);
+
+  /* Test writing/reading timestamps. */
+  test_log_timestamps(&d);
+
+  /* Finalize the dump. */
+  dump_close(&d);
+
+  /* Return a happy number. */
+  printf("PASS\n");
+  return 0;
+}
diff --git a/tests/testPair.c b/tests/testPair.c
index 8b272b866431db3bfe36239222cd87d669961ae7..8b23cc419a661f4d50ea53948302729784a129f9 100644
--- a/tests/testPair.c
+++ b/tests/testPair.c
@@ -68,8 +68,13 @@ struct cell *make_cell(size_t n, double *offset, double size, double h,
 #else
         part->mass = density * volume / count;
 #endif
-        part->ti_begin = 0;
-        part->ti_end = 1;
+        part->time_bin = 1;
+
+#ifdef SWIFT_DEBUG_CHECKS
+        part->ti_drift = 8;
+        part->ti_kick = 8;
+#endif
+
         ++part;
       }
     }
@@ -87,8 +92,9 @@ struct cell *make_cell(size_t n, double *offset, double size, double h,
   cell->loc[1] = offset[1];
   cell->loc[2] = offset[2];
 
-  cell->ti_end_min = 1;
-  cell->ti_end_max = 1;
+  cell->ti_old = 8;
+  cell->ti_end_min = 8;
+  cell->ti_end_max = 8;
 
   shuffle_particles(cell->parts, cell->count);
 
@@ -245,7 +251,7 @@ int main(int argc, char *argv[]) {
 
   engine.s = &space;
   engine.time = 0.1f;
-  engine.ti_current = 1;
+  engine.ti_current = 8;
   runner.e = &engine;
 
   volume = particles * particles * particles;
diff --git a/tests/testReading.c b/tests/testReading.c
index 2ef32a5ef11c7e24a379ce5131df9cbea153fa7c..cbf25bf880c988bec95a91d5e141bf7554a97fe7 100644
--- a/tests/testReading.c
+++ b/tests/testReading.c
@@ -25,13 +25,14 @@
 
 int main() {
 
-  size_t Ngas = 0, Ngpart = 0;
+  size_t Ngas = 0, Ngpart = 0, Nspart = 0;
   int periodic = -1;
   int flag_entropy_ICs = -1;
   int i, j, k;
   double dim[3];
   struct part *parts = NULL;
   struct gpart *gparts = NULL;
+  struct spart *sparts = NULL;
 
   /* Default unit system */
   struct UnitSystem us;
@@ -43,8 +44,8 @@ int main() {
   const double rho = 2.;
 
   /* Read data */
-  read_ic_single("input.hdf5", &us, dim, &parts, &gparts, &Ngas, &Ngpart,
-                 &periodic, &flag_entropy_ICs, 0);
+  read_ic_single("input.hdf5", &us, dim, &parts, &gparts, &sparts, &Ngas,
+                 &Ngpart, &Nspart, &periodic, &flag_entropy_ICs, 1, 1, 0, 0);
 
   /* Check global properties read are correct */
   assert(dim[0] == boxSize);
diff --git a/tests/testRiemannExact.c b/tests/testRiemannExact.c
index 1943820339ba2ac06d194a17d2d450157ded1a31..82b12449f1b199133de5a74fe7b68b5c386c9cf5 100644
--- a/tests/testRiemannExact.c
+++ b/tests/testRiemannExact.c
@@ -281,11 +281,11 @@ void check_riemann_symmetry() {
     check_value(Whalf1[3], Whalf2[3], "V[2] solution");
     check_value(Whalf1[4], Whalf2[4], "Pressure solution");
   } else {
-    message(
-        "Solver symmetric: [%.3e,%.3e,%.3e,%.3e,%.3e] == "
-        "[%.3e,%.3e,%.3e,%.3e,%.3e]\n",
-        Whalf1[0], Whalf1[1], Whalf1[2], Whalf1[3], Whalf1[4], Whalf2[0],
-        Whalf2[1], Whalf2[2], Whalf2[3], Whalf2[4]);
+    /* message( */
+    /*     "Solver symmetric: [%.3e,%.3e,%.3e,%.3e,%.3e] == " */
+    /*     "[%.3e,%.3e,%.3e,%.3e,%.3e]\n", */
+    /*     Whalf1[0], Whalf1[1], Whalf1[2], Whalf1[3], Whalf1[4], Whalf2[0], */
+    /*     Whalf2[1], Whalf2[2], Whalf2[3], Whalf2[4]); */
   }
 
   vij[0] = random_uniform(-10.0f, 10.0f);
@@ -314,11 +314,11 @@ void check_riemann_symmetry() {
     check_value(totflux1[3], totflux2[3], "Momentum[2] flux");
     check_value(totflux1[4], totflux2[4], "Energy flux");
   } else {
-    message(
-        "Flux solver symmetric: [%.3e,%.3e,%.3e,%.3e,%.3e] == "
-        "[%.3e,%.3e,%.3e,%.3e,%.3e]\n",
-        totflux1[0], totflux1[1], totflux1[2], totflux1[3], totflux1[4],
-        totflux2[0], totflux2[1], totflux2[2], totflux2[3], totflux2[4]);
+    /* message( */
+    /*     "Flux solver symmetric: [%.3e,%.3e,%.3e,%.3e,%.3e] == " */
+    /*     "[%.3e,%.3e,%.3e,%.3e,%.3e]\n", */
+    /*     totflux1[0], totflux1[1], totflux1[2], totflux1[3], totflux1[4], */
+    /*     totflux2[0], totflux2[1], totflux2[2], totflux2[3], totflux2[4]); */
   }
 }
 
diff --git a/tests/testRiemannHLLC.c b/tests/testRiemannHLLC.c
index 4cf883b68efbcfd795d0b7894adb9e7265b14d14..6bdf1192a6da8482d562895027d761f73ecc71de 100644
--- a/tests/testRiemannHLLC.c
+++ b/tests/testRiemannHLLC.c
@@ -75,11 +75,11 @@ void check_riemann_symmetry() {
         totflux2[0], totflux2[1], totflux2[2], totflux2[3], totflux2[4]);
     error("Asymmetry in flux solution!");
   } else {
-    message(
-        "Flux solver symmetric: [%.3e,%.3e,%.3e,%.3e,%.3e] == "
-        "[%.3e,%.3e,%.3e,%.3e,%.3e]\n",
-        totflux1[0], totflux1[1], totflux1[2], totflux1[3], totflux1[4],
-        totflux2[0], totflux2[1], totflux2[2], totflux2[3], totflux2[4]);
+    /* message( */
+    /*     "Flux solver symmetric: [%.3e,%.3e,%.3e,%.3e,%.3e] == " */
+    /*     "[%.3e,%.3e,%.3e,%.3e,%.3e]\n", */
+    /*     totflux1[0], totflux1[1], totflux1[2], totflux1[3], totflux1[4], */
+    /*     totflux2[0], totflux2[1], totflux2[2], totflux2[3], totflux2[4]); */
   }
 }
 
diff --git a/tests/testRiemannTRRS.c b/tests/testRiemannTRRS.c
index 18ecbdce9173f43674a63b21231322cb01620d29..4a0eac0be23581e175d2c0e599b786fd4508b14a 100644
--- a/tests/testRiemannTRRS.c
+++ b/tests/testRiemannTRRS.c
@@ -274,11 +274,11 @@ void check_riemann_symmetry() {
         Whalf2[1], Whalf2[2], Whalf2[3], Whalf2[4]);
     error("Asymmetry in solution!");
   } else {
-    message(
-        "Solver symmetric: [%.3e,%.3e,%.3e,%.3e,%.3e] == "
-        "[%.3e,%.3e,%.3e,%.3e,%.3e]\n",
-        Whalf1[0], Whalf1[1], Whalf1[2], Whalf1[3], Whalf1[4], Whalf2[0],
-        Whalf2[1], Whalf2[2], Whalf2[3], Whalf2[4]);
+    /* message( */
+    /*     "Solver symmetric: [%.3e,%.3e,%.3e,%.3e,%.3e] == " */
+    /*     "[%.3e,%.3e,%.3e,%.3e,%.3e]\n", */
+    /*     Whalf1[0], Whalf1[1], Whalf1[2], Whalf1[3], Whalf1[4], Whalf2[0], */
+    /*     Whalf2[1], Whalf2[2], Whalf2[3], Whalf2[4]); */
   }
 
   vij[0] = random_uniform(-10.0f, 10.0f);
@@ -300,11 +300,11 @@ void check_riemann_symmetry() {
         totflux2[0], totflux2[1], totflux2[2], totflux2[3], totflux2[4]);
     error("Asymmetry in solution!");
   } else {
-    message(
-        "Solver symmetric: [%.3e,%.3e,%.3e,%.3e,%.3e] == "
-        "[%.3e,%.3e,%.3e,%.3e,%.3e]\n",
-        totflux1[0], totflux1[1], totflux1[2], totflux1[3], totflux1[4],
-        totflux2[0], totflux2[1], totflux2[2], totflux2[3], totflux2[4]);
+    /* message( */
+    /*     "Solver symmetric: [%.3e,%.3e,%.3e,%.3e,%.3e] == " */
+    /*     "[%.3e,%.3e,%.3e,%.3e,%.3e]\n", */
+    /*     totflux1[0], totflux1[1], totflux1[2], totflux1[3], totflux1[4], */
+    /*     totflux2[0], totflux2[1], totflux2[2], totflux2[3], totflux2[4]); */
   }
 }
 
diff --git a/tests/testSPHStep.c b/tests/testSPHStep.c
index ff2ec841b27bd5ca6190517bc39f4da0c28fbc0c..0c7ae1d0d8855371b8f8f9fbf51c7c63b3221aaa 100644
--- a/tests/testSPHStep.c
+++ b/tests/testSPHStep.c
@@ -61,8 +61,7 @@ struct cell *make_cell(size_t N, float cellSize, int offset[3], int id_offset) {
             offset[2] * cellSize + z * cellSize / N + cellSize / (2 * N);
         part->h = h;
         part->id = x * N * N + y * N + z + id_offset;
-        part->ti_begin = 0;
-        part->ti_end = 1;
+        part->time_bin = 1;
         ++part;
       }
     }
diff --git a/tests/testTimeIntegration.c b/tests/testTimeIntegration.c
index f39adaee902ac3460b01857c002659b8bb2101f4..42a3d224f43d580e512119edc55051bd22719a3b 100644
--- a/tests/testTimeIntegration.c
+++ b/tests/testTimeIntegration.c
@@ -115,7 +115,7 @@ int main() {
     c.parts[0].a_hydro[1] = -(G * M_sun * c.parts[0].x[1] / r * r * r);
 
     /* Kick... */
-    runner_do_kick(&run, &c, 0);
+    runner_do_kick2(&run, &c, 0);
   }
 
   /* Clean-up */
diff --git a/tests/tolerance_27_normal.dat b/tests/tolerance_27_normal.dat
index 71acaa89be231d02fc33e47c96a7bacf623bbf48..9c7ca10414507746b41e453d75426a072f989d2e 100644
--- a/tests/tolerance_27_normal.dat
+++ b/tests/tolerance_27_normal.dat
@@ -1,3 +1,3 @@
 #   ID      pos_x      pos_y      pos_z        v_x        v_y        v_z           rho        rho_dh        wcount     wcount_dh         div_v       curl_vx       curl_vy       curl_vz
-    0	    1e-6       1e-6	  1e-6 	       1e-6 	  1e-6	     1e-6	   1e-6	      2e-6	    2e-5       2e-3		 2e-6	     2e-6	   2e-6		 2e-6
-    0	    1e-6       1e-6	  1e-6 	       1e-6 	  1e-6	     1e-6	   1e-6	      2e-6	    1e-5       1e-4		 2e-5	     2e-5	   2e-5	 	 2e-5
+    0	    1e-6       1e-6	  1e-6 	       1e-6 	  1e-6	     1e-6	   2e-6	      4e-5	    2e-4       2e-3		 8e-6	     6e-6	   6e-6		 6e-6
+    0	    1e-6       1e-6	  1e-6 	       1e-6 	  1e-6	     1e-6	   1e-6	      1.2e-4	    1e-4       1e-4		 2e-4	     1e-4	   1e-4	 	 1e-4
diff --git a/tests/tolerance_27_perturbed.dat b/tests/tolerance_27_perturbed.dat
index 45293cbaa223b5887f3b0ce05cd9430d0db7440b..53de4ec7632039a56a3757488881e890296e3ac8 100644
--- a/tests/tolerance_27_perturbed.dat
+++ b/tests/tolerance_27_perturbed.dat
@@ -1,3 +1,3 @@
 #   ID      pos_x      pos_y      pos_z        v_x        v_y        v_z           rho        rho_dh        wcount     wcount_dh         div_v       curl_vx       curl_vy       curl_vz
-    0	    1e-6       1e-6	  1e-6 	       1e-6 	  1e-6	     1e-6	   1.2e-6     1e-5	    2.1e-5     2e-3		 2.1e-6	     2e-6	   2e-6		 2e-6
-    0	    1e-6       1e-6	  1e-6 	       1e-6 	  1e-6	     1e-6	   1e-6	      3e-3	    1e-5       1e-4		 2e-5	     4e-4	   4e-4	 	 4e-4
+    0	    1e-6       1e-6	  1e-6 	       1e-6 	  1e-6	     1e-6	   1.2e-6     1e-4	    5e-5       2e-3		 3.1e-6	     3e-6	   3e-6		 3e-6
+    0	    1e-6       1e-6	  1e-6 	       1e-6 	  1e-6	     1e-6	   1e-6	      1.2e-2	    1e-5       1e-4		 2e-5	     2e-3	   2e-3	 	 2e-3