diff --git a/.gitignore b/.gitignore
index 8137ea759b24b3f4ec9909a460da4bcb47b0a1ac..5a986acbd59a818b151540fb9303eadb4f926f77 100644
--- a/.gitignore
+++ b/.gitignore
@@ -25,17 +25,30 @@ examples/swift_mindt
 examples/swift_mindt_mpi
 examples/swift_mpi
 
-tests/testVectorize
-tests/brute_force.dat
-tests/swift_dopair.dat
+tests/testPair
+tests/brute_force_standard.dat
+tests/swift_dopair_standard.dat
+tests/brute_force_perturbed.dat
+tests/swift_dopair_perturbed.dat
+tests/test27cells
+tests/brute_force_27_standard.dat
+tests/swift_dopair_27_standard.dat
+tests/brute_force_27_perturbed.dat
+tests/swift_dopair_27_perturbed.dat
 tests/testGreetings
 tests/testReading
 tests/input.hdf5
 tests/testSingle
 tests/testTimeIntegration
 tests/testSPHStep
+tests/testKernel
+tests/testParser
+tests/parser_output.yml
 
 theory/latex/swift.pdf
+theory/kernel/kernels.pdf
+theory/kernel/kernel_derivatives.pdf
+theory/kernel/kernel_definitions.pdf
 
 m4/libtool.m4
 m4/ltoptions.m4
diff --git a/README b/README
index 320df3f8ca6880776d338408c2c71ea82b1414c8..0c57e3f5656268c71bb7732af933302cbde9547b 100644
--- a/README
+++ b/README
@@ -1,4 +1,4 @@
- Welcome to the cosmological code
+ Welcome to the cosmological hydrodynamical code
     ______       _________________
    / ___/ |     / /  _/ ___/_  __/
    \__ \| | /| / // // /_   / /   
@@ -6,8 +6,26 @@
  /____/ |__/|__/___/_/    /_/     
  SPH With Inter-dependent Fine-grained Tasking
 
-Website: www.swiftsim.com
-Twitter: @SwiftSimulation
+ Website: www.swiftsim.com
+ Twitter: @SwiftSimulation
 
-See INSTALL.swift for instructions.
+See INSTALL.swift for install instructions.
 
+Usage: swift [OPTION] PARAMFILE
+
+Valid options are:
+  -c          Run with cosmological time integration
+  -d          Dry run. Read the parameter file, allocate memory but does not read 
+              the particles from ICs and exit before the start of time integration.
+              Allows user to check validity of parameter and IC files as well as memory limits.
+  -e          Enable floating-point exceptions (debugging mode)
+  -f    {int} Overwrite the CPU frequency (Hz) to be used for time measurements
+  -g          Run with an external gravitational potential
+  -G          Run with self-gravity
+  -s          Run with SPH
+  -v     [12] Increase the level of verbosity 1: MPI-rank 0 writes 
+              2: All MPI-ranks write
+  -y    {int} Time-step frequency at which task graphs are dumped
+  -h          Print this help message and exit
+
+See the file examples/parameter_example.yml for an example of parameter file.
diff --git a/configure.ac b/configure.ac
index e5d44fda300f15088c282b93b25499ecb242e24f..11ad6550d87f6764570f48449719292bcec3704d 100644
--- a/configure.ac
+++ b/configure.ac
@@ -287,11 +287,11 @@ AC_SUBST([METIS_LIBS])
 AC_SUBST([METIS_INCS])
 AM_CONDITIONAL([HAVEMETIS],[test -n "$METIS_LIBS"])
 
-# Check for zlib.
-AC_CHECK_LIB([z],[gzopen],[
-    AC_DEFINE([HAVE_LIBZ],[1],[Set to 1 if zlib is installed.])
-    LDFLAGS="$LDFLAGS -lz"
-    ],[])
+# # Check for zlib.
+# AC_CHECK_LIB([z],[gzopen],[
+#     AC_DEFINE([HAVE_LIBZ],[1],[Set to 1 if zlib is installed.])
+#     LDFLAGS="$LDFLAGS -lz"
+#     ],[])
 
 
 # Check for HDF5. This is required.
diff --git a/doc/Doxyfile.in b/doc/Doxyfile.in
index 8f61a060b37b0e62189160d0a8c61e713cfd3b8f..802d8c31c251e006711934b6d30ace6c47eec4ac 100644
--- a/doc/Doxyfile.in
+++ b/doc/Doxyfile.in
@@ -759,7 +759,9 @@ WARN_LOGFILE           =
 # spaces.
 # Note: If this tag is empty the current directory is searched.
 
-INPUT                  = @top_srcdir@ @top_srcdir@/src @top_srcdir@/src/hydro/Minimal @top_srcdir@/src/gravity/Default
+INPUT                  =  @top_srcdir@ @top_srcdir@/src @top_srcdir@/tests @top_srcdir@/examples
+INPUT		       += @top_srcdir@/src/hydro/Minimal @top_srcdir@/src/gravity/Default
+INPUT		       += @top_srcdir@/src/riemann 
 
 # This tag can be used to specify the character encoding of the source files
 # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses
diff --git a/examples/BigCosmoVolume/makeIC.py b/examples/BigCosmoVolume/makeIC.py
index 0994e1c95e053defe7766122c52bc405c7239776..3020feaf753f817f039d2fd09c4fa4f7fb69b896 100644
--- a/examples/BigCosmoVolume/makeIC.py
+++ b/examples/BigCosmoVolume/makeIC.py
@@ -77,7 +77,7 @@ indices = indices < numPart
 coords = coords[indices,:]
 v = v[indices,:]
 m = m[indices]
-h = h[indices]
+h = h[indices] / 1.825742 # Correct from Gadget definition of h to physical definition
 u = u[indices]
 ids = ids[indices]
 
diff --git a/examples/CosmoVolume/cosmoVolume.yml b/examples/CosmoVolume/cosmoVolume.yml
new file mode 100644
index 0000000000000000000000000000000000000000..20d5febb280748a208633f75351d523b79286035
--- /dev/null
+++ b/examples/CosmoVolume/cosmoVolume.yml
@@ -0,0 +1,48 @@
+
+# Define the system of units to use internally. 
+UnitSystem:
+  UnitMass_in_cgs:     1   # Grams
+  UnitLength_in_cgs:   1   # Centimeters
+  UnitVelocity_in_cgs: 1   # Centimeters per second
+  UnitCurrent_in_cgs:  1   # Amperes
+  UnitTemp_in_cgs:     1   # Kelvin
+
+# Parameters for the task scheduling
+Scheduler:
+  nr_threads:       16        # The number of threads per MPI rank to use.
+  nr_queues:        0        # The number of task queues to use. Use 0  to let the system decide.
+  cell_max_size:    8000000  # Maximal number of interactions per task (this is the default value).
+  cell_sub_size:    5000     # Maximal number of interactions per sub-task  (this is the default value).
+  cell_split_size:  400      # Maximal number of particles per cell (this is the default value).
+
+# Parameters governing the time integration
+TimeIntegration:
+  time_begin: 0.    # The starting time of the simulation (in internal units).
+  time_end:   1.    # The end time of the simulation (in internal units).
+  dt_min:     1e-6  # The minimal time-step size of the simulation (in internal units).
+  dt_max:     1e-2  # The maximal time-step size of the simulation (in internal units).
+
+# Parameters for the hydrodynamics scheme
+SPH:
+  resolution_eta:        1.2349   # Target smoothing length in units of the mean inter-particle separation (1.2349 == 48Ngbs with the cubic spline kernel).
+  delta_neighbours:      1.       # The tolerance for the targeted number of neighbours.
+  CFL_condition:         0.1      # Courant-Friedrichs-Lewy condition for time integration.
+  max_ghost_iterations:  30       # Maximal number of iterations allowed to converge towards the smoothing length.
+  max_smoothing_length:  0.6      # Maximal smoothing length allowed (in internal units).
+
+# Parameters related to the initial conditions
+InitialConditions:
+  file_name:  ./cosmoVolume.hdf5     # The file to read
+  h_scaling:  1.                    # A scaling factor to apply to all smoothing lengths in the ICs.
+  shift_x:    0.                    # A shift to apply to all particles read from the ICs (in internal units).
+  shift_y:    0.
+  shift_z:    0.
+
+# Parameters governing domain decomposition
+DomainDecomposition:
+  initial_type:       m     # The initial strategy ("g", "m", "w", or "v"). See documentation for details.
+  initial_grid_x:    10     # Grid size if the 'g' strategy is chosen.
+  initial_grid_y:    10
+  initial_grid_z:    10
+  repartition_type:   b     # The re-decomposition strategy ("n", "b", "v", "e" or "x"). See documentation for details.
+ 
diff --git a/examples/CosmoVolume/run.sh b/examples/CosmoVolume/run.sh
new file mode 100755
index 0000000000000000000000000000000000000000..a788a35c76a7c0b205297a7de922a9a7e833243a
--- /dev/null
+++ b/examples/CosmoVolume/run.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+
+# Fetch the initial conditions if they are not present.
+if [ ! -e cosmoVolume.hdf5 ]
+then
+    echo "Fetching initial conditions for the cosmo volume example..."
+    ./getIC.sh
+fi
+
+../swift -s cosmoVolume.yml
diff --git a/examples/GreshoVortex/makeIC.py b/examples/GreshoVortex/makeIC.py
index 6aceeed559324f97b0b1e388ff0c3524498b52e4..12edcb6e8154ec6f865d28a6daeb02d385d14bbf 100644
--- a/examples/GreshoVortex/makeIC.py
+++ b/examples/GreshoVortex/makeIC.py
@@ -30,6 +30,7 @@ factor = 3
 boxSize = [ 1.0 , 1.0, 1.0/factor ]
 L = 120           # Number of particles along one axis
 gamma = 5./3.     # Gas adiabatic index
+eta = 1.2349      # 48 ngbs with cubic spline kernel
 rho = 1           # Gas density
 P0 = 0.           # Constant additional pressure (should have no impact on the dynamics)
 fileName = "greshoVortex.hdf5" 
@@ -73,7 +74,7 @@ for i in range(L):
             v[index,1] =  v_phi * (x - boxSize[0] / 2) / r
             v[index,2] = 0.
             m[index] = mass
-            h[index] = 2.251 * boxSize[0] / L
+            h[index] = eta * boxSize[0] / L
             P = P0
             if r < 0.2:
                 P = P + 5. + 12.5*r2
@@ -105,6 +106,14 @@ grp.attrs["Flag_Entropy_ICs"] = [0, 0, 0, 0, 0, 0]
 grp = file.create_group("/RuntimePars")
 grp.attrs["PeriodicBoundariesOn"] = periodic
 
+#Units
+grp = file.create_group("/Units")
+grp.attrs["Unit length in cgs (U_L)"] = 1.
+grp.attrs["Unit mass in cgs (U_M)"] = 1.
+grp.attrs["Unit time in cgs (U_t)"] = 1.
+grp.attrs["Unit current in cgs (U_I)"] = 1.
+grp.attrs["Unit temperature in cgs (U_T)"] = 1.
+
 #Particle group
 grp = file.create_group("/PartType0")
 ds = grp.create_dataset('Coordinates', (numPart, 3), 'd')
diff --git a/examples/MultiTypes/makeIC.py b/examples/MultiTypes/makeIC.py
index 3a41910c22c260086b5384b248a5c86ab6340a5e..cf889f9b6eab502f692cd6c8b4506c31664ecdcb 100644
--- a/examples/MultiTypes/makeIC.py
+++ b/examples/MultiTypes/makeIC.py
@@ -32,6 +32,7 @@ Lgas = int(sys.argv[1])  # Number of particles along one axis
 rhoGas = 2.              # Density
 P = 1.                   # Pressure
 gamma = 5./3.            # Gas adiabatic index
+eta = 1.2349             # 48 ngbs with cubic spline kernel
 rhoDM = 1.
 Ldm = int(sys.argv[2])  # Number of particles along one axis
 
@@ -61,11 +62,18 @@ grp.attrs["NumFilesPerSnapshot"] = 1
 grp.attrs["MassTable"] = [0.0, massDM, 0.0, 0.0, 0.0, 0.0]
 grp.attrs["Flag_Entropy_ICs"] = 0
 
-
 #Runtime parameters
 grp = file.create_group("/RuntimePars")
 grp.attrs["PeriodicBoundariesOn"] = periodic
 
+#Units
+grp = file.create_group("/Units")
+grp.attrs["Unit length in cgs (U_L)"] = 1.
+grp.attrs["Unit mass in cgs (U_M)"] = 1.
+grp.attrs["Unit time in cgs (U_t)"] = 1.
+grp.attrs["Unit current in cgs (U_I)"] = 1.
+grp.attrs["Unit temperature in cgs (U_T)"] = 1.
+
 
 # Gas Particle group
 grp = file.create_group("/PartType0")
@@ -80,7 +88,7 @@ ds = grp.create_dataset('Masses', (numGas,1), 'f')
 ds[()] = m
 m = zeros(1)
 
-h = full((numGas, 1), 1.1255 * boxSize / Lgas)
+h = full((numGas, 1), eta * boxSize / Lgas)
 ds = grp.create_dataset('SmoothingLength', (numGas,1), 'f')
 ds[()] = h
 h = zeros(1)
diff --git a/examples/PerturbedBox/makeIC.py b/examples/PerturbedBox/makeIC.py
index 69c1a69199c9a5262f5ae6c4e95ca14699300fd4..ee1d845fc2149892909a54bf588046b0b1691b03 100644
--- a/examples/PerturbedBox/makeIC.py
+++ b/examples/PerturbedBox/makeIC.py
@@ -90,6 +90,14 @@ grp.attrs["NumPart_Total"] = numPart
 grp = file.create_group("/RuntimePars")
 grp.attrs["PeriodicBoundariesOn"] = periodic
 
+#Units
+grp = file.create_group("/Units")
+grp.attrs["Unit length in cgs (U_L)"] = 1.
+grp.attrs["Unit mass in cgs (U_M)"] = 1.
+grp.attrs["Unit time in cgs (U_t)"] = 1.
+grp.attrs["Unit current in cgs (U_I)"] = 1.
+grp.attrs["Unit temperature in cgs (U_T)"] = 1.
+
 #Particle group
 grp = file.create_group("/PartType0")
 ds = grp.create_dataset('Coordinates', (numPart, 3), 'd')
diff --git a/examples/SedovBlast/makeIC.py b/examples/SedovBlast/makeIC.py
index 75ff81165df51780848e3d8ac679a6dbeb17a039..e64942e8e92ee6fe67142f841f566019b1a668be 100644
--- a/examples/SedovBlast/makeIC.py
+++ b/examples/SedovBlast/makeIC.py
@@ -33,6 +33,7 @@ P = 1.e-5         # Pressure
 E0= 1.e2          # Energy of the explosion
 pert = 0.1
 gamma = 5./3.     # Gas adiabatic index
+eta = 1.2349      # 48 ngbs with cubic spline kernel
 fileName = "sedov.hdf5" 
 
 
@@ -67,7 +68,7 @@ for i in range(L):
             v[index,1] = 0.
             v[index,2] = 0.
             m[index] = mass
-            h[index] = 1.1255 * boxSize / L
+            h[index] = eta * boxSize / L
             u[index] = internalEnergy
             ids[index] = index + 1
             if sqrt((x - boxSize/2.)**2 + (y - boxSize/2.)**2 + (z - boxSize/2.)**2) < 2.01 * boxSize/L:
@@ -98,6 +99,14 @@ grp.attrs["Flag_Entropy_ICs"] = 0
 grp = file.create_group("/RuntimePars")
 grp.attrs["PeriodicBoundariesOn"] = periodic
 
+#Units
+grp = file.create_group("/Units")
+grp.attrs["Unit length in cgs (U_L)"] = 1.
+grp.attrs["Unit mass in cgs (U_M)"] = 1.
+grp.attrs["Unit time in cgs (U_t)"] = 1.
+grp.attrs["Unit current in cgs (U_I)"] = 1.
+grp.attrs["Unit temperature in cgs (U_T)"] = 1.
+
 #Particle group
 grp = file.create_group("/PartType0")
 grp.create_dataset('Coordinates', data=coords, dtype='d')
diff --git a/examples/SedovBlast/makeIC_fcc.py b/examples/SedovBlast/makeIC_fcc.py
index 17f07440909cb5478d09a5b7a1444c72af2f3a47..0d3a017a9b7f3b30b61e723e3d1646d7797b40a4 100644
--- a/examples/SedovBlast/makeIC_fcc.py
+++ b/examples/SedovBlast/makeIC_fcc.py
@@ -33,6 +33,7 @@ P = 1.e-5         # Pressure
 E0= 1.e2          # Energy of the explosion
 pert = 0.025
 gamma = 5./3.     # Gas adiabatic index
+eta = 1.2349          # 48 ngbs with cubic spline kernel
 fileName = "sedov.hdf5" 
 
 
@@ -70,7 +71,7 @@ for i in range(L):
                 v[index,1] = 0.
                 v[index,2] = 0.
                 m[index] = mass
-                h[index] = 1.1255 * hbox
+                h[index] = eta * hbox
                 u[index] = internalEnergy
                 ids[index] = index + 1
                 if sqrt((x - boxSize/2.)**2 + (y - boxSize/2.)**2 + (z - boxSize/2.)**2) < 1.2 * hbox:
@@ -101,6 +102,14 @@ grp.attrs["Flag_Entropy_ICs"] = 0
 grp = file.create_group("/RuntimePars")
 grp.attrs["PeriodicBoundariesOn"] = periodic
 
+#Units
+grp = file.create_group("/Units")
+grp.attrs["Unit length in cgs (U_L)"] = 1.
+grp.attrs["Unit mass in cgs (U_M)"] = 1.
+grp.attrs["Unit time in cgs (U_t)"] = 1.
+grp.attrs["Unit current in cgs (U_I)"] = 1.
+grp.attrs["Unit temperature in cgs (U_T)"] = 1.
+
 #Particle group
 grp = file.create_group("/PartType0")
 grp.create_dataset('Coordinates', data=coords, dtype='d')
diff --git a/examples/SedovBlast/run.sh b/examples/SedovBlast/run.sh
new file mode 100755
index 0000000000000000000000000000000000000000..58646cf42eecc3f31fdb8a63ca2108c02d9580ba
--- /dev/null
+++ b/examples/SedovBlast/run.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+
+# Generate the initial conditions if they are not present.
+if [ ! -e sedov.hdf5 ]
+then
+    echo "Generating initial conditions for the SedovBlast example..."
+    python makeIC_fcc.py
+fi
+
+../swift -s sedov.yml
diff --git a/examples/SedovBlast/sedov.yml b/examples/SedovBlast/sedov.yml
new file mode 100644
index 0000000000000000000000000000000000000000..f354ef5679eb5b6176ab90298bb307c6c2b27f0e
--- /dev/null
+++ b/examples/SedovBlast/sedov.yml
@@ -0,0 +1,48 @@
+
+# Define the system of units to use internally. 
+UnitSystem:
+  UnitMass_in_cgs:     1   # Grams
+  UnitLength_in_cgs:   1   # Centimeters
+  UnitVelocity_in_cgs: 1   # Centimeters per second
+  UnitCurrent_in_cgs:  1   # Amperes
+  UnitTemp_in_cgs:     1   # Kelvin
+
+# Parameters for the task scheduling
+Scheduler:
+  nr_threads:       16       # The number of threads per MPI rank to use.
+  nr_queues:        0        # The number of task queues to use. Use 0  to let the system decide.
+  cell_max_size:    8000000  # Maximal number of interactions per task (this is the default value).
+  cell_sub_size:    5000     # Maximal number of interactions per sub-task  (this is the default value).
+  cell_split_size:  400      # Maximal number of particles per cell (this is the default value).
+
+# Parameters governing the time integration
+TimeIntegration:
+  time_begin: 0.    # The starting time of the simulation (in internal units).
+  time_end:   1.    # The end time of the simulation (in internal units).
+  dt_min:     1e-7  # The minimal time-step size of the simulation (in internal units).
+  dt_max:     1e-2  # The maximal time-step size of the simulation (in internal units).
+
+# Parameters for the hydrodynamics scheme
+SPH:
+  resolution_eta:        1.2349   # Target smoothing length in units of the mean inter-particle separation (1.2349 == 48Ngbs with the cubic spline kernel).
+  delta_neighbours:      1.       # The tolerance for the targeted number of neighbours.
+  CFL_condition:         0.1      # Courant-Friedrichs-Lewy condition for time integration.
+  max_ghost_iterations:  30       # Maximal number of iterations allowed to converge towards the smoothing length.
+  max_smoothing_length:  1.       # Maximal smoothing length allowed (in internal units).
+
+# Parameters related to the initial conditions
+InitialConditions:
+  file_name:  ./sedov.hdf5          # The file to read
+  h_scaling:  1.                    # A scaling factor to apply to all smoothing lengths in the ICs.
+  shift_x:    0.                    # A shift to apply to all particles read from the ICs (in internal units).
+  shift_y:    0.
+  shift_z:    0.
+
+# Parameters governing domain decomposition
+DomainDecomposition:
+  initial_type:       m     # The initial strategy ("g", "m", "w", or "v"). See documentation for details.
+  initial_grid_x:    10     # Grid size if the 'g' strategy is chosen.
+  initial_grid_y:    10
+  initial_grid_z:    10
+  repartition_type:   b     # The re-decomposition strategy ("n", "b", "v", "e" or "x"). See documentation for details.
+ 
diff --git a/examples/SodShock/makeIC.py b/examples/SodShock/makeIC.py
index 0ac0564116f8a6ceb57b4f41d23eb9907df0440d..8ae19050c11c0712579b44646c8870d7574d113b 100644
--- a/examples/SodShock/makeIC.py
+++ b/examples/SodShock/makeIC.py
@@ -43,14 +43,14 @@ vol = boxSize[0] * boxSize[1] * boxSize[2]
 glass1 = h5py.File("glass_001.hdf5")
 pos1 = glass1["/PartType0/Coordinates"][:,:]
 pos1 = pos1 / factor # Particles are in [0:0.25, 0:0.25, 0:0.25]
-
+glass_h1 = glass1["/PartType0/SmoothingLength"][:] / factor
 
 #Read in high density glass
 # glass2 = h5py.File("../Glass/glass_50000.hdf5")
 glass2 = h5py.File("glass_002.hdf5")
 pos2 = glass2["/PartType0/Coordinates"][:,:]
 pos2 = pos2 / factor # Particles are in [0:0.25, 0:0.25, 0:0.25]
-
+glass_h2 = glass2["/PartType0/SmoothingLength"][:] / factor
 
 #Generate high density region
 rho1 = 1.
@@ -61,9 +61,10 @@ coord1 = append(coord1, coord1 + [0.25, 0, 0], 0)
 # coord1 = append(coord1, pos1 + [0, 0.5, 0.5], 0)
 N1 = size(coord1)/3
 v1 = zeros((N1, 3))
-h1 = ones(N1) * 2.251 * 0.5 * vol / (size(pos1)/3)**(1./3.)
 u1 = ones(N1) * P1 / ((gamma - 1.) * rho1)
 m1 = ones(N1) * vol * 0.5 * rho1 / N1
+h1 = append(glass_h1, glass_h1, 0)
+h1 = append(h1, h1, 0)
 
 #Generate low density region
 rho2 = 0.25
@@ -74,9 +75,10 @@ coord2 = append(coord2, coord2 + [0.25, 0, 0], 0)
 # coord2 = append(coord2, pos2 + [0, 0.5, 0.5], 0)
 N2 = size(coord2)/3
 v2 = zeros((N2, 3))
-h2 = ones(N2) * 2.251 * 0.5 * vol / (size(pos2)/3)**(1./3.)
 u2 = ones(N2) * P2 / ((gamma - 1.) * rho2)
 m2 = ones(N2) * vol * 0.5 * rho2 / N2
+h2 = append(glass_h2, glass_h2, 0)
+h2 = append(h2, h2, 0)
 
 #Merge arrays
 numPart = N1 + N2
@@ -89,8 +91,8 @@ ids = zeros(numPart, dtype='L')
 for i in range(1, numPart+1):
     ids[i-1] = i
 
-#Final operations
-h /= 2
+#Final operation: convert Gadget-2 cubic spline h to the physical definition of h
+h /= 1.825742
 
 #File
 file = h5py.File(fileName, 'w')
@@ -110,6 +112,14 @@ grp.attrs["Flag_Entropy_ICs"] = 0
 grp = file.create_group("/RuntimePars")
 grp.attrs["PeriodicBoundariesOn"] = periodic
 
+#Units
+grp = file.create_group("/Units")
+grp.attrs["Unit length in cgs (U_L)"] = 1.
+grp.attrs["Unit mass in cgs (U_M)"] = 1.
+grp.attrs["Unit time in cgs (U_t)"] = 1.
+grp.attrs["Unit current in cgs (U_I)"] = 1.
+grp.attrs["Unit temperature in cgs (U_T)"] = 1.
+
 #Particle group
 grp = file.create_group("/PartType0")
 grp.create_dataset('Coordinates', data=coords, dtype='d')
diff --git a/examples/SodShock/run.sh b/examples/SodShock/run.sh
new file mode 100755
index 0000000000000000000000000000000000000000..646f1e3a337170e2e406c24e7505e42b81de364b
--- /dev/null
+++ b/examples/SodShock/run.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+
+# Generate the initial conditions if they are not present.
+if [ ! -e sodShock.hdf5 ]
+then
+    echo "Generating initial conditions for the SodShock example..."
+    python makeIC.py
+fi
+
+../swift -s sodShock.yml
diff --git a/examples/SodShock/sodShock.yml b/examples/SodShock/sodShock.yml
new file mode 100644
index 0000000000000000000000000000000000000000..5fe7be7b9fc13bb5bc67556d79d8ff9d9eff81d9
--- /dev/null
+++ b/examples/SodShock/sodShock.yml
@@ -0,0 +1,48 @@
+
+# Define the system of units to use internally. 
+UnitSystem:
+  UnitMass_in_cgs:     1   # Grams
+  UnitLength_in_cgs:   1   # Centimeters
+  UnitVelocity_in_cgs: 1   # Centimeters per second
+  UnitCurrent_in_cgs:  1   # Amperes
+  UnitTemp_in_cgs:     1   # Kelvin
+
+# Parameters for the task scheduling
+Scheduler:
+  nr_threads:       16        # The number of threads per MPI rank to use.
+  nr_queues:        0        # The number of task queues to use. Use 0  to let the system decide.
+  cell_max_size:    8000000  # Maximal number of interactions per task (this is the default value).
+  cell_sub_size:    5000     # Maximal number of interactions per sub-task.
+  cell_split_size:  400      # Maximal number of particles per cell (this is the default value).
+
+# Parameters governing the time integration
+TimeIntegration:
+  time_begin: 0.    # The starting time of the simulation (in internal units).
+  time_end:   1.    # The end time of the simulation (in internal units).
+  dt_min:     1e-7  # The minimal time-step size of the simulation (in internal units).
+  dt_max:     1e-2  # The maximal time-step size of the simulation (in internal units).
+
+# Parameters for the hydrodynamics scheme
+SPH:
+  resolution_eta:        1.2349   # Target smoothing length in units of the mean inter-particle separation (1.2349 == 48Ngbs with the cubic spline kernel).
+  delta_neighbours:      1.       # The tolerance for the targeted number of neighbours.
+  CFL_condition:         0.1      # Courant-Friedrichs-Lewy condition for time integration.
+  max_ghost_iterations:  30       # Maximal number of iterations allowed to converge towards the smoothing length.
+  max_smoothing_length:  0.01     # Maximal smoothing length allowed (in internal units).
+
+# Parameters related to the initial conditions
+InitialConditions:
+  file_name:  ./sodShock.hdf5       # The file to read
+  h_scaling:  1.                    # A scaling factor to apply to all smoothing lengths in the ICs.
+  shift_x:    0.                    # A shift to apply to all particles read from the ICs (in internal units).
+  shift_y:    0.
+  shift_z:    0.
+
+# Parameters governing domain decomposition
+DomainDecomposition:
+  initial_type:       m     # The initial strategy ("g", "m", "w", or "v"). See documentation for details.
+  initial_grid_x:    10     # Grid size if the 'g' strategy is chosen.
+  initial_grid_y:    10
+  initial_grid_z:    10
+  repartition_type:   b     # The re-decomposition strategy ("n", "b", "v", "e" or "x"). See documentation for details.
+ 
diff --git a/examples/UniformBox/makeIC.py b/examples/UniformBox/makeIC.py
index c175349e658799cbcb30dfe2619a1594bafc18b9..1484f60596e68734f0f98685ab2ab845f2e0b407 100644
--- a/examples/UniformBox/makeIC.py
+++ b/examples/UniformBox/makeIC.py
@@ -32,6 +32,7 @@ L = int(sys.argv[1])  # Number of particles along one axis
 rho = 2.              # Density
 P = 1.                # Pressure
 gamma = 5./3.         # Gas adiabatic index
+eta = 1.2349          # 48 ngbs with cubic spline kernel
 fileName = "uniformBox.hdf5" 
 
 #---------------------------------------------------
@@ -55,11 +56,18 @@ grp.attrs["NumFilesPerSnapshot"] = 1
 grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
 grp.attrs["Flag_Entropy_ICs"] = 0
 
-
 #Runtime parameters
 grp = file.create_group("/RuntimePars")
 grp.attrs["PeriodicBoundariesOn"] = periodic
 
+#Units
+grp = file.create_group("/Units")
+grp.attrs["Unit length in cgs (U_L)"] = 1.
+grp.attrs["Unit mass in cgs (U_M)"] = 1.
+grp.attrs["Unit time in cgs (U_t)"] = 1.
+grp.attrs["Unit current in cgs (U_I)"] = 1.
+grp.attrs["Unit temperature in cgs (U_T)"] = 1.
+
 #Particle group
 grp = file.create_group("/PartType0")
 
@@ -73,7 +81,7 @@ ds = grp.create_dataset('Masses', (numPart,1), 'f')
 ds[()] = m
 m = zeros(1)
 
-h = full((numPart, 1), 1.1255 * boxSize / L)
+h = full((numPart, 1), eta * boxSize / L)
 ds = grp.create_dataset('SmoothingLength', (numPart,1), 'f')
 ds[()] = h
 h = zeros(1)
diff --git a/examples/UniformBox/makeICbig.py b/examples/UniformBox/makeICbig.py
index e475fdcbd9f3c4811e3dcfdf20bbd321be3d8b29..bd5cf627fb535595b3abb224cbc8de50589f12cf 100644
--- a/examples/UniformBox/makeICbig.py
+++ b/examples/UniformBox/makeICbig.py
@@ -32,6 +32,7 @@ N = int(sys.argv[2])  # Write N particles at a time to avoid requiring a lot of
 rho = 2.              # Density
 P = 1.                # Pressure
 gamma = 5./3.         # Gas adiabatic index
+eta = 1.2349      # 48 ngbs with cubic spline kernel
 fileName = "uniformBox_%d.hdf5"%L
 
 #---------------------------------------------------
@@ -62,11 +63,19 @@ grp.attrs["NumFilesPerSnapshot"] = 1
 grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
 grp.attrs["Flag_Entropy_ICs"] = 0
 
-
 #Runtime parameters
 grp = file.create_group("/RuntimePars")
 grp.attrs["PeriodicBoundariesOn"] = periodic
 
+#Units
+grp = file.create_group("/Units")
+grp.attrs["Unit length in cgs (U_L)"] = 1.
+grp.attrs["Unit mass in cgs (U_M)"] = 1.
+grp.attrs["Unit time in cgs (U_t)"] = 1.
+grp.attrs["Unit current in cgs (U_I)"] = 1.
+grp.attrs["Unit temperature in cgs (U_T)"] = 1.
+
+
 #Particle group
 grp = file.create_group("/PartType0")
 
@@ -89,7 +98,7 @@ for n in range(n_iterations):
     ds_m[offset:offset+N] = m
     m = zeros(1)
 
-    h = full((N, 1), 1.1255 * boxSize / L)
+    h = full((N, 1), eta * boxSize / L)
     ds_h[offset:offset+N] = h
     h = zeros(1)
 
@@ -122,7 +131,7 @@ m = full((remainder, 1), mass)
 ds_m[offset:offset+remainder] = m
 m = zeros(1)
 
-h = full((remainder, 1), 1.1255 * boxSize / L)
+h = full((remainder, 1), eta * boxSize / L)
 ds_h[offset:offset+remainder] = h
 h = zeros(1)
 
@@ -139,7 +148,7 @@ coords = zeros((remainder, 3))
 coords[:,0] = z[:,0] * boxSize / L + boxSize / (2*L)
 coords[:,1] = y[:,0] * boxSize / L + boxSize / (2*L)
 coords[:,2] = x[:,0] * boxSize / L + boxSize / (2*L)
-ds_x[offset:offset+remainder,:] = coords
+ds_x[offset:offset+remainder,:] = coords  # Write the coordinates of the last `remainder` particles
 
 print "Done", offset+remainder,"/", numPart
 
diff --git a/examples/UniformBox/run.sh b/examples/UniformBox/run.sh
new file mode 100755
index 0000000000000000000000000000000000000000..ca78b0ac0425bf1b3f6dd9d30bfc95d35083739f
--- /dev/null
+++ b/examples/UniformBox/run.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+
+# Generate the initial conditions if they are not present.
+if [ ! -e uniformBox.hdf5 ]
+then
+    echo "Generating initial conditions for the uniform box example..."
+    python makeIC.py 100
+fi
+
+../swift -s uniformBox.yml
diff --git a/examples/UniformBox/uniformBox.yml b/examples/UniformBox/uniformBox.yml
new file mode 100644
index 0000000000000000000000000000000000000000..2d5512815b60511b5dbc373df43fae4658272093
--- /dev/null
+++ b/examples/UniformBox/uniformBox.yml
@@ -0,0 +1,48 @@
+
+# Define the system of units to use internally. 
+UnitSystem:
+  UnitMass_in_cgs:     1   # Grams
+  UnitLength_in_cgs:   1   # Centimeters
+  UnitVelocity_in_cgs: 1   # Centimeters per second
+  UnitCurrent_in_cgs:  1   # Amperes
+  UnitTemp_in_cgs:     1   # Kelvin
+
+# Parameters for the task scheduling
+Scheduler:
+  nr_threads:       16        # The number of threads per MPI rank to use.
+  nr_queues:        0        # The number of task queues to use. Use 0  to let the system decide.
+  cell_max_size:    8000000  # Maximal number of interactions per task (this is the default value).
+  cell_sub_size:    5000     # Maximal number of interactions per sub-task  (this is the default value).
+  cell_split_size:  400      # Maximal number of particles per cell (this is the default value).
+
+# Parameters governing the time integration
+TimeIntegration:
+  time_begin: 0.    # The starting time of the simulation (in internal units).
+  time_end:   1.    # The end time of the simulation (in internal units).
+  dt_min:     1e-6  # The minimal time-step size of the simulation (in internal units).
+  dt_max:     1e-2  # The maximal time-step size of the simulation (in internal units).
+
+# Parameters for the hydrodynamics scheme
+SPH:
+  resolution_eta:        1.2349   # Target smoothing length in units of the mean inter-particle separation (1.2349 == 48Ngbs with the cubic spline kernel).
+  delta_neighbours:      1.       # The tolerance for the targeted number of neighbours.
+  CFL_condition:         0.1      # Courant-Friedrichs-Lewy condition for time integration.
+  max_ghost_iterations:  30       # Maximal number of iterations allowed to converge towards the smoothing length.
+  max_smoothing_length:  0.1      # Maximal smoothing length allowed (in internal units).
+
+# Parameters related to the initial conditions
+InitialConditions:
+  file_name:  ./uniformBox.hdf5     # The file to read
+  h_scaling:  1.                    # A scaling factor to apply to all smoothing lengths in the ICs.
+  shift_x:    0.                    # A shift to apply to all particles read from the ICs (in internal units).
+  shift_y:    0.
+  shift_z:    0.
+
+# Parameters governing domain decomposition
+DomainDecomposition:
+  initial_type:       m     # The initial strategy ("g", "m", "w", or "v"). See documentation for details.
+  initial_grid_x:    10     # Grid size if the 'g' strategy is chosen.
+  initial_grid_y:    10
+  initial_grid_z:    10
+  repartition_type:   b     # The re-decomposition strategy ("n", "b", "v", "e" or "x"). See documentation for details.
+ 
diff --git a/examples/UniformDMBox/makeIC.py b/examples/UniformDMBox/makeIC.py
index 061b4d0ad1959d9e25356aff80e78adb9c1c4faa..449d780fb31bc23dd194f772be45d35e6b0bbe3f 100644
--- a/examples/UniformDMBox/makeIC.py
+++ b/examples/UniformDMBox/makeIC.py
@@ -52,11 +52,19 @@ grp.attrs["NumFilesPerSnapshot"] = 1
 grp.attrs["MassTable"] = [0.0, mass, 0.0, 0.0, 0.0, 0.0]
 grp.attrs["Flag_Entropy_ICs"] = 0
 
-
 #Runtime parameters
 grp = file.create_group("/RuntimePars")
 grp.attrs["PeriodicBoundariesOn"] = periodic
 
+#Units
+grp = file.create_group("/Units")
+grp.attrs["Unit length in cgs (U_L)"] = 1.
+grp.attrs["Unit mass in cgs (U_M)"] = 1.
+grp.attrs["Unit time in cgs (U_t)"] = 1.
+grp.attrs["Unit current in cgs (U_I)"] = 1.
+grp.attrs["Unit temperature in cgs (U_T)"] = 1.
+
+
 #Particle group
 grp = file.create_group("/PartType1")
 
diff --git a/examples/main.c b/examples/main.c
index c88f92a07a747c327692b5e0fbbc7dc07b93ac0c..5cfae5efba9157ba7b727115b03ac467287edc3d 100644
--- a/examples/main.c
+++ b/examples/main.c
@@ -23,20 +23,11 @@
 #include "../config.h"
 
 /* Some standard headers. */
+#include <fenv.h>
+#include <unistd.h>
 #include <stdio.h>
 #include <stdlib.h>
-#include <unistd.h>
 #include <string.h>
-#include <pthread.h>
-#include <math.h>
-#include <float.h>
-#include <limits.h>
-#include <fenv.h>
-
-/* Conditional headers. */
-#ifdef HAVE_LIBZ
-#include <zlib.h>
-#endif
 
 /* MPI headers. */
 #ifdef WITH_MPI
@@ -51,58 +42,55 @@
 #define ENGINE_POLICY engine_policy_none
 #endif
 
+/**
+ * @brief Prints the usage line and all valid command-line options to stdout.
+ */
+void print_help_message() {
+
+  printf("\nUsage: swift [OPTION] PARAMFILE\n\n");
+  printf("Valid options are:\n");
+  printf("  %2s %8s %s\n", "-c", "", "Run with cosmological time integration");
+  printf(
+      "  %2s %8s %s\n", "-d", "",
+      "Dry run. Read the parameter file, allocate memory but does not read ");
+  printf(
+      "  %2s %8s %s\n", "", "",
+      "the particles from ICs and exit before the start of time integration.");
+  printf("  %2s %8s %s\n", "", "",
+         "Allows user to check validity of parameter and IC files as well as "
+         "memory limits.");
+  printf("  %2s %8s %s\n", "-e", "",
+         "Enable floating-point exceptions (debugging mode)");
+  printf("  %2s %8s %s\n", "-f", "{int}",
+         "Overwrite the CPU frequency (Hz) to be used for time measurements");
+  printf("  %2s %8s %s\n", "-g", "",
+         "Run with an external gravitational potential");
+  printf("  %2s %8s %s\n", "-G", "", "Run with self-gravity");
+  printf("  %2s %8s %s\n", "-s", "", "Run with SPH");
+  printf("  %2s %8s %s\n", "-v", "[12]",
+         "Increase the level of verbosity 1: MPI-rank 0 writes ");
+  printf("  %2s %8s %s\n", "", "", "2: All MPI-ranks write");
+  printf("  %2s %8s %s\n", "-y", "{int}",
+         "Time-step frequency at which task graphs are dumped");
+  printf("  %2s %8s %s\n", "-h", "", "Print this help message and exit");
+  printf(
+      "\nSee the file parameter_example.yml for an example of "
+      "parameter file.\n");
+}
+
 /**
  * @brief Main routine that loads a few particles and generates some output.
  *
  */
-
 int main(int argc, char *argv[]) {
 
-  int c, icount, periodic = 1;
-  size_t Ngas = 0, Ngpart = 0;
-  long long N_total[2] = {0, 0};
-  int nr_threads = 1, nr_queues = -1;
-  int dump_tasks = 0;
-  int data[2];
-  double dim[3] = {1.0, 1.0, 1.0}, shift[3] = {0.0, 0.0, 0.0};
-  double h_max = -1.0, scaling = 1.0;
-  double time_end = DBL_MAX;
-  struct part *parts = NULL;
-  struct gpart *gparts = NULL;
-  struct space s;
-  struct engine e;
-  struct UnitSystem us;
   struct clocks_time tic, toc;
-  char ICfileName[200] = "";
-  char dumpfile[30];
-  float dt_max = 0.0f, dt_min = 0.0f;
-  int nr_nodes = 1, myrank = 0;
-  FILE *file_thread;
-  int with_outputs = 1;
-  int verbose = 0, talking;
-  unsigned long long cpufreq = 0;
-
-#ifdef WITH_MPI
-  struct partition initial_partition;
-  enum repartition_type reparttype = REPART_NONE;
-
-  initial_partition.type = INITPART_GRID;
-  initial_partition.grid[0] = 1;
-  initial_partition.grid[1] = 1;
-  initial_partition.grid[2] = 1;
-#ifdef HAVE_METIS
-  /* Defaults make use of METIS. */
-  reparttype = REPART_METIS_BOTH;
-  initial_partition.type = INITPART_METIS_NOWEIGHT;
-#endif
-#endif
 
-/* Choke on FP-exceptions. */
-// feenableexcept( FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW );
+  int nr_nodes = 1, myrank = 0;
 
 #ifdef WITH_MPI
   /* Start by initializing MPI. */
-  int res, prov;
+  int res = 0, prov = 0;
   if ((res = MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &prov)) !=
       MPI_SUCCESS)
     error("Call to MPI_Init failed with error %i.", res);
@@ -117,304 +105,242 @@ int main(int argc, char *argv[]) {
   if ((res = MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN)) !=
       MPI_SUCCESS)
     error("Call to MPI_Comm_set_errhandler failed with error %i.", res);
-  if (myrank == 0) message("MPI is up and running with %i node(s).", nr_nodes);
+  if (myrank == 0)
+    printf("[0000][00000.0] MPI is up and running with %i node(s).\n",
+           nr_nodes);
+  if (nr_nodes == 1) {
+    message("WARNING: you are running with one MPI rank.");
+    message("WARNING: you should use the non-MPI version of this program.");
+  }
   fflush(stdout);
-
-  /* Set a default grid so that grid[0]*grid[1]*grid[2] == nr_nodes. */
-  factor(nr_nodes, &initial_partition.grid[0], &initial_partition.grid[1]);
-  factor(nr_nodes / initial_partition.grid[1], &initial_partition.grid[0],
-         &initial_partition.grid[2]);
-  factor(initial_partition.grid[0] * initial_partition.grid[1],
-         &initial_partition.grid[1], &initial_partition.grid[0]);
 #endif
 
-  /* Initialize CPU frequency, this also starts time. */
-  clocks_set_cpufreq(cpufreq);
-
-  /* Greeting message */
-  if (myrank == 0) greetings();
-
 #if defined(HAVE_SETAFFINITY) && defined(HAVE_LIBNUMA) && defined(_GNU_SOURCE)
   if ((ENGINE_POLICY) & engine_policy_setaffinity) {
     /* Ensure the NUMA node on which we initialise (first touch) everything
      * doesn't change before engine_init allocates NUMA-local workers.
-     * Otherwise,
-     * we may be scheduled elsewhere between the two times.
+     * Otherwise, we may be scheduled elsewhere between the two times.
      */
     cpu_set_t affinity;
     CPU_ZERO(&affinity);
     CPU_SET(sched_getcpu(), &affinity);
     if (sched_setaffinity(0, sizeof(cpu_set_t), &affinity) != 0) {
-      message("failed to set entry thread's affinity");
-    } else {
-      message("set entry thread's affinity");
+      error("failed to set entry thread's affinity");
     }
   }
 #endif
 
-  /* Init the space. */
-  bzero(&s, sizeof(struct space));
+  /* Welcome to SWIFT, you made the right choice */
+  if (myrank == 0) greetings();
 
-  /* Parse the options */
-  while ((c = getopt(argc, argv, "a:c:d:e:f:h:m:oP:q:R:s:t:v:w:y:z:")) != -1)
-    switch (c) {
-      case 'a':
-        if (sscanf(optarg, "%lf", &scaling) != 1)
-          error("Error parsing cutoff scaling.");
-        if (myrank == 0) message("scaling cutoff by %.3f.", scaling);
-        fflush(stdout);
-        break;
+  int dry_run = 0;
+  int dump_tasks = 0;
+  int with_cosmology = 0;
+  int with_external_gravity = 0;
+  int with_self_gravity = 0;
+  int with_hydro = 0;
+  int with_fp_exceptions = 0;
+  int verbose = 0;
+  char paramFileName[200] = "";
+  unsigned long long cpufreq = 0;
+
+  /* Parse the parameters (a ':' after a letter means it takes an argument) */
+  int c;
+  while ((c = getopt(argc, argv, "cdef:gGhsv:y:")) != -1) switch (c) {
       case 'c':
-        if (sscanf(optarg, "%lf", &time_end) != 1)
-          error("Error parsing final time.");
-        if (myrank == 0) message("time_end set to %.3e.", time_end);
-        fflush(stdout);
+        with_cosmology = 1;
         break;
       case 'd':
-        if (sscanf(optarg, "%f", &dt_min) != 1)
-          error("Error parsing minimal timestep.");
-        if (myrank == 0) message("dt_min set to %e.", dt_min);
-        fflush(stdout);
+        dry_run = 1;
         break;
       case 'e':
-        if (sscanf(optarg, "%f", &dt_max) != 1)
-          error("Error parsing maximal timestep.");
-        if (myrank == 0) message("dt_max set to %e.", dt_max);
-        fflush(stdout);
+        with_fp_exceptions = 1;
         break;
       case 'f':
-        if (!strcpy(ICfileName, optarg)) error("Error parsing IC file name.");
-        break;
-      case 'h':
-        if (sscanf(optarg, "%llu", &cpufreq) != 1)
-          error("Error parsing CPU frequency.");
-        if (myrank == 0) message("CPU frequency set to %llu.", cpufreq);
-        fflush(stdout);
-        break;
-      case 'm':
-        if (sscanf(optarg, "%lf", &h_max) != 1) error("Error parsing h_max.");
-        if (myrank == 0) message("maximum h set to %e.", h_max);
-        fflush(stdout);
-        break;
-      case 'o':
-        with_outputs = 0;
-        break;
-      case 'P':
-/* Partition type is one of "g", "m", "w", or "v"; "g" can be
- * followed by three numbers defining the grid. */
-#ifdef WITH_MPI
-        switch (optarg[0]) {
-          case 'g':
-            initial_partition.type = INITPART_GRID;
-            if (strlen(optarg) > 2) {
-              if (sscanf(optarg, "g %i %i %i", &initial_partition.grid[0],
-                         &initial_partition.grid[1],
-                         &initial_partition.grid[2]) != 3)
-                error("Error parsing grid.");
-            }
-            break;
-#ifdef HAVE_METIS
-          case 'm':
-            initial_partition.type = INITPART_METIS_NOWEIGHT;
-            break;
-          case 'w':
-            initial_partition.type = INITPART_METIS_WEIGHT;
-            break;
-#endif
-          case 'v':
-            initial_partition.type = INITPART_VECTORIZE;
-            break;
+        if (sscanf(optarg, "%llu", &cpufreq) != 1) {
+          if (myrank == 0) printf("Error parsing CPU frequency (-f).\n");
+          if (myrank == 0) print_help_message();
+          return 1;
         }
-#endif
         break;
-      case 'q':
-        if (sscanf(optarg, "%d", &nr_queues) != 1)
-          error("Error parsing number of queues.");
+      case 'g':
+        with_external_gravity = 1;
         break;
-      case 'R':
-/* Repartition type "n", "b", "v", "e" or "x".
- * Note only none is available without METIS. */
-#ifdef WITH_MPI
-        switch (optarg[0]) {
-          case 'n':
-            reparttype = REPART_NONE;
-            break;
-#ifdef HAVE_METIS
-          case 'b':
-            reparttype = REPART_METIS_BOTH;
-            break;
-          case 'v':
-            reparttype = REPART_METIS_VERTEX;
-            break;
-          case 'e':
-            reparttype = REPART_METIS_EDGE;
-            break;
-          case 'x':
-            reparttype = REPART_METIS_VERTEX_EDGE;
-            break;
-#endif
-        }
-#endif
+      case 'G':
+        with_self_gravity = 1;
         break;
+      case 'h':
+        if (myrank == 0) print_help_message();
+        return 0;
       case 's':
-        if (sscanf(optarg, "%lf %lf %lf", &shift[0], &shift[1], &shift[2]) != 3)
-          error("Error parsing shift.");
-        if (myrank == 0)
-          message("will shift parts by [ %.3f %.3f %.3f ].", shift[0], shift[1],
-                  shift[2]);
-        break;
-      case 't':
-        if (sscanf(optarg, "%d", &nr_threads) != 1)
-          error("Error parsing number of threads.");
+        with_hydro = 1;
         break;
       case 'v':
-        /* verbose = 1: MPI rank 0 writes
-           verbose = 2: all MPI ranks write */
-        if (sscanf(optarg, "%d", &verbose) != 1)
-          error("Error parsing verbosity level.");
-        break;
-      case 'w':
-        if (sscanf(optarg, "%d", &space_subsize) != 1)
-          error("Error parsing sub size.");
-        if (myrank == 0) message("sub size set to %i.", space_subsize);
+        if (sscanf(optarg, "%d", &verbose) != 1) {
+          if (myrank == 0) printf("Error parsing verbosity level (-v).\n");
+          if (myrank == 0) print_help_message();
+          return 1;
+        }
         break;
       case 'y':
-        if (sscanf(optarg, "%d", &dump_tasks) != 1)
-          error("Error parsing dump_tasks (-y)");
-        break;
-      case 'z':
-        if (sscanf(optarg, "%d", &space_splitsize) != 1)
-          error("Error parsing split size.");
-        if (myrank == 0) message("split size set to %i.", space_splitsize);
+        if (sscanf(optarg, "%d", &dump_tasks) != 1) {
+          if (myrank == 0) printf("Error parsing dump_tasks (-y). \n");
+          if (myrank == 0) print_help_message();
+          return 1;
+        }
         break;
       case '?':
-        error("Unknown option.");
+        if (myrank == 0) print_help_message();
+        return 1;
         break;
     }
+  if (optind == argc - 1) {
+    if (!strcpy(paramFileName, argv[optind++]))
+      error("Error reading parameter file name.");
+  } else if (optind > argc - 1) {
+    if (myrank == 0) printf("Error: A parameter file name must be provided\n");
+    if (myrank == 0) print_help_message();
+    return 1;
+  } else {
+    if (myrank == 0) printf("Error: Too many parameters given\n");
+    if (myrank == 0) print_help_message();
+    return 1;
+  }
+  if (!with_self_gravity && !with_hydro && !with_external_gravity) {
+    if (myrank == 0)
+      printf("Error: At least one of -s, -g or -G must be chosen.\n");
+    if (myrank == 0) print_help_message();
+    return 1;
+  }
 
-#ifdef WITH_MPI
+  /* Genesis 1.1: And then, there was time ! */
+  clocks_set_cpufreq(cpufreq);
+
+  if (myrank == 0 && dry_run)
+    message(
+        "Executing a dry run. No i/o or time integration will be performed.");
+
+  /* Report CPU frequency. */
   if (myrank == 0) {
-    message("Running with %i thread(s) per node.", nr_threads);
-    message("Using initial partition %s",
-            initial_partition_name[initial_partition.type]);
-    if (initial_partition.type == INITPART_GRID)
-      message("grid set to [ %i %i %i ].", initial_partition.grid[0],
-              initial_partition.grid[1], initial_partition.grid[2]);
-    message("Using %s repartitioning", repartition_name[reparttype]);
+    cpufreq = clocks_get_cpufreq();
+    message("CPU frequency used for tick conversion: %llu Hz", cpufreq);
+  }
 
-    if (nr_nodes == 1) {
-      message("WARNING: you are running with one MPI rank.");
-      message("WARNING: you should use the non-MPI version of this program.");
-    }
-    fflush(stdout);
+  /* Do we choke on FP-exceptions ? */
+  if (with_fp_exceptions) {
+    feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW);
+    if (myrank == 0) message("Floating point exceptions will be reported.");
   }
-#else
-  if (myrank == 0) message("Running with %i thread(s).", nr_threads);
-#endif
 
   /* How large are the parts? */
   if (myrank == 0) {
-    message("sizeof(struct part) is %li bytes.", (long int)sizeof(struct part));
-    message("sizeof(struct xpart) is %li bytes.",
-            (long int)sizeof(struct xpart));
-    message("sizeof(struct gpart) is %li bytes.",
-            (long int)sizeof(struct gpart));
+    message("sizeof(struct part)  is %4zi bytes.", sizeof(struct part));
+    message("sizeof(struct xpart) is %4zi bytes.", sizeof(struct xpart));
+    message("sizeof(struct gpart) is %4zi bytes.", sizeof(struct gpart));
+  }
+
+  /* How vocal are we ? */
+  const int talking = (verbose == 1 && myrank == 0) || (verbose == 2);
+
+  /* Read the parameter file */
+  struct swift_params *params = malloc(sizeof(struct swift_params));
+  if (params == NULL) error("Error allocating memory for the parameter file.");
+  if (myrank == 0) {
+    message("Reading parameters from file '%s'", paramFileName);
+    parser_read_file(paramFileName, params);
+    // parser_print_params(&params);
+    parser_write_params_to_file(params, "used_parameters.yml");
   }
+#ifdef WITH_MPI
+  /* Broadcast the parameter file */
+  MPI_Bcast(params, sizeof(struct swift_params), MPI_BYTE, 0, MPI_COMM_WORLD);
+#endif
 
   /* Initialize unit system */
-  initUnitSystem(&us);
+  struct UnitSystem us;
+  units_init(&us, params);
   if (myrank == 0) {
     message("Unit system: U_M = %e g.", us.UnitMass_in_cgs);
     message("Unit system: U_L = %e cm.", us.UnitLength_in_cgs);
     message("Unit system: U_t = %e s.", us.UnitTime_in_cgs);
     message("Unit system: U_I = %e A.", us.UnitCurrent_in_cgs);
     message("Unit system: U_T = %e K.", us.UnitTemperature_in_cgs);
-    message("Density units: %e a^%f h^%f.",
-            conversionFactor(&us, UNIT_CONV_DENSITY),
-            aFactor(&us, UNIT_CONV_DENSITY), hFactor(&us, UNIT_CONV_DENSITY));
-    message("Entropy units: %e a^%f h^%f.",
-            conversionFactor(&us, UNIT_CONV_ENTROPY),
-            aFactor(&us, UNIT_CONV_ENTROPY), hFactor(&us, UNIT_CONV_ENTROPY));
   }
 
-  /* Report CPU frequency. */
+/* Prepare the domain decomposition scheme */
+#ifdef WITH_MPI
+  struct partition initial_partition;
+  enum repartition_type reparttype;
+  partition_init(&initial_partition, &reparttype, params, nr_nodes);
+
+  /* Let's report what we did */
   if (myrank == 0) {
-    cpufreq = clocks_get_cpufreq();
-    message("CPU frequency used for tick conversion: %llu Hz", cpufreq);
+    message("Using initial partition %s",
+            initial_partition_name[initial_partition.type]);
+    if (initial_partition.type == INITPART_GRID)
+      message("grid set to [ %i %i %i ].", initial_partition.grid[0],
+              initial_partition.grid[1], initial_partition.grid[2]);
+    message("Using %s repartitioning", repartition_name[reparttype]);
   }
+#endif
 
-  /* Check we have sensible time step bounds */
-  if (dt_min > dt_max)
-    error("Minimal time step size must be large than maximal time step size ");
-
-  /* Check whether an IC file has been provided */
-  if (strcmp(ICfileName, "") == 0)
-    error("An IC file name must be provided via the option -f");
-
-  /* Read particles and space information from (GADGET) IC */
-
+  /* Read particles and space information from (GADGET) ICs */
+  char ICfileName[200] = "";
+  parser_get_param_string(params, "InitialConditions:file_name", ICfileName);
+  if (myrank == 0) message("Reading ICs from file '%s'", ICfileName);
+  struct part *parts = NULL;
+  struct gpart *gparts = NULL;
+  size_t Ngas = 0, Ngpart = 0;
+  double dim[3] = {0., 0., 0.};
+  int periodic = 0;
   if (myrank == 0) clocks_gettime(&tic);
 #if defined(WITH_MPI)
 #if defined(HAVE_PARALLEL_HDF5)
-  read_ic_parallel(ICfileName, dim, &parts, &Ngas, &periodic, myrank, nr_nodes,
-                   MPI_COMM_WORLD, MPI_INFO_NULL);
+  read_ic_parallel(ICfileName, dim, &parts, &gparts, &Ngas, &Ngpart, &periodic,
+                   myrank, nr_nodes, MPI_COMM_WORLD, MPI_INFO_NULL, dry_run);
 #else
-  read_ic_serial(ICfileName, dim, &parts, &Ngas, &periodic, myrank, nr_nodes,
-                 MPI_COMM_WORLD, MPI_INFO_NULL);
+  read_ic_serial(ICfileName, dim, &parts, &gparts, &Ngas, &Ngpart, &periodic,
+                 myrank, nr_nodes, MPI_COMM_WORLD, MPI_INFO_NULL, dry_run);
 #endif
 #else
-  read_ic_single(ICfileName, dim, &parts, &gparts, &Ngas, &Ngpart, &periodic);
+  read_ic_single(ICfileName, dim, &parts, &gparts, &Ngas, &Ngpart, &periodic,
+                 dry_run);
 #endif
-
   if (myrank == 0) {
     clocks_gettime(&toc);
-    message("reading particle properties took %.3f %s.",
-            clocks_diff(&tic, &toc), clocks_getunit());
+    message("Reading initial conditions took %.3f %s.", clocks_diff(&tic, &toc),
+            clocks_getunit());
     fflush(stdout);
   }
 
+  /* Discard gparts if we don't have gravity
+   * (Better implementation of i/o will come)*/
+  if (!with_external_gravity && !with_self_gravity) {
+    free(gparts);
+    gparts = NULL;
+    for (size_t k = 0; k < Ngas; ++k) parts[k].gpart = NULL;
+    Ngpart = 0;
+  }
+
+  /* Get the total number of particles across all nodes. */
+  long long N_total[2] = {0, 0};
 #if defined(WITH_MPI)
   long long N_long[2] = {Ngas, Ngpart};
   MPI_Reduce(&N_long, &N_total, 2, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_WORLD);
-  if (myrank == 0)
-    message("Read %lld gas particles and %lld DM particles from the ICs",
-            N_total[0], N_total[1]);
 #else
   N_total[0] = Ngas;
-  N_total[1] = Ngpart - Ngas;
-  message("Read %lld gas particles and %lld DM particles from the ICs",
-	  N_total[0], N_total[1]);
+  N_total[1] = Ngpart;
 #endif
+  if (myrank == 0)
+    message("Read %lld gas particles and %lld gparts from the ICs.", N_total[0],
+            N_total[1]);
 
-  /* Apply h scaling */
-  if (scaling != 1.0)
-    for (size_t k = 0; k < Ngas; k++) parts[k].h *= scaling;
-
-  /* Apply shift */
-  if (shift[0] != 0 || shift[1] != 0 || shift[2] != 0) {
-    for (size_t k = 0; k < Ngas; k++) {
-      parts[k].x[0] += shift[0];
-      parts[k].x[1] += shift[1];
-      parts[k].x[2] += shift[2];
-    }
-    for (size_t k = 0; k < Ngpart; k++) {
-      gparts[k].x[0] += shift[0];
-      gparts[k].x[1] += shift[1];
-      gparts[k].x[2] += shift[2];
-    }
-  }
-
-  /* Set default number of queues. */
-  if (nr_queues < 0) nr_queues = nr_threads;
-
-  /* How vocal are we ? */
-  talking = (verbose == 1 && myrank == 0) || (verbose == 2);
-
-  /* Initialize the space with this data. */
+  /* Initialize the space with these data. */
   if (myrank == 0) clocks_gettime(&tic);
-  space_init(&s, dim, parts, gparts, Ngas, Ngpart, periodic, h_max,
-             myrank == 0);
-  if (myrank == 0 && verbose) {
+  struct space s;
+  space_init(&s, params, dim, parts, gparts, Ngas, Ngpart, periodic, talking,
+             dry_run);
+  if (myrank == 0) {
     clocks_gettime(&toc);
     message("space_init took %.3f %s.", clocks_diff(&tic, &toc),
             clocks_getunit());
@@ -431,45 +357,47 @@ int main(int argc, char *argv[]) {
     message("%zi parts in %i cells.", s.nr_parts, s.tot_cells);
     message("%zi gparts in %i cells.", s.nr_gparts, s.tot_cells);
     message("maximum depth is %d.", s.maxdepth);
-    // message( "cutoffs in [ %g %g ]." , s.h_min , s.h_max ); fflush(stdout);
   }
 
   /* Verify that each particle is in it's proper cell. */
-  if (myrank == 0) {
-    icount = 0;
+  if (talking && !dry_run) {
+    int icount = 0;
     space_map_cells_pre(&s, 0, &map_cellcheck, &icount);
     message("map_cellcheck picked up %i parts.", icount);
   }
 
-  if (myrank == 0) {
-    data[0] = s.maxdepth;
-    data[1] = 0;
+  /* Verify the maximal depth of cells. */
+  if (talking && !dry_run) {
+    int data[2] = {s.maxdepth, 0};
     space_map_cells_pre(&s, 0, &map_maxdepth, data);
     message("nr of cells at depth %i is %i.", data[0], data[1]);
   }
 
-  /* Initialize the engine with this space. */
+  /* Construct the engine policy */
+  int engine_policies = ENGINE_POLICY | engine_policy_steal;
+  if (with_hydro) engine_policies |= engine_policy_hydro;
+  if (with_self_gravity) engine_policies |= engine_policy_self_gravity;
+  if (with_external_gravity) engine_policies |= engine_policy_external_gravity;
+  if (with_cosmology) engine_policies |= engine_policy_cosmology;
+
+  /* Initialize the engine with the space and policies. */
   if (myrank == 0) clocks_gettime(&tic);
-  if (myrank == 0) message("nr_nodes is %i.", nr_nodes);
-  engine_init(&e, &s, dt_max, nr_threads, nr_queues, nr_nodes, myrank,
-              ENGINE_POLICY | engine_policy_steal | engine_policy_hydro, 0,
-              time_end, dt_min, dt_max, talking);
-  if (myrank == 0 && verbose) {
+  struct engine e;
+  engine_init(&e, &s, params, nr_nodes, myrank, engine_policies, talking);
+  if (myrank == 0) {
     clocks_gettime(&toc);
     message("engine_init took %.3f %s.", clocks_diff(&tic, &toc),
             clocks_getunit());
     fflush(stdout);
   }
 
-#ifdef WITH_MPI
-  /* Split the space. */
-  engine_split(&e, &initial_partition);
-  engine_redistribute(&e);
-#endif
+  /* Now that everything is ready, no need for the parameters any more */
+  free(params);
+  params = NULL;
 
-  if (with_outputs) {
-    /* Write the state of the system as it is before starting time integration.
-     */
+  int with_outputs = 1;
+  if (with_outputs && !dry_run) {
+    /* Write the state of the system before starting time integration. */
     if (myrank == 0) clocks_gettime(&tic);
 #if defined(WITH_MPI)
 #if defined(HAVE_PARALLEL_HDF5)
@@ -495,26 +423,42 @@ int main(int argc, char *argv[]) {
   for (k = 0; k < runner_hist_N; k++) runner_hist_bins[k] = 0;
 #endif
 
+  /* Get some info to the user. */
   if (myrank == 0) {
     message(
         "Running on %lld gas particles and %lld DM particles until t=%.3e with "
         "%i threads and %i queues (dt_min=%.3e, dt_max=%.3e)...",
-        N_total[0], N_total[1], time_end, e.nr_threads, e.sched.nr_queues,
+        N_total[0], N_total[1], e.timeEnd, e.nr_threads, e.sched.nr_queues,
         e.dt_min, e.dt_max);
     fflush(stdout);
   }
 
+  /* Time to say good-bye if this was not a serious run. */
+  if (dry_run) {
+#ifdef WITH_MPI
+    if ((res = MPI_Finalize()) != MPI_SUCCESS)
+      error("call to MPI_Finalize failed with error %i.", res);
+#endif
+    if (myrank == 0)
+      message("Time integration ready to start. End of dry-run.");
+    return 0;
+  }
+
+#ifdef WITH_MPI
+  /* Split the space. */
+  engine_split(&e, &initial_partition);
+  engine_redistribute(&e);
+#endif
+
   /* Initialise the particles */
   engine_init_particles(&e);
 
   /* Legend */
   if (myrank == 0)
-    printf(
-        "# Step  Time  time-step  Number of updates    CPU Wall-clock time "
-        "[%s]\n",
-        clocks_getunit());
+    printf("# %6s %14s %14s %10s %10s %16s [%s]\n", "Step", "Time", "Time-step",
+           "Updates", "g-Updates", "Wall-clock time", clocks_getunit());
 
-  /* Let loose a runner on the space. */
+  /* Main simulation loop */
   for (int j = 0; !engine_is_done(&e); j++) {
 
 /* Repartition the space amongst the nodes? */
@@ -557,7 +501,9 @@ int main(int argc, char *argv[]) {
 #ifdef WITH_MPI
 
       /* Make sure output file is empty, only on one rank. */
-      sprintf(dumpfile, "thread_info_MPI-step%d.dat", j);
+      char dumpfile[30];
+      snprintf(dumpfile, 30, "thread_info_MPI-step%d.dat", j);
+      FILE *file_thread;
       if (myrank == 0) {
         file_thread = fopen(dumpfile, "w");
         fclose(file_thread);
@@ -602,7 +548,9 @@ int main(int argc, char *argv[]) {
       }
 
 #else
-      sprintf(dumpfile, "thread_info-step%d.dat", j);
+      char dumpfile[30];
+      snprintf(dumpfile, 30, "thread_info-step%d.dat", j);
+      FILE *file_thread;
       file_thread = fopen(dumpfile, "w");
       for (int l = 0; l < e.sched.nr_tasks; l++)
         if (!e.sched.tasks[l].skip && !e.sched.tasks[l].implicit)
@@ -616,26 +564,6 @@ int main(int argc, char *argv[]) {
       fclose(file_thread);
 #endif
     }
-
-    /* Dump a line of aggregate output. */
-    /*     if (myrank == 0) { */
-    /*       printf("%i %e %.16e %.16e %.16e %.3e %.3e %i %.3e %.3e", j, e.time,
-     */
-    /*              e.ekin + e.epot, e.ekin, e.epot, e.dt, e.dt_step,
-     * e.count_step, */
-    /*              e.dt_min, e.dt_max); */
-    /*       for (k = 0; k < timer_count; k++) */
-    /*         printf(" %.3f", clocks_from_ticks(timers[k]); */
-    /*       printf("\n"); */
-    /*       fflush(stdout); */
-    /*     } */
-
-    /* if (myrank == 0) { */
-    /*   printf("%i %e", j, e.time); */
-    /*   printf(" %.3f", clocks_from_ticks(timers[timer_count - 1]); */
-    /*   printf("\n"); */
-    /*   fflush(stdout); */
-    /* } */
   }
 
 /* Print the values of the runner histogram. */
@@ -673,7 +601,7 @@ int main(int argc, char *argv[]) {
   }
 
 #ifdef WITH_MPI
-  if (MPI_Finalize() != MPI_SUCCESS)
+  if ((res = MPI_Finalize()) != MPI_SUCCESS)
     error("call to MPI_Finalize failed with error %i.", res);
 #endif
 
diff --git a/examples/parameter_example.yml b/examples/parameter_example.yml
new file mode 100644
index 0000000000000000000000000000000000000000..b91e99baf383a399b72bfb73f1791ab7ac6f3d91
--- /dev/null
+++ b/examples/parameter_example.yml
@@ -0,0 +1,48 @@
+
+# Define the system of units to use internally. 
+UnitSystem:
+  UnitMass_in_cgs:     1   # Grams
+  UnitLength_in_cgs:   1   # Centimeters
+  UnitVelocity_in_cgs: 1   # Centimeters per second
+  UnitCurrent_in_cgs:  1   # Amperes
+  UnitTemp_in_cgs:     1   # Kelvin
+
+# Parameters for the task scheduling
+Scheduler:
+  nr_threads:       2        # The number of threads per MPI rank to use.
+  nr_queues:        0        # The number of task queues to use. Use 0  to let the system decide.
+  cell_max_size:    8000000  # Maximal number of interactions per task (this is the default value).
+  cell_sub_size:    8000000  # Maximal number of interactions per sub-task  (this is the default value).
+  cell_split_size:  400      # Maximal number of particles per cell (this is the default value).
+
+# Parameters governing the time integration
+TimeIntegration:
+  time_begin: 0.    # The starting time of the simulation (in internal units).
+  time_end:   1.    # The end time of the simulation (in internal units).
+  dt_min:     1e-6  # The minimal time-step size of the simulation (in internal units).
+  dt_max:     1e-2  # The maximal time-step size of the simulation (in internal units).
+
+# Parameters for the hydrodynamics scheme
+SPH:
+  resolution_eta:        1.2349   # Target smoothing length in units of the mean inter-particle separation (1.2349 == 48Ngbs with the cubic spline kernel).
+  delta_neighbours:      1.       # The tolerance for the targeted number of neighbours.
+  CFL_condition:         0.1      # Courant-Friedrichs-Lewy condition for time integration.
+  max_ghost_iterations:  30       # Maximal number of iterations allowed to converge towards the smoothing length.
+  max_smoothing_length:  3.       # Maximal smoothing length allowed (in internal units).
+
+# Parameters related to the initial conditions
+InitialConditions:
+  file_name:  SedovBlast/sedov.hdf5 # The file to read
+  h_scaling:  1.                    # A scaling factor to apply to all smoothing lengths in the ICs.
+  shift_x:    0.                    # A shift to apply to all particles read from the ICs (in internal units).
+  shift_y:    0.
+  shift_z:    0.
+
+# Parameters governing domain decomposition
+DomainDecomposition:
+  initial_type:       m     # The initial strategy ("g", "m", "w", or "v"). See documentation for details.
+  initial_grid_x:    10     # Grid size if the 'g' strategy is chosen.
+  initial_grid_y:    10
+  initial_grid_z:    10
+  repartition_type:   b     # The re-decomposition strategy ("n", "b", "v", "e" or "x"). See documentation for details.
+ 
diff --git a/examples/plot_tasks.py b/examples/plot_tasks.py
index eaff41ebae1bad0f1307d23a3204186ecbc63b2f..895c32ef9c3d1490e6d30b7dc79e40171a228ee9 100755
--- a/examples/plot_tasks.py
+++ b/examples/plot_tasks.py
@@ -60,7 +60,7 @@ pl.rcParams.update(PLOT_PARAMS)
 #  Tasks and subtypes. Indexed as in tasks.h.
 TASKTYPES = ["none", "sort", "self", "pair", "sub", "init", "ghost", "drift", "kick",
              "send", "recv", "grav_pp", "grav_mm", "grav_up", "grav_down",
-             "psort", "split_cell", "rewait", "count"]
+             "part_sort", "gpart_sort", "split_cell", "rewait", "count"]
 
 TASKCOLOURS = {"none": "black",
                "sort": "lightblue",
@@ -77,7 +77,8 @@ TASKCOLOURS = {"none": "black",
                "grav_mm": "mediumturquoise",
                "grav_up": "mediumvioletred",
                "grav_down": "mediumnightblue",
-               "psort": "steelblue",
+               "part_sort": "steelblue",
+               "gpart_sort": "teal" ,
                "split_cell": "seagreen",
                "rewait": "olive",
                "count": "powerblue"}
diff --git a/examples/plot_tasks_MPI.py b/examples/plot_tasks_MPI.py
index b7d1823ad746d6a10b5e67fc9f7315b13be4649f..d59fe6417b524b8cb3cf8f6117fca3b8b3f3c780 100755
--- a/examples/plot_tasks_MPI.py
+++ b/examples/plot_tasks_MPI.py
@@ -66,7 +66,7 @@ pl.rcParams.update(PLOT_PARAMS)
 #  Tasks and subtypes. Indexed as in tasks.h.
 TASKTYPES = ["none", "sort", "self", "pair", "sub", "init", "ghost", "drift", "kick",
              "send", "recv", "grav_pp", "grav_mm", "grav_up", "grav_down",
-             "psort", "split_cell", "rewait", "count"]
+             "part_sort", "gpart_sort", "split_cell", "rewait", "count"]
 
 TASKCOLOURS = {"none": "black",
                "sort": "lightblue",
@@ -83,7 +83,8 @@ TASKCOLOURS = {"none": "black",
                "grav_mm": "mediumturquoise",
                "grav_up": "mediumvioletred",
                "grav_down": "mediumnightblue",
-               "psort": "steelblue",
+               "part_sort": "steelblue",
+               "gpart_sort": "teal",
                "split_cell": "seagreen",
                "rewait": "olive",
                "count": "powerblue"}
diff --git a/examples/runs.sh b/examples/runs.sh
deleted file mode 100755
index 339d8659675843f2491068ed8d30b528cb147c34..0000000000000000000000000000000000000000
--- a/examples/runs.sh
+++ /dev/null
@@ -1,53 +0,0 @@
-#!/bin/bash
-
-# Set some global stuff
-export OMP_WAIT_POLICY=PASSIVE
-
-# Generate the initial conditions if they are not present.
-if [ ! -e SodShock/sodShock.hdf5 ]
-then
-    echo "Generating initial conditions for the SodShock example..."
-    cd SodShock
-    python makeIC.py
-    cd ..
-fi
-if [ ! -e SedovBlast/sedov.hdf5 ]
-then
-    echo "Generating initial conditions for the SedovBlast example..."
-    cd SedovBlast/
-    python makeIC_fcc.py
-    cd ..
-fi
-if [ ! -e CosmoVolume/cosmoVolume.hdf5 ]
-then
-    echo "Downloading initial conditions for the CosmoVolume example..."
-    cd CosmoVolume
-    ./getIC.sh
-    cd ..
-fi
-
-
-# Loop over number of cores
-for cpu in {1..32}
-do
-
-    # Sod-Shock runs
-    if [ ! -e SodShock_${cpu}.dump ]
-    then
-        ./swift -t $cpu -f SodShock/sodShock.hdf5 -m 0.01 -w 5000 -c 1. -d 1e-7 -e 0.01 > SodShock_fixed_${cpu}.dump
-    fi
-    
-    # Sedov blast
-    if [ ! -e SedovBlast_${cpu}.dump ]
-    then
-        ./swift -t $cpu -f SedovBlast/sedov.hdf5 -m 0.02 -w 5000 -c 1. -d 1e-7 -e 0.01 > SedovBlast_fixed_${cpu}.dump
-    fi
-    
-    # Cosmological volume
-    if [ ! -e CosmoVolume_${cpu}.dump ]
-    then
-        ./swift -t $cpu -f CosmoVolume/cosmoVolume.hdf5 -m 0.6 -w 5000 -c 1. -d 1e-7 -e 0.01 > CosmoVolume_fixed_${cpu}.dump
-    fi
-
-done
-
diff --git a/src/Makefile.am b/src/Makefile.am
index f44d47819672d10445fd969fe2ff20dbcb49463b..a96f35b3cf0d8a23aec4f8c0f8d16bec8638cbcd 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -35,17 +35,17 @@ endif
 # List required headers
 include_HEADERS = space.h runner.h queue.h task.h lock.h cell.h part.h const.h \
     engine.h swift.h serial_io.h timers.h debug.h scheduler.h proxy.h parallel_io.h \
-    common_io.h single_io.h multipole.h map.h tools.h partition.h clocks.h
+    common_io.h single_io.h multipole.h map.h tools.h partition.h clocks.h parser.h
 
 # Common source files
 AM_SOURCES = space.c runner.c queue.c task.c cell.c engine.c \
     serial_io.c timers.c debug.c scheduler.c proxy.c parallel_io.c \
     units.c common_io.c single_io.c multipole.c version.c map.c \
-    kernel.c tools.c part.c partition.c clocks.c
+    kernel_hydro.c kernel_gravity.c tools.c part.c partition.c clocks.c parser.c
 
 # Include files for distribution, not installation.
-nobase_noinst_HEADERS = approx_math.h atomic.h cycle.h error.h inline.h kernel.h vector.h \
-		 runner_doiact.h runner_doiact_grav.h units.h intrinsics.h minmax.h \
+nobase_noinst_HEADERS = approx_math.h atomic.h cycle.h error.h inline.h kernel_hydro.h kernel_gravity.h \
+		 vector.h runner_doiact.h runner_doiact_grav.h units.h intrinsics.h minmax.h \
 		 gravity.h gravity_io.h \
 		 gravity/Default/gravity.h gravity/Default/gravity_iact.h gravity/Default/gravity_io.h \
 		 gravity/Default/gravity_debug.h gravity/Default/gravity_part.h  \
diff --git a/src/cell.c b/src/cell.c
index df11782048dfa80c697f53feefe8fabc104eb23b..61acfaaea7a0af01a78ab773541564e9a2723f4e 100644
--- a/src/cell.c
+++ b/src/cell.c
@@ -45,6 +45,7 @@
 /* Local headers. */
 #include "atomic.h"
 #include "error.h"
+#include "gravity.h"
 #include "hydro.h"
 #include "space.h"
 #include "timers.h"
@@ -89,14 +90,18 @@ int cell_unpack(struct pcell *pc, struct cell *c, struct space *s) {
   c->ti_end_min = pc->ti_end_min;
   c->ti_end_max = pc->ti_end_max;
   c->count = pc->count;
+  c->gcount = pc->gcount;
   c->tag = pc->tag;
 
-  /* Fill the progeny recursively, depth-first. */
+  /* Number of new cells created. */
   int count = 1;
+
+  /* Fill the progeny recursively, depth-first. */
   for (int k = 0; k < 8; k++)
     if (pc->progeny[k] >= 0) {
       struct cell *temp = space_getcell(s);
       temp->count = 0;
+      temp->gcount = 0;
       temp->loc[0] = c->loc[0];
       temp->loc[1] = c->loc[1];
       temp->loc[2] = c->loc[2];
@@ -122,7 +127,7 @@ int cell_unpack(struct pcell *pc, struct cell *c, struct space *s) {
 }
 
 /**
- * @brief Link the cells recursively to the given part array.
+ * @brief Link the cells recursively to the given #part array.
  *
  * @param c The #cell.
  * @param parts The #part array.
@@ -130,7 +135,7 @@ int cell_unpack(struct pcell *pc, struct cell *c, struct space *s) {
  * @return The number of particles linked.
  */
 
-int cell_link(struct cell *c, struct part *parts) {
+int cell_link_parts(struct cell *c, struct part *parts) {
 
   c->parts = parts;
 
@@ -139,14 +144,40 @@ int cell_link(struct cell *c, struct part *parts) {
     int offset = 0;
     for (int k = 0; k < 8; k++) {
       if (c->progeny[k] != NULL)
-        offset += cell_link(c->progeny[k], &parts[offset]);
+        offset += cell_link_parts(c->progeny[k], &parts[offset]);
     }
   }
 
-  /* Return the total number of unpacked cells. */
+  /* Return the total number of linked particles. */
   return c->count;
 }
 
+/**
+ * @brief Point a #cell, and recursively its progeny, into a #gpart array.
+ *
+ * @param c The #cell whose gparts pointer is set.
+ * @param gparts Start of this cell's #gpart; progeny follow back-to-back.
+ *
+ * @return The gcount of c, i.e. the number of #gpart linked.
+ */
+
+int cell_link_gparts(struct cell *c, struct gpart *gparts) {
+
+  c->gparts = gparts;
+
+  /* Each progeny starts where the previous progeny's gparts end. */
+  if (c->split) {
+    int offset = 0;
+    for (int k = 0; k < 8; k++) {
+      if (c->progeny[k] != NULL)
+        offset += cell_link_gparts(c->progeny[k], &gparts[offset]);
+    }
+  }
+
+  /* Report this cell's own gcount (not the accumulated offset). */
+  return c->gcount;
+}
+
 /**
  * @brief Pack the data of the given cell and all it's sub-cells.
  *
@@ -164,6 +195,7 @@ int cell_pack(struct cell *c, struct pcell *pc) {
   pc->ti_end_min = c->ti_end_min;
   pc->ti_end_max = c->ti_end_max;
   pc->count = c->count;
+  pc->gcount = c->gcount;
   c->tag = pc->tag = atomic_inc(&cell_next_tag) % cell_max_tag;
 
   /* Fill in the progeny, depth-first recursion. */
@@ -574,6 +606,27 @@ void cell_init_parts(struct cell *c, void *data) {
   c->ti_end_max = 0;
 }
 
+/**
+ * @brief Initialises all g-particles to a valid state, whatever the ICs
+ * contained.
+ *
+ * @param c Cell to act upon; its ti_end_min and ti_end_max are zeroed too.
+ * @param data Unused parameter
+ */
+void cell_init_gparts(struct cell *c, void *data) {
+
+  struct gpart *gp = c->gparts;
+  const int gcount = c->gcount;
+
+  for (int i = 0; i < gcount; ++i) {
+    gp[i].ti_begin = 0;
+    gp[i].ti_end = 0;
+    gravity_first_init_gpart(&gp[i]);
+  }
+  c->ti_end_min = 0;
+  c->ti_end_max = 0;
+}
+
 /**
  * @brief Converts hydro quantities to a valid state after the initial density
  *calculation
diff --git a/src/cell.h b/src/cell.h
index b0451b311fda9c300427da6b3a9a25955090d799..a471eac44bfd3533c4220ab8c5ff2ddec724e87f 100644
--- a/src/cell.h
+++ b/src/cell.h
@@ -44,7 +44,7 @@ struct pcell {
   int ti_end_min, ti_end_max;
 
   /* Number of particles in this cell. */
-  int count;
+  int count, gcount;
 
   /* tag used for MPI communication. */
   int tag;
@@ -141,7 +141,7 @@ struct cell {
   double mass, e_pot, e_int, e_kin;
 
   /* Number of particles updated in this cell. */
-  int updated;
+  int updated, g_updated;
 
   /* Linking pointer for "memory management". */
   struct cell *next;
@@ -175,8 +175,10 @@ void cell_gunlocktree(struct cell *c);
 int cell_pack(struct cell *c, struct pcell *pc);
 int cell_unpack(struct pcell *pc, struct cell *c, struct space *s);
 int cell_getsize(struct cell *c);
-int cell_link(struct cell *c, struct part *parts);
+int cell_link_parts(struct cell *c, struct part *parts);
+int cell_link_gparts(struct cell *c, struct gpart *gparts);
 void cell_init_parts(struct cell *c, void *data);
+void cell_init_gparts(struct cell *c, void *data);
 void cell_convert_hydro(struct cell *c, void *data);
 void cell_clean_links(struct cell *c, void *data);
 
diff --git a/src/common_io.c b/src/common_io.c
index f6a4803333581b69671e3adc223b46122ec5364c..2a635723d5bd4db7bce0a0172e8c083bf479ac32 100644
--- a/src/common_io.c
+++ b/src/common_io.c
@@ -42,9 +42,12 @@
 /* Local includes. */
 #include "const.h"
 #include "error.h"
-#include "kernel.h"
+#include "kernel_hydro.h"
 #include "version.h"
 
+const char* particle_type_names[NUM_PARTICLE_TYPES] = {
+    "Gas", "DM", "Boundary", "Dummy", "Star", "BH"};
+
 /**
  * @brief Converts a C data type to the HDF5 equivalent.
  *
@@ -279,15 +282,15 @@ void writeUnitSystem(hid_t h_file, struct UnitSystem* us) {
   if (h_grpunit < 0) error("Error while creating Unit System group");
 
   writeAttribute_d(h_grpunit, "Unit mass in cgs (U_M)",
-                   getBaseUnit(us, UNIT_MASS));
+                   units_get_base_unit(us, UNIT_MASS));
   writeAttribute_d(h_grpunit, "Unit length in cgs (U_L)",
-                   getBaseUnit(us, UNIT_LENGTH));
+                   units_get_base_unit(us, UNIT_LENGTH));
   writeAttribute_d(h_grpunit, "Unit time in cgs (U_t)",
-                   getBaseUnit(us, UNIT_TIME));
+                   units_get_base_unit(us, UNIT_TIME));
   writeAttribute_d(h_grpunit, "Unit current in cgs (U_I)",
-                   getBaseUnit(us, UNIT_CURRENT));
+                   units_get_base_unit(us, UNIT_CURRENT));
   writeAttribute_d(h_grpunit, "Unit temperature in cgs (U_T)",
-                   getBaseUnit(us, UNIT_TEMPERATURE));
+                   units_get_base_unit(us, UNIT_TEMPERATURE));
 
   H5Gclose(h_grpunit);
 }
@@ -402,52 +405,68 @@ void createXMFfile() {
  *snapshot
  *
  * @param xmfFile The file to write in.
- * @param Nparts The number of particles.
  * @param hdfFileName The name of the HDF5 file corresponding to this output.
  * @param time The current simulation time.
  */
-void writeXMFheader(FILE* xmfFile, long long Nparts, char* hdfFileName,
-                    float time) {
+void writeXMFoutputheader(FILE* xmfFile, char* hdfFileName, float time) {
   /* Write end of file */
 
+  fprintf(xmfFile, "<!-- XMF description for file: %s -->\n", hdfFileName);
   fprintf(xmfFile,
           "<Grid GridType=\"Collection\" CollectionType=\"Spatial\">\n");
   fprintf(xmfFile, "<Time Type=\"Single\" Value=\"%f\"/>\n", time);
-  fprintf(xmfFile, "<Grid Name=\"Gas\" GridType=\"Uniform\">\n");
-  fprintf(xmfFile,
-          "<Topology TopologyType=\"Polyvertex\" Dimensions=\"%lld\"/>\n",
-          Nparts);
-  fprintf(xmfFile, "<Geometry GeometryType=\"XYZ\">\n");
-  fprintf(xmfFile,
-          "<DataItem Dimensions=\"%lld 3\" NumberType=\"Double\" "
-          "Precision=\"8\" "
-          "Format=\"HDF\">%s:/PartType0/Coordinates</DataItem>\n",
-          Nparts, hdfFileName);
-  fprintf(xmfFile, "</Geometry>");
 }
 
 /**
  * @brief Writes the end of the XMF file (closes all open markups)
  *
  * @param xmfFile The file to write in.
+ * @param output The number of this output.
+ * @param time The current simulation time.
  */
-void writeXMFfooter(FILE* xmfFile) {
+void writeXMFoutputfooter(FILE* xmfFile, int output, float time) {
   /* Write end of the section of this time step */
 
-  fprintf(xmfFile, "\n</Grid>\n");
-  fprintf(xmfFile, "</Grid>\n");
-  fprintf(xmfFile, "\n</Grid>\n");
+  fprintf(xmfFile,
+          "\n</Grid> <!-- End of meta-data for output=%03i, time=%f -->\n",
+          output, time);
+  fprintf(xmfFile, "\n</Grid> <!-- timeSeries -->\n");
   fprintf(xmfFile, "</Domain>\n");
   fprintf(xmfFile, "</Xdmf>\n");
 
   fclose(xmfFile);
 }
 
+/** @brief Opens the XMF grid of one particle type and writes its geometry. */
+void writeXMFgroupheader(FILE* xmfFile, char* hdfFileName, size_t N,
+                         enum PARTICLE_TYPE ptype) {
+  fprintf(xmfFile, "\n<Grid Name=\"%s\" GridType=\"Uniform\">\n",
+          particle_type_names[ptype]);
+  fprintf(xmfFile,
+          "<Topology TopologyType=\"Polyvertex\" Dimensions=\"%zu\"/>\n", N);
+  fprintf(xmfFile, "<Geometry GeometryType=\"XYZ\">\n");
+  fprintf(xmfFile,
+          "<DataItem Dimensions=\"%zu 3\" NumberType=\"Double\" "
+          "Precision=\"8\" "
+          "Format=\"HDF\">%s:/PartType%d/Coordinates</DataItem>\n",
+          N, hdfFileName, (int)ptype);
+  fprintf(xmfFile,
+          "</Geometry>\n <!-- Done geometry for %s, start of particle fields "
+          "list -->\n", particle_type_names[ptype]);
+}
+
+void writeXMFgroupfooter(FILE* xmfFile, enum PARTICLE_TYPE ptype) {
+  fprintf(xmfFile, "</Grid> <!-- End of meta-data for parttype=%s -->\n",
+          particle_type_names[ptype]);
+}
+
 /**
  * @brief Writes the lines corresponding to an array of the HDF5 output
  *
  * @param xmfFile The file in which to write
  * @param fileName The name of the HDF5 file associated to this XMF descriptor.
+ * @param partTypeGroupName The name of the group containing the particles in
+ *the HDF5 file.
  * @param name The name of the array in the HDF5 file.
  * @param N The number of particles.
  * @param dim The dimension of the quantity (1 for scalars, 3 for vectors).
@@ -455,21 +474,21 @@ void writeXMFfooter(FILE* xmfFile) {
  *
  * @todo Treat the types in a better way.
  */
-void writeXMFline(FILE* xmfFile, char* fileName, char* name, long long N,
-                  int dim, enum DATA_TYPE type) {
+void writeXMFline(FILE* xmfFile, char* fileName, char* partTypeGroupName,
+                  char* name, size_t N, int dim, enum DATA_TYPE type) {
   fprintf(xmfFile,
           "<Attribute Name=\"%s\" AttributeType=\"%s\" Center=\"Node\">\n",
           name, dim == 1 ? "Scalar" : "Vector");
   if (dim == 1)
     fprintf(xmfFile,
-            "<DataItem Dimensions=\"%lld\" NumberType=\"Double\" "
-            "Precision=\"%d\" Format=\"HDF\">%s:/PartType0/%s</DataItem>\n",
-            N, type == FLOAT ? 4 : 8, fileName, name);
+            "<DataItem Dimensions=\"%zu\" NumberType=\"Double\" "
+            "Precision=\"%d\" Format=\"HDF\">%s:%s/%s</DataItem>\n",
+            N, type == FLOAT ? 4 : 8, fileName, partTypeGroupName, name);
   else
     fprintf(xmfFile,
-            "<DataItem Dimensions=\"%lld %d\" NumberType=\"Double\" "
-            "Precision=\"%d\" Format=\"HDF\">%s:/PartType0/%s</DataItem>\n",
-            N, dim, type == FLOAT ? 4 : 8, fileName, name);
+            "<DataItem Dimensions=\"%zu %d\" NumberType=\"Double\" "
+            "Precision=\"%d\" Format=\"HDF\">%s:%s/%s</DataItem>\n",
+            N, dim, type == FLOAT ? 4 : 8, fileName, partTypeGroupName, name);
   fprintf(xmfFile, "</Attribute>\n");
 }
 
@@ -483,13 +502,14 @@ void writeXMFline(FILE* xmfFile, char* fileName, char* name, long long N,
  * @param gparts The array of #gpart freshly read in.
  * @param Ndm The number of DM particles read in.
  */
-void prepare_dm_gparts(struct gpart* gparts, size_t Ndm) {
+void prepare_dm_gparts(struct gpart* const gparts, size_t Ndm) {
 
   /* Let's give all these gparts a negative id */
   for (size_t i = 0; i < Ndm; ++i) {
 
     /* 0 or negative ids are not allowed */
-    if (gparts[i].id <= 0) error("0 or negative ID for DM particle");
+    if (gparts[i].id <= 0)
+      error("0 or negative ID for DM particle %zu: ID=%lld", i, gparts[i].id);
 
     gparts[i].id = -gparts[i].id;
   }
@@ -507,8 +527,9 @@ void prepare_dm_gparts(struct gpart* gparts, size_t Ndm) {
  * @param Ngas The number of gas particles read in.
  * @param Ndm The number of DM particles read in.
  */
-void duplicate_hydro_gparts(struct part* parts, struct gpart* gparts,
-                            size_t Ngas, size_t Ndm) {
+void duplicate_hydro_gparts(struct part* const parts,
+                            struct gpart* const gparts, size_t Ngas,
+                            size_t Ndm) {
 
   for (size_t i = 0; i < Ngas; ++i) {
 
@@ -537,16 +558,19 @@ void duplicate_hydro_gparts(struct part* parts, struct gpart* gparts,
  * @param dmparts The array of #gpart containg DM particles to be filled.
  * @param Ndm The number of DM particles.
  */
-void collect_dm_gparts(struct gpart* gparts, size_t Ntot, struct gpart* dmparts,
-                       size_t Ndm) {
+void collect_dm_gparts(const struct gpart* const gparts, size_t Ntot,
+                       struct gpart* const dmparts, size_t Ndm) {
 
   size_t count = 0;
 
   /* Loop over all gparts */
   for (size_t i = 0; i < Ntot; ++i) {
 
+    /* message("i=%zd count=%zd id=%lld part=%p", i, count, gparts[i].id,
+     * gparts[i].part); */
+
     /* And collect the DM ones */
-    if (gparts[i].id < 0) {
+    if (gparts[i].id < 0LL) {
       memcpy(&dmparts[count], &gparts[i], sizeof(struct gpart));
       dmparts[count].id = -dmparts[count].id;
       count++;
diff --git a/src/common_io.h b/src/common_io.h
index 2623a03f9a25ce0e650dde4f698da6eb49177e26..b7f3a1a317d69937dde8692eead8f00c75649477 100644
--- a/src/common_io.h
+++ b/src/common_io.h
@@ -24,6 +24,7 @@
 #include "../config.h"
 
 /* Includes. */
+#include "kernel_hydro.h"
 #include "part.h"
 #include "units.h"
 
@@ -70,14 +71,20 @@ enum PARTICLE_TYPE {
   NUM_PARTICLE_TYPES
 };
 
+extern const char* particle_type_names[];
+
+#define FILENAME_BUFFER_SIZE 150
+#define PARTICLE_GROUP_BUFFER_SIZE 20
+
 hid_t hdf5Type(enum DATA_TYPE type);
 size_t sizeOfType(enum DATA_TYPE type);
 
-void collect_dm_gparts(struct gpart* gparts, size_t Ntot, struct gpart* dmparts,
-                       size_t Ndm);
-void prepare_dm_gparts(struct gpart* gparts, size_t Ndm);
-void duplicate_hydro_gparts(struct part* parts, struct gpart* gparts,
-                            size_t Ngas, size_t Ndm);
+void collect_dm_gparts(const struct gpart* const gparts, size_t Ntot,
+                       struct gpart* const dmparts, size_t Ndm);
+void prepare_dm_gparts(struct gpart* const gparts, size_t Ndm);
+void duplicate_hydro_gparts(struct part* const parts,
+                            struct gpart* const gparts, size_t Ngas,
+                            size_t Ndm);
 
 void readAttribute(hid_t grp, char* name, enum DATA_TYPE type, void* data);
 
@@ -92,10 +99,13 @@ void writeAttribute_s(hid_t grp, char* name, const char* str);
 
 void createXMFfile();
 FILE* prepareXMFfile();
-void writeXMFfooter(FILE* xmfFile);
-void writeXMFheader(FILE* xmfFile, long long N, char* hdfFileName, float time);
-void writeXMFline(FILE* xmfFile, char* fileName, char* name, long long N,
-                  int dim, enum DATA_TYPE type);
+void writeXMFoutputheader(FILE* xmfFile, char* hdfFileName, float time);
+void writeXMFoutputfooter(FILE* xmfFile, int outputCount, float time);
+void writeXMFgroupheader(FILE* xmfFile, char* hdfFileName, size_t N,
+                         enum PARTICLE_TYPE ptype);
+void writeXMFgroupfooter(FILE* xmfFile, enum PARTICLE_TYPE ptype);
+void writeXMFline(FILE* xmfFile, char* fileName, char* partTypeGroupName,
+                  char* name, size_t N, int dim, enum DATA_TYPE type);
 
 void writeCodeDescription(hid_t h_file);
 void writeSPHflavour(hid_t h_file);
diff --git a/src/const.h b/src/const.h
index 3bd9edff8227a87d040ec7309998364c946307af..6a52ec4796a4904629a57ffa8b32a3107bde263e 100644
--- a/src/const.h
+++ b/src/const.h
@@ -70,9 +70,4 @@
 #define GADGET2_SPH
 //#define DEFAULT_SPH
 
-/* System of units */
-#define const_unit_length_in_cgs 1   /* 3.08567810e16  /\* 1Mpc *\/ */
-#define const_unit_mass_in_cgs 1     /* 1.9891e33      /\* 1 M_sun *\/ */
-#define const_unit_velocity_in_cgs 1 /* 1e5            /\* km s^-1 *\/ */
-
 #endif /* SWIFT_CONST_H */
diff --git a/src/debug.c b/src/debug.c
index 4c1434118c98aab7def28d3a53493767d249d774..53a03d66aee2c169a555ed00a2efa2d5b984066a 100644
--- a/src/debug.c
+++ b/src/debug.c
@@ -60,7 +60,7 @@ void printParticle(struct part *parts, struct xpart *xparts, long long int id,
   /* Look for the particle. */
   for (size_t i = 0; i < N; i++)
     if (parts[i].id == id) {
-      printf("## Particle[%zd]:\n id=%lld", i, parts[i].id);
+      printf("## Particle[%zu]:\n id=%lld ", i, parts[i].id);
       hydro_debug_particle(&parts[i], &xparts[i]);
       found = 1;
       break;
@@ -76,12 +76,12 @@ void printgParticle(struct gpart *gparts, long long int id, size_t N) {
   /* Look for the particle. */
   for (size_t i = 0; i < N; i++)
     if (gparts[i].id == -id) {
-      printf("## gParticle[%zd] (DM) :\n id=%lld", i, -gparts[i].id);
+      printf("## gParticle[%zu] (DM) :\n id=%lld ", i, -gparts[i].id);
       gravity_debug_particle(&gparts[i]);
       found = 1;
       break;
     } else if (gparts[i].id > 0 && gparts[i].part->id == id) {
-      printf("## gParticle[%zd] (hydro) :\n id=%lld", i, gparts[i].id);
+      printf("## gParticle[%zu] (hydro) :\n id=%lld ", i, gparts[i].id);
       gravity_debug_particle(&gparts[i]);
       found = 1;
       break;
diff --git a/src/engine.c b/src/engine.c
index c34214c05b6fb45991208dd78689b58ba5d9731f..e49d6da779d4333a00a60da920144d92a9241305 100644
--- a/src/engine.c
+++ b/src/engine.c
@@ -56,10 +56,11 @@
 #include "partition.h"
 #include "timers.h"
 
-const char *engine_policy_names[12] = {
-    "none",          "rand",   "steal",        "keep",
-    "block",         "fix_dt", "cpu_tight",    "mpi",
-    "numa_affinity", "hydro",  "self_gravity", "external_gravity"};
+const char *engine_policy_names[13] = {
+    "none",                 "rand",   "steal",        "keep",
+    "block",                "fix_dt", "cpu_tight",    "mpi",
+    "numa_affinity",        "hydro",  "self_gravity", "external_gravity",
+    "cosmology_integration"};
 
 /** The rank of the engine as a global variable (for messages). */
 int engine_rank;
@@ -87,14 +88,17 @@ struct link *engine_addlink(struct engine *e, struct link *l, struct task *t) {
 }
 
 /**
- * @brief Generate the ghost and kick tasks for a hierarchy of cells.
+ * @brief Generate ghosts, i.e. all O(Npart) tasks, for a hierarchy of cells.
+ *
+ * Tasks are only created here. The dependencies will be added later on.
  *
  * @param e The #engine.
  * @param c The #cell.
  * @param super The super #cell.
  */
 
-void engine_mkghosts(struct engine *e, struct cell *c, struct cell *super) {
+void engine_make_ghost_tasks(struct engine *e, struct cell *c,
+                             struct cell *super) {
 
   struct scheduler *s = &e->sched;
 
@@ -128,46 +132,64 @@ void engine_mkghosts(struct engine *e, struct cell *c, struct cell *super) {
   /* Recurse. */
   if (c->split)
     for (int k = 0; k < 8; k++)
-      if (c->progeny[k] != NULL) engine_mkghosts(e, c->progeny[k], super);
+      if (c->progeny[k] != NULL)
+        engine_make_ghost_tasks(e, c->progeny[k], super);
 }
 
 /**
  * @brief Redistribute the particles amongst the nodes according
  *      to their cell's node IDs.
  *
+ * The strategy here is as follows:
+ * 1) Each node counts the number of particles it has to send to each other
+ * node.
+ * 2) The number of particles of each type is then exchanged.
+ * 3) The particles to send are placed in a temporary buffer in which the
+ * part-gpart links are preserved.
+ * 4) Each node allocates enough space for the new particles.
+ * 5) (Asynchronous) communications are issued to transfer the data.
+ *
+ *
  * @param e The #engine.
  */
-
 void engine_redistribute(struct engine *e) {
 
 #ifdef WITH_MPI
 
-  int nr_nodes = e->nr_nodes, nodeID = e->nodeID;
+  const int nr_nodes = e->nr_nodes;
+  const int nodeID = e->nodeID;
   struct space *s = e->s;
-  int my_cells = 0;
-  int *cdim = s->cdim;
   struct cell *cells = s->cells;
-  int nr_cells = s->nr_cells;
+  const int nr_cells = s->nr_cells;
+  const int *cdim = s->cdim;
+  const double ih[3] = {s->ih[0], s->ih[1], s->ih[2]};
+  const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]};
+  struct part *parts = s->parts;
+  struct xpart *xparts = s->xparts;
+  struct gpart *gparts = s->gparts;
   ticks tic = getticks();
 
-  /* Start by sorting the particles according to their nodes and
-     getting the counts. The counts array is indexed as
-     count[from * nr_nodes + to]. */
-  int *counts;
-  size_t *dest;
-  double ih[3], dim[3];
-  ih[0] = s->ih[0];
-  ih[1] = s->ih[1];
-  ih[2] = s->ih[2];
-  dim[0] = s->dim[0];
-  dim[1] = s->dim[1];
-  dim[2] = s->dim[2];
-  if ((counts = (int *)malloc(sizeof(int) *nr_nodes *nr_nodes)) == NULL ||
-      (dest = (size_t *)malloc(sizeof(size_t) * s->nr_parts)) == NULL)
-    error("Failed to allocate count and dest buffers.");
+  /* Allocate temporary arrays to store the counts of particles to be sent
+     and the destination of each particle */
+  int *counts, *g_counts;
+  if ((counts = (int *)malloc(sizeof(int) * nr_nodes * nr_nodes)) == NULL)
+    error("Failed to allocate count temporary buffer.");
+  if ((g_counts = (int *)malloc(sizeof(int) * nr_nodes * nr_nodes)) == NULL)
+    error("Failed to allocate gcount temporary buffer.");
   bzero(counts, sizeof(int) * nr_nodes * nr_nodes);
-  struct part *parts = s->parts;
+  bzero(g_counts, sizeof(int) * nr_nodes * nr_nodes);
+
+  /* Allocate the destination index arrays. */
+  int *dest, *g_dest;
+  if ((dest = (int *)malloc(sizeof(int) * s->nr_parts)) == NULL)
+    error("Failed to allocate dest temporary buffer.");
+  if ((g_dest = (int *)malloc(sizeof(int) * s->nr_gparts)) == NULL)
+    error("Failed to allocate g_dest temporary buffer.");
+
+  /* Get destination of each particle */
   for (size_t k = 0; k < s->nr_parts; k++) {
+
+    /* Periodic boundary conditions */
     for (int j = 0; j < 3; j++) {
       if (parts[k].x[j] < 0.0)
         parts[k].x[j] += dim[j];
@@ -180,36 +202,121 @@ void engine_redistribute(struct engine *e) {
        error("Bad cell id %i for part %i at [%.3e,%.3e,%.3e].",
              cid, k, parts[k].x[0], parts[k].x[1], parts[k].x[2]); */
     dest[k] = cells[cid].nodeID;
+
+    /* The counts array is indexed as count[from * nr_nodes + to]. */
     counts[nodeID * nr_nodes + dest[k]] += 1;
   }
+
+  /* Sort the particles according to their destination node. */
   space_parts_sort(s, dest, s->nr_parts, 0, nr_nodes - 1, e->verbose);
 
+  /* We need to re-link the gpart partners of parts. */
+  int current_dest = dest[0];
+  size_t count_this_dest = 0;
+  for (size_t k = 0; k < s->nr_parts; ++k) {
+    if (s->parts[k].gpart != NULL) {
+
+      /* As the addresses will be invalidated by the communications, we will */
+      /* instead store the absolute index from the start of the sub-array */
+      /* of particles to be sent to a given node. */
+      /* Recall that gparts without partners have a negative id. */
+      /* We will restore the pointers on the receiving node later on. */
+      if (dest[k] != current_dest) {
+        current_dest = dest[k];
+        count_this_dest = 0;
+      }
+
+      /* Debug */
+      /* if(s->parts[k].gpart->id < 0) */
+      /* 	error("Trying to link a partnerless gpart !"); */
+
+      s->parts[k].gpart->id = count_this_dest;
+      count_this_dest++;
+    }
+  }
+
+  /* Get destination of each g-particle */
+  for (size_t k = 0; k < s->nr_gparts; k++) {
+
+    /* Periodic boundary conditions */
+    for (int j = 0; j < 3; j++) {
+      if (gparts[k].x[j] < 0.0)
+        gparts[k].x[j] += dim[j];
+      else if (gparts[k].x[j] >= dim[j])
+        gparts[k].x[j] -= dim[j];
+    }
+    const int cid = cell_getid(cdim, gparts[k].x[0] * ih[0],
+                               gparts[k].x[1] * ih[1], gparts[k].x[2] * ih[2]);
+    /* if (cid < 0 || cid >= s->nr_cells)
+       error("Bad cell id %i for gpart %zu at [%.3e,%.3e,%.3e].",
+             cid, k, gparts[k].x[0], gparts[k].x[1], gparts[k].x[2]); */
+    g_dest[k] = cells[cid].nodeID;
+
+    /* The counts array is indexed as count[from * nr_nodes + to]. */
+    g_counts[nodeID * nr_nodes + g_dest[k]] += 1;
+  }
+
+  /* Sort the gparticles according to their destination node. */
+  space_gparts_sort(s, g_dest, s->nr_gparts, 0, nr_nodes - 1, e->verbose);
+
   /* Get all the counts from all the nodes. */
   if (MPI_Allreduce(MPI_IN_PLACE, counts, nr_nodes * nr_nodes, MPI_INT, MPI_SUM,
                     MPI_COMM_WORLD) != MPI_SUCCESS)
     error("Failed to allreduce particle transfer counts.");
 
-  /* Get the new number of parts for this node, be generous in allocating. */
-  size_t nr_parts = 0;
+  /* Get all the g_counts from all the nodes. */
+  if (MPI_Allreduce(MPI_IN_PLACE, g_counts, nr_nodes * nr_nodes, MPI_INT,
+                    MPI_SUM, MPI_COMM_WORLD) != MPI_SUCCESS)
+    error("Failed to allreduce gparticle transfer counts.");
+
+  /* Each node knows how many parts and gparts will be transferred to every
+     other node. We can start preparing to receive data */
+
+  /* Get the new number of parts and gparts for this node */
+  size_t nr_parts = 0, nr_gparts = 0;
   for (int k = 0; k < nr_nodes; k++) nr_parts += counts[k * nr_nodes + nodeID];
+  for (int k = 0; k < nr_nodes; k++)
+    nr_gparts += g_counts[k * nr_nodes + nodeID];
+
+  /* Allocate the new arrays with some extra margin */
   struct part *parts_new = NULL;
-  struct xpart *xparts_new = NULL, *xparts = s->xparts;
+  struct xpart *xparts_new = NULL;
+  struct gpart *gparts_new = NULL;
   if (posix_memalign((void **)&parts_new, part_align,
-                     sizeof(struct part) * nr_parts * 1.2) != 0 ||
-      posix_memalign((void **)&xparts_new, part_align,
-                     sizeof(struct xpart) * nr_parts * 1.2) != 0)
+                     sizeof(struct part) * nr_parts *
+                         engine_redistribute_alloc_margin) != 0)
     error("Failed to allocate new part data.");
-
-  /* Emit the sends and recvs for the particle data. */
+  if (posix_memalign((void **)&xparts_new, xpart_align,
+                     sizeof(struct xpart) * nr_parts *
+                         engine_redistribute_alloc_margin) != 0)
+    error("Failed to allocate new xpart data.");
+  if (posix_memalign((void **)&gparts_new, gpart_align,
+                     sizeof(struct gpart) * nr_gparts *
+                         engine_redistribute_alloc_margin) != 0)
+    error("Failed to allocate new gpart data.");
+
+  /* Prepare MPI requests for the asynchronous communications */
   MPI_Request *reqs;
-  if ((reqs = (MPI_Request *)malloc(sizeof(MPI_Request) * 4 * nr_nodes)) ==
+  if ((reqs = (MPI_Request *)malloc(sizeof(MPI_Request) * 6 * nr_nodes)) ==
       NULL)
     error("Failed to allocate MPI request list.");
-  for (int k = 0; k < 4 * nr_nodes; k++) reqs[k] = MPI_REQUEST_NULL;
-  for (size_t offset_send = 0, offset_recv = 0, k = 0; k < nr_nodes; k++) {
-    int ind_send = nodeID * nr_nodes + k;
-    int ind_recv = k * nr_nodes + nodeID;
+  for (int k = 0; k < 6 * nr_nodes; k++) reqs[k] = MPI_REQUEST_NULL;
+
+  /* Emit the sends and recvs for the particle and gparticle data. */
+  size_t offset_send = 0, offset_recv = 0;
+  size_t g_offset_send = 0, g_offset_recv = 0;
+  for (int k = 0; k < nr_nodes; k++) {
+
+    /* Indices in the count arrays of the node of interest */
+    const int ind_send = nodeID * nr_nodes + k;
+    const int ind_recv = k * nr_nodes + nodeID;
+
+    /* Are we sending any part/xpart ? */
     if (counts[ind_send] > 0) {
+
+      /* message("Sending %d part to node %d", counts[ind_send], k); */
+
+      /* If the send is to the same node, just copy */
       if (k == nodeID) {
         memcpy(&parts_new[offset_recv], &s->parts[offset_send],
                sizeof(struct part) * counts[ind_recv]);
@@ -217,36 +324,73 @@ void engine_redistribute(struct engine *e) {
                sizeof(struct xpart) * counts[ind_recv]);
         offset_send += counts[ind_send];
         offset_recv += counts[ind_recv];
+
+        /* Else, emit some communications */
       } else {
-        if (MPI_Isend(&s->parts[offset_send], counts[ind_send],
-                      e->part_mpi_type, k, 2 * ind_send + 0, MPI_COMM_WORLD,
-                      &reqs[4 * k]) != MPI_SUCCESS)
-          error("Failed to isend parts to node %zi.", k);
-        if (MPI_Isend(&s->xparts[offset_send], counts[ind_send],
-                      e->xpart_mpi_type, k, 2 * ind_send + 1, MPI_COMM_WORLD,
-                      &reqs[4 * k + 1]) != MPI_SUCCESS)
-          error("Failed to isend xparts to node %zi.", k);
+        if (MPI_Isend(&s->parts[offset_send], counts[ind_send], part_mpi_type,
+                      k, 3 * ind_send + 0, MPI_COMM_WORLD,
+                      &reqs[6 * k]) != MPI_SUCCESS)
+          error("Failed to isend parts to node %i.", k);
+        if (MPI_Isend(&s->xparts[offset_send], counts[ind_send], xpart_mpi_type,
+                      k, 3 * ind_send + 1, MPI_COMM_WORLD,
+                      &reqs[6 * k + 1]) != MPI_SUCCESS)
+          error("Failed to isend xparts to node %i.", k);
         offset_send += counts[ind_send];
       }
     }
+
+    /* Are we sending any gpart ? */
+    if (g_counts[ind_send] > 0) {
+
+      /* message("Sending %d gpart to node %d", g_counts[ind_send], k); */
+
+      /* If the send is to the same node, just copy */
+      if (k == nodeID) {
+        memcpy(&gparts_new[g_offset_recv], &s->gparts[g_offset_send],
+               sizeof(struct gpart) * g_counts[ind_recv]);
+        g_offset_send += g_counts[ind_send];
+        g_offset_recv += g_counts[ind_recv];
+
+        /* Else, emit some communications */
+      } else {
+        if (MPI_Isend(&s->gparts[g_offset_send], g_counts[ind_send],
+                      gpart_mpi_type, k, 3 * ind_send + 2, MPI_COMM_WORLD,
+                      &reqs[6 * k + 2]) != MPI_SUCCESS)
+          error("Failed to isend gparts to node %i.", k);
+        g_offset_send += g_counts[ind_send];
+      }
+    }
+
+    /* Now emit the corresponding Irecv() */
+
+    /* Are we receiving any part/xpart from this node ? */
     if (k != nodeID && counts[ind_recv] > 0) {
-      if (MPI_Irecv(&parts_new[offset_recv], counts[ind_recv], e->part_mpi_type,
-                    k, 2 * ind_recv + 0, MPI_COMM_WORLD,
-                    &reqs[4 * k + 2]) != MPI_SUCCESS)
-        error("Failed to emit irecv of parts from node %zi.", k);
-      if (MPI_Irecv(&xparts_new[offset_recv], counts[ind_recv],
-                    e->xpart_mpi_type, k, 2 * ind_recv + 1, MPI_COMM_WORLD,
-                    &reqs[4 * k + 3]) != MPI_SUCCESS)
-        error("Failed to emit irecv of parts from node %zi.", k);
+      if (MPI_Irecv(&parts_new[offset_recv], counts[ind_recv], part_mpi_type, k,
+                    3 * ind_recv + 0, MPI_COMM_WORLD,
+                    &reqs[6 * k + 3]) != MPI_SUCCESS)
+        error("Failed to emit irecv of parts from node %i.", k);
+      if (MPI_Irecv(&xparts_new[offset_recv], counts[ind_recv], xpart_mpi_type,
+                    k, 3 * ind_recv + 1, MPI_COMM_WORLD,
+                    &reqs[6 * k + 4]) != MPI_SUCCESS)
+        error("Failed to emit irecv of xparts from node %i.", k);
       offset_recv += counts[ind_recv];
     }
+
+    /* Are we receiving any gpart from this node ? */
+    if (k != nodeID && g_counts[ind_recv] > 0) {
+      if (MPI_Irecv(&gparts_new[g_offset_recv], g_counts[ind_recv],
+                    gpart_mpi_type, k, 3 * ind_recv + 2, MPI_COMM_WORLD,
+                    &reqs[6 * k + 5]) != MPI_SUCCESS)
+        error("Failed to emit irecv of gparts from node %i.", k);
+      g_offset_recv += g_counts[ind_recv];
+    }
   }
 
   /* Wait for all the sends and recvs to tumble in. */
-  MPI_Status stats[4 * nr_nodes];
+  MPI_Status stats[6 * nr_nodes];
   int res;
-  if ((res = MPI_Waitall(4 * nr_nodes, reqs, stats)) != MPI_SUCCESS) {
-    for (int k = 0; k < 4 * nr_nodes; k++) {
+  if ((res = MPI_Waitall(6 * nr_nodes, reqs, stats)) != MPI_SUCCESS) {
+    for (int k = 0; k < 6 * nr_nodes; k++) {
       char buff[MPI_MAX_ERROR_STRING];
       int res;
       MPI_Error_string(stats[k].MPI_ERROR, buff, &res);
@@ -255,35 +399,90 @@ void engine_redistribute(struct engine *e) {
     error("Failed during waitall for part data.");
   }
 
+  /* We now need to restore the part<->gpart links */
+  size_t offset_parts = 0, offset_gparts = 0;
+  for (int node = 0; node < nr_nodes; ++node) {
+
+    const int ind_recv = node * nr_nodes + nodeID;
+    const size_t count_parts = counts[ind_recv];
+    const size_t count_gparts = g_counts[ind_recv];
+
+    /* Loop over the gparts received from that node */
+    for (size_t k = offset_gparts; k < offset_gparts + count_gparts; ++k) {
+
+      /* Does this gpart have a partner ? */
+      if (gparts_new[k].id >= 0) {
+
+        const size_t partner_index = offset_parts + gparts_new[k].id;
+
+        /* Re-link */
+        gparts_new[k].part = &parts_new[partner_index];
+        gparts_new[k].part->gpart = &gparts_new[k];
+      }
+    }
+
+    offset_parts += count_parts;
+    offset_gparts += count_gparts;
+  }
+
   /* Verify that all parts are in the right place. */
-  /* for ( k = 0 ; k < nr_parts ; k++ ) {
-      cid = cell_getid( cdim , parts_new[k].x[0]*ih[0] , parts_new[k].x[1]*ih[1]
-     , parts_new[k].x[2]*ih[2] );
+  /* for ( int k = 0 ; k < nr_parts ; k++ ) {
+      int cid = cell_getid( cdim , parts_new[k].x[0]*ih[0],
+    parts_new[k].x[1]*ih[1], parts_new[k].x[2]*ih[2] );
       if ( cells[ cid ].nodeID != nodeID )
-          error( "Received particle (%i) that does not belong here (nodeID=%i)."
-     , k , cells[ cid ].nodeID );
-      } */
+          error( "Received particle (%i) that does not belong here
+    (nodeID=%i).", k , cells[ cid ].nodeID );
+    } */
+
+  /* Verify that the links are correct */
+  /* MATTHIEU: To be commented out once we are happy */
+  for (size_t k = 0; k < nr_gparts; ++k) {
+
+    if (gparts_new[k].id > 0) {
+
+      if (gparts_new[k].part->gpart != &gparts_new[k])
+        error("Linking problem !");
+
+      if (gparts_new[k].x[0] != gparts_new[k].part->x[0] ||
+          gparts_new[k].x[1] != gparts_new[k].part->x[1] ||
+          gparts_new[k].x[2] != gparts_new[k].part->x[2])
+        error("Linked particles are not at the same position !");
+    }
+  }
+  for (size_t k = 0; k < nr_parts; ++k) {
+
+    if (parts_new[k].gpart != NULL) {
+
+      if (parts_new[k].gpart->part != &parts_new[k]) error("Linking problem !");
+    }
+  }
 
   /* Set the new part data, free the old. */
   free(parts);
   free(xparts);
+  free(gparts);
   s->parts = parts_new;
   s->xparts = xparts_new;
+  s->gparts = gparts_new;
   s->nr_parts = nr_parts;
-  s->size_parts = 1.2 * nr_parts;
+  s->nr_gparts = nr_gparts;
+  s->size_parts = engine_redistribute_alloc_margin * nr_parts;
+  s->size_gparts = engine_redistribute_alloc_margin * nr_gparts;
 
-  /* Be verbose about what just happened. */
-  for (int k = 0; k < nr_cells; k++)
-    if (cells[k].nodeID == nodeID) my_cells += 1;
-  if (e->verbose)
-    message("node %i now has %zi parts in %i cells.", nodeID, nr_parts,
-            my_cells);
-
-  /* Clean up other stuff. */
+  /* Clean up the temporary stuff. */
   free(reqs);
   free(counts);
   free(dest);
 
+  /* Be verbose about what just happened. */
+  if (e->verbose) {
+    int my_cells = 0;
+    for (int k = 0; k < nr_cells; k++)
+      if (cells[k].nodeID == nodeID) my_cells += 1;
+    message("node %i now has %zi parts and %zi gparts in %i cells.", nodeID,
+            nr_parts, nr_gparts, my_cells);
+  }
+
   if (e->verbose)
     message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
             clocks_getunit());
@@ -509,7 +708,7 @@ void engine_exchange_cells(struct engine *e) {
 
   /* Wait for each count to come in and start the recv. */
   for (int k = 0; k < nr_proxies; k++) {
-    int pid;
+    int pid = MPI_UNDEFINED;
     if (MPI_Waitany(nr_proxies, reqs_in, &pid, &status) != MPI_SUCCESS ||
         pid == MPI_UNDEFINED)
       error("MPI_Waitany failed.");
@@ -529,7 +728,7 @@ void engine_exchange_cells(struct engine *e) {
 
   /* Wait for each pcell array to come in from the proxies. */
   for (int k = 0; k < nr_proxies; k++) {
-    int pid;
+    int pid = MPI_UNDEFINED;
     if (MPI_Waitany(nr_proxies, reqs_in, &pid, &status) != MPI_SUCCESS ||
         pid == MPI_UNDEFINED)
       error("MPI_Waitany failed.");
@@ -545,31 +744,40 @@ void engine_exchange_cells(struct engine *e) {
 
   /* Count the number of particles we need to import and re-allocate
      the buffer if needed. */
-  int count_in = 0;
+  int count_parts_in = 0, count_gparts_in = 0;
   for (int k = 0; k < nr_proxies; k++)
-    for (int j = 0; j < e->proxies[k].nr_cells_in; j++)
-      count_in += e->proxies[k].cells_in[j]->count;
-  if (count_in > s->size_parts_foreign) {
+    for (int j = 0; j < e->proxies[k].nr_cells_in; j++) {
+      count_parts_in += e->proxies[k].cells_in[j]->count;
+      count_gparts_in += e->proxies[k].cells_in[j]->gcount;
+    }
+  if (count_parts_in > s->size_parts_foreign) {
     if (s->parts_foreign != NULL) free(s->parts_foreign);
-    s->size_parts_foreign = 1.1 * count_in;
+    s->size_parts_foreign = 1.1 * count_parts_in;
     if (posix_memalign((void **)&s->parts_foreign, part_align,
                        sizeof(struct part) * s->size_parts_foreign) != 0)
       error("Failed to allocate foreign part data.");
   }
+  if (count_gparts_in > s->size_gparts_foreign) {
+    if (s->gparts_foreign != NULL) free(s->gparts_foreign);
+    s->size_gparts_foreign = 1.1 * count_gparts_in;
+    if (posix_memalign((void **)&s->gparts_foreign, gpart_align,
+                       sizeof(struct gpart) * s->size_gparts_foreign) != 0)
+      error("Failed to allocate foreign gpart data.");
+  }
 
   /* Unpack the cells and link to the particle data. */
   struct part *parts = s->parts_foreign;
+  struct gpart *gparts = s->gparts_foreign;
   for (int k = 0; k < nr_proxies; k++) {
     for (int j = 0; j < e->proxies[k].nr_cells_in; j++) {
-      cell_link(e->proxies[k].cells_in[j], parts);
+      cell_link_parts(e->proxies[k].cells_in[j], parts);
+      cell_link_gparts(e->proxies[k].cells_in[j], gparts);
       parts = &parts[e->proxies[k].cells_in[j]->count];
+      gparts = &gparts[e->proxies[k].cells_in[j]->gcount];
     }
   }
   s->nr_parts_foreign = parts - s->parts_foreign;
-
-  /* Is the parts buffer large enough? */
-  if (s->nr_parts_foreign > s->size_parts_foreign)
-    error("Foreign parts buffer too small.");
+  s->nr_gparts_foreign = gparts - s->gparts_foreign;
 
   /* Free the pcell buffer. */
   free(pcells);
@@ -587,16 +795,24 @@ void engine_exchange_cells(struct engine *e) {
  * @brief Exchange straying parts with other nodes.
  *
  * @param e The #engine.
- * @param offset The index in the parts array as of which the foreign parts
- *reside.
- * @param ind The ID of the foreign #cell.
- * @param N The number of stray parts.
+ * @param offset_parts The index in the parts array as of which the foreign
+ *        parts reside.
+ * @param ind_part The foreign #cell ID of each part.
+ * @param Npart The number of stray parts, contains the number of parts received
+ *        on return.
+ * @param offset_gparts The index in the gparts array as of which the foreign
+ *        gparts reside.
+ * @param ind_gpart The foreign #cell ID of each gpart.
+ * @param Ngpart The number of stray gparts, contains the number of gparts
+ *        received on return.
  *
- * @return The number of arrived parts copied to parts and xparts.
+ * Note that this function does not mess up the linkage between parts and
+ * gparts, i.e. the received particles have correct linkage.
  */
 
-int engine_exchange_strays(struct engine *e, int offset, size_t *ind,
-                           size_t N) {
+void engine_exchange_strays(struct engine *e, size_t offset_parts,
+                            int *ind_part, size_t *Npart, size_t offset_gparts,
+                            int *ind_gpart, size_t *Ngpart) {
 
 #ifdef WITH_MPI
 
@@ -606,25 +822,49 @@ int engine_exchange_strays(struct engine *e, int offset, size_t *ind,
   /* Re-set the proxies. */
   for (int k = 0; k < e->nr_proxies; k++) e->proxies[k].nr_parts_out = 0;
 
-  /* Put the parts into the corresponding proxies. */
-  for (size_t k = 0; k < N; k++) {
-    const int node_id = e->s->cells[ind[k]].nodeID;
+  /* Put the parts and gparts into the corresponding proxies. */
+  for (size_t k = 0; k < *Npart; k++) {
+    /* Get the target node and proxy ID. */
+    const int node_id = e->s->cells[ind_part[k]].nodeID;
     if (node_id < 0 || node_id >= e->nr_nodes)
       error("Bad node ID %i.", node_id);
     const int pid = e->proxy_ind[node_id];
-    if (pid < 0)
+    if (pid < 0) {
       error(
           "Do not have a proxy for the requested nodeID %i for part with "
           "id=%llu, x=[%e,%e,%e].",
-          node_id, s->parts[offset + k].id, s->parts[offset + k].x[0],
-          s->parts[offset + k].x[1], s->parts[offset + k].x[2]);
-    proxy_parts_load(&e->proxies[pid], &s->parts[offset + k],
-                     &s->xparts[offset + k], 1);
+          node_id, s->parts[offset_parts + k].id,
+          s->parts[offset_parts + k].x[0], s->parts[offset_parts + k].x[1],
+          s->parts[offset_parts + k].x[2]);
+    }
+
+    /* Re-link the associated gpart with the buffer offset of the part. */
+    if (s->parts[offset_parts + k].gpart != NULL) {
+      s->parts[offset_parts + k].gpart->id = e->proxies[pid].nr_parts_out;
+    }
+
+    /* Load the part and xpart into the proxy. */
+    proxy_parts_load(&e->proxies[pid], &s->parts[offset_parts + k],
+                     &s->xparts[offset_parts + k], 1);
+  }
+  for (size_t k = 0; k < *Ngpart; k++) {
+    const int node_id = e->s->cells[ind_gpart[k]].nodeID;
+    if (node_id < 0 || node_id >= e->nr_nodes)
+      error("Bad node ID %i.", node_id);
+    const int pid = e->proxy_ind[node_id];
+    if (pid < 0)
+      error(
+          "Do not have a proxy for the requested nodeID %i for gpart with "
+          "id=%lli, x=[%e,%e,%e].",
+          node_id, s->gparts[offset_gparts + k].id,
+          s->gparts[offset_gparts + k].x[0], s->gparts[offset_gparts + k].x[1],
+          s->gparts[offset_gparts + k].x[2]);
+    proxy_gparts_load(&e->proxies[pid], &s->gparts[offset_gparts + k], 1);
+  }
 
   /* Launch the proxies. */
-  MPI_Request reqs_in[2 * engine_maxproxies];
-  MPI_Request reqs_out[2 * engine_maxproxies];
+  MPI_Request reqs_in[3 * engine_maxproxies];
+  MPI_Request reqs_out[3 * engine_maxproxies];
   for (int k = 0; k < e->nr_proxies; k++) {
     proxy_parts_exch1(&e->proxies[k]);
     reqs_in[k] = e->proxies[k].req_parts_count_in;
@@ -633,7 +873,7 @@ int engine_exchange_strays(struct engine *e, int offset, size_t *ind,
 
   /* Wait for each count to come in and start the recv. */
   for (int k = 0; k < e->nr_proxies; k++) {
-    int pid;
+    int pid = MPI_UNDEFINED;
     if (MPI_Waitany(e->nr_proxies, reqs_in, &pid, MPI_STATUS_IGNORE) !=
             MPI_SUCCESS ||
         pid == MPI_UNDEFINED)
@@ -648,11 +888,18 @@ int engine_exchange_strays(struct engine *e, int offset, size_t *ind,
 
   /* Count the total number of incoming particles and make sure we have
      enough space to accommodate them. */
-  size_t count_in = 0;
-  for (int k = 0; k < e->nr_proxies; k++) count_in += e->proxies[k].nr_parts_in;
-  if (e->verbose) message("sent out %zi particles, got %zi back.", N, count_in);
-  if (offset + count_in > s->size_parts) {
-    s->size_parts = (offset + count_in) * 1.05;
+  int count_parts_in = 0;
+  int count_gparts_in = 0;
+  for (int k = 0; k < e->nr_proxies; k++) {
+    count_parts_in += e->proxies[k].nr_parts_in;
+    count_gparts_in += e->proxies[k].nr_gparts_in;
+  }
+  if (e->verbose) {
+    message("sent out %zi/%zi parts/gparts, got %i/%i back.", *Npart, *Ngpart,
+            count_parts_in, count_gparts_in);
+  }
+  if (offset_parts + count_parts_in > s->size_parts) {
+    s->size_parts = (offset_parts + count_parts_in) * engine_parts_size_grow;
     struct part *parts_new = NULL;
     struct xpart *xparts_new = NULL;
     if (posix_memalign((void **)&parts_new, part_align,
@@ -660,37 +907,61 @@ int engine_exchange_strays(struct engine *e, int offset, size_t *ind,
         posix_memalign((void **)&xparts_new, part_align,
                        sizeof(struct xpart) * s->size_parts) != 0)
       error("Failed to allocate new part data.");
-    memcpy(parts_new, s->parts, sizeof(struct part) * offset);
-    memcpy(xparts_new, s->xparts, sizeof(struct xpart) * offset);
+    memcpy(parts_new, s->parts, sizeof(struct part) * offset_parts);
+    memcpy(xparts_new, s->xparts, sizeof(struct xpart) * offset_parts);
     free(s->parts);
     free(s->xparts);
     s->parts = parts_new;
     s->xparts = xparts_new;
   }
+  if (offset_gparts + count_gparts_in > s->size_gparts) {
+    s->size_gparts = (offset_gparts + count_gparts_in) * engine_parts_size_grow;
+    struct gpart *gparts_new = NULL;
+    if (posix_memalign((void **)&gparts_new, gpart_align,
+                       sizeof(struct gpart) * s->size_gparts) != 0)
+      error("Failed to allocate new gpart data.");
+    memcpy(gparts_new, s->gparts, sizeof(struct gpart) * offset_gparts);
+    free(s->gparts);
+    s->gparts = gparts_new;
+  }
 
   /* Collect the requests for the particle data from the proxies. */
   int nr_in = 0, nr_out = 0;
   for (int k = 0; k < e->nr_proxies; k++) {
     if (e->proxies[k].nr_parts_in > 0) {
-      reqs_in[2 * k] = e->proxies[k].req_parts_in;
-      reqs_in[2 * k + 1] = e->proxies[k].req_xparts_in;
+      reqs_in[3 * k] = e->proxies[k].req_parts_in;
+      reqs_in[3 * k + 1] = e->proxies[k].req_xparts_in;
+      nr_in += 2;
+    } else {
+      reqs_in[3 * k] = reqs_in[3 * k + 1] = MPI_REQUEST_NULL;
+    }
+    if (e->proxies[k].nr_gparts_in > 0) {
+      reqs_in[3 * k + 2] = e->proxies[k].req_gparts_in;
       nr_in += 1;
-    } else
-      reqs_in[2 * k] = reqs_in[2 * k + 1] = MPI_REQUEST_NULL;
+    } else {
+      reqs_in[3 * k + 2] = MPI_REQUEST_NULL;
+    }
     if (e->proxies[k].nr_parts_out > 0) {
-      reqs_out[2 * k] = e->proxies[k].req_parts_out;
-      reqs_out[2 * k + 1] = e->proxies[k].req_xparts_out;
+      reqs_out[3 * k] = e->proxies[k].req_parts_out;
+      reqs_out[3 * k + 1] = e->proxies[k].req_xparts_out;
+      nr_out += 2;
+    } else {
+      reqs_out[3 * k] = reqs_out[3 * k + 1] = MPI_REQUEST_NULL;
+    }
+    if (e->proxies[k].nr_gparts_out > 0) {
+      reqs_out[3 * k + 2] = e->proxies[k].req_gparts_out;
       nr_out += 1;
-    } else
-      reqs_out[2 * k] = reqs_out[2 * k + 1] = MPI_REQUEST_NULL;
+    } else {
+      reqs_out[3 * k + 2] = MPI_REQUEST_NULL;
+    }
   }
 
   /* Wait for each part array to come in and collect the new
      parts from the proxies. */
-  size_t count = 0;
-  for (int k = 0; k < 2 * (nr_in + nr_out); k++) {
+  int count_parts = 0, count_gparts = 0;
+  for (int k = 0; k < nr_in; k++) {
     int err, pid;
-    if ((err = MPI_Waitany(2 * e->nr_proxies, reqs_in, &pid,
+    if ((err = MPI_Waitany(3 * e->nr_proxies, reqs_in, &pid,
                            MPI_STATUS_IGNORE)) != MPI_SUCCESS) {
       char buff[MPI_MAX_ERROR_STRING];
       int res;
@@ -698,26 +969,46 @@ int engine_exchange_strays(struct engine *e, int offset, size_t *ind,
       error("MPI_Waitany failed (%s).", buff);
     }
     if (pid == MPI_UNDEFINED) break;
-    // message( "request from proxy %i has arrived." , pid );
-    if (reqs_in[pid & ~1] == MPI_REQUEST_NULL &&
-        reqs_in[pid | 1] == MPI_REQUEST_NULL) {
+    // message( "request from proxy %i has arrived." , pid / 3 );
+    pid = 3 * (pid / 3);
+
+    /* If all the requests for a given proxy have arrived... */
+    if (reqs_in[pid + 0] == MPI_REQUEST_NULL &&
+        reqs_in[pid + 1] == MPI_REQUEST_NULL &&
+        reqs_in[pid + 2] == MPI_REQUEST_NULL) {
+      /* Copy the particle data to the part/xpart/gpart arrays. */
       struct proxy *p = &e->proxies[pid >> 1];
-      memcpy(&s->parts[offset + count], p->parts_in,
+      memcpy(&s->parts[offset_parts + count_parts], p->parts_in,
              sizeof(struct part) * p->nr_parts_in);
-      memcpy(&s->xparts[offset + count], p->xparts_in,
+      memcpy(&s->xparts[offset_parts + count_parts], p->xparts_in,
              sizeof(struct xpart) * p->nr_parts_in);
+      memcpy(&s->gparts[offset_gparts + count_gparts], p->gparts_in,
+             sizeof(struct gpart) * p->nr_gparts_in);
       /* for (int k = offset; k < offset + count; k++)
          message(
             "received particle %lli, x=[%.3e %.3e %.3e], h=%.3e, from node %i.",
             s->parts[k].id, s->parts[k].x[0], s->parts[k].x[1],
             s->parts[k].x[2], s->parts[k].h, p->nodeID); */
-      count += p->nr_parts_in;
+
+      /* Re-link the gparts. */
+      for (int k = 0; k < p->nr_gparts_in; k++) {
+        struct gpart *gp = &s->gparts[offset_gparts + count_gparts + k];
+        if (gp->id >= 0) {
+          struct part *p = &s->parts[offset_gparts + count_parts + gp->id];
+          gp->part = p;
+          p->gpart = gp;
+        }
+      }
+
+      /* Advance the counters. */
+      count_parts += p->nr_parts_in;
+      count_gparts += p->nr_gparts_in;
     }
   }
 
   /* Wait for all the sends to have finished too. */
   if (nr_out > 0)
-    if (MPI_Waitall(2 * e->nr_proxies, reqs_out, MPI_STATUSES_IGNORE) !=
+    if (MPI_Waitall(3 * e->nr_proxies, reqs_out, MPI_STATUSES_IGNORE) !=
         MPI_SUCCESS)
       error("MPI_Waitall on sends failed.");
 
@@ -726,49 +1017,51 @@ int engine_exchange_strays(struct engine *e, int offset, size_t *ind,
             clocks_getunit());
 
   /* Return the number of harvested parts. */
-  return count;
+  *Npart = count_parts;
+  *Ngpart = count_gparts;
 
 #else
   error("SWIFT was not compiled with MPI support.");
-  return 0;
 #endif
 }
 
 /**
- * @brief Fill the #space's task list.
+ * @brief Constructs the top-level pair tasks for the first hydro loop over
+ *neighbours
  *
- * @param e The #engine we are working with.
+ * Here we construct all the tasks for all possible neighbouring non-empty
+ * local cells in the hierarchy. No dependencies are being added thus far.
+ * Additional loops over neighbours can later be added by simply duplicating
+ * all the tasks created by this function.
+ *
+ * @param e The #engine.
  */
-
-void engine_maketasks(struct engine *e) {
+void engine_make_hydroloop_tasks(struct engine *e) {
 
   struct space *s = e->s;
   struct scheduler *sched = &e->sched;
-  struct cell *cells = s->cells;
-  const int nr_cells = s->nr_cells;
   const int nodeID = e->nodeID;
   const int *cdim = s->cdim;
-  const ticks tic = getticks();
-
-  /* Re-set the scheduler. */
-  scheduler_reset(sched, s->tot_cells * engine_maxtaskspercell);
-
-  /* Add the space sorting tasks. */
-  for (int i = 0; i < e->nr_threads; i++)
-    scheduler_addtask(sched, task_type_psort, task_subtype_none, i, 0, NULL,
-                      NULL, 0);
+  struct cell *cells = s->cells;
 
   /* Run through the highest level of cells and add pairs. */
-  for (int i = 0; i < cdim[0]; i++)
-    for (int j = 0; j < cdim[1]; j++)
+  for (int i = 0; i < cdim[0]; i++) {
+    for (int j = 0; j < cdim[1]; j++) {
       for (int k = 0; k < cdim[2]; k++) {
-        int cid = cell_getid(cdim, i, j, k);
-        if (cells[cid].count == 0) continue;
+
+        /* Get the cell */
+        const int cid = cell_getid(cdim, i, j, k);
         struct cell *ci = &cells[cid];
+
+        /* Skip cells without hydro particles */
         if (ci->count == 0) continue;
+
+        /* If the cell is local, build a self-interaction */
         if (ci->nodeID == nodeID)
           scheduler_addtask(sched, task_type_self, task_subtype_density, 0, 0,
                             ci, NULL, 0);
+
+        /* Now loop over all the neighbours of this cell */
         for (int ii = -1; ii < 2; ii++) {
           int iii = i + ii;
           if (!s->periodic && (iii < 0 || iii >= cdim[0])) continue;
@@ -781,67 +1074,43 @@ void engine_maketasks(struct engine *e) {
               int kkk = k + kk;
               if (!s->periodic && (kkk < 0 || kkk >= cdim[2])) continue;
               kkk = (kkk + cdim[2]) % cdim[2];
-              int cjd = cell_getid(cdim, iii, jjj, kkk);
+
+              /* Get the neighbouring cell */
+              const int cjd = cell_getid(cdim, iii, jjj, kkk);
               struct cell *cj = &cells[cjd];
+
+              /* Is that neighbour local and does it have particles ? */
               if (cid >= cjd || cj->count == 0 ||
                   (ci->nodeID != nodeID && cj->nodeID != nodeID))
                 continue;
-              int sid = sortlistID[(kk + 1) + 3 * ((jj + 1) + 3 * (ii + 1))];
+
+              /* Construct the pair task */
+              const int sid =
+                  sortlistID[(kk + 1) + 3 * ((jj + 1) + 3 * (ii + 1))];
               scheduler_addtask(sched, task_type_pair, task_subtype_density,
                                 sid, 0, ci, cj, 1);
             }
           }
         }
       }
+    }
+  }
+}
 
-  /* /\* Add the gravity mm tasks. *\/ */
-  /* for (int i = 0; i < nr_cells; i++) */
-  /*   if (cells[i].gcount > 0) { */
-  /*     scheduler_addtask(sched, task_type_grav_mm, task_subtype_none, -1, 0,
-   */
-  /*                       &cells[i], NULL, 0); */
-  /*     for (int j = i + 1; j < nr_cells; j++) */
-  /*       if (cells[j].gcount > 0) */
-  /*         scheduler_addtask(sched, task_type_grav_mm, task_subtype_none, -1,
-   * 0, */
-  /*                           &cells[i], &cells[j], 0); */
-  /* } */
-
-  /* Split the tasks. */
-  scheduler_splittasks(sched);
-
-  /* Allocate the list of cell-task links. The maximum number of links
-     is the number of cells (s->tot_cells) times the number of neighbours (27)
-     times the number of interaction types (2, density and force). */
-  if (e->links != NULL) free(e->links);
-  e->size_links = s->tot_cells * 27 * 2;
-  if ((e->links = malloc(sizeof(struct link) * e->size_links)) == NULL)
-    error("Failed to allocate cell-task links.");
-  e->nr_links = 0;
+/**
+ * @brief Counts the tasks associated with one cell and constructs the links
+ *
+ * For each hydrodynamic task, construct the links with the corresponding cell.
+ * Similarly, construct the dependencies for all the sorting tasks.
+ *
+ * @param e The #engine.
+ */
+void engine_count_and_link_tasks(struct engine *e) {
 
-  /* /\* Add the gravity up/down tasks at the top-level cells and push them
-   * down. *\/ */
-  /* for (int k = 0; k < nr_cells; k++) */
-  /*   if (cells[k].nodeID == nodeID && cells[k].gcount > 0) { */
-
-  /*     /\* Create tasks at top level. *\/ */
-  /*     struct task *up = */
-  /*         scheduler_addtask(sched, task_type_grav_up, task_subtype_none, 0,
-   * 0, */
-  /*                           &cells[k], NULL, 0); */
-  /*     struct task *down = */
-  /*         scheduler_addtask(sched, task_type_grav_down, task_subtype_none, 0,
-   * 0, */
-  /*                           &cells[k], NULL, 0); */
-
-  /*     /\* Push tasks down the cell hierarchy. *\/ */
-  /*     engine_addtasks_grav(e, &cells[k], up, down); */
-  /*   } */
+  struct scheduler *sched = &e->sched;
+  const int nr_tasks = sched->nr_tasks;
 
-  /* Count the number of tasks associated with each cell and
-     store the density tasks in each cell, and make each sort
-     depend on the sorts of its progeny. */
-  for (int k = 0; k < sched->nr_tasks; k++) {
+  for (int k = 0; k < nr_tasks; k++) {
 
     /* Get the current task. */
     struct task *t = &sched->tasks[k];
@@ -896,16 +1165,27 @@ void engine_maketasks(struct engine *e) {
     /*   } */
     /* } */
   }
+}
 
-  /* Append a ghost task to each cell, and add kick tasks to the
-     super cells. */
-  for (int k = 0; k < nr_cells; k++) engine_mkghosts(e, &cells[k], NULL);
+/**
+ * @brief Duplicates the first hydro loop and constructs all the
+ * dependencies for the hydro part
+ *
+ * This is done by looping over all the previously constructed tasks
+ * and adding another task involving the same cells but this time
+ * corresponding to the second hydro loop over neighbours.
+ * With all the relevant tasks for a given cell available, we construct
+ * all the dependencies for that cell.
+ *
+ * @param e The #engine.
+ */
+void engine_make_extra_hydroloop_tasks(struct engine *e) {
 
-  /* Run through the tasks and make force tasks for each density task.
-     Each force task depends on the cell ghosts and unlocks the kick task
-     of its super-cell. */
-  int sched_nr_tasks = sched->nr_tasks;
-  for (int k = 0; k < sched_nr_tasks; k++) {
+  struct scheduler *sched = &e->sched;
+  const int nodeID = e->nodeID;
+  const int nr_tasks = sched->nr_tasks;
+
+  for (int k = 0; k < nr_tasks; k++) {
 
     /* Get a pointer to the task. */
     struct task *t = &sched->tasks[k];
@@ -915,20 +1195,39 @@ void engine_maketasks(struct engine *e) {
 
     /* Self-interaction? */
     if (t->type == task_type_self && t->subtype == task_subtype_density) {
-      scheduler_addunlock(sched, t->ci->super->init, t);
-      scheduler_addunlock(sched, t, t->ci->super->ghost);
+
+      /* Start by constructing the task for the second hydro loop */
       struct task *t2 = scheduler_addtask(
           sched, task_type_self, task_subtype_force, 0, 0, t->ci, NULL, 0);
-      scheduler_addunlock(sched, t->ci->super->ghost, t2);
-      scheduler_addunlock(sched, t2, t->ci->super->kick);
+
+      /* Add the link between the new loop and the cell */
       t->ci->force = engine_addlink(e, t->ci->force, t2);
       atomic_inc(&t->ci->nr_force);
+
+      /* Now, build all the dependencies for the hydro */
+      /* init --> t (density loop) --> ghost --> t2 (force loop) --> kick */
+      scheduler_addunlock(sched, t->ci->super->init, t);
+      scheduler_addunlock(sched, t, t->ci->super->ghost);
+      scheduler_addunlock(sched, t->ci->super->ghost, t2);
+      scheduler_addunlock(sched, t2, t->ci->super->kick);
     }
 
     /* Otherwise, pair interaction? */
     else if (t->type == task_type_pair && t->subtype == task_subtype_density) {
+
+      /* Start by constructing the task for the second hydro loop */
       struct task *t2 = scheduler_addtask(
           sched, task_type_pair, task_subtype_force, 0, 0, t->ci, t->cj, 0);
+
+      /* Add the link between the new loop and both cells */
+      t->ci->force = engine_addlink(e, t->ci->force, t2);
+      atomic_inc(&t->ci->nr_force);
+      t->cj->force = engine_addlink(e, t->cj->force, t2);
+      atomic_inc(&t->cj->nr_force);
+
+      /* Now, build all the dependencies for the hydro for the cells */
+      /* that are local and are not descendant of the same super-cells */
+      /* init --> t (density loop) --> ghost --> t2 (force loop) --> kick */
       if (t->ci->nodeID == nodeID) {
         scheduler_addunlock(sched, t->ci->super->init, t);
         scheduler_addunlock(sched, t, t->ci->super->ghost);
@@ -941,17 +1240,27 @@ void engine_maketasks(struct engine *e) {
         scheduler_addunlock(sched, t->cj->super->ghost, t2);
         scheduler_addunlock(sched, t2, t->cj->super->kick);
       }
-      t->ci->force = engine_addlink(e, t->ci->force, t2);
-      atomic_inc(&t->ci->nr_force);
-      t->cj->force = engine_addlink(e, t->cj->force, t2);
-      atomic_inc(&t->cj->nr_force);
     }
 
     /* Otherwise, sub interaction? */
     else if (t->type == task_type_sub && t->subtype == task_subtype_density) {
+
+      /* Start by constructing the task for the second hydro loop */
       struct task *t2 =
           scheduler_addtask(sched, task_type_sub, task_subtype_force, t->flags,
                             0, t->ci, t->cj, 0);
+
+      /* Add the link between the new loop and both cells */
+      t->ci->force = engine_addlink(e, t->ci->force, t2);
+      atomic_inc(&t->ci->nr_force);
+      if (t->cj != NULL) {
+        t->cj->force = engine_addlink(e, t->cj->force, t2);
+        atomic_inc(&t->cj->nr_force);
+      }
+
+      /* Now, build all the dependencies for the hydro for the cells */
+      /* that are local and are not descendant of the same super-cells */
+      /* init --> t (density loop) --> ghost --> t2 (force loop) --> kick */
       if (t->ci->nodeID == nodeID) {
         scheduler_addunlock(sched, t, t->ci->super->ghost);
         scheduler_addunlock(sched, t->ci->super->ghost, t2);
@@ -963,40 +1272,166 @@ void engine_maketasks(struct engine *e) {
         scheduler_addunlock(sched, t->cj->super->ghost, t2);
         scheduler_addunlock(sched, t2, t->cj->super->kick);
       }
-      t->ci->force = engine_addlink(e, t->ci->force, t2);
-      atomic_inc(&t->ci->nr_force);
-      if (t->cj != NULL) {
-        t->cj->force = engine_addlink(e, t->cj->force, t2);
-        atomic_inc(&t->cj->nr_force);
-      }
     }
 
     /* /\* Kick tasks should rely on the grav_down tasks of their cell. *\/ */
     /* else if (t->type == task_type_kick && t->ci->grav_down != NULL) */
     /*   scheduler_addunlock(sched, t->ci->grav_down, t); */
   }
+}
 
-/* Add the communication tasks if MPI is being used. */
-#ifdef WITH_MPI
+/**
+ * @brief Constructs the top-level pair tasks for the gravity M-M interactions
+ *
+ * Correct implementation is still lacking here.
+ *
+ * @param e The #engine.
+ */
+void engine_make_gravityinteraction_tasks(struct engine *e) {
+
+  struct space *s = e->s;
+  struct scheduler *sched = &e->sched;
+  const int nr_cells = s->nr_cells;
+  struct cell *cells = s->cells;
 
-  /* Loop over the proxies. */
-  for (int pid = 0; pid < e->nr_proxies; pid++) {
+  /* Loop over all cells. */
+  for (int i = 0; i < nr_cells; i++) {
 
-    /* Get a handle on the proxy. */
-    struct proxy *p = &e->proxies[pid];
+    /* If it has gravity particles, add a self-task */
+    if (cells[i].gcount > 0) {
+      scheduler_addtask(sched, task_type_grav_mm, task_subtype_none, -1, 0,
+                        &cells[i], NULL, 0);
 
-    /* Loop through the proxy's incoming cells and add the
-       recv tasks. */
-    for (int k = 0; k < p->nr_cells_in; k++)
-      engine_addtasks_recv(e, p->cells_in[k], NULL, NULL);
+      /* Loop over all remaining cells */
+      for (int j = i + 1; j < nr_cells; j++) {
 
-    /* Loop through the proxy's outgoing cells and add the
-       send tasks. */
-    for (int k = 0; k < p->nr_cells_out; k++)
-      engine_addtasks_send(e, p->cells_out[k], p->cells_in[0]);
+        /* If that other cell has gravity parts, add a pair interaction */
+        if (cells[j].gcount > 0) {
+          scheduler_addtask(sched, task_type_grav_mm, task_subtype_none, -1, 0,
+                            &cells[i], &cells[j], 0);
+        }
+      }
+    }
   }
+}
 
-#endif
+/**
+ * @brief Constructs the gravity tasks building the multipoles and propagating
+ * them to the children.
+ *
+ * Correct implementation is still lacking here.
+ *
+ * @param e The #engine.
+ */
+void engine_make_gravityrecursive_tasks(struct engine *e) {
+
+  struct space *s = e->s;
+  struct scheduler *sched = &e->sched;
+  const int nodeID = e->nodeID;
+  const int nr_cells = s->nr_cells;
+  struct cell *cells = s->cells;
+
+  for (int k = 0; k < nr_cells; k++) {
+
+    /* Only do this for local cells containing gravity particles. */
+    if (cells[k].nodeID == nodeID && cells[k].gcount > 0) {
+
+      /* Create one grav_up and one grav_down task for this top-level cell. */
+      struct task *up =
+          scheduler_addtask(sched, task_type_grav_up, task_subtype_none, 0, 0,
+                            &cells[k], NULL, 0);
+      struct task *down =
+          scheduler_addtask(sched, task_type_grav_down, task_subtype_none, 0, 0,
+                            &cells[k], NULL, 0);
+
+      /* Push tasks down the cell hierarchy. */
+      engine_addtasks_grav(e, &cells[k], up, down);
+    }
+  }
+}
+
+/**
+ * @brief Fill the #space's task list.
+ *
+ * @param e The #engine we are working with.
+ */
+void engine_maketasks(struct engine *e) {
+
+  struct space *s = e->s;
+  struct scheduler *sched = &e->sched;
+  struct cell *cells = s->cells;
+  const int nr_cells = s->nr_cells;
+  const ticks tic = getticks();
+
+  /* Re-set the scheduler. */
+  scheduler_reset(sched, s->tot_cells * engine_maxtaskspercell);
+
+  /* Add the space sorting tasks. */
+  for (int i = 0; i < e->nr_threads; i++) {
+    scheduler_addtask(sched, task_type_part_sort, task_subtype_none, i, 0, NULL,
+                      NULL, 0);
+    scheduler_addtask(sched, task_type_gpart_sort, task_subtype_none, i, 0,
+                      NULL, NULL, 0);
+  }
+
+  /* Construct the first hydro loop over neighbours */
+  engine_make_hydroloop_tasks(e);
+
+  /* Add the gravity mm tasks. */
+  if ((e->policy & engine_policy_self_gravity) == engine_policy_self_gravity)
+    engine_make_gravityinteraction_tasks(e);
+
+  /* Split the tasks. */
+  scheduler_splittasks(sched);
+
+  /* Allocate the list of cell-task links. The maximum number of links
+     is the number of cells (s->tot_cells) times the number of neighbours (27)
+     times the number of interaction types (2, density and force). */
+  if (e->links != NULL) free(e->links);
+  e->size_links = s->tot_cells * 27 * 2;
+  if ((e->links = malloc(sizeof(struct link) * e->size_links)) == NULL)
+    error("Failed to allocate cell-task links.");
+  e->nr_links = 0;
+
+  /* Add the gravity up/down tasks at the top-level cells and push them down. */
+  if ((e->policy & engine_policy_self_gravity) == engine_policy_self_gravity)
+    engine_make_gravityrecursive_tasks(e);
+
+  /* Count the number of tasks associated with each cell and
+     store the density tasks in each cell, and make each sort
+     depend on the sorts of its progeny. */
+  engine_count_and_link_tasks(e);
+
+  /* Append a ghost task to each cell, and add kick tasks to the
+     super cells. */
+  for (int k = 0; k < nr_cells; k++)
+    engine_make_ghost_tasks(e, &cells[k], NULL);
+
+  /* Run through the tasks and make force tasks for each density task.
+     Each force task depends on the cell ghosts and unlocks the kick task
+     of its super-cell. */
+  engine_make_extra_hydroloop_tasks(e);
+
+  /* Add the communication tasks if MPI is being used. */
+  if ((e->policy & engine_policy_mpi) == engine_policy_mpi) {
+
+    /* Loop over the proxies. */
+    for (int pid = 0; pid < e->nr_proxies; pid++) {
+
+      /* Get a handle on the proxy. */
+      struct proxy *p = &e->proxies[pid];
+
+      /* Loop through the proxy's incoming cells and add the
+         recv tasks. */
+      for (int k = 0; k < p->nr_cells_in; k++)
+        engine_addtasks_recv(e, p->cells_in[k], NULL, NULL);
+
+      /* Loop through the proxy's outgoing cells and add the
+         send tasks. */
+      for (int k = 0; k < p->nr_cells_out; k++)
+        engine_addtasks_send(e, p->cells_out[k], p->cells_in[0]);
+    }
+  }
 
   /* Set the unlocks per task. */
   scheduler_set_unlocks(sched);
@@ -1024,9 +1459,10 @@ void engine_maketasks(struct engine *e) {
 int engine_marktasks(struct engine *e) {
 
   struct scheduler *s = &e->sched;
-  const int nr_tasks = s->nr_tasks, *ind = s->tasks_ind;
+  const int ti_end = e->ti_current;
+  const int nr_tasks = s->nr_tasks;
+  const int *const ind = s->tasks_ind;
   struct task *tasks = s->tasks;
-  const float ti_end = e->ti_current;
   const ticks tic = getticks();
 
   /* Much less to do here if we're on a fixed time-step. */
@@ -1126,6 +1562,7 @@ int engine_marktasks(struct engine *e) {
       else if (t->type == task_type_kick) {
         t->skip = (t->ci->ti_end_min > ti_end);
         t->ci->updated = 0;
+        t->ci->g_updated = 0;
       }
 
       /* Drift? */
@@ -1182,6 +1619,7 @@ void engine_print_task_counts(struct engine *e) {
   printf(" skipped=%i ]\n", counts[task_type_count]);
   fflush(stdout);
   message("nr_parts = %zi.", e->s->nr_parts);
+  message("nr_gparts = %zi.", e->s->nr_gparts);
 }
 
 /**
@@ -1192,7 +1630,7 @@ void engine_print_task_counts(struct engine *e) {
 
 void engine_rebuild(struct engine *e) {
 
-  ticks tic = getticks();
+  const ticks tic = getticks();
 
   /* Clear the forcerebuild flag, whatever it was. */
   e->forcerebuild = 0;
@@ -1213,7 +1651,7 @@ void engine_rebuild(struct engine *e) {
     error("engine_marktasks failed after space_rebuild.");
 
   /* Print the status of the system */
-  engine_print_task_counts(e);
+  if (e->verbose) engine_print_task_counts(e);
 
   if (e->verbose)
     message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
@@ -1235,7 +1673,7 @@ void engine_prepare(struct engine *e) {
 
 /* Collect the values of rebuild from all nodes. */
 #ifdef WITH_MPI
-  int buff;
+  int buff = 0;
   if (MPI_Allreduce(&rebuild, &buff, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD) !=
       MPI_SUCCESS)
     error("Failed to aggregate the rebuild flag across nodes.");
@@ -1311,7 +1749,7 @@ void engine_collect_kick(struct cell *c) {
   if (c->kick != NULL) return;
 
   /* Counters for the different quantities. */
-  int updated = 0;
+  int updated = 0, g_updated = 0;
   double e_kin = 0.0, e_int = 0.0, e_pot = 0.0;
   float mom[3] = {0.0f, 0.0f, 0.0f}, ang[3] = {0.0f, 0.0f, 0.0f};
   int ti_end_min = max_nr_timesteps, ti_end_max = 0;
@@ -1334,6 +1772,7 @@ void engine_collect_kick(struct cell *c) {
         ti_end_min = min(ti_end_min, cp->ti_end_min);
         ti_end_max = max(ti_end_max, cp->ti_end_max);
         updated += cp->updated;
+        g_updated += cp->g_updated;
         e_kin += cp->e_kin;
         e_int += cp->e_int;
         e_pot += cp->e_pot;
@@ -1351,6 +1790,7 @@ void engine_collect_kick(struct cell *c) {
   c->ti_end_min = ti_end_min;
   c->ti_end_max = ti_end_max;
   c->updated = updated;
+  c->g_updated = g_updated;
   c->e_kin = e_kin;
   c->e_int = e_int;
   c->e_pot = e_pot;
@@ -1414,7 +1854,15 @@ void engine_init_particles(struct engine *e) {
 
   /* Make sure all particles are ready to go */
   /* i.e. clean-up any stupid state in the ICs */
-  space_map_cells_pre(s, 1, cell_init_parts, NULL);
+  if ((e->policy & engine_policy_hydro) == engine_policy_hydro) {
+    space_map_cells_pre(s, 1, cell_init_parts, NULL);
+  }
+  if (((e->policy & engine_policy_self_gravity) ==
+       engine_policy_self_gravity) ||
+      ((e->policy & engine_policy_external_gravity) ==
+       engine_policy_external_gravity)) {
+    space_map_cells_pre(s, 1, cell_init_gparts, NULL);
+  }
 
   engine_prepare(e);
 
@@ -1488,7 +1936,7 @@ void engine_init_particles(struct engine *e) {
  */
 void engine_step(struct engine *e) {
 
-  int updates = 0;
+  int updates = 0, g_updates = 0;
   int ti_end_min = max_nr_timesteps, ti_end_max = 0;
   double e_pot = 0.0, e_int = 0.0, e_kin = 0.0;
   float mom[3] = {0.0, 0.0, 0.0};
@@ -1515,6 +1963,7 @@ void engine_step(struct engine *e) {
       e_int += c->e_int;
       e_pot += c->e_pot;
       updates += c->updated;
+      g_updates += c->g_updated;
       mom[0] += c->mom[0];
       mom[1] += c->mom[1];
       mom[2] += c->mom[2];
@@ -1526,7 +1975,8 @@ void engine_step(struct engine *e) {
 /* Aggregate the data from the different nodes. */
 #ifdef WITH_MPI
   {
-    int in_i[4], out_i[4];
+    int in_i[1], out_i[1];
+    in_i[0] = 0;
     out_i[0] = ti_end_min;
     if (MPI_Allreduce(out_i, in_i, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD) !=
         MPI_SUCCESS)
@@ -1539,18 +1989,20 @@ void engine_step(struct engine *e) {
     ti_end_max = in_i[0];
   }
   {
-    double in_d[4], out_d[4];
+    double in_d[5], out_d[5];
     out_d[0] = updates;
-    out_d[1] = e_kin;
-    out_d[2] = e_int;
-    out_d[3] = e_pot;
-    if (MPI_Allreduce(out_d, in_d, 4, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD) !=
+    out_d[1] = g_updates;
+    out_d[2] = e_kin;
+    out_d[3] = e_int;
+    out_d[4] = e_pot;
+    if (MPI_Allreduce(out_d, in_d, 5, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD) !=
         MPI_SUCCESS)
       error("Failed to aggregate energies.");
     updates = in_d[0];
-    e_kin = in_d[1];
-    e_int = in_d[2];
-    e_pot = in_d[3];
+    g_updates = in_d[1];
+    e_kin = in_d[2];
+    e_int = in_d[3];
+    e_pot = in_d[4];
   }
 #endif
 
@@ -1575,8 +2027,8 @@ void engine_step(struct engine *e) {
   if (e->nodeID == 0) {
 
     /* Print some information to the screen */
-    printf("%d %e %e %d %.3f\n", e->step, e->time, e->timeStep, updates,
-           e->wallclock_time);
+    printf("  %6d %14e %14e %10d %10d %21.3f\n", e->step, e->time, e->timeStep,
+           updates, g_updates, e->wallclock_time);
     fflush(stdout);
 
     /* Write some energy statistics */
@@ -1779,7 +2231,7 @@ void engine_split(struct engine *e, struct partition *initial_partition) {
   engine_makeproxies(e);
 
   /* Re-allocate the local parts. */
-  if (e->nodeID == 0)
+  if (e->verbose)
     message("Re-allocating parts array from %zi to %zi.", s->size_parts,
             (size_t)(s->nr_parts * 1.2));
   s->size_parts = s->nr_parts * 1.2;
@@ -1787,7 +2239,7 @@ void engine_split(struct engine *e, struct partition *initial_partition) {
   struct xpart *xparts_new = NULL;
   if (posix_memalign((void **)&parts_new, part_align,
                      sizeof(struct part) * s->size_parts) != 0 ||
-      posix_memalign((void **)&xparts_new, part_align,
+      posix_memalign((void **)&xparts_new, xpart_align,
                      sizeof(struct xpart) * s->size_parts) != 0)
     error("Failed to allocate new part data.");
   memcpy(parts_new, s->parts, sizeof(struct part) * s->nr_parts);
@@ -1796,6 +2248,50 @@ void engine_split(struct engine *e, struct partition *initial_partition) {
   free(s->xparts);
   s->parts = parts_new;
   s->xparts = xparts_new;
+
+  /* Re-link the gparts. */
+  for (size_t k = 0; k < s->nr_parts; k++)
+    if (s->parts[k].gpart != NULL) s->parts[k].gpart->part = &s->parts[k];
+
+  /* Re-allocate the local gparts. */
+  if (e->verbose)
+    message("Re-allocating gparts array from %zi to %zi.", s->size_gparts,
+            (size_t)(s->nr_gparts * 1.2));
+  s->size_gparts = s->nr_gparts * 1.2;
+  struct gpart *gparts_new = NULL;
+  if (posix_memalign((void **)&gparts_new, gpart_align,
+                     sizeof(struct gpart) * s->size_gparts) != 0)
+    error("Failed to allocate new gpart data.");
+  memcpy(gparts_new, s->gparts, sizeof(struct gpart) * s->nr_gparts);
+  free(s->gparts);
+  s->gparts = gparts_new;
+
+  /* Re-link the parts. */
+  for (size_t k = 0; k < s->nr_gparts; k++)
+    if (s->gparts[k].id > 0) s->gparts[k].part->gpart = &s->gparts[k];
+
+  /* Verify that the links are correct */
+  /* MATTHIEU: To be commented out once we are happy */
+  for (size_t k = 0; k < s->nr_gparts; ++k) {
+
+    if (s->gparts[k].id > 0) {
+
+      if (s->gparts[k].part->gpart != &s->gparts[k]) error("Linking problem !");
+
+      if (s->gparts[k].x[0] != s->gparts[k].part->x[0] ||
+          s->gparts[k].x[1] != s->gparts[k].part->x[1] ||
+          s->gparts[k].x[2] != s->gparts[k].part->x[2])
+        error("Linked particles are not at the same position !");
+    }
+  }
+  for (size_t k = 0; k < s->nr_parts; ++k) {
+
+    if (s->parts[k].gpart != NULL) {
+
+      if (s->parts[k].gpart->part != &s->parts[k]) error("Linking problem !");
+    }
+  }
+
 #else
   error("SWIFT was not compiled with MPI support.");
 #endif
@@ -1825,30 +2321,25 @@ static bool hyperthreads_present(void) {
  *
  * @param e The #engine.
  * @param s The #space in which this #runner will run.
- * @param dt The initial time step to use.
- * @param nr_threads The number of threads to spawn.
- * @param nr_queues The number of task queues to create.
+ * @param params The parsed parameter file.
  * @param nr_nodes The number of MPI ranks.
  * @param nodeID The MPI rank of this node.
  * @param policy The queuing policy to use.
- * @param timeBegin Time at the begininning of the simulation.
- * @param timeEnd Time at the end of the simulation.
- * @param dt_min Minimal allowed timestep (unsed with fixdt policy)
- * @param dt_max Maximal allowed timestep
  * @param verbose Is this #engine talkative ?
  */
 
-void engine_init(struct engine *e, struct space *s, float dt, int nr_threads,
-                 int nr_queues, int nr_nodes, int nodeID, int policy,
-                 float timeBegin, float timeEnd, float dt_min, float dt_max,
-                 int verbose) {
+void engine_init(struct engine *e, struct space *s,
+                 const struct swift_params *params, int nr_nodes, int nodeID,
+                 int policy, int verbose) {
+
+  /* Clean-up everything */
+  bzero(e, sizeof(struct engine));
 
   /* Store the values. */
   e->s = s;
-  e->nr_threads = nr_threads;
+  e->nr_threads = parser_get_param_int(params, "Scheduler:nr_threads");
   e->policy = policy;
   e->step = 0;
-  e->nullstep = 0;
   e->nr_nodes = nr_nodes;
   e->nodeID = nodeID;
   e->proxy_ind = NULL;
@@ -1857,23 +2348,29 @@ void engine_init(struct engine *e, struct space *s, float dt, int nr_threads,
   e->forcerepart = REPART_NONE;
   e->links = NULL;
   e->nr_links = 0;
-  e->timeBegin = timeBegin;
-  e->timeEnd = timeEnd;
-  e->timeOld = timeBegin;
-  e->time = timeBegin;
+  e->timeBegin = parser_get_param_double(params, "TimeIntegration:time_begin");
+  e->timeEnd = parser_get_param_double(params, "TimeIntegration:time_end");
+  e->timeOld = e->timeBegin;
+  e->time = e->timeBegin;
   e->ti_old = 0;
   e->ti_current = 0;
   e->timeStep = 0.;
-  e->dt_min = dt_min;
-  e->dt_max = dt_max;
+  e->dt_min = parser_get_param_double(params, "TimeIntegration:dt_min");
+  e->dt_max = parser_get_param_double(params, "TimeIntegration:dt_max");
   e->file_stats = NULL;
   e->verbose = verbose;
+  e->count_step = 0;
   e->wallclock_time = 0.f;
   engine_rank = nodeID;
 
   /* Make the space link back to the engine. */
   s->e = e;
 
+  /* Get the number of queues */
+  int nr_queues = parser_get_param_int(params, "Scheduler:nr_queues");
+  if (nr_queues <= 0) nr_queues = e->nr_threads;
+  s->nr_queues = nr_queues;
+
 #if defined(HAVE_SETAFFINITY)
   const int nr_cores = sysconf(_SC_NPROCESSORS_ONLN);
   int cpuid[nr_cores];
@@ -1969,22 +2466,29 @@ void engine_init(struct engine *e, struct space *s, float dt, int nr_threads,
   engine_print_policy(e);
 
   /* Print information about the hydro scheme */
-  if (e->nodeID == 0) message("Hydrodynamic scheme: %s", SPH_IMPLEMENTATION);
+  if ((e->policy & engine_policy_hydro) == engine_policy_hydro) {
+    if (e->nodeID == 0) message("Hydrodynamic scheme: %s.", SPH_IMPLEMENTATION);
+    if (e->nodeID == 0)
+      message("Hydrodynamic kernel: %s with %.2f +/- %.2f neighbours.",
+              kernel_name, kernel_nwneigh, const_delta_nwneigh);
+  }
 
   /* Check we have sensible time bounds */
-  if (timeBegin >= timeEnd)
+  if (e->timeBegin >= e->timeEnd)
     error(
         "Final simulation time (t_end = %e) must be larger than the start time "
         "(t_beg = %e)",
-        timeEnd, timeBegin);
+        e->timeEnd, e->timeBegin);
 
-  /* Check we have sensible time step bounds */
+  /* Check we have sensible time-step values */
   if (e->dt_min > e->dt_max)
     error(
-        "Minimal time step size must be smaller than maximal time step size ");
+        "Minimal time-step size (%e) must be smaller than maximal time-step "
+        "size (%e)",
+        e->dt_min, e->dt_max);
 
   /* Deal with timestep */
-  e->timeBase = (timeEnd - timeBegin) / max_nr_timesteps;
+  e->timeBase = (e->timeEnd - e->timeBegin) / max_nr_timesteps;
   e->ti_current = 0;
 
   /* Fixed time-step case */
@@ -2003,12 +2507,12 @@ void engine_init(struct engine *e, struct space *s, float dt, int nr_threads,
     if (e->nodeID == 0) {
       message("Absolute minimal timestep size: %e", e->timeBase);
 
-      float dt_min = timeEnd - timeBegin;
+      float dt_min = e->timeEnd - e->timeBegin;
       while (dt_min > e->dt_min) dt_min /= 2.f;
 
       message("Minimal timestep size (on time-line): %e", dt_min);
 
-      float dt_max = timeEnd - timeBegin;
+      float dt_max = e->timeEnd - e->timeBegin;
       while (dt_max > e->dt_max) dt_max /= 2.f;
 
       message("Maximal timestep size (on time-line): %e", dt_max);
@@ -2027,8 +2531,7 @@ void engine_init(struct engine *e, struct space *s, float dt, int nr_threads,
 
 /* Construct types for MPI communications */
 #ifdef WITH_MPI
-  part_create_mpi_type(&e->part_mpi_type);
-  xpart_create_mpi_type(&e->xpart_mpi_type);
+  part_create_mpi_types();
 #endif
 
   /* First of all, init the barrier and lock it. */
@@ -2043,23 +2546,26 @@ void engine_init(struct engine *e, struct space *s, float dt, int nr_threads,
   e->barrier_launchcount = 0;
 
   /* Init the scheduler with enough tasks for the initial sorting tasks. */
-  int nr_tasks = 2 * s->tot_cells + e->nr_threads;
+  const int nr_tasks = 2 * s->tot_cells + 2 * e->nr_threads;
   scheduler_init(&e->sched, e->s, nr_tasks, nr_queues, scheduler_flag_steal,
                  e->nodeID);
-  s->nr_queues = nr_queues;
 
   /* Create the sorting tasks. */
-  for (int i = 0; i < e->nr_threads; i++)
-    scheduler_addtask(&e->sched, task_type_psort, task_subtype_none, i, 0, NULL,
-                      NULL, 0);
+  for (int i = 0; i < e->nr_threads; i++) {
+    scheduler_addtask(&e->sched, task_type_part_sort, task_subtype_none, i, 0,
+                      NULL, NULL, 0);
+
+    scheduler_addtask(&e->sched, task_type_gpart_sort, task_subtype_none, i, 0,
+                      NULL, NULL, 0);
+  }
 
   scheduler_ranktasks(&e->sched);
 
   /* Allocate and init the threads. */
-  if ((e->runners =
-           (struct runner *)malloc(sizeof(struct runner) * nr_threads)) == NULL)
+  if ((e->runners = (struct runner *)malloc(sizeof(struct runner) *
+                                            e->nr_threads)) == NULL)
     error("Failed to allocate threads array.");
-  for (int k = 0; k < nr_threads; k++) {
+  for (int k = 0; k < e->nr_threads; k++) {
     e->runners[k].id = k;
     e->runners[k].e = e;
     e->barrier_running += 1;
@@ -2071,7 +2577,7 @@ void engine_init(struct engine *e, struct space *s, float dt, int nr_threads,
 
       /* Set a reasonable queue ID. */
       e->runners[k].cpuid = cpuid[k % nr_cores];
-      if (nr_queues < nr_threads)
+      if (nr_queues < e->nr_threads)
         e->runners[k].qid = cpuid[k % nr_cores] * nr_queues / nr_cores;
       else
         e->runners[k].qid = k;
@@ -2090,7 +2596,7 @@ void engine_init(struct engine *e, struct space *s, float dt, int nr_threads,
 #endif
     } else {
       e->runners[k].cpuid = k;
-      e->runners[k].qid = k * nr_queues / nr_threads;
+      e->runners[k].qid = k * nr_queues / e->nr_threads;
     }
     // message( "runner %i on cpuid=%i with qid=%i." , e->runners[k].id ,
     // e->runners[k].cpuid , e->runners[k].qid );
diff --git a/src/engine.h b/src/engine.h
index 741ae1f553494e435394f529606b4cb794b0e3d2..e1c3f61d1293fc01e24b9bcb0673d75fa3ce4648 100644
--- a/src/engine.h
+++ b/src/engine.h
@@ -38,6 +38,7 @@
 #include "scheduler.h"
 #include "space.h"
 #include "task.h"
+#include "parser.h"
 #include "partition.h"
 
 /* Some constants. */
@@ -53,7 +54,8 @@ enum engine_policy {
   engine_policy_setaffinity = (1 << 7),
   engine_policy_hydro = (1 << 8),
   engine_policy_self_gravity = (1 << 9),
-  engine_policy_external_gravity = (1 << 10)
+  engine_policy_external_gravity = (1 << 10),
+  engine_policy_cosmology = (1 << 11)
 };
 
 extern const char *engine_policy_names[];
@@ -62,6 +64,8 @@ extern const char *engine_policy_names[];
 #define engine_maxtaskspercell 96
 #define engine_maxproxies 64
 #define engine_tasksreweight 10
+#define engine_parts_size_grow 1.05
+#define engine_redistribute_alloc_margin 1.2
 
 /* The rank of the engine as a global variable (for messages). */
 extern int engine_rank;
@@ -124,7 +128,7 @@ struct engine {
   FILE *file_stats;
 
   /* The current step number. */
-  int step, nullstep;
+  int step;
 
   /* The number of particles updated in the previous step. */
   int count_step;
@@ -160,20 +164,13 @@ struct engine {
 
   /* Are we talkative ? */
   int verbose;
-
-#ifdef WITH_MPI
-  /* MPI data type for the particle transfers */
-  MPI_Datatype part_mpi_type;
-  MPI_Datatype xpart_mpi_type;
-#endif
 };
 
 /* Function prototypes. */
 void engine_barrier(struct engine *e, int tid);
-void engine_init(struct engine *e, struct space *s, float dt, int nr_threads,
-                 int nr_queues, int nr_nodes, int nodeID, int policy,
-                 float timeBegin, float timeEnd, float dt_min, float dt_max,
-                 int verbose);
+void engine_init(struct engine *e, struct space *s,
+                 const struct swift_params *params, int nr_nodes, int nodeID,
+                 int policy, int verbose);
 void engine_launch(struct engine *e, int nr_runners, unsigned int mask,
                    unsigned int submask);
 void engine_prepare(struct engine *e);
@@ -182,7 +179,9 @@ void engine_init_particles(struct engine *e);
 void engine_step(struct engine *e);
 void engine_maketasks(struct engine *e);
 void engine_split(struct engine *e, struct partition *initial_partition);
-int engine_exchange_strays(struct engine *e, int offset, size_t *ind, size_t N);
+void engine_exchange_strays(struct engine *e, size_t offset_parts,
+                            int *ind_part, size_t *Npart, size_t offset_gparts,
+                            int *ind_gpart, size_t *Ngpart);
 void engine_rebuild(struct engine *e);
 void engine_repartition(struct engine *e);
 void engine_makeproxies(struct engine *e);
diff --git a/src/gravity/Default/gravity.h b/src/gravity/Default/gravity.h
index 82bc52ad3e05794c8c05896075edc463a69197ff..92a9f64c1f84a9e949f4c0e9485f892b5c808cdc 100644
--- a/src/gravity/Default/gravity.h
+++ b/src/gravity/Default/gravity.h
@@ -22,14 +22,61 @@
 /**
  * @brief Computes the gravity time-step of a given particle
  *
- * @param p Pointer to the particle data
- * @param xp Pointer to the extended particle data
+ * @param gp Pointer to the g-particle data
  *
  */
 
-__attribute__((always_inline)) INLINE static float gravity_compute_timestep(
-    struct part* p, struct xpart* xp) {
+__attribute__((always_inline))
+    INLINE static float gravity_compute_timestep(struct gpart* gp) {
 
   /* Currently no limit is imposed */
   return FLT_MAX;
 }
+
+/**
+ * @brief Initialises the g-particles for the first time
+ *
+ * This function is called only once just after the ICs have been
+ * read in to do some conversions.
+ *
+ * @param gp The g-particle to act upon (this default version changes nothing)
+ */
+__attribute__((always_inline))
+    INLINE static void gravity_first_init_gpart(struct gpart* gp) {}
+
+/**
+ * @brief Prepares a g-particle for the gravity calculation
+ *
+ * Zeroes all the relevant arrays in preparation for the sums taking place in
+ * the various tasks.
+ *
+ * @param gp The g-particle whose a_grav components are reset
+ */
+__attribute__((always_inline))
+    INLINE static void gravity_init_part(struct gpart* gp) {
+
+  /* Zero the three components of the gravitational acceleration */
+  gp->a_grav[0] = 0.f;
+  gp->a_grav[1] = 0.f;
+  gp->a_grav[2] = 0.f;
+}
+
+/**
+ * @brief Finishes the gravity calculation.
+ *
+ * Hook for applying the appropriate constants; this default body is empty.
+ *
+ * @param gp The particle to act upon
+ */
+__attribute__((always_inline))
+    INLINE static void gravity_end_force(struct gpart* gp) {}
+
+/**
+ * @brief Kick the additional variables (this default body is empty)
+ *
+ * @param gp The particle to act upon
+ * @param dt The time-step for this kick
+ * @param half_dt The half time-step for this kick
+ */
+__attribute__((always_inline)) INLINE static void gravity_kick_extra(
+    struct gpart* gp, float dt, float half_dt) {}
diff --git a/src/gravity/Default/gravity_debug.h b/src/gravity/Default/gravity_debug.h
index 98e0c40a5700b4da70f27fb0955592bb5d2287c3..654745bfeb70dddba772af9e23797713376377a7 100644
--- a/src/gravity/Default/gravity_debug.h
+++ b/src/gravity/Default/gravity_debug.h
@@ -24,5 +24,5 @@ __attribute__((always_inline))
       "v_full=[%.3e,%.3e,%.3e] \n a=[%.3e,%.3e,%.3e],\n "
       "mass=%.3e t_begin=%d, t_end=%d\n",
       p->x[0], p->x[1], p->x[2], p->v_full[0], p->v_full[1], p->v_full[2],
-      p->a[0], p->a[1], p->a[2], p->mass, p->ti_begin, p->ti_end);
+      p->a_grav[0], p->a_grav[1], p->a_grav[2], p->mass, p->ti_begin, p->ti_end);
 }
diff --git a/src/gravity/Default/gravity_iact.h b/src/gravity/Default/gravity_iact.h
index e62be446e8263bf02e3fd73f902b28cb1c3b16cf..62023345f174eb8cb9bae4d4438bdd50c9969494 100644
--- a/src/gravity/Default/gravity_iact.h
+++ b/src/gravity/Default/gravity_iact.h
@@ -22,19 +22,12 @@
 
 /* Includes. */
 #include "const.h"
-#include "kernel.h"
+#include "kernel_gravity.h"
 #include "vector.h"
 
-/**
- * @file  runner_iact_grav.h
- * @brief Gravity interaction functions.
- *
- */
-
 /**
  * @brief Gravity potential
  */
-
 __attribute__((always_inline)) INLINE static void runner_iact_grav(
     float r2, float *dx, struct gpart *pi, struct gpart *pj) {
 
@@ -56,8 +49,8 @@ __attribute__((always_inline)) INLINE static void runner_iact_grav(
   /* Aggregate the accelerations. */
   for (k = 0; k < 3; k++) {
     w = acc * dx[k];
-    pi->a[k] -= w * mj;
-    pj->a[k] += w * mi;
+    pi->a_grav[k] -= w * mj;
+    pj->a_grav[k] += w * mi;
   }
 }
 
@@ -107,8 +100,8 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_grav(
     ai.v = w.v * mj.v;
     aj.v = w.v * mi.v;
     for (j = 0; j < VEC_SIZE; j++) {
-      pi[j]->a[k] -= ai.f[j];
-      pj[j]->a[k] += aj.f[j];
+      pi[j]->a_grav[k] -= ai.f[j];
+      pj[j]->a_grav[k] += aj.f[j];
     }
   }
 
diff --git a/src/gravity/Default/gravity_io.h b/src/gravity/Default/gravity_io.h
index d707d69631e65eed8ad21a7fa9601c07d3c71263..129c4b39828ca73d2d80d79edbdaa8ec4d5a9e01 100644
--- a/src/gravity/Default/gravity_io.h
+++ b/src/gravity/Default/gravity_io.h
@@ -48,6 +48,8 @@ __attribute__((always_inline)) INLINE static void darkmatter_read_particles(
  *
  * @param h_grp The HDF5 group in which to write the arrays.
  * @param fileName The name of the file (unsued in MPI mode).
+ * @param partTypeGroupName The name of the group containing the particles in
+ * the HDF5 file.
  * @param xmfFile The XMF file to write to (unused in MPI mode).
  * @param Ndm The number of DM particles on that MPI rank.
  * @param Ndm_total The total number of g-particles (only used in MPI mode)
@@ -59,17 +61,20 @@ __attribute__((always_inline)) INLINE static void darkmatter_read_particles(
  *
  */
 __attribute__((always_inline)) INLINE static void darkmatter_write_particles(
-    hid_t h_grp, char* fileName, FILE* xmfFile, int Ndm, long long Ndm_total,
-    int mpi_rank, long long offset, struct gpart* gparts,
-    struct UnitSystem* us) {
+    hid_t h_grp, char* fileName, char* partTypeGroupName, FILE* xmfFile,
+    int Ndm, long long Ndm_total, int mpi_rank, long long offset,
+    struct gpart* gparts, struct UnitSystem* us) {
 
   /* Write arrays */
-  writeArray(h_grp, fileName, xmfFile, "Coordinates", DOUBLE, Ndm, 3, gparts,
-             Ndm_total, mpi_rank, offset, x, us, UNIT_CONV_LENGTH);
-  writeArray(h_grp, fileName, xmfFile, "Masses", FLOAT, Ndm, 1, gparts,
-             Ndm_total, mpi_rank, offset, mass, us, UNIT_CONV_MASS);
-  writeArray(h_grp, fileName, xmfFile, "Velocities", FLOAT, Ndm, 3, gparts,
-             Ndm_total, mpi_rank, offset, v_full, us, UNIT_CONV_SPEED);
-  writeArray(h_grp, fileName, xmfFile, "ParticleIDs", ULONGLONG, Ndm, 1, gparts,
-             Ndm_total, mpi_rank, offset, id, us, UNIT_CONV_NO_UNITS);
+  writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Coordinates", DOUBLE,
+             Ndm, 3, gparts, Ndm_total, mpi_rank, offset, x, us,
+             UNIT_CONV_LENGTH);
+  writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Masses", FLOAT, Ndm,
+             1, gparts, Ndm_total, mpi_rank, offset, mass, us, UNIT_CONV_MASS);
+  writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Velocities", FLOAT,
+             Ndm, 3, gparts, Ndm_total, mpi_rank, offset, v_full, us,
+             UNIT_CONV_SPEED);
+  writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "ParticleIDs",
+             ULONGLONG, Ndm, 1, gparts, Ndm_total, mpi_rank, offset, id, us,
+             UNIT_CONV_NO_UNITS);
 }
diff --git a/src/gravity/Default/gravity_part.h b/src/gravity/Default/gravity_part.h
index 7ce7b81892582f2a90f7dd07f7f244c0d4ed8afb..0dfdb82e4ec11c9153f77439027d7e4451ded7f4 100644
--- a/src/gravity/Default/gravity_part.h
+++ b/src/gravity/Default/gravity_part.h
@@ -29,7 +29,7 @@ struct gpart {
   float v_full[3];
 
   /* Particle acceleration. */
-  float a[3];
+  float a_grav[3];
 
   /* Particle mass. */
   float mass;
@@ -50,4 +50,4 @@ struct gpart {
     struct part* part;
   };
 
-} __attribute__((aligned(part_align)));
+} __attribute__((aligned(gpart_align)));
diff --git a/src/hydro/Default/hydro.h b/src/hydro/Default/hydro.h
index fca4a346047d7dce0741924a69e95fdad5a5ce45..03953b07ad4e172d96b6e3382814e036a538e2bd 100644
--- a/src/hydro/Default/hydro.h
+++ b/src/hydro/Default/hydro.h
@@ -91,13 +91,16 @@ __attribute__((always_inline))
   const float ih2 = ih * ih;
   const float ih4 = ih2 * ih2;
 
-  /* Final operation on the density. */
-  p->rho = ih * ih2 * (p->rho + p->mass * kernel_root);
-  p->rho_dh = (p->rho_dh - 3.0f * p->mass * kernel_root) * ih4;
-  p->density.wcount =
-      (p->density.wcount + kernel_root) * (4.0f / 3.0 * M_PI * kernel_gamma3);
-  p->density.wcount_dh =
-      p->density.wcount_dh * ih * (4.0f / 3.0 * M_PI * kernel_gamma3);
+  /* Final operation on the density (add self-contribution). */
+  p->rho += p->mass * kernel_root;
+  p->rho_dh -= 3.0f * p->mass * kernel_root * kernel_igamma;
+  p->density.wcount += kernel_root;
+
+  /* Finish the calculation by inserting the missing h-factors */
+  p->rho *= ih * ih2;
+  p->rho_dh *= ih4;
+  p->density.wcount *= (4.0f / 3.0f * M_PI * kernel_gamma3);
+  p->density.wcount_dh *= ih * (4.0f / 3.0f * M_PI * kernel_gamma4);
 }
 
 /**
diff --git a/src/hydro/Default/hydro_iact.h b/src/hydro/Default/hydro_iact.h
index b5b631501b2f9c398cf1f7e5ee32fd5c962ba86e..4f85299b9d61b3a66389bac3527a63068ab96db9 100644
--- a/src/hydro/Default/hydro_iact.h
+++ b/src/hydro/Default/hydro_iact.h
@@ -22,7 +22,7 @@
 
 /* Includes. */
 #include "const.h"
-#include "kernel.h"
+#include "kernel_hydro.h"
 #include "part.h"
 #include "vector.h"
 
diff --git a/src/hydro/Default/hydro_io.h b/src/hydro/Default/hydro_io.h
index 958bf5a1869718b57678246ff3b1985e54145824..0e9ad46ddc1d4e8c8d3ffdbf3e81262ec49a7092 100644
--- a/src/hydro/Default/hydro_io.h
+++ b/src/hydro/Default/hydro_io.h
@@ -56,6 +56,8 @@ __attribute__((always_inline)) INLINE static void hydro_read_particles(
  *
  * @param h_grp The HDF5 group in which to write the arrays.
  * @param fileName The name of the file (unsued in MPI mode).
+ * @param partTypeGroupName The name of the group containing the particles in
+ * the HDF5 file.
  * @param xmfFile The XMF file to write to (unused in MPI mode).
  * @param N The number of particles on that MPI rank.
  * @param N_total The total number of particles (only used in MPI mode)
@@ -67,26 +69,31 @@ __attribute__((always_inline)) INLINE static void hydro_read_particles(
  *
  */
 __attribute__((always_inline)) INLINE static void hydro_write_particles(
-    hid_t h_grp, char* fileName, FILE* xmfFile, int N, long long N_total,
-    int mpi_rank, long long offset, struct part* parts, struct UnitSystem* us) {
+    hid_t h_grp, char* fileName, char* partTypeGroupName, FILE* xmfFile, int N,
+    long long N_total, int mpi_rank, long long offset, struct part* parts,
+    struct UnitSystem* us) {
 
   /* Write arrays */
-  writeArray(h_grp, fileName, xmfFile, "Coordinates", DOUBLE, N, 3, parts,
-             N_total, mpi_rank, offset, x, us, UNIT_CONV_LENGTH);
-  writeArray(h_grp, fileName, xmfFile, "Velocities", FLOAT, N, 3, parts,
-             N_total, mpi_rank, offset, v, us, UNIT_CONV_SPEED);
-  writeArray(h_grp, fileName, xmfFile, "Masses", FLOAT, N, 1, parts, N_total,
-             mpi_rank, offset, mass, us, UNIT_CONV_MASS);
-  writeArray(h_grp, fileName, xmfFile, "SmoothingLength", FLOAT, N, 1, parts,
-             N_total, mpi_rank, offset, h, us, UNIT_CONV_LENGTH);
-  writeArray(h_grp, fileName, xmfFile, "InternalEnergy", FLOAT, N, 1, parts,
-             N_total, mpi_rank, offset, u, us, UNIT_CONV_ENERGY_PER_UNIT_MASS);
-  writeArray(h_grp, fileName, xmfFile, "ParticleIDs", ULONGLONG, N, 1, parts,
-             N_total, mpi_rank, offset, id, us, UNIT_CONV_NO_UNITS);
-  writeArray(h_grp, fileName, xmfFile, "Acceleration", FLOAT, N, 3, parts,
-             N_total, mpi_rank, offset, a_hydro, us, UNIT_CONV_ACCELERATION);
-  writeArray(h_grp, fileName, xmfFile, "Density", FLOAT, N, 1, parts, N_total,
-             mpi_rank, offset, rho, us, UNIT_CONV_DENSITY);
+  writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Coordinates", DOUBLE,
+             N, 3, parts, N_total, mpi_rank, offset, x, us, UNIT_CONV_LENGTH);
+  writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Velocities", FLOAT,
+             N, 3, parts, N_total, mpi_rank, offset, v, us, UNIT_CONV_SPEED);
+  writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Masses", FLOAT, N, 1,
+             parts, N_total, mpi_rank, offset, mass, us, UNIT_CONV_MASS);
+  writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "SmoothingLength",
+             FLOAT, N, 1, parts, N_total, mpi_rank, offset, h, us,
+             UNIT_CONV_LENGTH);
+  writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "InternalEnergy",
+             FLOAT, N, 1, parts, N_total, mpi_rank, offset, u, us,
+             UNIT_CONV_ENERGY_PER_UNIT_MASS);
+  writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "ParticleIDs",
+             ULONGLONG, N, 1, parts, N_total, mpi_rank, offset, id, us,
+             UNIT_CONV_NO_UNITS);
+  writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Acceleration", FLOAT,
+             N, 3, parts, N_total, mpi_rank, offset, a_hydro, us,
+             UNIT_CONV_ACCELERATION);
+  writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Density", FLOAT, N,
+             1, parts, N_total, mpi_rank, offset, rho, us, UNIT_CONV_DENSITY);
 }
 
 /**
diff --git a/src/hydro/Gadget2/hydro.h b/src/hydro/Gadget2/hydro.h
index 8cc553363122099c748e3e3e1941611e986c8581..22c5734ed5762400285521b30f9aa60795c45325 100644
--- a/src/hydro/Gadget2/hydro.h
+++ b/src/hydro/Gadget2/hydro.h
@@ -101,7 +101,7 @@ __attribute__((always_inline))
   p->rho *= ih * ih2;
   p->rho_dh *= ih4;
   p->density.wcount *= (4.0f / 3.0f * M_PI * kernel_gamma3);
-  p->density.wcount_dh *= ih * (4.0f / 3.0f * M_PI * kernel_gamma3);
+  p->density.wcount_dh *= ih * (4.0f / 3.0f * M_PI * kernel_gamma4);
 
   const float irho = 1.f / p->rho;
 
diff --git a/src/hydro/Gadget2/hydro_debug.h b/src/hydro/Gadget2/hydro_debug.h
index 46e156bb99015069f9958aeea05954e2be6db5e0..a4d1f7dd4397ebfc850b582e1ca81fc0d4edb76a 100644
--- a/src/hydro/Gadget2/hydro_debug.h
+++ b/src/hydro/Gadget2/hydro_debug.h
@@ -23,13 +23,13 @@ __attribute__((always_inline))
       "x=[%.3e,%.3e,%.3e], "
       "v=[%.3e,%.3e,%.3e],v_full=[%.3e,%.3e,%.3e] \n a=[%.3e,%.3e,%.3e],\n "
       "h=%.3e, "
-      "wcount=%d, m=%.3e, dh_drho=%.3e, rho=%.3e, P=%.3e, S=%.3e, "
+      "wcount=%d, wcount_dh=%.3e, m=%.3e, dh_drho=%.3e, rho=%.3e, P=%.3e, S=%.3e, "
       "dS/dt=%.3e, c=%.3e\n"
       "divV=%.3e, curlV=%.3e, rotV=[%.3e,%.3e,%.3e]  \n "
       "v_sig=%e dh/dt=%.3e t_begin=%d, t_end=%d\n",
       p->x[0], p->x[1], p->x[2], p->v[0], p->v[1], p->v[2], xp->v_full[0],
       xp->v_full[1], xp->v_full[2], p->a_hydro[0], p->a_hydro[1], p->a_hydro[2],
-      p->h, (int)p->density.wcount, p->mass, p->rho_dh, p->rho,
+      p->h, (int)p->density.wcount, p->density.wcount_dh, p->mass, p->rho_dh, p->rho,
       p->force.pressure, p->entropy, p->entropy_dt, p->force.soundspeed,
       p->div_v, p->force.curl_v, p->density.rot_v[0], p->density.rot_v[1],
       p->density.rot_v[2], p->force.v_sig, p->h_dt, p->ti_begin, p->ti_end);
diff --git a/src/hydro/Gadget2/hydro_iact.h b/src/hydro/Gadget2/hydro_iact.h
index d31b6be383b80a2698b63d27308f6fee9b23518f..d988c678affcf4ca722a965a7e52a7c120b4a924 100644
--- a/src/hydro/Gadget2/hydro_iact.h
+++ b/src/hydro/Gadget2/hydro_iact.h
@@ -22,7 +22,7 @@
 
 /* Includes. */
 #include "const.h"
-#include "kernel.h"
+#include "kernel_hydro.h"
 #include "part.h"
 #include "vector.h"
 
@@ -93,8 +93,8 @@ __attribute__((always_inline)) INLINE static void runner_iact_density(
   dv[2] = pi->v[2] - pj->v[2];
   const float dvdr = dv[0] * dx[0] + dv[1] * dx[1] + dv[2] * dx[2];
 
-  pi->div_v += faci * dvdr;
-  pj->div_v += facj * dvdr;
+  pi->div_v -= faci * dvdr;
+  pj->div_v -= facj * dvdr;
 
   /* Compute dv cross r */
   curlvr[0] = dv[1] * dx[2] - dv[2] * dx[1];
@@ -211,10 +211,10 @@ __attribute__((always_inline)) INLINE static void runner_iact_force(
   /* Balsara term */
   const float balsara_i =
       fabsf(pi->div_v) /
-      (fabsf(pi->div_v) + pi->force.curl_v + 0.0001 * ci / fac_mu / hi);
+      (fabsf(pi->div_v) + pi->force.curl_v + 0.0001f * ci / fac_mu / hi);
   const float balsara_j =
       fabsf(pj->div_v) /
-      (fabsf(pj->div_v) + pj->force.curl_v + 0.0001 * cj / fac_mu / hj);
+      (fabsf(pj->div_v) + pj->force.curl_v + 0.0001f * cj / fac_mu / hj);
 
   /* Are the particles moving towards each others ? */
   const float omega_ij = fminf(dvdr, 0.f);
@@ -309,10 +309,10 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force(
   /* Balsara term */
   const float balsara_i =
       fabsf(pi->div_v) /
-      (fabsf(pi->div_v) + pi->force.curl_v + 0.0001 * ci / fac_mu / hi);
+      (fabsf(pi->div_v) + pi->force.curl_v + 0.0001f * ci / fac_mu / hi);
   const float balsara_j =
       fabsf(pj->div_v) /
-      (fabsf(pj->div_v) + pj->force.curl_v + 0.0001 * cj / fac_mu / hj);
+      (fabsf(pj->div_v) + pj->force.curl_v + 0.0001f * cj / fac_mu / hj);
 
   /* Are the particles moving towards each others ? */
   const float omega_ij = fminf(dvdr, 0.f);
diff --git a/src/hydro/Gadget2/hydro_io.h b/src/hydro/Gadget2/hydro_io.h
index 17c3d3013644c3572f3c26fc3e270b1c1bc465ed..c1c59dfa4980a2843e7e13bee4c964c9b254cae6 100644
--- a/src/hydro/Gadget2/hydro_io.h
+++ b/src/hydro/Gadget2/hydro_io.h
@@ -56,6 +56,8 @@ __attribute__((always_inline)) INLINE static void hydro_read_particles(
  *
  * @param h_grp The HDF5 group in which to write the arrays.
  * @param fileName The name of the file (unsued in MPI mode).
+ * @param partTypeGroupName The name of the group containing the particles in
+ * the HDF5 file.
  * @param xmfFile The XMF file to write to (unused in MPI mode).
  * @param N The number of particles on that MPI rank.
  * @param N_total The total number of particles (only used in MPI mode)
@@ -67,27 +69,31 @@ __attribute__((always_inline)) INLINE static void hydro_read_particles(
  *
  */
 __attribute__((always_inline)) INLINE static void hydro_write_particles(
-    hid_t h_grp, char* fileName, FILE* xmfFile, int N, long long N_total,
-    int mpi_rank, long long offset, struct part* parts, struct UnitSystem* us) {
+    hid_t h_grp, char* fileName, char* partTypeGroupName, FILE* xmfFile, int N,
+    long long N_total, int mpi_rank, long long offset, struct part* parts,
+    struct UnitSystem* us) {
 
   /* Write arrays */
-  writeArray(h_grp, fileName, xmfFile, "Coordinates", DOUBLE, N, 3, parts,
-             N_total, mpi_rank, offset, x, us, UNIT_CONV_LENGTH);
-  writeArray(h_grp, fileName, xmfFile, "Velocities", FLOAT, N, 3, parts,
-             N_total, mpi_rank, offset, v, us, UNIT_CONV_SPEED);
-  writeArray(h_grp, fileName, xmfFile, "Masses", FLOAT, N, 1, parts, N_total,
-             mpi_rank, offset, mass, us, UNIT_CONV_MASS);
-  writeArray(h_grp, fileName, xmfFile, "SmoothingLength", FLOAT, N, 1, parts,
-             N_total, mpi_rank, offset, h, us, UNIT_CONV_LENGTH);
-  writeArray(h_grp, fileName, xmfFile, "InternalEnergy", FLOAT, N, 1, parts,
-             N_total, mpi_rank, offset, entropy, us,
+  writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Coordinates", DOUBLE,
+             N, 3, parts, N_total, mpi_rank, offset, x, us, UNIT_CONV_LENGTH);
+  writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Velocities", FLOAT,
+             N, 3, parts, N_total, mpi_rank, offset, v, us, UNIT_CONV_SPEED);
+  writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Masses", FLOAT, N, 1,
+             parts, N_total, mpi_rank, offset, mass, us, UNIT_CONV_MASS);
+  writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "SmoothingLength",
+             FLOAT, N, 1, parts, N_total, mpi_rank, offset, h, us,
+             UNIT_CONV_LENGTH);
+  writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "InternalEnergy",
+             FLOAT, N, 1, parts, N_total, mpi_rank, offset, entropy, us,
              UNIT_CONV_ENTROPY_PER_UNIT_MASS);
-  writeArray(h_grp, fileName, xmfFile, "ParticleIDs", ULONGLONG, N, 1, parts,
-             N_total, mpi_rank, offset, id, us, UNIT_CONV_NO_UNITS);
-  writeArray(h_grp, fileName, xmfFile, "Acceleration", FLOAT, N, 3, parts,
-             N_total, mpi_rank, offset, a_hydro, us, UNIT_CONV_ACCELERATION);
-  writeArray(h_grp, fileName, xmfFile, "Density", FLOAT, N, 1, parts, N_total,
-             mpi_rank, offset, rho, us, UNIT_CONV_DENSITY);
+  writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "ParticleIDs",
+             ULONGLONG, N, 1, parts, N_total, mpi_rank, offset, id, us,
+             UNIT_CONV_NO_UNITS);
+  writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Acceleration", FLOAT,
+             N, 3, parts, N_total, mpi_rank, offset, a_hydro, us,
+             UNIT_CONV_ACCELERATION);
+  writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Density", FLOAT, N,
+             1, parts, N_total, mpi_rank, offset, rho, us, UNIT_CONV_DENSITY);
 }
 
 /**
diff --git a/src/hydro/Minimal/hydro.h b/src/hydro/Minimal/hydro.h
index f4e3f1a70625430d9bd891c5f7596d71e7b8b231..7db3c275ce7e3389610e8297c287cbd5301c6c64 100644
--- a/src/hydro/Minimal/hydro.h
+++ b/src/hydro/Minimal/hydro.h
@@ -101,7 +101,12 @@ __attribute__((always_inline))
   p->rho *= ih * ih2;
   p->rho_dh *= ih4;
   p->density.wcount *= (4.0f / 3.0f * M_PI * kernel_gamma3);
-  p->density.wcount_dh *= ih * (4.0f / 3.0f * M_PI * kernel_gamma3);
+  p->density.wcount_dh *= ih * (4.0f / 3.0f * M_PI * kernel_gamma4);
+
+  const float irho = 1.f / p->rho;
+
+  /* Compute the derivative term */
+  p->rho_dh = 1.f / (1.f + 0.33333333f * p->h * p->rho_dh * irho);
 }
 
 /**
diff --git a/src/hydro/Minimal/hydro_iact.h b/src/hydro/Minimal/hydro_iact.h
index 6afb9d8d38a4fc7f1d38b7286720ddb7f3c51ab4..3427ec538613842f8fbcf0d8ba5f9ba5a0b8d540 100644
--- a/src/hydro/Minimal/hydro_iact.h
+++ b/src/hydro/Minimal/hydro_iact.h
@@ -16,12 +16,12 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  *
  ******************************************************************************/
-#ifndef SWIFT_RUNNER_IACT_H
-#define SWIFT_RUNNER_IACT_H
+#ifndef SWIFT_RUNNER_IACT_MINIMAL_H
+#define SWIFT_RUNNER_IACT_MINIMAL_H
 
 /* Includes. */
 #include "const.h"
-#include "kernel.h"
+#include "kernel_hydro.h"
 #include "part.h"
 #include "vector.h"
 
@@ -38,33 +38,31 @@
 __attribute__((always_inline)) INLINE static void runner_iact_density(
     float r2, float *dx, float hi, float hj, struct part *pi, struct part *pj) {
 
-  float r = sqrtf(r2);
-  float xi, xj;
-  float h_inv;
   float wi, wj, wi_dx, wj_dx;
-  float mi, mj;
+
+  const float r = sqrtf(r2);
 
   /* Get the masses. */
-  mi = pi->mass;
-  mj = pj->mass;
+  const float mi = pi->mass;
+  const float mj = pj->mass;
 
   /* Compute density of pi. */
-  h_inv = 1.0 / hi;
-  xi = r * h_inv;
+  const float hi_inv = 1.f / hi;
+  const float xi = r * hi_inv;
   kernel_deval(xi, &wi, &wi_dx);
 
   pi->rho += mj * wi;
-  pi->rho_dh -= mj * (3.0 * wi + xi * wi_dx);
+  pi->rho_dh -= mj * (3.f * wi + xi * wi_dx);
   pi->density.wcount += wi;
   pi->density.wcount_dh -= xi * wi_dx;
 
   /* Compute density of pj. */
-  h_inv = 1.f / hj;
-  xj = r * h_inv;
+  const float hj_inv = 1.f / hj;
+  const float xj = r * hj_inv;
   kernel_deval(xj, &wj, &wj_dx);
 
   pj->rho += mi * wj;
-  pj->rho_dh -= mi * (3.0 * wj + xj * wj_dx);
+  pj->rho_dh -= mi * (3.f * wj + xj * wj_dx);
   pj->density.wcount += wj;
   pj->density.wcount_dh -= xj * wj_dx;
 }
@@ -76,24 +74,20 @@ __attribute__((always_inline)) INLINE static void runner_iact_density(
 __attribute__((always_inline)) INLINE static void runner_iact_nonsym_density(
     float r2, float *dx, float hi, float hj, struct part *pi, struct part *pj) {
 
-  float r;
-  float xi;
-  float h_inv;
   float wi, wi_dx;
-  float mj;
 
   /* Get the masses. */
-  mj = pj->mass;
+  const float mj = pj->mass;
 
   /* Get r and r inverse. */
-  r = sqrtf(r2);
+  const float r = sqrtf(r2);
 
-  h_inv = 1.f / hi;
-  xi = r * h_inv;
+  const float h_inv = 1.f / hi;
+  const float xi = r * h_inv;
   kernel_deval(xi, &wi, &wi_dx);
 
   pi->rho += mj * wi;
-  pi->rho_dh -= mj * (3.0 * wi + xi * wi_dx);
+  pi->rho_dh -= mj * (3.f * wi + xi * wi_dx);
   pi->density.wcount += wi;
   pi->density.wcount_dh -= xi * wi_dx;
 }
@@ -148,7 +142,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_force(
   /* Compute sound speeds */
   const float ci = sqrtf(const_hydro_gamma * pressurei / rhoi);
   const float cj = sqrtf(const_hydro_gamma * pressurej / rhoj);
-  float v_sig = ci + cj + 3.f * omega_ij;
+  const float v_sig = ci + cj + 3.f * omega_ij;
 
   /* SPH acceleration term */
   const float sph_term = (P_over_rho_i * wi_dr + P_over_rho_j * wj_dr) * r_inv;
@@ -225,7 +219,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force(
   /* Compute sound speeds */
   const float ci = sqrtf(const_hydro_gamma * pressurei / rhoi);
   const float cj = sqrtf(const_hydro_gamma * pressurej / rhoj);
-  float v_sig = ci + cj + 3.f * omega_ij;
+  const float v_sig = ci + cj + 3.f * omega_ij;
 
   /* SPH acceleration term */
   const float sph_term = (P_over_rho_i * wi_dr + P_over_rho_j * wj_dr) * r_inv;
@@ -245,4 +239,4 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force(
   pi->force.v_sig = fmaxf(pi->force.v_sig, v_sig);
 }
 
-#endif /* SWIFT_RUNNER_IACT_H */
+#endif /* SWIFT_RUNNER_IACT_MINIMAL_H */
diff --git a/src/hydro/Minimal/hydro_io.h b/src/hydro/Minimal/hydro_io.h
index 2c56fb489ab84ca7c30426b54cf95e26e3821084..afe5de83f423e43b4d2480cca1ac3e84d6c549de 100644
--- a/src/hydro/Minimal/hydro_io.h
+++ b/src/hydro/Minimal/hydro_io.h
@@ -56,6 +56,8 @@ __attribute__((always_inline)) INLINE static void hydro_read_particles(
  *
  * @param h_grp The HDF5 group in which to write the arrays.
  * @param fileName The name of the file (unsued in MPI mode).
+ * @param partTypeGroupName The name of the group containing the particles in
+ * the HDF5 file.
  * @param xmfFile The XMF file to write to (unused in MPI mode).
  * @param N The number of particles on that MPI rank.
  * @param N_total The total number of particles (only used in MPI mode)
@@ -67,26 +69,31 @@ __attribute__((always_inline)) INLINE static void hydro_read_particles(
  *
  */
 __attribute__((always_inline)) INLINE static void hydro_write_particles(
-    hid_t h_grp, char* fileName, FILE* xmfFile, int N, long long N_total,
-    int mpi_rank, long long offset, struct part* parts, struct UnitSystem* us) {
+    hid_t h_grp, char* fileName, char* partTypeGroupName, FILE* xmfFile, int N,
+    long long N_total, int mpi_rank, long long offset, struct part* parts,
+    struct UnitSystem* us) {
 
   /* Write arrays */
-  writeArray(h_grp, fileName, xmfFile, "Coordinates", DOUBLE, N, 3, parts,
-             N_total, mpi_rank, offset, x, us, UNIT_CONV_LENGTH);
-  writeArray(h_grp, fileName, xmfFile, "Velocities", FLOAT, N, 3, parts,
-             N_total, mpi_rank, offset, v, us, UNIT_CONV_SPEED);
-  writeArray(h_grp, fileName, xmfFile, "Masses", FLOAT, N, 1, parts, N_total,
-             mpi_rank, offset, mass, us, UNIT_CONV_MASS);
-  writeArray(h_grp, fileName, xmfFile, "SmoothingLength", FLOAT, N, 1, parts,
-             N_total, mpi_rank, offset, h, us, UNIT_CONV_LENGTH);
-  writeArray(h_grp, fileName, xmfFile, "InternalEnergy", FLOAT, N, 1, parts,
-             N_total, mpi_rank, offset, u, us, UNIT_CONV_ENERGY_PER_UNIT_MASS);
-  writeArray(h_grp, fileName, xmfFile, "ParticleIDs", ULONGLONG, N, 1, parts,
-             N_total, mpi_rank, offset, id, us, UNIT_CONV_NO_UNITS);
-  writeArray(h_grp, fileName, xmfFile, "Acceleration", FLOAT, N, 3, parts,
-             N_total, mpi_rank, offset, a_hydro, us, UNIT_CONV_ACCELERATION);
-  writeArray(h_grp, fileName, xmfFile, "Density", FLOAT, N, 1, parts, N_total,
-             mpi_rank, offset, rho, us, UNIT_CONV_DENSITY);
+  writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Coordinates", DOUBLE,
+             N, 3, parts, N_total, mpi_rank, offset, x, us, UNIT_CONV_LENGTH);
+  writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Velocities", FLOAT,
+             N, 3, parts, N_total, mpi_rank, offset, v, us, UNIT_CONV_SPEED);
+  writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Masses", FLOAT, N, 1,
+             parts, N_total, mpi_rank, offset, mass, us, UNIT_CONV_MASS);
+  writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "SmoothingLength",
+             FLOAT, N, 1, parts, N_total, mpi_rank, offset, h, us,
+             UNIT_CONV_LENGTH);
+  writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "InternalEnergy",
+             FLOAT, N, 1, parts, N_total, mpi_rank, offset, u, us,
+             UNIT_CONV_ENERGY_PER_UNIT_MASS);
+  writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "ParticleIDs",
+             ULONGLONG, N, 1, parts, N_total, mpi_rank, offset, id, us,
+             UNIT_CONV_NO_UNITS);
+  writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Acceleration", FLOAT,
+             N, 3, parts, N_total, mpi_rank, offset, a_hydro, us,
+             UNIT_CONV_ACCELERATION);
+  writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Density", FLOAT, N,
+             1, parts, N_total, mpi_rank, offset, rho, us, UNIT_CONV_DENSITY);
 }
 
 /**
diff --git a/src/kernel.h b/src/kernel.h
deleted file mode 100644
index aead6a95adc35028834d671448223a31a57fc2b6..0000000000000000000000000000000000000000
--- a/src/kernel.h
+++ /dev/null
@@ -1,617 +0,0 @@
-/*******************************************************************************
- * This file is part of SWIFT.
- * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
- *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published
- * by the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- ******************************************************************************/
-#ifndef SWIFT_KERNEL_H
-#define SWIFT_KERNEL_H
-
-/* Includes. */
-#include "const.h"
-#include "inline.h"
-#include "vector.h"
-
-/**
- * @file kernel.h
- * @brief SPH kernel functions. Compute W(x,h) and the gradient of W(x,h),
- *        as well as the blending function used for gravity.
- */
-
-/* Gravity kernel stuff
- * -----------------------------------------------------------------------------------------------
- */
-
-/* The gravity kernel is defined as a degree 6 polynomial in the distance
-   r. The resulting value should be post-multiplied with r^-3, resulting
-   in a polynomial with terms ranging from r^-3 to r^3, which are
-   sufficient to model both the direct potential as well as the splines
-   near the origin. */
-
-/* Coefficients for the gravity kernel. */
-#define kernel_grav_degree 6
-#define kernel_grav_ivals 2
-#define kernel_grav_scale (2 * const_iepsilon)
-static float kernel_grav_coeffs
-    [(kernel_grav_degree + 1) * (kernel_grav_ivals + 1)] = {
-        32.0f * const_iepsilon6,         -192.0f / 5.0f * const_iepsilon5,
-        0.0f,                            32.0f / 3.0f * const_iepsilon3,
-        0.0f,                            0.0f,
-        0.0f,                            -32.0f / 3.0f * const_iepsilon6,
-        192.0f / 5.0f * const_iepsilon5, -48.0f * const_iepsilon4,
-        64.0f / 3.0f * const_iepsilon3,  0.0f,
-        0.0f,                            -1.0f / 15.0f,
-        0.0f,                            0.0f,
-        0.0f,                            0.0f,
-        0.0f,                            0.0f,
-        1.0f};
-
-/**
- * @brief Computes the gravity cubic spline for a given distance x.
- */
-
-__attribute__((always_inline)) INLINE static void kernel_grav_eval(float x,
-                                                                   float *W) {
-  int ind = fmin(x * kernel_grav_scale, kernel_grav_ivals);
-  float *coeffs = &kernel_grav_coeffs[ind * (kernel_grav_degree + 1)];
-  float w = coeffs[0] * x + coeffs[1];
-  for (int k = 2; k <= kernel_grav_degree; k++) w = x * w + coeffs[k];
-  *W = w;
-}
-
-#ifdef VECTORIZE
-
-/**
- * @brief Computes the gravity cubic spline for a given distance x (Vectorized
- * version).
- */
-
-__attribute__((always_inline))
-    INLINE static void kernel_grav_eval_vec(vector *x, vector *w) {
-
-  vector ind, c[kernel_grav_degree + 1];
-  int j, k;
-
-  /* Load x and get the interval id. */
-  ind.m = vec_ftoi(vec_fmin(x->v * vec_set1(kernel_grav_scale),
-                            vec_set1((float)kernel_grav_ivals)));
-
-  /* load the coefficients. */
-  for (k = 0; k < VEC_SIZE; k++)
-    for (j = 0; j < kernel_grav_degree + 1; j++)
-      c[j].f[k] = kernel_grav_coeffs[ind.i[k] * (kernel_grav_degree + 1) + j];
-
-  /* Init the iteration for Horner's scheme. */
-  w->v = (c[0].v * x->v) + c[1].v;
-
-  /* And we're off! */
-  for (int k = 2; k <= kernel_grav_degree; k++) w->v = (x->v * w->v) + c[k].v;
-}
-
-#endif
-
-/* Blending function stuff
- * --------------------------------------------------------------------------------------------
- */
-
-/* Coefficients for the blending function. */
-#define blender_degree 3
-#define blender_ivals 3
-#define blender_scale 4.0f
-static float blender_coeffs[(blender_degree + 1) * (blender_ivals + 1)] = {
-    0.0f,   0.0f,  0.0f,   1.0f,  -32.0f, 24.0f, -6.0f, 1.5f,
-    -32.0f, 72.0f, -54.0f, 13.5f, 0.0f,   0.0f,  0.0f,  0.0f};
-
-/**
- * @brief Computes the cubic spline blender for a given distance x.
- */
-
-__attribute__((always_inline)) INLINE static void blender_eval(float x,
-                                                               float *W) {
-  int ind = fmin(x * blender_scale, blender_ivals);
-  float *coeffs = &blender_coeffs[ind * (blender_degree + 1)];
-  float w = coeffs[0] * x + coeffs[1];
-  for (int k = 2; k <= blender_degree; k++) w = x * w + coeffs[k];
-  *W = w;
-}
-
-/**
- * @brief Computes the cubic spline blender and its derivative for a given
- * distance x.
- */
-
-__attribute__((always_inline)) INLINE static void blender_deval(float x,
-                                                                float *W,
-                                                                float *dW_dx) {
-  int ind = fminf(x * blender_scale, blender_ivals);
-  float *coeffs = &blender_coeffs[ind * (blender_degree + 1)];
-  float w = coeffs[0] * x + coeffs[1];
-  float dw_dx = coeffs[0];
-  for (int k = 2; k <= blender_degree; k++) {
-    dw_dx = dw_dx * x + w;
-    w = x * w + coeffs[k];
-  }
-  *W = w;
-  *dW_dx = dw_dx;
-}
-
-#ifdef VECTORIZE
-
-/**
- * @brief Computes the cubic spline blender and its derivative for a given
- * distance x (Vectorized version). Gives a sensible answer only if x<2.
- */
-
-__attribute__((always_inline)) INLINE static void blender_eval_vec(vector *x,
-                                                                   vector *w) {
-
-  vector ind, c[blender_degree + 1];
-  int j, k;
-
-  /* Load x and get the interval id. */
-  ind.m = vec_ftoi(
-      vec_fmin(x->v * vec_set1(blender_scale), vec_set1((float)blender_ivals)));
-
-  /* load the coefficients. */
-  for (k = 0; k < VEC_SIZE; k++)
-    for (j = 0; j < blender_degree + 1; j++)
-      c[j].f[k] = blender_coeffs[ind.i[k] * (blender_degree + 1) + j];
-
-  /* Init the iteration for Horner's scheme. */
-  w->v = (c[0].v * x->v) + c[1].v;
-
-  /* And we're off! */
-  for (int k = 2; k <= blender_degree; k++) w->v = (x->v * w->v) + c[k].v;
-}
-
-/**
- * @brief Computes the cubic spline blender and its derivative for a given
- * distance x (Vectorized version). Gives a sensible answer only if x<2.
- */
-
-__attribute__((always_inline))
-    INLINE static void blender_deval_vec(vector *x, vector *w, vector *dw_dx) {
-
-  vector ind, c[blender_degree + 1];
-  int j, k;
-
-  /* Load x and get the interval id. */
-  ind.m = vec_ftoi(
-      vec_fmin(x->v * vec_set1(blender_scale), vec_set1((float)blender_ivals)));
-
-  /* load the coefficients. */
-  for (k = 0; k < VEC_SIZE; k++)
-    for (j = 0; j < blender_degree + 1; j++)
-      c[j].f[k] = blender_coeffs[ind.i[k] * (blender_degree + 1) + j];
-
-  /* Init the iteration for Horner's scheme. */
-  w->v = (c[0].v * x->v) + c[1].v;
-  dw_dx->v = c[0].v;
-
-  /* And we're off! */
-  for (int k = 2; k <= blender_degree; k++) {
-    dw_dx->v = (dw_dx->v * x->v) + w->v;
-    w->v = (x->v * w->v) + c[k].v;
-  }
-}
-
-#endif
-
-/* --------------------------------------------------------------------------------------------------------------------
- */
-
-#if defined(CUBIC_SPLINE_KERNEL)
-
-/* --------------------------------------------------------------------------------------------------------------------
- */
-
-/* Coefficients for the kernel. */
-#define kernel_name "Cubic spline"
-#define kernel_degree 3
-#define kernel_ivals 2
-#define kernel_gamma 2.0f
-#define kernel_gamma2 4.0f
-#define kernel_gamma3 8.0f
-#define kernel_igamma 0.5f
-#define kernel_nwneigh                                                      \
-  (4.0 / 3.0 * M_PI *const_eta_kernel *const_eta_kernel *const_eta_kernel * \
-   6.0858f)
-static float kernel_coeffs[(kernel_degree + 1) * (kernel_ivals + 1)]
-    __attribute__((aligned(16))) = {
-        3.0 / 4.0 * M_1_PI, -3.0 / 2.0 * M_1_PI, 0.0,           M_1_PI,
-        -0.25 * M_1_PI,     3.0 / 2.0 * M_1_PI,  -3.0 * M_1_PI, M_2_PI,
-        0.0,                0.0,                 0.0,           0.0};
-#define kernel_root (kernel_coeffs[kernel_degree])
-#define kernel_wroot (4.0 / 3.0 * M_PI *kernel_coeffs[kernel_degree])
-
-/**
- * @brief Computes the cubic spline kernel and its derivative for a given
- * distance x. Gives a sensible answer only if x<2.
- */
-
-__attribute__((always_inline)) INLINE static void kernel_deval(float x,
-                                                               float *W,
-                                                               float *dW_dx) {
-  int ind = fminf(x, kernel_ivals);
-  float *coeffs = &kernel_coeffs[ind * (kernel_degree + 1)];
-  float w = coeffs[0] * x + coeffs[1];
-  float dw_dx = coeffs[0];
-  for (int k = 2; k <= kernel_degree; k++) {
-    dw_dx = dw_dx * x + w;
-    w = x * w + coeffs[k];
-  }
-  *W = w;
-  *dW_dx = dw_dx;
-}
-
-#ifdef VECTORIZE
-
-/**
- * @brief Computes the cubic spline kernel and its derivative for a given
- * distance x (Vectorized version). Gives a sensible answer only if x<2.
- */
-
-__attribute__((always_inline))
-    INLINE static void kernel_deval_vec(vector *x, vector *w, vector *dw_dx) {
-
-  vector ind, c[kernel_degree + 1];
-  int j, k;
-
-  /* Load x and get the interval id. */
-  ind.m = vec_ftoi(vec_fmin(x->v, vec_set1((float)kernel_ivals)));
-
-  /* load the coefficients. */
-  for (k = 0; k < VEC_SIZE; k++)
-    for (j = 0; j < kernel_degree + 1; j++)
-      c[j].f[k] = kernel_coeffs[ind.i[k] * (kernel_degree + 1) + j];
-
-  /* Init the iteration for Horner's scheme. */
-  w->v = (c[0].v * x->v) + c[1].v;
-  dw_dx->v = c[0].v;
-
-  /* And we're off! */
-  for (int k = 2; k <= kernel_degree; k++) {
-    dw_dx->v = (dw_dx->v * x->v) + w->v;
-    w->v = (x->v * w->v) + c[k].v;
-  }
-}
-
-#endif
-
-/**
- * @brief Computes the cubic spline kernel for a given distance x. Gives a
- * sensible answer only if x<2.
- */
-
-__attribute__((always_inline)) INLINE static void kernel_eval(float x,
-                                                              float *W) {
-  int ind = fmin(x, kernel_ivals);
-  float *coeffs = &kernel_coeffs[ind * (kernel_degree + 1)];
-  float w = coeffs[0] * x + coeffs[1];
-  for (int k = 2; k <= kernel_degree; k++) w = x * w + coeffs[k];
-  *W = w;
-}
-
-/* --------------------------------------------------------------------------------------------------------------------
- */
-
-#elif defined(QUARTIC_SPLINE_KERNEL)
-
-/* --------------------------------------------------------------------------------------------------------------------
- */
-
-/* Coefficients for the kernel. */
-#define kernel_name "Quartic spline"
-#define kernel_degree 4
-#define kernel_ivals 3
-#define kernel_gamma 2.5f
-#define kernel_gamma2 6.25f
-#define kernel_gamma3 15.625f
-#define kernel_igamma 0.4f
-#define kernel_nwneigh                                                      \
-  (4.0 / 3.0 * M_PI *const_eta_kernel *const_eta_kernel *const_eta_kernel * \
-   8.2293f)
-static float kernel_coeffs[(kernel_degree + 1) * (kernel_ivals + 1)]
-    __attribute__((aligned(16))) = {
-        3.0 / 10.0 * M_1_PI,  0.0,                  -3.0 / 4.0 * M_1_PI,
-        0.0,                  23.0 / 32.0 * M_1_PI, -1.0 / 5.0 * M_1_PI,
-        M_1_PI,               -3.0 / 2.0 * M_1_PI,  0.25 * M_1_PI,
-        11.0 / 16.0 * M_1_PI, 1.0 / 20.0 * M_1_PI,  -0.5 * M_1_PI,
-        15.0 / 8.0 * M_1_PI,  -25.0 / 8.0 * M_1_PI, 125.0 / 64.0 * M_1_PI,
-        0.0,                  0.0,                  0.0,
-        0.0,                  0.0};
-#define kernel_root (kernel_coeffs[kernel_degree])
-#define kernel_wroot (4.0 / 3.0 * M_PI *kernel_coeffs[kernel_degree])
-
-/**
- * @brief Computes the quartic spline kernel and its derivative for a given
- * distance x. Gives a sensible answer only if x<2.5
- */
-
-__attribute__((always_inline)) INLINE static void kernel_deval(float x,
-                                                               float *W,
-                                                               float *dW_dx) {
-  int ind = fminf(x + 0.5, kernel_ivals);
-  float *coeffs = &kernel_coeffs[ind * (kernel_degree + 1)];
-  float w = coeffs[0] * x + coeffs[1];
-  float dw_dx = coeffs[0];
-  for (int k = 2; k <= kernel_degree; k++) {
-    dw_dx = dw_dx * x + w;
-    w = x * w + coeffs[k];
-  }
-  *W = w;
-  *dW_dx = dw_dx;
-}
-
-#ifdef VECTORIZE
-
-/**
- * @brief Computes the quartic spline kernel and its derivative for a given
- * distance x (Vectorized version). Gives a sensible answer only if x<2.5
- */
-
-__attribute__((always_inline))
-    INLINE static void kernel_deval_vec(vector *x, vector *w, vector *dw_dx) {
-
-  vector ind, c[kernel_degree + 1];
-  int j, k;
-
-  /* Load x and get the interval id. */
-  ind.m = vec_ftoi(vec_fmin(x->v + 0.5f, vec_set1((float)kernel_ivals)));
-
-  /* load the coefficients. */
-  for (k = 0; k < VEC_SIZE; k++)
-    for (j = 0; j < kernel_degree + 1; j++)
-      c[j].f[k] = kernel_coeffs[ind.i[k] * (kernel_degree + 1) + j];
-
-  /* Init the iteration for Horner's scheme. */
-  w->v = (c[0].v * x->v) + c[1].v;
-  dw_dx->v = c[0].v;
-
-  /* And we're off! */
-  for (int k = 2; k <= kernel_degree; k++) {
-    dw_dx->v = (dw_dx->v * x->v) + w->v;
-    w->v = (x->v * w->v) + c[k].v;
-  }
-}
-
-#endif
-
-/**
- * @brief Computes the quartic spline kernel for a given distance x. Gives a
- * sensible answer only if x<2.5
- */
-
-__attribute__((always_inline)) INLINE static void kernel_eval(float x,
-                                                              float *W) {
-  int ind = fmin(x + 0.5f, kernel_ivals);
-  float *coeffs = &kernel_coeffs[ind * (kernel_degree + 1)];
-  float w = coeffs[0] * x + coeffs[1];
-  for (int k = 2; k <= kernel_degree; k++) w = x * w + coeffs[k];
-  *W = w;
-}
-
-/* --------------------------------------------------------------------------------------------------------------------
- */
-
-#elif defined(QUINTIC_SPLINE_KERNEL)
-
-/* --------------------------------------------------------------------------------------------------------------------
- */
-
-/* Coefficients for the kernel. */
-#define kernel_name "Quintic spline"
-#define kernel_degree 5
-#define kernel_ivals 3
-#define kernel_gamma 3.f
-#define kernel_gamma2 9.f
-#define kernel_gamma3 27.f
-#define kernel_igamma 1.0f / 3.0f
-#define kernel_nwneigh                                                      \
-  (4.0 / 3.0 * M_PI *const_eta_kernel *const_eta_kernel *const_eta_kernel * \
-   10.5868f)
-static float kernel_coeffs[(kernel_degree + 1) * (kernel_ivals + 1)]
-    __attribute__((aligned(16))) = {
-        -1.0 / 12.0 * M_1_PI,  1.0 / 4.0 * M_1_PI,   0.0,
-        -1.0 / 2.0 * M_1_PI,   0.0,                  11.0 / 20.0 * M_1_PI,
-        1.0 / 24.0 * M_1_PI,   -3.0 / 8.0 * M_1_PI,  5.0 / 4.0 * M_1_PI,
-        -7.0 / 4.0 * M_1_PI,   5.0 / 8.0 * M_1_PI,   17.0 / 40.0 * M_1_PI,
-        -1.0 / 120.0 * M_1_PI, 1.0 / 8.0 * M_1_PI,   -3.0 / 4.0 * M_1_PI,
-        9.0 / 4.0 * M_1_PI,    -27.0 / 8.0 * M_1_PI, 81.0 / 40.0 * M_1_PI,
-        0.0,                   0.0,                  0.0,
-        0.0,                   0.0,                  0.0};
-#define kernel_root (kernel_coeffs[kernel_degree])
-#define kernel_wroot (4.0 / 3.0 * M_PI *kernel_coeffs[kernel_degree])
-
-/**
- * @brief Computes the quintic spline kernel and its derivative for a given
- * distance x. Gives a sensible answer only if x<3.
- */
-
-__attribute__((always_inline)) INLINE static void kernel_deval(float x,
-                                                               float *W,
-                                                               float *dW_dx) {
-  int ind = fminf(x, kernel_ivals);
-  float *coeffs = &kernel_coeffs[ind * (kernel_degree + 1)];
-  float w = coeffs[0] * x + coeffs[1];
-  float dw_dx = coeffs[0];
-  for (int k = 2; k <= kernel_degree; k++) {
-    dw_dx = dw_dx * x + w;
-    w = x * w + coeffs[k];
-  }
-  *W = w;
-  *dW_dx = dw_dx;
-}
-
-#ifdef VECTORIZE
-
-/**
- * @brief Computes the quintic spline kernel and its derivative for a given
- * distance x (Vectorized version). Gives a sensible answer only if x<3.
- */
-
-__attribute__((always_inline))
-    INLINE static void kernel_deval_vec(vector *x, vector *w, vector *dw_dx) {
-
-  vector ind, c[kernel_degree + 1];
-  int j, k;
-
-  /* Load x and get the interval id. */
-  ind.m = vec_ftoi(vec_fmin(x->v, vec_set1((float)kernel_ivals)));
-
-  /* load the coefficients. */
-  for (k = 0; k < VEC_SIZE; k++)
-    for (j = 0; j < kernel_degree + 1; j++)
-      c[j].f[k] = kernel_coeffs[ind.i[k] * (kernel_degree + 1) + j];
-
-  /* Init the iteration for Horner's scheme. */
-  w->v = (c[0].v * x->v) + c[1].v;
-  dw_dx->v = c[0].v;
-
-  /* And we're off! */
-  for (int k = 2; k <= kernel_degree; k++) {
-    dw_dx->v = (dw_dx->v * x->v) + w->v;
-    w->v = (x->v * w->v) + c[k].v;
-  }
-}
-
-#endif
-
-/**
- * @brief Computes the quintic spline kernel for a given distance x. Gives a
- * sensible answer only if x<3.
- */
-
-__attribute__((always_inline)) INLINE static void kernel_eval(float x,
-                                                              float *W) {
-  int ind = fmin(x, kernel_ivals);
-  float *coeffs = &kernel_coeffs[ind * (kernel_degree + 1)];
-  float w = coeffs[0] * x + coeffs[1];
-  for (int k = 2; k <= kernel_degree; k++) w = x * w + coeffs[k];
-  *W = w;
-}
-
-/* --------------------------------------------------------------------------------------------------------------------
- */
-
-#elif defined(WENDLAND_C2_KERNEL)
-
-/* --------------------------------------------------------------------------------------------------------------------
- */
-
-/* Coefficients for the kernel. */
-#define kernel_name "Wendland C2"
-#define kernel_degree 5
-#define kernel_ivals 1
-#define kernel_gamma 2.f
-#define kernel_gamma2 4.f
-#define kernel_gamma3 8.f
-#define kernel_igamma 0.5f
-#define kernel_nwneigh                                                      \
-  (4.0 / 3.0 * M_PI *const_eta_kernel *const_eta_kernel *const_eta_kernel * \
-   7.261825f)
-static float kernel_coeffs[(kernel_degree + 1) * (kernel_ivals + 1)]
-    __attribute__((aligned(16))) = {
-        0.05222272f, -0.39167037f, 1.04445431f, -1.04445431f, 0.f,  0.41778173f,
-        0.0f,        0.0f,         0.0f,        0.0f,         0.0f, 0.0f};
-#define kernel_root (kernel_coeffs[kernel_degree])
-#define kernel_wroot (4.0 / 3.0 * M_PI *kernel_coeffs[kernel_degree])
-
-/**
- * @brief Computes the quintic spline kernel and its derivative for a given
- * distance x. Gives a sensible answer only if x<1.
- */
-
-__attribute__((always_inline)) INLINE static void kernel_deval(float x,
-                                                               float *W,
-                                                               float *dW_dx) {
-  int ind = fminf(0.5f * x, kernel_ivals);
-  float *coeffs = &kernel_coeffs[ind * (kernel_degree + 1)];
-  float w = coeffs[0] * x + coeffs[1];
-  float dw_dx = coeffs[0];
-  for (int k = 2; k <= kernel_degree; k++) {
-    dw_dx = dw_dx * x + w;
-    w = x * w + coeffs[k];
-  }
-  *W = w;
-  *dW_dx = dw_dx;
-}
-
-#ifdef VECTORIZE
-
-/**
- * @brief Computes the Wendland C2 kernel and its derivative for a given
- * distance x (Vectorized version). Gives a sensible answer only if x<1.
- */
-
-__attribute__((always_inline))
-    INLINE static void kernel_deval_vec(vector *x, vector *w, vector *dw_dx) {
-
-  vector ind, c[kernel_degree + 1];
-  int j, k;
-
-  /* Load x and get the interval id. */
-  ind.m = vec_ftoi(vec_fmin(0.5f * x->v, vec_set1((float)kernel_ivals)));
-
-  /* load the coefficients. */
-  for (k = 0; k < VEC_SIZE; k++)
-    for (j = 0; j < kernel_degree + 1; j++)
-      c[j].f[k] = kernel_coeffs[ind.i[k] * (kernel_degree + 1) + j];
-
-  /* Init the iteration for Horner's scheme. */
-  w->v = (c[0].v * x->v) + c[1].v;
-  dw_dx->v = c[0].v;
-
-  /* And we're off! */
-  for (int k = 2; k <= kernel_degree; k++) {
-    dw_dx->v = (dw_dx->v * x->v) + w->v;
-    w->v = (x->v * w->v) + c[k].v;
-  }
-}
-
-#endif
-
-/**
- * @brief Computes the Wendland C2 kernel for a given distance x. Gives a
- * sensible answer only if x<1.
- */
-
-__attribute__((always_inline)) INLINE static void kernel_eval(float x,
-                                                              float *W) {
-  int ind = fmin(0.5f * x, kernel_ivals);
-  float *coeffs = &kernel_coeffs[ind * (kernel_degree + 1)];
-  float w = coeffs[0] * x + coeffs[1];
-  for (int k = 2; k <= kernel_degree; k++) w = x * w + coeffs[k];
-  *W = w;
-}
-
-/* --------------------------------------------------------------------------------------------------------------------
- */
-
-#else
-
-/* --------------------------------------------------------------------------------------------------------------------
- */
-
-#error "A kernel function must be chosen in const.h !!"
-
-#endif  // Kernel choice
-
-/* Some cross-check functions */
-void SPH_kernel_dump(int N);
-void gravity_kernel_dump(float r_max, int N);
-
-#endif  // SWIFT_KERNEL_H
diff --git a/src/kernel.c b/src/kernel_gravity.c
similarity index 78%
rename from src/kernel.c
rename to src/kernel_gravity.c
index 58f5b0c9fdaa62663c65d5af18afe0a15a813834..639a964c813ef7fd95008857ee17b7dd5ffafb27 100644
--- a/src/kernel.c
+++ b/src/kernel_gravity.c
@@ -21,32 +21,7 @@
 #include <math.h>
 #include <stdio.h>
 
-#include "kernel.h"
-
-/**
- * @brief Test the SPH kernel function by dumping it in the interval [0,1].
- *
- * @param N number of intervals in [0,1].
- */
-void SPH_kernel_dump(int N) {
-
-  int k;
-  float x, w, dw_dx;
-  float x4[4] = {0.0f, 0.0f, 0.0f, 0.0f};
-  float w4[4] = {0.0f, 0.0f, 0.0f, 0.0f};
-  // float dw_dx4[4] __attribute__ ((aligned (16)));
-
-  for (k = 0; k <= N; k++) {
-    x = ((float)k) / N;
-    x4[3] = x4[2];
-    x4[2] = x4[1];
-    x4[1] = x4[0];
-    x4[0] = x;
-    kernel_deval(x, &w, &dw_dx);
-    // kernel_deval_vec( (vector *)x4 , (vector *)w4 , (vector *)dw_dx4 );
-    printf(" %e %e %e %e %e %e %e\n", x, w, dw_dx, w4[0], w4[1], w4[2], w4[3]);
-  }
-}
+#include "kernel_gravity.h"
 
 /**
  * @brief The Gadget-2 gravity kernel function
diff --git a/src/kernel_gravity.h b/src/kernel_gravity.h
new file mode 100644
index 0000000000000000000000000000000000000000..7fd4b061a7e94be01a11b06ad23d9113f579ebb8
--- /dev/null
+++ b/src/kernel_gravity.h
@@ -0,0 +1,209 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
+ *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+#ifndef SWIFT_KERNEL_GRAVITY_H
+#define SWIFT_KERNEL_GRAVITY_H
+
+/* Includes. */
+#include "const.h"
+#include "inline.h"
+#include "vector.h"
+
+/* Gravity kernel stuff
+ * -----------------------------------------------------------------------------------------------
+ */
+
+/* The gravity kernel is defined as a degree 6 polynomial in the distance
+   r. The resulting value should be post-multiplied with r^-3, resulting
+   in a polynomial with terms ranging from r^-3 to r^3, which are
+   sufficient to model both the direct potential as well as the splines
+   near the origin. */
+
+/* Gravity kernel coefficients: one row per branch, highest power first. */
+#define kernel_grav_degree 6 /* Degree of the polynomial */
+#define kernel_grav_ivals 2  /* Index of the last branch */
+#define kernel_grav_scale (2 * const_iepsilon) /* x * scale -> branch id */
+static float kernel_grav_coeffs
+    [(kernel_grav_degree + 1) * (kernel_grav_ivals + 1)] = {
+        32.0f * const_iepsilon6,         -192.0f / 5.0f * const_iepsilon5,
+        0.0f,                            32.0f / 3.0f * const_iepsilon3,
+        0.0f,                            0.0f,
+        0.0f,                            -32.0f / 3.0f * const_iepsilon6,
+        192.0f / 5.0f * const_iepsilon5, -48.0f * const_iepsilon4,
+        64.0f / 3.0f * const_iepsilon3,  0.0f,
+        0.0f,                            -1.0f / 15.0f,
+        0.0f,                            0.0f,
+        0.0f,                            0.0f,
+        0.0f,                            0.0f,
+        1.0f};
+
+/**
+ * @brief Computes the gravity cubic spline for a given distance x, i.e.
+ * evaluates the branch polynomial selected by x with Horner's scheme.
+ */
+__attribute__((always_inline)) INLINE static void kernel_grav_eval(float x,
+                                                                   float *W) {
+  int ind = fmin(x * kernel_grav_scale, kernel_grav_ivals); /* branch id */
+  float *coeffs = &kernel_grav_coeffs[ind * (kernel_grav_degree + 1)];
+  float w = coeffs[0] * x + coeffs[1]; /* first Horner step */
+  for (int k = 2; k <= kernel_grav_degree; k++) w = x * w + coeffs[k];
+  *W = w;
+}
+
+#ifdef VECTORIZE
+
+/**
+ * @brief Computes the gravity cubic spline for each lane of the vector x
+ * (Vectorized version); the results are stored in w.
+ */
+
+__attribute__((always_inline))
+    INLINE static void kernel_grav_eval_vec(vector *x, vector *w) {
+
+  vector ind, c[kernel_grav_degree + 1];
+  int j, k;
+
+  /* Load x and get the interval id. */
+  ind.m = vec_ftoi(vec_fmin(x->v * vec_set1(kernel_grav_scale),
+                            vec_set1((float)kernel_grav_ivals)));
+
+  /* Gather, lane by lane, the coefficients of the selected interval. */
+  for (k = 0; k < VEC_SIZE; k++)
+    for (j = 0; j < kernel_grav_degree + 1; j++)
+      c[j].f[k] = kernel_grav_coeffs[ind.i[k] * (kernel_grav_degree + 1) + j];
+
+  /* Init the iteration for Horner's scheme. */
+  w->v = (c[0].v * x->v) + c[1].v;
+
+  /* Remaining Horner iterations, one per lower-order coefficient. */
+  for (int k = 2; k <= kernel_grav_degree; k++) w->v = (x->v * w->v) + c[k].v;
+}
+
+#endif
+
+/* Blending function stuff
+ * --------------------------------------------------------------------------------------------
+ */
+
+/* Blending function coefficients: one row per branch, highest power first. */
+#define blender_degree 3   /* Degree of the polynomial */
+#define blender_ivals 3    /* Index of the last (all-zero) branch */
+#define blender_scale 4.0f /* x * scale -> branch id */
+static float blender_coeffs[(blender_degree + 1) * (blender_ivals + 1)] = {
+    0.0f,   0.0f,  0.0f,   1.0f,  -32.0f, 24.0f, -6.0f, 1.5f,
+    -32.0f, 72.0f, -54.0f, 13.5f, 0.0f,   0.0f,  0.0f,  0.0f};
+
+/**
+ * @brief Computes the cubic spline blender for a given distance x, i.e.
+ * evaluates the branch polynomial selected by x with Horner's scheme.
+ */
+__attribute__((always_inline)) INLINE static void blender_eval(float x,
+                                                               float *W) {
+  int ind = fmin(x * blender_scale, blender_ivals); /* branch id */
+  float *coeffs = &blender_coeffs[ind * (blender_degree + 1)];
+  float w = coeffs[0] * x + coeffs[1]; /* first Horner step */
+  for (int k = 2; k <= blender_degree; k++) w = x * w + coeffs[k];
+  *W = w;
+}
+
+/**
+ * @brief Computes the cubic spline blender (*W) and its derivative (*dW_dx)
+ * for a given distance x in a single Horner pass.
+ */
+
+__attribute__((always_inline)) INLINE static void blender_deval(float x,
+                                                                float *W,
+                                                                float *dW_dx) {
+  int ind = fminf(x * blender_scale, blender_ivals); /* branch id */
+  float *coeffs = &blender_coeffs[ind * (blender_degree + 1)];
+  float w = coeffs[0] * x + coeffs[1];
+  float dw_dx = coeffs[0];
+  for (int k = 2; k <= blender_degree; k++) {
+    dw_dx = dw_dx * x + w; /* uses w before it is updated below */
+    w = x * w + coeffs[k];
+  }
+  *W = w;
+  *dW_dx = dw_dx;
+}
+
+#ifdef VECTORIZE
+
+/**
+ * @brief Computes the cubic spline blender for a given distance x
+ * (Vectorized version, value only). Gives a sensible answer only if x<2.
+ * NOTE(review): the x<2 bound looks copied from the SPH kernel - confirm.
+ */
+__attribute__((always_inline)) INLINE static void blender_eval_vec(vector *x,
+                                                                   vector *w) {
+
+  vector ind, c[blender_degree + 1];
+  int j, k;
+
+  /* Load x and get the interval id. */
+  ind.m = vec_ftoi(
+      vec_fmin(x->v * vec_set1(blender_scale), vec_set1((float)blender_ivals)));
+
+  /* Gather, lane by lane, the coefficients of the selected interval. */
+  for (k = 0; k < VEC_SIZE; k++)
+    for (j = 0; j < blender_degree + 1; j++)
+      c[j].f[k] = blender_coeffs[ind.i[k] * (blender_degree + 1) + j];
+
+  /* Init the iteration for Horner's scheme. */
+  w->v = (c[0].v * x->v) + c[1].v;
+
+  /* Remaining Horner iterations, one per lower-order coefficient. */
+  for (int k = 2; k <= blender_degree; k++) w->v = (x->v * w->v) + c[k].v;
+}
+
+/**
+ * @brief Computes the cubic spline blender (w) and its derivative (dw_dx) for
+ * a given distance x (Vectorized version). Gives a sensible answer only if x<2.
+ */
+
+__attribute__((always_inline))
+    INLINE static void blender_deval_vec(vector *x, vector *w, vector *dw_dx) {
+
+  vector ind, c[blender_degree + 1];
+  int j, k;
+
+  /* Load x and get the interval id. */
+  ind.m = vec_ftoi(
+      vec_fmin(x->v * vec_set1(blender_scale), vec_set1((float)blender_ivals)));
+
+  /* Gather, lane by lane, the coefficients of the selected interval. */
+  for (k = 0; k < VEC_SIZE; k++)
+    for (j = 0; j < blender_degree + 1; j++)
+      c[j].f[k] = blender_coeffs[ind.i[k] * (blender_degree + 1) + j];
+
+  /* Init the iteration for Horner's scheme. */
+  w->v = (c[0].v * x->v) + c[1].v;
+  dw_dx->v = c[0].v;
+
+  /* Remaining Horner iterations; dw_dx must use w before w is updated. */
+  for (int k = 2; k <= blender_degree; k++) {
+    dw_dx->v = (dw_dx->v * x->v) + w->v;
+    w->v = (x->v * w->v) + c[k].v;
+  }
+}
+
+#endif
+
+void gravity_kernel_dump(float r_max, int N);
+
+#endif  // SWIFT_KERNEL_GRAVITY_H
diff --git a/src/kernel_hydro.c b/src/kernel_hydro.c
new file mode 100644
index 0000000000000000000000000000000000000000..18a930d8ff7f792b2f9606787a6e4c547770629a
--- /dev/null
+++ b/src/kernel_hydro.c
@@ -0,0 +1,49 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2015 Pedro Gonnet (pedro.gonnet@durham.ac.uk),
+ *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+#include <math.h>
+#include <stdio.h>
+
+#include "kernel_hydro.h"
+
+/**
+ * @brief Test the SPH kernel function by printing it at x = k/N, k = 0..N.
+ *
+ * @param N number of intervals in [0,1]; N+1 lines are written to stdout.
+ */
+void hydro_kernel_dump(int N) {
+
+  int k;
+  float x, w, dw_dx;
+  float x4[4] = {0.0f, 0.0f, 0.0f, 0.0f};
+  float w4[4] = {0.0f, 0.0f, 0.0f, 0.0f}; /* stays 0: vector call disabled */
+  // float dw_dx4[4] __attribute__ ((aligned (16)));
+
+  for (k = 0; k <= N; k++) {
+    x = ((float)k) / N;
+    x4[3] = x4[2]; /* shift the history of the last four x values ... */
+    x4[2] = x4[1];
+    x4[1] = x4[0];
+    x4[0] = x; /* ... which only the disabled vector call would read */
+    kernel_deval(x, &w, &dw_dx);
+    // kernel_deval_vec( (vector *)x4 , (vector *)w4 , (vector *)dw_dx4 );
+    printf(" %e %e %e %e %e %e %e\n", x, w, dw_dx, w4[0], w4[1], w4[2], w4[3]);
+  }
+}
diff --git a/src/kernel_hydro.h b/src/kernel_hydro.h
new file mode 100644
index 0000000000000000000000000000000000000000..66f51391fb9504ba30363b1980aaad1fcc9174b7
--- /dev/null
+++ b/src/kernel_hydro.h
@@ -0,0 +1,218 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
+ *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+#ifndef SWIFT_KERNEL_HYDRO_H
+#define SWIFT_KERNEL_HYDRO_H
+
+/* Includes. */
+#include "const.h"
+#include "error.h"
+#include "inline.h"
+#include "vector.h"
+
+/* ------------------------------------------------------------------------- */
+#if defined(CUBIC_SPLINE_KERNEL)
+
+/* Coefficients for the kernel, one row per branch, highest power first. */
+#define kernel_name "Cubic spline (M4)"
+#define kernel_degree 3 /* Degree of the polynomial */
+#define kernel_ivals 2  /* Number of branches */
+#define kernel_gamma 1.825742        /* gamma = H/h */
+#define kernel_constant 16. * M_1_PI /* Prefactor of W and dW_dx */
+static const float kernel_coeffs[(kernel_degree + 1) * (kernel_ivals + 1)]
+    __attribute__((aligned(16))) = {3.f,  -3.f, 0.f,  0.5f, /* 0 < u < 0.5 */
+                                    -1.f, 3.f,  -3.f, 1.f,  /* 0.5 < u < 1 */
+                                    0.f,  0.f,  0.f,  0.f}; /* 1 < u */
+
+/* ------------------------------------------------------------------------- */
+#elif defined(QUARTIC_SPLINE_KERNEL)
+
+/* Coefficients for the kernel, one row per branch, highest power first. */
+#define kernel_name "Quartic spline (M5)"
+#define kernel_degree 4 /* Degree of the polynomial */
+#define kernel_ivals 5  /* Number of branches */
+#define kernel_gamma 2.018932                  /* gamma = H/h */
+#define kernel_constant 15625. * M_1_PI / 512. /* Prefactor of W and dW_dx */
+static const float kernel_coeffs[(kernel_degree + 1) * (kernel_ivals + 1)]
+    __attribute__((aligned(16))) = {
+        6.f,  0.f,  -2.4f, 0.f,   0.368f, /* 0 < u < 0.2 */
+        -4.f, 8.f,  -4.8f, 0.32f, 0.352f, /* 0.2 < u < 0.4 */
+        -4.f, 8.f,  -4.8f, 0.32f, 0.352f, /* 0.4 < u < 0.6 */
+        1.f,  -4.f, 6.f,   -4.f,  1.f,    /* 0.6 < u < 0.8 */
+        1.f,  -4.f, 6.f,   -4.f,  1.f,    /* 0.8 < u < 1 */
+        0.f,  0.f,  0.f,   0.f,   0.f};   /* 1 < u */
+
+/* ------------------------------------------------------------------------- */
+#elif defined(QUINTIC_SPLINE_KERNEL)
+
+/* Coefficients for the kernel, one row per branch, highest power first. */
+#define kernel_name "Quintic spline (M6)"
+#define kernel_degree 5 /* Degree of the polynomial */
+#define kernel_ivals 3  /* Number of branches */
+#define kernel_gamma 2.195775                /* gamma = H/h */
+#define kernel_constant 2187. * M_1_PI / 40. /* Prefactor of W and dW_dx */
+static const float kernel_coeffs[(kernel_degree + 1) * (kernel_ivals + 1)]
+    __attribute__((aligned(16))) = {
+        -10.f,        10.f,      0.f,
+        -2.2222222f,  0.f,       0.271604938f, /* 0 < u < 1/3 */
+        5.f,          -15.f,     16.666667f,
+        -7.77777777f, 0.925925f, 0.209876543f, /* 1/3 < u < 2/3 */
+        -1.f,         5.f,       -10.f,
+        10.f,         -5.f,      1.f, /* 2/3 < u < 1. */
+        0.f,          0.f,       0.f,
+        0.f,          0.f,       0.f}; /* 1 < u */
+
+/* ------------------------------------------------------------------------- */
+#elif defined(WENDLAND_C2_KERNEL)
+
+/* Coefficients for the kernel, one row per branch, highest power first. */
+#define kernel_name "Wendland C2"
+#define kernel_degree 5 /* Degree of the polynomial */
+#define kernel_ivals 1  /* Number of branches */
+#define kernel_gamma 1.936492             /* gamma = H/h */
+#define kernel_constant 21. * M_1_PI / 2. /* Prefactor of W and dW_dx */
+static const float kernel_coeffs[(kernel_degree + 1) * (kernel_ivals + 1)]
+    __attribute__((aligned(16))) = {
+        4.f, -15.f, 20.f, -10.f, 0.f, 1.f,  /* 0 < u < 1 */
+        0.f, 0.f,   0.f,  0.f,   0.f, 0.f}; /* 1 < u */
+
+/* ------------------------------------------------------------------------- */
+#elif defined(WENDLAND_C4_KERNEL)
+
+/* Coefficients for the kernel, one row per branch, highest power first. */
+#define kernel_name "Wendland C4"
+#define kernel_degree 8 /* Degree of the polynomial */
+#define kernel_ivals 1  /* Number of branches */
+#define kernel_gamma 2.207940               /* gamma = H/h */
+#define kernel_constant 495. * M_1_PI / 32. /* Prefactor of W and dW_dx */
+static const float kernel_coeffs[(kernel_degree + 1) * (kernel_ivals + 1)]
+    __attribute__((aligned(16))) = {
+        11.666667f, -64.f,       140.f, -149.333333f, 70.f,
+        0.f,        -9.3333333f, 0.f,   1.f, /* 0 < u < 1 */
+        0.f,        0.f,         0.f,   0.f,          0.f,
+        0.f,        0.f,         0.f,   0.f}; /* 1 < u */
+
+/* ------------------------------------------------------------------------- */
+#elif defined(WENDLAND_C6_KERNEL)
+
+/* Coefficients for the kernel, one row per branch, highest power first. */
+#define kernel_name "Wendland C6"
+#define kernel_degree 11 /* Degree of the polynomial */
+#define kernel_ivals 1   /* Number of branches */
+#define kernel_gamma 2.449490                /* gamma = H/h */
+#define kernel_constant 1365. * M_1_PI / 64. /* Prefactor of W and dW_dx */
+static const float kernel_coeffs[(kernel_degree + 1) * (kernel_ivals + 1)]
+    __attribute__((aligned(16))) = {
+        32.f, -231.f, 704.f, -1155.f, 1056.f, -462.f,
+        0.f,  66.f,   0.f,   -11.f,   0.f,    1.f, /* 0 < u < 1 */
+        0.f,  0.f,    0.f,   0.f,     0.f,    0.f,
+        0.f,  0.f,    0.f,   0.f,     0.f,    0.f}; /* 1 < u */
+
+/* ------------------------------------------------------------------------- */
+#else
+
+#error "A kernel function must be chosen in const.h !!"
+
+/* ------------------------------------------------------------------------- */
+#endif
+
+/* Ok, now comes the real deal. NOTE(review): the macros below expand
+   without outer parentheses; a cast or division binds to the first factor. */
+/* First some powers of gamma = H/h */
+#define kernel_gamma2 kernel_gamma *kernel_gamma
+#define kernel_gamma3 kernel_gamma2 *kernel_gamma
+#define kernel_gamma4 kernel_gamma3 *kernel_gamma
+#define kernel_igamma 1. / kernel_gamma
+#define kernel_igamma2 kernel_igamma *kernel_igamma
+#define kernel_igamma3 kernel_igamma2 *kernel_igamma
+#define kernel_igamma4 kernel_igamma3 *kernel_igamma
+
+/* Third power of eta (const_eta_kernel) */
+#define kernel_eta3 const_eta_kernel *const_eta_kernel *const_eta_kernel
+
+/* The number of neighbours (i.e. N_ngb) */
+#define kernel_nwneigh 4.0 * M_PI *kernel_gamma3 *kernel_eta3 / 3.0
+
+/* Kernel self contribution (i.e. W(0,h)): constant term of the first branch */
+#define kernel_root \
+  (kernel_coeffs[kernel_degree]) * kernel_constant *kernel_igamma3
+
+/**
+ * @brief Computes the kernel function and its derivative (Horner's scheme).
+ *
+ * Returns 0 for both outputs if $u > \\gamma = H/h$ (all-zero last branch).
+ *
+ * @param u The ratio of the distance to the smoothing length $u = x/h$.
+ * @param W (return) The value of the kernel function $W(x,h)$.
+ * @param dW_dx (return) The norm of the gradient $|\\nabla W(x,h)|$.
+ */
+__attribute__((always_inline)) INLINE static void kernel_deval(
+    float u, float *const W, float *const dW_dx) {
+
+  /* Go to the range [0,1[ from [0,gamma[ (u is in units of h) */
+  const float x = u * (float)kernel_igamma;
+
+  /* Pick the correct branch (index capped at kernel_ivals, the zero row) */
+  const int ind = (int)fminf(x * (float)kernel_ivals, kernel_ivals);
+  const float *const coeffs = &kernel_coeffs[ind * (kernel_degree + 1)];
+
+  /* First two terms of the polynomial ... */
+  float w = coeffs[0] * x + coeffs[1];
+  float dw_dx = coeffs[0];
+
+  /* ... and the rest of them (dw_dx must use w before w is updated) */
+  for (int k = 2; k <= kernel_degree; k++) {
+    dw_dx = dw_dx * x + w;
+    w = x * w + coeffs[k];
+  }
+
+  /* Return everything, scaled by kernel_constant and 1/gamma^3 (1/gamma^4) */
+  *W = w * (float)kernel_constant * (float)kernel_igamma3;
+  *dW_dx = dw_dx * (float)kernel_constant * (float)kernel_igamma4;
+}
+
+/**
+ * @brief Computes the kernel function (value only, no derivative).
+ *
+ * @param u The ratio of the distance to the smoothing length $u = x/h$.
+ * @param W (return) The value of the kernel function $W(x,h)$.
+ */
+__attribute__((always_inline)) INLINE static void kernel_eval(float u,
+                                                              float *const W) {
+  /* Go to the range [0,1[ from [0,gamma[ (u is in units of h) */
+  const float x = u * (float)kernel_igamma;
+
+  /* Pick the correct branch (index capped at kernel_ivals, the zero row) */
+  const int ind = (int)fminf(x * (float)kernel_ivals, kernel_ivals);
+  const float *const coeffs = &kernel_coeffs[ind * (kernel_degree + 1)];
+
+  /* First two terms of the polynomial ... */
+  float w = coeffs[0] * x + coeffs[1];
+
+  /* ... and the rest of them (remaining Horner iterations) */
+  for (int k = 2; k <= kernel_degree; k++) w = x * w + coeffs[k];
+
+  /* Return everything, scaled by kernel_constant and 1/gamma^3 */
+  *W = w * (float)kernel_constant * (float)kernel_igamma3;
+}
+
+/* Some cross-check functions */
+void hydro_kernel_dump(int N);
+
+#endif  // SWIFT_KERNEL_HYDRO_H
diff --git a/src/multipole.h b/src/multipole.h
index 91ba6df965ce9d3b088d538411b7f0a8555ba0e4..85ba44d3ce95d958b721d435ccd26b72e30a79c1 100644
--- a/src/multipole.h
+++ b/src/multipole.h
@@ -25,7 +25,7 @@
 /* Includes. */
 #include "const.h"
 #include "inline.h"
-#include "kernel.h"
+#include "kernel_gravity.h"
 #include "part.h"
 
 /* Some constants. */
@@ -127,7 +127,7 @@ __attribute__((always_inline)) INLINE static void multipole_iact_mp(
 
 /* Compute the forces on both multipoles. */
 #if multipole_order == 1
-  for (k = 0; k < 3; k++) p->a[k] += dx[k] * acc;
+  for (k = 0; k < 3; k++) p->a_grav[k] += dx[k] * acc;
 #else
 #error( "Multipoles of order %i not yet implemented." , multipole_order )
 #endif
diff --git a/src/parallel_io.c b/src/parallel_io.c
index cffa99a0fd75566ec3e850076d15e104504eeb40..d1c739b59021f38b2259f82dd06c547e0e7c147d 100644
--- a/src/parallel_io.c
+++ b/src/parallel_io.c
@@ -178,9 +178,10 @@ void readArrayBackEnd(hid_t grp, char* name, enum DATA_TYPE type, int N,
  *
  * Calls #error() if an error occurs.
  */
-void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name,
-                       enum DATA_TYPE type, int N, int dim, long long N_total,
-                       int mpi_rank, long long offset, char* part_c,
+void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile,
+                       char* partTypeGroupName, char* name, enum DATA_TYPE type,
+                       int N, int dim, long long N_total, int mpi_rank,
+                       long long offset, char* part_c, size_t partSize,
                        struct UnitSystem* us,
                        enum UnitConversionFactor convFactor) {
   hid_t h_data = 0, h_err = 0, h_memspace = 0, h_filespace = 0, h_plist_id = 0;
@@ -189,7 +190,6 @@ void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name,
   int i = 0, rank = 0;
   const size_t typeSize = sizeOfType(type);
   const size_t copySize = typeSize * dim;
-  const size_t partSize = sizeof(struct part);
   char* temp_c = 0;
   char buffer[150];
 
@@ -269,14 +269,16 @@ void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name,
   }
 
   /* Write XMF description for this data set */
-  if (mpi_rank == 0) writeXMFline(xmfFile, fileName, name, N_total, dim, type);
+  if (mpi_rank == 0)
+    writeXMFline(xmfFile, fileName, partTypeGroupName, name, N_total, dim,
+                 type);
 
   /* Write unit conversion factors for this data set */
-  conversionString(buffer, us, convFactor);
+  units_conversion_string(buffer, us, convFactor);
   writeAttribute_d(h_data, "CGS conversion factor",
-                   conversionFactor(us, convFactor));
-  writeAttribute_f(h_data, "h-scale exponent", hFactor(us, convFactor));
-  writeAttribute_f(h_data, "a-scale exponent", aFactor(us, convFactor));
+                   units_conversion_factor(us, convFactor));
+  writeAttribute_f(h_data, "h-scale exponent", units_h_factor(us, convFactor));
+  writeAttribute_f(h_data, "a-scale exponent", units_a_factor(us, convFactor));
   writeAttribute_s(h_data, "Conversion factor", buffer);
 
   /* Free and close everything */
@@ -328,14 +330,16 @@ void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name,
  * @param convFactor The UnitConversionFactor for this array
  *
  */
-#define writeArray(grp, fileName, xmfFile, name, type, N, dim, part, N_total, \
-                   mpi_rank, offset, field, us, convFactor)                   \
-  writeArrayBackEnd(grp, fileName, xmfFile, name, type, N, dim, N_total,      \
-                    mpi_rank, offset, (char*)(&(part[0]).field), us,          \
-                    convFactor)
+#define writeArray(grp, fileName, xmfFile, pTypeGroupName, name, type, N, dim, \
+                   part, N_total, mpi_rank, offset, field, us, convFactor)     \
+  writeArrayBackEnd(grp, fileName, xmfFile, pTypeGroupName, name, type, N,     \
+                    dim, N_total, mpi_rank, offset, (char*)(&(part[0]).field), \
+                    sizeof(part[0]), us, convFactor)
 
 /* Import the right hydro definition */
 #include "hydro_io.h"
+/* Import the right gravity definition */
+#include "gravity_io.h"
 
 /**
  * @brief Reads an HDF5 initial condition file (GADGET-3 type) in parallel
@@ -345,6 +349,7 @@ void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name,
  * @param parts (output) The array of #part read from the file.
  * @param N (output) The number of particles read from the file.
  * @param periodic (output) 1 if the volume is periodic, 0 if not.
+ * @param dry_run If 1, don't read the particles. Only allocates the arrays.
  *
  * Opens the HDF5 file fileName and reads the particles contained
  * in the parts array. N is the returned number of particles found
@@ -357,16 +362,17 @@ void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name,
  *
  */
 void read_ic_parallel(char* fileName, double dim[3], struct part** parts,
-                      size_t* N, int* periodic, int mpi_rank, int mpi_size,
-                      MPI_Comm comm, MPI_Info info) {
+                      struct gpart** gparts, size_t* Ngas, size_t* Ngparts,
+                      int* periodic, int mpi_rank, int mpi_size, MPI_Comm comm,
+                      MPI_Info info, int dry_run) {
   hid_t h_file = 0, h_grp = 0;
-  double boxSize[3] = {
-      0.0, -1.0, -1.0}; /* GADGET has only cubic boxes (in cosmological mode) */
-  int numParticles[6] = {
-      0}; /* GADGET has 6 particle types. We only keep the type 0*/
-  int numParticles_highWord[6] = {0};
-  long long offset = 0;
-  long long N_total = 0;
+  /* GADGET has only cubic boxes (in cosmological mode) */
+  double boxSize[3] = {0.0, -1.0, -1.0};
+  int numParticles[NUM_PARTICLE_TYPES] = {0};
+  int numParticles_highWord[NUM_PARTICLE_TYPES] = {0};
+  size_t N[NUM_PARTICLE_TYPES] = {0};
+  long long N_total[NUM_PARTICLE_TYPES] = {0};
+  long long offset[NUM_PARTICLE_TYPES] = {0};
 
   /* Open file */
   /* message("Opening file '%s' as IC.", fileName); */
@@ -398,58 +404,118 @@ void read_ic_parallel(char* fileName, double dim[3], struct part** parts,
   readAttribute(h_grp, "NumPart_Total", UINT, numParticles);
   readAttribute(h_grp, "NumPart_Total_HighWord", UINT, numParticles_highWord);
 
-  N_total = ((long long)numParticles[0]) +
-            ((long long)numParticles_highWord[0] << 32);
+  for (int ptype = 0; ptype < NUM_PARTICLE_TYPES; ++ptype)
+    N_total[ptype] = ((long long)numParticles[ptype]) +
+                     ((long long)numParticles_highWord[ptype] << 32);
+
   dim[0] = boxSize[0];
   dim[1] = (boxSize[1] < 0) ? boxSize[0] : boxSize[1];
   dim[2] = (boxSize[2] < 0) ? boxSize[0] : boxSize[2];
 
-  /* message("Found %d particles in a %speriodic box of size [%f %f %f].",  */
-  /* 	 N_total, (periodic ? "": "non-"), dim[0], dim[1], dim[2]); */
+  /* message("Found %d particles in a %speriodic box of size
+   * [%f %f %f].",  */
+  /* 	 N_total, (periodic ? "": "non-"), dim[0],
+   * dim[1], dim[2]); */
 
   /* Divide the particles among the tasks. */
-  offset = mpi_rank * N_total / mpi_size;
-  *N = (mpi_rank + 1) * N_total / mpi_size - offset;
+  for (int ptype = 0; ptype < NUM_PARTICLE_TYPES; ++ptype) {
+    offset[ptype] = mpi_rank * N_total[ptype] / mpi_size;
+    N[ptype] = (mpi_rank + 1) * N_total[ptype] / mpi_size - offset[ptype];
+  }
 
   /* Close header */
   H5Gclose(h_grp);
 
-  /* Allocate memory to store particles */
-  if (posix_memalign((void*)parts, part_align, *N * sizeof(struct part)) != 0)
+  /* Allocate memory to store SPH particles */
+  *Ngas = N[0];
+  if (posix_memalign((void*)parts, part_align, (*Ngas) * sizeof(struct part)) !=
+      0)
     error("Error while allocating memory for particles");
-  bzero(*parts, *N * sizeof(struct part));
+  bzero(*parts, *Ngas * sizeof(struct part));
 
-  /* message("Allocated %8.2f MB for particles.", *N * sizeof(struct part) /
+  /* Allocate memory to store all particles */
+  const size_t Ndm = N[1];
+  *Ngparts = N[1] + N[0];
+  if (posix_memalign((void*)gparts, gpart_align,
+                     *Ngparts * sizeof(struct gpart)) != 0)
+    error(
+        "Error while allocating memory for gravity "
+        "particles");
+  bzero(*gparts, *Ngparts * sizeof(struct gpart));
+
+  /* message("Allocated %8.2f MB for particles.", *N *
+   * sizeof(struct part) /
    * (1024.*1024.)); */
 
-  /* Open SPH particles group */
-  /* message("Reading particle arrays..."); */
-  h_grp = H5Gopen(h_file, "/PartType0", H5P_DEFAULT);
-  if (h_grp < 0) error("Error while opening particle group.\n");
+  /* message("BoxSize = %lf", dim[0]); */
+  /* message("NumPart = [%zd, %zd] Total = %zd", *Ngas, Ndm,
+   * *Ngparts); */
 
-  /* Read particle fields into the particle structure */
-  hydro_read_particles(h_grp, *N, N_total, offset, *parts);
+  /* Loop over all particle types */
+  for (int ptype = 0; ptype < NUM_PARTICLE_TYPES; ptype++) {
 
-  /* Close particle group */
-  H5Gclose(h_grp);
+    /* Don't do anything if no particle of this kind */
+    if (N_total[ptype] == 0) continue;
+
+    /* Open the particle group in the file */
+    char partTypeGroupName[PARTICLE_GROUP_BUFFER_SIZE];
+    snprintf(partTypeGroupName, PARTICLE_GROUP_BUFFER_SIZE, "/PartType%d",
+             ptype);
+    h_grp = H5Gopen(h_file, partTypeGroupName, H5P_DEFAULT);
+    if (h_grp < 0) {
+      error("Error while opening particle group %s.", partTypeGroupName);
+    }
+
+    /* Read particle fields into the particle structure */
+    switch (ptype) {
+
+      case GAS:
+        if (!dry_run)
+          hydro_read_particles(h_grp, N[ptype], N_total[ptype], offset[ptype],
+                               *parts);
+        break;
+
+      case DM:
+        if (!dry_run)
+          darkmatter_read_particles(h_grp, N[ptype], N_total[ptype],
+                                    offset[ptype], *gparts);
+        break;
+
+      default:
+        error("Particle Type %d not yet supported. Aborting", ptype);
+    }
+
+    /* Close particle group */
+    H5Gclose(h_grp);
+  }
+
+  /* Prepare the DM particles */
+  if (!dry_run) prepare_dm_gparts(*gparts, Ndm);
+
+  /* Now duplicate the hydro particle into gparts */
+  if (!dry_run) duplicate_hydro_gparts(*parts, *gparts, *Ngas, Ndm);
+
+  /* message("Done Reading particles..."); */
 
   /* Close property handler */
   H5Pclose(h_plist_id);
 
   /* Close file */
   H5Fclose(h_file);
-
-  /* message("Done Reading particles..."); */
 }
 
 /**
- * @brief Writes an HDF5 output file (GADGET-3 type) with its XMF descriptor
+ * @brief Writes an HDF5 output file (GADGET-3 type) with
+ * its XMF descriptor
  *
  * @param e The engine containing all the system.
- * @param us The UnitSystem used for the conversion of units in the output
+ * @param us The UnitSystem used for the conversion of units
+ * in the output
  *
- * Creates an HDF5 output file and writes the particles contained
- * in the engine. If such a file already exists, it is erased and replaced
+ * Creates an HDF5 output file and writes the particles
+ * contained
+ * in the engine. If such a file already exists, it is
+ * erased and replaced
  * by the new one.
  * The companion XMF file is also updated accordingly.
  *
@@ -459,23 +525,27 @@ void read_ic_parallel(char* fileName, double dim[3], struct part** parts,
 void write_output_parallel(struct engine* e, struct UnitSystem* us,
                            int mpi_rank, int mpi_size, MPI_Comm comm,
                            MPI_Info info) {
-
   hid_t h_file = 0, h_grp = 0, h_grpsph = 0;
-  int N = e->s->nr_parts;
+  const size_t Ngas = e->s->nr_parts;
+  const size_t Ntot = e->s->nr_gparts;
   int periodic = e->s->periodic;
-  unsigned int numParticles[6] = {N, 0};
-  unsigned int numParticlesHighWord[6] = {0};
-  unsigned int flagEntropy[6] = {0};
-  long long N_total = 0, offset = 0;
-  double offset_d = 0., N_d = 0., N_total_d = 0.;
   int numFiles = 1;
   struct part* parts = e->s->parts;
-  FILE* xmfFile = 0;
+  struct gpart* gparts = e->s->gparts;
+  struct gpart* dmparts = NULL;
   static int outputCount = 0;
+  FILE* xmfFile = 0;
+
+  /* Number of particles of each type */
+  // const size_t Ndm = Ntot - Ngas;
+
+  /* MATTHIEU: Temporary fix to preserve master */
+  const size_t Ndm = Ntot > 0 ? Ntot - Ngas : 0;
+  /* MATTHIEU: End temporary fix */
 
   /* File name */
-  char fileName[200];
-  sprintf(fileName, "output_%03i.hdf5", outputCount);
+  char fileName[FILENAME_BUFFER_SIZE];
+  snprintf(fileName, FILENAME_BUFFER_SIZE, "output_%03i.hdf5", outputCount);
 
   /* First time, we need to create the XMF file */
   if (outputCount == 0 && mpi_rank == 0) createXMFfile();
@@ -491,21 +561,26 @@ void write_output_parallel(struct engine* e, struct UnitSystem* us,
     error("Error while opening file '%s'.", fileName);
   }
 
-  /* Compute offset in the file and total number of particles */
-  /* Done using double to allow for up to 2^50=10^15 particles */
-  N_d = (double)N;
-  MPI_Exscan(&N_d, &offset_d, 1, MPI_DOUBLE, MPI_SUM, comm);
-  N_total_d = offset_d + N_d;
-  MPI_Bcast(&N_total_d, 1, MPI_DOUBLE, mpi_size - 1, comm);
-  if (N_total_d > 1.e15)
-    error(
-        "Error while computing the offset for parallel output: Simulation has "
-        "more than 10^15 particles.\n");
-  N_total = (long long)N_total_d;
-  offset = (long long)offset_d;
+  /* Compute offset in the file and total number of
+   * particles */
+  size_t N[NUM_PARTICLE_TYPES] = {Ngas, Ndm, 0};
+  long long N_total[NUM_PARTICLE_TYPES] = {0};
+  long long offset[NUM_PARTICLE_TYPES] = {0};
+  MPI_Exscan(&N, &offset, NUM_PARTICLE_TYPES, MPI_LONG_LONG, MPI_SUM, comm);
+  for (int ptype = 0; ptype < NUM_PARTICLE_TYPES; ++ptype)
+    N_total[ptype] = offset[ptype] + N[ptype];
+
+  /* The last rank now has the correct N_total. Let's
+   * broadcast from there */
+  MPI_Bcast(&N_total, 6, MPI_LONG_LONG, mpi_size - 1, comm);
 
-  /* Write the part of the XMF file corresponding to this specific output */
-  if (mpi_rank == 0) writeXMFheader(xmfFile, N_total, fileName, e->time);
+  /* Now everybody knows its offset and the total number of
+   * particles of each
+   * type */
+
+  /* Write the part of the XMF file corresponding to this
+   * specific output */
+  if (mpi_rank == 0) writeXMFoutputheader(xmfFile, fileName, e->time);
 
   /* Open header to write simulation properties */
   /* message("Writing runtime parameters..."); */
@@ -526,19 +601,28 @@ void write_output_parallel(struct engine* e, struct UnitSystem* us,
 
   /* Print the relevant information and print status */
   writeAttribute(h_grp, "BoxSize", DOUBLE, e->s->dim, 3);
-  writeAttribute(h_grp, "NumPart_ThisFile", UINT, numParticles, 6);
   double dblTime = e->time;
   writeAttribute(h_grp, "Time", DOUBLE, &dblTime, 1);
 
   /* GADGET-2 legacy values */
-  numParticles[0] = (unsigned int)N_total;
-  writeAttribute(h_grp, "NumPart_Total", UINT, numParticles, 6);
-  numParticlesHighWord[0] = (unsigned int)(N_total >> 32);
+  /* Number of particles of each type */
+  unsigned int numParticles[NUM_PARTICLE_TYPES] = {0};
+  unsigned int numParticlesHighWord[NUM_PARTICLE_TYPES] = {0};
+  for (int ptype = 0; ptype < NUM_PARTICLE_TYPES; ++ptype) {
+    numParticles[ptype] = (unsigned int)N_total[ptype];
+    numParticlesHighWord[ptype] = (unsigned int)(N_total[ptype] >> 32);
+  }
+  writeAttribute(h_grp, "NumPart_ThisFile", LONGLONG, N_total,
+                 NUM_PARTICLE_TYPES);
+  writeAttribute(h_grp, "NumPart_Total", UINT, numParticles,
+                 NUM_PARTICLE_TYPES);
   writeAttribute(h_grp, "NumPart_Total_HighWord", UINT, numParticlesHighWord,
-                 6);
+                 NUM_PARTICLE_TYPES);
   double MassTable[6] = {0., 0., 0., 0., 0., 0.};
-  writeAttribute(h_grp, "MassTable", DOUBLE, MassTable, 6);
-  writeAttribute(h_grp, "Flag_Entropy_ICs", UINT, flagEntropy, 6);
+  writeAttribute(h_grp, "MassTable", DOUBLE, MassTable, NUM_PARTICLE_TYPES);
+  unsigned int flagEntropy[NUM_PARTICLE_TYPES] = {0};
+  writeAttribute(h_grp, "Flag_Entropy_ICs", UINT, flagEntropy,
+                 NUM_PARTICLE_TYPES);
   writeAttribute(h_grp, "NumFilesPerSnapshot", INT, &numFiles, 1);
 
   /* Close header */
@@ -556,21 +640,71 @@ void write_output_parallel(struct engine* e, struct UnitSystem* us,
   /* Print the system of Units */
   writeUnitSystem(h_file, us);
 
-  /* Create SPH particles group */
-  /* message("Writing particle arrays..."); */
-  h_grp =
-      H5Gcreate(h_file, "/PartType0", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-  if (h_grp < 0) error("Error while creating particle group.\n");
+  /* Loop over all particle types */
+  for (int ptype = 0; ptype < NUM_PARTICLE_TYPES; ptype++) {
+
+    /* Don't do anything if no particle of this kind */
+    if (N_total[ptype] == 0) continue;
+
+    /* Add the global information for that particle type to
+     * the XMF meta-file */
+    if (mpi_rank == 0)
+      writeXMFgroupheader(xmfFile, fileName, N_total[ptype], ptype);
+
+    /* Open the particle group in the file */
+    char partTypeGroupName[PARTICLE_GROUP_BUFFER_SIZE];
+    snprintf(partTypeGroupName, PARTICLE_GROUP_BUFFER_SIZE, "/PartType%d",
+             ptype);
+    h_grp = H5Gcreate(h_file, partTypeGroupName, H5P_DEFAULT, H5P_DEFAULT,
+                      H5P_DEFAULT);
+    if (h_grp < 0) {
+      error("Error while opening particle group %s.", partTypeGroupName);
+    }
 
-  /* Write particle fields from the particle structure */
-  hydro_write_particles(h_grp, fileName, xmfFile, N, N_total, mpi_rank, offset,
-                        parts, us);
+    /* Write particle fields from the particle structure */
+    switch (ptype) {
 
-  /* Close particle group */
-  H5Gclose(h_grp);
+      case GAS:
+        hydro_write_particles(h_grp, fileName, partTypeGroupName, xmfFile,
+                              N[ptype], N_total[ptype], mpi_rank, offset[ptype],
+                              parts, us);
+
+        break;
+
+      case DM:
+        /* Allocate temporary array */
+        if (posix_memalign((void*)&dmparts, gpart_align,
+                           Ndm * sizeof(struct gpart)) != 0)
+          error(
+              "Error while allocating temporart memory for "
+              "DM particles");
+        bzero(dmparts, Ndm * sizeof(struct gpart));
+
+        /* Collect the DM particles from gpart */
+        collect_dm_gparts(gparts, Ntot, dmparts, Ndm);
+
+        /* Write DM particles */
+        darkmatter_write_particles(h_grp, fileName, partTypeGroupName, xmfFile,
+                                   N[ptype], N_total[ptype], mpi_rank,
+                                   offset[ptype], dmparts, us);
+
+        /* Free temporary array */
+        free(dmparts);
+        break;
+
+      default:
+        error("Particle Type %d not yet supported. Aborting", ptype);
+    }
+
+    /* Close particle group */
+    H5Gclose(h_grp);
+
+    /* Close this particle group in the XMF file as well */
+    if (mpi_rank == 0) writeXMFgroupfooter(xmfFile, ptype);
+  }
 
   /* Write LXMF file descriptor */
-  if (mpi_rank == 0) writeXMFfooter(xmfFile);
+  if (mpi_rank == 0) writeXMFoutputfooter(xmfFile, outputCount, e->time);
 
   /* message("Done writing particles..."); */
 
diff --git a/src/parallel_io.h b/src/parallel_io.h
index a0589944ec845c712abde1e64e305980748db0e7..f3691cb29b8d5e7f17382f1f81ba230c3898a929 100644
--- a/src/parallel_io.h
+++ b/src/parallel_io.h
@@ -32,8 +32,9 @@
 #if defined(HAVE_HDF5) && defined(WITH_MPI) && defined(HAVE_PARALLEL_HDF5)
 
 void read_ic_parallel(char* fileName, double dim[3], struct part** parts,
-                      size_t* N, int* periodic, int mpi_rank, int mpi_size,
-                      MPI_Comm comm, MPI_Info info);
+                      struct gpart** gparts, size_t* Ngas, size_t* Ngparts,
+                      int* periodic, int mpi_rank, int mpi_size, MPI_Comm comm,
+                      MPI_Info info, int dry_run);
 
 void write_output_parallel(struct engine* e, struct UnitSystem* us,
                            int mpi_rank, int mpi_size, MPI_Comm comm,
diff --git a/src/parser.c b/src/parser.c
new file mode 100644
index 0000000000000000000000000000000000000000..0f767bc434ef596df403fb12d3ae0f77ea546df3
--- /dev/null
+++ b/src/parser.c
@@ -0,0 +1,493 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2016 James Willis (james.s.willis@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+/* Config parameters. */
+#include "../config.h"
+
+/* Some standard headers. */
+/* Needs to be included so that strtok returns char * instead of a int *. */
+#include <string.h>
+#include <stdlib.h>
+#include <ctype.h>
+
+/* This object's header. */
+#include "parser.h"
+
+/* Local headers. */
+#include "error.h"
+
+#define PARSER_COMMENT_STRING "#"
+#define PARSER_COMMENT_CHAR '#'
+#define PARSER_VALUE_CHAR ':'
+#define PARSER_VALUE_STRING ":"
+#define PARSER_START_OF_FILE "---"
+#define PARSER_END_OF_FILE "..."
+
+/* Private functions. */
+static int count_char(const char *str, char val);
+static int is_empty(const char *str);
+static int count_indentation(const char *str);
+static void parse_line(char *line, struct swift_params *params);
+static void parse_value(char *line, struct swift_params *params);
+static void parse_section_param(char *line, int *isFirstParam,
+                                char *sectionName, struct swift_params *params);
+
+static int lineNumber = 0;
+
+/**
+ * @brief Reads an input file and stores each parameter in a structure.
+ *
+ * Calls error() if the file cannot be opened.
+ *
+ * @param file_name Name of file to be read
+ * @param params Structure to be populated from file
+ */
+void parser_read_file(const char *file_name, struct swift_params *params) {
+  /* Open file for reading */
+  FILE *file = fopen(file_name, "r");
+
+  /* Line to be parsed. */
+  char line[PARSER_MAX_LINE_SIZE];
+
+  /* Initialise the parameter count and restart the line numbering used in
+   * the error messages, so that reading a second file reports the right
+   * line. */
+  params->count = 0;
+  lineNumber = 0;
+
+  /* Check if parameter file exists. */
+  if (file == NULL) error("Error opening parameter file: %s", file_name);
+
+  /* Read line by line; fgets() returns NULL at the end of the file and on a
+   * read error, so the loop cannot spin forever on a failing stream. */
+  while (fgets(line, PARSER_MAX_LINE_SIZE, file) != NULL) {
+    lineNumber++;
+    parse_line(line, params);
+  }
+  fclose(file);
+}
+
+/**
+ * @brief Tallies how often a given character occurs in a string.
+ *
+ * @param str Null-terminated string to scan
+ * @param val Character to look for
+ *
+ * @return Number of characters of str that are equal to val
+ */
+
+static int count_char(const char *str, char val) {
+  int hits = 0;
+
+  /* Visit every character up to (excluding) the terminating '\0' */
+  for (const char *c = str; *c != '\0'; c++) {
+    hits += (*c == val);
+  }
+
+  return hits;
+}
+
+/**
+ * @brief Counts the number of white spaces that prefix a string.
+ *
+ * @param str String to be checked
+ *
+ * @return Number of ' ' characters found before the first other character
+ */
+
+static int count_indentation(const char *str) {
+  int count = 0;
+
+  /* Test the current character before advancing so that the first one is
+   * counted too (a pre-increment here skipped it). */
+  while (*str++ == ' ') count++;
+
+  return count;
+}
+
+/**
+ * @brief Checks if a string is empty or made only of white-space characters.
+ *
+ * @param str String to be checked
+ *
+ * @return Returns 1 if str holds nothing but white space, 0 otherwise
+ */
+
+static int is_empty(const char *str) {
+
+  /* isspace() is only defined for EOF and values representable as an
+   * unsigned char: convert first so that bytes above 127 (negative when
+   * char is signed) are handled safely. */
+  for (; *str != '\0'; str++) {
+    if (!isspace((unsigned char)*str)) return 0;
+  }
+
+  /* Reached the terminator without meeting a visible character. */
+  return 1;
+}
+
+/**
+ * @brief Parses a line from a file and stores any parameters in a structure.
+ *
+ * @param line Line to be parsed (modified in place by strtok()).
+ * @param params Structure to be populated from file.
+ */
+static void parse_line(char *line, struct swift_params *params) {
+
+  char no_comment[PARSER_MAX_LINE_SIZE];
+  char text[PARSER_MAX_LINE_SIZE];
+
+  /* Lines starting with the comment character carry nothing to parse. */
+  if (*line == PARSER_COMMENT_CHAR) return;
+
+  /* Keep only what precedes a trailing comment. */
+  strcpy(no_comment, strtok(line, PARSER_COMMENT_STRING));
+
+  /* Lines made only of white space are skipped. */
+  if (is_empty(no_comment)) return;
+
+  /* Cut the string at the end-of-line character. */
+  strcpy(text, strtok(no_comment, "\n"));
+
+  /* A line holding the value character is a parameter or section heading. */
+  if (strchr(text, PARSER_VALUE_CHAR)) {
+    parse_value(text, params);
+    return;
+  }
+
+  /* Any other line is invalid unless it is the start- or end-of-file marker
+   * (strcmp() returns 0 for identical strings). */
+  const int is_start = !strcmp(text, PARSER_START_OF_FILE);
+  const int is_end = !strcmp(text, PARSER_END_OF_FILE);
+
+  if (!is_start && !is_end) error("Invalid line:%d '%s'.", lineNumber, text);
+}
+
+/**
+ * @brief Performs error checking and stores a parameter in a structure.
+ *
+ * Indented lines are forwarded to parse_section_param(); other lines open a
+ * section (name without value) or define a standalone parameter.
+ *
+ * @param line Line containing the parameter (modified in place by strtok())
+ * @param params Structure to be written to
+ */
+static void parse_value(char *line, struct swift_params *params) {
+  static int inSection = 0;
+  static char section[PARSER_MAX_LINE_SIZE]; /* Current section name. */
+  static int isFirstParam = 1;
+  char tmpStr[PARSER_MAX_LINE_SIZE];
+  char *token;
+
+  /* Check for more than one value on the same line. */
+  if (count_char(line, PARSER_VALUE_CHAR) > 1) {
+    error("Invalid line:%d '%s', only one value allowed per line.", lineNumber,
+          line);
+  }
+
+  /* Check that standalone parameters have correct indentation. */
+  if (!inSection && *line == ' ') {
+    error(
+        "Invalid line:%d '%s', standalone parameter defined with incorrect "
+        "indentation.",
+        lineNumber, line);
+  }
+
+  /* Check that it is a parameter inside a section.*/
+  if (*line == ' ' || *line == '\t') {
+    parse_section_param(line, &isFirstParam, section, params);
+    return;
+  }
+
+  /* Else it is the start of a new section or standalone parameter. The name
+   * is the first token; a line such as ":" has none (strtok() gives NULL). */
+  token = strtok(line, " :\t");
+  if (token == NULL) error("Invalid line:%d, missing name.", lineNumber);
+  strcpy(tmpStr, token);
+
+  /* Take second token as the value; without one this is a section heading. */
+  token = strtok(NULL, " #\n");
+  if (token == NULL) {
+    strcat(tmpStr, PARSER_VALUE_STRING);
+    strcpy(section, tmpStr);
+    inSection = 1;
+  } else {
+    /* Standalone parameter: refuse to write past the end of the array. */
+    if (params->count >= PARSER_MAX_NO_OF_PARAMS)
+      error("Too many parameters (maximum is %d).", PARSER_MAX_NO_OF_PARAMS);
+    strcpy(params->data[params->count].name, tmpStr);
+    strcpy(params->data[params->count++].value, token);
+    inSection = 0;
+  }
+  isFirstParam = 1;
+}
+
+/**
+ * @brief Parses a parameter that appears in a section and stores it in a
+ * structure.
+ *
+ * @param line Line containing the parameter (modified in place by strtok())
+ * @param isFirstParam Shows if the first parameter of a section has been found
+ * @param sectionName String containing the current section name
+ * @param params Structure to be written to
+ */
+static void parse_section_param(char *line, int *isFirstParam,
+                                char *sectionName,
+                                struct swift_params *params) {
+  static int sectionIndent = 0;
+
+  /* Count indentation of each parameter and check that it
+   * is consistent with the first parameter in the section. */
+  if (*isFirstParam) {
+    sectionIndent = count_indentation(line);
+    *isFirstParam = 0;
+  } else if (count_indentation(line) != sectionIndent) {
+    error("Invalid line:%d '%s', parameter has incorrect indentation.",
+          lineNumber, line);
+  }
+
+  /* First token: the name (leading white space skipped); second: the value.
+   * strtok() returns NULL when a token is missing, so check before copying. */
+  const char *const name = strtok(line, " :\t");
+  const char *const value = strtok(NULL, " #\n");
+  if (name == NULL || value == NULL)
+    error("Invalid line:%d, missing parameter name or value.", lineNumber);
+
+  /* Refuse to write past the end of the parameter array or of the buffer
+   * holding the name. */
+  if (params->count >= PARSER_MAX_NO_OF_PARAMS)
+    error("Too many parameters (maximum is %d).", PARSER_MAX_NO_OF_PARAMS);
+  if (strlen(sectionName) + strlen(name) >= PARSER_MAX_LINE_SIZE)
+    error("Invalid line:%d, parameter name too long.", lineNumber);
+
+  /* Store the name prefixed with its section name, then the value. */
+  strcpy(params->data[params->count].name, sectionName);
+  strcat(params->data[params->count].name, name);
+  strcpy(params->data[params->count++].value, value);
+}
+
+/**
+ * @brief Retrieve integer parameter from structure.
+ *
+ * @param params Structure that holds the parameters
+ * @param name Name of the parameter to be found
+ * @return Value of the parameter; error() is called if absent or malformed
+ */
+int parser_get_param_int(const struct swift_params *params, const char *name) {
+
+  /* Receives the text following the number. Starts out empty so that the
+   * error message never prints an uninitialised buffer. */
+  char str[PARSER_MAX_LINE_SIZE] = "";
+  int retParam = 0;
+
+  for (int i = 0; i < params->count; i++) {
+    /* Skip the entries stored under a different name. */
+    if (strcmp(name, params->data[i].name) != 0) continue;
+
+    /* Check that exactly one number is parsed. */
+    if (sscanf(params->data[i].value, "%d%s", &retParam, str) != 1) {
+      error(
+          "Tried parsing int '%s' but found '%s' with illegal integer "
+          "characters '%s'.",
+          params->data[i].name, params->data[i].value, str);
+    }
+    return retParam;
+  }
+  error("Cannot find '%s' in the structure.", name);
+  return 0;
+}
+
+/**
+ * @brief Retrieve char parameter from structure.
+ *
+ * @param params Structure that holds the parameters
+ * @param name Name of the parameter to be found
+ * @return Value of the parameter; error() is called if absent or malformed
+ */
+char parser_get_param_char(const struct swift_params *params,
+                           const char *name) {
+
+  /* Receives the text following the character. Starts out empty so that the
+   * error message never prints an uninitialised buffer. */
+  char str[PARSER_MAX_LINE_SIZE] = "";
+  char retParam = 0;
+
+  for (int i = 0; i < params->count; i++) {
+    /* Skip the entries stored under a different name. */
+    if (strcmp(name, params->data[i].name) != 0) continue;
+
+    /* Check that exactly one character is parsed. */
+    if (sscanf(params->data[i].value, "%c%s", &retParam, str) != 1) {
+      error(
+          "Tried parsing char '%s' but found '%s' with illegal char "
+          "characters '%s'.",
+          params->data[i].name, params->data[i].value, str);
+    }
+    return retParam;
+  }
+  error("Cannot find '%s' in the structure.", name);
+  return 0;
+}
+
+/**
+ * @brief Retrieve float parameter from structure.
+ *
+ * @param params Structure that holds the parameters
+ * @param name Name of the parameter to be found
+ * @return Value of the parameter; error() is called if absent or malformed
+ */
+float parser_get_param_float(const struct swift_params *params,
+                             const char *name) {
+
+  /* Receives the text following the number. Starts out empty so that the
+   * error message never prints an uninitialised buffer. */
+  char str[PARSER_MAX_LINE_SIZE] = "";
+  float retParam = 0.f;
+
+  for (int i = 0; i < params->count; i++) {
+    /* Skip the entries stored under a different name. */
+    if (strcmp(name, params->data[i].name) != 0) continue;
+
+    /* Check that exactly one number is parsed. */
+    if (sscanf(params->data[i].value, "%f%s", &retParam, str) != 1) {
+      error(
+          "Tried parsing float '%s' but found '%s' with illegal float "
+          "characters '%s'.",
+          params->data[i].name, params->data[i].value, str);
+    }
+    return retParam;
+  }
+  error("Cannot find '%s' in the structure.", name);
+  return 0.f;
+}
+
+/**
+ * @brief Retrieve double parameter from structure.
+ *
+ * @param params Structure that holds the parameters
+ * @param name Name of the parameter to be found
+ * @return Value of the parameter; error() is called if absent or malformed
+ */
+double parser_get_param_double(const struct swift_params *params,
+                               const char *name) {
+
+  /* Receives the text following the number. Starts out empty so that the
+   * error message never prints an uninitialised buffer. */
+  char str[PARSER_MAX_LINE_SIZE] = "";
+  double retParam = 0.;
+
+  for (int i = 0; i < params->count; i++) {
+    /* Skip the entries stored under a different name. */
+    if (strcmp(name, params->data[i].name) != 0) continue;
+    /* Check that exactly one number is parsed. */
+    if (sscanf(params->data[i].value, "%lf%s", &retParam, str) != 1) {
+      error(
+          "Tried parsing double '%s' but found '%s' with illegal double "
+          "characters '%s'.",
+          params->data[i].name, params->data[i].value, str);
+    }
+    return retParam;
+  }
+  error("Cannot find '%s' in the structure.", name);
+  return 0.;
+}
+
+/**
+ * @brief Copies the value of a string parameter out of the structure.
+ *
+ * @param params Structure that holds the parameters
+ * @param name Name of the parameter to be found
+ * @param retParam (return) Buffer receiving a copy of the stored value
+ */
+void parser_get_param_string(const struct swift_params *params,
+                             const char *name, char *retParam) {
+  int i = 0;
+  /* Advance to the first entry stored under the requested name, if any. */
+  while (i < params->count && strcmp(name, params->data[i].name) != 0) i++;
+  /* Nothing found: report it. Otherwise hand back a copy of the value. */
+  if (i >= params->count) {
+    error("Cannot find '%s' in the structure.", name);
+    return;
+  }
+  strcpy(retParam, params->data[i].value);
+}
+
+/**
+ * @brief Prints a banner, then the name and value of every stored parameter.
+ *
+ * @param params Structure that holds the parameters
+ */
+void parser_print_params(const struct swift_params *params) {
+  printf("\n--------------------------\n");
+  printf("|  SWIFT Parameter File  |\n");
+  printf("--------------------------\n");
+  for (int n = 0; n < params->count; n++) {
+    const struct parameter *const entry = &params->data[n];
+    printf("Parameter name: %s\n", entry->name);
+    printf("Parameter value: %s\n", entry->value);
+  }
+}
+
+/**
+ * @brief Write the contents of the parameter structure to a file in YAML
+ * format.
+ *
+ * @param params Structure that holds the parameters
+ * @param file_name Name of file to be written
+ */
+void parser_write_params_to_file(const struct swift_params *params,
+                                 const char *file_name) {
+  FILE *file = fopen(file_name, "w");
+  /* Last section written; starts empty so strcmp() never reads garbage. */
+  char section[PARSER_MAX_LINE_SIZE] = "";
+  char param_name[PARSER_MAX_LINE_SIZE];
+  char *token;
+
+  /* Writing through a NULL stream is undefined: stop if the open failed. */
+  if (file == NULL) error("Error opening file '%s' for writing.", file_name);
+
+  /* Start of file identifier in YAML. */
+  fprintf(file, "%s\n", PARSER_START_OF_FILE);
+
+  for (int i = 0; i < params->count; i++) {
+    /* Names without the separator are standalone parameters. */
+    if (strchr(params->data[i].name, PARSER_VALUE_CHAR) == NULL) {
+      fprintf(file, "\n%s%c %s\n", params->data[i].name, PARSER_VALUE_CHAR,
+              params->data[i].value);
+      continue;
+    }
+
+    /* Split a copy of the name into its section and parameter parts. */
+    strcpy(param_name, params->data[i].name);
+    token = strtok(param_name, PARSER_VALUE_STRING);
+
+    /* If a new section name is found print it to the file. */
+    if (token != NULL && strcmp(token, section)) {
+      strcpy(section, token);
+      fprintf(file, "\n%s%c\n", section, PARSER_VALUE_CHAR);
+    }
+    /* Write the parameter itself; an absent name part is written as empty. */
+    token = strtok(NULL, " #\n");
+    fprintf(file, "\t%s%c %s\n", token != NULL ? token : "", PARSER_VALUE_CHAR,
+            params->data[i].value);
+  }
+  /* End of file identifier in YAML. */
+  fprintf(file, "%s", PARSER_END_OF_FILE);
+  fclose(file);
+}
diff --git a/src/parser.h b/src/parser.h
new file mode 100644
index 0000000000000000000000000000000000000000..7b2088ae12cdd5136a96baeabd01dd80255c8a3b
--- /dev/null
+++ b/src/parser.h
@@ -0,0 +1,55 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2016 James Willis (james.s.willis@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+#ifndef SWIFT_PARSER_H
+#define SWIFT_PARSER_H
+
+/* Config parameters. */
+#include "../config.h"
+
+/* Some constants. */
+#define PARSER_MAX_LINE_SIZE 256
+#define PARSER_MAX_NO_OF_PARAMS 512
+
+/* A parameter in the input file: its name and its value, both kept as text */
+struct parameter {
+  char name[PARSER_MAX_LINE_SIZE];
+  char value[PARSER_MAX_LINE_SIZE];
+};
+
+/* The array of parameters read from a file */
+struct swift_params {
+  struct parameter data[PARSER_MAX_NO_OF_PARAMS];
+  int count; /* Number of entries of data the print/write routines visit */
+};
+
+/* Public API. */
+void parser_read_file(const char *file_name, struct swift_params *params);
+void parser_print_params(const struct swift_params *params);
+void parser_write_params_to_file(const struct swift_params *params,
+                                 const char *file_name);
+char parser_get_param_char(const struct swift_params *params, const char *name);
+int parser_get_param_int(const struct swift_params *params, const char *name);
+float parser_get_param_float(const struct swift_params *params,
+                             const char *name);
+double parser_get_param_double(const struct swift_params *params,
+                               const char *name);
+void parser_get_param_string(const struct swift_params *params,
+                             const char *name, char *retParam);
+
+#endif /* SWIFT_PARSER_H */
diff --git a/src/part.c b/src/part.c
index 6a99325ef23a7062fafb387fa3f3bd6b2203d057..b89abdde40fe8c7a57d1e9ac9e18fece83ba1f21 100644
--- a/src/part.c
+++ b/src/part.c
@@ -26,33 +26,21 @@
 #endif
 
 /* This object's header. */
+#include "error.h"
 #include "part.h"
 
 #ifdef WITH_MPI
-/**
- * @brief Registers and returns an MPI type for the particles
- *
- * @param part_type The type container
- */
-void part_create_mpi_type(MPI_Datatype* part_type) {
-
-  /* This is not the recommended way of doing this.
-     One should define the structure field by field
-     But as long as we don't do serialization via MPI-IO
-     we don't really care.
-     Also we would have to modify this function everytime something
-     is added to the part structure. */
-  MPI_Type_contiguous(sizeof(struct part) / sizeof(unsigned char), MPI_BYTE,
-                      part_type);
-  MPI_Type_commit(part_type);
-}
+/* MPI data type for the particle transfers */
+MPI_Datatype part_mpi_type;
+MPI_Datatype xpart_mpi_type;
+MPI_Datatype gpart_mpi_type;
+#endif
 
+#ifdef WITH_MPI
 /**
- * @brief Registers and returns an MPI type for the xparticles
- *
- * @param xpart_type The type container
+ * @brief Registers MPI particle types.
  */
-void xpart_create_mpi_type(MPI_Datatype* xpart_type) {
+void part_create_mpi_types() {
 
   /* This is not the recommended way of doing this.
      One should define the structure field by field
@@ -60,9 +48,20 @@ void xpart_create_mpi_type(MPI_Datatype* xpart_type) {
      we don't really care.
      Also we would have to modify this function everytime something
      is added to the part structure. */
-  MPI_Type_contiguous(sizeof(struct xpart) / sizeof(unsigned char), MPI_BYTE,
-                      xpart_type);
-  MPI_Type_commit(xpart_type);
+  if (MPI_Type_contiguous(sizeof(struct part) / sizeof(unsigned char), MPI_BYTE,
+                          &part_mpi_type) != MPI_SUCCESS ||
+      MPI_Type_commit(&part_mpi_type) != MPI_SUCCESS) {
+    error("Failed to create MPI type for parts.");
+  }
+  if (MPI_Type_contiguous(sizeof(struct xpart) / sizeof(unsigned char),
+                          MPI_BYTE, &xpart_mpi_type) != MPI_SUCCESS ||
+      MPI_Type_commit(&xpart_mpi_type) != MPI_SUCCESS) {
+    error("Failed to create MPI type for xparts.");
+  }
+  if (MPI_Type_contiguous(sizeof(struct gpart) / sizeof(unsigned char),
+                          MPI_BYTE, &gpart_mpi_type) != MPI_SUCCESS ||
+      MPI_Type_commit(&gpart_mpi_type) != MPI_SUCCESS) {
+    error("Failed to create MPI type for gparts.");
+  }
 }
-
 #endif
diff --git a/src/part.h b/src/part.h
index 865403e8c2c157dc5a8ff7a32bc41be676d7919b..5d4c9c88a1acadea3d23a3df618c04da389fb61d 100644
--- a/src/part.h
+++ b/src/part.h
@@ -35,8 +35,8 @@
 
 /* Some constants. */
 #define part_align 64
-#define gpart_align 32
 #define xpart_align 32
+#define gpart_align 32
 
 /* Import the right particle definition */
 #if defined(MINIMAL_SPH)
@@ -52,8 +52,12 @@
 #include "./gravity/Default/gravity_part.h"
 
 #ifdef WITH_MPI
-void part_create_mpi_type(MPI_Datatype* part_type);
-void xpart_create_mpi_type(MPI_Datatype* xpart_type);
+/* MPI data type for the particle transfers */
+extern MPI_Datatype part_mpi_type;
+extern MPI_Datatype xpart_mpi_type;
+extern MPI_Datatype gpart_mpi_type;
+
+void part_create_mpi_types();
 #endif
 
 #endif /* SWIFT_PART_H */
diff --git a/src/partition.c b/src/partition.c
index 7dbbb9552e603adee45097a379200f1493ce3349..1e5202df7df45fab9182ad625ae145e6fd221ebd 100644
--- a/src/partition.c
+++ b/src/partition.c
@@ -35,7 +35,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <strings.h>
-#include <values.h>
+#include <float.h>
 
 /* MPI headers. */
 #ifdef WITH_MPI
@@ -52,6 +52,7 @@
 #include "error.h"
 #include "partition.h"
 #include "space.h"
+#include "tools.h"
 
 /* Maximum weight used for METIS. */
 #define metis_maxweight 10000.0f
@@ -424,7 +425,7 @@ static void repart_edge_metis(int partweights, int bothweights, int nodeID,
    * assume the same graph structure as used in the part_ calls). */
   int nr_cells = s->nr_cells;
   struct cell *cells = s->cells;
-  float wscale = 1e-3, vscale = 1e-3, wscale_buff;
+  float wscale = 1e-3, vscale = 1e-3, wscale_buff = 0.0;
   int wtot = 0;
   int wmax = 1e9 / nr_nodes;
   int wmin;
@@ -659,6 +660,7 @@ static void repart_edge_metis(int partweights, int bothweights, int nodeID,
   split_metis(s, nr_nodes, celllist);
 
   /* Clean up. */
+  free(inds);
   if (bothweights) free(weights_v);
   free(weights_e);
   free(celllist);
@@ -907,6 +909,111 @@ void partition_initial_partition(struct partition *initial_partition,
   }
 }
 
+/**
+ * @brief Initialises the partition and re-partition scheme from the parameter
+ * file
+ *
+ * @param partition The #partition scheme to initialise.
+ * @param reparttype The repartition scheme to initialise.
+ * @param params The parsed parameter file.
+ * @param nr_nodes The number of MPI nodes we are running on.
+ */
+void partition_init(struct partition *partition,
+                    enum repartition_type *reparttype,
+                    const struct swift_params *params, int nr_nodes) {
+
+#ifdef WITH_MPI
+
+/* Defaults make use of METIS if available */
+#ifdef HAVE_METIS
+  *reparttype = REPART_METIS_BOTH;
+  partition->type = INITPART_METIS_NOWEIGHT;
+#else
+  *reparttype = REPART_NONE;
+  partition->type = INITPART_GRID;
+#endif
+
+  /* Set a default grid so that grid[0]*grid[1]*grid[2] == nr_nodes. */
+  factor(nr_nodes, &partition->grid[0], &partition->grid[1]);
+  factor(nr_nodes / partition->grid[1], &partition->grid[0],
+         &partition->grid[2]);
+  factor(partition->grid[0] * partition->grid[1], &partition->grid[1],
+         &partition->grid[0]);
+
+  /* Now let's check what the user wants as an initial domain. */
+  const char part_type =
+      parser_get_param_char(params, "DomainDecomposition:initial_type");
+
+  switch (part_type) {
+    case 'g':
+      partition->type = INITPART_GRID;
+      break;
+    case 'v':
+      partition->type = INITPART_VECTORIZE;
+      break;
+#ifdef HAVE_METIS
+    case 'm':
+      partition->type = INITPART_METIS_NOWEIGHT;
+      break;
+    case 'w':
+      partition->type = INITPART_METIS_WEIGHT;
+      break;
+    default:
+      message("Invalid choice of initial partition type '%c'.", part_type);
+      error("Permitted values are: 'g','m','v' or 'w'.");
+#else
+    default:
+      message("Invalid choice of initial partition type '%c'.", part_type);
+      error("Permitted values are: 'g' or 'v' when compiled without metis.");
+#endif
+  }
+
+  /* In case of grid, the user-supplied grid replaces the default one */
+  if (part_type == 'g') {
+    partition->grid[0] =
+        parser_get_param_int(params, "DomainDecomposition:initial_grid_x");
+    partition->grid[1] =
+        parser_get_param_int(params, "DomainDecomposition:initial_grid_y");
+    partition->grid[2] =
+        parser_get_param_int(params, "DomainDecomposition:initial_grid_z");
+  }
+
+  /* Now let's check what the user wants as a repartition strategy */
+  const char repart_type =
+      parser_get_param_char(params, "DomainDecomposition:repartition_type");
+
+  switch (repart_type) {
+    case 'n':
+      *reparttype = REPART_NONE;
+      break;
+#ifdef HAVE_METIS
+    case 'b':
+      *reparttype = REPART_METIS_BOTH;
+      break;
+    case 'e':
+      *reparttype = REPART_METIS_EDGE;
+      break;
+    case 'v':
+      *reparttype = REPART_METIS_VERTEX;
+      break;
+    case 'x':
+      *reparttype = REPART_METIS_VERTEX_EDGE;
+      break;
+    default:
+      message("Invalid choice of re-partition type '%c'.", repart_type);
+      error("Permitted values are: 'b','e','n', 'v' or 'x'.");
+#else
+    default:
+      message("Invalid choice of re-partition type '%c'.", repart_type);
+      error("Permitted values are: 'n' when compiled without metis.");
+#endif
+  }
+
+#else
+  error("SWIFT was not compiled with MPI support");
+#endif
+}
+
 /*  General support */
 /*  =============== */
 
diff --git a/src/partition.h b/src/partition.h
index 08906c765bbcf71b084829502e632e3159ebd0bf..b2a132ed48e48573949d16291f72218990589158 100644
--- a/src/partition.h
+++ b/src/partition.h
@@ -19,6 +19,7 @@
 #ifndef SWIFT_PARTITION_H
 #define SWIFT_PARTITION_H
 
+#include "parser.h"
 #include "space.h"
 #include "task.h"
 
@@ -58,4 +59,8 @@ void partition_initial_partition(struct partition *initial_partition,
 
 int partition_space_to_space(double *oldh, double *oldcdim, int *oldnodeID,
                              struct space *s);
+void partition_init(struct partition *partition,
+                    enum repartition_type *reparttype,
+                    const struct swift_params *params, int nr_nodes);
+
 #endif /* SWIFT_PARTITION_H */
diff --git a/src/proxy.c b/src/proxy.c
index 7d2e546bf945ca18c2195ea2801d1b2058cb2f58..02263a5653bdcdd2d1bf0a86523ed1a599d4bf21 100644
--- a/src/proxy.c
+++ b/src/proxy.c
@@ -50,11 +50,9 @@ void proxy_cells_exch1(struct proxy *p) {
 
 #ifdef WITH_MPI
 
-  int k, ind;
-
   /* Get the number of pcells we will need to send. */
   p->size_pcells_out = 0;
-  for (k = 0; k < p->nr_cells_out; k++)
+  for (int k = 0; k < p->nr_cells_out; k++)
     p->size_pcells_out += p->cells_out[k]->pcell_size;
 
   /* Send the number of pcells. */
@@ -70,7 +68,7 @@ void proxy_cells_exch1(struct proxy *p) {
   if ((p->pcells_out = malloc(sizeof(struct pcell) * p->size_pcells_out)) ==
       NULL)
     error("Failed to allocate pcell_out buffer.");
-  for (ind = 0, k = 0; k < p->nr_cells_out; k++) {
+  for (int ind = 0, k = 0; k < p->nr_cells_out; k++) {
     memcpy(&p->pcells_out[ind], p->cells_out[k]->pcell,
            sizeof(struct pcell) * p->cells_out[k]->pcell_size);
     ind += p->cells_out[k]->pcell_size;
@@ -131,16 +129,14 @@ void proxy_cells_exch2(struct proxy *p) {
 
 void proxy_addcell_in(struct proxy *p, struct cell *c) {
 
-  int k;
-  struct cell **temp;
-
   /* Check if the cell is already registered with the proxy. */
-  for (k = 0; k < p->nr_cells_in; k++)
+  for (int k = 0; k < p->nr_cells_in; k++)
     if (p->cells_in[k] == c) return;
 
   /* Do we need to grow the number of in cells? */
   if (p->nr_cells_in == p->size_cells_in) {
     p->size_cells_in *= proxy_buffgrow;
+    struct cell **temp;
     if ((temp = malloc(sizeof(struct cell *) * p->size_cells_in)) == NULL)
       error("Failed to allocate incoming cell list.");
     memcpy(temp, p->cells_in, sizeof(struct cell *) * p->nr_cells_in);
@@ -162,16 +158,14 @@ void proxy_addcell_in(struct proxy *p, struct cell *c) {
 
 void proxy_addcell_out(struct proxy *p, struct cell *c) {
 
-  int k;
-  struct cell **temp;
-
   /* Check if the cell is already registered with the proxy. */
-  for (k = 0; k < p->nr_cells_out; k++)
+  for (int k = 0; k < p->nr_cells_out; k++)
     if (p->cells_out[k] == c) return;
 
   /* Do we need to grow the number of out cells? */
   if (p->nr_cells_out == p->size_cells_out) {
     p->size_cells_out *= proxy_buffgrow;
+    struct cell **temp;
     if ((temp = malloc(sizeof(struct cell *) * p->size_cells_out)) == NULL)
       error("Failed to allocate outgoing cell list.");
     memcpy(temp, p->cells_out, sizeof(struct cell *) * p->nr_cells_out);
@@ -195,20 +189,21 @@ void proxy_parts_exch1(struct proxy *p) {
 #ifdef WITH_MPI
 
   /* Send the number of particles. */
-  if (MPI_Isend(&p->nr_parts_out, 1, MPI_INT, p->nodeID,
+  p->buff_out[0] = p->nr_parts_out;
+  p->buff_out[1] = p->nr_gparts_out;
+  if (MPI_Isend(p->buff_out, 2, MPI_INT, p->nodeID,
                 p->mynodeID * proxy_tag_shift + proxy_tag_count, MPI_COMM_WORLD,
                 &p->req_parts_count_out) != MPI_SUCCESS)
     error("Failed to isend nr of parts.");
-  // message( "isent particle count (%i) from node %i to node %i." ,
-  // p->nr_parts_out , p->mynodeID , p->nodeID ); fflush(stdout);
+  /* message( "isent particle counts [%i, %i] from node %i to node %i." ,
+  p->buff_out[0], p->buff_out[1], p->mynodeID , p->nodeID ); fflush(stdout); */
 
   /* Send the particle buffers. */
   if (p->nr_parts_out > 0) {
-    if (MPI_Isend(p->parts_out, sizeof(struct part) * p->nr_parts_out, MPI_BYTE,
-                  p->nodeID, p->mynodeID * proxy_tag_shift + proxy_tag_parts,
+    if (MPI_Isend(p->parts_out, p->nr_parts_out, part_mpi_type, p->nodeID,
+                  p->mynodeID * proxy_tag_shift + proxy_tag_parts,
                   MPI_COMM_WORLD, &p->req_parts_out) != MPI_SUCCESS ||
-        MPI_Isend(p->xparts_out, sizeof(struct xpart) * p->nr_parts_out,
-                  MPI_BYTE, p->nodeID,
+        MPI_Isend(p->xparts_out, p->nr_parts_out, xpart_mpi_type, p->nodeID,
                   p->mynodeID * proxy_tag_shift + proxy_tag_xparts,
                   MPI_COMM_WORLD, &p->req_xparts_out) != MPI_SUCCESS)
       error("Failed to isend part data.");
@@ -219,14 +214,20 @@ void proxy_parts_exch1(struct proxy *p) {
               p->parts_out[k].id, p->parts_out[k].x[0], p->parts_out[k].x[1],
               p->parts_out[k].x[2], p->parts_out[k].h, p->nodeID);*/
   }
+  if (p->nr_gparts_out > 0) {
+    if (MPI_Isend(p->gparts_out, p->nr_gparts_out, gpart_mpi_type, p->nodeID,
+                  p->mynodeID * proxy_tag_shift + proxy_tag_gparts,
+                  MPI_COMM_WORLD, &p->req_gparts_out) != MPI_SUCCESS)
+      error("Failed to isend part data.");
+    // message( "isent gpart data (%i) to node %i." , p->nr_parts_out ,
+    // p->nodeID ); fflush(stdout);
+  }
 
   /* Receive the number of particles. */
-  if (MPI_Irecv(&p->nr_parts_in, 1, MPI_INT, p->nodeID,
+  if (MPI_Irecv(p->buff_in, 2, MPI_INT, p->nodeID,
                 p->nodeID * proxy_tag_shift + proxy_tag_count, MPI_COMM_WORLD,
                 &p->req_parts_count_in) != MPI_SUCCESS)
     error("Failed to irecv nr of parts.");
-// message( "irecv particle count on node %i from node %i." , p->mynodeID ,
-// p->nodeID ); fflush(stdout);
 
 #else
   error("SWIFT was not compiled with MPI support.");
@@ -237,6 +238,10 @@ void proxy_parts_exch2(struct proxy *p) {
 
 #ifdef WITH_MPI
 
+  /* Unpack the incoming parts counts. */
+  p->nr_parts_in = p->buff_in[0];
+  p->nr_gparts_in = p->buff_in[1];
+
   /* Is there enough space in the buffer? */
   if (p->nr_parts_in > p->size_parts_in) {
     do {
@@ -250,19 +255,36 @@ void proxy_parts_exch2(struct proxy *p) {
                                                p->size_parts_in)) == NULL)
       error("Failed to re-allocate parts_in buffers.");
   }
+  if (p->nr_gparts_in > p->size_gparts_in) {
+    do {
+      p->size_gparts_in *= proxy_buffgrow;
+    } while (p->nr_gparts_in > p->size_gparts_in);
+    free(p->gparts_in);
+    if ((p->gparts_in = (struct gpart *)malloc(sizeof(struct gpart) *
+                                               p->size_gparts_in)) == NULL)
+      error("Failed to re-allocate gparts_in buffers.");
+  }
 
   /* Receive the particle buffers. */
   if (p->nr_parts_in > 0) {
-    if (MPI_Irecv(p->parts_in, sizeof(struct part) * p->nr_parts_in, MPI_BYTE,
-                  p->nodeID, p->nodeID * proxy_tag_shift + proxy_tag_parts,
-                  MPI_COMM_WORLD, &p->req_parts_in) != MPI_SUCCESS ||
-        MPI_Irecv(p->xparts_in, sizeof(struct xpart) * p->nr_parts_in, MPI_BYTE,
-                  p->nodeID, p->nodeID * proxy_tag_shift + proxy_tag_xparts,
+    if (MPI_Irecv(p->parts_in, p->nr_parts_in, part_mpi_type, p->nodeID,
+                  p->nodeID * proxy_tag_shift + proxy_tag_parts, MPI_COMM_WORLD,
+                  &p->req_parts_in) != MPI_SUCCESS ||
+        MPI_Irecv(p->xparts_in, p->nr_parts_in, xpart_mpi_type, p->nodeID,
+                  p->nodeID * proxy_tag_shift + proxy_tag_xparts,
                   MPI_COMM_WORLD, &p->req_xparts_in) != MPI_SUCCESS)
       error("Failed to irecv part data.");
     // message( "irecv particle data (%i) from node %i." , p->nr_parts_in ,
     // p->nodeID ); fflush(stdout);
   }
+  if (p->nr_gparts_in > 0) {
+    if (MPI_Irecv(p->gparts_in, p->nr_gparts_in, gpart_mpi_type, p->nodeID,
+                  p->nodeID * proxy_tag_shift + proxy_tag_gparts,
+                  MPI_COMM_WORLD, &p->req_gparts_in) != MPI_SUCCESS)
+      error("Failed to irecv gpart data.");
+    // message( "irecv gpart data (%i) from node %i." , p->nr_gparts_in ,
+    // p->nodeID ); fflush(stdout);
+  }
 
 #else
   error("SWIFT was not compiled with MPI support.");
@@ -278,8 +300,8 @@ void proxy_parts_exch2(struct proxy *p) {
  * @param N The number of parts.
  */
 
-void proxy_parts_load(struct proxy *p, struct part *parts, struct xpart *xparts,
-                      int N) {
+void proxy_parts_load(struct proxy *p, const struct part *parts,
+                      const struct xpart *xparts, int N) {
 
   /* Is there enough space in the buffer? */
   if (p->nr_parts_out + N > p->size_parts_out) {
@@ -309,6 +331,37 @@ void proxy_parts_load(struct proxy *p, struct part *parts, struct xpart *xparts,
   p->nr_parts_out += N;
 }
 
+/**
+ * @brief Load gparts onto a proxy for exchange.
+ *
+ * @param p The #proxy.
+ * @param gparts Pointer to an array of #gpart to send.
+ * @param N The number of gparts.
+ */
+
+void proxy_gparts_load(struct proxy *p, const struct gpart *gparts, int N) {
+
+  /* Grow the outgoing buffer until the queued and the new gparts all fit. */
+  const int nr_total = p->nr_gparts_out + N;
+  if (nr_total > p->size_gparts_out) {
+    while (nr_total > p->size_gparts_out)
+      p->size_gparts_out *= proxy_buffgrow;
+    /* Move the gparts queued so far over to the larger buffer. */
+    const size_t nr_bytes = sizeof(struct gpart) * p->size_gparts_out;
+    struct gpart *const larger = (struct gpart *)malloc(nr_bytes);
+    if (larger == NULL) error("Failed to re-allocate gparts_out buffers.");
+    memcpy(larger, p->gparts_out, sizeof(struct gpart) * p->nr_gparts_out);
+    free(p->gparts_out);
+    p->gparts_out = larger;
+  }
+
+  /* Append the new gparts behind those already queued. */
+  memcpy(&p->gparts_out[p->nr_gparts_out], gparts, sizeof(struct gpart) * N);
+
+  /* Update the counter. */
+  p->nr_gparts_out = nr_total;
+}
+
 /**
  * @brief Initialize the given proxy.
  *
@@ -358,4 +411,20 @@ void proxy_init(struct proxy *p, int mynodeID, int nodeID) {
       error("Failed to allocate parts_out buffers.");
   }
   p->nr_parts_out = 0;
+
+  /* Allocate the gpart send and receive buffers, if needed. */
+  if (p->gparts_in == NULL) {
+    p->size_gparts_in = proxy_buffinit;
+    if ((p->gparts_in = (struct gpart *)malloc(sizeof(struct gpart) *
+                                               p->size_gparts_in)) == NULL)
+      error("Failed to allocate gparts_in buffers.");
+  }
+  p->nr_gparts_in = 0;
+  if (p->gparts_out == NULL) {
+    p->size_gparts_out = proxy_buffinit;
+    if ((p->gparts_out = (struct gpart *)malloc(sizeof(struct gpart) *
+                                                p->size_gparts_out)) == NULL)
+      error("Failed to allocate gparts_out buffers.");
+  }
+  p->nr_gparts_out = 0;
 }
diff --git a/src/proxy.h b/src/proxy.h
index 3cd33e0f0819ee1ecac53213630445b39c809dea..5a747187e05a78a109ce4523ebb3c9d5fe2ad717 100644
--- a/src/proxy.h
+++ b/src/proxy.h
@@ -32,7 +32,8 @@
 #define proxy_tag_count 0
 #define proxy_tag_parts 1
 #define proxy_tag_xparts 2
-#define proxy_tag_cells 3
+#define proxy_tag_gparts 3
+#define proxy_tag_cells 4
 
 /* Data structure for the proxy. */
 struct proxy {
@@ -53,14 +54,21 @@ struct proxy {
   /* The parts and xparts buffers for input and output. */
   struct part *parts_in, *parts_out;
   struct xpart *xparts_in, *xparts_out;
+  struct gpart *gparts_in, *gparts_out;
   int size_parts_in, size_parts_out;
   int nr_parts_in, nr_parts_out;
+  int size_gparts_in, size_gparts_out;
+  int nr_gparts_in, nr_gparts_out;
+
+  /* Buffer to hold the incoming/outgoing particle counts. */
+  int buff_out[2], buff_in[2];
 
 /* MPI request handles. */
 #ifdef WITH_MPI
   MPI_Request req_parts_count_out, req_parts_count_in;
   MPI_Request req_parts_out, req_parts_in;
   MPI_Request req_xparts_out, req_xparts_in;
+  MPI_Request req_gparts_out, req_gparts_in;
   MPI_Request req_cells_count_out, req_cells_count_in;
   MPI_Request req_cells_out, req_cells_in;
 #endif
@@ -68,8 +76,9 @@ struct proxy {
 
 /* Function prototypes. */
 void proxy_init(struct proxy *p, int mynodeID, int nodeID);
-void proxy_parts_load(struct proxy *p, struct part *parts, struct xpart *xparts,
-                      int N);
+void proxy_parts_load(struct proxy *p, const struct part *parts,
+                      const struct xpart *xparts, int N);
+void proxy_gparts_load(struct proxy *p, const struct gpart *gparts, int N);
 void proxy_parts_exch1(struct proxy *p);
 void proxy_parts_exch2(struct proxy *p);
 void proxy_addcell_in(struct proxy *p, struct cell *c);
diff --git a/src/queue.c b/src/queue.c
index a7321155100df9225526c2f19fac2b99531307e4..6b788d7376ba4bdc95f1b1d918ab52a9514e7b4a 100644
--- a/src/queue.c
+++ b/src/queue.c
@@ -136,9 +136,6 @@ struct task *queue_gettask(struct queue *q, const struct task *prev,
   lock_type *qlock = &q->lock;
   struct task *res = NULL;
 
-  /* If there are no tasks, leave immediately. */
-  if (q->count == 0) return NULL;
-
   /* Grab the task lock. */
   if (blocking) {
     if (lock_lock(qlock) != 0) error("Locking the qlock failed.\n");
@@ -146,6 +143,12 @@ struct task *queue_gettask(struct queue *q, const struct task *prev,
     if (lock_trylock(qlock) != 0) return NULL;
   }
 
+  /* If there are no tasks, leave immediately. */
+  if (q->count == 0) {
+    lock_unlock_blind(qlock);
+    return NULL;
+  }
+
   /* Set some pointers we will use often. */
   int *qtid = q->tid;
   struct task *qtasks = q->tasks;
diff --git a/src/riemann/riemann_exact.h b/src/riemann/riemann_exact.h
index 861dad9729794efb302638792fef6e3df43c700a..b768cde5f4f5dfd0463cc8a582a1af0a17607bbe 100644
--- a/src/riemann/riemann_exact.h
+++ b/src/riemann/riemann_exact.h
@@ -192,6 +192,8 @@ __attribute__((always_inline)) INLINE static GFLOAT riemann_guess_p(
  *
  * @param lower_limit Lower limit for the method (riemann_f(lower_limit) < 0)
  * @param upper_limit Upper limit for the method (riemann_f(upper_limit) > 0)
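+ * @param lowf Presumably riemann_f(lower_limit) -- to be confirmed (Bert?)
+ * @param upf  Presumably riemann_f(upper_limit) -- to be confirmed (Bert?)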
  * @param error_tol Tolerance used to decide if the solution is converged
  * @param WL Left state vector
  * @param WR Right state vector
diff --git a/src/runner.c b/src/runner.c
index 7eedb6adc72755ba12faed5429edad43d3849451..e86a2129b013398647db416df2095a55fdb7417e 100644
--- a/src/runner.c
+++ b/src/runner.c
@@ -469,8 +469,10 @@ void runner_dogsort(struct runner *r, struct cell *c, int flags, int clock) {
 
 void runner_doinit(struct runner *r, struct cell *c, int timer) {
 
-  struct part *p, *parts = c->parts;
+  struct part *const parts = c->parts;
+  struct gpart *const gparts = c->gparts;
   const int count = c->count;
+  const int gcount = c->gcount;
   const int ti_current = r->e->ti_current;
 
   TIMER_TIC;
@@ -486,7 +488,7 @@ void runner_doinit(struct runner *r, struct cell *c, int timer) {
     for (int i = 0; i < count; i++) {
 
       /* Get a direct pointer on the part. */
-      p = &parts[i];
+      struct part *const p = &parts[i];
 
       if (p->ti_end <= ti_current) {
 
@@ -494,6 +496,19 @@ void runner_doinit(struct runner *r, struct cell *c, int timer) {
         hydro_init_part(p);
       }
     }
+
+    /* Loop over the gparts in this cell. */
+    for (int i = 0; i < gcount; i++) {
+
+      /* Get a direct pointer on the part. */
+      struct gpart *const gp = &gparts[i];
+
+      if (gp->ti_end <= ti_current) {
+
+        /* Get ready for a density calculation */
+        gravity_init_part(gp);
+      }
+    }
   }
 
   if (timer) TIMER_TOC(timer_init);
@@ -649,7 +664,7 @@ void runner_doghost(struct runner *r, struct cell *c) {
 }
 
 /**
- * @brief Drift particles forward in time
+ * @brief Drift particles and g-particles forward in time
  *
  * @param r The runner thread.
  * @param c The cell.
@@ -658,26 +673,39 @@ void runner_doghost(struct runner *r, struct cell *c) {
 void runner_dodrift(struct runner *r, struct cell *c, int timer) {
 
   const int nr_parts = c->count;
+  const int nr_gparts = c->gcount;
   const double timeBase = r->e->timeBase;
   const double dt = (r->e->ti_current - r->e->ti_old) * timeBase;
-  const float ti_old = r->e->ti_old;
-  const float ti_current = r->e->ti_current;
-  struct part *restrict p, *restrict parts = c->parts;
-  struct xpart *restrict xp, *restrict xparts = c->xparts;
-  float dx_max = 0.f, h_max = 0.f;
-  float w;
+  const int ti_old = r->e->ti_old;
+  const int ti_current = r->e->ti_current;
+  struct part *const parts = c->parts;
+  struct xpart *const xparts = c->xparts;
+  struct gpart *const gparts = c->gparts;
+  float dx_max = 0.f, dx2_max = 0.f, h_max = 0.f;
 
   TIMER_TIC
 
   /* No children? */
   if (!c->split) {
 
-    /* Loop over all the particles in the cell */
+    /* Loop over all the g-particles in the cell */
+    for (int k = 0; k < nr_gparts; ++k) {
+
+      /* Get a handle on the gpart. */
+      struct gpart *const gp = &gparts[k];
+
+      /* Drift... */
+      gp->x[0] += gp->v_full[0] * dt;
+      gp->x[1] += gp->v_full[1] * dt;
+      gp->x[2] += gp->v_full[2] * dt;
+    }
+
+    /* Loop over all the particles in the cell (more work for these !) */
     for (int k = 0; k < nr_parts; k++) {
 
       /* Get a handle on the part. */
-      p = &parts[k];
-      xp = &xparts[k];
+      struct part *const p = &parts[k];
+      struct xpart *const xp = &xparts[k];
 
       /* Useful quantity */
       const float h_inv = 1.0f / p->h;
@@ -693,32 +721,34 @@ void runner_dodrift(struct runner *r, struct cell *c, int timer) {
       p->v[2] += p->a_hydro[2] * dt;
 
       /* Predict smoothing length */
-      w = p->h_dt * h_inv * dt;
-      if (fabsf(w) < 0.2f)
-        p->h *= approx_expf(w); /* 4th order expansion of exp(w) */
+      const float w1 = p->h_dt * h_inv * dt;
+      if (fabsf(w1) < 0.2f)
+        p->h *= approx_expf(w1); /* 4th order expansion of exp(w) */
       else
-        p->h *= expf(w);
+        p->h *= expf(w1);
 
       /* Predict density */
-      w = -3.0f * p->h_dt * h_inv * dt;
-      if (fabsf(w) < 0.2f)
-        p->rho *= approx_expf(w); /* 4th order expansion of exp(w) */
+      const float w2 = -3.0f * p->h_dt * h_inv * dt;
+      if (fabsf(w2) < 0.2f)
+        p->rho *= approx_expf(w2); /* 4th order expansion of exp(w) */
       else
-        p->rho *= expf(w);
+        p->rho *= expf(w2);
 
       /* Predict the values of the extra fields */
       hydro_predict_extra(p, xp, ti_old, ti_current, timeBase);
 
-      /* Compute motion since last cell construction */
-      const float dx =
-          sqrtf((p->x[0] - xp->x_old[0]) * (p->x[0] - xp->x_old[0]) +
-                (p->x[1] - xp->x_old[1]) * (p->x[1] - xp->x_old[1]) +
-                (p->x[2] - xp->x_old[2]) * (p->x[2] - xp->x_old[2]));
-      dx_max = fmaxf(dx_max, dx);
+      /* Compute (square of) motion since last cell construction */
+      const float dx2 = (p->x[0] - xp->x_old[0]) * (p->x[0] - xp->x_old[0]) +
+                        (p->x[1] - xp->x_old[1]) * (p->x[1] - xp->x_old[1]) +
+                        (p->x[2] - xp->x_old[2]) * (p->x[2] - xp->x_old[2]);
+      dx2_max = fmaxf(dx2_max, dx2);
 
       /* Maximal smoothing length */
       h_max = fmaxf(p->h, h_max);
     }
+
+    /* Now, get the maximal particle motion from its square */
+    dx_max = sqrtf(dx2_max);
   }
 
   /* Otherwise, aggregate data from children. */
@@ -758,37 +788,105 @@ void runner_dokick(struct runner *r, struct cell *c, int timer) {
   const double timeBase = r->e->timeBase;
   const double timeBase_inv = 1.0 / r->e->timeBase;
   const int count = c->count;
+  const int gcount = c->gcount;
+  struct part *const parts = c->parts;
+  struct xpart *const xparts = c->xparts;
+  struct gpart *const gparts = c->gparts;
   const int is_fixdt =
       (r->e->policy & engine_policy_fixdt) == engine_policy_fixdt;
 
-  int new_dti;
-  int dti_timeline;
-
-  int updated = 0;
+  int updated = 0, g_updated = 0;
   int ti_end_min = max_nr_timesteps, ti_end_max = 0;
   double e_kin = 0.0, e_int = 0.0, e_pot = 0.0, mass = 0.0;
   float mom[3] = {0.0f, 0.0f, 0.0f};
   float ang[3] = {0.0f, 0.0f, 0.0f};
-  float x[3], v_full[3];
-  struct part *restrict p, *restrict parts = c->parts;
-  struct xpart *restrict xp, *restrict xparts = c->xparts;
 
   TIMER_TIC
 
   /* No children? */
   if (!c->split) {
 
+    /* Loop over the g-particles and kick the active ones. */
+    for (int k = 0; k < gcount; k++) {
+
+      /* Get a handle on the part. */
+      struct gpart *const gp = &gparts[k];
+
+      /* If the g-particle has no counterpart and needs to be kicked */
+      if (gp->id < 0 && (is_fixdt || gp->ti_end <= ti_current)) {
+
+        /* First, finish the force calculation */
+        gravity_end_force(gp);
+
+        /* Now we are ready to compute the next time-step size */
+        int new_dti;
+
+        if (is_fixdt) {
+
+          /* Now we have a time step, proceed with the kick */
+          new_dti = global_dt_max * timeBase_inv;
+
+        } else {
+
+          /* Compute the next timestep (gravity condition) */
+          float new_dt = gravity_compute_timestep(gp);
+
+          /* Limit timestep within the allowed range */
+          new_dt = fminf(new_dt, global_dt_max);
+          new_dt = fmaxf(new_dt, global_dt_min);
+
+          /* Convert to integer time */
+          new_dti = new_dt * timeBase_inv;
+
+          /* Recover the current timestep */
+          const int current_dti = gp->ti_end - gp->ti_begin;
+
+          /* Limit timestep increase */
+          if (current_dti > 0) new_dti = min(new_dti, 2 * current_dti);
+
+          /* Put this timestep on the time line */
+          int dti_timeline = max_nr_timesteps;
+          while (new_dti < dti_timeline) dti_timeline /= 2;
+
+          /* Now we have a time step, proceed with the kick */
+          new_dti = dti_timeline;
+        }
+
+        /* Compute the time step for this kick */
+        const int ti_start = (gp->ti_begin + gp->ti_end) / 2;
+        const int ti_end = gp->ti_end + new_dti / 2;
+        const double dt = (ti_end - ti_start) * timeBase;
+        const double half_dt = (ti_end - gp->ti_end) * timeBase;
+
+        /* Move particle forward in time */
+        gp->ti_begin = gp->ti_end;
+        gp->ti_end = gp->ti_begin + new_dti;
+
+        /* Kick particles in momentum space */
+        gp->v_full[0] += gp->a_grav[0] * dt;
+        gp->v_full[1] += gp->a_grav[1] * dt;
+        gp->v_full[2] += gp->a_grav[2] * dt;
+
+        /* Extra kick work */
+        gravity_kick_extra(gp, dt, half_dt);
+
+        /* Number of updated g-particles */
+        g_updated++;
+      }
+
+      /* Minimal time for next end of time-step */
+      ti_end_min = min(gp->ti_end, ti_end_min);
+      ti_end_max = max(gp->ti_end, ti_end_max);
+    }
+
+    /* Now do the hydro ones... */
+
     /* Loop over the particles and kick the active ones. */
     for (int k = 0; k < count; k++) {
 
       /* Get a handle on the part. */
-      p = &parts[k];
-      xp = &xparts[k];
-
-      const float m = p->mass;
-      x[0] = p->x[0];
-      x[1] = p->x[1];
-      x[2] = p->x[2];
+      struct part *const p = &parts[k];
+      struct xpart *const xp = &xparts[k];
 
       /* If particle needs to be kicked */
       if (is_fixdt || p->ti_end <= ti_current) {
@@ -798,8 +896,10 @@ void runner_dokick(struct runner *r, struct cell *c, int timer) {
 
         /* And do the same of the extra variable */
         hydro_end_force(p);
+        if (p->gpart != NULL) gravity_end_force(p->gpart);
 
         /* Now we are ready to compute the next time-step size */
+        int new_dti;
 
         if (is_fixdt) {
 
@@ -808,9 +908,13 @@ void runner_dokick(struct runner *r, struct cell *c, int timer) {
 
         } else {
 
-          /* Compute the next timestep */
+          /* Compute the next timestep (hydro condition) */
           const float new_dt_hydro = hydro_compute_timestep(p, xp);
-          const float new_dt_grav = gravity_compute_timestep(p, xp);
+
+          /* Compute the next timestep (gravity condition) */
+          float new_dt_grav = FLT_MAX;
+          if (p->gpart != NULL)
+            new_dt_grav = gravity_compute_timestep(p->gpart);
 
           float new_dt = fminf(new_dt_hydro, new_dt_grav);
 
@@ -835,7 +939,7 @@ void runner_dokick(struct runner *r, struct cell *c, int timer) {
           if (current_dti > 0) new_dti = min(new_dti, 2 * current_dti);
 
           /* Put this timestep on the time line */
-          dti_timeline = max_nr_timesteps;
+          int dti_timeline = max_nr_timesteps;
           while (new_dti < dti_timeline) dti_timeline /= 2;
 
           /* Now we have a time step, proceed with the kick */
@@ -845,34 +949,55 @@ void runner_dokick(struct runner *r, struct cell *c, int timer) {
         /* Compute the time step for this kick */
         const int ti_start = (p->ti_begin + p->ti_end) / 2;
         const int ti_end = p->ti_end + new_dti / 2;
-        const float dt = (ti_end - ti_start) * timeBase;
-        const float half_dt = (ti_end - p->ti_end) * timeBase;
+        const double dt = (ti_end - ti_start) * timeBase;
+        const double half_dt = (ti_end - p->ti_end) * timeBase;
 
         /* Move particle forward in time */
         p->ti_begin = p->ti_end;
         p->ti_end = p->ti_begin + new_dti;
+        if (p->gpart != NULL) {
+          p->gpart->ti_begin = p->ti_begin;
+          p->gpart->ti_end = p->ti_end;
+        }
+
+        /* Total acceleration: hydro, plus gravity if a gpart is linked */
+        float a_tot[3] = {p->a_hydro[0], p->a_hydro[1], p->a_hydro[2]};
+        if (p->gpart != NULL) {
+          a_tot[0] += p->gpart->a_grav[0];
+          a_tot[1] += p->gpart->a_grav[1];
+          a_tot[2] += p->gpart->a_grav[2];
+        }
 
         /* Kick particles in momentum space */
-        xp->v_full[0] += p->a_hydro[0] * dt;
-        xp->v_full[1] += p->a_hydro[1] * dt;
-        xp->v_full[2] += p->a_hydro[2] * dt;
+        xp->v_full[0] += a_tot[0] * dt;
+        xp->v_full[1] += a_tot[1] * dt;
+        xp->v_full[2] += a_tot[2] * dt;
+
+        if (p->gpart != NULL) {
+          p->gpart->v_full[0] = xp->v_full[0];
+          p->gpart->v_full[1] = xp->v_full[1];
+          p->gpart->v_full[2] = xp->v_full[2];
+        }
 
-        p->v[0] = xp->v_full[0] - half_dt * p->a_hydro[0];
-        p->v[1] = xp->v_full[1] - half_dt * p->a_hydro[1];
-        p->v[2] = xp->v_full[2] - half_dt * p->a_hydro[2];
+        /* Go back by half-step for the hydro velocity */
+        p->v[0] = xp->v_full[0] - half_dt * a_tot[0];
+        p->v[1] = xp->v_full[1] - half_dt * a_tot[1];
+        p->v[2] = xp->v_full[2] - half_dt * a_tot[2];
 
         /* Extra kick work */
         hydro_kick_extra(p, xp, dt, half_dt);
+        if (p->gpart != NULL) gravity_kick_extra(p->gpart, dt, half_dt);
 
         /* Number of updated particles */
         updated++;
+        if (p->gpart != NULL) g_updated++;
       }
 
       /* Now collect quantities for statistics */
 
-      v_full[0] = xp->v_full[0];
-      v_full[1] = xp->v_full[1];
-      v_full[2] = xp->v_full[2];
+      const double x[3] = {p->x[0], p->x[1], p->x[2]};
+      const float v_full[3] = {xp->v_full[0], xp->v_full[1], xp->v_full[2]};
+      const float m = p->mass;
 
       /* Collect mass */
       mass += m;
@@ -906,13 +1031,14 @@ void runner_dokick(struct runner *r, struct cell *c, int timer) {
     /* Loop over the progeny. */
     for (int k = 0; k < 8; k++)
       if (c->progeny[k] != NULL) {
-        struct cell *cp = c->progeny[k];
+        struct cell *const cp = c->progeny[k];
 
         /* Recurse */
         runner_dokick(r, cp, 0);
 
         /* And aggregate */
         updated += cp->updated;
+        g_updated += cp->g_updated;
         e_kin += cp->e_kin;
         e_int += cp->e_int;
         e_pot += cp->e_pot;
@@ -930,6 +1056,7 @@ void runner_dokick(struct runner *r, struct cell *c, int timer) {
 
   /* Store the values. */
   c->updated = updated;
+  c->g_updated = g_updated;
   c->e_kin = e_kin;
   c->e_int = e_int;
   c->e_pot = e_pot;
@@ -1057,9 +1184,12 @@ void *runner_main(void *data) {
         case task_type_grav_down:
           runner_dograv_down(r, t->ci);
           break;
-        case task_type_psort:
+        case task_type_part_sort:
           space_do_parts_sort();
           break;
+        case task_type_gpart_sort:
+          space_do_gparts_sort();
+          break;
         case task_type_split_cell:
           space_do_split(e->s, t->ci);
           break;
diff --git a/src/runner_doiact.h b/src/runner_doiact.h
index cf5d56e94169b44e6cd2974a3422a0bc5e4610ac..de339db6133fcc829bdc6ee0ce9e537b68982422 100644
--- a/src/runner_doiact.h
+++ b/src/runner_doiact.h
@@ -1235,7 +1235,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) {
 #else
 
           /* Does pi need to be updated too? */
-          if (pi->dt <= dt_step) {
+          if (pi->ti_end <= ti_current) {
 
             /* Add this interaction to the symmetric queue. */
             r2q2[icount2] = r2;
diff --git a/src/runner_doiact_grav.h b/src/runner_doiact_grav.h
index f374339da75e31b39a5295fcd8bbc23c34d8d67d..02626295a49f314fef840bc044a476f5c9cf332d 100644
--- a/src/runner_doiact_grav.h
+++ b/src/runner_doiact_grav.h
@@ -267,9 +267,9 @@ void runner_dograv_down(struct runner *r, struct cell *c) {
     /* Apply the multipole acceleration to all gparts. */
     for (int k = 0; k < c->gcount; k++) {
       struct gpart *p = &c->gparts[k];
-      p->a[0] += m->a[0];
-      p->a[1] += m->a[1];
-      p->a[2] += m->a[2];
+      p->a_grav[0] += m->a[0];
+      p->a_grav[1] += m->a[1];
+      p->a_grav[2] += m->a[2];
     }
   }
 }
@@ -594,5 +594,4 @@ void runner_dosub_grav(struct runner *r, struct cell *ci, struct cell *cj,
 
   if (gettimer) TIMER_TOC(timer_dosub_grav);
 }
-
 #endif /* SWIFT_RUNNER_DOIACT_GRAV_H */
diff --git a/src/scheduler.c b/src/scheduler.c
index 722e344b5a86b5fbdc42c7038fd3cb00e44b2ee8..d1d343240b37f5afd5f41fecacf106b0e85f726f 100644
--- a/src/scheduler.c
+++ b/src/scheduler.c
@@ -43,7 +43,7 @@
 #include "cycle.h"
 #include "error.h"
 #include "intrinsics.h"
-#include "kernel.h"
+#include "kernel_hydro.h"
 #include "timers.h"
 
 /**
@@ -95,39 +95,38 @@ void scheduler_addunlock(struct scheduler *s, struct task *ta,
 
 void scheduler_splittasks(struct scheduler *s) {
 
-  int j, k, ind, sid, tid = 0, redo;
-  struct cell *ci, *cj;
-  double hi, hj, shift[3];
-  struct task *t, *t_old;
-  // float dt_step = s->dt_step;
-  int pts[7][8] = {{-1, 12, 10, 9, 4, 3, 1, 0},
-                   {-1, -1, 11, 10, 5, 4, 2, 1},
-                   {-1, -1, -1, 12, 7, 6, 4, 3},
-                   {-1, -1, -1, -1, 8, 7, 5, 4},
-                   {-1, -1, -1, -1, -1, 12, 10, 9},
-                   {-1, -1, -1, -1, -1, -1, 11, 10},
-                   {-1, -1, -1, -1, -1, -1, -1, 12}};
-  float sid_scale[13] = {0.1897, 0.4025, 0.1897, 0.4025, 0.5788, 0.4025, 0.1897,
-                         0.4025, 0.1897, 0.4025, 0.5788, 0.4025, 0.5788};
+  const int pts[7][8] = {{-1, 12, 10, 9, 4, 3, 1, 0},
+                         {-1, -1, 11, 10, 5, 4, 2, 1},
+                         {-1, -1, -1, 12, 7, 6, 4, 3},
+                         {-1, -1, -1, -1, 8, 7, 5, 4},
+                         {-1, -1, -1, -1, -1, 12, 10, 9},
+                         {-1, -1, -1, -1, -1, -1, 11, 10},
+                         {-1, -1, -1, -1, -1, -1, -1, 12}};
+  const float sid_scale[13] = {0.1897, 0.4025, 0.1897, 0.4025, 0.5788,
+                               0.4025, 0.1897, 0.4025, 0.1897, 0.4025,
+                               0.5788, 0.4025, 0.5788};
 
   /* Loop through the tasks... */
-  redo = 0;
-  t_old = t = NULL;
+  int tid = 0, redo = 0;
+  struct task *t_old = NULL;
   while (1) {
 
     /* Get a pointer on the task. */
+    struct task *t = t_old;
     if (redo) {
       redo = 0;
-      t = t_old;
     } else {
-      if ((ind = atomic_inc(&tid)) < s->nr_tasks)
+      const int ind = atomic_inc(&tid);
+      if (ind < s->nr_tasks)
         t_old = t = &s->tasks[s->tasks_ind[ind]];
       else
         break;
     }
 
     /* Skip sorting tasks. */
-    if (t->type == task_type_psort) continue;
+    if (t->type == task_type_part_sort) continue;
+
+    if (t->type == task_type_gpart_sort) continue;
 
     /* Empty task? */
     if (t->ci == NULL || (t->type == task_type_pair && t->cj == NULL)) {
@@ -161,7 +160,7 @@ void scheduler_splittasks(struct scheduler *s) {
     if (t->type == task_type_self) {
 
       /* Get a handle on the cell involved. */
-      ci = t->ci;
+      struct cell *ci = t->ci;
 
       /* Foreign task? */
       if (ci->nodeID != s->nodeID) {
@@ -187,18 +186,18 @@ void scheduler_splittasks(struct scheduler *s) {
           redo = 1;
 
           /* Add the self task. */
-          for (k = 0; ci->progeny[k] == NULL; k++)
-            ;
-          t->ci = ci->progeny[k];
-          for (k += 1; k < 8; k++)
+          int first_child = 0;
+          while (ci->progeny[first_child] == NULL) first_child++;
+          t->ci = ci->progeny[first_child];
+          for (int k = first_child + 1; k < 8; k++)
             if (ci->progeny[k] != NULL)
               scheduler_addtask(s, task_type_self, t->subtype, 0, 0,
                                 ci->progeny[k], NULL, 0);
 
           /* Make a task for each pair of progeny. */
-          for (j = 0; j < 8; j++)
+          for (int j = 0; j < 8; j++)
             if (ci->progeny[j] != NULL)
-              for (k = j + 1; k < 8; k++)
+              for (int k = j + 1; k < 8; k++)
                 if (ci->progeny[k] != NULL)
                   scheduler_addtask(s, task_type_pair, t->subtype, pts[j][k], 0,
                                     ci->progeny[j], ci->progeny[k], 0);
@@ -211,10 +210,10 @@ void scheduler_splittasks(struct scheduler *s) {
     else if (t->type == task_type_pair) {
 
       /* Get a handle on the cells involved. */
-      ci = t->ci;
-      cj = t->cj;
-      hi = ci->dmin;
-      hj = cj->dmin;
+      struct cell *ci = t->ci;
+      struct cell *cj = t->cj;
+      const double hi = ci->dmin;
+      const double hj = cj->dmin;
 
       /* Foreign task? */
       if (ci->nodeID != s->nodeID && cj->nodeID != s->nodeID) {
@@ -224,7 +223,8 @@ void scheduler_splittasks(struct scheduler *s) {
 
       /* Get the sort ID, use space_getsid and not t->flags
          to make sure we get ci and cj swapped if needed. */
-      sid = space_getsid(s->space, &ci, &cj, shift);
+      double shift[3];
+      int sid = space_getsid(s->space, &ci, &cj, shift);
 
       /* Should this task be split-up? */
       if (ci->split && cj->split &&
@@ -480,9 +480,9 @@ void scheduler_splittasks(struct scheduler *s) {
         /* Replace the current task. */
         t->type = task_type_none;
 
-        for (j = 0; j < 8; j++)
+        for (int j = 0; j < 8; j++)
           if (ci->progeny[j] != NULL)
-            for (k = 0; k < 8; k++)
+            for (int k = 0; k < 8; k++)
               if (cj->progeny[k] != NULL) {
                 t = scheduler_addtask(s, task_type_pair, t->subtype, 0, 0,
                                       ci->progeny[j], cj->progeny[k], 0);
@@ -521,8 +521,8 @@ void scheduler_splittasks(struct scheduler *s) {
     else if (t->type == task_type_grav_mm) {
 
       /* Get a handle on the cells involved. */
-      ci = t->ci;
-      cj = t->cj;
+      struct cell *ci = t->ci;
+      struct cell *cj = t->cj;
 
       /* Self-interaction? */
       if (cj == NULL) {
@@ -546,7 +546,7 @@ void scheduler_splittasks(struct scheduler *s) {
 
             /* Split this task into tasks on its progeny. */
             t->type = task_type_none;
-            for (j = 0; j < 8; j++)
+            for (int j = 0; j < 8; j++)
               if (ci->progeny[j] != NULL && ci->progeny[j]->gcount > 0) {
                 if (t->type == task_type_none) {
                   t->type = task_type_grav_mm;
@@ -555,7 +555,7 @@ void scheduler_splittasks(struct scheduler *s) {
                 } else
                   t = scheduler_addtask(s, task_type_grav_mm, task_subtype_none,
                                         0, 0, ci->progeny[j], NULL, 0);
-                for (k = j + 1; k < 8; k++)
+                for (int k = j + 1; k < 8; k++)
                   if (ci->progeny[k] != NULL && ci->progeny[k]->gcount > 0) {
                     if (t->type == task_type_none) {
                       t->type = task_type_grav_mm;
@@ -594,7 +594,7 @@ void scheduler_splittasks(struct scheduler *s) {
 
           /* Get the opening angle theta. */
           float dx[3], theta;
-          for (k = 0; k < 3; k++) {
+          for (int k = 0; k < 3; k++) {
             dx[k] = fabs(ci->loc[k] - cj->loc[k]);
             if (s->space->periodic && dx[k] > 0.5 * s->space->dim[k])
               dx[k] = -dx[k] + s->space->dim[k];
@@ -615,9 +615,9 @@ void scheduler_splittasks(struct scheduler *s) {
 
               /* Split this task into tasks on its progeny. */
               t->type = task_type_none;
-              for (j = 0; j < 8; j++)
+              for (int j = 0; j < 8; j++)
                 if (ci->progeny[j] != NULL && ci->progeny[j]->gcount > 0) {
-                  for (k = 0; k < 8; k++)
+                  for (int k = 0; k < 8; k++)
                     if (cj->progeny[k] != NULL && cj->progeny[k]->gcount > 0) {
                       if (t->type == task_type_none) {
                         t->type = task_type_grav_mm;
@@ -663,17 +663,14 @@ struct task *scheduler_addtask(struct scheduler *s, int type, int subtype,
                                int flags, int wait, struct cell *ci,
                                struct cell *cj, int tight) {
 
-  int ind;
-  struct task *t;
-
   /* Get the next free task. */
-  ind = atomic_inc(&s->tasks_next);
+  const int ind = atomic_inc(&s->tasks_next);
 
   /* Overflow? */
   if (ind >= s->size) error("Task list overflow.");
 
   /* Get a pointer to the new task. */
-  t = &s->tasks[ind];
+  struct task *t = &s->tasks[ind];
 
   /* Copy the data. */
   t->type = type;
@@ -768,24 +765,24 @@ void scheduler_set_unlocks(struct scheduler *s) {
 
 void scheduler_ranktasks(struct scheduler *s) {
 
-  int i, j = 0, k, temp, left = 0, rank;
-  struct task *t, *tasks = s->tasks;
-  int *tid = s->tasks_ind, nr_tasks = s->nr_tasks;
+  struct task *tasks = s->tasks;
+  int *tid = s->tasks_ind;
+  const int nr_tasks = s->nr_tasks;
 
   /* Run through the tasks and get all the waits right. */
-  for (i = 0, k = 0; k < nr_tasks; k++) {
+  for (int k = 0; k < nr_tasks; k++) {
     tid[k] = k;
-    for (j = 0; j < tasks[k].nr_unlock_tasks; j++)
+    for (int j = 0; j < tasks[k].nr_unlock_tasks; j++)
       tasks[k].unlock_tasks[j]->wait += 1;
   }
 
   /* Main loop. */
-  for (j = 0, rank = 0; left < nr_tasks; rank++) {
+  for (int j = 0, rank = 0, left = 0; left < nr_tasks; rank++) {
 
     /* Load the tids of tasks with no waits. */
-    for (k = left; k < nr_tasks; k++)
+    for (int k = left; k < nr_tasks; k++)
       if (tasks[tid[k]].wait == 0) {
-        temp = tid[j];
+        int temp = tid[j];
         tid[j] = tid[k];
         tid[k] = temp;
         j += 1;
@@ -795,15 +792,16 @@ void scheduler_ranktasks(struct scheduler *s) {
     if (j == left) error("Unsatisfiable task dependencies detected.");
 
     /* Unlock the next layer of tasks. */
-    for (i = left; i < j; i++) {
-      t = &tasks[tid[i]];
+    for (int i = left; i < j; i++) {
+      struct task *t = &tasks[tid[i]];
       t->rank = rank;
       tid[i] = t - tasks;
       if (tid[i] >= nr_tasks) error("Task index overshoot.");
       /* message( "task %i of type %s has rank %i." , i ,
           (t->type == task_type_self) ? "self" : (t->type == task_type_pair) ?
          "pair" : "sort" , rank ); */
-      for (k = 0; k < t->nr_unlock_tasks; k++) t->unlock_tasks[k]->wait -= 1;
+      for (int k = 0; k < t->nr_unlock_tasks; k++)
+        t->unlock_tasks[k]->wait -= 1;
     }
 
     /* The new left (no, not tony). */
@@ -825,8 +823,6 @@ void scheduler_ranktasks(struct scheduler *s) {
 
 void scheduler_reset(struct scheduler *s, int size) {
 
-  int k;
-
   /* Do we need to re-allocate? */
   if (size > s->size) {
 
@@ -853,7 +849,7 @@ void scheduler_reset(struct scheduler *s, int size) {
   s->nr_unlocks = 0;
 
   /* Set the task pointers in the queues. */
-  for (k = 0; k < s->nr_queues; k++) s->queues[k].tasks = s->tasks;
+  for (int k = 0; k < s->nr_queues; k++) s->queues[k].tasks = s->tasks;
 }
 
 /**
@@ -864,21 +860,23 @@ void scheduler_reset(struct scheduler *s, int size) {
 
 void scheduler_reweight(struct scheduler *s) {
 
-  int k, j, nr_tasks = s->nr_tasks, *tid = s->tasks_ind;
-  struct task *t, *tasks = s->tasks;
-  int nodeID = s->nodeID;
-  float sid_scale[13] = {0.1897, 0.4025, 0.1897, 0.4025, 0.5788, 0.4025, 0.1897,
-                         0.4025, 0.1897, 0.4025, 0.5788, 0.4025, 0.5788};
-  float wscale = 0.001;
+  const int nr_tasks = s->nr_tasks;
+  int *tid = s->tasks_ind;
+  struct task *tasks = s->tasks;
+  const int nodeID = s->nodeID;
+  const float sid_scale[13] = {0.1897, 0.4025, 0.1897, 0.4025, 0.5788,
+                               0.4025, 0.1897, 0.4025, 0.1897, 0.4025,
+                               0.5788, 0.4025, 0.5788};
+  const float wscale = 0.001;
   // ticks tic;
 
   /* Run through the tasks backwards and set their waits and
      weights. */
   // tic = getticks();
-  for (k = nr_tasks - 1; k >= 0; k--) {
-    t = &tasks[tid[k]];
+  for (int k = nr_tasks - 1; k >= 0; k--) {
+    struct task *t = &tasks[tid[k]];
     t->weight = 0;
-    for (j = 0; j < t->nr_unlock_tasks; j++)
+    for (int j = 0; j < t->nr_unlock_tasks; j++)
       if (t->unlock_tasks[j]->weight > t->weight)
         t->weight = t->unlock_tasks[j]->weight;
     if (!t->implicit && t->tic > 0)
@@ -959,8 +957,9 @@ void scheduler_reweight(struct scheduler *s) {
 void scheduler_start(struct scheduler *s, unsigned int mask,
                      unsigned int submask) {
 
-  int nr_tasks = s->nr_tasks, *tid = s->tasks_ind;
-  struct task *t, *tasks = s->tasks;
+  const int nr_tasks = s->nr_tasks;
+  int *tid = s->tasks_ind;
+  struct task *tasks = s->tasks;
   // ticks tic;
 
   /* Store the masks */
@@ -986,8 +985,7 @@ void scheduler_start(struct scheduler *s, unsigned int mask,
   const int waiting_old = s->waiting;
 
   /* We are going to use the task structure in a modified way to pass
-     information
-     to the task. Don't do this at home !
+     information to the task. Don't do this at home !
      - ci and cj will give the range of tasks to which the waits will be applied
      - the flags will be used to transfer the mask
      - the rank will be used to transfer the submask
@@ -1012,6 +1010,7 @@ void scheduler_start(struct scheduler *s, unsigned int mask,
 
   /* Wait for the rewait tasks to have executed. */
   pthread_mutex_lock(&s->sleep_mutex);
+  pthread_cond_broadcast(&s->sleep_cond);
   while (s->waiting > waiting_old) {
     pthread_cond_wait(&s->sleep_cond, &s->sleep_mutex);
   }
@@ -1025,7 +1024,7 @@ void scheduler_start(struct scheduler *s, unsigned int mask,
   /* Loop over the tasks and enqueue whoever is ready. */
   // tic = getticks();
   for (int k = 0; k < s->nr_tasks; k++) {
-    t = &tasks[tid[k]];
+    struct task *t = &tasks[tid[k]];
     if (atomic_dec(&t->wait) == 1 && ((1 << t->type) & s->mask) &&
         ((1 << t->subtype) & s->submask) && !t->skip) {
       scheduler_enqueue(s, t);
@@ -1033,6 +1032,11 @@ void scheduler_start(struct scheduler *s, unsigned int mask,
     }
   }
 
+  /* To be safe, fire off one last sleep_cond in a safe way. */
+  pthread_mutex_lock(&s->sleep_mutex);
+  pthread_cond_broadcast(&s->sleep_cond);
+  pthread_mutex_unlock(&s->sleep_mutex);
+
   // message( "enqueueing tasks took %.3f %s." ,
   // clocks_from_ticks( getticks() - tic ), clocks_getunit());
 }
@@ -1046,10 +1050,8 @@ void scheduler_start(struct scheduler *s, unsigned int mask,
 
 void scheduler_enqueue(struct scheduler *s, struct task *t) {
 
+  /* The target queue for this task. */
   int qid = -1;
-#ifdef WITH_MPI
-  int err;
-#endif
 
   /* Fail if this task has already been enqueued before. */
   if (t->rid >= 0) error("Task has already been enqueued.");
@@ -1071,6 +1073,9 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) {
 
   /* Otherwise, look for a suitable queue. */
   else {
+#ifdef WITH_MPI
+    int err;
+#endif
 
     /* Find the previous owner for each task type, and do
        any pre-processing needed. */
@@ -1093,13 +1098,10 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) {
         break;
       case task_type_recv:
 #ifdef WITH_MPI
-        if ((err = MPI_Irecv(t->ci->parts, t->ci->count, s->part_mpi_type,
-                             t->ci->nodeID, t->flags, MPI_COMM_WORLD,
-                             &t->req)) != MPI_SUCCESS) {
-          char buff[MPI_MAX_ERROR_STRING];
-          int len;
-          MPI_Error_string(err, buff, &len);
-          error("Failed to emit irecv for particle data (%s).", buff);
+        err = MPI_Irecv(t->ci->parts, t->ci->count, part_mpi_type,
+                        t->ci->nodeID, t->flags, MPI_COMM_WORLD, &t->req);
+        if (err != MPI_SUCCESS) {
+          mpi_error(err, "Failed to emit irecv for particle data.");
         }
         // message( "receiving %i parts with tag=%i from %i to %i." ,
         //     t->ci->count , t->flags , t->ci->nodeID , s->nodeID );
@@ -1111,13 +1113,10 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) {
         break;
       case task_type_send:
 #ifdef WITH_MPI
-        if ((err = MPI_Isend(t->ci->parts, t->ci->count, s->part_mpi_type,
-                             t->cj->nodeID, t->flags, MPI_COMM_WORLD,
-                             &t->req)) != MPI_SUCCESS) {
-          char buff[MPI_MAX_ERROR_STRING];
-          int len;
-          MPI_Error_string(err, buff, &len);
-          error("Failed to emit isend for particle data (%s).", buff);
+        err = MPI_Isend(t->ci->parts, t->ci->count, part_mpi_type,
+                        t->cj->nodeID, t->flags, MPI_COMM_WORLD, &t->req);
+        if (err != MPI_SUCCESS) {
+          mpi_error(err, "Failed to emit isend for particle data.");
         }
         // message( "sending %i parts with tag=%i from %i to %i." ,
         //     t->ci->count , t->flags , s->nodeID , t->cj->nodeID );
@@ -1133,7 +1132,7 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) {
 
     if (qid >= s->nr_queues) error("Bad computed qid.");
 
-    /* If no previous owner, find the shortest queue. */
+    /* If no previous owner, pick a random queue. */
     if (qid < 0) qid = rand() % s->nr_queues;
 
     /* Increase the waiting counter. */
@@ -1164,7 +1163,7 @@ struct task *scheduler_done(struct scheduler *s, struct task *t) {
   for (int k = 0; k < t->nr_unlock_tasks; k++) {
     struct task *t2 = t->unlock_tasks[k];
 
-    int res = atomic_dec(&t2->wait);
+    const int res = atomic_dec(&t2->wait);
     if (res < 1) {
       error("Negative wait!");
     } else if (res == 1) {
@@ -1203,7 +1202,7 @@ struct task *scheduler_unlock(struct scheduler *s, struct task *t) {
      they are ready. */
   for (int k = 0; k < t->nr_unlock_tasks; k++) {
     struct task *t2 = t->unlock_tasks[k];
-    int res = atomic_dec(&t2->wait);
+    const int res = atomic_dec(&t2->wait);
     if (res < 1) {
       error("Negative wait!");
     } else if (res == 1) {
@@ -1240,7 +1239,7 @@ struct task *scheduler_gettask(struct scheduler *s, int qid,
                                const struct task *prev) {
 
   struct task *res = NULL;
-  int k, nr_queues = s->nr_queues;
+  const int nr_queues = s->nr_queues;
   unsigned int seed = qid;
 
   /* Check qid. */
@@ -1264,10 +1263,10 @@ struct task *scheduler_gettask(struct scheduler *s, int qid,
       /* If unsuccessful, try stealing from the other queues. */
       if (s->flags & scheduler_flag_steal) {
         int count = 0, qids[nr_queues];
-        for (k = 0; k < nr_queues; k++)
+        for (int k = 0; k < nr_queues; k++)
           if (s->queues[k].count > 0) qids[count++] = k;
-        for (k = 0; k < scheduler_maxsteal && count > 0; k++) {
-          int ind = rand_r(&seed) % count;
+        for (int k = 0; k < scheduler_maxsteal && count > 0; k++) {
+          const int ind = rand_r(&seed) % count;
           TIMER_TIC
           res = queue_gettask(&s->queues[qids[ind]], prev, 0);
           TIMER_TOC(timer_qsteal);
@@ -1287,7 +1286,10 @@ struct task *scheduler_gettask(struct scheduler *s, int qid,
     if (res == NULL) {
 #endif
       pthread_mutex_lock(&s->sleep_mutex);
-      if (s->waiting > 0) pthread_cond_wait(&s->sleep_cond, &s->sleep_mutex);
+      res = queue_gettask(&s->queues[qid], prev, 1);
+      if (res == NULL && s->waiting > 0) {
+        pthread_cond_wait(&s->sleep_cond, &s->sleep_mutex);
+      }
       pthread_mutex_unlock(&s->sleep_mutex);
     }
   }
@@ -1352,12 +1354,6 @@ void scheduler_init(struct scheduler *s, struct space *space, int nr_tasks,
   s->tasks = NULL;
   s->tasks_ind = NULL;
   scheduler_reset(s, nr_tasks);
-
-/* Construct types for MPI communications */
-#ifdef WITH_MPI
-  part_create_mpi_type(&s->part_mpi_type);
-  xpart_create_mpi_type(&s->xpart_mpi_type);
-#endif
 }
 
 /**
@@ -1366,7 +1362,7 @@ void scheduler_init(struct scheduler *s, struct space *space, int nr_tasks,
  * @param s The #scheduler
  * @param fileName Name of the file to write to
  */
-void scheduler_print_tasks(struct scheduler *s, char *fileName) {
+void scheduler_print_tasks(const struct scheduler *s, const char *fileName) {
 
   const int nr_tasks = s->nr_tasks, *tid = s->tasks_ind;
   struct task *t, *tasks = s->tasks;
diff --git a/src/scheduler.h b/src/scheduler.h
index 3f2d8c289d0d691d0d155b20ae0522c5830524aa..64c694aea295c13810a20b626055fc6c15eb0af8 100644
--- a/src/scheduler.h
+++ b/src/scheduler.h
@@ -100,12 +100,6 @@ struct scheduler {
 
   /* The node we are working on. */
   int nodeID;
-
-#ifdef WITH_MPI
-  /* MPI data type for the particle transfers */
-  MPI_Datatype part_mpi_type;
-  MPI_Datatype xpart_mpi_type;
-#endif
 };
 
 /* Function prototypes. */
@@ -128,7 +122,7 @@ struct task *scheduler_unlock(struct scheduler *s, struct task *t);
 void scheduler_addunlock(struct scheduler *s, struct task *ta, struct task *tb);
 void scheduler_set_unlocks(struct scheduler *s);
 void scheduler_dump_queue(struct scheduler *s);
-void scheduler_print_tasks(struct scheduler *s, char *fileName);
+void scheduler_print_tasks(const struct scheduler *s, const char *fileName);
 void scheduler_do_rewait(struct task *t_begin, struct task *t_end,
                          unsigned int mask, unsigned int submask);
 
diff --git a/src/serial_io.c b/src/serial_io.c
index 8e63db5cfad3a3b50fc7e350bbac6ce09708230a..10eab97f1bf118a842e274b521056d0d81b32db1 100644
--- a/src/serial_io.c
+++ b/src/serial_io.c
@@ -57,18 +57,18 @@
  * @param dim The dimension of the data (1 for scalar, 3 for vector)
  * @param part_c A (char*) pointer on the first occurrence of the field of
  *interest in the parts array
+ * @param partSize The size in bytes of the particle structure.
  * @param importance If COMPULSORY, the data must be present in the IC file. If
  *OPTIONAL, the array will be zeroed when the data is not present.
  *
  * @todo A better version using HDF5 hyper-slabs to read the file directly into
  *the part array
  * will be written once the structures have been stabilized.
- *
- * Calls #error() if an error occurs.
  */
 void readArrayBackEnd(hid_t grp, char* name, enum DATA_TYPE type, int N,
                       int dim, long long N_total, long long offset,
-                      char* part_c, enum DATA_IMPORTANCE importance) {
+                      char* part_c, size_t partSize,
+                      enum DATA_IMPORTANCE importance) {
   hid_t h_data = 0, h_err = 0, h_type = 0, h_memspace = 0, h_filespace = 0;
   hsize_t shape[2], offsets[2];
   htri_t exist = 0;
@@ -76,7 +76,6 @@ void readArrayBackEnd(hid_t grp, char* name, enum DATA_TYPE type, int N,
   int i = 0, rank = 0;
   const size_t typeSize = sizeOfType(type);
   const size_t copySize = typeSize * dim;
-  const size_t partSize = sizeof(struct part);
   char* temp_c = 0;
 
   /* Check whether the dataspace exists or not */
@@ -172,9 +171,10 @@ void readArrayBackEnd(hid_t grp, char* name, enum DATA_TYPE type, int N,
  * Routines writing an output file
  *-----------------------------------------------------------------------------*/
 
-void prepareArray(hid_t grp, char* fileName, FILE* xmfFile, char* name,
-                  enum DATA_TYPE type, long long N_total, int dim,
-                  struct UnitSystem* us, enum UnitConversionFactor convFactor) {
+void prepareArray(hid_t grp, char* fileName, FILE* xmfFile,
+                  char* partTypeGroupName, char* name, enum DATA_TYPE type,
+                  long long N_total, int dim, struct UnitSystem* us,
+                  enum UnitConversionFactor convFactor) {
   hid_t h_data = 0, h_err = 0, h_space = 0, h_prop = 0;
   int rank = 0;
   hsize_t shape[2];
@@ -234,14 +234,14 @@ void prepareArray(hid_t grp, char* fileName, FILE* xmfFile, char* name,
   }
 
   /* Write XMF description for this data set */
-  writeXMFline(xmfFile, fileName, name, N_total, dim, type);
+  writeXMFline(xmfFile, fileName, partTypeGroupName, name, N_total, dim, type);
 
   /* Write unit conversion factors for this data set */
-  conversionString(buffer, us, convFactor);
+  units_conversion_string(buffer, us, convFactor);
   writeAttribute_d(h_data, "CGS conversion factor",
-                   conversionFactor(us, convFactor));
-  writeAttribute_f(h_data, "h-scale exponent", hFactor(us, convFactor));
-  writeAttribute_f(h_data, "a-scale exponent", aFactor(us, convFactor));
+                   units_conversion_factor(us, convFactor));
+  writeAttribute_f(h_data, "h-scale exponent", units_h_factor(us, convFactor));
+  writeAttribute_f(h_data, "a-scale exponent", units_a_factor(us, convFactor));
   writeAttribute_s(h_data, "Conversion factor", buffer);
 
   H5Pclose(h_prop);
@@ -255,21 +255,22 @@ void prepareArray(hid_t grp, char* fileName, FILE* xmfFile, char* name,
  * @param grp The group in which to write.
  * @param fileName The name of the file in which the data is written
  * @param xmfFile The FILE used to write the XMF description
+ * @param partTypeGroupName The name of the group containing the particles in
+ *the HDF5 file.
  * @param name The name of the array to write.
  * @param type The #DATA_TYPE of the array.
  * @param N The number of particles to write.
  * @param dim The dimension of the data (1 for scalar, 3 for vector)
  * @param part_c A (char*) pointer on the first occurrence of the field of
  *interest in the parts array
+ * @param partSize The size in bytes of the particle structure.
  * @param us The UnitSystem currently in use
- * @param convFactor The UnitConversionFactor for this array
- *
- *
- * Calls #error() if an error occurs.
+ * @param convFactor The UnitConversionFactor for this array
  */
-void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name,
-                       enum DATA_TYPE type, int N, int dim, long long N_total,
-                       int mpi_rank, long long offset, char* part_c,
+void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile,
+                       char* partTypeGroupName, char* name, enum DATA_TYPE type,
+                       int N, int dim, long long N_total, int mpi_rank,
+                       long long offset, char* part_c, size_t partSize,
                        struct UnitSystem* us,
                        enum UnitConversionFactor convFactor) {
 
@@ -279,15 +280,14 @@ void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name,
   int i = 0, rank = 0;
   const size_t typeSize = sizeOfType(type);
   const size_t copySize = typeSize * dim;
-  const size_t partSize = sizeof(struct part);
   char* temp_c = 0;
 
   /* message("Writing '%s' array...", name); */
 
   /* Prepare the arrays in the file */
   if (mpi_rank == 0)
-    prepareArray(grp, fileName, xmfFile, name, type, N_total, dim, us,
-                 convFactor);
+    prepareArray(grp, fileName, xmfFile, partTypeGroupName, name, type, N_total,
+                 dim, us, convFactor);
 
   /* Allocate temporary buffer */
   temp = malloc(N * dim * sizeOfType(type));
@@ -362,7 +362,7 @@ void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name,
 #define readArray(grp, name, type, N, dim, part, N_total, offset, field, \
                   importance)                                            \
   readArrayBackEnd(grp, name, type, N, dim, N_total, offset,             \
-                   (char*)(&(part[0]).field), importance)
+                   (char*)(&(part[0]).field), sizeof(part[0]), importance)
 
 /**
  * @brief A helper macro to call the readArrayBackEnd function more easily.
@@ -371,34 +371,48 @@ void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name,
  * @param fileName Unused parameter in non-MPI mode
  * @param xmfFile Unused parameter in non-MPI mode
  * @param name The name of the array to write.
+ * @param partTypeGroupName The name of the group containing the particles in
+ *the HDF5 file.
  * @param type The #DATA_TYPE of the array.
  * @param N The number of particles to write.
  * @param dim The dimension of the data (1 for scalar, 3 for vector)
  * @param part A (char*) pointer on the first occurrence of the field of
- *interest
- *in the parts array
+ *interest in the parts array
+ * @param N_total Unused parameter in non-MPI mode
+ * @param mpi_rank Unused parameter in non-MPI mode
+ * @param offset Unused parameter in non-MPI mode
  * @param field The name (code name) of the field to read from.
  * @param us The UnitSystem currently in use
  * @param convFactor The UnitConversionFactor for this array
  *
  */
-#define writeArray(grp, fileName, xmfFile, name, type, N, dim, part, N_total, \
-                   mpi_rank, offset, field, us, convFactor)                   \
-  writeArrayBackEnd(grp, fileName, xmfFile, name, type, N, dim, N_total,      \
-                    mpi_rank, offset, (char*)(&(part[0]).field), us,          \
-                    convFactor)
+#define writeArray(grp, fileName, xmfFile, partTypeGroupName, name, type, N,   \
+                   dim, part, N_total, mpi_rank, offset, field, us,            \
+                   convFactor)                                                 \
+  writeArrayBackEnd(grp, fileName, xmfFile, partTypeGroupName, name, type, N,  \
+                    dim, N_total, mpi_rank, offset, (char*)(&(part[0]).field), \
+                    sizeof(part[0]), us, convFactor)
 
 /* Import the right hydro definition */
 #include "hydro_io.h"
+/* Import the right gravity definition */
+#include "gravity_io.h"
 
 /**
  * @brief Reads an HDF5 initial condition file (GADGET-3 type)
  *
  * @param fileName The file to read.
  * @param dim (output) The dimension of the volume read from the file.
- * @param parts (output) The array of #part read from the file.
- * @param N (output) The number of particles read from the file.
+ * @param parts (output) The array of #part (gas particles) read from the file.
+ * @param gparts (output) The array of #gpart read from the file.
+ * @param Ngas (output) The number of #part read from the file on that node.
+ * @param Ngparts (output) The number of #gpart read from the file on that node.
  * @param periodic (output) 1 if the volume is periodic, 0 if not.
+ * @param mpi_rank The MPI rank of this node
+ * @param mpi_size The number of MPI ranks
+ * @param comm The MPI communicator
+ * @param info The MPI information object
+ * @param dry_run If 1, don't read the particles. Only allocates the arrays.
  *
  * Opens the HDF5 file fileName and reads the particles contained
  * in the parts array. N is the returned number of particles found
@@ -407,21 +421,20 @@ void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name,
  * @warning Can not read snapshot distributed over more than 1 file !!!
  * @todo Read snapshots distributed in more than one file.
  *
- * Calls #error() if an error occurs.
- *
  */
 void read_ic_serial(char* fileName, double dim[3], struct part** parts,
-                    size_t* N, int* periodic, int mpi_rank, int mpi_size,
-                    MPI_Comm comm, MPI_Info info) {
+                    struct gpart** gparts, size_t* Ngas, size_t* Ngparts,
+                    int* periodic, int mpi_rank, int mpi_size, MPI_Comm comm,
+                    MPI_Info info, int dry_run) {
   hid_t h_file = 0, h_grp = 0;
-  double boxSize[3] = {0.0, -1.0, -1.0};
   /* GADGET has only cubic boxes (in cosmological mode) */
-  int numParticles[6] = {0};
-  /* GADGET has 6 particle types. We only keep the type 0*/
-  int numParticles_highWord[6] = {0};
-  long long offset = 0;
-  long long N_total = 0;
-  int rank;
+  double boxSize[3] = {0.0, -1.0, -1.0};
+  /* GADGET has 6 particle types. We only keep the type 0 & 1 for now*/
+  int numParticles[NUM_PARTICLE_TYPES] = {0};
+  int numParticles_highWord[NUM_PARTICLE_TYPES] = {0};
+  size_t N[NUM_PARTICLE_TYPES] = {0};
+  long long N_total[NUM_PARTICLE_TYPES] = {0};
+  long long offset[NUM_PARTICLE_TYPES] = {0};
 
   /* First read some information about the content */
   if (mpi_rank == 0) {
@@ -453,8 +466,10 @@ void read_ic_serial(char* fileName, double dim[3], struct part** parts,
     readAttribute(h_grp, "NumPart_Total", UINT, numParticles);
     readAttribute(h_grp, "NumPart_Total_HighWord", UINT, numParticles_highWord);
 
-    N_total = ((long long)numParticles[0]) +
-              ((long long)numParticles_highWord[0] << 32);
+    for (int ptype = 0; ptype < NUM_PARTICLE_TYPES; ++ptype)
+      N_total[ptype] = ((long long)numParticles[ptype]) +
+                       ((long long)numParticles_highWord[ptype] << 32);
+
     dim[0] = boxSize[0];
     dim[1] = (boxSize[1] < 0) ? boxSize[0] : boxSize[1];
     dim[2] = (boxSize[2] < 0) ? boxSize[0] : boxSize[2];
@@ -474,22 +489,40 @@ void read_ic_serial(char* fileName, double dim[3], struct part** parts,
 
   /* Now need to broadcast that information to all ranks. */
   MPI_Bcast(periodic, 1, MPI_INT, 0, comm);
-  MPI_Bcast(&N_total, 1, MPI_LONG_LONG, 0, comm);
+  MPI_Bcast(&N_total, NUM_PARTICLE_TYPES, MPI_LONG_LONG, 0, comm);
   MPI_Bcast(dim, 3, MPI_DOUBLE, 0, comm);
 
   /* Divide the particles among the tasks. */
-  offset = mpi_rank * N_total / mpi_size;
-  *N = (mpi_rank + 1) * N_total / mpi_size - offset;
+  for (int ptype = 0; ptype < NUM_PARTICLE_TYPES; ++ptype) {
+    offset[ptype] = mpi_rank * N_total[ptype] / mpi_size;
+    N[ptype] = (mpi_rank + 1) * N_total[ptype] / mpi_size - offset[ptype];
+  }
 
-  /* Allocate memory to store particles */
-  if (posix_memalign((void*)parts, part_align, (*N) * sizeof(struct part)) != 0)
+  /* Allocate memory to store SPH particles */
+  *Ngas = N[0];
+  if (posix_memalign((void*)parts, part_align, (*Ngas) * sizeof(struct part)) !=
+      0)
     error("Error while allocating memory for particles");
-  bzero(*parts, *N * sizeof(struct part));
+  bzero(*parts, *Ngas * sizeof(struct part));
+
+  /* Allocate memory to store all particles */
+  const size_t Ndm = N[1];
+  *Ngparts = N[1] + N[0];
+  if (posix_memalign((void*)gparts, gpart_align,
+                     *Ngparts * sizeof(struct gpart)) != 0)
+    error("Error while allocating memory for gravity particles");
+  bzero(*gparts, *Ngparts * sizeof(struct gpart));
+
   /* message("Allocated %8.2f MB for particles.", *N * sizeof(struct part) / */
   /* 	  (1024.*1024.)); */
+  /* message("BoxSize = %lf", dim[0]); */
+  /* message("NumPart = [%zd, %zd] Total = %zd", *Ngas, Ndm, *Ngparts); */
+
+  /* For dry runs, only need to do this on rank 0 */
+  if (dry_run) mpi_size = 1;
 
   /* Now loop over ranks and read the data */
-  for (rank = 0; rank < mpi_size; ++rank) {
+  for (int rank = 0; rank < mpi_size; ++rank) {
 
     /* Is it this rank's turn to read ? */
     if (rank == mpi_rank) {
@@ -498,17 +531,43 @@ void read_ic_serial(char* fileName, double dim[3], struct part** parts,
       if (h_file < 0)
         error("Error while opening file '%s' on rank %d.", fileName, mpi_rank);
 
-      /* Open SPH particles group */
-      /* message("Reading particle arrays..."); */
-      h_grp = H5Gopen(h_file, "/PartType0", H5P_DEFAULT);
-      if (h_grp < 0)
-        error("Error while opening particle group on rank %d.\n", mpi_rank);
+      /* Loop over all particle types */
+      for (int ptype = 0; ptype < NUM_PARTICLE_TYPES; ptype++) {
 
-      /* Read particle fields into the particle structure */
-      hydro_read_particles(h_grp, *N, N_total, offset, *parts);
+        /* Don't do anything if no particle of this kind */
+        if (N[ptype] == 0) continue;
 
-      /* Close particle group */
-      H5Gclose(h_grp);
+        /* Open the particle group in the file */
+        char partTypeGroupName[PARTICLE_GROUP_BUFFER_SIZE];
+        snprintf(partTypeGroupName, PARTICLE_GROUP_BUFFER_SIZE, "/PartType%d",
+                 ptype);
+        h_grp = H5Gopen(h_file, partTypeGroupName, H5P_DEFAULT);
+        if (h_grp < 0) {
+          error("Error while opening particle group %s.", partTypeGroupName);
+        }
+
+        /* Read particle fields into the particle structure */
+        switch (ptype) {
+
+          case GAS:
+            if (!dry_run)
+              hydro_read_particles(h_grp, N[ptype], N_total[ptype],
+                                   offset[ptype], *parts);
+            break;
+
+          case DM:
+            if (!dry_run)
+              darkmatter_read_particles(h_grp, N[ptype], N_total[ptype],
+                                        offset[ptype], *gparts);
+            break;
+
+          default:
+            error("Particle Type %d not yet supported. Aborting", ptype);
+        }
+
+        /* Close particle group */
+        H5Gclose(h_grp);
+      }
 
       /* Close file */
       H5Fclose(h_file);
@@ -518,6 +577,12 @@ void read_ic_serial(char* fileName, double dim[3], struct part** parts,
     MPI_Barrier(comm);
   }
 
+  /* Prepare the DM particles */
+  if (!dry_run) prepare_dm_gparts(*gparts, Ndm);
+
+  /* Now duplicate the hydro particle into gparts */
+  if (!dry_run) duplicate_hydro_gparts(*parts, *gparts, *Ngas, Ndm);
+
   /* message("Done Reading particles..."); */
 }
 
@@ -525,7 +590,11 @@ void read_ic_serial(char* fileName, double dim[3], struct part** parts,
  * @brief Writes an HDF5 output file (GADGET-3 type) with its XMF descriptor
  *
  * @param e The engine containing all the system.
- * @param us The UnitSystem used for the conversion of units in the output
+ * @param us The UnitSystem used for the conversion of units in the output.
+ * @param mpi_rank The MPI rank of this node.
+ * @param mpi_size The number of MPI ranks.
+ * @param comm The MPI communicator.
+ * @param info The MPI information object
  *
  * Creates an HDF5 output file and writes the particles contained
  * in the engine. If such a file already exists, it is erased and replaced
@@ -538,35 +607,40 @@ void read_ic_serial(char* fileName, double dim[3], struct part** parts,
 void write_output_serial(struct engine* e, struct UnitSystem* us, int mpi_rank,
                          int mpi_size, MPI_Comm comm, MPI_Info info) {
   hid_t h_file = 0, h_grp = 0, h_grpsph = 0;
-  int N = e->s->nr_parts;
+  const size_t Ngas = e->s->nr_parts;
+  const size_t Ntot = e->s->nr_gparts;
   int periodic = e->s->periodic;
-  int numParticles[6] = {N, 0};
-  int numParticlesHighWord[6] = {0};
-  unsigned int flagEntropy[6] = {0};
-  long long N_total = 0, offset = 0;
-  double offset_d = 0., N_d = 0., N_total_d = 0.;
   int numFiles = 1;
-  int rank = 0;
   struct part* parts = e->s->parts;
-  FILE* xmfFile = 0;
+  struct gpart* gparts = e->s->gparts;
+  struct gpart* dmparts = NULL;
   static int outputCount = 0;
+  FILE* xmfFile = 0;
+
+  /* Number of particles of each type */
+  // const size_t Ndm = Ntot - Ngas;
+
+  /* MATTHIEU: Temporary fix to preserve master */
+  const size_t Ndm = Ntot > 0 ? Ntot - Ngas : 0;
+  /* MATTHIEU: End temporary fix */
 
   /* File name */
-  char fileName[200];
-  sprintf(fileName, "output_%03i.hdf5", outputCount);
+  char fileName[FILENAME_BUFFER_SIZE];
+  snprintf(fileName, FILENAME_BUFFER_SIZE, "output_%03i.hdf5", outputCount);
 
   /* Compute offset in the file and total number of particles */
-  /* Done using double to allow for up to 2^50=10^15 particles */
-  N_d = (double)N;
-  MPI_Exscan(&N_d, &offset_d, 1, MPI_DOUBLE, MPI_SUM, comm);
-  N_total_d = offset_d + N_d;
-  MPI_Bcast(&N_total_d, 1, MPI_DOUBLE, mpi_size - 1, comm);
-  if (N_total_d > 1.e15)
-    error(
-        "Error while computing the offset for parallel output: Simulation has "
-        "more than 10^15 particles.\n");
-  N_total = (long long)N_total_d;
-  offset = (long long)offset_d;
+  size_t N[NUM_PARTICLE_TYPES] = {Ngas, Ndm, 0};
+  long long N_total[NUM_PARTICLE_TYPES] = {0};
+  long long offset[NUM_PARTICLE_TYPES] = {0};
+  MPI_Exscan(&N, &offset, NUM_PARTICLE_TYPES, MPI_LONG_LONG, MPI_SUM, comm);
+  for (int ptype = 0; ptype < NUM_PARTICLE_TYPES; ++ptype)
+    N_total[ptype] = offset[ptype] + N[ptype];
+
+  /* The last rank now has the correct N_total. Let's broadcast from there */
+  MPI_Bcast(&N_total, 6, MPI_LONG_LONG, mpi_size - 1, comm);
+
+  /* Now everybody knows its offset and the total number of particles of each
+   * type */
 
   /* Do common stuff first */
   if (mpi_rank == 0) {
@@ -578,7 +652,7 @@ void write_output_serial(struct engine* e, struct UnitSystem* us, int mpi_rank,
     xmfFile = prepareXMFfile();
 
     /* Write the part corresponding to this specific output */
-    writeXMFheader(xmfFile, N_total, fileName, e->time);
+    writeXMFoutputheader(xmfFile, fileName, e->time);
 
     /* Open file */
     /* message("Opening file '%s'.", fileName); */
@@ -610,15 +684,24 @@ void write_output_serial(struct engine* e, struct UnitSystem* us, int mpi_rank,
     writeAttribute(h_grp, "Time", DOUBLE, &dblTime, 1);
 
     /* GADGET-2 legacy values */
-    numParticles[0] = (unsigned int)N_total;
-    writeAttribute(h_grp, "NumPart_ThisFile", UINT, numParticles, 6);
-    writeAttribute(h_grp, "NumPart_Total", UINT, numParticles, 6);
-    numParticlesHighWord[0] = (unsigned int)(N_total >> 32);
+    /* Number of particles of each type */
+    unsigned int numParticles[NUM_PARTICLE_TYPES] = {0};
+    unsigned int numParticlesHighWord[NUM_PARTICLE_TYPES] = {0};
+    for (int ptype = 0; ptype < NUM_PARTICLE_TYPES; ++ptype) {
+      numParticles[ptype] = (unsigned int)N_total[ptype];
+      numParticlesHighWord[ptype] = (unsigned int)(N_total[ptype] >> 32);
+    }
+    writeAttribute(h_grp, "NumPart_ThisFile", LONGLONG, N_total,
+                   NUM_PARTICLE_TYPES);
+    writeAttribute(h_grp, "NumPart_Total", UINT, numParticles,
+                   NUM_PARTICLE_TYPES);
     writeAttribute(h_grp, "NumPart_Total_HighWord", UINT, numParticlesHighWord,
-                   6);
+                   NUM_PARTICLE_TYPES);
     double MassTable[6] = {0., 0., 0., 0., 0., 0.};
-    writeAttribute(h_grp, "MassTable", DOUBLE, MassTable, 6);
-    writeAttribute(h_grp, "Flag_Entropy_ICs", UINT, flagEntropy, 6);
+    writeAttribute(h_grp, "MassTable", DOUBLE, MassTable, NUM_PARTICLE_TYPES);
+    unsigned int flagEntropy[NUM_PARTICLE_TYPES] = {0};
+    writeAttribute(h_grp, "Flag_Entropy_ICs", UINT, flagEntropy,
+                   NUM_PARTICLE_TYPES);
     writeAttribute(h_grp, "NumFilesPerSnapshot", INT, &numFiles, 1);
 
     /* Close header */
@@ -636,21 +719,32 @@ void write_output_serial(struct engine* e, struct UnitSystem* us, int mpi_rank,
     /* Print the system of Units */
     writeUnitSystem(h_file, us);
 
-    /* Create SPH particles group */
-    /* message("Writing particle arrays..."); */
-    h_grp =
-        H5Gcreate(h_file, "/PartType0", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-    if (h_grp < 0) error("Error while creating particle group.\n");
+    /* Loop over all particle types */
+    for (int ptype = 0; ptype < NUM_PARTICLE_TYPES; ptype++) {
 
-    /* Close particle group */
-    H5Gclose(h_grp);
+      /* Don't do anything if no particle of this kind */
+      if (N_total[ptype] == 0) continue;
+
+      /* Open the particle group in the file */
+      char partTypeGroupName[PARTICLE_GROUP_BUFFER_SIZE];
+      snprintf(partTypeGroupName, PARTICLE_GROUP_BUFFER_SIZE, "/PartType%d",
+               ptype);
+      h_grp = H5Gcreate(h_file, partTypeGroupName, H5P_DEFAULT, H5P_DEFAULT,
+                        H5P_DEFAULT);
+      if (h_grp < 0) {
+        error("Error while creating particle group.\n");
+      }
+
+      /* Close particle group */
+      H5Gclose(h_grp);
+    }
 
     /* Close file */
     H5Fclose(h_file);
   }
 
   /* Now loop over ranks and write the data */
-  for (rank = 0; rank < mpi_size; ++rank) {
+  for (int rank = 0; rank < mpi_size; ++rank) {
 
     /* Is it this rank's turn to write ? */
     if (rank == mpi_rank) {
@@ -659,18 +753,65 @@ void write_output_serial(struct engine* e, struct UnitSystem* us, int mpi_rank,
       if (h_file < 0)
         error("Error while opening file '%s' on rank %d.", fileName, mpi_rank);
 
-      /* Open SPH particles group */
-      /* message("Reading particle arrays..."); */
-      h_grp = H5Gopen(h_file, "/PartType0", H5P_DEFAULT);
-      if (h_grp < 0)
-        error("Error while opening particle group on rank %d.\n", mpi_rank);
+      /* Loop over all particle types */
+      for (int ptype = 0; ptype < NUM_PARTICLE_TYPES; ptype++) {
 
-      /* Write particle fields from the particle structure */
-      hydro_write_particles(h_grp, fileName, xmfFile, N, N_total, mpi_rank,
-                            offset, parts, us);
+        /* Don't do anything if no particle of this kind */
+        if (N_total[ptype] == 0) continue;
 
-      /* Close particle group */
-      H5Gclose(h_grp);
+        /* Add the global information for that particle type to the XMF
+         * meta-file */
+        if (mpi_rank == 0)
+          writeXMFgroupheader(xmfFile, fileName, N_total[ptype], ptype);
+
+        /* Open the particle group in the file */
+        char partTypeGroupName[PARTICLE_GROUP_BUFFER_SIZE];
+        snprintf(partTypeGroupName, PARTICLE_GROUP_BUFFER_SIZE, "/PartType%d",
+                 ptype);
+        h_grp = H5Gopen(h_file, partTypeGroupName, H5P_DEFAULT);
+        if (h_grp < 0) {
+          error("Error while opening particle group %s.", partTypeGroupName);
+        }
+
+        /* Write particle fields from the particle structure */
+        switch (ptype) {
+
+          case GAS:
+            hydro_write_particles(h_grp, fileName, partTypeGroupName, xmfFile,
+                                  N[ptype], N_total[ptype], mpi_rank,
+                                  offset[ptype], parts, us);
+
+            break;
+
+          case DM:
+            /* Allocate temporary array */
+            if (posix_memalign((void*)&dmparts, gpart_align,
+                               Ndm * sizeof(struct gpart)) != 0)
+              error("Error while allocating temporart memory for DM particles");
+            bzero(dmparts, Ndm * sizeof(struct gpart));
+
+            /* Collect the DM particles from gpart */
+            collect_dm_gparts(gparts, Ntot, dmparts, Ndm);
+
+            /* Write DM particles */
+            darkmatter_write_particles(h_grp, fileName, partTypeGroupName,
+                                       xmfFile, N[ptype], N_total[ptype],
+                                       mpi_rank, offset[ptype], dmparts, us);
+
+            /* Free temporary array */
+            free(dmparts);
+            break;
+
+          default:
+            error("Particle Type %d not yet supported. Aborting", ptype);
+        }
+
+        /* Close particle group */
+        H5Gclose(h_grp);
+
+        /* Close this particle group in the XMF file as well */
+        if (mpi_rank == 0) writeXMFgroupfooter(xmfFile, ptype);
+      }
 
       /* Close file */
       H5Fclose(h_file);
@@ -681,7 +822,7 @@ void write_output_serial(struct engine* e, struct UnitSystem* us, int mpi_rank,
   }
 
   /* Write footer of LXMF file descriptor */
-  if (mpi_rank == 0) writeXMFfooter(xmfFile);
+  if (mpi_rank == 0) writeXMFoutputfooter(xmfFile, outputCount, e->time);
 
   /* message("Done writing particles..."); */
   ++outputCount;
diff --git a/src/serial_io.h b/src/serial_io.h
index 95f09f5977a97a359e978db7a1b71b02030d6a14..74ab8326dbeeb955e354687059cdd595657285f0 100644
--- a/src/serial_io.h
+++ b/src/serial_io.h
@@ -32,8 +32,9 @@
 #if defined(HAVE_HDF5) && defined(WITH_MPI) && !defined(HAVE_PARALLEL_HDF5)
 
 void read_ic_serial(char* fileName, double dim[3], struct part** parts,
-                    size_t* N, int* periodic, int mpi_rank, int mpi_size,
-                    MPI_Comm comm, MPI_Info info);
+                    struct gpart** gparts, size_t* Ngas, size_t* Ngparts,
+                    int* periodic, int mpi_rank, int mpi_size, MPI_Comm comm,
+                    MPI_Info info, int dry_run);
 
 void write_output_serial(struct engine* e, struct UnitSystem* us, int mpi_rank,
                          int mpi_size, MPI_Comm comm, MPI_Info info);
diff --git a/src/single_io.c b/src/single_io.c
index 59686a68b5d9e5ea41267ba7b3aad9391862fae4..1dc71087e102ff884dba7b7d4b6dcd6339335cac 100644
--- a/src/single_io.c
+++ b/src/single_io.c
@@ -39,9 +39,6 @@
 #include "common_io.h"
 #include "error.h"
 
-#define FILENAME_BUFFER_SIZE 150
-#define PARTICLE_GROUP_BUFFER_SIZE 20
-
 /*-----------------------------------------------------------------------------
  * Routines reading an IC file
  *-----------------------------------------------------------------------------*/
@@ -56,24 +53,23 @@
  * @param dim The dimension of the data (1 for scalar, 3 for vector)
  * @param part_c A (char*) pointer on the first occurrence of the field of
  *interest in the parts array
+ * @param partSize The size in bytes of the particle structure.
  * @param importance If COMPULSORY, the data must be present in the IC file. If
  *OPTIONAL, the array will be zeroed when the data is not present.
  *
  * @todo A better version using HDF5 hyper-slabs to read the file directly into
  *the part array
  * will be written once the structures have been stabilized.
- *
- * Calls #error() if an error occurs.
  */
 void readArrayBackEnd(hid_t grp, char* name, enum DATA_TYPE type, int N,
-                      int dim, char* part_c, enum DATA_IMPORTANCE importance) {
+                      int dim, char* part_c, size_t partSize,
+                      enum DATA_IMPORTANCE importance) {
   hid_t h_data = 0, h_err = 0, h_type = 0;
   htri_t exist = 0;
   void* temp;
   int i = 0;
   const size_t typeSize = sizeOfType(type);
   const size_t copySize = typeSize * dim;
-  const size_t partSize = sizeof(struct part);
   char* temp_c = 0;
 
   /* Check whether the dataspace exists or not */
@@ -141,23 +137,25 @@ void readArrayBackEnd(hid_t grp, char* name, enum DATA_TYPE type, int N,
  * @param grp The group in which to write.
  * @param fileName The name of the file in which the data is written
  * @param xmfFile The FILE used to write the XMF description
+ * @param partTypeGroupName The name of the group containing the particles in
+ *the HDF5 file.
  * @param name The name of the array to write.
  * @param type The #DATA_TYPE of the array.
  * @param N The number of particles to write.
  * @param dim The dimension of the data (1 for scalar, 3 for vector)
  * @param part_c A (char*) pointer on the first occurrence of the field of
- *interest in the parts array
+ *interest in the parts array.
+ * @param partSize The size in bytes of the particle structure.
  * @param us The UnitSystem currently in use
  * @param convFactor The UnitConversionFactor for this array
  *
  * @todo A better version using HDF5 hyper-slabs to write the file directly from
  *the part array
  * will be written once the structures have been stabilized.
- *
- * Calls #error() if an error occurs.
  */
-void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name,
-                       enum DATA_TYPE type, int N, int dim, char* part_c,
+void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile,
+                       char* partTypeGroupName, char* name, enum DATA_TYPE type,
+                       int N, int dim, char* part_c, size_t partSize,
                        struct UnitSystem* us,
                        enum UnitConversionFactor convFactor) {
   hid_t h_data = 0, h_err = 0, h_space = 0, h_prop = 0;
@@ -165,7 +163,6 @@ void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name,
   int i = 0, rank = 0;
   const size_t typeSize = sizeOfType(type);
   const size_t copySize = typeSize * dim;
-  const size_t partSize = sizeof(struct part);
   char* temp_c = 0;
   hsize_t shape[2];
   hsize_t chunk_shape[2];
@@ -204,7 +201,7 @@ void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name,
 
   /* Make sure the chunks are not larger than the dataset */
   if (chunk_shape[0] > N) chunk_shape[0] = N;
-  
+
   /* Change shape of data space */
   h_err = H5Sset_extent_simple(h_space, rank, shape, NULL);
   if (h_err < 0) {
@@ -241,14 +238,14 @@ void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name,
   }
 
   /* Write XMF description for this data set */
-  writeXMFline(xmfFile, fileName, name, N, dim, type);
+  writeXMFline(xmfFile, fileName, partTypeGroupName, name, N, dim, type);
 
   /* Write unit conversion factors for this data set */
-  conversionString(buffer, us, convFactor);
+  units_conversion_string(buffer, us, convFactor);
   writeAttribute_d(h_data, "CGS conversion factor",
-                   conversionFactor(us, convFactor));
-  writeAttribute_f(h_data, "h-scale exponent", hFactor(us, convFactor));
-  writeAttribute_f(h_data, "a-scale exponent", aFactor(us, convFactor));
+                   units_conversion_factor(us, convFactor));
+  writeAttribute_f(h_data, "h-scale exponent", units_h_factor(us, convFactor));
+  writeAttribute_f(h_data, "a-scale exponent", units_a_factor(us, convFactor));
   writeAttribute_s(h_data, "Conversion factor", buffer);
 
   /* Free and close everything */
@@ -276,7 +273,7 @@ void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name,
 #define readArray(grp, name, type, N, dim, part, N_total, offset, field, \
                   importance)                                            \
   readArrayBackEnd(grp, name, type, N, dim, (char*)(&(part[0]).field),   \
-                   importance)
+                   sizeof(part[0]), importance)
 
 /**
  * @brief A helper macro to call the readArrayBackEnd function more easily.
@@ -285,6 +282,8 @@ void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name,
  * @param fileName The name of the file in which the data is written
  * @param xmfFile The FILE used to write the XMF description
  * @param name The name of the array to write.
+ * @param partTypeGroupName The name of the group containing the particles in
+ *the HDF5 file.
  * @param type The #DATA_TYPE of the array.
  * @param N The number of particles to write.
  * @param dim The dimension of the data (1 for scalar, 3 for vector)
@@ -298,10 +297,12 @@ void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name,
  * @param convFactor The UnitConversionFactor for this array
  *
  */
-#define writeArray(grp, fileName, xmfFile, name, type, N, dim, part, N_total, \
-                   mpi_rank, offset, field, us, convFactor)                   \
-  writeArrayBackEnd(grp, fileName, xmfFile, name, type, N, dim,               \
-                    (char*)(&(part[0]).field), us, convFactor)
+#define writeArray(grp, fileName, xmfFile, partTypeGroupName, name, type, N,  \
+                   dim, part, N_total, mpi_rank, offset, field, us,           \
+                   convFactor)                                                \
+  writeArrayBackEnd(grp, fileName, xmfFile, partTypeGroupName, name, type, N, \
+                    dim, (char*)(&(part[0]).field), sizeof(part[0]), us,      \
+                    convFactor)
 
 /* Import the right hydro definition */
 #include "hydro_io.h"
@@ -314,10 +315,11 @@ void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name,
  * @param fileName The file to read.
  * @param dim (output) The dimension of the volume.
  * @param parts (output) Array of Gas particles.
- * @param gparts (output) Array of DM particles.
+ * @param gparts (output) Array of #gpart particles.
  * @param Ngas (output) number of Gas particles read.
- * @param Ngparts (output) The number of DM particles read.
+ * @param Ngparts (output) The number of #gpart read.
  * @param periodic (output) 1 if the volume is periodic, 0 if not.
+ * @param dry_run If 1, don't read the particles. Only allocates the arrays.
  *
  * Opens the HDF5 file fileName and reads the particles contained
  * in the parts array. N is the returned number of particles found
@@ -326,17 +328,17 @@ void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name,
  * @warning Can not read snapshot distributed over more than 1 file !!!
  * @todo Read snapshots distributed in more than one file.
  *
- * Calls #error() if an error occurs.
- *
  */
 void read_ic_single(char* fileName, double dim[3], struct part** parts,
                     struct gpart** gparts, size_t* Ngas, size_t* Ngparts,
-                    int* periodic) {
+                    int* periodic, int dry_run) {
   hid_t h_file = 0, h_grp = 0;
   /* GADGET has only cubic boxes (in cosmological mode) */
   double boxSize[3] = {0.0, -1.0, -1.0};
   /* GADGET has 6 particle types. We only keep the type 0 & 1 for now...*/
   int numParticles[NUM_PARTICLE_TYPES] = {0};
+  int numParticles_highWord[NUM_PARTICLE_TYPES] = {0};
+  size_t N[NUM_PARTICLE_TYPES] = {0};
   size_t Ndm;
 
   /* Open file */
@@ -365,9 +367,12 @@ void read_ic_single(char* fileName, double dim[3], struct part** parts,
   /* Read the relevant information and print status */
   readAttribute(h_grp, "BoxSize", DOUBLE, boxSize);
   readAttribute(h_grp, "NumPart_Total", UINT, numParticles);
+  readAttribute(h_grp, "NumPart_Total_HighWord", UINT, numParticles_highWord);
+
+  for (int ptype = 0; ptype < NUM_PARTICLE_TYPES; ++ptype)
+    N[ptype] = ((long long)numParticles[ptype]) +
+               ((long long)numParticles_highWord[ptype] << 32);
 
-  *Ngas = numParticles[0];
-  Ndm = numParticles[1];
   dim[0] = boxSize[0];
   dim[1] = (boxSize[1] < 0) ? boxSize[0] : boxSize[1];
   dim[2] = (boxSize[2] < 0) ? boxSize[0] : boxSize[2];
@@ -378,16 +383,16 @@ void read_ic_single(char* fileName, double dim[3], struct part** parts,
   /* Close header */
   H5Gclose(h_grp);
 
-  /* Total number of particles */
-  *Ngparts = *Ngas + Ndm;
-
   /* Allocate memory to store SPH particles */
+  *Ngas = N[0];
   if (posix_memalign((void*)parts, part_align, *Ngas * sizeof(struct part)) !=
       0)
     error("Error while allocating memory for SPH particles");
   bzero(*parts, *Ngas * sizeof(struct part));
 
   /* Allocate memory to store all particles */
+  Ndm = N[1];
+  *Ngparts = N[1] + N[0];
   if (posix_memalign((void*)gparts, gpart_align,
                      *Ngparts * sizeof(struct gpart)) != 0)
     error("Error while allocating memory for gravity particles");
@@ -396,16 +401,14 @@ void read_ic_single(char* fileName, double dim[3], struct part** parts,
   /* message("Allocated %8.2f MB for particles.", *N * sizeof(struct part) /
    * (1024.*1024.)); */
 
-  /* Open SPH particles group */
-  /* message("Reading particle arrays..."); */
-  message("BoxSize = %lf", dim[0]);
-  message("NumPart = [%zd, %zd] Total = %zd", *Ngas, Ndm, *Ngparts);
+  /* message("BoxSize = %lf", dim[0]); */
+  /* message("NumPart = [%zd, %zd] Total = %zd", *Ngas, Ndm, *Ngparts); */
 
   /* Loop over all particle types */
   for (int ptype = 0; ptype < NUM_PARTICLE_TYPES; ptype++) {
 
     /* Don't do anything if no particle of this kind */
-    if (numParticles[ptype] == 0) continue;
+    if (N[ptype] == 0) continue;
 
     /* Open the particle group in the file */
     char partTypeGroupName[PARTICLE_GROUP_BUFFER_SIZE];
@@ -422,11 +425,11 @@ void read_ic_single(char* fileName, double dim[3], struct part** parts,
     switch (ptype) {
 
       case GAS:
-        hydro_read_particles(h_grp, *Ngas, *Ngas, 0, *parts);
+        if (!dry_run) hydro_read_particles(h_grp, *Ngas, *Ngas, 0, *parts);
         break;
 
       case DM:
-        darkmatter_read_particles(h_grp, Ndm, Ndm, 0, *gparts);
+        if (!dry_run) darkmatter_read_particles(h_grp, Ndm, Ndm, 0, *gparts);
         break;
 
       default:
@@ -438,10 +441,10 @@ void read_ic_single(char* fileName, double dim[3], struct part** parts,
   }
 
   /* Prepare the DM particles */
-  prepare_dm_gparts(*gparts, Ndm);
+  if (!dry_run) prepare_dm_gparts(*gparts, Ndm);
 
   /* Now duplicate the hydro particle into gparts */
-  duplicate_hydro_gparts(*parts, *gparts, *Ngas, Ndm);
+  if (!dry_run) duplicate_hydro_gparts(*parts, *gparts, *Ngas, Ndm);
 
   /* message("Done Reading particles..."); */
 
@@ -476,10 +479,13 @@ void write_output_single(struct engine* e, struct UnitSystem* us) {
   static int outputCount = 0;
 
   /* Number of particles of each type */
-  const size_t Ndm = Ntot - Ngas;
-  int numParticles[NUM_PARTICLE_TYPES] = /* Gadget-2 convention here */
-      {Ngas, Ndm, 0};                    /* Could use size_t instead */
-  int numParticlesHighWord[NUM_PARTICLE_TYPES] = {0};
+  // const size_t Ndm = Ntot - Ngas;
+
+  /* MATTHIEU: Temporary fix to preserve master */
+  const size_t Ndm = Ntot > 0 ? Ntot - Ngas : 0;
+  /* MATTHIEU: End temporary fix */
+
+  long long N_total[NUM_PARTICLE_TYPES] = {Ngas, Ndm, 0};
 
   /* File name */
   char fileName[FILENAME_BUFFER_SIZE];
@@ -493,7 +499,7 @@ void write_output_single(struct engine* e, struct UnitSystem* us) {
   xmfFile = prepareXMFfile();
 
   /* Write the part corresponding to this specific output */
-  writeXMFheader(xmfFile, Ngas, fileName, e->time);
+  writeXMFoutputheader(xmfFile, fileName, e->time);
 
   /* Open file */
   /* message("Opening file '%s'.", fileName); */
@@ -521,19 +527,27 @@ void write_output_single(struct engine* e, struct UnitSystem* us) {
 
   /* Print the relevant information and print status */
   writeAttribute(h_grp, "BoxSize", DOUBLE, e->s->dim, 3);
-  writeAttribute(h_grp, "NumPart_ThisFile", UINT, numParticles,
-                 NUM_PARTICLE_TYPES);
   double dblTime = e->time;
   writeAttribute(h_grp, "Time", DOUBLE, &dblTime, 1);
 
   /* GADGET-2 legacy values */
+  /* Number of particles of each type */
+  unsigned int numParticles[NUM_PARTICLE_TYPES] = {0};
+  unsigned int numParticlesHighWord[NUM_PARTICLE_TYPES] = {0};
+  for (int ptype = 0; ptype < NUM_PARTICLE_TYPES; ++ptype) {
+    numParticles[ptype] = (unsigned int)N_total[ptype];
+    numParticlesHighWord[ptype] = (unsigned int)(N_total[ptype] >> 32);
+  }
+  writeAttribute(h_grp, "NumPart_ThisFile", LONGLONG, N_total,
+                 NUM_PARTICLE_TYPES);
   writeAttribute(h_grp, "NumPart_Total", UINT, numParticles,
                  NUM_PARTICLE_TYPES);
   writeAttribute(h_grp, "NumPart_Total_HighWord", UINT, numParticlesHighWord,
                  NUM_PARTICLE_TYPES);
-  double MassTable[NUM_PARTICLE_TYPES] = {0., 0., 0., 0., 0., 0.};
+  double MassTable[NUM_PARTICLE_TYPES] = {0};
   writeAttribute(h_grp, "MassTable", DOUBLE, MassTable, NUM_PARTICLE_TYPES);
-  writeAttribute(h_grp, "Flag_Entropy_ICs", UINT, numParticlesHighWord,
+  unsigned int flagEntropy[NUM_PARTICLE_TYPES] = {0};
+  writeAttribute(h_grp, "Flag_Entropy_ICs", UINT, flagEntropy,
                  NUM_PARTICLE_TYPES);
   writeAttribute(h_grp, "NumFilesPerSnapshot", INT, &numFiles, 1);
 
@@ -558,6 +572,9 @@ void write_output_single(struct engine* e, struct UnitSystem* us) {
     /* Don't do anything if no particle of this kind */
     if (numParticles[ptype] == 0) continue;
 
+    /* Add the global information for that particle type to the XMF meta-file */
+    writeXMFgroupheader(xmfFile, fileName, numParticles[ptype], ptype);
+
     /* Open the particle group in the file */
     char partTypeGroupName[PARTICLE_GROUP_BUFFER_SIZE];
     snprintf(partTypeGroupName, PARTICLE_GROUP_BUFFER_SIZE, "/PartType%d",
@@ -574,8 +591,8 @@ void write_output_single(struct engine* e, struct UnitSystem* us) {
     switch (ptype) {
 
       case GAS:
-        hydro_write_particles(h_grp, fileName, xmfFile, Ngas, Ngas, 0, 0, parts,
-                              us);
+        hydro_write_particles(h_grp, fileName, partTypeGroupName, xmfFile, Ngas,
+                              Ngas, 0, 0, parts, us);
         break;
 
       case DM:
@@ -589,8 +606,8 @@ void write_output_single(struct engine* e, struct UnitSystem* us) {
         collect_dm_gparts(gparts, Ntot, dmparts, Ndm);
 
         /* Write DM particles */
-        darkmatter_write_particles(h_grp, fileName, xmfFile, Ndm, Ndm, 0, 0,
-                                   dmparts, us);
+        darkmatter_write_particles(h_grp, fileName, partTypeGroupName, xmfFile,
+                                   Ndm, Ndm, 0, 0, dmparts, us);
 
         /* Free temporary array */
         free(dmparts);
@@ -602,10 +619,13 @@ void write_output_single(struct engine* e, struct UnitSystem* us) {
 
     /* Close particle group */
     H5Gclose(h_grp);
+
+    /* Close this particle group in the XMF file as well */
+    writeXMFgroupfooter(xmfFile, ptype);
   }
 
   /* Write LXMF file descriptor */
-  writeXMFfooter(xmfFile);
+  writeXMFoutputfooter(xmfFile, outputCount, e->time);
 
   /* message("Done writing particles..."); */
 
diff --git a/src/single_io.h b/src/single_io.h
index c5250280e82e1801b2a4a6136d404d09093dd0ec..587ebe07b6fa2b984b964baf282e7ceb1003ad29 100644
--- a/src/single_io.h
+++ b/src/single_io.h
@@ -28,7 +28,7 @@
 
 void read_ic_single(char* fileName, double dim[3], struct part** parts,
                     struct gpart** gparts, size_t* Ngas, size_t* Ndm,
-                    int* periodic);
+                    int* periodic, int dry_run);
 
 void write_output_single(struct engine* e, struct UnitSystem* us);
 
diff --git a/src/space.c b/src/space.c
index 62cc292588f7f57f0c91e2d8351ff8c5ee17a81a..17b1c72980c3e3343d4713c3088e1de072eacc3e 100644
--- a/src/space.c
+++ b/src/space.c
@@ -40,7 +40,7 @@
 #include "atomic.h"
 #include "engine.h"
 #include "error.h"
-#include "kernel.h"
+#include "kernel_hydro.h"
 #include "lock.h"
 #include "minmax.h"
 #include "runner.h"
@@ -99,12 +99,10 @@ const int sortlistID[27] = {
 int space_getsid(struct space *s, struct cell **ci, struct cell **cj,
                  double *shift) {
 
-  int k, sid = 0, periodic = s->periodic;
-  struct cell *temp;
-  double dx[3];
-
   /* Get the relative distance between the pairs, wrapping. */
-  for (k = 0; k < 3; k++) {
+  const int periodic = s->periodic;
+  double dx[3];
+  for (int k = 0; k < 3; k++) {
     dx[k] = (*cj)->loc[k] - (*ci)->loc[k];
     if (periodic && dx[k] < -s->dim[k] / 2)
       shift[k] = s->dim[k];
@@ -116,15 +114,16 @@ int space_getsid(struct space *s, struct cell **ci, struct cell **cj,
   }
 
   /* Get the sorting index. */
-  for (k = 0; k < 3; k++)
+  int sid = 0;
+  for (int k = 0; k < 3; k++)
     sid = 3 * sid + ((dx[k] < 0.0) ? 0 : ((dx[k] > 0.0) ? 2 : 1));
 
   /* Switch the cells around? */
   if (runner_flip[sid]) {
-    temp = *ci;
+    struct cell *temp = *ci;
     *ci = *cj;
     *cj = temp;
-    for (k = 0; k < 3; k++) shift[k] = -shift[k];
+    for (int k = 0; k < 3; k++) shift[k] = -shift[k];
   }
   sid = sortlistID[sid];
 
@@ -139,10 +138,8 @@ int space_getsid(struct space *s, struct cell **ci, struct cell **cj,
 
 void space_rebuild_recycle(struct space *s, struct cell *c) {
 
-  int k;
-
   if (c->split)
-    for (k = 0; k < 8; k++)
+    for (int k = 0; k < 8; k++)
       if (c->progeny[k] != NULL) {
         space_rebuild_recycle(s, c->progeny[k]);
         space_recycle(s, c->progeny[k]);
@@ -160,19 +157,19 @@ void space_rebuild_recycle(struct space *s, struct cell *c) {
 
 void space_regrid(struct space *s, double cell_max, int verbose) {
 
-  float h_max = s->cell_min / kernel_gamma / space_stretch, dmin;
-  int i, j, k, cdim[3], nr_parts = s->nr_parts;
+  float h_max = s->cell_min / kernel_gamma / space_stretch;
+  const size_t nr_parts = s->nr_parts;
   struct cell *restrict c;
   ticks tic = getticks();
 
   /* Run through the parts and get the current h_max. */
   // tic = getticks();
   if (s->cells != NULL) {
-    for (k = 0; k < s->nr_cells; k++) {
+    for (int k = 0; k < s->nr_cells; k++) {
       if (s->cells[k].h_max > h_max) h_max = s->cells[k].h_max;
     }
   } else {
-    for (k = 0; k < nr_parts; k++) {
+    for (int k = 0; k < nr_parts; k++) {
       if (s->parts[k].h > h_max) h_max = s->parts[k].h;
     }
     s->h_max = h_max;
@@ -192,7 +189,8 @@ void space_regrid(struct space *s, double cell_max, int verbose) {
   if (verbose) message("h_max is %.3e (cell_max=%.3e).", h_max, cell_max);
 
   /* Get the new putative cell dimensions. */
-  for (k = 0; k < 3; k++)
+  int cdim[3];
+  for (int k = 0; k < 3; k++)
     cdim[k] =
         floor(s->dim[k] / fmax(h_max * kernel_gamma * space_stretch, cell_max));
 
@@ -242,7 +240,7 @@ void space_regrid(struct space *s, double cell_max, int verbose) {
 
     /* Free the old cells, if they were allocated. */
     if (s->cells != NULL) {
-      for (k = 0; k < s->nr_cells; k++) {
+      for (int k = 0; k < s->nr_cells; k++) {
         space_rebuild_recycle(s, &s->cells[k]);
         if (s->cells[k].sort != NULL) free(s->cells[k].sort);
       }
@@ -251,12 +249,12 @@ void space_regrid(struct space *s, double cell_max, int verbose) {
     }
 
     /* Set the new cell dimensions only if smaller. */
-    for (k = 0; k < 3; k++) {
+    for (int k = 0; k < 3; k++) {
       s->cdim[k] = cdim[k];
       s->h[k] = s->dim[k] / cdim[k];
       s->ih[k] = 1.0 / s->h[k];
     }
-    dmin = fminf(s->h[0], fminf(s->h[1], s->h[2]));
+    const float dmin = fminf(s->h[0], fminf(s->h[1], s->h[2]));
 
     /* Allocate the highest level of cells. */
     s->tot_cells = s->nr_cells = cdim[0] * cdim[1] * cdim[2];
@@ -264,13 +262,13 @@ void space_regrid(struct space *s, double cell_max, int verbose) {
                        s->nr_cells * sizeof(struct cell)) != 0)
       error("Failed to allocate cells.");
     bzero(s->cells, s->nr_cells * sizeof(struct cell));
-    for (k = 0; k < s->nr_cells; k++)
+    for (int k = 0; k < s->nr_cells; k++)
       if (lock_init(&s->cells[k].lock) != 0) error("Failed to init spinlock.");
 
     /* Set the cell location and sizes. */
-    for (i = 0; i < cdim[0]; i++)
-      for (j = 0; j < cdim[1]; j++)
-        for (k = 0; k < cdim[2]; k++) {
+    for (int i = 0; i < cdim[0]; i++)
+      for (int j = 0; j < cdim[1]; j++)
+        for (int k = 0; k < cdim[2]; k++) {
           c = &s->cells[cell_getid(cdim, i, j, k)];
           c->loc[0] = i * s->h[0];
           c->loc[1] = j * s->h[1];
@@ -333,7 +331,7 @@ void space_regrid(struct space *s, double cell_max, int verbose) {
   else {
 
     /* Free the old cells, if they were allocated. */
-    for (k = 0; k < s->nr_cells; k++) {
+    for (int k = 0; k < s->nr_cells; k++) {
       space_rebuild_recycle(s, &s->cells[k]);
       s->cells[k].sorts = NULL;
       s->cells[k].nr_tasks = 0;
@@ -370,7 +368,7 @@ void space_regrid(struct space *s, double cell_max, int verbose) {
 
 void space_rebuild(struct space *s, double cell_max, int verbose) {
 
-  ticks tic = getticks();
+  const ticks tic = getticks();
 
   /* Be verbose about this. */
   // message( "re)building space..." ); fflush(stdout);
@@ -382,23 +380,15 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
   int nr_gparts = s->nr_gparts;
   struct cell *restrict cells = s->cells;
 
-  double ih[3], dim[3];
-  int cdim[3];
-  ih[0] = s->ih[0];
-  ih[1] = s->ih[1];
-  ih[2] = s->ih[2];
-  dim[0] = s->dim[0];
-  dim[1] = s->dim[1];
-  dim[2] = s->dim[2];
-  cdim[0] = s->cdim[0];
-  cdim[1] = s->cdim[1];
-  cdim[2] = s->cdim[2];
+  const double ih[3] = {s->ih[0], s->ih[1], s->ih[2]};
+  const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]};
+  const int cdim[3] = {s->cdim[0], s->cdim[1], s->cdim[2]};
 
   /* Run through the particles and get their cell index. */
   // tic = getticks();
   const size_t ind_size = s->size_parts;
-  size_t *ind;
-  if ((ind = (size_t *)malloc(sizeof(size_t) * ind_size)) == NULL)
+  int *ind;
+  if ((ind = (int *)malloc(sizeof(int) * ind_size)) == NULL)
     error("Failed to allocate temporary particle indices.");
   for (int k = 0; k < nr_parts; k++) {
     struct part *restrict p = &s->parts[k];
@@ -411,37 +401,91 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
         cell_getid(cdim, p->x[0] * ih[0], p->x[1] * ih[1], p->x[2] * ih[2]);
     cells[ind[k]].count++;
   }
+  // message( "getting particle indices took %.3f %s." ,
+  // clocks_from_ticks(getticks() - tic), clocks_getunit());
+
+  /* Run through the gravity particles and get their cell index. */
+  // tic = getticks();
+  const size_t gind_size = s->size_gparts;
+  int *gind;
+  if ((gind = (int *)malloc(sizeof(int) * gind_size)) == NULL)
+    error("Failed to allocate temporary g-particle indices.");
+  for (int k = 0; k < nr_gparts; k++) {
+    struct gpart *restrict gp = &s->gparts[k];
+    for (int j = 0; j < 3; j++)
+      if (gp->x[j] < 0.0)
+        gp->x[j] += dim[j];
+      else if (gp->x[j] >= dim[j])
+        gp->x[j] -= dim[j];
+    gind[k] =
+        cell_getid(cdim, gp->x[0] * ih[0], gp->x[1] * ih[1], gp->x[2] * ih[2]);
+    cells[gind[k]].gcount++;
+  }
 // message( "getting particle indices took %.3f %s." ,
-// clocks_from_ticks(getticks() - tic), clocks_getunit()):
+// clocks_from_ticks(getticks() - tic), clocks_getunit());
 
 #ifdef WITH_MPI
   /* Move non-local parts to the end of the list. */
-  const int nodeID = s->e->nodeID;
+  const int local_nodeID = s->e->nodeID;
   for (int k = 0; k < nr_parts; k++)
-    if (cells[ind[k]].nodeID != nodeID) {
+    if (cells[ind[k]].nodeID != local_nodeID) {
       cells[ind[k]].count -= 1;
       nr_parts -= 1;
-      struct part tp = s->parts[k];
+      const struct part tp = s->parts[k];
       s->parts[k] = s->parts[nr_parts];
       s->parts[nr_parts] = tp;
-      struct xpart txp = s->xparts[k];
+      if (s->parts[k].gpart != NULL) {
+        s->parts[k].gpart->part = &s->parts[k];
+      }
+      if (s->parts[nr_parts].gpart != NULL) {
+        s->parts[nr_parts].gpart->part = &s->parts[nr_parts];
+      }
+      const struct xpart txp = s->xparts[k];
       s->xparts[k] = s->xparts[nr_parts];
       s->xparts[nr_parts] = txp;
-      int t = ind[k];
+      const int t = ind[k];
       ind[k] = ind[nr_parts];
       ind[nr_parts] = t;
     }
 
+  /* Move non-local gparts to the end of the list. */
+  for (int k = 0; k < nr_gparts; k++)
+    if (cells[gind[k]].nodeID != local_nodeID) {
+      cells[gind[k]].gcount -= 1;
+      nr_gparts -= 1;
+      const struct gpart tp = s->gparts[k];
+      s->gparts[k] = s->gparts[nr_gparts];
+      s->gparts[nr_gparts] = tp;
+      if (s->gparts[k].id > 0) {
+        s->gparts[k].part->gpart = &s->gparts[k];
+      }
+      if (s->gparts[nr_gparts].id > 0) {
+        s->gparts[nr_gparts].part->gpart = &s->gparts[nr_gparts];
+      }
+      const int t = gind[k];
+      gind[k] = gind[nr_gparts];
+      gind[nr_gparts] = t;
+    }
+
   /* Exchange the strays, note that this potentially re-allocates
      the parts arrays. */
-  s->nr_parts =
-      nr_parts + engine_exchange_strays(s->e, nr_parts, &ind[nr_parts],
-                                        s->nr_parts - nr_parts);
+  /* TODO: This function also exchanges gparts, but that is short-circuited
+     until the gparts exchange is fully implemented. */
+  size_t nr_parts_exchanged = s->nr_parts - nr_parts;
+  size_t nr_gparts_exchanged = s->nr_gparts - nr_gparts;
+  engine_exchange_strays(s->e, nr_parts, &ind[nr_parts], &nr_parts_exchanged,
+                         nr_gparts, &gind[nr_gparts], &nr_gparts_exchanged);
+
+  /* Add post-processing, i.e. re-linking/creating of gparts here. */
+
+  /* Set the new particle counts. */
+  s->nr_parts = nr_parts + nr_parts_exchanged;
+  s->nr_gparts = nr_gparts + nr_gparts_exchanged;
 
   /* Re-allocate the index array if needed.. */
   if (s->nr_parts > ind_size) {
-    size_t *ind_new;
-    if ((ind_new = (size_t *)malloc(sizeof(size_t) * s->nr_parts)) == NULL)
+    int *ind_new;
+    if ((ind_new = (int *)malloc(sizeof(int) * s->nr_parts)) == NULL)
       error("Failed to allocate temporary particle indices.");
     memcpy(ind_new, ind, sizeof(size_t) * nr_parts);
     free(ind);
@@ -450,7 +494,7 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
 
   /* Assign each particle to its cell. */
   for (int k = nr_parts; k < s->nr_parts; k++) {
-    struct part *p = &s->parts[k];
+    const struct part *const p = &s->parts[k];
     ind[k] =
         cell_getid(cdim, p->x[0] * ih[0], p->x[1] * ih[1], p->x[2] * ih[2]);
     cells[ind[k]].count += 1;
@@ -481,65 +525,24 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
   /* We no longer need the indices as of here. */
   free(ind);
 
-  /* Run through the gravity particles and get their cell index. */
-  // tic = getticks();
-  const size_t gind_size = s->size_gparts;
-  size_t *gind;
-  if ((gind = (size_t *)malloc(sizeof(size_t) * gind_size)) == NULL)
-    error("Failed to allocate temporary g-particle indices.");
-  for (int k = 0; k < nr_gparts; k++) {
-    struct gpart *gp = &s->gparts[k];
-    for (int j = 0; j < 3; j++)
-      if (gp->x[j] < 0.0)
-        gp->x[j] += dim[j];
-      else if (gp->x[j] >= dim[j])
-        gp->x[j] -= dim[j];
-    gind[k] =
-        cell_getid(cdim, gp->x[0] * ih[0], gp->x[1] * ih[1], gp->x[2] * ih[2]);
-    cells[gind[k]].gcount++;
-  }
-// message( "getting particle indices took %.3f %s." ,
-// clocks_from_ticks(getticks() - tic), clocks_getunit());
-
 #ifdef WITH_MPI
 
-  /* Move non-local gparts to the end of the list. */
-  for (int k = 0; k < nr_gparts; k++)
-    if (cells[ind[k]].nodeID != nodeID) {
-      cells[ind[k]].gcount -= 1;
-      nr_gparts -= 1;
-      struct gpart tp = s->gparts[k];
-      s->gparts[k] = s->gparts[nr_gparts];
-      s->gparts[nr_gparts] = tp;
-      int t = ind[k];
-      ind[k] = ind[nr_gparts];
-      ind[nr_gparts] = t;
-    }
-
-  /* Exchange the strays, note that this potentially re-allocates
-     the parts arrays. */
-  // s->nr_gparts =
-  //    nr_gparts + engine_exchange_strays(s->e, nr_gparts, &ind[nr_gparts],
-  //                                        s->nr_gparts - nr_gparts);
-  if (nr_gparts > 0)
-    error("Need to implement the exchange of strays for the gparts");
-
   /* Re-allocate the index array if needed.. */
   if (s->nr_gparts > gind_size) {
-    size_t *gind_new;
-    if ((gind_new = (size_t *)malloc(sizeof(size_t) * s->nr_gparts)) == NULL)
+    int *gind_new;
+    if ((gind_new = (int *)malloc(sizeof(int) * s->nr_gparts)) == NULL)
       error("Failed to allocate temporary g-particle indices.");
-    memcpy(gind_new, gind, sizeof(size_t) * nr_gparts);
+    memcpy(gind_new, gind, sizeof(int) * nr_gparts);
     free(gind);
     gind = gind_new;
   }
 
   /* Assign each particle to its cell. */
   for (int k = nr_gparts; k < s->nr_gparts; k++) {
-    struct gpart *p = &s->gparts[k];
+    const struct gpart *const p = &s->gparts[k];
     gind[k] =
         cell_getid(cdim, p->x[0] * ih[0], p->x[1] * ih[1], p->x[2] * ih[2]);
-    cells[gind[k]].count += 1;
+    cells[gind[k]].gcount += 1;
     /* if ( cells[ ind[k] ].nodeID != nodeID )
         error( "Received part that does not belong to me (nodeID=%i)." , cells[
        ind[k] ].nodeID ); */
@@ -549,7 +552,7 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
 #endif
 
   /* Sort the parts according to their cells. */
-  space_gparts_sort(s->gparts, gind, nr_gparts, 0, s->nr_cells - 1);
+  space_gparts_sort(s, gind, nr_gparts, 0, s->nr_cells - 1, verbose);
 
   /* Re-link the parts. */
   for (int k = 0; k < nr_gparts; k++)
@@ -558,6 +561,28 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
   /* We no longer need the indices as of here. */
   free(gind);
 
+  /* Verify that the links are correct */
+  /* MATTHIEU: To be commented out once we are happy */
+  for (size_t k = 0; k < nr_gparts; ++k) {
+
+    if (s->gparts[k].id > 0) {
+
+      if (s->gparts[k].part->gpart != &s->gparts[k]) error("Linking problem !");
+
+      if (s->gparts[k].x[0] != s->gparts[k].part->x[0] ||
+          s->gparts[k].x[1] != s->gparts[k].part->x[1] ||
+          s->gparts[k].x[2] != s->gparts[k].part->x[2])
+        error("Linked particles are not at the same position !");
+    }
+  }
+  for (size_t k = 0; k < nr_parts; ++k) {
+
+    if (s->parts[k].gpart != NULL) {
+
+      if (s->parts[k].gpart->part != &s->parts[k]) error("Linking problem !");
+    }
+  }
+
   /* Hook the cells up to the parts. */
   // tic = getticks();
   struct part *finger = s->parts;
@@ -593,7 +618,7 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
  */
 void space_split(struct space *s, struct cell *cells, int verbose) {
 
-  ticks tic = getticks();
+  const ticks tic = getticks();
 
   for (int k = 0; k < s->nr_cells; k++)
     scheduler_addtask(&s->e->sched, task_type_split_cell, task_subtype_none, k,
@@ -617,10 +642,10 @@ void space_split(struct space *s, struct cell *cells, int verbose) {
  * @param verbose Are we talkative ?
  */
 
-void space_parts_sort(struct space *s, size_t *ind, size_t N, int min, int max,
+void space_parts_sort(struct space *s, int *ind, size_t N, int min, int max,
                       int verbose) {
 
-  ticks tic = getticks();
+  const ticks tic = getticks();
 
   /*Populate the global parallel_sort structure with the input data */
   space_sort_struct.parts = s->parts;
@@ -644,7 +669,7 @@ void space_parts_sort(struct space *s, size_t *ind, size_t N, int min, int max,
   space_sort_struct.waiting = 1;
 
   /* Launch the sorting tasks. */
-  engine_launch(s->e, s->e->nr_threads, (1 << task_type_psort), 0);
+  engine_launch(s->e, s->e->nr_threads, (1 << task_type_part_sort), 0);
 
   /* Verify space_sort_struct. */
   /* for (int i = 1; i < N; i++)
@@ -665,7 +690,7 @@ void space_parts_sort(struct space *s, size_t *ind, size_t N, int min, int max,
 void space_do_parts_sort() {
 
   /* Pointers to the sorting data. */
-  size_t *ind = space_sort_struct.ind;
+  int *ind = space_sort_struct.ind;
   struct part *parts = space_sort_struct.parts;
   struct xpart *xparts = space_sort_struct.xparts;
 
@@ -787,103 +812,140 @@ void space_do_parts_sort() {
   } /* main loop. */
 }
 
-void space_gparts_sort(struct gpart *gparts, size_t *ind, size_t N, int min,
-                       int max) {
-
-  struct qstack {
-    volatile size_t i, j;
-    volatile int min, max;
-    volatile int ready;
-  };
-  struct qstack *qstack;
-  int qstack_size = 2 * (max - min) + 10;
-  volatile unsigned int first, last, waiting;
-
-  int pivot;
-  ptrdiff_t i, ii, j, jj, temp_i;
-  int qid;
-  struct gpart temp_p;
-
-  /* for ( int k = 0 ; k < N ; k++ )
-      if ( ind[k] > max || ind[k] < min )
-          error( "ind[%i]=%i is not in [%i,%i]." , k , ind[k] , min , max ); */
-
-  /* Allocate the stack. */
-  if ((qstack = malloc(sizeof(struct qstack) * qstack_size)) == NULL)
-    error("Failed to allocate qstack.");
-
-  /* Init the interval stack. */
-  qstack[0].i = 0;
-  qstack[0].j = N - 1;
-  qstack[0].min = min;
-  qstack[0].max = max;
-  qstack[0].ready = 1;
-  for (i = 1; i < qstack_size; i++) qstack[i].ready = 0;
-  first = 0;
-  last = 1;
-  waiting = 1;
+/**
+ * @brief Sort the g-particles of the #space according to the given
+ * indices.
+ *
+ * @param s The #space.
+ * @param ind The indices with respect to which the gparts are sorted.
+ * @param N The number of gparts
+ * @param min Lowest index.
+ * @param max highest index.
+ * @param verbose Are we talkative ?
+ */
+void space_gparts_sort(struct space *s, int *ind, size_t N, int min, int max,
+                       int verbose) {
+
+  const ticks tic = getticks();
+
+  /* Populate the global parallel_sort structure with the input data */
+  space_sort_struct.gparts = s->gparts;
+  space_sort_struct.ind = ind;
+  space_sort_struct.stack_size = 2 * (max - min + 1) + 10 + s->e->nr_threads;
+  if ((space_sort_struct.stack = malloc(sizeof(struct qstack) *
+                                        space_sort_struct.stack_size)) == NULL)
+    error("Failed to allocate sorting stack.");
+  for (int i = 0; i < space_sort_struct.stack_size; i++)
+    space_sort_struct.stack[i].ready = 0;
+
+  /* Add the first interval. */
+  space_sort_struct.stack[0].i = 0;
+  space_sort_struct.stack[0].j = N - 1;
+  space_sort_struct.stack[0].min = min;
+  space_sort_struct.stack[0].max = max;
+  space_sort_struct.stack[0].ready = 1;
+  space_sort_struct.first = 0;
+  space_sort_struct.last = 1;
+  space_sort_struct.waiting = 1;
+
+  /* Launch the sorting tasks. */
+  engine_launch(s->e, s->e->nr_threads, (1 << task_type_gpart_sort), 0);
+
+  /* Verify space_sort_struct. */
+  /* for (int i = 1; i < N; i++)
+    if (ind[i - 1] > ind[i])
+      error("Sorting failed (ind[%i]=%i,ind[%i]=%i), min=%i, max=%i.", i - 1,
+  ind[i - 1], i,
+            ind[i], min, max);
+  message("Sorting succeeded."); */
+
+  /* Clean up. */
+  free(space_sort_struct.stack);
+
+  if (verbose)
+    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
+            clocks_getunit());
+}
+
+void space_do_gparts_sort() {
+
+  /* Pointers to the sorting data. */
+  int *ind = space_sort_struct.ind;
+  struct gpart *gparts = space_sort_struct.gparts;
 
   /* Main loop. */
-  while (waiting > 0) {
+  while (space_sort_struct.waiting) {
 
     /* Grab an interval off the queue. */
-    qid = (first++) % qstack_size;
+    int qid =
+        atomic_inc(&space_sort_struct.first) % space_sort_struct.stack_size;
+
+    /* Wait for the entry to be ready, or for the sorting to be done. */
+    while (!space_sort_struct.stack[qid].ready)
+      if (!space_sort_struct.waiting) return;
 
     /* Get the stack entry. */
-    i = qstack[qid].i;
-    j = qstack[qid].j;
-    min = qstack[qid].min;
-    max = qstack[qid].max;
-    qstack[qid].ready = 0;
+    ptrdiff_t i = space_sort_struct.stack[qid].i;
+    ptrdiff_t j = space_sort_struct.stack[qid].j;
+    int min = space_sort_struct.stack[qid].min;
+    int max = space_sort_struct.stack[qid].max;
+    space_sort_struct.stack[qid].ready = 0;
 
     /* Loop over sub-intervals. */
     while (1) {
 
       /* Bring beer. */
-      pivot = (min + max) / 2;
+      const int pivot = (min + max) / 2;
+      /* message("Working on interval [%i,%i] with min=%i, max=%i, pivot=%i.",
+              i, j, min, max, pivot); */
 
       /* One pass of QuickSort's partitioning. */
-      ii = i;
-      jj = j;
+      ptrdiff_t ii = i;
+      ptrdiff_t jj = j;
       while (ii < jj) {
         while (ii <= j && ind[ii] <= pivot) ii++;
         while (jj >= i && ind[jj] > pivot) jj--;
         if (ii < jj) {
-          temp_i = ind[ii];
+          size_t temp_i = ind[ii];
           ind[ii] = ind[jj];
           ind[jj] = temp_i;
-          temp_p = gparts[ii];
+          struct gpart temp_p = gparts[ii];
           gparts[ii] = gparts[jj];
           gparts[jj] = temp_p;
         }
       }
 
       /* Verify space_sort_struct. */
-      /* for ( int k = i ; k <= jj ; k++ )
-         if ( ind[k] > pivot ) {
-         message( "sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i,
-         N=%i." , k , ind[k] , pivot , i , j , N );
-         error( "Partition failed (<=pivot)." );
-         }
-         for ( int k = jj+1 ; k <= j ; k++ )
-         if ( ind[k] <= pivot ) {
-         message( "sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i,
-         N=%i." , k , ind[k] , pivot , i , j , N );
-         error( "Partition failed (>pivot)." );
-         } */
+      /* for (int k = i; k <= jj; k++)
+        if (ind[k] > pivot) {
+          message("sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i.", k,
+                  ind[k], pivot, i, j);
+          error("Partition failed (<=pivot).");
+        }
+      for (int k = jj + 1; k <= j; k++)
+        if (ind[k] <= pivot) {
+          message("sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i.", k,
+                  ind[k], pivot, i, j);
+          error("Partition failed (>pivot).");
+        } */
 
       /* Split-off largest interval. */
       if (jj - i > j - jj + 1) {
 
         /* Recurse on the left? */
         if (jj > i && pivot > min) {
-          qid = (last++) % qstack_size;
-          qstack[qid].i = i;
-          qstack[qid].j = jj;
-          qstack[qid].min = min;
-          qstack[qid].max = pivot;
-          qstack[qid].ready = 1;
-          if ((waiting++) >= qstack_size) error("Qstack overflow.");
+          qid = atomic_inc(&space_sort_struct.last) %
+                space_sort_struct.stack_size;
+          while (space_sort_struct.stack[qid].ready)
+            ;
+          space_sort_struct.stack[qid].i = i;
+          space_sort_struct.stack[qid].j = jj;
+          space_sort_struct.stack[qid].min = min;
+          space_sort_struct.stack[qid].max = pivot;
+          if (atomic_inc(&space_sort_struct.waiting) >=
+              space_sort_struct.stack_size)
+            error("Qstack overflow.");
+          space_sort_struct.stack[qid].ready = 1;
         }
 
         /* Recurse on the right? */
@@ -897,13 +959,18 @@ void space_gparts_sort(struct gpart *gparts, size_t *ind, size_t N, int min,
 
         /* Recurse on the right? */
         if (pivot + 1 < max) {
-          qid = (last++) % qstack_size;
-          qstack[qid].i = jj + 1;
-          qstack[qid].j = j;
-          qstack[qid].min = pivot + 1;
-          qstack[qid].max = max;
-          qstack[qid].ready = 1;
-          if ((waiting++) >= qstack_size) error("Qstack overflow.");
+          qid = atomic_inc(&space_sort_struct.last) %
+                space_sort_struct.stack_size;
+          while (space_sort_struct.stack[qid].ready)
+            ;
+          space_sort_struct.stack[qid].i = jj + 1;
+          space_sort_struct.stack[qid].j = j;
+          space_sort_struct.stack[qid].min = pivot + 1;
+          space_sort_struct.stack[qid].max = max;
+          if (atomic_inc(&space_sort_struct.waiting) >=
+              space_sort_struct.stack_size)
+            error("Qstack overflow.");
+          space_sort_struct.stack[qid].ready = 1;
         }
 
         /* Recurse on the left? */
@@ -916,18 +983,9 @@ void space_gparts_sort(struct gpart *gparts, size_t *ind, size_t N, int min,
 
     } /* loop over sub-intervals. */
 
-    waiting--;
+    atomic_dec(&space_sort_struct.waiting);
 
   } /* main loop. */
-
-  /* Verify space_sort_struct. */
-  /* for ( i = 1 ; i < N ; i++ )
-      if ( ind[i-1] > ind[i] )
-          error( "Sorting failed (ind[%i]=%i,ind[%i]=%i)." , i-1 , ind[i-1] , i
-     , ind[i] ); */
-
-  /* Clean up. */
-  free(qstack);
 }
 
 /**
@@ -1299,14 +1357,15 @@ struct cell *space_getcell(struct space *s) {
  * @brief Split the space into cells given the array of particles.
  *
  * @param s The #space to initialize.
+ * @param params The parsed parameter file.
  * @param dim Spatial dimensions of the domain.
  * @param parts Array of Gas particles.
  * @param gparts Array of Gravity particles.
- * @param Ngas The number of Gas particles in the space.
+ * @param Npart The number of Gas particles in the space.
  * @param Ngpart The number of Gravity particles in the space.
  * @param periodic flag whether the domain is periodic or not.
- * @param h_max The maximal interaction radius.
  * @param verbose Print messages to stdout or not
+ * @param dry_run If 1, just initialise stuff, don't do anything with the parts.
  *
  * Makes a grid of edge length > r_max and fills the particles
  * into the respective cells. Cells containing more than #space_splitsize
@@ -1314,65 +1373,114 @@ struct cell *space_getcell(struct space *s) {
  * recursively.
  */
 
-void space_init(struct space *s, double dim[3], struct part *parts,
-                struct gpart *gparts, size_t Ngas, size_t Ngpart, int periodic,
-                double h_max, int verbose) {
+void space_init(struct space *s, const struct swift_params *params,
+                double dim[3], struct part *parts, struct gpart *gparts,
+                size_t Npart, size_t Ngpart, int periodic, int verbose,
+                int dry_run) {
+
+  /* Clean-up everything */
+  bzero(s, sizeof(struct space));
 
   /* Store everything in the space. */
   s->dim[0] = dim[0];
   s->dim[1] = dim[1];
   s->dim[2] = dim[2];
   s->periodic = periodic;
-  s->nr_parts = Ngas;
-  s->size_parts = Ngas;
+  s->nr_parts = Npart;
+  s->size_parts = Npart;
   s->parts = parts;
   s->nr_gparts = Ngpart;
   s->size_gparts = Ngpart;
   s->gparts = gparts;
-  s->cell_min = h_max;
-  s->nr_queues = 1;
+  s->cell_min = parser_get_param_double(params, "SPH:max_smoothing_length");
+  s->nr_queues = 1; /* Temporary value until engine construction */
   s->size_parts_foreign = 0;
 
-  /* Check that all the gas particle positions are reasonable, wrap if periodic.
-   */
-  if (periodic) {
-    for (int k = 0; k < Ngas; k++)
-      for (int j = 0; j < 3; j++) {
-        while (parts[k].x[j] < 0) parts[k].x[j] += dim[j];
-        while (parts[k].x[j] >= dim[j]) parts[k].x[j] -= dim[j];
-      }
-  } else {
-    for (int k = 0; k < Ngas; k++)
-      for (int j = 0; j < 3; j++)
-        if (parts[k].x[j] < 0 || parts[k].x[j] >= dim[j])
-          error("Not all particles are within the specified domain.");
+  /* Get the constants for the scheduler */
+  space_maxsize = parser_get_param_int(params, "Scheduler:cell_max_size");
+  space_subsize = parser_get_param_int(params, "Scheduler:cell_sub_size");
+  space_splitsize = parser_get_param_int(params, "Scheduler:cell_split_size");
+  if(verbose)
+    message("max_size set to %d, sub_size set to %d, split_size set to %d",
+	    space_maxsize, space_subsize, space_splitsize);
+
+  /* Check that we have enough cells */
+  if (s->cell_min * 3 > dim[0] || s->cell_min * 3 > dim[1] ||
+      s->cell_min * 3 > dim[2])
+    error(
+        "Maximal smoothing length (%e) too large. Needs to be "
+        "smaller than 1/3 the simulation box size [%e %e %e]",
+        s->cell_min, dim[0], dim[1], dim[2]);
+
+  /* Apply h scaling */
+  const double scaling =
+      parser_get_param_double(params, "InitialConditions:h_scaling");
+  if (scaling != 1.0 && !dry_run) {
+    message("Re-scaling smoothing lengths by a factor %e", scaling);
+    for (size_t k = 0; k < Npart; k++) parts[k].h *= scaling;
   }
 
-  /* Same for the gparts */
-  if (periodic) {
-    for (int k = 0; k < Ngpart; k++)
-      for (int j = 0; j < 3; j++) {
-        while (gparts[k].x[j] < 0) gparts[k].x[j] += dim[j];
-        while (gparts[k].x[j] >= dim[j]) gparts[k].x[j] -= dim[j];
-      }
-  } else {
-    for (int k = 0; k < Ngpart; k++)
-      for (int j = 0; j < 3; j++)
-        if (gparts[k].x[j] < 0 || gparts[k].x[j] >= dim[j])
-          error("Not all particles are within the specified domain.");
+  /* Apply shift */
+  double shift[3] = {0.0, 0.0, 0.0};
+  shift[0] = parser_get_param_double(params, "InitialConditions:shift_x");
+  shift[1] = parser_get_param_double(params, "InitialConditions:shift_y");
+  shift[2] = parser_get_param_double(params, "InitialConditions:shift_z");
+  if ((shift[0] != 0 || shift[1] != 0 || shift[2] != 0) && !dry_run) {
+    message("Shifting particles by [%e %e %e]", shift[0], shift[1], shift[2]);
+    for (size_t k = 0; k < Npart; k++) {
+      parts[k].x[0] += shift[0];
+      parts[k].x[1] += shift[1];
+      parts[k].x[2] += shift[2];
+    }
+    for (size_t k = 0; k < Ngpart; k++) {
+      gparts[k].x[0] += shift[0];
+      gparts[k].x[1] += shift[1];
+      gparts[k].x[2] += shift[2];
+    }
+  }
+
+  if (!dry_run) {
+
+    /* Check that all the part positions are reasonable, wrap if periodic. */
+    if (periodic) {
+      for (int k = 0; k < Npart; k++)
+        for (int j = 0; j < 3; j++) {
+          while (parts[k].x[j] < 0) parts[k].x[j] += dim[j];
+          while (parts[k].x[j] >= dim[j]) parts[k].x[j] -= dim[j];
+        }
+    } else {
+      for (int k = 0; k < Npart; k++)
+        for (int j = 0; j < 3; j++)
+          if (parts[k].x[j] < 0 || parts[k].x[j] >= dim[j])
+            error("Not all particles are within the specified domain.");
+    }
+
+    /* Same for the gparts */
+    if (periodic) {
+      for (int k = 0; k < Ngpart; k++)
+        for (int j = 0; j < 3; j++) {
+          while (gparts[k].x[j] < 0) gparts[k].x[j] += dim[j];
+          while (gparts[k].x[j] >= dim[j]) gparts[k].x[j] -= dim[j];
+        }
+    } else {
+      for (int k = 0; k < Ngpart; k++)
+        for (int j = 0; j < 3; j++)
+          if (gparts[k].x[j] < 0 || gparts[k].x[j] >= dim[j])
+            error("Not all g-particles are within the specified domain.");
+    }
   }
 
   /* Allocate the extra parts array. */
   if (posix_memalign((void *)&s->xparts, xpart_align,
-                     Ngas * sizeof(struct xpart)) != 0)
+                     Npart * sizeof(struct xpart)) != 0)
     error("Failed to allocate xparts.");
-  bzero(s->xparts, Ngas * sizeof(struct xpart));
+  bzero(s->xparts, Npart * sizeof(struct xpart));
 
   /* Init the space lock. */
   if (lock_init(&s->lock) != 0) error("Failed to create space spin-lock.");
 
   /* Build the cells and the tasks. */
-  space_regrid(s, h_max, verbose);
+  if (!dry_run) space_regrid(s, s->cell_min, verbose);
 }
 
 /**
diff --git a/src/space.h b/src/space.h
index 91485ff7e2ebe9da8ab927748589ae9f71320803..88e2f6f52774651217c4ff24e25f549d8ae1e347 100644
--- a/src/space.h
+++ b/src/space.h
@@ -24,6 +24,7 @@
 
 /* Local includes. */
 #include "cell.h"
+#include "parser.h"
 #include "part.h"
 
 /* Forward-declare the engine to avoid cyclic includes. */
@@ -64,9 +65,6 @@ struct space {
   /* The minimum and maximum cutoff radii. */
   double h_max, cell_min;
 
-  /* Current time step for particles. */
-  float dt_step;
-
   /* Current maximum displacement for particles. */
   float dx_max;
 
@@ -106,6 +104,8 @@ struct space {
   /* Buffers for parts that we will receive from foreign cells. */
   struct part *parts_foreign;
   size_t nr_parts_foreign, size_parts_foreign;
+  struct gpart *gparts_foreign;
+  size_t nr_gparts_foreign, size_gparts_foreign;
 };
 
 /* Interval stack necessary for parallel particle sorting. */
@@ -116,8 +116,9 @@ struct qstack {
 };
 struct parallel_sort {
   struct part *parts;
+  struct gpart *gparts;
   struct xpart *xparts;
-  size_t *ind;
+  int *ind;
   struct qstack *stack;
   unsigned int stack_size;
   volatile unsigned int first, last, waiting;
@@ -125,16 +126,17 @@ struct parallel_sort {
 extern struct parallel_sort space_sort_struct;
 
 /* function prototypes. */
-void space_parts_sort(struct space *s, size_t *ind, size_t N, int min, int max,
+void space_parts_sort(struct space *s, int *ind, size_t N, int min, int max,
                       int verbose);
-void space_gparts_sort(struct gpart *gparts, size_t *ind, size_t N, int min,
-                       int max);
+void space_gparts_sort(struct space *s, int *ind, size_t N, int min, int max,
+                       int verbose);
 struct cell *space_getcell(struct space *s);
 int space_getsid(struct space *s, struct cell **ci, struct cell **cj,
                  double *shift);
-void space_init(struct space *s, double dim[3], struct part *parts,
-                struct gpart *gparts, size_t N, size_t Ngpart, int periodic,
-                double h_max, int verbose);
+void space_init(struct space *s, const struct swift_params *params,
+                double dim[3], struct part *parts, struct gpart *gparts,
+                size_t Npart, size_t Ngpart, int periodic, int verbose,
+                int dry_run);
 void space_map_cells_pre(struct space *s, int full,
                          void (*fun)(struct cell *c, void *data), void *data);
 void space_map_parts(struct space *s,
@@ -150,5 +152,6 @@ void space_recycle(struct space *s, struct cell *c);
 void space_split(struct space *s, struct cell *cells, int verbose);
 void space_do_split(struct space *s, struct cell *c);
 void space_do_parts_sort();
+void space_do_gparts_sort();
 void space_link_cleanup(struct space *s);
 #endif /* SWIFT_SPACE_H */
diff --git a/src/swift.h b/src/swift.h
index 9ab090dccd195ff4927d3e614e446b36d273f824..e568a28c888295affc9ec45b6d059d34f5b4bf04 100644
--- a/src/swift.h
+++ b/src/swift.h
@@ -27,7 +27,6 @@
 #include "cell.h"
 #include "clocks.h"
 #include "const.h"
-#include "const.h"
 #include "cycle.h"
 #include "debug.h"
 #include "engine.h"
@@ -38,7 +37,9 @@
 #include "map.h"
 #include "multipole.h"
 #include "parallel_io.h"
+#include "parser.h"
 #include "part.h"
+#include "partition.h"
 #include "queue.h"
 #include "runner.h"
 #include "scheduler.h"
@@ -47,9 +48,8 @@
 #include "space.h"
 #include "task.h"
 #include "timers.h"
-#include "units.h"
 #include "tools.h"
-#include "partition.h"
+#include "units.h"
 #include "version.h"
 
 #endif /* SWIFT_SWIFT_H */
diff --git a/src/task.c b/src/task.c
index 69109f9e6d4fe8730a317db46ea3862e65ab90b2..5f1475a46e4626e1f51db673d73fd84f86e6edb6 100644
--- a/src/task.c
+++ b/src/task.c
@@ -43,9 +43,10 @@
 
 /* Task type names. */
 const char *taskID_names[task_type_count] = {
-    "none",    "sort",    "self",      "pair",  "sub",        "init",
-    "ghost",   "drift",   "kick",      "send",  "recv",       "grav_pp",
-    "grav_mm", "grav_up", "grav_down", "psort", "split_cell", "rewait"};
+    "none",      "sort",       "self",       "pair",    "sub",
+    "init",      "ghost",      "drift",      "kick",    "send",
+    "recv",      "grav_pp",    "grav_mm",    "grav_up", "grav_down",
+    "part_sort", "gpart_sort", "split_cell", "rewait"};
 
 const char *subtaskID_names[task_type_count] = {"none",  "density",
                                                 "force", "grav"};
@@ -78,9 +79,10 @@ float task_overlap(const struct task *ta, const struct task *tb) {
   /* First check if any of the two tasks are of a type that don't
      use cells. */
   if (ta == NULL || tb == NULL || ta->type == task_type_none ||
-      ta->type == task_type_psort || ta->type == task_type_split_cell ||
-      ta->type == task_type_rewait || tb->type == task_type_none ||
-      tb->type == task_type_psort || tb->type == task_type_split_cell ||
+      ta->type == task_type_part_sort || ta->type == task_type_gpart_sort ||
+      ta->type == task_type_split_cell || ta->type == task_type_rewait ||
+      tb->type == task_type_none || tb->type == task_type_part_sort ||
+      tb->type == task_type_gpart_sort || tb->type == task_type_split_cell ||
       tb->type == task_type_rewait)
     return 0.0f;
 
@@ -145,7 +147,7 @@ int task_lock(struct task *t) {
 
 #ifdef WITH_MPI
     /* Check the status of the MPI request. */
-    int res, err;
+    int res = 0, err = 0;
     MPI_Status stat;
     if ((err = MPI_Test(&t->req, &res, &stat)) != MPI_SUCCESS) {
       char buff[MPI_MAX_ERROR_STRING];
diff --git a/src/task.h b/src/task.h
index b86631cc49bfad102302e3bab380bfb5eb8ed1e0..9c0ba6087d772d7362a98bc40a838c6fa3713166 100644
--- a/src/task.h
+++ b/src/task.h
@@ -45,7 +45,8 @@ enum task_types {
   task_type_grav_mm,
   task_type_grav_up,
   task_type_grav_down,
-  task_type_psort,
+  task_type_part_sort,
+  task_type_gpart_sort,
   task_type_split_cell,
   task_type_rewait,
   task_type_count
diff --git a/src/tools.c b/src/tools.c
index 5feba7759f730faea1f38ceb9835f2076bc37a56..d25b7401a1e0515c650333b41193d54b5e155d39 100644
--- a/src/tools.c
+++ b/src/tools.c
@@ -236,6 +236,53 @@ void pairs_all_density(struct runner *r, struct cell *ci, struct cell *cj) {
   }
 }
 
+void self_all_density(struct runner *r, struct cell *ci) {
+  float r2, hi, hj, hig2, hjg2, dxi[3];  //, dxj[3];
+  struct part *pi, *pj;
+
+  /* Implements a double-for loop and checks every interaction */
+  for (int i = 0; i < ci->count; ++i) {
+
+    pi = &ci->parts[i];
+    hi = pi->h;
+    hig2 = hi * hi * kernel_gamma2;
+
+    for (int j = i + 1; j < ci->count; ++j) {
+
+      pj = &ci->parts[j];
+      hj = pj->h;
+      hjg2 = hj * hj * kernel_gamma2;
+
+      if (pi == pj) continue;
+
+      /* Pairwise distance */
+      r2 = 0.0f;
+      for (int k = 0; k < 3; k++) {
+        dxi[k] = ci->parts[i].x[k] - ci->parts[j].x[k];
+        r2 += dxi[k] * dxi[k];
+      }
+
+      /* Hit or miss? */
+      if (r2 < hig2) {
+
+        /* Interact */
+        runner_iact_nonsym_density(r2, dxi, hi, hj, pi, pj);
+      }
+
+      /* Hit or miss? */
+      if (r2 < hjg2) {
+
+        dxi[0] = -dxi[0];
+        dxi[1] = -dxi[1];
+        dxi[2] = -dxi[2];
+
+        /* Interact */
+        runner_iact_nonsym_density(r2, dxi, hj, hi, pj, pi);
+      }
+    }
+  }
+}
+
 void pairs_single_grav(double *dim, long long int pid,
                        struct gpart *__restrict__ parts, int N, int periodic) {
 
@@ -253,9 +300,9 @@ void pairs_single_grav(double *dim, long long int pid,
       break;
   if (k == N) error("Part not found.");
   pi = parts[k];
-  pi.a[0] = 0.0f;
-  pi.a[1] = 0.0f;
-  pi.a[2] = 0.0f;
+  pi.a_grav[0] = 0.0f;
+  pi.a_grav[1] = 0.0f;
+  pi.a_grav[2] = 0.0f;
 
   /* Loop over all particle pairs. */
   for (k = 0; k < N; k++) {
@@ -273,15 +320,15 @@ void pairs_single_grav(double *dim, long long int pid,
     }
     r2 = fdx[0] * fdx[0] + fdx[1] * fdx[1] + fdx[2] * fdx[2];
     runner_iact_grav(r2, fdx, &pi, &pj);
-    a[0] += pi.a[0];
-    a[1] += pi.a[1];
-    a[2] += pi.a[2];
-    aabs[0] += fabsf(pi.a[0]);
-    aabs[1] += fabsf(pi.a[1]);
-    aabs[2] += fabsf(pi.a[2]);
-    pi.a[0] = 0.0f;
-    pi.a[1] = 0.0f;
-    pi.a[2] = 0.0f;
+    a[0] += pi.a_grav[0];
+    a[1] += pi.a_grav[1];
+    a[2] += pi.a_grav[2];
+    aabs[0] += fabsf(pi.a_grav[0]);
+    aabs[1] += fabsf(pi.a_grav[1]);
+    aabs[2] += fabsf(pi.a_grav[2]);
+    pi.a_grav[0] = 0.0f;
+    pi.a_grav[1] = 0.0f;
+    pi.a_grav[2] = 0.0f;
   }
 
   /* Dump the result. */
diff --git a/src/tools.h b/src/tools.h
index 59646291bda46a7dd0f5a34e158e3e0a6f21d3ca..ccffc77ceb8a967fd40c3737651ba75d529eee0f 100644
--- a/src/tools.h
+++ b/src/tools.h
@@ -33,6 +33,7 @@ void pairs_single_density(double *dim, long long int pid,
                           struct part *__restrict__ parts, int N, int periodic);
 
 void pairs_all_density(struct runner *r, struct cell *ci, struct cell *cj);
+void self_all_density(struct runner *r, struct cell *ci);
 
 void pairs_n2(double *dim, struct part *__restrict__ parts, int N,
               int periodic);
diff --git a/src/units.c b/src/units.c
index 8c9fd14452e9e1fdfe029ac89d22d7cd43aa0ef7..184dbe8a0df000008dba1d7003558d83b1f08cad 100644
--- a/src/units.c
+++ b/src/units.c
@@ -43,17 +43,24 @@
 
 /**
  * @brief Initialises the UnitSystem structure with the constants given in
- * const.h
- * @param us The UnitSystem to initialize
+ * the parameter file.
+ *
+ * @param us The UnitSystem to initialize.
+ * @param params The parsed parameter file.
  */
-
-void initUnitSystem(struct UnitSystem* us) {
-  us->UnitMass_in_cgs = const_unit_mass_in_cgs;
-  us->UnitLength_in_cgs = const_unit_length_in_cgs;
-  us->UnitTime_in_cgs = 1. / ((double)const_unit_velocity_in_cgs /
-                              ((double)const_unit_length_in_cgs));
-  us->UnitCurrent_in_cgs = 1.;
-  us->UnitTemperature_in_cgs = 1.;
+void units_init(struct UnitSystem* us, const struct swift_params* params) {
+
+  us->UnitMass_in_cgs =
+      parser_get_param_double(params, "UnitSystem:UnitMass_in_cgs");
+  us->UnitLength_in_cgs =
+      parser_get_param_double(params, "UnitSystem:UnitLength_in_cgs");
+  const double unitVelocity =
+      parser_get_param_double(params, "UnitSystem:UnitVelocity_in_cgs");
+  us->UnitTime_in_cgs = us->UnitLength_in_cgs / unitVelocity;
+  us->UnitCurrent_in_cgs =
+      parser_get_param_double(params, "UnitSystem:UnitCurrent_in_cgs");
+  us->UnitTemperature_in_cgs =
+      parser_get_param_double(params, "UnitSystem:UnitTemp_in_cgs");
 }
 
 /**
@@ -61,7 +68,8 @@ void initUnitSystem(struct UnitSystem* us) {
  * @param us The UnitSystem used
  * @param baseUnit The base unit
  */
-double getBaseUnit(struct UnitSystem* us, enum BaseUnits baseUnit) {
+double units_get_base_unit(const struct UnitSystem* us,
+                           enum BaseUnits baseUnit) {
   switch (baseUnit) {
     case UNIT_MASS:
       return us->UnitMass_in_cgs;
@@ -83,7 +91,7 @@ double getBaseUnit(struct UnitSystem* us, enum BaseUnits baseUnit) {
  * @brief Returns the base unit symbol
  * @param baseUnit The base unit
  */
-const char* getBaseUnitSymbol(enum BaseUnits baseUnit) {
+const char* units_get_base_unit_symbol(enum BaseUnits baseUnit) {
   switch (baseUnit) {
     case UNIT_MASS:
       return "U_M";
@@ -105,7 +113,7 @@ const char* getBaseUnitSymbol(enum BaseUnits baseUnit) {
  * @brief Returns the base unit symbol in the cgs system
  * @param baseUnit The base unit
  */
-const char* getBaseUnitCGSSymbol(enum BaseUnits baseUnit) {
+const char* units_get_base_unit_CGS_symbol(enum BaseUnits baseUnit) {
   switch (baseUnit) {
     case UNIT_MASS:
       return "g";
@@ -123,8 +131,8 @@ const char* getBaseUnitCGSSymbol(enum BaseUnits baseUnit) {
   return "";
 }
 
-void getBaseUnitExponantsArray(float baseUnitsExp[5],
-                               enum UnitConversionFactor unit) {
+void units_get_base_unit_exponants_array(float baseUnitsExp[5],
+                                         enum UnitConversionFactor unit) {
   switch (unit) {
     case UNIT_CONV_NO_UNITS:
       break;
@@ -265,12 +273,13 @@ void getBaseUnitExponantsArray(float baseUnitsExp[5],
  * @param us The system of units in use
  * @param unit The unit to convert
  */
-double conversionFactor(struct UnitSystem* us, enum UnitConversionFactor unit) {
+double units_conversion_factor(const struct UnitSystem* us,
+                               enum UnitConversionFactor unit) {
   float baseUnitsExp[5] = {0.f};
 
-  getBaseUnitExponantsArray(baseUnitsExp, unit);
+  units_get_base_unit_exponants_array(baseUnitsExp, unit);
 
-  return generalConversionFactor(us, baseUnitsExp);
+  return units_general_conversion_factor(us, baseUnitsExp);
 }
 
 /**
@@ -278,12 +287,13 @@ double conversionFactor(struct UnitSystem* us, enum UnitConversionFactor unit) {
  * @param us The system of units in use
  * @param unit The unit to convert
  */
-float hFactor(struct UnitSystem* us, enum UnitConversionFactor unit) {
+float units_h_factor(const struct UnitSystem* us,
+                     enum UnitConversionFactor unit) {
   float baseUnitsExp[5] = {0.f};
 
-  getBaseUnitExponantsArray(baseUnitsExp, unit);
+  units_get_base_unit_exponants_array(baseUnitsExp, unit);
 
-  return generalhFactor(us, baseUnitsExp);
+  return units_general_h_factor(us, baseUnitsExp);
 }
 
 /**
@@ -291,25 +301,26 @@ float hFactor(struct UnitSystem* us, enum UnitConversionFactor unit) {
  * @param us The system of units in use
  * @param unit The unit to convert
  */
-float aFactor(struct UnitSystem* us, enum UnitConversionFactor unit) {
+float units_a_factor(const struct UnitSystem* us,
+                     enum UnitConversionFactor unit) {
   float baseUnitsExp[5] = {0.f};
 
-  getBaseUnitExponantsArray(baseUnitsExp, unit);
+  units_get_base_unit_exponants_array(baseUnitsExp, unit);
 
-  return generalaFactor(us, baseUnitsExp);
+  return units_general_a_factor(us, baseUnitsExp);
 }
 
 /**
  * @brief Returns a string containing the exponents of the base units making up
  * the conversion factors
  */
-void conversionString(char* buffer, struct UnitSystem* us,
-                      enum UnitConversionFactor unit) {
+void units_conversion_string(char* buffer, const struct UnitSystem* us,
+                             enum UnitConversionFactor unit) {
   float baseUnitsExp[5] = {0.f};
 
-  getBaseUnitExponantsArray(baseUnitsExp, unit);
+  units_get_base_unit_exponants_array(baseUnitsExp, unit);
 
-  generalConversionString(buffer, us, baseUnitsExp);
+  units_general_conversion_string(buffer, us, baseUnitsExp);
 }
 
 /**
@@ -319,14 +330,14 @@ void conversionString(char* buffer, struct UnitSystem* us,
  * @param baseUnitsExponants The exponent of each base units required to form
  * the desired quantity. See conversionFactor() for a working example
  */
-double generalConversionFactor(struct UnitSystem* us,
-                               float baseUnitsExponants[5]) {
+double units_general_conversion_factor(const struct UnitSystem* us,
+                                       float baseUnitsExponants[5]) {
   double factor = 1.;
   int i;
 
   for (i = 0; i < 5; ++i)
     if (baseUnitsExponants[i] != 0)
-      factor *= pow(getBaseUnit(us, i), baseUnitsExponants[i]);
+      factor *= pow(units_get_base_unit(us, i), baseUnitsExponants[i]);
   return factor;
 }
 
@@ -337,7 +348,8 @@ double generalConversionFactor(struct UnitSystem* us,
  * @param baseUnitsExponants The exponent of each base units required to form
  * the desired quantity. See conversionFactor() for a working example
  */
-float generalhFactor(struct UnitSystem* us, float baseUnitsExponants[5]) {
+float units_general_h_factor(const struct UnitSystem* us,
+                             float baseUnitsExponants[5]) {
   float factor_exp = 0.f;
 
   factor_exp += -baseUnitsExponants[UNIT_MASS];
@@ -354,7 +366,8 @@ float generalhFactor(struct UnitSystem* us, float baseUnitsExponants[5]) {
  * @param baseUnitsExponants The exponent of each base units required to form
  * the desired quantity. See conversionFactor() for a working example
  */
-float generalaFactor(struct UnitSystem* us, float baseUnitsExponants[5]) {
+float units_general_a_factor(const struct UnitSystem* us,
+                             float baseUnitsExponants[5]) {
   float factor_exp = 0.f;
 
   factor_exp += baseUnitsExponants[UNIT_LENGTH];
@@ -371,11 +384,11 @@ float generalaFactor(struct UnitSystem* us, float baseUnitsExponants[5]) {
  * @param baseUnitsExponants The exponent of each base units required to form
  * the desired quantity. See conversionFactor() for a working example
  */
-void generalConversionString(char* buffer, struct UnitSystem* us,
-                             float baseUnitsExponants[5]) {
+void units_general_conversion_string(char* buffer, const struct UnitSystem* us,
+                                     float baseUnitsExponants[5]) {
   char temp[14];
-  double a_exp = generalaFactor(us, baseUnitsExponants);
-  double h_exp = generalhFactor(us, baseUnitsExponants);
+  double a_exp = units_general_a_factor(us, baseUnitsExponants);
+  double h_exp = units_general_h_factor(us, baseUnitsExponants);
   int i;
 
   /* Check whether we are unitless or not */
@@ -415,12 +428,13 @@ void generalConversionString(char* buffer, struct UnitSystem* us,
       if (baseUnitsExponants[i] == 0.)
         sprintf(temp, " ");
       else if (baseUnitsExponants[i] == 1.)
-        sprintf(temp, "%s ", getBaseUnitSymbol(i));
+        sprintf(temp, "%s ", units_get_base_unit_symbol(i));
       else if (remainder(baseUnitsExponants[i], 1.) == 0)
-        sprintf(temp, "%s^%d ", getBaseUnitSymbol(i),
+        sprintf(temp, "%s^%d ", units_get_base_unit_symbol(i),
                 (int)baseUnitsExponants[i]);
       else
-        sprintf(temp, "%s^%7.4f ", getBaseUnitSymbol(i), baseUnitsExponants[i]);
+        sprintf(temp, "%s^%7.4f ", units_get_base_unit_symbol(i),
+                baseUnitsExponants[i]);
       strncat(buffer, temp, 12);
     }
 
@@ -432,12 +446,12 @@ void generalConversionString(char* buffer, struct UnitSystem* us,
       if (baseUnitsExponants[i] == 0.)
         continue;
       else if (baseUnitsExponants[i] == 1.)
-        sprintf(temp, "%s ", getBaseUnitCGSSymbol(i));
+        sprintf(temp, "%s ", units_get_base_unit_CGS_symbol(i));
       else if (remainder(baseUnitsExponants[i], 1.) == 0)
-        sprintf(temp, "%s^%d ", getBaseUnitCGSSymbol(i),
+        sprintf(temp, "%s^%d ", units_get_base_unit_CGS_symbol(i),
                 (int)baseUnitsExponants[i]);
       else
-        sprintf(temp, "%s^%7.4f ", getBaseUnitCGSSymbol(i),
+        sprintf(temp, "%s^%7.4f ", units_get_base_unit_CGS_symbol(i),
                 baseUnitsExponants[i]);
       strncat(buffer, temp, 12);
     }
diff --git a/src/units.h b/src/units.h
index 1b977529784c1ef3069e1e932b16fd0b87073786..3e349dc16787cd4052a3e9205b21dce3c3732448 100644
--- a/src/units.h
+++ b/src/units.h
@@ -19,6 +19,12 @@
 #ifndef SWIFT_UNITS_H
 #define SWIFT_UNITS_H
 
+/* Config parameters. */
+#include "../config.h"
+
+/* Local includes. */
+#include "parser.h"
+
 /**
  * @brief The unit system used internally.
  *
@@ -86,74 +92,25 @@ enum UnitConversionFactor {
   UNIT_CONV_TEMPERATURE
 };
 
-/**
- * @brief Initialises the UnitSystem structure with the constants given in
- * const.h
- */
-void initUnitSystem(struct UnitSystem*);
-
-/**
- * @brief Returns the base unit conversion factor for a given unit system
- */
-double getBaseUnit(struct UnitSystem*, enum BaseUnits);
-
-/**
- * @brief Returns the base unit symbol in the cgs system
- */
-const char* getBaseUnitSymbol(enum BaseUnits);
-
-/**
- * @brief Returns the base unit symbol in the cgs system
- */
-const char* getBaseUnitCGSSymbol(enum BaseUnits);
-
-/**
- * @brief Returns the conversion factor for a given unit (expressed in terms of
- * the 5 fundamental units) in the chosen unit system
- */
-double generalConversionFactor(struct UnitSystem* us,
-                               float baseUnitsExponants[5]);
-
-/**
- * @brief Returns the conversion factor for a given unit in the chosen unit
- * system
- */
-double conversionFactor(struct UnitSystem* us, enum UnitConversionFactor unit);
-
-/**
- * @brief Returns the h factor for a given unit (expressed in terms of the 5
- * fundamental units) in the chosen unit system
- */
-float generalhFactor(struct UnitSystem* us, float baseUnitsExponants[5]);
-
-/**
- * @brief Returns the h factor for a given unit in the chosen unit system
- */
-float hFactor(struct UnitSystem* us, enum UnitConversionFactor unit);
-
-/**
- * @brief Returns the scaling factor for a given unit (expressed in terms of the
- * 5 fundamental units) in the chosen unit system
- */
-float generalaFactor(struct UnitSystem* us, float baseUnitsExponants[5]);
-
-/**
- * @brief Returns the scaling factor for a given unit in the chosen unit system
- */
-float aFactor(struct UnitSystem* us, enum UnitConversionFactor unit);
-
-/**
- * @brief Returns a string containing the exponents of the base units making up
- * the conversion factors (expressed in terms of the 5 fundamental units)
- */
-void generalConversionString(char* buffer, struct UnitSystem* us,
+void units_init(struct UnitSystem*, const struct swift_params*);
+double units_get_base_unit(const struct UnitSystem*, enum BaseUnits);
+const char* units_get_base_unit_symbol(enum BaseUnits);
+const char* units_get_base_unit_CGS_symbol(enum BaseUnits);
+double units_general_conversion_factor(const struct UnitSystem* us,
+                                       float baseUnitsExponants[5]);
+double units_conversion_factor(const struct UnitSystem* us,
+                               enum UnitConversionFactor unit);
+float units_general_h_factor(const struct UnitSystem* us,
                              float baseUnitsExponants[5]);
-
-/**
- * @brief Returns a string containing the exponents of the base units making up
- * the conversion factors
- */
-void conversionString(char* buffer, struct UnitSystem* us,
-                      enum UnitConversionFactor unit);
+float units_h_factor(const struct UnitSystem* us,
+                     enum UnitConversionFactor unit);
+float units_general_a_factor(const struct UnitSystem* us,
+                             float baseUnitsExponants[5]);
+float units_a_factor(const struct UnitSystem* us,
+                     enum UnitConversionFactor unit);
+void units_general_conversion_string(char* buffer, const struct UnitSystem* us,
+                                     float baseUnitsExponants[5]);
+void units_conversion_string(char* buffer, const struct UnitSystem* us,
+                             enum UnitConversionFactor unit);
 
 #endif /* SWIFT_UNITS_H */
diff --git a/src/version.c b/src/version.c
index 6aeee2d8bcbc4652f679bbb786e9e512ebc4caa6..27841a16019a69442e66b21c327f4241e440fb12 100644
--- a/src/version.c
+++ b/src/version.c
@@ -241,7 +241,7 @@ const char *metis_version(void) {
  */
 void greetings(void) {
 
-  printf(" Welcome to the cosmological code\n");
+  printf(" Welcome to the cosmological hydrodynamical code\n");
   printf("    ______       _________________\n");
   printf("   / ___/ |     / /  _/ ___/_  __/\n");
   printf("   \\__ \\| | /| / // // /_   / /   \n");
diff --git a/tests/Makefile.am b/tests/Makefile.am
index f0bfbefd3c7f4591134d1707c4ac9bf63278e855..b53a08615c5a8c7c2c31475bf7207522f8b9a58c 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -21,10 +21,12 @@ AM_CFLAGS = -I../src $(HDF5_CPPFLAGS) -DTIMER
 AM_LDFLAGS = ../src/.libs/libswiftsim.a $(HDF5_LDFLAGS) $(HDF5_LIBS)
 
 # List of programs and scripts to run in the test suite
-TESTS = testGreetings testReading.sh testSingle testTimeIntegration
+TESTS = testGreetings testReading.sh testSingle testPair.sh testPairPerturbed.sh \
+	test27cells.sh test27cellsPerturbed.sh testParser.sh
 
 # List of test programs to compile
-check_PROGRAMS = testGreetings testReading testSingle testTimeIntegration testSPHStep testVectorize
+check_PROGRAMS = testGreetings testReading testSingle testTimeIntegration \
+		 testSPHStep testPair test27cells testParser testKernel
 
 # Sources for the individual programs
 testGreetings_SOURCES = testGreetings.c
@@ -37,7 +39,15 @@ testSPHStep_SOURCES = testSPHStep.c
 
 testSingle_SOURCES = testSingle.c
 
-testVectorize_SOURCES = testVectorize.c
+testPair_SOURCES = testPair.c
+
+test27cells_SOURCES = test27cells.c
+
+testParser_SOURCES = testParser.c
+
+testKernel_SOURCES = testKernel.c
 
 # Files necessary for distribution
-EXTRA_DIST = testReading.sh makeInput.py
+EXTRA_DIST = testReading.sh makeInput.py testPair.sh testPairPerturbed.sh \
+	     test27cells.sh test27cellsPerturbed.sh tolerance.dat testParser.sh \
+	     testParserInput.yaml
diff --git a/tests/difffloat.py b/tests/difffloat.py
new file mode 100644
index 0000000000000000000000000000000000000000..d4b48d54cbb9f292ed49b3cc142826cd1d71f87e
--- /dev/null
+++ b/tests/difffloat.py
@@ -0,0 +1,127 @@
+###############################################################################
+ # This file is part of SWIFT.
+ # Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ # 
+ # This program is free software: you can redistribute it and/or modify
+ # it under the terms of the GNU Lesser General Public License as published
+ # by the Free Software Foundation, either version 3 of the License, or
+ # (at your option) any later version.
+ # 
+ # This program is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ # GNU General Public License for more details.
+ # 
+ # You should have received a copy of the GNU Lesser General Public License
+ # along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ # 
+ ##############################################################################
+
+# Compares the content of two ASCII tables of floats line by line and
+# reports all differences beyond the given tolerances.
+# Comparisons are done both in absolute and relative terms.
+#
+# Usage: python difffloat.py FILE1 FILE2 [TOLERANCE_FILE [N]]
+#
+# Individual tolerances for each column can be provided in a file made of
+# two rows: the absolute tolerances followed by the relative tolerances.
+# The (cube root of) the number of lines to check is provided as
+# an optional 4th argument.
+
+from __future__ import print_function
+
+import sys
+
+import numpy as np
+
+abs_tol = 1e-7
+rel_tol = 1e-7
+
+if len(sys.argv) < 3:
+    print("Usage: %s FILE1 FILE2 [TOLERANCE_FILE [N]]" % sys.argv[0])
+    sys.exit(1)
+
+file1 = sys.argv[1]
+file2 = sys.argv[2]
+
+fileTol = ""
+if len(sys.argv) >= 4:
+    fileTol = sys.argv[3]
+
+number_to_check = -1
+if len(sys.argv) == 5:
+    number_to_check = int(sys.argv[4])
+
+# ndmin=2 keeps tables with a single row (or column) two-dimensional
+data1 = np.loadtxt(file1, ndmin=2)
+data2 = np.loadtxt(file2, ndmin=2)
+
+if data1.shape != data2.shape:
+    print("Non-matching array sizes in the files", file1, "and", file2, ".")
+    sys.exit(1)
+
+n_lines, n_columns = data1.shape
+
+if fileTol == "":
+    print("Absolute difference tolerance:", abs_tol)
+    print("Relative difference tolerance:", rel_tol)
+    absTol = np.ones(n_columns) * abs_tol
+    relTol = np.ones(n_columns) * rel_tol
+else:
+    dataTol = np.loadtxt(fileTol, ndmin=2)
+
+    # A malformed tolerance file would make every comparison meaningless
+    if dataTol.shape[0] != 2:
+        print("Incorrect number of lines in tolerance file '%s'." % fileTol)
+        sys.exit(1)
+    if dataTol.shape[1] != n_columns:
+        print("Incorrect number of columns in tolerance file '%s'." % fileTol)
+        sys.exit(1)
+
+    print("Tolerances read from file")
+    absTol = dataTol[0, :]
+    relTol = dataTol[1, :]
+
+if number_to_check > 0:
+    n_lines_to_check = min(number_to_check**3, n_lines)
+    print("Checking the first %d particles." % n_lines_to_check)
+else:
+    n_lines_to_check = n_lines
+    print("Checking all particles in the file.")
+
+error = False
+for i in range(n_lines_to_check):
+    for j in range(n_columns):
+
+        a = data1[i, j]
+        b = data2[i, j]
+        abs_diff = abs(a - b)
+
+        # Symmetric relative difference, taken as 0 when a + b vanishes
+        denom = abs(a + b)
+        if denom > 0:
+            rel_diff = abs_diff / denom
+        else:
+            rel_diff = 0.
+
+        if abs_diff > absTol[j]:
+            print("Absolute difference larger than tolerance (%e) for particle %d, column %d:" % (absTol[j], i, j))
+            print("%10s:           a = %e" % ("File 1", a))
+            print("%10s:           b = %e" % ("File 2", b))
+            print("%10s:       |a-b| = %e" % ("Difference", abs_diff))
+            print("")
+            error = True
+
+        if rel_diff > relTol[j]:
+            print("Relative difference larger than tolerance (%e) for particle %d, column %d:" % (relTol[j], i, j))
+            print("%10s:           a = %e" % ("File 1", a))
+            print("%10s:           b = %e" % ("File 2", b))
+            print("%10s: |a-b|/|a+b| = %e" % ("Difference", rel_diff))
+            print("")
+            error = True
+
+if error:
+    sys.exit(1)
+else:
+    print("No differences found")
+    sys.exit(0)
diff --git a/tests/test27cells.c b/tests/test27cells.c
new file mode 100644
index 0000000000000000000000000000000000000000..7915511eed50a229a94eda6bb338607099303421
--- /dev/null
+++ b/tests/test27cells.c
@@ -0,0 +1,413 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (C) 2015 Matthieu Schaller (matthieu.schaller@durham.ac.uk).
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+#include <fenv.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <unistd.h>
+#include "swift.h"
+
+enum velocity_types { /* Velocity fields that make_cell() can set up */
+  velocity_zero,      /* All three components set to 0 */
+  velocity_random,    /* Components drawn by random_uniform(-0.05, 0.05) */
+  velocity_divergent, /* v = x - 1.5 * size along each axis */
+  velocity_rotating   /* v = (y, -x, 0) */
+};
+
+/**
+ * @brief Returns a random number (uniformly distributed) in [a,b[
+ */
+double random_uniform(double a, double b) {
+  return (rand() / ((double)RAND_MAX + 1.)) * (b - a) + a; /* ratio < 1 */
+}
+
+
+/**
+ * @brief Constructs a cell and all of its particles in a valid state prior to
+ * a DOPAIR or DOSELF calculation.
+ *
+ * @param n The cube root of the number of particles.
+ * @param offset The position of the cell offset from (0,0,0).
+ * @param size The cell size.
+ * @param h The smoothing length of the particles in units of the inter-particle separation.
+ * @param density The density of the fluid.
+ * @param partId The running counter of IDs.
+ * @param pert The perturbation to apply to the particles in the cell in units of the inter-particle separation.
+ * @param vel The type of velocity field (zero, random, divergent, rotating)
+ * @return The new cell, to be released with clean_up().
+ */
+struct cell *make_cell(size_t n, double *offset, double size, double h,
+                       double density, long long *partId, double pert,
+                       enum velocity_types vel) {
+  const size_t count = n * n * n;
+  const double volume = size * size * size;
+  struct cell *cell = malloc(sizeof(struct cell));
+  if (cell == NULL) error("couldn't allocate the cell");
+  bzero(cell, sizeof(struct cell));
+
+  if (posix_memalign((void **)&cell->parts, part_align,
+                     count * sizeof(struct part)) != 0) {
+    error("couldn't allocate particles, no. of particles: %d", (int)count);
+  }
+  bzero(cell->parts, count * sizeof(struct part));
+
+  /* Construct the parts */
+  struct part *part = cell->parts;
+  for (size_t x = 0; x < n; ++x) {
+    for (size_t y = 0; y < n; ++y) {
+      for (size_t z = 0; z < n; ++z) {
+        /* Grid point, shifted by up to +/- pert / 2 grid spacings per axis */
+        const size_t idx[3] = {x, y, z};
+        for (int d = 0; d < 3; ++d)
+          part->x[d] = offset[d] + size * (idx[d] + 0.5 +
+                       random_uniform(-0.5, 0.5) * pert) / (float)n;
+        switch (vel) {
+          case velocity_zero:
+            part->v[0] = 0.f;
+            part->v[1] = 0.f;
+            part->v[2] = 0.f;
+            break;
+          case velocity_random:
+            part->v[0] = random_uniform(-0.05, 0.05);
+            part->v[1] = random_uniform(-0.05, 0.05);
+            part->v[2] = random_uniform(-0.05, 0.05);
+            break;
+          case velocity_divergent:
+            part->v[0] = part->x[0] - 1.5 * size;
+            part->v[1] = part->x[1] - 1.5 * size;
+            part->v[2] = part->x[2] - 1.5 * size;
+            break;
+          case velocity_rotating:
+            part->v[0] = part->x[1];
+            part->v[1] = -part->x[0];
+            part->v[2] = 0.f;
+            break;
+          default:
+            error("Unknown velocity type: %d", (int)vel);
+        }
+        part->h = size * h / (float)n;
+        part->id = ++(*partId);
+        part->mass = density * volume / count;
+        part->ti_begin = 0;
+        part->ti_end = 1;
+        ++part;
+      }
+    }
+  }
+
+  /* Cell properties */
+  cell->split = 0;
+  cell->h_max = h; /* NOTE(review): parts carry size * h / n - confirm */
+  cell->count = count;
+  cell->dx_max = 0.;
+  cell->h[0] = size;
+  cell->h[1] = size;
+  cell->h[2] = size;
+  cell->loc[0] = offset[0];
+  cell->loc[1] = offset[1];
+  cell->loc[2] = offset[2];
+
+  cell->ti_end_min = 1;
+  cell->ti_end_max = 1;
+
+  cell->sorted = 0;
+  cell->sort = NULL;
+  cell->sortsize = 0;
+  runner_dosort(NULL, cell, 0x1FFF, 0); /* 0x1FFF: the 13 low bits set */
+
+  return cell;
+}
+
+void clean_up(struct cell *ci) { /* Releases a cell created by make_cell() */
+  free(ci->parts); /* particle array obtained from posix_memalign() */
+  free(ci->sort);  /* NOTE(review): presumably allocated by runner_dosort() */
+  free(ci);        /* the cell structure itself */
+}
+
+/**
+ * @brief Resets every particle of a cell ahead of a density calculation
+ */
+void zero_particle_fields(struct cell *c) {
+  for (size_t k = 0; k < c->count; ++k) {
+    struct part *const p = &c->parts[k];
+    p->rho = 0.f;
+    p->rho_dh = 0.f;
+    hydro_init_part(p);
+  }
+}
+
+/**
+ * @brief Finishes the density loop by calling hydro_end_density() on each part
+ */
+void end_calculation(struct cell *c) {
+
+  struct part *const parts = c->parts;
+  const size_t count = c->count;
+  for (size_t k = 0; k < count; ++k) hydro_end_density(&parts[k], 1);
+}
+
+/**
+ * @brief Writes one line per particle of a cell to an already opened file
+ *
+ * @param file The output stream.
+ * @param c The cell whose particles are written.
+ */
+static void dump_cell_particles(FILE *file, const struct cell *c) {
+
+  for (size_t pid = 0; pid < c->count; pid++) {
+    const struct part *p = &c->parts[pid];
+    fprintf(file,
+            "%6llu %10f %10f %10f %10f %10f %10f %13e %13e %13e %13e %13e "
+            "%13e %13e %13e\n",
+            p->id, p->x[0], p->x[1], p->x[2], p->v[0], p->v[1], p->v[2],
+            p->rho, p->rho_dh, p->density.wcount, p->density.wcount_dh,
+#ifdef GADGET2_SPH
+            p->div_v, p->density.rot_v[0], p->density.rot_v[1],
+            p->density.rot_v[2]
+#else
+            0., 0., 0., 0.
+#endif
+            );
+  }
+}
+
+/**
+ * @brief Dump all the particles to a file
+ *
+ * The particles of the main cell come first, followed by those of the other
+ * cells. A failure to open the file is reported through error().
+ *
+ * @param fileName The name of the file to (over)write.
+ * @param main_cell The cell written first; skipped in the second pass.
+ * @param cells The 27 cells, stored at index i * 9 + j * 3 + k.
+ */
+void dump_particle_fields(char *fileName, struct cell *main_cell,
+                          struct cell **cells) {
+
+  FILE *file = fopen(fileName, "w");
+  if (file == NULL) error("Could not open '%s' for writing.", fileName);
+
+  /* Write header */
+  fprintf(file,
+          "# %4s %10s %10s %10s %10s %10s %10s %13s %13s %13s %13s %13s "
+          "%13s %13s %13s\n",
+          "ID", "pos_x", "pos_y", "pos_z", "v_x", "v_y", "v_z", "rho", "rho_dh",
+          "wcount", "wcount_dh", "div_v", "curl_vx", "curl_vy", "curl_vz");
+
+  fprintf(file, "# Main cell --------------------------------------------\n");
+
+  /* Write main cell */
+  dump_cell_particles(file, main_cell);
+
+  /* Write all other cells */
+  for (int i = 0; i < 3; ++i) {
+    for (int j = 0; j < 3; ++j) {
+      for (int k = 0; k < 3; ++k) {
+
+        struct cell *cj = cells[i * 9 + j * 3 + k];
+        if (cj == main_cell) continue;
+
+        fprintf(file,
+                "# Offset: [%2d %2d %2d] -----------------------------------\n",
+                i - 1, j - 1, k - 1);
+
+        dump_cell_particles(file, cj);
+      }
+    }
+  }
+
+  fclose(file);
+}
+
+/* Just a forward declaration... */
+void runner_dopair1_density(struct runner *r, struct cell *ci, struct cell *cj);
+void runner_doself1_density(struct runner *r, struct cell *ci);
+
+/* Compares SWIFT's density loop over 27 cells with a brute-force version */
+int main(int argc, char *argv[]) {
+  size_t runs = 0, particles = 0;
+  double h = 1.2348, size = 1., rho = 1.;
+  double perturbation = 0.;
+  char outputFileNameExtension[200] = "";
+  char outputFileName[200] = "";
+  int vel = velocity_zero;
+
+  /* Initialize CPU frequency, this also starts time. */
+  unsigned long long cpufreq = 0;
+  clocks_set_cpufreq(cpufreq);
+
+  /* Get some randomness going */
+  srand(0);
+
+  int c; /* getopt() returns an int; an unsigned char would never equal -1 */
+  while ((c = getopt(argc, argv, "m:s:h:p:r:t:d:f:v:")) != -1) {
+    switch (c) {
+      case 'h':
+        sscanf(optarg, "%lf", &h);
+        break;
+      case 's':
+        sscanf(optarg, "%lf", &size);
+        break;
+      case 'p':
+        sscanf(optarg, "%zu", &particles);
+        break;
+      case 'r':
+        sscanf(optarg, "%zu", &runs);
+        break;
+      case 'd':
+        sscanf(optarg, "%lf", &perturbation);
+        break;
+      case 'm':
+        sscanf(optarg, "%lf", &rho);
+        break;
+      case 'f':
+        snprintf(outputFileNameExtension, sizeof(outputFileNameExtension),
+                 "%s", optarg);
+        break;
+      case 'v':
+        sscanf(optarg, "%d", &vel);
+        break;
+      case '?':
+        error("Unknown option.");
+        break;
+    }
+  }
+
+  if (h < 0 || particles == 0 || runs == 0) {
+    printf(
+        "\nUsage: %s -p PARTICLES_PER_AXIS -r NUMBER_OF_RUNS [OPTIONS...]\n"
+        "\nGenerates 27 cells, filled with particles on a Cartesian grid."
+        "\nThe central cell is then interacted with all 27 (itself included)."
+        "\n\nOptions:"
+        "\n-h DISTANCE=1.2348 - Smoothing length in units of <x>"
+        "\n-m rho             - Physical density in the cell"
+        "\n-s size            - Physical size of the cell"
+        "\n-d pert            - Perturbation to apply to the particles [0,1["
+        "\n-v type (0,1,2,3)  - Velocity field: (zero, random, divergent, "
+        "rotating)"
+        "\n-f fileName        - Part of the file name used to save the dumps\n",
+        argv[0]);
+    exit(1);
+  }
+
+  /* Help users... */
+  message("Smoothing length: h = %f", h * size);
+  message("Kernel:               %s", kernel_name);
+  message("Neighbour target: N = %f", h * h * h * kernel_nwneigh / 1.88273);
+  message("Density target: rho = %f", rho);
+  message("div_v target:   div = %f", vel == velocity_divergent ? 3.f : 0.f);
+  message("curl_v target: curl = [0., 0., %f]",
+          vel == velocity_rotating ? -2.f : 0.f);
+
+  printf("\n");
+  /* Build the infrastructure */
+  struct space space;
+  space.periodic = 0;
+  space.h_max = h;
+
+  struct engine engine;
+  engine.s = &space;
+  engine.time = 0.1f;
+  engine.ti_current = 1;
+
+  struct runner runner;
+  runner.e = &engine;
+
+  /* Construct some cells */
+  struct cell *cells[27];
+  struct cell *main_cell;
+  static long long partId = 0;
+  for (int i = 0; i < 3; ++i) {
+    for (int j = 0; j < 3; ++j) {
+      for (int k = 0; k < 3; ++k) {
+
+        double offset[3] = {i * size, j * size, k * size};
+        cells[i * 9 + j * 3 + k] = make_cell(particles, offset, size, h, rho,
+                                             &partId, perturbation, vel);
+      }
+    }
+  }
+
+  /* Store the main cell for future use */
+  main_cell = cells[13];
+
+  ticks time = 0;
+  for (size_t i = 0; i < runs; ++i) {
+
+    /* Zero the fields */
+    for (int j = 0; j < 27; ++j) zero_particle_fields(cells[j]);
+
+    const ticks tic = getticks();
+
+    /* Run all the pairs */
+    for (int j = 0; j < 27; ++j)
+      if (cells[j] != main_cell)
+        runner_dopair1_density(&runner, main_cell, cells[j]);
+
+    /* And now the self-interaction */
+    runner_doself1_density(&runner, main_cell);
+
+    const ticks toc = getticks();
+    time += toc - tic;
+
+    /* Let's get physical ! */
+    end_calculation(main_cell);
+
+    /* Dump if necessary */
+    if (i % 50 == 0) {
+      snprintf(outputFileName, sizeof(outputFileName),
+               "swift_dopair_27_%s.dat", outputFileNameExtension);
+      dump_particle_fields(outputFileName, main_cell, cells);
+    }
+  }
+
+  /* Output timing */
+  message("SWIFT calculation took       : %15lli ticks.", time / runs);
+
+  /* Now perform a brute-force version for accuracy tests */
+  /* Zero the fields */
+  for (int i = 0; i < 27; ++i) zero_particle_fields(cells[i]);
+
+  const ticks tic = getticks();
+
+  /* Run all the brute-force pairs */
+  for (int j = 0; j < 27; ++j)
+    if (cells[j] != main_cell) pairs_all_density(&runner, main_cell, cells[j]);
+
+  /* And now the self-interaction */
+  self_all_density(&runner, main_cell);
+
+  const ticks toc = getticks();
+
+  /* Let's get physical ! */
+  end_calculation(main_cell);
+
+  /* Dump */
+  snprintf(outputFileName, sizeof(outputFileName), "brute_force_27_%s.dat",
+           outputFileNameExtension);
+  dump_particle_fields(outputFileName, main_cell, cells);
+
+  /* Output timing */
+  message("Brute force calculation took : %15lli ticks.", toc - tic);
+
+  /* Clean things to make the sanitizer happy ... */
+  for (int i = 0; i < 27; ++i) clean_up(cells[i]);
+
+  return 0;
+}
diff --git a/tests/test27cells.sh b/tests/test27cells.sh
new file mode 100755
index 0000000000000000000000000000000000000000..0afdc32c95397ce76190e847bdcd04a0b079ef78
--- /dev/null
+++ b/tests/test27cells.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+# Checks the SWIFT density loop against brute force on an unperturbed 6^3 grid
+rm -f brute_force_27_standard.dat swift_dopair_27_standard.dat
+./test27cells -p 6 -r 1 -d 0 -f standard
+
+python difffloat.py brute_force_27_standard.dat swift_dopair_27_standard.dat tolerance.dat 6
+
+exit $?
diff --git a/tests/test27cellsPerturbed.sh b/tests/test27cellsPerturbed.sh
new file mode 100755
index 0000000000000000000000000000000000000000..a553a2553e92cedee7c2c0679d231ec9d982fc28
--- /dev/null
+++ b/tests/test27cellsPerturbed.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+# Checks the SWIFT density loop against brute force on a perturbed 6^3 grid
+rm -f brute_force_27_perturbed.dat swift_dopair_27_perturbed.dat
+./test27cells -p 6 -r 1 -d 0.1 -f perturbed
+
+python difffloat.py brute_force_27_perturbed.dat swift_dopair_27_perturbed.dat tolerance.dat 6
+
+exit $?
diff --git a/tests/testKernel.c b/tests/testKernel.c
new file mode 100644
index 0000000000000000000000000000000000000000..5ad9cc81ea92e6ef9487489c5d560abf414e38df
--- /dev/null
+++ b/tests/testKernel.c
@@ -0,0 +1,37 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (C) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk).
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+#include "swift.h"
+
+int main() {
+
+  const int n_samples = 30;
+  const float eta = const_eta_kernel;
+
+  /* Evaluate the kernel at n_samples equally spaced values of x in [0, 3[ */
+  for (int k = 0; k < n_samples; ++k) {
+    float w, dw_dx;
+    const float r = k * 3.f / n_samples;
+    kernel_deval(r / eta, &w, &dw_dx);
+
+    printf("h= %f H= %f x=%f W(x,h)=%f\n", eta, eta * kernel_gamma, r, w);
+  }
+
+  return 0;
+}
diff --git a/tests/testPair.c b/tests/testPair.c
new file mode 100644
index 0000000000000000000000000000000000000000..6e46b577ca63a8d3c2edce888a7485af0949813d
--- /dev/null
+++ b/tests/testPair.c
@@ -0,0 +1,305 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (C) 2015 Matthieu Schaller (matthieu.schaller@durham.ac.uk).
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+#include <fenv.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <unistd.h>
+#include "swift.h"
+
+/**
+ * Returns a random number (uniformly distributed) in [a,b[
+ */
+double random_uniform(double a, double b) {
+  return (rand() / ((double)RAND_MAX + 1.)) * (b - a) + a; /* ratio < 1 */
+}
+
+/**
+ * @brief Constructs a cell of n^3 particles in a valid state for DOPAIR.
+ *
+ * Particles sit on a grid of spacing size / n starting at offset, shifted by
+ * up to +/- pert / 2 spacings, and receive random velocities. h is the
+ * smoothing length in units of the grid spacing, density sets the particle
+ * masses and partId is the running ID counter.
+ *
+ * @return The new cell, to be released with clean_up().
+ */
+struct cell *make_cell(size_t n, double *offset, double size, double h,
+                       double density, unsigned long long *partId,
+                       double pert) {
+  const size_t count = n * n * n;
+  const double volume = size * size * size;
+  struct cell *cell = malloc(sizeof(struct cell));
+  if (cell == NULL) error("couldn't allocate the cell");
+  bzero(cell, sizeof(struct cell));
+
+  if (posix_memalign((void **)&cell->parts, part_align,
+                     count * sizeof(struct part)) != 0) {
+    error("couldn't allocate particles, no. of particles: %d", (int)count);
+  }
+  bzero(cell->parts, count * sizeof(struct part));
+
+  /* Construct the parts */
+  struct part *part = cell->parts;
+  for (size_t x = 0; x < n; ++x) {
+    for (size_t y = 0; y < n; ++y) {
+      for (size_t z = 0; z < n; ++z) {
+        const size_t idx[3] = {x, y, z};
+        for (int d = 0; d < 3; ++d)
+          part->x[d] = offset[d] + size * (idx[d] + 0.5 +
+                       random_uniform(-0.5, 0.5) * pert) / (float)n;
+        part->v[0] = random_uniform(-0.05, 0.05);
+        part->v[1] = random_uniform(-0.05, 0.05);
+        part->v[2] = random_uniform(-0.05, 0.05);
+        part->h = size * h / (float)n;
+        part->id = ++(*partId);
+        part->mass = density * volume / count;
+        part->ti_begin = 0;
+        part->ti_end = 1;
+        ++part;
+      }
+    }
+  }
+
+  /* Cell properties */
+  cell->split = 0;
+  cell->h_max = h;
+  cell->count = count;
+  cell->dx_max = 0.;
+  cell->h[0] = n; /* NOTE(review): the cell spans 'size', not n - confirm */
+  cell->h[1] = n;
+  cell->h[2] = n;
+  cell->loc[0] = offset[0];
+  cell->loc[1] = offset[1];
+  cell->loc[2] = offset[2];
+
+  cell->ti_end_min = 1;
+  cell->ti_end_max = 1;
+
+  cell->sorted = 0;
+  cell->sort = NULL;
+  cell->sortsize = 0;
+  runner_dosort(NULL, cell, 0x1FFF, 0); /* 0x1FFF: the 13 low bits set */
+
+  return cell;
+}
+
+void clean_up(struct cell *ci) { /* Releases a cell created by make_cell() */
+  free(ci->parts); /* particle array obtained from posix_memalign() */
+  free(ci->sort);  /* NOTE(review): presumably allocated by runner_dosort() */
+  free(ci);        /* the cell structure itself */
+}
+
+/**
+ * @brief Resets every particle of a cell ahead of a density calculation
+ */
+void zero_particle_fields(struct cell *c) {
+  for (size_t k = 0; k < c->count; ++k) {
+    struct part *const p = &c->parts[k];
+    p->rho = 0.f;
+    p->rho_dh = 0.f;
+    hydro_init_part(p);
+  }
+}
+
+/**
+ * @brief Writes one line per particle of a cell to an already opened file
+ *
+ * @param file The output stream.
+ * @param c The cell whose particles are written.
+ */
+static void dump_cell_particles(FILE *file, const struct cell *c) {
+
+  for (size_t pid = 0; pid < c->count; pid++) {
+    const struct part *p = &c->parts[pid];
+    fprintf(file,
+            "%6llu %10f %10f %10f %10f %10f %10f %13e %13e %13e %13e %13e "
+            "%13e %13e %13e\n",
+            p->id, p->x[0], p->x[1], p->x[2], p->v[0], p->v[1], p->v[2],
+            p->rho, p->rho_dh, p->density.wcount, p->density.wcount_dh,
+#ifdef GADGET2_SPH
+            p->div_v, p->density.rot_v[0], p->density.rot_v[1],
+            p->density.rot_v[2]
+#else
+            0., 0., 0., 0.
+#endif
+            );
+  }
+}
+
+/**
+ * @brief Dump all the particles to a file
+ *
+ * @param fileName The name of the file to (over)write.
+ * @param ci The first cell, written under the "# ci" marker.
+ * @param cj The second cell, written under the "# cj" marker.
+ */
+void dump_particle_fields(char *fileName, struct cell *ci, struct cell *cj) {
+
+  FILE *file = fopen(fileName, "w");
+  if (file == NULL) error("Could not open '%s' for writing.", fileName);
+
+  /* Write header */
+  fprintf(file,
+          "# %4s %10s %10s %10s %10s %10s %10s %13s %13s %13s %13s %13s "
+          "%13s %13s %13s\n",
+          "ID", "pos_x", "pos_y", "pos_z", "v_x", "v_y", "v_z", "rho", "rho_dh",
+          "wcount", "wcount_dh", "div_v", "curl_vx", "curl_vy", "curl_vz");
+
+  /* Write the first cell */
+  fprintf(file, "# ci --------------------------------------------\n");
+  dump_cell_particles(file, ci);
+
+  /* Write the second cell */
+  fprintf(file, "# cj --------------------------------------------\n");
+  dump_cell_particles(file, cj);
+
+  fclose(file);
+}
+
+/* Just a forward declaration... */
+void runner_dopair1_density(struct runner *r, struct cell *ci, struct cell *cj);
+
+/* Compares SWIFT's pair density loop with a brute-force version */
+int main(int argc, char *argv[]) {
+  size_t particles = 0, runs = 0, volume, type = 0;
+  double offset[3] = {0, 0, 0}, h = 1.1255, size = 1., rho = 1.;
+  double perturbation = 0.;
+  struct cell *ci, *cj;
+  struct space space;
+  struct engine engine;
+  struct runner runner;
+  int c; /* getopt() returns an int; an unsigned char would never equal -1 */
+  static unsigned long long partId = 0;
+  char outputFileNameExtension[200] = "";
+  char outputFileName[200] = "";
+  ticks tic, toc, time = 0;
+
+  /* Initialize CPU frequency, this also starts time. */
+  unsigned long long cpufreq = 0;
+  clocks_set_cpufreq(cpufreq);
+  srand(0);
+
+  while ((c = getopt(argc, argv, "h:p:r:t:d:f:")) != -1) {
+    switch (c) {
+      case 'h':
+        sscanf(optarg, "%lf", &h);
+        break;
+      case 'p':
+        sscanf(optarg, "%zu", &particles);
+        break;
+      case 'r':
+        sscanf(optarg, "%zu", &runs);
+        break;
+      case 't':
+        sscanf(optarg, "%zu", &type);
+        break;
+      case 'd':
+        sscanf(optarg, "%lf", &perturbation);
+        break;
+      case 'f':
+        snprintf(outputFileNameExtension, sizeof(outputFileNameExtension),
+                 "%s", optarg);
+        break;
+      case '?':
+        error("Unknown option.");
+        break;
+    }
+  }
+
+  if (h < 0 || particles == 0 || runs == 0 || type > 2) {
+    printf(
+        "\nUsage: %s -p PARTICLES_PER_AXIS -r NUMBER_OF_RUNS [OPTIONS...]\n"
+        "\nGenerates a cell pair, filled with particles on a Cartesian grid."
+        "\nThese are then interacted using runner_dopair1_density."
+        "\n\nOptions:"
+        "\n-t TYPE=0          - cells share face (0), edge (1) or corner (2)"
+        "\n-h DISTANCE=1.1255 - smoothing length"
+        "\n-d pert            - perturbation to apply to the particles [0,1["
+        "\n-f fileName        - part of the file name used to save the dumps\n",
+        argv[0]);
+    exit(1);
+  }
+
+  space.periodic = 0;
+  space.h_max = h;
+  engine.s = &space;
+  engine.time = 0.1f;
+  engine.ti_current = 1;
+  runner.e = &engine;
+
+  volume = particles * particles * particles;
+  message("particles: %zu B\npositions: 0 B", 2 * volume * sizeof(struct part));
+
+  ci = make_cell(particles, offset, size, h, rho, &partId, perturbation);
+  for (size_t i = 0; i < type + 1; ++i) offset[i] = 1.; /* shift cj */
+  cj = make_cell(particles, offset, size, h, rho, &partId, perturbation);
+
+  for (size_t i = 0; i < runs; ++i) {
+
+    /* Zero the fields */
+    zero_particle_fields(ci);
+    zero_particle_fields(cj);
+
+    tic = getticks();
+
+    /* Run the test */
+    runner_dopair1_density(&runner, ci, cj);
+
+    toc = getticks();
+    time += toc - tic;
+
+    /* Dump if necessary */
+    if (i % 50 == 0) {
+      snprintf(outputFileName, sizeof(outputFileName), "swift_dopair_%s.dat",
+               outputFileNameExtension);
+      dump_particle_fields(outputFileName, ci, cj);
+    }
+  }
+
+  /* Output timing */
+  message("SWIFT calculation took       %lli ticks.", time / runs);
+
+  /* Now perform a brute-force version for accuracy tests */
+  /* Zero the fields */
+  zero_particle_fields(ci);
+  zero_particle_fields(cj);
+
+  tic = getticks();
+
+  /* Run the brute-force test */
+  pairs_all_density(&runner, ci, cj);
+
+  toc = getticks();
+
+  /* Dump */
+  snprintf(outputFileName, sizeof(outputFileName), "brute_force_%s.dat",
+           outputFileNameExtension);
+  dump_particle_fields(outputFileName, ci, cj);
+
+  /* Output timing */
+  message("Brute force calculation took %lli ticks.", toc - tic);
+
+  /* Clean things to make the sanitizer happy ... */
+  clean_up(ci);
+  clean_up(cj);
+
+  return 0;
+}
diff --git a/tests/testPair.sh b/tests/testPair.sh
new file mode 100755
index 0000000000000000000000000000000000000000..f6f505e56a2c7a5c3cff0ec04bd871278634193c
--- /dev/null
+++ b/tests/testPair.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+# Checks SWIFT's pair density loop against brute force on an unperturbed grid
+rm -f brute_force_standard.dat swift_dopair_standard.dat
+./testPair -p 6 -r 1 -d 0 -f standard
+
+python difffloat.py brute_force_standard.dat swift_dopair_standard.dat tolerance.dat
+
+exit $?
diff --git a/tests/testPairPerturbed.sh b/tests/testPairPerturbed.sh
new file mode 100755
index 0000000000000000000000000000000000000000..544ba1b032da8426c065dcfb2ce3ee554c5e76a1
--- /dev/null
+++ b/tests/testPairPerturbed.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+# Checks SWIFT's pair density loop against brute force on a perturbed grid
+rm -f brute_force_perturbed.dat swift_dopair_perturbed.dat
+./testPair -p 6 -r 1 -d 0.1 -f perturbed
+
+python difffloat.py brute_force_perturbed.dat swift_dopair_perturbed.dat tolerance.dat
+
+exit $?
diff --git a/tests/testParser.c b/tests/testParser.c
new file mode 100644
index 0000000000000000000000000000000000000000..0b08d20c9e2d48de1858877cf186eaa9d0ac84c0
--- /dev/null
+++ b/tests/testParser.c
@@ -0,0 +1,70 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (C) 2016 James Willis (james.s.willis@durham.ac.uk).
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+#include "parser.h"
+#include <assert.h>
+#include <string.h>
+#include <stdio.h>
+#include <math.h>
+
+int main(int argc, char *argv[]) {
+  /* Refuse to run without the mandatory parameter-file argument. */
+  if (argc < 2) {
+    fprintf(stderr, "Usage: testParser PARAMFILE\n");
+    return 1;
+  }
+  const char *input_file = argv[1];
+
+  /* Read the parameter file into a parameter structure. */
+  struct swift_params param_file;
+  parser_read_file(input_file, &param_file);
+
+  /* Print the contents of the structure to stdout. */
+  parser_print_params(&param_file);
+
+  /* Print the contents of the structure to a file in YAML format. */
+  parser_write_params_to_file(&param_file, "parser_output.yml");
+
+  /* Retrieve the parameters by their name in testParserInput.yaml. */
+  const int no_of_threads =
+      parser_get_param_int(&param_file, "Scheduler:no_of_threads");
+  const int no_of_time_steps =
+      parser_get_param_int(&param_file, "Simulation:no_of_time_steps");
+  const float max_h = parser_get_param_float(&param_file, "Simulation:max_h");
+  const double start_time =
+      parser_get_param_double(&param_file, "Simulation:start_time");
+  const int kernel = parser_get_param_int(&param_file, "kernel");
+  char ic_file[PARSER_MAX_LINE_SIZE];
+  parser_get_param_string(&param_file, "IO:ic_file", ic_file);
+
+  /* Print the variables to check their values are correct. */
+  printf(
+      "no_of_threads: %d, no_of_time_steps: %d, max_h: %f, start_time: %lf, "
+      "ic_file: %s, kernel: %d\n",
+      no_of_threads, no_of_time_steps, max_h, start_time, ic_file, kernel);
+
+  assert(no_of_threads == 16);
+  assert(no_of_time_steps == 10);
+  assert(fabs(max_h - 1.1255) < 0.00001);
+  assert(fabs(start_time - 1.23456789) < 0.00001);
+  assert(strcmp(ic_file, "ic_file.ini") == 0); /*strcmp returns 0 if correct.*/
+  assert(kernel == 4);
+
+  return 0;
+}
diff --git a/tests/testParser.sh b/tests/testParser.sh
new file mode 100755
index 0000000000000000000000000000000000000000..53d2bbe4e0230032666ace228449f913f03e0464
--- /dev/null
+++ b/tests/testParser.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+# Remove the output of a previous run, if any, then run the parser test
+rm -f parser_output.yml
+./testParser testParserInput.yaml
diff --git a/tests/testParserInput.yaml b/tests/testParserInput.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c7fefb3242ab4e140756789aad9979d024f83906
--- /dev/null
+++ b/tests/testParserInput.yaml
@@ -0,0 +1,19 @@
+---
+#section_1:
+#  var_a: 4.5e10
+#  var_b: Hello World!
+
+Scheduler:
+  no_of_threads:      16 # The number of threads that will be used. 
+
+kernel: 4
+
+Simulation:    
+  no_of_time_steps:   10
+  max_h:              1.1255
+  start_time:         1.23456789
+
+IO:
+  #Input file
+  ic_file:            ic_file.ini
+...
diff --git a/tests/testReading.c b/tests/testReading.c
index d2a2a766171a85ace486914f0f39a987d9d8c3d3..33aeb5095ba499bc0fd18ba15b513e351692432e 100644
--- a/tests/testReading.c
+++ b/tests/testReading.c
@@ -22,7 +22,7 @@
 
 int main() {
 
-  int Ngas = -1, Ngpart = -1;
+  size_t Ngas = 0, Ngpart = 0;
   int periodic = -1;
   int i, j, k, n;
   double dim[3];
@@ -35,7 +35,8 @@ int main() {
   const double rho = 2.;
 
   /* Read data */
-  read_ic_single("input.hdf5", dim, &parts, &gparts, &Ngas, &Ngpart, &periodic);
+  read_ic_single("input.hdf5", dim, &parts, &gparts, &Ngas, &Ngpart, &periodic,
+                 0);
 
   /* Check global properties read are correct */
   assert(dim[0] == boxSize);
diff --git a/tests/testSPHStep.c b/tests/testSPHStep.c
index 984b8ea867250d0bda1bc14d2600279a27321b2c..223078ecb637e64d94e37cdf8c0f60a86bdd5ff7 100644
--- a/tests/testSPHStep.c
+++ b/tests/testSPHStep.c
@@ -77,6 +77,10 @@ struct cell *make_cell(size_t N, float cellSize, int offset[3], int id_offset) {
 
 #ifdef DEFAULT_SPH
 
+/* Just a forward declaration... */
+void runner_doself1_density(struct runner *r, struct cell *ci);
+void runner_doself2_force(struct runner *r, struct cell *ci);
+
 /* Run a full time step integration for one cell */
 int main() {
 
@@ -132,7 +136,7 @@ int main() {
 
   /* Initialise the particles */
   for (j = 0; j < 27; ++j) {
-    runner_doinit(&r, cells[j]);
+    runner_doinit(&r, cells[j], 0);
   }
 
   /* Compute density */
@@ -145,7 +149,7 @@ int main() {
   runner_doself2_force(&r, ci);
   runner_dokick(&r, ci, 1);
 
-  message("t_end=%f", p->t_end);
+  message("ti_end=%d", p->ti_end);
 
   free(ci->parts);
   free(ci->xparts);
diff --git a/tests/testSingle.c b/tests/testSingle.c
index c85b77ff1c5b2285c33fa7787bbd53deab463039..eb49a570b93b14734c9e6af37d3d8a2b90d04078 100644
--- a/tests/testSingle.c
+++ b/tests/testSingle.c
@@ -91,8 +91,8 @@ int main(int argc, char *argv[]) {
   p2.force.POrho2 = p2.u * (const_hydro_gamma - 1.0f) / p2.rho;
 
   /* Dump a header. */
-  printParticle_single(&p1);
-  printParticle_single(&p2);
+  // printParticle_single(&p1, NULL);
+  // printParticle_single(&p2, NULL);
   printf("# r a_1 udt_1 a_2 udt_2\n");
 
   /* Loop over the different radii. */
@@ -103,9 +103,9 @@ int main(int argc, char *argv[]) {
     r2 = dx[0] * dx[0];
 
     /* Clear the particle fields. */
-    p1.a[0] = 0.0f;
+    p1.a_hydro[0] = 0.0f;
     p1.force.u_dt = 0.0f;
-    p2.a[0] = 0.0f;
+    p2.a_hydro[0] = 0.0f;
     p2.force.u_dt = 0.0f;
 
     /* Interact the particles. */
@@ -130,8 +130,8 @@ int main(int argc, char *argv[]) {
 
     /* Output the results. */
     printf(
-        "%.3e %.3e %.3e %.3e %.3e %.3e %.3e %.3e %.3e %.3e\n", -dx[0], p1.a[0],
-        p1.a[1], p1.a[2], p1.force.u_dt,
+        "%.3e %.3e %.3e %.3e %.3e %.3e %.3e %.3e %.3e %.3e\n", -dx[0],
+        p1.a_hydro[0], p1.a_hydro[1], p1.a_hydro[2], p1.force.u_dt,
         /// -dx[0] , p1.rho , p1.density.wcount , p2.rho , p2.density.wcount ,
         w, dwdx, gradw[0], gradw[1], gradw[2]);
 
diff --git a/tests/testVectorize.c b/tests/testVectorize.c
deleted file mode 100644
index a18b6e8af5ac3f7b94bd7be3bdf8fd21e49681ff..0000000000000000000000000000000000000000
--- a/tests/testVectorize.c
+++ /dev/null
@@ -1,212 +0,0 @@
-#include <fenv.h>
-#include <stdlib.h>
-#include <string.h>
-#include <stdio.h>
-#include <unistd.h>
-#include "swift.h"
-
-/* n is both particles per axis and box size:
- * particles are generated on a mesh with unit spacing
- */
-struct cell *make_cell(size_t n, double *offset, double h,
-                       unsigned long long *partId) {
-  size_t count = n * n * n;
-  struct cell *cell = malloc(sizeof *cell);
-  struct part *part;
-  size_t x, y, z, size;
-
-  size = count * sizeof(struct part);
-  if (posix_memalign((void **)&cell->parts, part_align, size) != 0) {
-    error("couldn't allocate particles, no. of particles: %d", (int)count);
-  }
-
-  part = cell->parts;
-  for (x = 0; x < n; ++x) {
-    for (y = 0; y < n; ++y) {
-      for (z = 0; z < n; ++z) {
-        // Add .5 for symmetry: 0.5, 1.5, 2.5 vs. 0, 1, 2
-        part->x[0] = x + offset[0] + 0.5;
-        part->x[1] = y + offset[1] + 0.5;
-        part->x[2] = z + offset[2] + 0.5;
-        part->v[0] = 1.0f;
-        part->v[1] = 1.0f;
-        part->v[2] = 1.0f;
-        part->h = h;
-        part->id = ++(*partId);
-        part->mass = 1.0f;
-        part->ti_begin = 0;
-        part->ti_end = 1;
-        ++part;
-      }
-    }
-  }
-
-  cell->split = 0;
-  cell->h_max = h;
-  cell->count = count;
-  cell->dx_max = 1.;
-  cell->h[0] = n;
-  cell->h[1] = n;
-  cell->h[2] = n;
-
-  cell->sort = malloc(13 * count * sizeof *cell->sort);
-  runner_dosort(NULL, cell, 0x1FFF, 0);
-
-  return cell;
-}
-
-void clean_up(struct cell *ci) {
-  free(ci->parts);
-  free(ci->sort);
-  free(ci);
-}
-
-/**
- * @brief Initializes all particles field to be ready for a density calculation
- */
-void zero_particle_fields(struct cell *c) {
-
-  for (size_t pid = 0; pid < c->count; pid++) {
-    c->parts[pid].rho = 0.f;
-    c->parts[pid].rho_dh = 0.f;
-    hydro_init_part(&c->parts[pid]);
-  }
-}
-
-/**
- * @brief Dump all the particles to a file
- */
-void dump_particle_fields(char *fileName, struct cell *ci, struct cell *cj) {
-
-  FILE *file = fopen(fileName, "w");
-
-  fprintf(file,
-          "# ID  rho  rho_dh  wcount  wcount_dh  div_v  curl_v:[x y z]\n");
-
-  for (size_t pid = 0; pid < ci->count; pid++) {
-    fprintf(file, "%6llu %f %f %f %f %f %f %f %f\n", ci->parts[pid].id,
-            ci->parts[pid].rho, ci->parts[pid].rho_dh,
-            ci->parts[pid].density.wcount, ci->parts[pid].density.wcount_dh,
-            ci->parts[pid].div_v, ci->parts[pid].density.rot_v[0],
-            ci->parts[pid].density.rot_v[1], ci->parts[pid].density.rot_v[2]);
-  }
-
-  fprintf(file, "# -----------------------------------\n");
-
-  for (size_t pjd = 0; pjd < cj->count; pjd++) {
-    fprintf(file, "%6llu %f %f %f %f %f %f %f %f\n", cj->parts[pjd].id,
-            cj->parts[pjd].rho, cj->parts[pjd].rho_dh,
-            cj->parts[pjd].density.wcount, cj->parts[pjd].density.wcount_dh,
-            cj->parts[pjd].div_v, cj->parts[pjd].density.rot_v[0],
-            cj->parts[pjd].density.rot_v[1], cj->parts[pjd].density.rot_v[2]);
-  }
-
-  fclose(file);
-}
-
-/* Just a forward declaration... */
-void runner_dopair1_density(struct runner *r, struct cell *ci, struct cell *cj);
-
-int main(int argc, char *argv[]) {
-  size_t particles = 0, runs = 0, volume, type = 0;
-  double offset[3] = {0, 0, 0}, h = 1.1255;  // * DIM/PARTS_PER_AXIS == * 1
-  struct cell *ci, *cj;
-  struct space space;
-  struct engine engine;
-  struct runner runner;
-  char c;
-  static unsigned long long partId = 0;
-  ticks tic, toc, time;
-
-  while ((c = getopt(argc, argv, "h:p:r:t:")) != -1) {
-    switch (c) {
-      case 'h':
-        sscanf(optarg, "%lf", &h);
-        break;
-      case 'p':
-        sscanf(optarg, "%zu", &particles);
-        break;
-      case 'r':
-        sscanf(optarg, "%zu", &runs);
-        break;
-      case 't':
-        sscanf(optarg, "%zu", &type);
-        break;
-    }
-  }
-
-  if (h < 0 || particles == 0 || runs == 0 || type > 2) {
-    printf(
-        "\nUsage: %s -p PARTICLES_PER_AXIS -r NUMBER_OF_RUNS [OPTIONS...]\n"
-        "\nGenerates a cell pair, filled with particles on a Cartesian grid."
-        "\nThese are then interacted using runner_dopair1_density."
-        "\n\nOptions:"
-        "\n-t TYPE=0          - cells share face (0), edge (1) or corner (2)"
-        "\n-h DISTANCE=1.1255 - smoothing length\n",
-        argv[0]);
-    exit(1);
-  }
-
-  volume = particles * particles * particles;
-  message("particles: %zu B\npositions: 0 B", 2 * volume * sizeof(struct part));
-
-  ci = make_cell(particles, offset, h, &partId);
-  for (size_t i = 0; i < type + 1; ++i) offset[i] = particles;
-  cj = make_cell(particles, offset, h, &partId);
-
-  for (int i = 0; i < 3; ++i) {
-    space.h_max = h;
-    space.dt_step = 0.1;
-  }
-
-  engine.s = &space;
-  engine.time = 0.1f;
-  runner.e = &engine;
-
-  time = 0;
-  for (size_t i = 0; i < runs; ++i) {
-
-    /* Zero the fields */
-    zero_particle_fields(ci);
-    zero_particle_fields(cj);
-
-    tic = getticks();
-
-    /* Run the test */
-    runner_dopair1_density(&runner, ci, cj);
-
-    toc = getticks();
-    time += toc - tic;
-
-    /* Dump if necessary */
-    if (i % 50 == 0) dump_particle_fields("swift_dopair.dat", ci, cj);
-  }
-
-  /* Output timing */
-  message("SWIFT calculation took       %lli ticks.", time / runs);
-
-  /* Now perform a brute-force version for accuracy tests */
-
-  /* Zero the fields */
-  zero_particle_fields(ci);
-  zero_particle_fields(cj);
-
-  tic = getticks();
-
-  /* Run the test */
-  pairs_all_density(&runner, ci, cj);
-
-  toc = getticks();
-
-  /* Dump */
-  dump_particle_fields("brute_force.dat", ci, cj);
-
-  /* Output timing */
-  message("Brute force calculation took %lli ticks.", toc - tic);
-
-  /* Clean things to make the sanitizer happy ... */
-  clean_up(ci);
-  clean_up(cj);
-
-  return 0;
-}
diff --git a/tests/tolerance.dat b/tests/tolerance.dat
new file mode 100644
index 0000000000000000000000000000000000000000..f5031c5f47dfa203300ebcc9a47fbac42f854d26
--- /dev/null
+++ b/tests/tolerance.dat
@@ -0,0 +1,3 @@
+#   ID      pos_x      pos_y      pos_z        v_x        v_y        v_z           rho        rho_dh        wcount     wcount_dh         div_v       curl_vx       curl_vy       curl_vz
+    0	    1e-6       1e-6	  1e-6 	       1e-6 	  1e-6	     1e-6	   1e-5	      1e-5	    2e-5       3e-2		 1e-5	     1e-5	   1e-5		 1e-5
+    0	    1e-6       1e-6	  1e-6 	       1e-6 	  1e-6	     1e-6	   1e-5	      1.2e-5	    1e-5       1e-2		 1e-4	     1e-4	   1e-4		 1e-4
diff --git a/theory/kernel/kernel.pdf b/theory/kernel/kernel.pdf
deleted file mode 100644
index b6e540dc61c36dd00e56f02e44ce33f9f91a7f01..0000000000000000000000000000000000000000
Binary files a/theory/kernel/kernel.pdf and /dev/null differ
diff --git a/theory/kernel/kernel.tex b/theory/kernel/kernel.tex
deleted file mode 100644
index 7087555d423afbe2745bb91010a17c52a32084f2..0000000000000000000000000000000000000000
--- a/theory/kernel/kernel.tex
+++ /dev/null
@@ -1,155 +0,0 @@
-\documentclass[a4paper,10pt]{article}
-\usepackage[utf8]{inputenc}
-\usepackage{amsmath}
-
-%opening
-\title{SPH kernels in SWIFT}
-\author{Matthieu Schaller}
-
-\begin{document}
-
-\maketitle
-
-\section{General Definitions}
-
-The smoothing kernels used in SPH are almost always isotropic and can hence be written in 3D as
-
-\begin{equation}
- W(\vec{x},h) = \frac{1}{h^3}f\left(\frac{|\vec{x}|}{h}\right),
-\end{equation}
-
-where $f(q)$ is a dimensionless function, usually a low-order polynomial, normalized to unity. For computational 
-reasons, this kernel 
-usually has a finite support of radius $H$. In other words,
-
-\begin{equation}
- W(\vec{x},h) = 0\quad \forall\quad |\vec{x}| > H.
-\end{equation}
- One can then define the weighted number of neighbours within $H$ as
-
-\begin{equation}
- N_{ngb} = \frac{4}{3}\pi H^3 \sum_j W(\vec{x}_i - \vec{x}_j,h).
-\end{equation}
-
-The value of $N_{ngb}$ is often used in the codes to find the smoothing length of each particle via Newton iterations 
-or a bissection algorithm. $H$ is defined as \emph{the smoothing length} in the GADGET code. This definition is useful 
-for implementation reasons but does not really correspond to a true physical quantity. \\
-The main question is the definition of the smoothing length. The function $W(\vec{x},h)$ is invariant under the 
-rescaling $h\rightarrow \alpha h,~f(q)\rightarrow\alpha^{-3}f(\alpha q)$, which makes the definition of $h$ difficult. 
-This ambiguity is present in the litterature with authors using different definition of the \emph{physical} smoothing 
-length, $h=\frac{1}{2}H$ or $h=H$ for instance. \\
-A more physically motivated estimate is the standard deviation of the kernel:
-
-\begin{equation}
- \sigma^2 = \frac{1}{3} \int \vec{x}^2~W(\vec{x},h)~d^3\vec{x}
-\end{equation}
- 
-which then allows us to set $h=2\sigma$. This definition of the smoothing length is more physical as one can 
-demonstrate that the reconstruction of any smooth field $A(\vec{x})$ using interpolation of particles at the point 
-$\vec{x}_i$ can be expanded as
-
-\begin{equation}
-A_i  \approx A(\vec{x}_i) + \frac{1}{2}\sigma^2 \nabla^2A(\vec{x}_i) + \mathcal{O}\left(\sigma^4\right).
-\end{equation}
-
-The quantity $H/\sigma$ is independant of the choice of $h$ made and is purely a functional of $f(q)$. The number of 
-neighbours (used in the code to construct the neighborhood of a given particle) can then be expressed as a function of 
-this \emph{physical} $h$ (or $\sigma$). Or to relate it even more to the particle distribution, we can write 
-$h=\eta\Delta x$, with $\Delta x$ the mean inter-particle separation:
-
-\begin{equation}
- N_{ngb} = \frac{4}{3}\pi \left(\frac{1}{2}\eta\frac{H}{\sigma}\right)^3 = \frac{4}{3}\pi 
-\left(\eta\zeta\right)^3
-\end{equation}
-
-This definition of the number of neighbours only depends on $f(q)$ (via $\zeta$) and on the mean inter-particle 
-separation. The problem is then fully specified by specifying a form for $f(q)$ and $\eta$. \\
-Experiments suggest that $\eta \approx 1.2 - 1.3$ is a reasonnable choice. The bigger $\eta$, the better the smoothing 
-and hence the better the reconstruction of the field. This, however, comes at a higher computational cost as more 
-interactions between neighbours will have to be computed. Also, spline kernels become instable when $\eta>1.5$. 
-
-\section{Kernels available in SWIFT}
-
-The different kernels available are listed below.
-\paragraph{Cubic Spline Kernel}
-\begin{equation*}
- f(q) = \frac{1}{\pi}\left\lbrace \begin{array}{rcl}
-                      \frac{3}{4}q^3 - 15q^2 + 1 & \mbox{if} & 0 \leq q < 1 \\
-		      -\frac{1}{4}q^3 + \frac{3}{2}q^2-3q+2 & \mbox{if} & 1 \leq q < 2 \\
-		      0 & \mbox{if} & q \geq 2 \\
-                     \end{array}\right.
-\end{equation*}
-with $\zeta = \frac{1}{2}\sqrt{\frac{40}{3}} \approx 1.825742$. Thus, for a resolution of $\eta = 1.235$, this kernel 
-uses $N_{ngb} \approx 48$. The code uses $h = \frac{1}{2}H = \zeta \sigma$ internally.
-
-\paragraph{Quartic Spline Kernel}
-\begin{equation*}
- f(q) = \frac{1}{20\pi}\left\lbrace \begin{array}{rcl}
-                      6q^4 - 15q^2 + \frac{115}{8} & \mbox{if} & 0 \leq q < \frac{1}{2} \\
-		      -4q^4 + 20q^3-30q^2 + 5q + \frac{55}{4} & \mbox{if} & \frac{1}{2} \leq q < \frac{3}{2} \\
-		      q^4-10q^3+\frac{75}{2}q^2-\frac{125}{2}q+\frac{625}{16} & \mbox{if} & \frac{3}{2} \leq q < 
-\frac{5}{2} \\
-		      0 & \mbox{if} & q \geq \frac{5}{2} \\
-                     \end{array}\right.
-\end{equation*}
-with $\zeta = \frac{1}{2}\sqrt{\frac{375}{23}} \approx 2.018932$. Thus, for a resolution of $\eta = 1.235$, this kernel 
-uses $N_{ngb} \approx 64.9$. The code uses $h = \frac{2}{5}H =\frac{4}{5}\zeta \sigma$ internally.
-
-\paragraph{Quintic Spline Kernel}
-\begin{equation*}
- f(q) = \frac{1}{120\pi}\left\lbrace \begin{array}{rcl}
-                      -10q^5 + 30q^4 - 60q^2 + 66 & \mbox{if} & 0 \leq q < 1 \\
-		      5q^5 - 45q^4 + 150q^3 - 210q^2 + 75q + 51 & \mbox{if} & 1 \leq q < 2 \\
-		      -q^5 + 15q^4 - 90q^3 + 270q^2 - 405q + 243 & \mbox{if} & 2 \leq q < 3 \\
-		      0 & \mbox{if} & q \geq 3 \\
-                     \end{array}\right.
-\end{equation*}
-with $\zeta = \frac{1}{2}\sqrt{\frac{135}{7}} \approx 2.195775$. Thus, for a resolution of $\eta = 1.235$, this kernel 
-uses $N_{ngb} \approx 83.5$. The code uses $h = \frac{1}{3}H = \frac{2}{3}\zeta \sigma$ internally.
-
-\paragraph{Wendland $C2$ Kernel}
-\begin{equation*}
- f(q) = \frac{21}{2\pi}\left\lbrace \begin{array}{rcl}
-                      4 q^5-15 q^4+20 q^3-10 q^2+1 & \mbox{if} & 0 \leq q < 1 \\
-		      0 & \mbox{if} & q \geq 1 \\
-                     \end{array}\right.
-\end{equation*}
- with $\zeta = \frac{1}{2}\sqrt{15} \approx 1.93649$. Thus, for a resolution of $\eta = 1.235$, this kernel 
-uses $N_{ngb} \approx 57.3$. The code uses $h = H = 2\zeta \sigma$ internally.
-
-
-\paragraph{Wendland $C4$ Kernel}
-\begin{equation*}
- f(q) = \frac{495}{32\pi}\left\lbrace \begin{array}{rcl}
-                      \frac{35}{3} q^8-64 q^7+ 140 q^6-\frac{448}{3} q^5+70 q^4-\frac{28}{3} q^2+1 & \mbox{if} & 0 
-\leq q < 1 \\
-		      0 & \mbox{if} & q \geq 1 \\
-                     \end{array}\right.
-\end{equation*}
- with $\zeta = \frac{1}{2}\sqrt{\frac{39}{2}} \approx 2.20794$. Thus, for a resolution of $\eta = 1.235$, this kernel 
-uses $N_{ngb} \approx 84.9$. The code uses $h = H = 2\zeta \sigma$ internally.
-
-\paragraph{Wendland $C6$ Kernel}
-\begin{equation*}
- f(q) = \frac{1365}{64\pi}\left\lbrace \begin{array}{rcl}
-                      32 q^{11}-231 q^{10}+704 q^9-1155 q^8+1056 q^7-462 q^6+66 q^4-11 q^2+1 & \mbox{if} & 0 
-\leq q < 1 \\
-		      0 & \mbox{if} & q \geq 1 \\
-                     \end{array}\right.
-\end{equation*}
- with $\zeta = \frac{1}{2}\sqrt{24} \approx 2.44949$. Thus, for a resolution of $\eta = 1.235$, this kernel 
-uses $N_{ngb} \approx 116$. The code uses $h = H = 2\zeta \sigma$ internally.
-
-\section{Kernel Derivatives}
-
-The derivatives of the kernel function have relatively simple expressions:
-
-\begin{eqnarray*}
- \vec\nabla_x W(\vec{x},h) &=& \frac{1}{h^4}f'\left(\frac{|\vec{x}|}{h}\right) \frac{\vec{x}}{|\vec{x}|} \\
- \frac{\partial W(\vec{x},h)}{\partial h} &=&- \frac{1}{h^4}\left[3f\left(\frac{|\vec{x}|}{h}\right) + 
-\frac{|\vec{x}|}{h}f'\left(\frac{|\vec{x}|}{h}\right)\right]
-\end{eqnarray*}
-
-Note that for all the kernels listed above, $f'(0) = 0$. 
-
-\end{document}
diff --git a/theory/kernel/kernel_definitions.tex b/theory/kernel/kernel_definitions.tex
new file mode 100644
index 0000000000000000000000000000000000000000..8999636109ffadcbf148ce3c1fbccdc44feafe65
--- /dev/null
+++ b/theory/kernel/kernel_definitions.tex
@@ -0,0 +1,242 @@
+\documentclass[a4paper]{mnras}
+\usepackage[utf8]{inputenc}
+\usepackage{amsmath}
+\usepackage{graphicx}
+\usepackage{xspace}
+
+\newcommand{\swift}{{\sc Swift}\xspace}
+
+
+
+%opening
+\title{SPH kernels in SWIFT}
+\author{Matthieu Schaller}
+
+\begin{document}
+
+\maketitle
+
+Here we follow the definitions of Dehnen \& Aly (2012).
+
+\section{General Definitions}
+
+The desirable properties of an SPH kernel $W(\vec{x},h)$ are:
+\begin{enumerate}
+\item $W(\vec{x},h)$ should be isotropic in $\vec{x}$.
+\item $W(\vec{x},h)$ should be positive and decrease monotonically.
+\item $W(\vec{x},h)$ should be twice differentiable.
+\item $W(\vec{x},h)$ should have a finite support and be cheap to
+  compute.
+\end{enumerate}
+
+As a consequence, the smoothing kernels used in SPH can
+be written (in 3D) as
+
+\begin{equation}
+ W(\vec{x},h) \equiv \frac{1}{H^3}f\left(\frac{|\vec{x}|}{H}\right),
+\end{equation}
+
+where $H=\gamma h$ is defined below and $f(u)$ is a dimensionless
+function, usually a low-order polynomial, such that $f(u \geq 1) = 0$
+and normalised such that
+
+\begin{equation}
+  \int f(|\vec{u}|){\rm d}^3u = 1.
+\end{equation}
+
+$H$ is the kernel's support radius and is used as the ``smoothing
+length'' in the Gadget code ({\it i.e.} $H=h$). This definition is,
+however, not very physical and makes comparison of kernels at a
+\emph{fixed resolution} difficult. A more sensible definition of the
+smoothing length, related to the Taylor expansion of the
+re-constructed density field is given in terms of the kernel's
+standard deviation
+
+\begin{equation}
+  \sigma^2 \equiv \frac{1}{3}\int \vec{u}^2 W(\vec{u},h) {\rm d}^3u.
+  \label{eq:sph:sigma}
+\end{equation}
+
+The smoothing length is then:
+\begin{equation}
+  h\equiv2\sigma.
+    \label{eq:sph:h}
+\end{equation}
+
+Each kernel, {\it i.e.} definition of $f(u)$, will have a different
+ratio $\gamma = H/h$. So for a \emph{fixed resolution} $h$, one will
+have different kernel support sizes, $H$, and a different number of
+neighbours, $N_{\rm ngb}$, to interact with. One would typically choose
+$h$ for a simulation as a multiple $\eta$ of the mean inter-particle
+separation:
+
+\begin{equation}
+  h = \eta \langle x \rangle = \eta \left(\frac{m}{\rho}\right)^{1/3},
+\end{equation}
+
+where $\rho$ is the local density of the fluid and $m$ the SPH
+particle mass. 
+
+The (weighted) number of neighbours within the kernel support is a
+useful quantity to use in implementations of SPH. It is defined as (in
+3D)
+
+\begin{equation}
+  N_{\rm ngb} \equiv \frac{4}{3}\pi \left(\frac{H}{h}\eta\right)^3.
+\end{equation}
+
+Once the fixed ratio $\gamma= H/h$ is known (via equations
+\ref{eq:sph:sigma} and \ref{eq:sph:h}) for a given kernel, the number
+of neighbours only depends on the resolution parameter $\eta$.  For
+the usual cubic spline kernel (see below), setting the simulation
+resolution to $\eta=1.2348$ yields the commonly used value $N_{\rm
+  ngb} = 48$.
+
+\section{Kernels available in \swift}
+
+The \swift kernels are split into two categories, the B-splines
+($M_{4,5,6}$) and the Wendland kernels ($C2$, $C4$ and $C6$). In all
+cases we impose $f(u>1) = 0$.\\
+
+The spline kernels are defined as:
+
+\begin{align}
+  f(u) &= C M_n(u), \\
+  M_n(u) &\equiv \frac{1}{2\pi}
+  \int_{-\infty}^{\infty}
+  \left(\frac{\sin\left(k/n\right)}{k/n}\right)^n\cos\left(ku\right){\rm
+  d}k,
+\end{align}
+
+whilst the Wendland kernels read
+
+\begin{align}
+  f(u) &= C \Psi_{i,j}(u), \\
+  \Psi_{i,j}(u) &\equiv \mathcal{I}^j\left[\max\left(1-u,0\right)^i\right],\\
+  \mathcal{I}[f](u) &\equiv \int_u^\infty f\left(k\right)k{\rm d}k.
+\end{align}
+
+\subsubsection{Cubic spline ($M_4$) kernel}
+
+In 3D, we have $C=\frac{16}{\pi}$ and $\gamma=H/h = 1.825742$.\\
+The kernel function $f(u)$ reads:
+
+\begin{equation}
+  M_4(u) = \left\lbrace\begin{array}{rcl}
+  3u^3 - 3u^2 + \frac{1}{2} & \mbox{if} & u<\frac{1}{2}\\
+  -u^3 + 3u^2 - 3u + 1 & \mbox{if} & u \geq \frac{1}{2}
+  \end{array}
+  \right.
+    \nonumber
+\end{equation}
+
+
+\subsubsection{Quartic spline ($M_5$) kernel}
+
+In 3D, we have $C=\frac{15625}{512\pi}$ and $\gamma=H/h = 2.018932$.\\
+The kernel function $f(u)$ reads:
+
+\begin{align}
+  M_5(u) &=     \nonumber\\
+  &\left\lbrace\begin{array}{rcl}
+  6u^4 - \frac{12}{5}u^2 + \frac{46}{125} & \mbox{if} & u < \frac{1}{5} \\
+  -4u^4 + 8u^3  - \frac{24}{5}u^2 + \frac{8}{25}u + \frac{44}{125} &  \mbox{if} &  \frac{1}{5} \leq u < \frac{3}{5}\\
+  u^4 - 4u^3 + 6u^2 - 4u + 1 &  \mbox{if} &  \frac{3}{5} \leq u \\
+  \end{array}
+  \right.
+  \nonumber
+\end{align}
+
+
+\subsubsection{Quintic spline ($M_6$) kernel}
+
+In 3D, we have $C=\frac{2187}{40\pi}$ and $\gamma=H/h = 2.195775$.\\
+The kernel function $f(u)$ reads:
+
+\begin{align}
+  M_6(u) &=     \nonumber\\
+  &\left\lbrace\begin{array}{rcl}
+  -10u^5 + 10u^4 - \frac{20}{9}u^2 + \frac{22}{81} & \mbox{if} & u < \frac{1}{3} \\
+  5u^5 - 15u^4 + \frac{50}{3}u^3 - \frac{70}{9}u^2 + \frac{25}{27}u + \frac{17}{81} &  \mbox{if} &  \frac{1}{3} \leq u < \frac{2}{3}\\
+  -u^5 + 5u^4 - 10u^3 + 10u^2 - 5u + 1 & \mbox{if} & u \geq \frac{2}{3}
+  \end{array}
+  \right.
+      \nonumber
+\end{align}
+
+
+\subsubsection{Wendland C2 kernel}
+
+In 3D, we have $C=\frac{21}{2\pi}$ and $\gamma=H/h = 1.936492$.\\
+The kernel function $f(u)$ reads:
+
+\begin{align}
+  \Psi_{i,j}(u) &= 4u^5 - 15u^4 + 20u^3 - 10u^2 + 1.
+    \nonumber
+\end{align}
+
+
+\subsubsection{Wendland C4 kernel}
+
+In 3D, we have $C=\frac{495}{32\pi}$ and $\gamma=H/h = 2.207940$.\\
+The kernel function $f(u)$ reads:
+
+\begin{align}
+  \Psi_{i,j}(u) &= \frac{35}{3}u^8 - 64u^7 + 140u^6     \nonumber\\
+  & - \frac{448}{3}u^5 + 70u^4 - \frac{28}{3}u^2 + 1
+    \nonumber
+\end{align}
+
+
+\subsubsection{Wendland C6 kernel}
+
+In 3D, we have $C=\frac{1365}{64\pi}$ and $\gamma=H/h = 2.449490$.\\
+The kernel function $f(u)$ reads:
+
+\begin{align}
+  \Psi_{i,j}(u) &= 32u^{11} - 231u^{10} + 704u^9 - 1155u^8     \nonumber\\
+  & + 1056u^7 - 462u^6 + 66u^4 - 11u^2 + 1
+    \nonumber
+\end{align}
+
+
+\subsubsection{Summary}
+
+All kernels available in \swift are shown on Fig.~\ref{fig:sph:kernels}.
+
+\begin{figure}
+\includegraphics[width=\columnwidth]{kernels.pdf}
+\caption{The kernel functions available in \swift for a mean
+  inter-particle separation $\langle x\rangle=1.5$ and a resolution
+  $\eta=1.2348$. The corresponding kernel support radii $H$ (shown by
+  arrows) and number of neighbours $N_{\rm ngb}$ are indicated on the
+  figure. A Gaussian kernel with the same smoothing length is shown
+  for comparison. Note that all these kernels have the \emph{same
+    resolution} despite having vastly different numbers of neighbours.}
+\label{fig:sph:kernels}
+\end{figure}
+
+\begin{figure}
+\includegraphics[width=\columnwidth]{kernel_derivatives.pdf}
+\caption{The first and second derivatives of the kernel functions
+  available in \swift for a mean inter-particle separation $\langle
+  x\rangle=1.5$ and a resolution $\eta=1.2348$.  A Gaussian kernel
+  with the same smoothing length is shown for comparison.}
+\label{fig:sph:kernel_derivatives}
+\end{figure}
+
+
+\section{Kernel Derivatives}
+
+The derivatives of the kernel function have relatively simple
+expressions and are shown on Fig.~\ref{fig:sph:kernel_derivatives}.
+
+\begin{eqnarray*}
+ \vec\nabla_x W(\vec{x},h) &=& \frac{1}{H^4}f'\left(\frac{|\vec{x}|}{H}\right) \frac{\vec{x}}{|\vec{x}|} \\
+ \frac{\partial W(\vec{x},h)}{\partial h} &=&- \frac{\gamma}{H^4}\left[3f\left(\frac{|\vec{x}|}{H}\right) + 
+\frac{|\vec{x}|}{H}f'\left(\frac{|\vec{x}|}{H}\right)\right]
+\end{eqnarray*}
+
+Note that for all the kernels listed above, $f'(0) = 0$. 
+
+\end{document}
diff --git a/theory/kernel/kernels.py b/theory/kernel/kernels.py
index d7bdbe2bf9ba49a30f4c8a2ae136c4843ce5c2cf..184379e5eafbcd12a1a47560ee88e02066da3942 100644
--- a/theory/kernel/kernels.py
+++ b/theory/kernel/kernels.py
@@ -11,24 +11,24 @@ from matplotlib.font_manager import FontProperties
 import numpy
 
 params = {
-    'axes.labelsize': 8,
+    'axes.labelsize': 10,
     'axes.titlesize': 8,
-    'font.size': 8,
+    'font.size': 10,
     'legend.fontsize': 9,
-    'xtick.labelsize': 8,
-    'ytick.labelsize': 8,
+    'xtick.labelsize': 10,
+    'ytick.labelsize': 10,
     'xtick.major.pad': 2.5,
     'ytick.major.pad': 2.5,
     'text.usetex': True,
-'figure.figsize' : (3.15,3.15),
-'figure.subplot.left'    : 0.12,
+'figure.figsize' : (4.15,4.15),
+'figure.subplot.left'    : 0.14,
 'figure.subplot.right'   : 0.99  ,
 'figure.subplot.bottom'  : 0.08  ,
 'figure.subplot.top'     : 0.99  ,
 'figure.subplot.wspace'  : 0.  ,
 'figure.subplot.hspace'  : 0.  ,
 'lines.markersize' : 6,
-'lines.linewidth' : 2.,
+'lines.linewidth' : 1.5,
 'text.latex.unicode': True
 }
 rcParams.update(params)
@@ -36,147 +36,277 @@ rc('font',**{'family':'sans-serif','sans-serif':['Times']})
 
 
 #Parameters
-eta = 1.2349
-h = 2.1
+eta = 1.2348422195325 # Resolution (Gives 48 neighbours for a cubic spline kernel)
+dx  = 1.5#4 #2.7162  # Mean inter-particle separation
 
 #Constants
 PI = math.pi
 
-#Cubic Spline
-cubic_kernel_degree = 3
-cubic_kernel_ivals = 2
-cubic_kernel_gamma = 2.
-cubic_kernel_ngb = 4.0 / 3.0 * PI * eta**3 * 6.0858
-cubic_kernel_coeffs = array([[3./(4.*PI) , -3./(2.*PI), 0.,     1./PI],
-                             [-1./(4.*PI),  3./(2.*PI), -3./PI, 2./PI],
-                             [0.,           0.,         0.,     0.]])
-def cubic_W(x):
-    if size(x) == 1:
-        x = array([x])
-    ind = (minimum(x, cubic_kernel_ivals)).astype(int)
-    coeffs = cubic_kernel_coeffs[ind,:]
-    w = coeffs[:,0] * x + coeffs[:,1]
-    for k in range(2, cubic_kernel_degree+1):
-        w = x * w + coeffs[:,k]
-    return w
-
-
-#Quartic Spline
-quartic_kernel_degree = 4
-quartic_kernel_ivals = 3
-quartic_kernel_gamma = 2.5
-quartic_kernel_ngb = 4.0 / 3.0 * PI * eta**3 * 8.2293
-quartic_kernel_coeffs = array([[3./(10.*PI) , 0.,           -3./(4.*PI) , 0.          ,  23./(32.*PI)],
-                               [-1./(5.*PI) , 1./PI       , -3./(2.*PI) ,1./(4.*PI)   ,  11./(16.*PI)],
-                               [1./(20.*PI) , -1./(2.*PI) , 15./(8.*PI) , -25./(8.*PI), 125./(64.*PI)],
-                               [ 0. , 0.,           0.,         0.,     0.]])
-def quartic_W(x):
-    if size(x) == 1:
-        x = array([x])
-    ind = (minimum(x+0.5, quartic_kernel_ivals)).astype(int)
-    coeffs = quartic_kernel_coeffs[ind,:]
-    w = coeffs[:,0] * x + coeffs[:,1]
-    for k in range(2, quartic_kernel_degree+1):
-        w = x * w + coeffs[:,k]
-    return w
-
-
-# Wendland kernel
-wendland2_kernel_degree = 5
-wendland2_kernel_ivals = 1
-wendland2_kernel_gamma = 2
-wendland2_kernel_ngb = 4.0 / 3.0 * PI * eta**3 * 7.261825
-wendland2_kernel_coeffs = 3.342253804929802 * array([[1./8, -30./32, 80./32, -80./32., 0., 1.],
-                                                     [ 0. , 0.,  0.,   0., 0., 0.]]) / 8.
-
-print wendland2_kernel_coeffs
-def wendland2_W(x):
-    if size(x) == 1:
-        x = array([x])
-    ind = (minimum(0.5*x, wendland2_kernel_ivals)).astype(int)
-    coeffs = wendland2_kernel_coeffs[ind,:]
-    w = coeffs[:,0] * x + coeffs[:,1]
-    for k in range(2, wendland2_kernel_degree+1):
-        w = x * w + coeffs[:,k]
-    return w 
-
-#def wendland2_W(x):
-#    if size(x) == 1:
-#        x = array([x])
-#    x /= 1.936492
-#    x[x>1.] = 1.
-#    oneminusu = 1.-x
-#    oneminusu4 = oneminusu * oneminusu * oneminusu * oneminusu
-#    return 3.342253804929802 * oneminusu4 * (1. + 4.*x) / h**3
-
-
-#Find H
-r = arange(0, 3.5*h, 1./1000.)
-xi = r/h
-cubic_Ws = cubic_W(xi)
-quartic_Ws = quartic_W(xi)
-wendland2_Ws = wendland2_W(xi)
-for j in range(size(r)):
-    if cubic_Ws[j] == 0:
-        cubic_H = r[j]
-        break
-for j in range(size(r)):
-    if quartic_Ws[j] == 0:
-        quartic_H = r[j]
-        break
-for j in range(size(r)):
-    if wendland2_Ws[j] == 0:
-        wendland2_H = r[j]
-        break
-
-    
-print "H=", cubic_H
-print "H=", quartic_H
-print "H=", wendland2_H
-
-
-# Compute sigma -----------------------------------------
-cubic_norm = 4.*PI*integrate.quad(lambda x: x**2*cubic_W(x), 0, cubic_H)[0]
-quartic_norm = 4.*PI*integrate.quad(lambda x: x**2*quartic_W(x), 0, quartic_H)[0]
-wendland2_norm = 4.*PI*integrate.quad(lambda x: x**2*wendland2_W(x), 0, wendland2_H)[0]
-
-print cubic_norm
-print quartic_norm
-print wendland2_norm
-
-
-# Plot kernels ------------------------------------------
-r = arange(0, 3.5*h, 1./100.)
-xi = r/h
-
-cubic_Ws = cubic_W(xi)
-quartic_Ws = quartic_W(xi)
-wendland2_Ws = wendland2_W(xi)
-
+# Compute expected smoothing length
+h = eta * dx
+
+# Get kernel support (Dehnen & Aly 2012, table 1) for 3D kernels
+H_cubic   = 1.825742 * h
+H_quartic = 2.018932 * h
+H_quintic = 2.195775 * h
+H_WendlandC2 = 1.936492 * h
+H_WendlandC4 = 2.207940 * h
+H_WendlandC6 = 2.449490 * h
+
+# Get the number of neighbours within kernel support:
+N_H_cubic = 4./3. * PI * H_cubic**3 / (dx)**3
+N_H_quartic = 4./3. * PI * H_quartic**3 / (dx)**3
+N_H_quintic = 4./3. * PI * H_quintic**3 / (dx)**3
+N_H_WendlandC2 = 4./3. * PI * H_WendlandC2**3 / (dx)**3
+N_H_WendlandC4 = 4./3. * PI * H_WendlandC4**3 / (dx)**3
+N_H_WendlandC6 = 4./3. * PI * H_WendlandC6**3 / (dx)**3
+
+
+print "Smoothing length: h =", h, "Cubic spline kernel support size:   H =", H_cubic, "Number of neighbours N_H =", N_H_cubic
+print "Smoothing length: h =", h, "Quartic spline kernel support size: H =", H_quartic, "Number of neighbours N_H =", N_H_quartic
+print "Smoothing length: h =", h, "Quintic spline kernel support size: H =", H_quintic, "Number of neighbours N_H =", N_H_quintic
+print "Smoothing length: h =", h, "Wendland C2 kernel support size:    H =", H_WendlandC2, "Number of neighbours N_H =", N_H_WendlandC2
+print "Smoothing length: h =", h, "Wendland C4 kernel support size:    H =", H_WendlandC4, "Number of neighbours N_H =", N_H_WendlandC4
+print "Smoothing length: h =", h, "Wendland C6 kernel support size:    H =", H_WendlandC6, "Number of neighbours N_H =", N_H_WendlandC6
+
+# Get kernel constants (Dehnen & Aly 2012, table 1) for 3D kernel
+C_cubic   = 16. / PI
+C_quartic = 5**6 / (512 * PI)
+C_quintic = 3**7 / (40 * PI)
+C_WendlandC2 = 21. / (2 * PI)
+C_WendlandC4 = 495. / (32 * PI)
+C_WendlandC6 = 1365. / (64 * PI)
+
+# Get the reduced kernel definitions (Dehnen & Aly 2012, table 1) for 3D kernel
+#def plus(u) : return maximum(0., u)
+def cubic_spline(r):   return where(r > 1., 0., where(r < 0.5,
+                                                      3.*r**3 - 3.*r**2 + 0.5,
+                                                      -r**3 + 3.*r**2 - 3.*r + 1.) )
+
+#return plus(1. - r)**3 - 4.*plus(1./2. - r)**3
+def quartic_spline(r): return where(r > 1., 0., where(r < 0.2,
+                                                      6.*r**4 - 2.4*r**2 + 46./125.,
+                                                      where(r < 0.6,
+                                                            -4.*r**4 + 8.*r**3  - (24./5.)*r**2 + (8./25.)*r + 44./125.,
+                                                            1.*r**4 - 4.*r**3 + 6.*r**2 - 4.*r + 1.)))
+
+#return plus(1. - r)**4 - 5.*plus(3./5. - r)**4 + 10.*plus(1./5. - r)**4
+def quintic_spline(r): return where(r > 1., 0., where(r < 1./3.,
+                                                      -10.*r**5 + 10.*r**4 - (20./9.)*r**2 + (22./81.),
+                                                      where(r < 2./3.,
+                                                            5.*r**5 - 15.*r**4 + (50./3.)*r**3 - (70./9.)*r**2 + (25./27.)*r + (17./81.),
+                                                            -1.*r**5 + 5.*r**4 - 10.*r**3 + 10.*r**2 - 5.*r + 1.)))
+                                                            
+#return plus(1. - r)**5 - 6.*plus(2./3. - r)**5 + 15.*plus(1./3. - r)**5
+def wendlandC2(r):     return where(r > 1., 0., 4.*r**5 - 15.*r**4 + 20.*r**3 - 10*r**2 + 1.)
+def wendlandC4(r):     return where(r > 1., 0.,  (35./3.)*r**8 - 64.*r**7 + 140.*r**6 - (448./3.)*r**5 + 70.*r**4 - (28. /3.)*r**2 + 1.)
+def wendlandC6(r):     return where(r > 1., 0., 32.*r**11 - 231.*r**10 + 704.*r**9 - 1155.*r**8 + 1056.*r**7 - 462.*r**6 + 66.*r**4 - 11.*r**2 + 1.)
+def Gaussian(r,h): return (1./(0.5*pi*h**2)**(3./2.)) * exp(- 2.*r**2 / (h**2))
+
+
+# Kernel definitions (3D)
+def W_cubic_spline(r):   return C_cubic      * cubic_spline(r / H_cubic)     / H_cubic**3
+def W_quartic_spline(r): return C_quartic    * quartic_spline(r / H_quartic) / H_quartic**3
+def W_quintic_spline(r): return C_quintic    * quintic_spline(r / H_quintic) / H_quintic**3
+def W_WendlandC2(r):     return C_WendlandC2 * wendlandC2(r / H_WendlandC2)  / H_WendlandC2**3
+def W_WendlandC4(r):     return C_WendlandC4 * wendlandC4(r / H_WendlandC4)  / H_WendlandC4**3
+def W_WendlandC6(r):     return C_WendlandC6 * wendlandC6(r / H_WendlandC6)  / H_WendlandC6**3
+
+# PLOT STUFF
+figure()
+subplot(211)
+xx = linspace(0., 5*h, 1000)
+maxY = 1.2*Gaussian(0, h)
+
+# Plot the kernels
+plot(xx, Gaussian(xx, h), 'k-', linewidth=0.7, label="${\\rm %14s\\quad H=\\infty}$"%("Gaussian~~~~~~"))
+plot(xx, W_cubic_spline(xx), 'b-', label="${\\rm %14s\\quad H=%4.3f}$"%("Cubic~spline~~", H_cubic))
+plot(xx, W_quartic_spline(xx), 'c-', label="${\\rm %14s\\quad H=%4.3f}$"%("Quartic~spline", H_quartic))
+plot(xx, W_quintic_spline(xx), 'g-', label="${\\rm %14s\\quad H=%4.3f}$"%("Quintic~spline", H_quintic))
+plot(xx, W_WendlandC2(xx), 'r-', label="${\\rm %14s\\quad H=%4.3f}$"%("Wendland~C2~", H_WendlandC2))
+plot(xx, W_WendlandC4(xx), 'm-', label="${\\rm %14s\\quad H=%4.3f}$"%("Wendland~C4~", H_WendlandC4))
+plot(xx, W_WendlandC6(xx), 'y-', label="${\\rm %14s\\quad H=%4.3f}$"%("Wendland~C6~", H_WendlandC6))
+
+# Indicate the position of H
+arrow(H_cubic, 0.12*maxY , 0., -0.12*maxY*0.9, fc='b', ec='b', length_includes_head=True, head_width=0.03, head_length=0.12*maxY*0.3)
+arrow(H_quartic, 0.12*maxY , 0., -0.12*maxY*0.9, fc='c', ec='c', length_includes_head=True, head_width=0.03, head_length=0.12*maxY*0.3)
+arrow(H_quintic, 0.12*maxY , 0., -0.12*maxY*0.9, fc='g', ec='g', length_includes_head=True, head_width=0.03, head_length=0.12*maxY*0.3)
+arrow(H_WendlandC2, 0.12*maxY , 0., -0.12*maxY*0.9, fc='r', ec='r', length_includes_head=True, head_width=0.03, head_length=0.12*maxY*0.3)
+arrow(H_WendlandC4, 0.12*maxY , 0., -0.12*maxY*0.9, fc='m', ec='m', length_includes_head=True, head_width=0.03, head_length=0.12*maxY*0.3)
+arrow(H_WendlandC6, 0.12*maxY , 0., -0.12*maxY*0.9, fc='y', ec='y', length_includes_head=True, head_width=0.03, head_length=0.12*maxY*0.3)
+
+# Show h
+plot([h, h], [0., maxY], 'k:', linewidth=0.5)
+text(h, maxY*0.35, "$h\\equiv\\eta\\langle x\\rangle = %.4f$"%h, rotation=90, backgroundcolor='w', ha='center', va='bottom')
+
+# Show <x>
+plot([dx, dx], [0., maxY], 'k:', linewidth=0.5)
+text(dx, maxY*0.35, "$\\langle x\\rangle = %.1f$"%dx, rotation=90, backgroundcolor='w', ha='center', va='bottom')
+
+xlim(0., 2.5*h)
+ylim(0., maxY)
+gca().xaxis.set_ticklabels([])
+ylabel("$W(r,h)$", labelpad=1.5)
+legend(loc="upper right", handlelength=1.2, handletextpad=0.2)
+
+
+# Same but now in log space
+subplot(212, yscale="log")
+plot(xx, Gaussian(xx, h), 'k-', linewidth=0.7, label="${\\rm Gaussian}$")
+plot(xx, W_cubic_spline(xx), 'b-', label="${\\rm Cubic~spline}$")
+plot(xx, W_quartic_spline(xx), 'c-', label="${\\rm Quartic~spline}$")
+plot(xx, W_quintic_spline(xx), 'g-', label="${\\rm Quintic~spline}$")
+plot(xx, W_WendlandC2(xx), 'r-', label="${\\rm Wendland~C2}$")
+plot(xx, W_WendlandC4(xx), 'm-', label="${\\rm Wendland~C4}$")
+plot(xx, W_WendlandC6(xx), 'y-', label="${\\rm Wendland~C6}$")
+
+# Show h
+plot([h, h], [0., 1.], 'k:', linewidth=0.5)
+
+# Show <x>
+plot([dx, dx], [0., 1.], 'k:', linewidth=0.5)
+
+
+# Show plot properties
+text(h/5., 1e-3, "$\\langle x \\rangle = %3.1f$"%(dx), va="top", backgroundcolor='w')
+text(h/5.+0.06, 3e-4, "$\\eta = %5.4f$"%(eta), va="top", backgroundcolor='w')
+
+# Show number of neighbours
+text(1.9*h, 2e-1/2.9**0, "$N_{\\rm ngb}=\\infty$", fontsize=10)
+text(1.9*h, 2e-1/2.9**1, "$N_{\\rm ngb}=%3.1f$"%(N_H_cubic), color='b', fontsize=9)
+text(1.9*h, 2e-1/2.9**2, "$N_{\\rm ngb}=%3.1f$"%(N_H_quartic), color='c', fontsize=9)
+text(1.9*h, 2e-1/2.9**3, "$N_{\\rm ngb}=%3.1f$"%(N_H_quintic), color='g', fontsize=9)
+text(1.9*h, 2e-1/2.9**4, "$N_{\\rm ngb}=%3.1f$"%(N_H_WendlandC2), color='r', fontsize=9)
+text(1.9*h, 2e-1/2.9**5, "$N_{\\rm ngb}=%3.1f$"%(N_H_WendlandC4), color='m', fontsize=9)
+text(1.9*h, 2e-1/2.9**6, "$N_{\\rm ngb}=%3.0f$"%(N_H_WendlandC6), color='y', fontsize=9)
+
+xlim(0., 2.5*h)
+ylim(1e-5, 0.7)
+xlabel("$r$", labelpad=0)
+ylabel("$W(r,h)$", labelpad=0.5)
+
+savefig("kernels.pdf")
+
+
+
+
+################################
+# Now, let's work on derivatives
+################################
+
+# Get the derivative of the reduced kernel definitions for 3D kernels
+def d_cubic_spline(r):   return where(r > 1., 0., where(r < 0.5,
+                                                        9.*r**2 - 6.*r,
+                                                        -3.*r**2 + 6.*r - 3.) )
+
+def d_quartic_spline(r): return where(r > 1., 0., where(r < 0.2,
+                                                        24.*r**3 - 4.8*r,
+                                                        where(r < 0.6,
+                                                              -16.*r**3 + 24.*r**2  - (48./5.)*r + (8./25.),
+                                                              4.*r**3 - 12.*r**2 + 12.*r - 4.)))
+
+def d_quintic_spline(r): return where(r > 1., 0., where(r < 1./3.,
+                                                        -50.*r**4 + 40.*r**3 - (40./9.)*r,
+                                                        where(r < 2./3.,
+                                                              25.*r**4 - 60.*r**3 + 50.*r**2 - (140./9.)*r + (25./27.),
+                                                              -5.*r**4 + 20.*r**3 - 30.*r**2 + 20.*r - 5.)))
+
+def d_wendlandC2(r):     return where(r > 1., 0., 20.*r**4 - 60.*r**3 + 60.*r**2 - 20.*r)
+def d_wendlandC4(r):     return where(r > 1., 0., 93.3333*r**7 - 448.*r**6 + 840.*r**5 - 746.667*r**4 + 280.*r**3 - 18.6667*r)
+def d_wendlandC6(r):     return where(r > 1., 0., 352.*r**10 - 2310.*r**9 + 6336.*r**8 - 9240.*r**7 + 7392.*r**6 - 2772.*r**5 + 264.*r**3 - 22.*r)
+def d_Gaussian(r,h): return (-8.*sqrt(2.)/(PI**(3./2.) * h**5)) * r * exp(- 2.*r**2 / (h**2))
+
+# Get the second derivative of the reduced kernel definitions for 3D kernels
+def d2_cubic_spline(r):   return where(r > 1., 0., where(r < 0.5,
+                                                         18.*r - 6.,
+                                                         -6.*r + 6.) )
+
+def d2_quartic_spline(r): return where(r > 1., 0., where(r < 0.2,
+                                                         72.*r**2 - 4.8,
+                                                         where(r < 0.6,
+                                                               -48.*r**2 + 48.*r  - (48./5.),
+                                                               12.*r**2 - 24.*r + 12.)))
+
+def d2_quintic_spline(r): return where(r > 1., 0., where(r < 1./3.,
+                                                         -200.*r**3 + 120.*r**2 - (40./9.),
+                                                         where(r < 2./3.,
+                                                               100.*r**3 - 180.*r**2 + 100.*r - (140./9.),
+                                                               -20.*r**3 + 60.*r**2 - 60.*r + 20)))
+def d2_wendlandC2(r):     return where(r > 1., 0., 80.*r**3 - 180.*r**2 + 120.*r - 20.)
+def d2_wendlandC4(r):     return where(r > 1., 0., 653.3333*r**6 - 2688.*r**5 + 4200.*r**4 - 2986.667*r**3 + 840.*r**2 - 18.6667)
+def d2_wendlandC6(r):     return where(r > 1., 0., 3520.*r**9 - 20790.*r**8 + 50688.*r**7 - 64680.*r**6 + 44352.*r**5 - 13860.*r**4 + 792.*r**2 - 22)
+def d2_Gaussian(r,h): return (32*sqrt(2)/(PI**(3./2.)*h**7)) * r**2 * exp(-2.*r**2 / (h**2)) - 8.*sqrt(2.)/(PI**(3./2.) * h**5) * exp(- 2.*r**2 / (h**2))
+
+
+# Derivative of kernel definitions (3D)
+def dW_cubic_spline(r):   return C_cubic      * d_cubic_spline(r / H_cubic)     / H_cubic**4
+def dW_quartic_spline(r): return C_quartic    * d_quartic_spline(r / H_quartic) / H_quartic**4
+def dW_quintic_spline(r): return C_quintic    * d_quintic_spline(r / H_quintic) / H_quintic**4
+def dW_WendlandC2(r):     return C_WendlandC2 * d_wendlandC2(r / H_WendlandC2)  / H_WendlandC2**4
+def dW_WendlandC4(r):     return C_WendlandC4 * d_wendlandC4(r / H_WendlandC4)  / H_WendlandC4**4
+def dW_WendlandC6(r):     return C_WendlandC6 * d_wendlandC6(r / H_WendlandC6)  / H_WendlandC6**4
+
+# Second derivative of kernel definitions (3D)
+def d2W_cubic_spline(r):   return C_cubic      * d2_cubic_spline(r / H_cubic)     / H_cubic**5
+def d2W_quartic_spline(r): return C_quartic    * d2_quartic_spline(r / H_quartic) / H_quartic**5
+def d2W_quintic_spline(r): return C_quintic    * d2_quintic_spline(r / H_quintic) / H_quintic**5
+def d2W_WendlandC2(r):     return C_WendlandC2 * d2_wendlandC2(r / H_WendlandC2)  / H_WendlandC2**5
+def d2W_WendlandC4(r):     return C_WendlandC4 * d2_wendlandC4(r / H_WendlandC4)  / H_WendlandC4**5
+def d2W_WendlandC6(r):     return C_WendlandC6 * d2_wendlandC6(r / H_WendlandC6)  / H_WendlandC6**5
 
 
 figure()
+subplot(211)
+
+plot([0, 2.5*h], [0., 0.], 'k--', linewidth=0.7)
+plot(xx, d_Gaussian(xx, h), 'k-', linewidth=0.7, label="${\\rm Gaussian}$")
+plot(xx, dW_cubic_spline(xx), 'b-', label="${\\rm Cubic~spline}$")
+plot(xx, dW_quartic_spline(xx), 'c-', label="${\\rm Quartic~spline}$")
+plot(xx, dW_quintic_spline(xx), 'g-', label="${\\rm Quintic~spline}$")
+plot(xx, dW_WendlandC2(xx), 'r-', label="${\\rm Wendland~C2}$")
+plot(xx, dW_WendlandC4(xx), 'm-', label="${\\rm Wendland~C4}$")
+plot(xx, dW_WendlandC6(xx), 'y-', label="${\\rm Wendland~C6}$")
+
+maxY = d_Gaussian(h/2, h)
+
+# Show h
+plot([h, h], [2*maxY, 0.1], 'k:', linewidth=0.5)
+
+# Show <x>
+plot([dx, dx], [2*maxY, 0.1], 'k:', linewidth=0.5)
+
+
+xlim(0., 2.5*h)
+gca().xaxis.set_ticklabels([])
+ylim(1.2*maxY, -0.1*maxY)
+xlabel("$r$", labelpad=0)
+ylabel("$\\partial W(r,h)/\\partial r$", labelpad=0.5)
+legend(loc="lower right")
 
-text(h-0.1, cubic_Ws[0]/20., "h", ha="right",va="center")
-arrow(h, cubic_Ws[0]/10., 0., -cubic_Ws[0]/10., fc='k', ec='k', length_includes_head=True, head_length=cubic_Ws[0]/30., head_width=0.1)
 
 
-plot(r,cubic_Ws, 'b-' ,label="Cubic")
-plot(r, quartic_Ws, 'r-', label="Quartic")
-plot(r, wendland2_Ws, 'g-', label="Wendland C2")
+subplot(212)
 
-text(cubic_H-0.1, cubic_Ws[0]/20., "H", ha="right",va="center", color='b')
-arrow(cubic_H, cubic_Ws[0]/10., 0., -cubic_Ws[0]/10., fc='b', ec='b', length_includes_head=True, head_length=cubic_Ws[0]/30., head_width=0.1)
+maxY = d2_Gaussian(h,h)
+plot([h, h], [-4*maxY, 1.4*maxY], 'k:', linewidth=0.5)
+text(h, -3.*maxY, "$h\\equiv\\eta\\langle x\\rangle = %.4f$"%h, rotation=90, backgroundcolor='w', ha='center', va='bottom')
 
-text(quartic_H-0.1, cubic_Ws[0]/20., "H", ha="right",va="center", color='r')
-arrow(quartic_H, cubic_Ws[0]/10., 0., -cubic_Ws[0]/10., fc='r', ec='r', length_includes_head=True, head_length=quartic_Ws[0]/30., head_width=0.1)
+plot([dx, dx], [-4*maxY, 1.4*maxY], 'k:', linewidth=0.5)
+text(dx, -3.*maxY, "$\\langle x\\rangle = %.1f$"%dx, rotation=90, backgroundcolor='w', ha='center', va='bottom')
 
-text(wendland2_H-0.1, cubic_Ws[0]/20., "H", ha="right",va="center", color='r')
-arrow(wendland2_H, cubic_Ws[0]/10., 0., -cubic_Ws[0]/10., fc='g', ec='g', length_includes_head=True, head_length=wendland2_Ws[0]/30., head_width=0.1)
+plot([0, 2.5*h], [0., 0.], 'k--', linewidth=0.7)
+plot(xx, d2_Gaussian(xx, h), 'k-', linewidth=0.7, label="${\\rm Gaussian}$")
+plot(xx, d2W_cubic_spline(xx), 'b-', label="${\\rm Cubic~spline}$")
+plot(xx, d2W_quartic_spline(xx), 'c-', label="${\\rm Quartic~spline}$")
+plot(xx, d2W_quintic_spline(xx), 'g-', label="${\\rm Quintic~spline}$")
+plot(xx, d2W_WendlandC2(xx), 'r-', label="${\\rm Wendland~C2}$")
+plot(xx, d2W_WendlandC4(xx), 'm-', label="${\\rm Wendland~C4}$")
+plot(xx, d2W_WendlandC6(xx), 'y-', label="${\\rm Wendland~C6}$")
 
+xlim(0., 2.5*h)
+ylim(-3.2*maxY, 1.4*maxY)
+xlabel("$r$", labelpad=0)
+ylabel("$\\partial^2 W(r,h)/\\partial r^2$", labelpad=0.5)
 
-xlabel("r", labelpad=0)
-ylabel("W(r,h)", labelpad=0)
-legend(loc="upper right")
-savefig("kernel.pdf")
 
+savefig("kernel_derivatives.pdf")
diff --git a/theory/kernel/spline_3.nb b/theory/kernel/spline_3.nb
deleted file mode 100644
index d59c7f43846fd6217c2b98e193e410f6b6268cc5..0000000000000000000000000000000000000000
--- a/theory/kernel/spline_3.nb
+++ /dev/null
@@ -1,871 +0,0 @@
-(* Content-type: application/vnd.wolfram.mathematica *)
-
-(*** Wolfram Notebook File ***)
-(* http://www.wolfram.com/nb *)
-
-(* CreatedBy='Mathematica 8.0' *)
-
-(*CacheID: 234*)
-(* Internal cache information:
-NotebookFileLineBreakTest
-NotebookFileLineBreakTest
-NotebookDataPosition[       157,          7]
-NotebookDataLength[     36970,        862]
-NotebookOptionsPosition[     35595,        809]
-NotebookOutlinePosition[     35934,        824]
-CellTagsIndexPosition[     35891,        821]
-WindowFrame->Normal*)
-
-(* Beginning of Notebook Content *)
-Notebook[{
-
-Cell[CellGroupData[{
-Cell[BoxData[
- RowBox[{"\[IndentingNewLine]", 
-  RowBox[{
-   RowBox[{
-    RowBox[{"f", "[", "q_", "]"}], ":=", 
-    RowBox[{
-     RowBox[{"1", "/", "Pi"}], "*", 
-     RowBox[{"If", "[", 
-      RowBox[{
-       RowBox[{"q", ">", "2"}], ",", "0", ",", 
-       RowBox[{"If", "[", 
-        RowBox[{
-         RowBox[{"q", ">", "1"}], ",", 
-         RowBox[{
-          RowBox[{"1", "/", "4"}], "*", 
-          RowBox[{
-           RowBox[{"(", 
-            RowBox[{"2", "-", "q"}], ")"}], "^", "3"}]}], ",", 
-         RowBox[{
-          RowBox[{
-           RowBox[{"1", "/", "4"}], "*", 
-           RowBox[{
-            RowBox[{"(", 
-             RowBox[{"2", "-", "q"}], ")"}], "^", "3"}]}], "-", 
-          RowBox[{
-           RowBox[{"(", 
-            RowBox[{"1", "-", "q"}], ")"}], "^", "3"}]}]}], "]"}]}], 
-      "]"}]}]}], "\[IndentingNewLine]", 
-   RowBox[{
-    RowBox[{"W", "[", 
-     RowBox[{"r_", ",", "h_"}], "]"}], "=", 
-    RowBox[{
-     RowBox[{"1", "/", 
-      RowBox[{"h", "^", "3"}]}], " ", "*", 
-     RowBox[{"f", "[", 
-      RowBox[{"r", "/", "h"}], "]"}]}]}]}]}]], "Input",
- CellChangeTimes->{{3.560154174311659*^9, 3.5601543108245993`*^9}}],
-
-Cell[BoxData[
- FractionBox[
-  RowBox[{"If", "[", 
-   RowBox[{
-    RowBox[{
-     FractionBox["r", "h"], ">", "2"}], ",", "0", ",", 
-    RowBox[{"If", "[", 
-     RowBox[{
-      RowBox[{
-       FractionBox["r", "h"], ">", "1"}], ",", 
-      RowBox[{
-       FractionBox["1", "4"], " ", 
-       SuperscriptBox[
-        RowBox[{"(", 
-         RowBox[{"2", "-", 
-          FractionBox["r", "h"]}], ")"}], "3"]}], ",", 
-      RowBox[{
-       RowBox[{
-        FractionBox["1", "4"], " ", 
-        SuperscriptBox[
-         RowBox[{"(", 
-          RowBox[{"2", "-", 
-           FractionBox["r", "h"]}], ")"}], "3"]}], "-", 
-       SuperscriptBox[
-        RowBox[{"(", 
-         RowBox[{"1", "-", 
-          FractionBox["r", "h"]}], ")"}], "3"]}]}], "]"}]}], "]"}], 
-  RowBox[{
-   SuperscriptBox["h", "3"], " ", "\[Pi]"}]]], "Output",
- CellChangeTimes->{{3.560154211258333*^9, 3.560154216293594*^9}, {
-  3.560154312540955*^9, 3.560154319804675*^9}}]
-}, Open  ]],
-
-Cell[CellGroupData[{
-
-Cell[BoxData[
- RowBox[{"Plot", "[", 
-  RowBox[{
-   RowBox[{"W", "[", 
-    RowBox[{"r", ",", "1"}], "]"}], ",", 
-   RowBox[{"{", 
-    RowBox[{"r", ",", "0", ",", "2.5"}], "}"}]}], "]"}]], "Input",
- CellChangeTimes->{{3.560154325145775*^9, 3.560154343883732*^9}, {
-  3.560154674704236*^9, 3.56015467532159*^9}}],
-
-Cell[BoxData[
- GraphicsBox[{{}, {}, 
-   {Hue[0.67, 0.6, 0.6], LineBox[CompressedData["
-1:eJxF1nkwVW/8B3BLqWyFFspW2UopWZLqvLWoLGVLi2RfE0pJlvqKQvaQpULZ
-okUIlSi7EMW9IS3Knsi+XPe653d/M7/53fPPmdc8M+d5f+Z5PzNnvY27sT0X
-BweHPScHx/++k6/b8CjbuRCuH+pdt1tSUU2R1j8prYN1M1e0trFM1r4LlJY+
-jXKzZW8UWRaMKc7j7bBB/sUdHbIsJ5dHSC2Wdsb24vQUCZa3jNpFko7u8EsX
-FBBh+YdgCpPR7oGRQhkZHpZlspapz9M9UWWf0DtjQcX5vZ6us1LeOKwsatDH
-8kvqr4ypA354F3PKroXleRf97+OO/+HqYl7Ztyw/5Px+e1b7Bl4t+nknjeWD
-TXdkZ9oDUMkX9DSY5cHEwxVTzjfx5+5Lj3Msh9stmE/Sb8FVUnRQl2UK3Slu
-TOo2Ctbo/1rE8pU6ye2j+aFY9t91u59nqVgbS20cORCO7y3Pk4pYttmsxf3X
-MQoy8vViZ1nmovt8mvKLhoK0qMNWlqm/TCWmtO/g8/gmB7o5FVef8b2ZaI+B
-OL/5zWiWdWP6eSYexuK4Xlf6SZbFr1YcH3eOg7HhX29xlssPeI2N0u/ieaKt
-fuoZKpZ865YfkUpCVwqRctuMio7yMs/hwSQIhFy8RbD8JCux6m/+PWgIvVEY
-P03FMY9jlkMHHqBqTC/LgOX4ZSXxA46pcLAaih4/SUWhl67HJdGHiKxRPB/H
-8oOhnWKTfg9BZggsU2PZ7bOw44T2I3x+fzXU/QQVwg8+cI23p4Hvt+n7+uNU
-0AWLcy7sScfBuEVvzVjuvZFuOPYwHVwPuq79MaGi2PF6yqhzBrbE/zrDwbKZ
-iqrmP3omkO8VLmJExaOG1It/pXJgaX3ozGd9KqYSizZ3hedgHZ9lsTLLhxwb
-e1ppOVAxWRIQrUfFMPesaQnlCUKORbkd0qVCY+8xzdvBz1DSrVEUe5iKzy8Y
-XPKjL5CwtPVd4T5WXiyzEFDKw0vfvvARLSp2y8VpBtvl4UXTk68bWY6cypn0
-acmDdQplIoSgQvXOF3ubp/kI7jm9R203Ff81bNFVtnyJrTdFJ1arUpGd/0ru
-6d2XYH7QXaWgQkVr4n5u2Y+s9a1GNeo7WP11PFUquqsQ1B6bT/rbqWjgvqnE
-IVIEKbppxpktVKze+034U20xmKfM1iyWoSL3xe1v55VKQOHrMIwQomJIeChc
-17oE2985ljusoEL+ii6hEFeC6+srZYnlrH7v4XvUQyuB/9lX1H5+KmLqwx3M
-at7ixdnadqmlrL72RI0fNi9DqXC4iDKTglS+xkAh7XJcHrU592WQAmKvr7m/
-RTmkg+dm3QYo+O6mqDbqVY5vvWdiePopEG0N6//4pBxbTc+Pbeuh4E6ivk7I
-igq8eNtm7/KDgkDZZkHyWwW2qQ603WqhwBEtScMeVVh1Kb3z6GsKONJmSr6F
-VUF2Ii6zoJiCxEXi3xsyqpDuRslaVUTBh3oHyZy2KpAHR4615VOgcJyRZq9Z
-DfJJxJH9TykYdJJ/9pOzBlI+et41yRSci7n27nN0LTRW8Uddu0GBW9+mnsLc
-euyuULjeeYCCXxZfl1bU14PvnDBP1j4KTDpClJp661HoG5DlDgo0Gge8+9Y2
-wCrKRXVBkwLu/MwVq4Mb0Hkh6QHnDlY+v/WEl2UjMn9vtMqToqBCRCxp14om
-pGSOVzyltYJ3/U35Lo3P4FpuY6Wb3YrIFY2bH1lRIGfo5PeQ2QKbb8ffc39s
-A6+L1pTTthZcrLlp4y7fCd3FtWGvDD4j/wK/iajOD3zvaFEe72rGI1+Xoe0n
-f2GUNjNr/OsjAlIZRlPDv3F171fnqdEGcFB+aZX69KBgmmuF88IHyF12uHp8
-cx84pd7sdPpai97phBN8Zf2YLspPMPpaDd2+c9diLg5iv78ZbWlhJY71RitK
-Sg5hTajZO7PMcgxV2fh/eP4XFrF1a5/Hl0FJqE3igOUI+N4r83dVvwEvpq2/
-T/5DX/vFigD3YhweMZvdYDqGGef27YXSL9FRacdptmUcG3hEZDnU81AUUm0h
-PT2OGErPw+NhT+Gw2PNXc/MEGuqUHtq0ZcHWw7eI/94k5lYVBkhMp2Gbo2Kz
-vvUUBtLyt9xXT8Un4S18nrumodSuoqTSm4i3uQH3t3DPQKnzmPOy1ljw6Juu
-TeCZgWAkL29JWSyM/sgncfLOYOVKu6irObEY2NgU37ZiBs90BlWlbsRCJGlN
-jL/kDJhadHth5Vi4BD4Loe6agW2Uj3p4bAzWnWrz9Lswg92yp/Y/s7kDX87N
-hh9/zOA8b/EGeaNI9B/k65D7PQOevS3azdsiYRgybHmjdwZ+r2h9EYKRkFn+
-wl397wxGh1y9T3yMwEdx1aiHczPYI1E1xqcTAUkNovmyyCyWaEppcRwOR6Wb
-sb6Eziz+5sSc/nQ8FLzffY64Fc4iiH/ZzuXFQVD3yhGIejWL2Uu7ch0fBMFa
-uKP1Rcks6Ku9r1MCgvD6iNrZ8fJZ+K0d5u8zCIJD0ejFy02s79nyrRoduoWK
-SLv7Pv2zaLS94ZoocwtX9x37FyQ6Bxvln2ICWYHoy1ofl+o3h6eUSdW+Ln8M
-3/xN4/afg5aCuPbKen9M2DyydAqcw7P3d0dPFviDQ2q9onLoHMrC1P+J3/LH
-2gTpysqEOXC6HIh4oOgPw2Cpsb6COWx4an2COncdpY4S+lv+zIGRL3GSftoP
-Vdo/8qOH52Cxv2b0jLIfGjYmr5kenQNX1jOnnqV+aO8S7ymbmUNLdG6ew2tf
-TJwU9zFYRMOBxOyoPWK+UDiyLttDigaZyErNVd3eiFMQW/TGlIYGodlPs1Fe
-MLeQe9xzigbuoshIDzcvyMSp6Aqa07B+oldO4KgXCsmj0bY2NIybbsp+xOsF
-aluAuKA7DaLB8QcPBV+ByK1hVdsQGs7lKDy6EOSJmN/v7QXe0nC/1Lyh6/4l
-mK1pWqrxjoY2SXtrjeuXsOFo51ObChpWzvOP5FpdQsHrqfFXdTT470zInJa9
-hNaoTddtqDQY3Vk7OZDvASEiNv7VCA1vOrlKjT9dRPQ9hzpr6Xmsjw3tb5O6
-AK8rAZf8Ns7j/Podj0x4LsDCOEUqQW4ecl9mstq+umMLb5vXxy3zaOTNbGy7
-7I56b22FnbvmEXTb3Pb0czdwnZa5zW88jw6Df03HFFzhuaZb91XgPEq/GCbv
-0D4H88mFmZageVz/4ZJeIHoOBz+JpQ/fnsfmzvc89sPOEA42oq+PnoezTYuH
-VZwzcmfKn4Y/mIf7tZby3H4nDH5J5bcpmofeKYZadJwjzOLONvMPzOO3V15M
-ykp7iL/kXecyNI9gJwbf2JAdulpeOdaPzOOx8e77VyvsYLdcmDNoijVfW4Kz
-krsd3EJrVUhOOkzGJusUmmwR4L8taVycjlVCvg80Ym2Qc57Trs2YjswNnUo1
-h6xwPiw3T/UEHbTnBvkFG62g9OTMQsxpOnbq9r9p5bBCwUBRvIEVHeFjc/Em
-XpYotTlX/8GVjrmFP72Wjhb4fIqi9DaEtV9RQcXcaXPMHcykpb6jw695aeyW
-4FPwe5saWF1BR6i2cDjD8BQ4dtwT+FNNh+zJsUjxdaewRCpq/Y5GOv6GLKvR
-yzuJ1bSrOtXtdNj9bLhi9fMEVJ7rJw2O0XFUyFJom44p3FZNaShvZCAo3ndT
-5UljTIT9qzSVY8CwRfOGtKIxPLn+6PtsYiAy+YN3HdMIfqM/rKq2McC3aZvV
-vywjhNXXhZjuYWBH9f6gpHlDZF+73+FtykBb/Ms8GwsD9PTtv1oZzICq5pBa
-dIIu9qc2p20OY+Cqos7fYB1dPDxl1hQTyYDAWt6fogwdWDRe3GB7l4Hz1ACp
-RbY66Mx7+JE7nYFsHp6I0Z1H0OrLlNZ+x0Dow8WDdxe0oawWqve8goEEQzKz
-87U2ov+turKqhoG8uJTtLy5r45j11sa+RgZK3qUYLP53EA2HzD2DvjIg0X1m
-m+nAAVQKldTXTTFwJ9NXj5zch/WN2lPb5hj474YOz/yrffC/2SKZSGdAL4RD
-wtdvH4jZwUtOXAtQYsxzCi7Zh5LvaySXrVjAqpp2GYcNWih47Omhq7iAF2q1
-h19170HaXuV1TdYLyE7PdL7coY7KY7yBdLsFKE/Qv0bfUke3Zc/QJqcFxPLm
-l15RUcfGgLslQW4LeD5uxXn/jhoyammntXwXsGsp7eHmk6rINKhMfBm3ALWG
-VVxRPDtQa3WfozthAe3rlY9+eq+M/ouXnVbcX8C6x4ZP672VIRcnp+H6iJU3
-tubD4Oh2ZHWEtsvlLoB8b3jApX4bsq1NVt+rWwCXf4mCfZciPnhsuf6hYQH6
-r+Vv73NSxGDg4v6ZpgWIiAwaXprYDIWsV0Um1AWM/y50+MS7GTlD60wFfi/g
-fUXFDxU9BTy51BvrP7+AxR7vw8ilsmi4WUbPXVhAZs7yRr0sGQzdjbf9wcHE
-psaFguaDMlB8fURFcwkT96/xNF24tRHPGM9aJ0WYKLzGbz62cgOe3/IUctzK
-BEcG95h5mySG95wNuLWdieK8g+/Fd0liy9TByXQVJtQLFN/MJEvgic3KL792
-MTHy/FrekKs4svEy0ewQEwYa2w+5yq3F4My9Zd46TMSmpHInJYlBPjfAJ0Gf
-idYtXycqBMWQJW5sTjVmgvy6Z4jBXIMM2pjUMUsm0jfVfMmfWYXevI7o8zZM
-HPW/WWLtvQobnco5w+yZqFHe0TG+sBJpbVE9dS5MdJcN7cvgX4nUl0qP911l
-ImTrJ8dh1t/7PdfzSmp3mBCeKAotmxZAp8zxVJM4JoYH9x55ryIAse+7V3gk
-MJFipRIn4MmPRF2+idxkJtwXhf304OTDXfknRQpPmCgNGHK7q7UU1J935A4/
-Z+JG97eW7YlLsDLeO8E+jwlThcCWnxM8iFmk451WzAT/vTW5pvmLEf17YI94
-FRN+XqaCvUbc+Jz46blmLROCrpMhOhVcWG74SvJ0PRNluVG5wSpciHwXxBH/
-iYmYIwOcF6U5EX5ftnr5dyZ2eErPnFZnEh+NBdSUupig9+8mxfYtEHy805n6
-3UwcKO31WWPEIEKvVgffHmSielquI9p/ngg2tdXjnmadr6tT5MC6WSLN4Lu5
-/RwTkRGOfzv3zhClOqZudXQmsgN2f3SznyYm9h6+E8ZFYvHNHXJKNZOEuZxi
-u8gKEprCQncX14wRXtIZg54iJKJ5XM7NCo0RMWsl5ttXkzjDdaXkb90/ok5w
-ucQDCRLSB9cZJm8dJnbMTtjIKJLo8rU9RXUZII6Ou1wOUiLxa/2viCvG/YTT
-395bg8okGOphyrz7+4jkrrbsZxokBH4J/Li4u4d4/dWgRHAPidP8xeP0g91E
-K+VD4wWQ0Pu6NbLnxG9i6YeSf6qHWHl5z4y5yXURGytVOBJ0SNjR+DJf+vwg
-iNJnQjR9EvzhKQ/Cfn4jLuWlqJaZkFBlPgluonQQPxMDfLStSVjUKBcGnKcQ
-tBh62GM7Ek3+5f26aq3EyojLycucSCRnFFWe4GshdG44lDe5kTi/QvH4xtIm
-ws63q2WbB4ne8f0ppwsbieuep3rueJIod372qK66nih01uU54Uei7+ecJl2x
-lvhkW7Xm9X8kjkJyUdKNamLo7J5NawNJpM+LjUiOVBJSxkr6P2+TiDwsdiuW
-u5zQ1H98ViuCROCVd82FZmXE8UPS7mnRJHJSOB5e+lJChGoKxzgkkMjYma/W
-t7uYyFQNS/9wj4SJW4bVEdlColxpUdHmFBJq7ift9yoXEJ0K12rDH5E4mczv
-9cAij5jeMNP+L4OEfSF/jO+p58QKCfc/htms/EKCEpPKT4jNawbnC56SeCKa
-Nm1w+DFhxdcp4VVAYo/K3U2+4mmEz2KTbV+LSKhn0tOPv0gl7pKNWrvfkCg2
-F63mCLtP5NEOGieXkggVDfU/U5dANEyW2ZLvSXyxNcuqsYwj+kbUPa2rWPvV
-xnA9NLhDcAy+CKqqJbE0ybs6XiaCWNutkCjbwOoHWSVjdCiEUPv+KCe4iUQr
-ZU35dnogsULJae7FZxKyysJHZj39ieH/lA53UEj4JkuN51f6EB9apu5ytJNY
-tvvx4gwXTyJj49tehU4SDmv3bS9rv0D4e95QMfpBQki9P+vvqDNhXnc4wPsX
-iUqNoPRwH1tCQ0yw5VEPifDZb+oPbMyJlS5UqYZ+Ekv8jB/vGzYhxkrvuU38
-YZ0PTVC77aku8VHQumztCKtvozL2U7VaRLaVPP+BMRIR2YpB+2xViJsFI2Yu
-kyTWxe7/6N28kbBaVJgTO8O6X7836N7MXEHsOeEz95bG6nM5c/eB2Mm9otla
-h3sZJLiHXIV3SVbtnffj1uPh4NDi+L9nMNPB4x4n223N9UlK3GwXSEcPnuBh
-+1y1eNBjfra/8qlXHBFju1Y1afD7WrYLzzKWXxRnO/pFpUWSFNtHTAwYf2TZ
-fn3PaWfYDrYTN91/3qTH9tTkoczAo2wbvJt4sMuAbR4T3fBMY7Y9/Wjnrp1m
-2/jTSYWtjmzze65MDw9g+7/yiES9N2wfXWea2SDD+f/e51Z3Rug226niNjIf
-p9mWqXocf+wMF3verZWbW9+x7TC5qKV+M/f/2yZ/Qvp2FNvnA0wYunfY9jQp
-bOePZTtk2jMyOp7t3F00ekIy27QKsi3rKdvRrfwR1XVs30t3dQqqZzv9cvOB
-I41sF6+Onm9sZvu7mYgT5Qvb8j1iB7p72N5e6COZ0ce25q1vNPsBtvXlk/P+
-DLHt4SItOT7Ott+eG7SCSbZvCXRTL0+znfgiPWxuju00/0WOJfNsPzWy3+/H
-YLtwQ60EwWT73aQcjSTZ/h8gYzo2
-     "]]}},
-  AspectRatio->NCache[GoldenRatio^(-1), 0.6180339887498948],
-  Axes->True,
-  AxesOrigin->{0, 0},
-  PlotRange->{{0, 2.5}, {0., 0.31830988618378947`}},
-  PlotRangeClipping->True,
-  PlotRangePadding->{
-    Scaled[0.02], 
-    Scaled[0.02]}]], "Output",
- CellChangeTimes->{3.5601543446066847`*^9, 3.5601546760449047`*^9}]
-}, Open  ]],
-
-Cell[CellGroupData[{
-
-Cell[BoxData[
- RowBox[{"N", "[", 
-  RowBox[{"Expand", "[", 
-   RowBox[{
-    RowBox[{"(", 
-     RowBox[{
-      RowBox[{
-       RowBox[{"1", "/", "4"}], "*", 
-       RowBox[{
-        RowBox[{"(", 
-         RowBox[{"2", "-", "q"}], ")"}], "^", "3"}]}], "-", 
-      RowBox[{
-       RowBox[{"(", 
-        RowBox[{"1", "-", "q"}], ")"}], "^", "3"}]}], ")"}], "/", "Pi"}], 
-   "]"}], "]"}]], "Input",
- CellChangeTimes->{{3.560154431542004*^9, 3.560154500452031*^9}}],
-
-Cell[BoxData[
- RowBox[{"0.3183098861837907`", "\[VeryThinSpace]", "-", 
-  RowBox[{"0.477464829275686`", " ", 
-   SuperscriptBox["q", "2"]}], "+", 
-  RowBox[{"0.238732414637843`", " ", 
-   SuperscriptBox["q", "3"]}]}]], "Output",
- CellChangeTimes->{{3.560154427870244*^9, 3.560154500989884*^9}}]
-}, Open  ]],
-
-Cell[CellGroupData[{
-
-Cell[BoxData[
- RowBox[{"N", "[", 
-  RowBox[{"Expand", "[", 
-   RowBox[{
-    RowBox[{"(", 
-     RowBox[{
-      RowBox[{"1", "/", "4"}], "*", 
-      RowBox[{
-       RowBox[{"(", 
-        RowBox[{"2", "-", "q"}], ")"}], "^", "3"}]}], ")"}], "/", "Pi"}], 
-   "]"}], "]"}]], "Input",
- CellChangeTimes->{{3.560154530785256*^9, 3.56015454752137*^9}}],
-
-Cell[BoxData[
- RowBox[{"0.6366197723675814`", "\[VeryThinSpace]", "-", 
-  RowBox[{"0.954929658551372`", " ", "q"}], "+", 
-  RowBox[{"0.477464829275686`", " ", 
-   SuperscriptBox["q", "2"]}], "-", 
-  RowBox[{"0.07957747154594767`", " ", 
-   SuperscriptBox["q", "3"]}]}]], "Output",
- CellChangeTimes->{{3.560154539254085*^9, 3.560154548437131*^9}}]
-}, Open  ]],
-
-Cell[BoxData[{
- RowBox[{
-  RowBox[{"DWr", "[", 
-   RowBox[{"r_", ",", "h_"}], "]"}], ":=", 
-  RowBox[{
-   RowBox[{
-    RowBox[{"Derivative", "[", 
-     RowBox[{"1", ",", "0"}], "]"}], "[", "W", "]"}], "[", 
-   RowBox[{"r", ",", "h"}], "]"}]}], "\[IndentingNewLine]", 
- RowBox[{
-  RowBox[{"DWh", "[", 
-   RowBox[{"r_", ",", "h_"}], "]"}], ":=", 
-  RowBox[{
-   RowBox[{
-    RowBox[{"Derivative", "[", 
-     RowBox[{"0", ",", "1"}], "]"}], "[", "W", "]"}], "[", 
-   RowBox[{"r", ",", "h"}], "]"}]}]}], "Input",
- CellChangeTimes->{{3.5601545811631327`*^9, 3.5601545907204247`*^9}, {
-  3.5601546570572557`*^9, 3.56015471264272*^9}, {3.5601550735178423`*^9, 
-  3.560155113042481*^9}, {3.560155146451144*^9, 3.560155154786213*^9}, {
-  3.5601552200011473`*^9, 3.56015522178111*^9}}],
-
-Cell[CellGroupData[{
-
-Cell[BoxData[
- RowBox[{"Plot", "[", 
-  RowBox[{
-   RowBox[{"DWr", "[", 
-    RowBox[{"r", ",", "1"}], "]"}], ",", 
-   RowBox[{"{", 
-    RowBox[{"r", ",", "0", ",", "2.5"}], "}"}]}], "]"}]], "Input",
- CellChangeTimes->{{3.5601551158258877`*^9, 3.560155135295669*^9}}],
-
-Cell[BoxData[
- GraphicsBox[{{}, {}, 
-   {Hue[0.67, 0.6, 0.6], LineBox[CompressedData["
-1:eJxF12k4VV/UAHBD+hdCpaRkLOlWQhMaFpEpSaZSSIYylFKUKSFkyFgiIRJp
-FFFR3WXKTNx7K5m5gykqlPne97zP8z7vPV/O8/twzll7n73WXlvO4aKZMx8P
-D08oLw/P/97TAx0Wqzq5H7j3xPWAopIbuYoqa3xc1hAWv5m6y5rVR041+aas
-rDUY7NFaQZo6jullMTICsq5wTFA3jPPrHJ7f731hSsYXtvOHPWntu4bUOZe7
-v2UiwfaxQlTk51v4X0f/plGZ+/AOLkrzxyRjVv1DzxGZp3A9K0Z5bt0TfJUf
-2XFeuRSuVhjOu616h+eg9f7Py5VwpemYZpFXGXowN9OLXtVBlLhJZPLXKhSU
-C93Uo94C0y3Tx9Ml6jBWrIGUZU+F2QnV26WpjejQYYH8jd+AfvXY7JHBLyj4
-oO+C549vkBV4wGfv5BcsPOkh1cP6Bjkdg55iPC3I337Lt5T3O7yy/Vs+JNSC
-j9tKd3ju+Q57bk1MuCu0IOurbF73o++w6VLow1fHWtCt5Wd8iW8bpNyfFt/2
-vAU9P4c6XNzUDreNSL4iZq0ozdBb/G9HOwifW9z00qoVG/iWPgvQagfTEyPp
-hqdacaNWzJ9I63ZYVfLl7VWnVvxRmhT0OLodkq/wfc692orar3Mzfvxqh8RG
-UXO71FZc/qC2Xed9B5iK1O/a0tOK5JKowPqqDqitKCOdpLeie5ux/LHWDuA3
-LksLG2jFz6taXe2GO6Co88dU669W9I3/Me0j1Qmrpq5GafFQsC9sePWroE6o
-FbWW85alYMElYfM1hl3EfH5ctc+Ogo2pK31fWnVBwGl1+8MOFGRVrX140KkL
-PMWfR1qdpeBayc0j5290Qekfb197DwqGlh+6WV7cBc99sxoPBlLQasWNN27y
-3bANj2prp1Nw9s2flZ/muqFYdqnTxjYKindPa5gt7YHM664xrzsoqLyEx35g
-dQ9weMqT9vRQ0NFG5MVytR6Q9LfU0WAR8SzaonvWpQcK94mvEJqkYIalk5fo
-1x4I/Tt4fVCEiln+7sMqx3vh6f5fsVHaVFQpWzp9y7YXSo3vqtvpUrFsUZ5A
-j2MvJLqwDZT1qdgbw5CNudQLYdVyGZXGVJTOtD0+FNkL0etyJCuPUzG16mhV
-1sdeECRlWK66QMXEZTszVsj3Ab3Num9xEhXlzSjPXZX6QC8o0/FcMhUL7l0q
-KVPug/M7XxhX3adii8xLmsfePmBXrDC/kkFFETVFoQaLPjjgK6Wa+4SKUVZr
-fG7e6oM9RvJqp0uoGPJw/tjkzz7gD5zSl26noqnKe88tE33gVrl5ireTiK/8
-SoLDTB9kP12k2t9FxdL+4ZYWgX4IO1Gkm9JHxfGNP46+lO4HCa25kfZBKjq8
-KD5y1rQfHHZv0iz9R8zHfs8L6Vb98C2CfufsNBXZTVtjaDb9IB1g4CM6S4z3
-V3bjQdd+kDok3Ge1QEWKWuJhmZB+YNr7q33kp6F2iYdh25t+eIRTtF5RGooY
-kVxFS/tBQebAD8vlNOxsZ0bolfWDr8w2l5oVNPSZt6ktauyHO3wCipmraPga
-DusnMPvhrlmanNI6GspWbzpkJEGHMknXNYkbaThmRXcKXk8Hs3CNmi+KNPw4
-kBH6XoEOxXcKeQSVaHhi6eoqRRU6jMRnrPQi0TDeeJEOvyEdNq6yK5HeTkMe
-aq/WRz86lOv6L2reQ8PQIXKWXzAdwofWhAyr03ApTzqfRgQdgtu9Rvk0abhi
-m3VVcRIdlmWdi9m0j4Ybb7Xq57+mQ1qZ+jho0fBZen7ehXd04K2p7dqrTUPl
-opilW8l0eCAp8W7HQRqq9xk25DXQwS17TE1Cl4ZGeytMHrHo4GcYefSlPg2b
-j2Xm24/S4UaB0NMIAxqauwSKyUzSocTZ+PdpQxraJGlSHvAyoKFwiQHfYRp6
-/Cq0TJJigG3PeanVJjQcF0h4a67AAIGnffQWwtekLkqsIDGgiyx+/9ZRGgYZ
-bvkRu4cBql+KGwdNaXgnO9smwowBh0LoJ/zMabimNPiTnjUDYl8axYpb0DCt
-5bS0gD0DhDZeKnhGOHdhXW/wBQZkTP8tq7ekIUl8VkvLiwHFFlYvLaxomE9q
-y2L7MYAz73WrnfD743cd/SMYsFttdFH7cRru87hcpRHHAGnUyzM/QcPyUNON
-00kM2Jtip1FHuL5AeMArmwGi2+s25FnT0KR2WH/HMwbU/Fx3Y/lJGlK7a/P+
-vGZA4b+tNVcJdwqFuXmQGbD5V6TsrlM0tJd3bNj6mQEFXV9VYwkz1bW3jjQw
-4NKaXhU64THnhdFzPxgQdnn7dJANDa8EdJgo9jKgM8GrrI7wdGJJPoPFAPeF
-a1dFbGnIW+bteWaSAQolJc+jCYd/M6fIzDHg8NJpUhVhwVHVHd28THiaM5k8
-TTiOX+xu2hImaMTmjyvZ0VB87djkSVEm8FQq7bUknKLSaCm5mgmzOvae1wmv
-13/29rsUE0jLrZKyCD+yjZC4p8CEKJLgk3LCil5nfSxITJCND8jtIrxNs8f8
-oQoTepJOrJ0kvJPnxPbh3UzA6Cq+/07TcG91i+Cu/Uwo8yw7vZrwwduGrBs6
-TGDoG6vKEzY0qyivNyS+J+JyjUTYdM3e9FWmTIivWqm2nfDx7jc+9lZMkHA/
-ekaFsN3jrRbPbZhQwbtOQJmws1vO9n8OTIiLuLpeifB5FWkhbVcm+HHOPJMm
-fOXfPVb0RSZEnO16vpyw30fRim/eTHj/aVCOl3BwSES6XAAThARChUeJ+CMM
-eH3PhzAhcF+++1fCcSJ+Fu8imCDm5AalhO/RxrfzxTGhzr/41gPC6anuQkeS
-mJB1M17Xl/BjewYr+QETUv3nvM0JP1e0rejPYkKx4791JMKFP7+mb8tjwvje
-4N0LxP8pKTTx9XnFBPNFaZ8bCZf51FhUFjGh9aNeXQrh5kUlQtblTNjNPqys
-SPhrvepAdg0TpG7lxg0Q66Mz/lnFWBMT5HgSnHMID0ul+Ya2MyGxYtXF1YT/
-9ItbfullAkfo9aM6Yr1N58WorB1gQuShdiNfwgK7bgzkTzBhaWT3dBOxfoXn
-pipmZ4j1kPiefZnwivJLGYd4WLAsRt5XnLDsEQfLDmEWJB/LbTAm8mGf86HK
-xYos0JK6MJBI5JPOFnLGsa0s2P+kOmEdYaM/u/3S1FhgJ//6dSaRfyeuK6mq
-AQt4h9SMM4l89UoSemh7ggXd76ZmvIh89z8V6pdnxwJWZYtBnxkNQ+QWLCec
-WLCqTEPYiHD8yzHhSE8WUCI+/BY7RsMXn1v9iqJY0HcqUMWLqC+Mv8lWQp9Y
-YLiENzqeqFePtHdc6apkQWhNx5dCop6djmmOy69nwW4IHG7Ro2H7BoE68zYW
-iJxYm8VziIat5pf3pk2wwGd7/kplol6SC4xlt5EGgOR9WKScqL8BCwP7OCoD
-IPAxKz+TqM8ahjetW/cMgM0HVAzYTcOi3pJE70MD4EU3u6e4k6i/opsEyPYD
-cObHVz9Dor4nX+AdNkkegPU2JtY1G2hoUZK2WC5jANJIssedFIj5FlBXmHg8
-AIkFJaQFORrGpnnYJBcOwNigjrm8DLEfNHQ09zQNwOmFoqeakjS8tPntm0uL
-BkHx09Lz2cLEfDPdrid6DoJuynU9y99U1Luu8ELXZxDeqz6Y9B6j4sFVne3/
-AgfB5YHCw8SfVNQ8dET91O1B8O/xGkJivyXlbJ9QeDII10iS5sO9VBRynnQp
-7hyEdw0ufoEtVGxkXLdo0x8C170bJB+/oGJtwO6bUSZD4Nz5wEL3GRWrxH8V
-7LMcgrQjgbd7if7go669aJbDEJRl3vwjnE3FF48P1rteH4KOpfHxu4j+Isbp
-P625wiGwGciXrw6logkjfou09DBEfRFKZpwgvreFQbPZQFgoYP1uKyoevrIn
-8AFpGI75blUMNaeiAV93y5o9w6AxfIS8yoQYn+yWqytNh8FV7s5SUR0q7rL5
-XLYkZBiOMhWFT26lohRtxnKCOQxKlVfKj81TMHXdEY7qz2HYoj4ct2OGgpKO
-mXmXxofhX3dm7/J/FFw9rjc3yh6GvvLmJVW/KCgmdjdzUGIEpLu2erH7Kchv
-rDzSZTgCkq7jbxtqKThc6RBU+3IEZL8/+lmTQEGZhDa32KIR0FwW0P0zhoIW
-diaWFh9GQDF5smhZFAXJ0xqk3toRcM+tFNUJoWDituW0KfoIlBT+CAi4TMG9
-ybhp09qfYBPC3Ekyo+BtN6kvYeE/oWnBX8pThOg3l39br3N6FHScH9e98GtF
-VgwrJMl5FJw61hpJE/13uuDUwID7KDBVMnbf9mxF4UVrCm/7jIKAX4O5rUsr
-Dv+z1vuWMAr1oWUZNMtWzOns8nCtGgW7APFrEiqtKJXHxLjNY5Az4ag61teC
-gvD3TOfEGFTzZTX2aLag/ujJKXnL3+C8X3DHlHETtlU48Z7c+gdoAvpp9/bX
-Y3FElZ3s3z+wdeOM+bvIGjwr4N3b3DwOt86mW4hUV6HjZf9i4dQJqCvbEqRp
-XYHbz21pNj4zCXVROtfm15fhlxVbhbw1/gLVzfuAwfsP+OFVyIOt/P9gp6bW
-gXMt79Cfl2Ta2PUPMrfBYvfhNyjY6WfgUTQFbbOsNrsLr5GZK3f3YcA0ZJjk
-lhVOPMO7SpKLSixnoG6l6D2+3TkYn3q25ozsLNwczU/+6JOJJ+/aNgsPzMIy
-Ez4X2ZYUlHojuM59eBY+Vz87Y5+fgj2t787Vjc7CvNSdxW9jU9BJdAVv+OQs
-CDsomD06koIeUdU7OLxzEG9y8HdgYzKGBG2//0dqDhQLfA0Mmu/h0/O8Tt/M
-5iAlrdllXf9dnNbNmXlInoOzwstdK40SMODDw5tV5XOgNnFB46BqAvKopS4b
-qpoDqnY1iyqRgP/JxMmpNcxB2mrXtu3MeFw942NY9X0O5DOuSdQHxeOOl8b3
-B3/Pga7RpaVi1+LQY9WkuqrCPORsjN1jHngbx6PHKiwV58F3UE5iielt9OYb
-MvbbPA9L/F+87pe7jQG/uuwrt8/DStHc5UurozG6ribCch/hCVL0WrFozLv+
-oM3Xch7mnvnbmD6PRDrzoE/FrXnY3K7u8x87HA8+bH5Eip4Hw1bR9H8t4Zh5
-4mRTYuw8qP4hS6x4HI52DZ7yjknzIP48R6TZKBzbX2c28mfPQ8p/SXwnU8OQ
-4s+WPUSeh2THOb4K7VCsWF5aVzM5D25PWM6UgmCUazg0uX16HtqKKw4ZRwVj
-UGirdMrcPPCMXvUfdwjGA1ODV1z4FiD467nBSvFgLO2UkF4qtgAQ517r5xeE
-hU+8LxttWQCbw3+1PdMC8dF+1XVNZxag/qOHy9A2P6wwEbw557QADSX7FS6N
-+WL/afrwZpcFkC54oqyb74sKIUml4R4L8CDWeA9N1RcfV89Ya/kvgJ1Au+hd
-TR/MOVqR8ubuAtT08XcnnryKeWfMV6fWLECL5Z2x27TLWHt5a2Bt/QK8SSv/
-+CnjMg7eFGD9a1oAVldUx17Xy6iU+67YnLYA6kfHBGU5nvh0eJ3lsr4FCBqd
-VolQ9sRnVxh3gmYXoFdVc2f+nov4Msx7+bltbFhldbCcae+KP/fZhoSpsIn+
-UELSUtAVt07qTmTvYEO7quNy6yIXfOYg/rVXgw3NTjZ0aUEXzIM3KSf12FD0
-5bvwo49n8fHMbxmT02ywKR33Dt/vhIzXbfHnHdiQ4rv5K/x2RAWXMt5oZza8
-MflGi8l2xEff4ug17myQ0kwsHhByxIdvlJ9o+7DhwOo784n0M5h64bzyrgQ2
-BFV7asfZnib2W4uH5nfZELuQJXLvqx1Kdu4Vu5zMBlN7slS1iR2mGAmNv0pn
-g27eh7ksXVtM2vSsWOkZG6oO1lYZHDiF8X0D+6Qq2WCYtfmmmetxbEn58lKz
-mk2cp6ROj05YoajpO2nrOjZovpjzL7hhhbHkcJ57X9hwzDnQpzjVEm8/2Fgl
-2smG4b6kgBfd5njL0vEw/182TCxaVmCVYYqPjnbaOE+z4bDO84c7d5jiR0NL
-j5o5NoRfTU3uTTyK4/v1E6L5OCCQr17CO3kEbRS3fF8pxoGq0OayZd+N8Jrs
-40HvlRzIu693NWe1ESauXT/7fTUH7shV506fMMQaEdH1aes58Caqw6eVqY9q
-U+MOG7ZwQLD8dJ+x7CE88sfdK1yZAwoeKy+WeeuiywgjbFCVA7qxzdd+Nelg
-es+3vBfqHKjfdGr2RvhBXFJbOrZTjwPj5KZKtzVa2J0S4nfoDIc476i0NAfu
-wZnEuegnTsT734cbbwzejeIxXulLXTiwwXqEPB6xCw2Dz5Y1eXCg5khf2Hje
-DixyNVpsFcABB86NtDCSCn5xrJR4f4OI9+v9xE0Fyjhsu2/z2pscKPc51qa6
-ZhvKmCkbd0dyIENhrZLYfySM0lyReDaZA1KCy39MnNuAOTujs2tTOcDQvn/n
-xjoFLFNeVEzK4EDDIdCc+SGHf+X/fR97zIHrTnXJ539Jo9j6i0OmeRwY3Fl7
-YeKlFJIkBmcLn3NgWwaflFXIWrQXal9/rZADwuok3vzzq9FPwHz7j2IOdLlK
-qOeEiGMSp0FrbwkHzHZo6V97tQLrJz45cpADH6QjO23jRJA5utv7TCUHWAe1
-hDe7CSHPYH54ZTUHgpM9fj6/tAR3dWY9vdXEgVqKCrmNhx/FlF2m81uI55HH
-UuUND/68oazfRuWA3tsyJQ2JBXJt62QSz3cOBB5+OK0pPkN+rPCBodTOARlI
-lzxr+5cc5B2841gXB0gJpdGl43/INjX6Ib69HNh3cyz9Ut4oWV1SpDWLzoHJ
-zNxqMv8gWdydJlPPIuLxT084PdVP/v0x1WN8iAPkjbkLOYu7yI0iZz6tHeWA
-dHOslNrRr+Q8+03COr85YNA7xKmObyKHFo6edJ/gQKpEwVEvRgXZflHR0zv/
-OHAhlGXaplZM3mflN/1hhgOeQeVKpeRH5DV5WvqMeQ68EL9l8u5KCHk2gP/w
-Yh4eLZ7/uwZzzl5O5eX6W3PdfWV+rgtl4wetFnPtViUV/kSY6x9Cu8sNJLmu
-3nl/sHMt10W286KeUlzH51fY3Zfh2sD86PzQRq7fp7rsiVbjOmXzg5dNh7me
-nNDLuXmE66Pk8TSNo1wvNje6nWPGtXfAjNt1a67NvhxX2naOa2Fv8ezbIVzf
-KItJOVzC9ZF1ljn1G3j/39oeNaeWR3L9UMphQ+NfrjdUPrlncoqPO95tFSQK
-meuzE4ta60j8/2+HgnHZyDiuz4eYzxslcO1tXvRd+A7XEX+9Y+Pvcf1KY2Yu
-OZ3rmXLOt9znXMdThGOqarhOzb7gEl7HdbZXs45BA9dvV8fPNjRz3XlypQv1
-K9eb6JI6/XSuVYr8pB8zudYM65hxHuDaeFP666Fhri+7y0r/+cN1wL7gmcIJ
-rsOW9dO8/nKdkp8dPT3N9aOgRedKZ7l+fsz5YMA810Xy1esPsLkmTyjOcDhc
-/w8yc+em
-     "]]}},
-  AspectRatio->NCache[GoldenRatio^(-1), 0.6180339887498948],
-  Axes->True,
-  AxesOrigin->{0, 0},
-  PlotRange->{{0, 2.5}, {-0.3183098745627588, 0.}},
-  PlotRangeClipping->True,
-  PlotRangePadding->{
-    Scaled[0.02], 
-    Scaled[0.02]}]], "Output",
- CellChangeTimes->{{3.560155132339073*^9, 3.560155136020277*^9}}]
-}, Open  ]],
-
-Cell[CellGroupData[{
-
-Cell[BoxData[
- RowBox[{"Plot", "[", 
-  RowBox[{
-   RowBox[{"DWh", "[", 
-    RowBox[{"r", ",", "1"}], "]"}], ",", 
-   RowBox[{"{", 
-    RowBox[{"r", ",", "0", ",", "2.5"}], "}"}], ",", 
-   RowBox[{"PlotRange", "\[Rule]", "Full"}]}], "]"}]], "Input",
- CellChangeTimes->{{3.560154655600813*^9, 3.560154655644041*^9}, {
-  3.560155161815674*^9, 3.5601551791128883`*^9}, {3.560158982837762*^9, 
-  3.560159022468958*^9}}],
-
-Cell[BoxData[
- GraphicsBox[{{}, {}, 
-   {Hue[0.67, 0.6, 0.6], LineBox[CompressedData["
-1:eJxF13k0FX/4B3BLqaRSStJCpUilTbK/lRZJpdAiUSJrtihfKSLJGiJLQiFb
-4V5UlowsIWtIksi+3mu71879ze+c3/nd+WfO65zPPDOf5/M8n5nZamh90ZiL
-g4Mji5OD43/Prx8Z8hwwslD2e58aV+ZPJ4rrRTUui55GqZlVRglp1rd8d1HR
-q3g5rNqeR3pl0Md03iZD9Kap7k8l/brAT2SxqBm8N7m9DSe9Z9jIn2VijTMJ
-AvIupP+ujFqY+2UHoUK+aX3SYu+WyczMOiC/9XG7LGlLJYc7kyL/YZ21J5OP
-dEbDvziGqjMYXQeO/PGjEzMWGi2jJi4ANSQxlnQMZ4vX5InHKFIrPXOb9PGq
-wB0Tv9xwRfjPtu2k+8JOfWWYPUFnWPe+Zl864Ws0rzc+64H9MrP2PqTrZ02D
-R0S88Cg4PK3Fh07cK92yf5jiDVm5M0nOpIVfNFTQVH2xrvhqpyBpQ0kV7kGT
-5/ig1SYq700nuGadahjOAViv6CJR5EUnGv7pbGacCIRAXZXjSdKO75dnj/0K
-Amd12azCMzqhHtTDMxbzAinrow6metKJTY5ftUfNgnGq8W6OMOkC1fsjw7Mh
-iP/PrqTDg04s+dMhThMJR07lFNc+dzrRVPDFYagvHE8PrJy760Ynkt+FFQ1S
-IjAXXnop4zGdOGd3zmBANRKWSo5yW13pxMtlOS97TaLRcXQFPduZTmTeV7e7
-KxSDgSMuUnUP6ETkwJEN484xuFMg297lRCesateYjJ14A3nNhvhpRzqxJrKM
-a/TXW7xs7e/+Z08nZld+TLJRjAVfS8NY5V060fU4VnMkJha5M8luGXZ04qPJ
-o6hhsziI3zI/ZGNDJ3QPScvTZ+MhoG0nkGhBJ958j7YdFEmCs5Z3tMBNOsEI
-y5Js803C3OTSNd4GdOKkSUVn3XQSzErsZGeu04kh7kmdnPpkXPB5k1KqSydk
-lc7Je3m+x66X0Q7rtelEbdocl/hwGnqU8vp9T5DPi2X6K6TSYbVppClalU4o
-7AyW9zQi3e3w7v1ROuHPSBp3+pEO6V7+3gwlOiEd+NPYMIUCB+fPav8dphMu
-3/eoHzDIQIa7ZVv7djqRSPm0MyUkAzJZX0dfbKUTdWHHuHdUZiAzqIZPRYSs
-X5MreUJymdjOlWbyVJhOfOd+IsUhkAUTj/SZCn46Iaj0Z03Nt48wz9DrVZyj
-EalpXn8spXJgf+2caVk1jRhYM+CrfjMHds5GaowKGiF+T11ZIjgHX5WoyhvL
-aUSM4vI3ndM52FEeck+3iEYElfve1i3JRbRw9/moTzTiXufz0VN6X2Bo1O2j
-FE0jopdXuK8+UYAuOzvVfnMaoaz0QM9VvwDnGT5CziY0osVq9+Hh+wWw0ulR
-4zWiEUJ1Pj2VyQUoa10oErpOIwLDNE4/4/+KMzvEbyycoxHuO6pXsv58hZ9S
-iAD9AI0wwY/wIbsitDnRXK0YQwTH24mcPz5FOLuRj9NjZIgIW7Sp5XtcEU41
-92uEDg0RZeW3tyQ1FkHk1iLNtK4hQkJ77q2xfDFyLd4ofmgYIvpMxd+3cpYg
-lfr7XkfGEGEe9DC/NuAbHPsOJ6tYDxFW3bs6M1PL0e90qEupeZD4p/976dfy
-crhvyPw51DBIaDU9k6rqKkdvjYBVaM0gIVvR+1+38Hfcj//xuq14kOCmxPML
-en5H3V/6UcW0QSLMeavyfYMKPKint0u4DxJfBTaEy/FX4a0WK7ldfJDg3fpE
-vE22FpxpMhRD4wHCn79C8s2NeqyukGhOrO8jDP9oE9yVjfhyWWJH9nAPYVvy
-xNBavBkWUjvPBdG6CIoNn5bQ6b8IWvGSbwujg3jzwGJg/+V/UJY/9C36yz/C
-LXruAmOoHdPdhy6vPP2X4Kj/p5Ln1Al78TUvrPf+Jnba33bUluxGV3R+SF9l
-A9HFDL20/EsP3jZaK6u21BLq3eYPg2z7MHshT1jgXTlxritg95YtA5B//O71
-gWuFxECRoWvZh0HoLalMePvkMyES2GTunzmIgNxHo1u0PhPa+ud0tHMHwWdm
-UJO99TORPyUn+a9sEHlTm1+dIT4RQXtXN0x2DsJx4GNO++xHQiGUEBcXHkIt
-f8C1Aacswtd8U43H0yHoyUTvZQVQCanVjZtVDWhQC9Uv9FdNJnr8etxCjGlw
-5awVezmRRLzmnezttaCBo4Xe8ScpieBbJET1daRhf+fg9LbVScTAxNWTjYE0
-jBzvUovtSCDiW/5amRXToJjQKOwdGE9sSuwmnu+i45ndolWtIm+IBrEJsY59
-dDDCy2LHmmMInzc83tIydGgrjf04/TKGmIkQ1/59jPT0HreCFTFEk59Z3zY9
-Olxj3GrjWa+JF3dp/B/96QgL/cO82B5O8IJ5s2WcDtnlBxwE+YIIx6MSSz/O
-0KFSk8Wj6xVI9KheS33OOYwbCR1TqksCiUK1rzPHVg0jMfrZFb7FAYTTRb8X
-SZLDKOCLGxrh9yUGjXeU3Ls5jJEPTpYC+k+ISl8didU1w1AZ1W0wGjQhTtF0
-J7fpjMC1ulN280N3RK11r3e5MoL9PkzLB3JPwFBISWu5NgKbXB/7g8wniPGe
-NXlpOIICf7GmFPOnmBGPbFpmM4L0qK09P8564b3h3+wR7xGI6gp+E2b6g7/J
-wDm/YAT/ZiWs/LuCcZvleWVj8QjCwlyfuMiGIG9nurRj6QiEvgtaivqGwMyB
-g3agmhx/j77R7uBLFK55cz3+zwhufPs1dflBKOzPdij7TpDxs6cenFgIR1Oh
-EafunlHwf09Qv10ehZGjzcIP941C6FrXg+cro7Hk63npmIOjUKMb/HbQioYM
-IW/SIzuKMPOntk4t0QjO5a+yOzGKIZumAqpDDM5n5oX56I/imbuyksWjN/gW
-v3b/l4BRjCxbUDe4H4tWMe/T/16Mgs9PQaAlJBbMWJYhd+go7Ctv1stmxkLs
-7UDI6dejKE54YxI0Egu3qILZn0nk+K5H60pN4qAcallKLxxF4ur9h1Q045H1
-rFhflDkKxiG75vxlCUhUrxDynBqFYlXmNZ+dCYjgq6ujzY7Ckfqh8ZBqAlwD
-2k7mco1hz2aRMSHnBGiEzUhd4h/D/lCv04eHEtCRsJ/ls3sMIyrfqgtLErGy
-NDJ68uYYIt3Ff77QTwanV+xVfeMxVGZsaZ65lwyGerJAiekY7Pmpr/A8Gb+r
-P3kGWY9BaDmvnyGRjLjGOuu9D8n4kkszVm1JgXzPUpVbYWOoPfj0hsDPFNxe
-7PCvunoMjh51ndskP0BYpPzX7A8yfkP/eR6lD6iW3Vwj8XMMZbQZ46rzHyBj
-WfLF7c8YRFcOfOFx+ACe+nWvZPrG0Me5QV88/wPiYz7qRHGOg8/9jankmVR0
-KUx9vyM9Dh6dVzRPzTSE6WgURhwZh+lxmR8TBmnQsI7JLpUfh+Oz0ip16zRk
-vlVL3Hp0HGtj3uOtXxqeLAv3+Hl2HJE8Uaf4y9Mg9ktORclkHIqEm+N/m9Nx
-y+5BFl/EOOS/BjkGeqRD5mBr1q7X4+BeZLHT0ycdvGMqH0/GjEOqpO+lTWA6
-0m15Prm+G8dqA9ufW16nY84m8DODSsaTZRn0Z6TjpXVCbkvFODw/nzdWaEuH
-2T7evOnqcbjINJdYd6VDcdgyT7BuHGFxzgjrT0en1cEvmk3jiKbFvygZT8d+
-qy/5xV3jYP1XdyloCQXllvVf38+Pwz3zJ9f0Hgoi98gUlnMwEOUl99D2AAU2
-Q2GFPdwMRBaIm7QepkDQUr9IlJeBmBFEhyhTYGjRXxwsyMBkdMIRp/MUzJmx
-Sp33MaBk/7JI1JqCml2GZeEHGbiTN5PfYUfB2/7iso+HGVBouuYafo+C02be
-5SMKDIiLU8y7H1Lw0nRdBfllAEWzEftJHwr2meyu1rjJQOVHbuFl7yhoNROx
-/mDEQFtKrKBeEgW+lgL8K00Z4DPcqPr2PQX9trMXaqwYODa069TSDApinSt+
-XnBmoNh2IE+BoOCiC3GP6sKAuZn1tEQhBRxuGesF3Bk49Gs2clkJBdc9X11t
-8GKgZrhQOv47BeuDLP5eCmWgbuUB05M/KfgWbPDoUwQD9ylCUTW/KLAP1RIR
-imJA2+WInUYzBT8iFW7+jmPg6H0q/9Y2CnwSlndfozLg96O4V6aPAvlklkde
-FgPJpb611wco6Hs/vnNzNgO8Ubn+jkMUnKD+MW0lGNj9rfud1wgFrLyUoRtV
-DATOBy3ZOEVBKhHt97WWgbDhz3rt0xToFb6Q2tbAAOOeQ/6rWQqySx/YdDYz
-cFBUvWhogQLT79arj7cycGWflL07BxWCVbeoce0M6KWqnF3JRcXdujMM4z4G
-HjU+e8GxmIptPxHybZCBLkvvlaY8VNT+OiQjPszA+7MXq4qWULH370bHPiYD
-w2WVB67wUtHbOzhrzs3EWPSLy8yVVLwcaIus4GFilzudwlpFxXFavdIeXiZ+
-GMNzgZ+KN2O5LjR+JjgWXcv/vYYKTWaa6Lm1TFS8H937UYCKhcnYr6nrmXi+
-T1Dacy0V1+Z9uG22MOH5OkZkiSAVvByucbVbmVhyr27NR9KfuexPHNjBBCe3
-5Oer66nkfmHaEyjBhKiCn+AY6XVL9TzHdjOxeF2HtIsQFcW8mhJa+5g457lS
-mHMDFXYrjpdnHGQi6tlU5X3Sovyy5mtlmIhY9VS7k3TNmj3LHeSYaF/z+pOq
-MBXKd+70v1Jkwubb7dgXpFNLU0sLwYRa1Oee36S3bBuJ7z/GxNOivZ6CG6nw
-dz7whP8kEycMbnqfJr3QaGd45DQTMRF0ph3pOwcyVfQ1mKBE2dQEkW7xYW7x
-OE/eP9htUxJpjR6Z+ZSL5P3iiY4s0nkqjn/qdJjQnikUzya951V29vQVJgzy
-xEeppCOZM6Giekykr/U6Hkt6uabivVMGTFzf4ybuTdop+aG2lSETXntSo01I
-DywiDoYYM6GhnEhVIK1rwLE6z5SJIu/lt3lIf88+OtxhQeZP4S5RSs5Pfq17
-1TJrJqQdnYpdSCdbFafst2PCwiLPSYq0cPli78sOTJQoNfTWk/nz2n7K9JEj
-E3pC11fbkJ56+Oxk/AMy/2Jjc1ykTZvKxSofMZHlLZXuS66Pmp9G+wYPJnSX
-8z55Qq7f514/QuUZE5X329LGyPUWP1bz2sSHCbHOsuzLpHkmL+hmBTLB0D3+
-iGcdFfcuvJBtCWbiXTbfcU2yfnpSGgS5w5hIzdVZCCDrq+TG5XrNKCb2nmu8
-PbmaCuncMMr9N0w07z8uupl03Lrm51FxZD6uRXfJkfX65LuexlAyE8O7veOu
-kPXNEIuSFEhl4pKYZoruCipuubQtlacwMb7Upl6Lj4pj0oYlnp+YkFOQjNpN
-9gfVPzY2NYcJ4T8zZ3iXkf3V3/X45xey3pYHHfhH9hNnlIny9mImuIpcc2zJ
-/itYcudTfi0TKrxXbuwh+3W/YWpIdz1ZX6k6ZiVkP8fkDd/l+8XEz93jjTrz
-FLjY2u3X/ctE32WnCb0ZCpT/3E9i9jMxOUbpXMSgIDfV7dUe7gls2+utX9BN
-AY+GjnAozwRm3r7xSuyk4EK/eDgn7wTO3LTm8mynoHd71ctG/gnESOZM7/xL
-gUD4+iDXLRM4ub7r23g9BRbu7581yE0gIvCYcCm5n34UdVkCpQlsXCP93/ov
-FHDmX3iapDKB84krwvRzKAidmnB/dGoCa70WH/yRSUHRnaMuEjpkPNW2KDVy
-/954pdHB2WYC5vXjEU8DKLjNTGT03J2A2MEWzZN+FFCCHty9cH8ClXdbZee9
-KDhVJWq789EEtgt8ydV0p+DuMQvLWp8JSPRnaz93oKBiD8ctsYQJJO62WGN7
-hYIHnJKalX8nYFXvQJ1aT0HP8eVNO9snECsUlFAgQIHmsyGDx10TEFTTmnNd
-RYHYqjRrmcEJGDWVtg3wUFC5Sfp5zNQEqO2BZ4yY6dgiq1xtLzCJuVTz6PHa
-dBRaXdTYfHoSvowjjzLcyPd5i5OaVeYkwq6sDDSKS4PM/aQVzz9N4m3d807d
-8DTcXNNUl5YziRkFrX8n/dPwWe3w9dGCSXBW3lvE4ZiG21nDtvZVk9hkvVpy
-Sj0NX/2NXjn1TCJgT/jjZcOpcDx6jv5UaAp2f9cJz+9LRfe7rcHRzlPIaWul
-+IW+R7DEhkXZOtOQbxbTXF6QiICI26U3RWeQOnSfK+x3HHSDr1fz9c5AeCvH
-qFFfDKaOx09H589Cv2JBuNEwHJ3dxxwLPeegPLzJKpIzCMeiq99K+sxh+4Yx
-0b2dgYi5olsV5D+Hc3ybraqKA8nrbLfdCpkDtn9RU3oWiOb0mEru2Dlc7dku
-9HdVIOoeLIieyJ/DIx2n3sPbAlC4Oqe8lDEHYuTRqh2JvnirdGBj1c15CKdG
-B2t1e6DwHK/7rNE8DJ31Z4yyPNBh0Dmwy3Qe211ODr728MB2t5Ccp1bzZL3y
-invu9EDct+mrKg/m0aCVslrR/AnizxeGZQTPo60iesWVaTck3tQSjCidR83v
-r3avjriizG7Po7Lv87Ds7A6d5HNFn/vinomqecjKDqznKnGBxLtPWVoN8xDI
-jDgot8kFSQMbdVa0z+P90+uJE9UPkXy364XrzDwea/Cos048wAcPh9UmexeQ
-tfCVt8r5HoYUr7t57F9AoNoukxnpe9jDOD4ee2gBOzcsGfGiOSDZcO3Pf3IL
-cIms7ag1cEAiMsJ0Ty7gjrpq9ZLT9oibHhE5Z7AAe+W+cmMpO3SlNwVYGi4g
-YMWHlNYhW2w3LeD0MV7A0ohACSLFlvxPft5ZarGA87JG5z9L2iI6QyrhqOMC
-ppN+1QXvtUHEHUupw4ELqPlk4+zhb4lmMe1oreAFZO9rEckTssSGFgV+u9AF
-HH3+u7Ey1gJh6svHUl8v4E90q4xLnjlCxJOzJJIX4HbSsFB+yhQB7b2Km4oW
-sC7qx9b4cPKlrnPrDDdzAZEoeL9qjQHenm/RM55agOaP49mRmvrIO61jVTq7
-gFMd9tL6gdcxpnQq0IeLhfixWGOPDXrQ27n7lwA/C6sGG+RDjl/FfdG4PgcB
-FpYHahyfCL6CIOHNM78EWXAMXxoR3nsZpStXbY7czIKrqvCG1BeXcHByzFBs
-NwuK6xVoyku0cXbUwv6pFAt+Q5b/Osy0YDrY5dF3gIX0Jc7zjdUX8bqtMfG9
-LAvqjxdiP0ZfwNKyHLr0SRbuyF3WXpp7Dq1hbk4nbrKQn5Cf9jnkJKaDZn0S
-jFjgoGaVx787gbV+9q+XmZLjf37/YJ53HKcf3y6osmLB+EwfxWviGDLN1Hku
-ObNwUVvRblYdqLlVtP6zCwt55qa31+YqYeC64i5hdxY4j0obrz2sCJGLUhqt
-XiyoMlqUuk/KwVt+TdDtUBaqO6r/8+04hHhpn9iyCBbOm+i0R789iAKpRVmS
-USzsk2sr/2F5AMxtE7/oceT1QtepVyEF/s3W/ZqJLDz6tyG3lH8PJNf3zVBT
-WAj/oaKYPLELN5Y3b75PZeH35/1bFnHshNNirX2/s1ig39stH7pdDCGsChWF
-bDKfA+PHG69tw/fxL7dYBAtGHLHyIuu2oJsm43CziAXluv0aZ7g2gqMv7WnR
-NxayJLdKKG8QwuGWN0meVSxo+Izs7i1eA34p06m0WhYkvCSsqx/zY8hF6lRT
-PQtWl2lrRr/xoewHI4TjFwuVOv58U/VLEbc9t0uimYXaB3wdrZKL4erw+NCF
-vyw0HVFJrqvghF7pKbf//rEQNMI9xWE1ryy7YeWPN50scF3WbpH8Pam81qJB
-5HsPC6Uu2/Y9/D2mPJIXYTXWz4Lac2XFuNYh5cqVN78I01j4JDj5YYlMt3Li
-DXE+1REWKqJiNlPv/lV+QqXpWoyz8L77sn34zzrlG4syk15MsHBiIHWv3s4S
-ZcVLTlO50yz0cfgMG36iKgslqpzqmiPrN6Y+dqtNkPKMM/cZHg4OFY7/O/ri
-b9tFcLLdWF0eLsXNNlU0oO8SD9vmxZueJvCx/Xu5zFe1DWx/kw7vaxFmO/P6
-3CrbTWwHpBXqh4uwraZ1fq5/B9ufI0yP+BxkO2zXqw9VZ9hmjJ+Mdz/L9vn8
-sUi582zzaKn7xl9k28F52vzhVbYv1lyW2GvCNp/D2lhfN7ZdCvzCzmSzfXaj
-Tvx3Mc7/91Gr0murvdiO3mQoVslkW6wo4eW5a1zs+e4tlKzLZ/v2+KIf5ZLc
-/29Dypio13O2Ld205tQD2XbQyvzF94LtZ0wH/4CXbKfKTc+GvmZ7+iur8V0K
-2wF1fH7FpWxHxN4xfVrOdqx9tapaBdsfBQNmKqrZbtEVMK3/ybZ45wbVjk62
-92c6bYnrZlve48+0cS/bGuKv0/sH2LazEN0yOsq2s+Ljaeo42x4rOhrsmWyH
-pcX6TE2x/dZ1kUnODNspF4yPOc+xnbnt22blBbbzx3dOs1hs/w+pgDiQ
-     "]]}},
-  AspectRatio->NCache[GoldenRatio^(-1), 0.6180339887498948],
-  Axes->True,
-  AxesOrigin->{0, 0},
-  PlotRange->{{0, 2.5}, {-0.9549296585513659, 0.07073552342169737}},
-  PlotRangeClipping->True,
-  PlotRangePadding->{
-    Scaled[0.02], 
-    Scaled[0.02]}]], "Output",
- CellChangeTimes->{{3.560155172170507*^9, 3.5601551796044407`*^9}, {
-  3.560158983450157*^9, 3.5601590230207157`*^9}}]
-}, Open  ]],
-
-Cell[CellGroupData[{
-
-Cell[BoxData[
- RowBox[{"DWr", "[", 
-  RowBox[{"r", ",", "h"}], "]"}]], "Input",
- CellChangeTimes->{{3.5601552083271513`*^9, 3.560155253227319*^9}, {
-  3.560160694674526*^9, 3.560160694745482*^9}, {3.560161180197549*^9, 
-  3.5601611810400257`*^9}}],
-
-Cell[BoxData[
- FractionBox[
-  RowBox[{"If", "[", 
-   RowBox[{
-    RowBox[{
-     FractionBox["r", "h"], ">", "2"}], ",", "0", ",", 
-    RowBox[{"If", "[", 
-     RowBox[{
-      RowBox[{
-       FractionBox["r", "h"], ">", "1"}], ",", 
-      RowBox[{"-", 
-       FractionBox[
-        RowBox[{"3", " ", 
-         SuperscriptBox[
-          RowBox[{"(", 
-           RowBox[{"2", "-", 
-            FractionBox["r", "h"]}], ")"}], "2"]}], 
-        RowBox[{"4", " ", "h"}]]}], ",", 
-      RowBox[{
-       FractionBox[
-        RowBox[{"3", " ", 
-         SuperscriptBox[
-          RowBox[{"(", 
-           RowBox[{"1", "-", 
-            FractionBox["r", "h"]}], ")"}], "2"]}], "h"], "-", 
-       FractionBox[
-        RowBox[{"3", " ", 
-         SuperscriptBox[
-          RowBox[{"(", 
-           RowBox[{"2", "-", 
-            FractionBox["r", "h"]}], ")"}], "2"]}], 
-        RowBox[{"4", " ", "h"}]]}]}], "]"}]}], "]"}], 
-  RowBox[{
-   SuperscriptBox["h", "3"], " ", "\[Pi]"}]]], "Output",
- CellChangeTimes->{{3.560155230596974*^9, 3.560155253885023*^9}, 
-   3.5601606952114277`*^9, 3.56016118252979*^9}]
-}, Open  ]],
-
-Cell[CellGroupData[{
-
-Cell[BoxData[
- RowBox[{"N", "[", 
-  RowBox[{"Expand", "[", 
-   RowBox[{
-    RowBox[{"-", 
-     FractionBox["3", "4"]}], " ", 
-    RowBox[{
-     SuperscriptBox[
-      RowBox[{"(", 
-       RowBox[{"2", "-", "q"}], ")"}], "2"], "/", "Pi"}]}], "]"}], 
-  "]"}]], "Input",
- CellChangeTimes->{{3.560160709698295*^9, 3.560160723505558*^9}, {
-   3.560161185019305*^9, 3.560161189166279*^9}, 3.560237508328278*^9}],
-
-Cell[BoxData[
- RowBox[{
-  RowBox[{"-", "0.954929658551372`"}], "+", 
-  RowBox[{"0.954929658551372`", " ", "q"}], "-", 
-  RowBox[{"0.238732414637843`", " ", 
-   SuperscriptBox["q", "2"]}]}]], "Output",
- CellChangeTimes->{{3.560160720336149*^9, 3.560160724559553*^9}, 
-   3.5601612038241377`*^9, 3.5602375092839746`*^9}]
-}, Open  ]],
-
-Cell[CellGroupData[{
-
-Cell[BoxData[
- RowBox[{"N", "[", 
-  RowBox[{"Expand", "[", 
-   RowBox[{
-    RowBox[{"(", 
-     RowBox[{
-      RowBox[{"3", " ", 
-       SuperscriptBox[
-        RowBox[{"(", 
-         RowBox[{"1", "-", "q"}], ")"}], "2"]}], "-", 
-      RowBox[{
-       FractionBox["3", "4"], " ", 
-       SuperscriptBox[
-        RowBox[{"(", 
-         RowBox[{"2", "-", "q"}], ")"}], "2"]}]}], ")"}], "/", "Pi"}], "]"}], 
-  "]"}]], "Input",
- CellChangeTimes->{{3.5601608470246563`*^9, 3.560160853545632*^9}, {
-  3.560161190598509*^9, 3.5601612011456413`*^9}}],
-
-Cell[BoxData[
- RowBox[{
-  RowBox[{
-   RowBox[{"-", "0.954929658551372`"}], " ", "q"}], "+", 
-  RowBox[{"0.716197243913529`", " ", 
-   SuperscriptBox["q", "2"]}]}]], "Output",
- CellChangeTimes->{3.560160854392119*^9, 3.560161202029501*^9}]
-}, Open  ]],
-
-Cell[CellGroupData[{
-
-Cell[BoxData[
- RowBox[{"DWh", "[", 
-  RowBox[{"r", ",", "h"}], "]"}]], "Input"],
-
-Cell[BoxData[
- RowBox[{
-  FractionBox[
-   RowBox[{"If", "[", 
-    RowBox[{
-     RowBox[{
-      FractionBox["r", "h"], ">", "2"}], ",", "0", ",", 
-     RowBox[{"If", "[", 
-      RowBox[{
-       RowBox[{
-        FractionBox["r", "h"], ">", "1"}], ",", 
-       FractionBox[
-        RowBox[{"3", " ", "r", " ", 
-         SuperscriptBox[
-          RowBox[{"(", 
-           RowBox[{"2", "-", 
-            FractionBox["r", "h"]}], ")"}], "2"]}], 
-        RowBox[{"4", " ", 
-         SuperscriptBox["h", "2"]}]], ",", 
-       RowBox[{
-        RowBox[{"-", 
-         FractionBox[
-          RowBox[{"3", " ", "r", " ", 
-           SuperscriptBox[
-            RowBox[{"(", 
-             RowBox[{"1", "-", 
-              FractionBox["r", "h"]}], ")"}], "2"]}], 
-          SuperscriptBox["h", "2"]]}], "+", 
-        FractionBox[
-         RowBox[{"3", " ", "r", " ", 
-          SuperscriptBox[
-           RowBox[{"(", 
-            RowBox[{"2", "-", 
-             FractionBox["r", "h"]}], ")"}], "2"]}], 
-         RowBox[{"4", " ", 
-          SuperscriptBox["h", "2"]}]]}]}], "]"}]}], "]"}], 
-   RowBox[{
-    SuperscriptBox["h", "3"], " ", "\[Pi]"}]], "-", 
-  FractionBox[
-   RowBox[{"3", " ", 
-    RowBox[{"If", "[", 
-     RowBox[{
-      RowBox[{
-       FractionBox["r", "h"], ">", "2"}], ",", "0", ",", 
-      RowBox[{"If", "[", 
-       RowBox[{
-        RowBox[{
-         FractionBox["r", "h"], ">", "1"}], ",", 
-        RowBox[{
-         FractionBox["1", "4"], " ", 
-         SuperscriptBox[
-          RowBox[{"(", 
-           RowBox[{"2", "-", 
-            FractionBox["r", "h"]}], ")"}], "3"]}], ",", 
-        RowBox[{
-         RowBox[{
-          FractionBox["1", "4"], " ", 
-          SuperscriptBox[
-           RowBox[{"(", 
-            RowBox[{"2", "-", 
-             FractionBox["r", "h"]}], ")"}], "3"]}], "-", 
-         SuperscriptBox[
-          RowBox[{"(", 
-           RowBox[{"1", "-", 
-            FractionBox["r", "h"]}], ")"}], "3"]}]}], "]"}]}], "]"}]}], 
-   RowBox[{
-    SuperscriptBox["h", "4"], " ", "\[Pi]"}]]}]], "Output",
- CellChangeTimes->{3.560161212213023*^9}]
-}, Open  ]]
-},
-WindowSize->{740, 867},
-WindowMargins->{{Automatic, -1324}, {Automatic, 61}},
-FrontEndVersion->"8.0 for Linux x86 (64-bit) (November 7, 2010)",
-StyleDefinitions->"Default.nb"
-]
-(* End of Notebook Content *)
-
-(* Internal cache information *)
-(*CellTagsOutline
-CellTagsIndex->{}
-*)
-(*CellTagsIndex
-CellTagsIndex->{}
-*)
-(*NotebookFileOutline
-Notebook[{
-Cell[CellGroupData[{
-Cell[579, 22, 1154, 36, 88, "Input"],
-Cell[1736, 60, 937, 30, 57, "Output"]
-}, Open  ]],
-Cell[CellGroupData[{
-Cell[2710, 95, 309, 8, 30, "Input"],
-Cell[3022, 105, 7339, 126, 236, "Output"]
-}, Open  ]],
-Cell[CellGroupData[{
-Cell[10398, 236, 459, 15, 30, "Input"],
-Cell[10860, 253, 294, 6, 30, "Output"]
-}, Open  ]],
-Cell[CellGroupData[{
-Cell[11191, 264, 343, 11, 30, "Input"],
-Cell[11537, 277, 346, 7, 30, "Output"]
-}, Open  ]],
-Cell[11898, 287, 774, 20, 50, "Input"],
-Cell[CellGroupData[{
-Cell[12697, 311, 265, 7, 30, "Input"],
-Cell[12965, 320, 7987, 137, 221, "Output"]
-}, Open  ]],
-Cell[CellGroupData[{
-Cell[20989, 462, 414, 10, 30, "Input"],
-Cell[21406, 474, 9020, 153, 223, "Output"]
-}, Open  ]],
-Cell[CellGroupData[{
-Cell[30463, 632, 247, 5, 30, "Input"],
-Cell[30713, 639, 1094, 35, 65, "Output"]
-}, Open  ]],
-Cell[CellGroupData[{
-Cell[31844, 679, 404, 12, 54, "Input"],
-Cell[32251, 693, 318, 7, 30, "Output"]
-}, Open  ]],
-Cell[CellGroupData[{
-Cell[32606, 705, 541, 17, 54, "Input"],
-Cell[33150, 724, 238, 6, 30, "Output"]
-}, Open  ]],
-Cell[CellGroupData[{
-Cell[33425, 735, 79, 2, 30, "Input"],
-Cell[33507, 739, 2072, 67, 118, "Output"]
-}, Open  ]]
-}
-]
-*)
-
-(* End of internal cache information *)