diff --git a/.gitignore b/.gitignore
index 123f413aea9986f353bfa3310b113b1219233fcb..4f81f60dd7d2fd85074759e4fcc4f3e0ee21b187 100644
--- a/.gitignore
+++ b/.gitignore
@@ -81,9 +81,12 @@ tests/testMatrixInversion
 tests/testCbrt
 
 theory/latex/swift.pdf
-theory/SPH/kernel/kernels.pdf
-theory/SPH/kernel/kernel_derivatives.pdf
-theory/SPH/kernel/kernel_definitions.pdf
+theory/SPH/Kernels/kernels.pdf
+theory/SPH/Kernels/kernel_derivatives.pdf
+theory/SPH/Kernels/kernel_definitions.pdf
+theory/SPH/Flavours/sph_flavours.pdf
+theory/SPH/EoS/eos.pdf
+theory/SPH/*.pdf
 theory/paper_pasc/pasc_paper.pdf
 
 m4/libtool.m4
diff --git a/INSTALL.swift b/INSTALL.swift
index a07d5b24c2d8c75778e2a24d90f77724459ab61f..8e1635b0715426512503fd9dcde32f59a7ad1b62 100644
--- a/INSTALL.swift
+++ b/INSTALL.swift
@@ -83,39 +83,65 @@ SWIFT depends on a number of third party libraries that should be available
 before you can build it.
 
 
-HDF5: a HDF5 library (v. 1.8.x or higher) is required to read and write
-particle data. One of the commands "h5cc" or "h5pcc" should be
-available. If "h5pcc" is located them a parallel HDF5 built for the version
-of MPI located should be provided. If the command is not available then it
-can be located using the "--with-hfd5" configure option. The value should
-be the full path to the "h5cc" or "h5pcc" commands.
+ - HDF5: a HDF5 library (v. 1.8.x or higher) is required to read and
+         write particle data. One of the commands "h5cc" or "h5pcc"
+         should be available. If "h5pcc" is located them a parallel
+         HDF5 built for the version of MPI located should be
+         provided. If the command is not available then it can be
+         located using the "--with-hfd5" configure option. The value
+         should be the full path to the "h5cc" or "h5pcc" commands.
 
 
-MPI: an optional MPI library that fully supports MPI_THREAD_MULTIPLE.
-Before running configure the "mpirun" command should be available in the
-shell. If your command isn't called "mpirun" then define the "MPIRUN"
-environment variable, either in the shell or when running configure.
+ - MPI: to run on more than one node an MPI library that fully
+        supports MPI_THREAD_MULTIPLE.  Before running configure the
+        "mpirun" command should be available in the shell. If your
+        command isn't called "mpirun" then define the "MPIRUN"
+        environment variable, either in the shell or when running
+        configure.
 
-The MPI compiler can be controlled using the MPICC variable, much like
-the CC one. Use this when your MPI compiler has a none-standard name.
+	The MPI compiler can be controlled using the MPICC variable,
+	much like the CC one. Use this when your MPI compiler has a
+	none-standard name.
 
 
-METIS: a build of the METIS library can be optionally used to optimize the
-load between MPI nodes (requires an MPI library). This should be found in
-the standard installation directories, or pointed at using the
-"--with-metis" configuration option.  In this case the top-level
-installation directory of the METIS build should be given. Note to use
-METIS you should at least supply "--with-metis".
+ - libtool: The build system relies on libtool.
 
 
-libNUMA: a build of the NUMA library can be used to pin the threads to
-the physical core of the machine SWIFT is running on. This is not always
-necessary as the OS scheduler may do a good job at distributing the threads
-among the different cores on each computing node.
+                           Optional Dependencies
+                           =====================
 
 
-DOXYGEN: the doxygen library is required to create the SWIFT API
-documentation.
+ - METIS: a build of the METIS library can be optionally used to
+          optimize the load between MPI nodes (requires an MPI
+          library). This should be found in the standard installation
+          directories, or pointed at using the "--with-metis"
+          configuration option.  In this case the top-level
+          installation directory of the METIS build should be
+          given. Note to use METIS you should at least supply
+          "--with-metis".
+
+
+ - libNUMA: a build of the NUMA library can be used to pin the threads
+            to the physical core of the machine SWIFT is running
+            on. This is not always necessary as the OS scheduler may
+            do a good job at distributing the threads among the
+            different cores on each computing node.
+
+
+ - TCMalloc: a build of the TCMalloc library (part of gperftools) can
+             be used to obtain faster allocations than the standard C
+             malloc function part of glibc. The option "-with-tcmalloc"
+	     should be passed to the configuration script to use it.
+
+
+ - gperftools: a build of gperftools can be used to obtain good
+               profiling of the code. The option "-with-profiler"
+               needs to be passed to the configuration script to use
+               it.
+
+
+ - DOXYGEN: the doxygen library is required to create the SWIFT API
+            documentation.
 
 
 
diff --git a/configure.ac b/configure.ac
index 977a86a03d5e4c82139dfd56d3f1ee7afe3ea6e8..f6e2ea0db8e9829b719a0190eea7a1d891bfbbd6 100644
--- a/configure.ac
+++ b/configure.ac
@@ -355,7 +355,7 @@ fi
 AC_SUBST([TCMALLOC_LIBS])
 AM_CONDITIONAL([HAVETCMALLOC],[test -n "$TCMALLOC_LIBS"])
 
-#  Check for -lprofiler usually part of the gpreftools along with tcmalloc.
+#  Check for -lprofiler usually part of the gperftools along with tcmalloc.
 have_profiler="no"
 AC_ARG_WITH([profiler],
    [AS_HELP_STRING([--with-profiler],
diff --git a/examples/EAGLE_12/eagle_12.yml b/examples/EAGLE_12/eagle_12.yml
index 80714d87f4afa7d7e4d41ce7bf56faed856208ef..cd7db4f86f24f4d78db29a3dfe34bc536af6ed7d 100644
--- a/examples/EAGLE_12/eagle_12.yml
+++ b/examples/EAGLE_12/eagle_12.yml
@@ -27,7 +27,7 @@ Statistics:
 SPH:
   resolution_eta:        1.2348   # Target smoothing length in units of the mean inter-particle separation (1.2348 == 48Ngbs with the cubic spline kernel).
   delta_neighbours:      0.1      # The tolerance for the targetted number of neighbours.
-  max_smoothing_length:  0.1    # Maximal smoothing length allowed (in internal units).
+  max_smoothing_length:  1.0      # Maximal smoothing length allowed (in internal units).
   CFL_condition:         0.1      # Courant-Friedrich-Levy condition for time integration.
 
 # Parameters related to the initial conditions
diff --git a/examples/ExternalPointMass/run.sh b/examples/ExternalPointMass/run.sh
index 9f90ca395a5c8cf83e67928b3fdbd4d8529ac254..4ac513f09cb8ac8dcefc256a68478e215b8bc320 100755
--- a/examples/ExternalPointMass/run.sh
+++ b/examples/ExternalPointMass/run.sh
@@ -7,4 +7,4 @@ then
     python makeIC.py 10000
 fi
 
-../swift -g -t 2 externalPointMass.yml 2>&1 | tee output.log
+../swift -g -t 1 externalPointMass.yml 2>&1 | tee output.log
diff --git a/examples/IsothermalPotential/GravityOnly/README b/examples/IsothermalPotential/GravityOnly/README
index 90fb1872aa2301cab133b8a20a8cd8de724d4553..1621aaa8ab90420dcf69b5b9caea394d619b62cf 100644
--- a/examples/IsothermalPotential/GravityOnly/README
+++ b/examples/IsothermalPotential/GravityOnly/README
@@ -1,6 +1,3 @@
-;
-; this probelm generates a set of gravity particles in an isothermal
-; potential and follows their orbits. Tests verify consdevation of
-; energy and angular momentum
-;
-;
+This example generates a set of particles in an isothermal potential
+and follows their orbits. IDL scripts verify the conservation of
+energy and angular momentum.
diff --git a/examples/main.c b/examples/main.c
index 775cadf85a600d0478ddae64d1947d124cae0512..06efe75b32f790b10d1125fb5010bc3d1b4f9e24 100644
--- a/examples/main.c
+++ b/examples/main.c
@@ -120,6 +120,10 @@ int main(int argc, char *argv[]) {
     error("MPI_Comm_size failed with error %i.", res);
   if ((res = MPI_Comm_rank(MPI_COMM_WORLD, &myrank)) != MPI_SUCCESS)
     error("Call to MPI_Comm_rank failed with error %i.", res);
+
+  /* Make sure messages are stamped with the correct rank. */
+  engine_rank = myrank;
+
   if ((res = MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN)) !=
       MPI_SUCCESS)
     error("Call to MPI_Comm_set_errhandler failed with error %i.", res);
@@ -131,6 +135,7 @@ int main(int argc, char *argv[]) {
     message("WARNING: you should use the non-MPI version of this program.");
   }
   fflush(stdout);
+
 #endif
 
 /* Let's pin the main thread */
@@ -262,12 +267,21 @@ int main(int argc, char *argv[]) {
     message(
         "Executing a dry run. No i/o or time integration will be performed.");
 
-  /* Report CPU frequency. */
+  /* Report CPU frequency.*/
   cpufreq = clocks_get_cpufreq();
   if (myrank == 0) {
     message("CPU frequency used for tick conversion: %llu Hz", cpufreq);
   }
 
+/* Report host name(s). */
+#ifdef WITH_MPI
+  if (myrank == 0 || verbose > 1) {
+    message("Rank %d running on: %s", myrank, hostname());
+  }
+#else
+  message("Running on: %s", hostname());
+#endif
+
   /* Do we choke on FP-exceptions ? */
   if (with_fp_exceptions) {
     feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW);
@@ -563,8 +577,8 @@ int main(int argc, char *argv[]) {
           fprintf(file_thread, " %03i 0 0 0 0 %lli %lli 0 0 0 0 %lli\n", myrank,
                   e.tic_step, e.toc_step, cpufreq);
           int count = 0;
-          for (int l = 0; l < e.sched.nr_tasks; l++)
-            if (!e.sched.tasks[l].skip && !e.sched.tasks[l].implicit) {
+          for (int l = 0; l < e.sched.nr_tasks; l++) {
+            if (!e.sched.tasks[l].implicit && e.sched.tasks[l].toc != 0) {
               fprintf(
                   file_thread, " %03i %i %i %i %i %lli %lli %i %i %i %i %i\n",
                   myrank, e.sched.tasks[l].rid, e.sched.tasks[l].type,
@@ -579,11 +593,10 @@ int main(int argc, char *argv[]) {
                   (e.sched.tasks[l].cj != NULL) ? e.sched.tasks[l].cj->gcount
                                                 : 0,
                   e.sched.tasks[l].flags);
-              fflush(stdout);
-              count++;
             }
-          message("rank %d counted %d tasks", myrank, count);
-
+            fflush(stdout);
+            count++;
+          }
           fclose(file_thread);
         }
 
@@ -599,8 +612,8 @@ int main(int argc, char *argv[]) {
       /* Add some information to help with the plots */
       fprintf(file_thread, " %i %i %i %i %lli %lli %i %i %i %lli\n", -2, -1, -1,
               1, e.tic_step, e.toc_step, 0, 0, 0, cpufreq);
-      for (int l = 0; l < e.sched.nr_tasks; l++)
-        if (!e.sched.tasks[l].skip && !e.sched.tasks[l].implicit)
+      for (int l = 0; l < e.sched.nr_tasks; l++) {
+        if (!e.sched.tasks[l].implicit && e.sched.tasks[l].toc != 0) {
           fprintf(
               file_thread, " %i %i %i %i %lli %lli %i %i %i %i\n",
               e.sched.tasks[l].rid, e.sched.tasks[l].type,
@@ -610,6 +623,8 @@ int main(int argc, char *argv[]) {
               (e.sched.tasks[l].cj == NULL) ? 0 : e.sched.tasks[l].cj->count,
               (e.sched.tasks[l].ci == NULL) ? 0 : e.sched.tasks[l].ci->gcount,
               (e.sched.tasks[l].cj == NULL) ? 0 : e.sched.tasks[l].cj->gcount);
+        }
+      }
       fclose(file_thread);
 #endif
     }
diff --git a/m4/ax_cc_maxopt.m4 b/m4/ax_cc_maxopt.m4
index d2a0937232e0e8bf9c3e7e79c20267bfa1d75880..d5b87c903c5af46b767d7b49e40963345c8128af 100644
--- a/m4/ax_cc_maxopt.m4
+++ b/m4/ax_cc_maxopt.m4
@@ -128,6 +128,8 @@ if test "$ac_test_CFLAGS" != "set"; then
 		    *3?6[[cf]]?:*:*:*|*4?6[[56]]?:*:*:*|*4?6[[ef]]?:*:*:*) icc_flags="-xCORE-AVX2 -xCORE-AVX-I -xAVX -SSE4.2 -xS -xT -xB -xK" ;;
 		    *000?f[[346]]?:*:*:*|?f[[346]]?:*:*:*|f[[346]]?:*:*:*) icc_flags="-xSSE3 -xP -xO -xN -xW -xK" ;;
 		    *00??f??:*:*:*|??f??:*:*:*|?f??:*:*:*|f??:*:*:*) icc_flags="-xSSE2 -xN -xW -xK" ;;
+		    *5?6E?:*:*:*) icc_flags="-xCORE-AVX512" ;;
+		    *5?67?:*:*:*) icc_flags="-xMIC-AVX512" ;;
                   esac ;;
               esac ;;
           esac
diff --git a/src/Makefile.am b/src/Makefile.am
index f57246bb588914280b7812478ad4b23a3ecd1f9c..34e6e0ea2617474bb3c29482c1779a7af81de2e0 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -44,7 +44,7 @@ include_HEADERS = space.h runner.h queue.h task.h lock.h cell.h part.h const.h \
     common_io.h single_io.h multipole.h map.h tools.h partition.h clocks.h parser.h \
     physical_constants.h physical_constants_cgs.h potential.h version.h \
     hydro_properties.h riemann.h threadpool.h cooling.h cooling_struct.h sourceterms.h \
-    sourceterms_struct.h cbrt.h
+    sourceterms_struct.h statistics.h cbrt.h
 
 
 # Common source files
@@ -53,7 +53,8 @@ AM_SOURCES = space.c runner.c queue.c task.c cell.c engine.c \
     units.c common_io.c single_io.c multipole.c version.c map.c \
     kernel_hydro.c tools.c part.c partition.c clocks.c parser.c \
     physical_constants.c potential.c hydro_properties.c \
-    runner_doiact_fft.c threadpool.c cooling.c sourceterms.c
+    runner_doiact_fft.c threadpool.c cooling.c sourceterms.c \
+    statistics.c
 
 # Include files for distribution, not installation.
 nobase_noinst_HEADERS = align.h approx_math.h atomic.h cbrt.h cycle.h error.h inline.h kernel_hydro.h kernel_gravity.h \
diff --git a/src/cbrt.h b/src/cbrt.h
index 99ec97503d02c2cc5bd3cbd4ae191a0ab61a9f07..2686caa90e7a18dae571e7b8381adbaa8cd63e60 100644
--- a/src/cbrt.h
+++ b/src/cbrt.h
@@ -68,11 +68,9 @@ __attribute__((always_inline)) inline float icbrtf(float x_in) {
   exponent_new = -exponent_new / 3;
   const int exponent_rem = exponent + 3 * exponent_new;
   cast.as_uint = (exponent_new + 127) << 23;
-  float exponent_scale = cast.as_float;
-  exponent_scale *=
-      exponent_rem > 0
-          ? (exponent_rem > 1 ? 5.61231024154687e-01f : 7.07106781186548e-01f)
-          : 8.90898718140339e-01f;
+  static const float scale[3] = {8.90898718140339e-01f, 7.07106781186548e-01f,
+                                 5.61231024154687e-01f};
+  const float exponent_scale = cast.as_float * scale[exponent_rem];
 
   // Scale the result and set the correct sign.
   res = copysignf(res * exponent_scale, x_in);
diff --git a/src/cell.c b/src/cell.c
index 8728f32cb04f2f1de389386b632371a27d523bb9..a54906d667dd312fd35a7584047113a45183b46c 100644
--- a/src/cell.c
+++ b/src/cell.c
@@ -52,6 +52,7 @@
 #include "gravity.h"
 #include "hydro.h"
 #include "hydro_properties.h"
+#include "scheduler.h"
 #include "space.h"
 #include "timers.h"
 
@@ -763,6 +764,9 @@ void cell_clean_links(struct cell *c, void *data) {
 
   c->force = NULL;
   c->nr_force = 0;
+
+  c->grav = NULL;
+  c->nr_grav = 0;
 }
 
 /**
@@ -823,6 +827,7 @@ void cell_check_multipole(struct cell *c, void *data) {
         error("Multipole CoM are different (%12.15e vs. %12.15e", ma.CoM[k],
               mb.CoM[k]);
 
+#if const_gravity_multipole_order >= 2
     if (fabsf(ma.I_xx - mb.I_xx) / fabsf(ma.I_xx + mb.I_xx) > 1e-5 &&
         ma.I_xx > 1e-9)
       error("Multipole I_xx are different (%12.15e vs. %12.15e)", ma.I_xx,
@@ -847,6 +852,7 @@ void cell_check_multipole(struct cell *c, void *data) {
         ma.I_yz > 1e-9)
       error("Multipole I_yz are different (%12.15e vs. %12.15e)", ma.I_yz,
             mb.I_yz);
+#endif
   }
 }
 
@@ -892,3 +898,137 @@ int cell_is_drift_needed(struct cell *c, int ti_current) {
   /* No neighbouring cell has active particles. Drift not necessary */
   return 0;
 }
+
+/**
+ * @brief Un-skips all the tasks associated with a given cell and checks
+ * if the space needs to be rebuilt.
+ *
+ * @param c the #cell.
+ * @param s the #scheduler.
+ *
+ * @return 1 If the space needs rebuilding. 0 otherwise.
+ */
+int cell_unskip_tasks(struct cell *c, struct scheduler *s) {
+
+  /* Un-skip the density tasks involved with this cell. */
+  for (struct link *l = c->density; l != NULL; l = l->next) {
+    struct task *t = l->t;
+    const struct cell *ci = t->ci;
+    const struct cell *cj = t->cj;
+    scheduler_activate(s, t);
+
+    /* Set the correct sorting flags */
+    if (t->type == task_type_pair) {
+      if (!(ci->sorted & (1 << t->flags))) {
+        atomic_or(&ci->sorts->flags, (1 << t->flags));
+        scheduler_activate(s, ci->sorts);
+      }
+      if (!(cj->sorted & (1 << t->flags))) {
+        atomic_or(&cj->sorts->flags, (1 << t->flags));
+        scheduler_activate(s, cj->sorts);
+      }
+    }
+
+    /* Check whether there was too much particle motion */
+    if (t->type == task_type_pair || t->type == task_type_sub_pair) {
+      if (t->tight &&
+          (max(ci->h_max, cj->h_max) + ci->dx_max + cj->dx_max > cj->dmin ||
+           ci->dx_max > space_maxreldx * ci->h_max ||
+           cj->dx_max > space_maxreldx * cj->h_max))
+        return 1;
+
+#ifdef WITH_MPI
+      /* Activate the send/recv flags. */
+      if (ci->nodeID != engine_rank) {
+
+        /* Activate the tasks to recv foreign cell ci's data. */
+        scheduler_activate(s, ci->recv_xv);
+        scheduler_activate(s, ci->recv_rho);
+        scheduler_activate(s, ci->recv_ti);
+
+        /* Look for the local cell cj's send tasks. */
+        struct link *l = NULL;
+        for (l = cj->send_xv; l != NULL && l->t->cj->nodeID != ci->nodeID;
+             l = l->next)
+          ;
+        if (l == NULL) error("Missing link to send_xv task.");
+        scheduler_activate(s, l->t);
+
+        for (l = cj->send_rho; l != NULL && l->t->cj->nodeID != ci->nodeID;
+             l = l->next)
+          ;
+        if (l == NULL) error("Missing link to send_rho task.");
+        scheduler_activate(s, l->t);
+
+        for (l = cj->send_ti; l != NULL && l->t->cj->nodeID != ci->nodeID;
+             l = l->next)
+          ;
+        if (l == NULL) error("Missing link to send_ti task.");
+        scheduler_activate(s, l->t);
+
+      } else if (cj->nodeID != engine_rank) {
+
+        /* Activate the tasks to recv foreign cell cj's data. */
+        scheduler_activate(s, cj->recv_xv);
+        scheduler_activate(s, cj->recv_rho);
+        scheduler_activate(s, cj->recv_ti);
+        /* Look for the local cell ci's send tasks. */
+        struct link *l = NULL;
+        for (l = ci->send_xv; l != NULL && l->t->cj->nodeID != cj->nodeID;
+             l = l->next)
+          ;
+        if (l == NULL) error("Missing link to send_xv task.");
+        scheduler_activate(s, l->t);
+
+        for (l = ci->send_rho; l != NULL && l->t->cj->nodeID != cj->nodeID;
+             l = l->next)
+          ;
+        if (l == NULL) error("Missing link to send_rho task.");
+        scheduler_activate(s, l->t);
+
+        for (l = ci->send_ti; l != NULL && l->t->cj->nodeID != cj->nodeID;
+             l = l->next)
+          ;
+        if (l == NULL) error("Missing link to send_ti task.");
+        scheduler_activate(s, l->t);
+      }
+#endif
+    }
+  }
+
+  /* Unskip all the other task types. */
+  for (struct link *l = c->gradient; l != NULL; l = l->next)
+    scheduler_activate(s, l->t);
+  for (struct link *l = c->force; l != NULL; l = l->next)
+    scheduler_activate(s, l->t);
+  for (struct link *l = c->grav; l != NULL; l = l->next)
+    scheduler_activate(s, l->t);
+  if (c->extra_ghost != NULL) scheduler_activate(s, c->extra_ghost);
+  if (c->ghost != NULL) scheduler_activate(s, c->ghost);
+  if (c->init != NULL) scheduler_activate(s, c->init);
+  if (c->kick != NULL) scheduler_activate(s, c->kick);
+  if (c->cooling != NULL) scheduler_activate(s, c->cooling);
+  if (c->sourceterms != NULL) scheduler_activate(s, c->sourceterms);
+
+  return 0;
+}
+
+/**
+ * @brief Set the super-cell pointers for all cells in a hierarchy.
+ *
+ * @param c The top-level #cell to play with.
+ * @param super Pointer to the deepest cell with tasks in this part of the tree.
+ */
+void cell_set_super(struct cell *c, struct cell *super) {
+
+  /* Are we in a cell with some kind of self/pair task ? */
+  if (super == NULL && c->nr_tasks > 0) super = c;
+
+  /* Set the super-cell */
+  c->super = super;
+
+  /* Recurse */
+  if (c->split)
+    for (int k = 0; k < 8; k++)
+      if (c->progeny[k] != NULL) cell_set_super(c->progeny[k], super);
+}
diff --git a/src/cell.h b/src/cell.h
index f87420c86c5765f42fe04e8dee95558edff90cf9..ae22de0cb268bb5240c480b2eed9435ec218adf8 100644
--- a/src/cell.h
+++ b/src/cell.h
@@ -38,6 +38,7 @@
 
 /* Avoid cyclic inclusions */
 struct space;
+struct scheduler;
 
 /* Max tag size set to 2^29 to take into account some MPI implementations
  * that use 2^31 as the upper bound on MPI tags and the fact that
@@ -130,14 +131,9 @@ struct cell {
   /* Parent cell. */
   struct cell *parent;
 
-  /* Super cell, i.e. the highest-level supercell that has hydro interactions.
-   */
+  /* Super cell, i.e. the highest-level supercell that has pair/self tasks */
   struct cell *super;
 
-  /* Super cell, i.e. the highest-level supercell that has gravity interactions.
-   */
-  struct cell *gsuper;
-
   /* The task computing this cell's sorts. */
   struct task *sorts;
   int sortsize;
@@ -162,9 +158,6 @@ struct cell {
   /* Tasks for gravity tree. */
   struct task *grav_up, *grav_down;
 
-  /* Task for external gravity */
-  struct task *grav_external;
-
   /* Task for cooling */
   struct task *cooling;
 
@@ -183,12 +176,6 @@ struct cell {
   /* ID of the previous owner, e.g. runner. */
   int owner;
 
-  /* Momentum of particles in cell. */
-  double mom[3], ang_mom[3];
-
-  /* Mass, potential, internal  and kinetic energy of particles in this cell. */
-  double mass, e_pot, e_int, e_kin, e_rad, entropy;
-
   /* Number of particles updated in this cell. */
   int updated, g_updated;
 
@@ -240,5 +227,7 @@ int cell_are_neighbours(const struct cell *restrict ci,
 void cell_check_multipole(struct cell *c, void *data);
 void cell_clean(struct cell *c);
 int cell_is_drift_needed(struct cell *c, int ti_current);
+int cell_unskip_tasks(struct cell *c, struct scheduler *s);
+void cell_set_super(struct cell *c, struct cell *super);
 
 #endif /* SWIFT_CELL_H */
diff --git a/src/const.h b/src/const.h
index ef26525b7865bac4c0164f7678398ca307fc1d67..11ee65bfdfabbf43962c721b8599928c25fef7cc 100644
--- a/src/const.h
+++ b/src/const.h
@@ -85,7 +85,7 @@
 #define SLOPE_LIMITER_CELL_WIDE
 
 /* Self gravity stuff. */
-#define const_gravity_multipole_order 2
+#define const_gravity_multipole_order 1
 #define const_gravity_a_smooth 1.25f
 #define const_gravity_r_cut 4.5f
 #define const_gravity_eta 0.025f
diff --git a/src/engine.c b/src/engine.c
index 8de8bfe7116367502142df7d4bb0776c8bd2be4b..6f360907a35d4d8c37e0f9ee33bbefa7b3a1978f 100644
--- a/src/engine.c
+++ b/src/engine.c
@@ -63,6 +63,7 @@
 #include "runner.h"
 #include "serial_io.h"
 #include "single_io.h"
+#include "statistics.h"
 #include "timers.h"
 #include "tools.h"
 #include "units.h"
@@ -111,119 +112,57 @@ void engine_addlink(struct engine *e, struct link **l, struct task *t) {
   res->next = atomic_swap(l, res);
 }
 
-/**
- * @brief Generate the gravity hierarchical tasks for a hierarchy of cells -
- * i.e. all the O(Npart) tasks.
- *
- * Tasks are only created here. The dependencies will be added later on.
- *
- * @param e The #engine.
- * @param c The #cell.
- * @param gsuper The gsuper #cell.
- */
-void engine_make_gravity_hierarchical_tasks(struct engine *e, struct cell *c,
-                                            struct cell *gsuper) {
-
-  struct scheduler *s = &e->sched;
-  const int is_with_external_gravity =
-      (e->policy & engine_policy_external_gravity) ==
-      engine_policy_external_gravity;
-  const int is_fixdt = (e->policy & engine_policy_fixdt) == engine_policy_fixdt;
-
-  /* Is this the super-cell? */
-  if (gsuper == NULL && (c->grav != NULL || (!c->split && c->gcount > 0))) {
-
-    /* This is the super cell, i.e. the first with gravity tasks attached. */
-    gsuper = c;
-
-    /* Local tasks only... */
-    if (c->nodeID == e->nodeID) {
-
-      /* Add the init task. */
-      if (c->init == NULL)
-        c->init = scheduler_addtask(s, task_type_init, task_subtype_none, 0, 0,
-                                    c, NULL, 0);
-
-      /* Add the kick task that matches the policy. */
-      if (is_fixdt) {
-        if (c->kick == NULL)
-          c->kick = scheduler_addtask(s, task_type_kick_fixdt,
-                                      task_subtype_none, 0, 0, c, NULL, 0);
-      } else {
-        if (c->kick == NULL)
-          c->kick = scheduler_addtask(s, task_type_kick, task_subtype_none, 0,
-                                      0, c, NULL, 0);
-      }
-
-      if (is_with_external_gravity)
-        c->grav_external = scheduler_addtask(
-            s, task_type_grav_external, task_subtype_none, 0, 0, c, NULL, 0);
-    }
-  }
-
-  /* Set the super-cell. */
-  c->gsuper = gsuper;
-
-  /* Recurse. */
-  if (c->split)
-    for (int k = 0; k < 8; k++)
-      if (c->progeny[k] != NULL)
-        engine_make_gravity_hierarchical_tasks(e, c->progeny[k], gsuper);
-}
-
 /**
  * @brief Generate the hydro hierarchical tasks for a hierarchy of cells -
  * i.e. all the O(Npart) tasks.
  *
  * Tasks are only created here. The dependencies will be added later on.
  *
+ * Note that there is no need to recurse below the super-cell.
+ *
  * @param e The #engine.
  * @param c The #cell.
- * @param super The super #cell.
  */
-void engine_make_hydro_hierarchical_tasks(struct engine *e, struct cell *c,
-                                          struct cell *super) {
+void engine_make_hierarchical_tasks(struct engine *e, struct cell *c) {
 
   struct scheduler *s = &e->sched;
-  const int is_fixdt = (e->policy & engine_policy_fixdt) == engine_policy_fixdt;
-  const int is_with_cooling =
-      (e->policy & engine_policy_cooling) == engine_policy_cooling;
-  const int is_with_sourceterms =
-      (e->policy & engine_policy_sourceterms) == engine_policy_sourceterms;
-
-  /* Is this the super-cell? */
-  if (super == NULL && (c->density != NULL || (c->count > 0 && !c->split))) {
+  const int is_fixdt = (e->policy & engine_policy_fixdt);
+  const int is_hydro = (e->policy & engine_policy_hydro);
+  const int is_with_cooling = (e->policy & engine_policy_cooling);
+  const int is_with_sourceterms = (e->policy & engine_policy_sourceterms);
 
-    /* This is the super cell, i.e. the first with density tasks attached. */
-    super = c;
+  /* Are we in a super-cell ? */
+  if (c->super == c) {
 
     /* Local tasks only... */
     if (c->nodeID == e->nodeID) {
 
       /* Add the init task. */
-      if (c->init == NULL)
-        c->init = scheduler_addtask(s, task_type_init, task_subtype_none, 0, 0,
-                                    c, NULL, 0);
+      c->init = scheduler_addtask(s, task_type_init, task_subtype_none, 0, 0, c,
+                                  NULL, 0);
 
       /* Add the kick task that matches the policy. */
       if (is_fixdt) {
-        if (c->kick == NULL)
-          c->kick = scheduler_addtask(s, task_type_kick_fixdt,
-                                      task_subtype_none, 0, 0, c, NULL, 0);
+        c->kick = scheduler_addtask(s, task_type_kick_fixdt, task_subtype_none,
+                                    0, 0, c, NULL, 0);
       } else {
-        if (c->kick == NULL)
-          c->kick = scheduler_addtask(s, task_type_kick, task_subtype_none, 0,
-                                      0, c, NULL, 0);
+        c->kick = scheduler_addtask(s, task_type_kick, task_subtype_none, 0, 0,
+                                    c, NULL, 0);
       }
 
       /* Generate the ghost task. */
-      c->ghost = scheduler_addtask(s, task_type_ghost, task_subtype_none, 0, 0,
-                                   c, NULL, 0);
+      if (is_hydro)
+        c->ghost = scheduler_addtask(s, task_type_ghost, task_subtype_none, 0,
+                                     0, c, NULL, 0);
+
 #ifdef EXTRA_HYDRO_LOOP
       /* Generate the extra ghost task. */
-      c->extra_ghost = scheduler_addtask(s, task_type_extra_ghost,
-                                         task_subtype_none, 0, 0, c, NULL, 0);
+      if (is_hydro)
+        c->extra_ghost = scheduler_addtask(s, task_type_extra_ghost,
+                                           task_subtype_none, 0, 0, c, NULL, 0);
 #endif
+
+      /* Cooling task */
       if (is_with_cooling)
         c->cooling = scheduler_addtask(s, task_type_cooling, task_subtype_none,
                                        0, 0, c, NULL, 0);
@@ -232,16 +171,19 @@ void engine_make_hydro_hierarchical_tasks(struct engine *e, struct cell *c,
         c->sourceterms = scheduler_addtask(s, task_type_sourceterms,
                                            task_subtype_none, 0, 0, c, NULL, 0);
     }
-  }
 
-  /* Set the super-cell. */
-  c->super = super;
+  } else { /* We are above the super-cell so need to go deeper */
 
-  /* Recurse. */
-  if (c->split)
-    for (int k = 0; k < 8; k++)
-      if (c->progeny[k] != NULL)
-        engine_make_hydro_hierarchical_tasks(e, c->progeny[k], super);
+#ifdef SWIFT_DEBUG_CHECKS
+    if (c->super != NULL) error("Incorrectly set super pointer");
+#endif
+
+    /* Recurse. */
+    if (c->split)
+      for (int k = 0; k < 8; k++)
+        if (c->progeny[k] != NULL)
+          engine_make_hierarchical_tasks(e, c->progeny[k]);
+  }
 }
 
 /**
@@ -1289,6 +1231,30 @@ void engine_make_gravity_tasks(struct engine *e) {
   }
 }
 
+void engine_make_external_gravity_tasks(struct engine *e) {
+
+  struct space *s = e->s;
+  struct scheduler *sched = &e->sched;
+  const int nodeID = e->nodeID;
+  struct cell *cells = s->cells_top;
+  const int nr_cells = s->nr_cells;
+
+  for (int cid = 0; cid < nr_cells; ++cid) {
+
+    struct cell *ci = &cells[cid];
+
+    /* Skip cells without gravity particles */
+    if (ci->gcount == 0) continue;
+
+    /* Is that neighbour local ? */
+    if (ci->nodeID != nodeID) continue;
+
+    /* If the cell is local, build a self-interaction */
+    scheduler_addtask(sched, task_type_self, task_subtype_external_grav, 0, 0,
+                      ci, NULL, 0);
+  }
+}
+
 /**
  * @brief Constructs the top-level pair tasks for the first hydro loop over
  * neighbours
@@ -1364,59 +1330,101 @@ void engine_make_hydroloop_tasks(struct engine *e) {
 /**
  * @brief Counts the tasks associated with one cell and constructs the links
  *
- * For each hydrodynamic task, construct the links with the corresponding cell.
- * Similarly, construct the dependencies for all the sorting tasks.
+ * For each hydrodynamic and gravity task, construct the links with
+ * the corresponding cell.  Similarly, construct the dependencies for
+ * all the sorting tasks.
  *
  * @param e The #engine.
  */
 void engine_count_and_link_tasks(struct engine *e) {
 
-  struct scheduler *sched = &e->sched;
-
-  for (int ind = 0; ind < sched->nr_tasks; ind++) {
+  struct scheduler *const sched = &e->sched;
+  const int nr_tasks = sched->nr_tasks;
 
-    struct task *t = &sched->tasks[ind];
+  for (int ind = 0; ind < nr_tasks; ind++) {
 
-    if (t->skip) continue;
+    struct task *const t = &sched->tasks[ind];
+    struct cell *const ci = t->ci;
+    struct cell *const cj = t->cj;
 
     /* Link sort tasks together. */
-    if (t->type == task_type_sort && t->ci->split)
+    if (t->type == task_type_sort && ci->split)
       for (int j = 0; j < 8; j++)
-        if (t->ci->progeny[j] != NULL && t->ci->progeny[j]->sorts != NULL) {
-          t->ci->progeny[j]->sorts->skip = 0;
-          scheduler_addunlock(sched, t->ci->progeny[j]->sorts, t);
+        if (ci->progeny[j] != NULL && ci->progeny[j]->sorts != NULL) {
+          scheduler_addunlock(sched, ci->progeny[j]->sorts, t);
         }
 
-    /* Link density tasks to cells. */
+    /* Link self tasks to cells. */
     if (t->type == task_type_self) {
-      atomic_inc(&t->ci->nr_tasks);
+      atomic_inc(&ci->nr_tasks);
       if (t->subtype == task_subtype_density) {
-        engine_addlink(e, &t->ci->density, t);
-        atomic_inc(&t->ci->nr_density);
+        engine_addlink(e, &ci->density, t);
+        atomic_inc(&ci->nr_density);
       }
+      if (t->subtype == task_subtype_grav) {
+        engine_addlink(e, &ci->grav, t);
+        atomic_inc(&ci->nr_grav);
+      }
+      if (t->subtype == task_subtype_external_grav) {
+        engine_addlink(e, &ci->grav, t);
+        atomic_inc(&ci->nr_grav);
+      }
+
+      /* Link pair tasks to cells. */
     } else if (t->type == task_type_pair) {
-      atomic_inc(&t->ci->nr_tasks);
-      atomic_inc(&t->cj->nr_tasks);
+      atomic_inc(&ci->nr_tasks);
+      atomic_inc(&cj->nr_tasks);
       if (t->subtype == task_subtype_density) {
-        engine_addlink(e, &t->ci->density, t);
-        atomic_inc(&t->ci->nr_density);
-        engine_addlink(e, &t->cj->density, t);
-        atomic_inc(&t->cj->nr_density);
+        engine_addlink(e, &ci->density, t);
+        atomic_inc(&ci->nr_density);
+        engine_addlink(e, &cj->density, t);
+        atomic_inc(&cj->nr_density);
       }
+      if (t->subtype == task_subtype_grav) {
+        engine_addlink(e, &ci->grav, t);
+        atomic_inc(&ci->nr_grav);
+        engine_addlink(e, &cj->grav, t);
+        atomic_inc(&cj->nr_grav);
+      }
+
+      /* Link sub-self tasks to cells. */
     } else if (t->type == task_type_sub_self) {
-      atomic_inc(&t->ci->nr_tasks);
+      atomic_inc(&ci->nr_tasks);
       if (t->subtype == task_subtype_density) {
-        engine_addlink(e, &t->ci->density, t);
-        atomic_inc(&t->ci->nr_density);
+        engine_addlink(e, &ci->density, t);
+        atomic_inc(&ci->nr_density);
+      }
+      if (t->subtype == task_subtype_grav) {
+        engine_addlink(e, &ci->grav, t);
+        atomic_inc(&ci->nr_grav);
+      }
+      if (t->subtype == task_subtype_external_grav) {
+        engine_addlink(e, &ci->grav, t);
+        atomic_inc(&ci->nr_grav);
       }
+
+      /* Link sub-pair tasks to cells. */
     } else if (t->type == task_type_sub_pair) {
-      atomic_inc(&t->ci->nr_tasks);
-      atomic_inc(&t->cj->nr_tasks);
+      atomic_inc(&ci->nr_tasks);
+      atomic_inc(&cj->nr_tasks);
       if (t->subtype == task_subtype_density) {
-        engine_addlink(e, &t->ci->density, t);
-        atomic_inc(&t->ci->nr_density);
-        engine_addlink(e, &t->cj->density, t);
-        atomic_inc(&t->cj->nr_density);
+        engine_addlink(e, &ci->density, t);
+        atomic_inc(&ci->nr_density);
+        engine_addlink(e, &cj->density, t);
+        atomic_inc(&cj->nr_density);
+      }
+      if (t->subtype == task_subtype_grav) {
+        engine_addlink(e, &ci->grav, t);
+        atomic_inc(&ci->nr_grav);
+        engine_addlink(e, &cj->grav, t);
+        atomic_inc(&cj->nr_grav);
+      }
+      if (t->subtype == task_subtype_external_grav) {
+        error("Found a sub-pair/external-gravity task...");
+        engine_addlink(e, &ci->grav, t);
+        atomic_inc(&ci->nr_grav);
+        engine_addlink(e, &cj->grav, t);
+        atomic_inc(&cj->nr_grav);
       }
     }
   }
@@ -1434,11 +1442,27 @@ static inline void engine_make_gravity_dependencies(struct scheduler *sched,
                                                     struct cell *c) {
 
   /* init --> gravity --> kick */
-  scheduler_addunlock(sched, c->gsuper->init, gravity);
-  scheduler_addunlock(sched, gravity, c->gsuper->kick);
+  scheduler_addunlock(sched, c->super->init, gravity);
+  scheduler_addunlock(sched, gravity, c->super->kick);
 
   /* grav_up --> gravity ( --> kick) */
-  scheduler_addunlock(sched, c->gsuper->grav_up, gravity);
+  scheduler_addunlock(sched, c->super->grav_up, gravity);
+}
+
+/**
+ * @brief Creates the dependency network for the external gravity tasks of a
+ * given cell.
+ *
+ * @param sched The #scheduler.
+ * @param gravity The gravity task to link.
+ * @param c The cell.
+ */
+static inline void engine_make_external_gravity_dependencies(
+    struct scheduler *sched, struct task *gravity, struct cell *c) {
+
+  /* init --> external gravity --> kick */
+  scheduler_addunlock(sched, c->super->init, gravity);
+  scheduler_addunlock(sched, gravity, c->super->kick);
 }
 
 /**
@@ -1467,9 +1491,6 @@ void engine_link_gravity_tasks(struct engine *e) {
     /* Get a pointer to the task. */
     struct task *t = &sched->tasks[k];
 
-    /* Skip? */
-    if (t->skip) continue;
-
     /* Multipole construction */
     if (t->type == task_type_grav_up) {
       scheduler_addunlock(sched, t, gather);
@@ -1480,19 +1501,27 @@ void engine_link_gravity_tasks(struct engine *e) {
 
       /* Gather the multipoles --> mm interaction --> kick */
       scheduler_addunlock(sched, gather, t);
-      scheduler_addunlock(sched, t, t->ci->gsuper->kick);
+      scheduler_addunlock(sched, t, t->ci->super->kick);
 
       /* init --> mm interaction */
-      scheduler_addunlock(sched, t->ci->gsuper->init, t);
+      scheduler_addunlock(sched, t->ci->super->init, t);
     }
 
-    /* Self-interaction? */
+    /* Self-interaction for self-gravity? */
     if (t->type == task_type_self && t->subtype == task_subtype_grav) {
 
       engine_make_gravity_dependencies(sched, t, t->ci);
 
     }
 
+    /* Self-interaction for external gravity ? */
+    else if (t->type == task_type_self &&
+             t->subtype == task_subtype_external_grav) {
+
+      engine_make_external_gravity_dependencies(sched, t, t->ci);
+
+    }
+
     /* Otherwise, pair interaction? */
     else if (t->type == task_type_pair && t->subtype == task_subtype_grav) {
 
@@ -1501,7 +1530,7 @@ void engine_link_gravity_tasks(struct engine *e) {
         engine_make_gravity_dependencies(sched, t, t->ci);
       }
 
-      if (t->cj->nodeID == nodeID && t->ci->gsuper != t->cj->gsuper) {
+      if (t->cj->nodeID == nodeID && t->ci->super != t->cj->super) {
 
         engine_make_gravity_dependencies(sched, t, t->cj);
       }
@@ -1516,6 +1545,15 @@ void engine_link_gravity_tasks(struct engine *e) {
       }
     }
 
+    /* Sub-self-interaction for external gravity ? */
+    else if (t->type == task_type_sub_self &&
+             t->subtype == task_subtype_external_grav) {
+
+      if (t->ci->nodeID == nodeID) {
+        engine_make_external_gravity_dependencies(sched, t, t->ci);
+      }
+    }
+
     /* Otherwise, sub-pair interaction? */
     else if (t->type == task_type_sub_pair && t->subtype == task_subtype_grav) {
 
@@ -1523,7 +1561,7 @@ void engine_link_gravity_tasks(struct engine *e) {
 
         engine_make_gravity_dependencies(sched, t, t->ci);
       }
-      if (t->cj->nodeID == nodeID && t->ci->gsuper != t->cj->gsuper) {
+      if (t->cj->nodeID == nodeID && t->ci->super != t->cj->super) {
 
         engine_make_gravity_dependencies(sched, t, t->cj);
       }
@@ -1600,9 +1638,6 @@ void engine_make_extra_hydroloop_tasks(struct engine *e) {
   for (int ind = 0; ind < nr_tasks; ind++) {
     struct task *t = &sched->tasks[ind];
 
-    /* Skip? */
-    if (t->skip) continue;
-
     /* Self-interaction? */
     if (t->type == task_type_self && t->subtype == task_subtype_density) {
 
@@ -1790,12 +1825,6 @@ void engine_make_extra_hydroloop_tasks(struct engine *e) {
       }
 #endif
     }
-
-    /* External gravity tasks should depend on init and unlock the kick */
-    else if (t->type == task_type_grav_external) {
-      scheduler_addunlock(sched, t->ci->init, t);
-      scheduler_addunlock(sched, t, t->ci->kick);
-    }
     /* Cooling tasks should depend on kick and unlock sourceterms */
     else if (t->type == task_type_cooling) {
       scheduler_addunlock(sched, t->ci->kick, t);
@@ -1871,17 +1900,26 @@ void engine_maketasks(struct engine *e) {
   /* Add the gravity mm tasks. */
   if (e->policy & engine_policy_self_gravity) engine_make_gravity_tasks(e);
 
+  /* Add the external gravity tasks. */
+  if (e->policy & engine_policy_external_gravity)
+    engine_make_external_gravity_tasks(e);
+
+  if (e->sched.nr_tasks == 0 && (s->nr_gparts > 0 || s->nr_parts > 0))
+    error("We have particles but no hydro or gravity tasks were created.");
+
   /* Split the tasks. */
   scheduler_splittasks(sched);
 
-  /* Allocate the list of cell-task links. The maximum number of links
-     is the number of cells (s->tot_cells) times the number of neighbours (27)
-     times the number of interaction types (2, density and force). */
+  /* Allocate the list of cell-task links. The maximum number of links is the
+   * number of cells (s->tot_cells) times the number of neighbours (26) times
+   * the number of interaction types, so 26 * 3 (density, force, grav) pairs
+   * and 4 (density, force, grav, ext_grav) self.
+   */
   if (e->links != NULL) free(e->links);
 #ifdef EXTRA_HYDRO_LOOP
-  e->size_links = s->tot_cells * 27 * 3;
+  e->size_links = s->tot_cells * (26 * 4 + 4);
 #else
-  e->size_links = s->tot_cells * 27 * 2;
+  e->size_links = s->tot_cells * (26 * 3 + 4);
 #endif
   if ((e->links = malloc(sizeof(struct link) * e->size_links)) == NULL)
     error("Failed to allocate cell-task links.");
@@ -1894,25 +1932,24 @@ void engine_maketasks(struct engine *e) {
   /* Count the number of tasks associated with each cell and
      store the density tasks in each cell, and make each sort
      depend on the sorts of its progeny. */
-  if (e->policy & engine_policy_hydro) engine_count_and_link_tasks(e);
+  engine_count_and_link_tasks(e);
 
-  /* Append hierarchical tasks to each cells */
-  if (e->policy & engine_policy_hydro)
-    for (int k = 0; k < nr_cells; k++)
-      engine_make_hydro_hierarchical_tasks(e, &cells[k], NULL);
+  /* Now that the self/pair tasks are at the right level, set the super
+   * pointers. */
+  for (int k = 0; k < nr_cells; k++) cell_set_super(&cells[k], NULL);
 
-  if ((e->policy & engine_policy_self_gravity) ||
-      (e->policy & engine_policy_external_gravity))
-    for (int k = 0; k < nr_cells; k++)
-      engine_make_gravity_hierarchical_tasks(e, &cells[k], NULL);
+  /* Append hierarchical tasks to each cells */
+  for (int k = 0; k < nr_cells; k++)
+    engine_make_hierarchical_tasks(e, &cells[k]);
 
   /* Run through the tasks and make force tasks for each density task.
      Each force task depends on the cell ghosts and unlocks the kick task
      of its super-cell. */
   if (e->policy & engine_policy_hydro) engine_make_extra_hydroloop_tasks(e);
 
-  /* Add the dependencies for the self-gravity stuff */
-  if (e->policy & engine_policy_self_gravity) engine_link_gravity_tasks(e);
+  /* Add the dependencies for the gravity stuff */
+  if (e->policy & (engine_policy_self_gravity | engine_policy_external_gravity))
+    engine_link_gravity_tasks(e);
 
 #ifdef WITH_MPI
 
@@ -1957,7 +1994,7 @@ void engine_maketasks(struct engine *e) {
 }
 
 /**
- * @brief Mark tasks to be skipped and set the sort flags accordingly.
+ * @brief Mark tasks to be un-skipped and set the sort flags accordingly.
  *        Threadpool mapper function for fixdt version.
  *
  * @param map_data pointer to the tasks
@@ -1968,11 +2005,15 @@ void engine_marktasks_fixdt_mapper(void *map_data, int num_elements,
                                    void *extra_data) {
   /* Unpack the arguments. */
   struct task *tasks = (struct task *)map_data;
-  int *rebuild_space = (int *)extra_data;
+  size_t *rebuild_space = &((size_t *)extra_data)[0];
+  struct scheduler *s = (struct scheduler *)(((size_t *)extra_data)[1]);
 
   for (int ind = 0; ind < num_elements; ind++) {
     struct task *t = &tasks[ind];
 
+    /* All tasks are unskipped (we skip by default). */
+    scheduler_activate(s, t);
+
     /* Pair? */
     if (t->type == task_type_pair || t->type == task_type_sub_pair) {
 
@@ -1999,28 +2040,7 @@ void engine_marktasks_fixdt_mapper(void *map_data, int num_elements,
 }
 
 /**
- * @brief Mark any sort tasks as initially skipped.
- *        Threadpool mapper function.
- *
- * @param map_data pointer to the tasks
- * @param num_elements number of tasks
- * @param extra_data unused
- */
-void engine_marktasks_sorts_mapper(void *map_data, int num_elements,
-                                   void *extra_data) {
-  /* Unpack the arguments. */
-  struct task *tasks = (struct task *)map_data;
-  for (int ind = 0; ind < num_elements; ind++) {
-    struct task *t = &tasks[ind];
-    if (t->type == task_type_sort) {
-      t->flags = 0;
-      t->skip = 1;
-    }
-  }
-}
-
-/**
- * @brief Mark tasks to be skipped and set the sort flags accordingly.
+ * @brief Mark tasks to be un-skipped and set the sort flags accordingly.
  *        Threadpool mapper function.
  *
  * @param map_data pointer to the tasks
@@ -2031,18 +2051,20 @@ void engine_marktasks_mapper(void *map_data, int num_elements,
                              void *extra_data) {
   /* Unpack the arguments. */
   struct task *tasks = (struct task *)map_data;
-  const int ti_end = ((int *)extra_data)[0];
-  int *rebuild_space = &((int *)extra_data)[1];
+  const int ti_end = ((size_t *)extra_data)[0];
+  size_t *rebuild_space = &((size_t *)extra_data)[1];
+  struct scheduler *s = (struct scheduler *)(((size_t *)extra_data)[2]);
 
   for (int ind = 0; ind < num_elements; ind++) {
     struct task *t = &tasks[ind];
 
     /* Single-cell task? */
     if (t->type == task_type_self || t->type == task_type_ghost ||
-        t->type == task_type_sub_self) {
+        t->type == task_type_extra_ghost || t->type == task_type_cooling ||
+        t->type == task_type_sourceterms || t->type == task_type_sub_self) {
 
       /* Set this task's skip. */
-      t->skip = (t->ci->ti_end_min > ti_end);
+      if (t->ci->ti_end_min <= ti_end) scheduler_activate(s, t);
     }
 
     /* Pair? */
@@ -2059,19 +2081,24 @@ void engine_marktasks_mapper(void *map_data, int num_elements,
            cj->dx_max > space_maxreldx * cj->h_max))
         *rebuild_space = 1;
 
-      /* Set this task's skip. */
-      if ((t->skip = (ci->ti_end_min > ti_end && cj->ti_end_min > ti_end)) == 1)
+      /* Set this task's skip, otherwise nothing to do. */
+      if (ci->ti_end_min <= ti_end || cj->ti_end_min <= ti_end)
+        scheduler_activate(s, t);
+      else
         continue;
 
+      /* If this is not a density task, we don't have to do any of the below. */
+      if (t->subtype != task_subtype_density) continue;
+
       /* Set the sort flags. */
-      if (t->type == task_type_pair && t->subtype != task_subtype_grav) {
+      if (t->type == task_type_pair) {
         if (!(ci->sorted & (1 << t->flags))) {
           atomic_or(&ci->sorts->flags, (1 << t->flags));
-          ci->sorts->skip = 0;
+          scheduler_activate(s, ci->sorts);
         }
         if (!(cj->sorted & (1 << t->flags))) {
           atomic_or(&cj->sorts->flags, (1 << t->flags));
-          cj->sorts->skip = 0;
+          scheduler_activate(s, cj->sorts);
         }
       }
 
@@ -2081,9 +2108,9 @@ void engine_marktasks_mapper(void *map_data, int num_elements,
       if (ci->nodeID != engine_rank) {
 
         /* Activate the tasks to recv foreign cell ci's data. */
-        ci->recv_xv->skip = 0;
-        ci->recv_rho->skip = 0;
-        ci->recv_ti->skip = 0;
+        scheduler_activate(s, ci->recv_xv);
+        scheduler_activate(s, ci->recv_rho);
+        scheduler_activate(s, ci->recv_ti);
 
         /* Look for the local cell cj's send tasks. */
         struct link *l = NULL;
@@ -2091,45 +2118,46 @@ void engine_marktasks_mapper(void *map_data, int num_elements,
              l = l->next)
           ;
         if (l == NULL) error("Missing link to send_xv task.");
-        l->t->skip = 0;
+        scheduler_activate(s, l->t);
 
         for (l = cj->send_rho; l != NULL && l->t->cj->nodeID != ci->nodeID;
              l = l->next)
           ;
         if (l == NULL) error("Missing link to send_rho task.");
-        l->t->skip = 0;
+        scheduler_activate(s, l->t);
 
         for (l = cj->send_ti; l != NULL && l->t->cj->nodeID != ci->nodeID;
              l = l->next)
           ;
         if (l == NULL) error("Missing link to send_ti task.");
-        l->t->skip = 0;
+        scheduler_activate(s, l->t);
 
       } else if (cj->nodeID != engine_rank) {
 
         /* Activate the tasks to recv foreign cell cj's data. */
-        cj->recv_xv->skip = 0;
-        cj->recv_rho->skip = 0;
-        cj->recv_ti->skip = 0;
+        scheduler_activate(s, cj->recv_xv);
+        scheduler_activate(s, cj->recv_rho);
+        scheduler_activate(s, cj->recv_ti);
+
         /* Look for the local cell ci's send tasks. */
         struct link *l = NULL;
         for (l = ci->send_xv; l != NULL && l->t->cj->nodeID != cj->nodeID;
              l = l->next)
           ;
         if (l == NULL) error("Missing link to send_xv task.");
-        l->t->skip = 0;
+        scheduler_activate(s, l->t);
 
         for (l = ci->send_rho; l != NULL && l->t->cj->nodeID != cj->nodeID;
              l = l->next)
           ;
         if (l == NULL) error("Missing link to send_rho task.");
-        l->t->skip = 0;
+        scheduler_activate(s, l->t);
 
         for (l = ci->send_ti; l != NULL && l->t->cj->nodeID != cj->nodeID;
              l = l->next)
           ;
         if (l == NULL) error("Missing link to send_ti task.");
-        l->t->skip = 0;
+        scheduler_activate(s, l->t);
       }
 
 #endif
@@ -2137,25 +2165,26 @@ void engine_marktasks_mapper(void *map_data, int num_elements,
 
     /* Kick? */
     else if (t->type == task_type_kick) {
-      t->skip = (t->ci->ti_end_min > ti_end);
       t->ci->updated = 0;
       t->ci->g_updated = 0;
+      if (t->ci->ti_end_min <= ti_end) scheduler_activate(s, t);
     }
 
     /* Init? */
     else if (t->type == task_type_init) {
-      /* Set this task's skip. */
-      t->skip = (t->ci->ti_end_min > ti_end);
+      if (t->ci->ti_end_min <= ti_end) scheduler_activate(s, t);
     }
 
-    /* None? */
-    else if (t->type == task_type_none)
-      t->skip = 1;
+    /* Tasks with no cells should not be skipped? */
+    else if (t->type == task_type_grav_gather_m ||
+             t->type == task_type_grav_fft) {
+      scheduler_activate(s, t);
+    }
   }
 }
 
 /**
- * @brief Mark tasks to be skipped and set the sort flags accordingly.
+ * @brief Mark tasks to be un-skipped and set the sort flags accordingly.
  *
  * @return 1 if the space has to be rebuilt, 0 otherwise.
  */
@@ -2169,31 +2198,16 @@ int engine_marktasks(struct engine *e) {
   if (e->policy & engine_policy_fixdt) {
 
     /* Run through the tasks and mark as skip or not. */
+    size_t extra_data[2] = {rebuild_space, (size_t)&e->sched};
     threadpool_map(&e->threadpool, engine_marktasks_fixdt_mapper, s->tasks,
-                   s->nr_tasks, sizeof(struct task), 1000, &rebuild_space);
+                   s->nr_tasks, sizeof(struct task), 1000, extra_data);
     return rebuild_space;
 
     /* Multiple-timestep case */
   } else {
 
     /* Run through the tasks and mark as skip or not. */
-    int extra_data[2] = {e->ti_current, rebuild_space};
-    threadpool_map(&e->threadpool, engine_marktasks_sorts_mapper, s->tasks,
-                   s->nr_tasks, sizeof(struct task), 10000, NULL);
-
-#ifdef WITH_MPI
-    if (e->policy & engine_policy_mpi) {
-
-      /* Skip all sends and recvs, we will unmark if needed. */
-      for (int k = 0; k < s->nr_tasks; k++) {
-        struct task *t = &s->tasks[k];
-        if (t->type == task_type_send || t->type == task_type_recv) {
-          t->skip = 1;
-        }
-      }
-    }
-#endif
-
+    size_t extra_data[3] = {e->ti_current, rebuild_space, (size_t)&e->sched};
     threadpool_map(&e->threadpool, engine_marktasks_mapper, s->tasks,
                    s->nr_tasks, sizeof(struct task), 10000, extra_data);
     rebuild_space = extra_data[1];
@@ -2214,16 +2228,21 @@ int engine_marktasks(struct engine *e) {
  */
 void engine_print_task_counts(struct engine *e) {
 
-  struct scheduler *sched = &e->sched;
+  const ticks tic = getticks();
+  struct scheduler *const sched = &e->sched;
+  const int nr_tasks = sched->nr_tasks;
+  const struct task *const tasks = sched->tasks;
 
   /* Count and print the number of each task type. */
   int counts[task_type_count + 1];
   for (int k = 0; k <= task_type_count; k++) counts[k] = 0;
-  for (int k = 0; k < sched->nr_tasks; k++)
-    if (!sched->tasks[k].skip)
-      counts[(int)sched->tasks[k].type] += 1;
-    else
+  for (int k = 0; k < nr_tasks; k++) {
+    if (tasks[k].skip)
       counts[task_type_count] += 1;
+    else
+      counts[(int)tasks[k].type] += 1;
+  }
+  message("Total = %d", nr_tasks);
 #ifdef WITH_MPI
   printf("[%04i] %s engine_print_task_counts: task counts are [ %s=%i",
          e->nodeID, clocks_get_timesincestart(), taskID_names[0], counts[0]);
@@ -2237,6 +2256,10 @@ void engine_print_task_counts(struct engine *e) {
   fflush(stdout);
   message("nr_parts = %zu.", e->s->nr_parts);
   message("nr_gparts = %zu.", e->s->nr_gparts);
+
+  if (e->verbose)
+    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
+            clocks_getunit());
 }
 
 /**
@@ -2289,7 +2312,7 @@ void engine_prepare(struct engine *e, int nodrift) {
   TIMER_TIC;
 
   /* Run through the tasks and mark as skip or not. */
-  int rebuild = (e->forcerebuild || engine_marktasks(e));
+  int rebuild = e->forcerebuild;
 
 /* Collect the values of rebuild from all nodes. */
 #ifdef WITH_MPI
@@ -2399,6 +2422,10 @@ void engine_collect_kick(struct cell *c) {
         ti_end_min = min(ti_end_min, cp->ti_end_min);
         updated += cp->updated;
         g_updated += cp->g_updated;
+
+        /* Collected, so clear for next time. */
+        cp->updated = 0;
+        cp->g_updated = 0;
       }
     }
   }
@@ -2434,6 +2461,10 @@ void engine_collect_timestep(struct engine *e) {
       ti_end_min = min(ti_end_min, c->ti_end_min);
       updates += c->updated;
       g_updates += c->g_updated;
+
+      /* Collected, so clear for next time. */
+      c->updated = 0;
+      c->g_updated = 0;
     }
 
 /* Aggregate the data from the different nodes. */
@@ -2476,76 +2507,28 @@ void engine_collect_timestep(struct engine *e) {
 void engine_print_stats(struct engine *e) {
 
   const ticks tic = getticks();
-  const struct space *s = e->s;
 
-  double e_kin = 0.0, e_int = 0.0, e_pot = 0.0, e_rad = 0.0;
-  double entropy = 0.0, mass = 0.0;
-  double mom[3] = {0.0, 0.0, 0.0}, ang_mom[3] = {0.0, 0.0, 0.0};
+  struct statistics stats;
+  stats_init(&stats);
 
-  /* Collect the cell data. */
-  for (int k = 0; k < s->nr_cells; k++)
-    if (s->cells_top[k].nodeID == e->nodeID) {
-      struct cell *c = &s->cells_top[k];
-      mass += c->mass;
-      e_kin += c->e_kin;
-      e_int += c->e_int;
-      e_pot += c->e_pot;
-      e_rad += c->e_rad;
-      entropy += c->entropy;
-      mom[0] += c->mom[0];
-      mom[1] += c->mom[1];
-      mom[2] += c->mom[2];
-      ang_mom[0] += c->ang_mom[0];
-      ang_mom[1] += c->ang_mom[1];
-      ang_mom[2] += c->ang_mom[2];
-    }
+  /* Collect the stats on this node */
+  stats_collect(e->s, &stats);
 
 /* Aggregate the data from the different nodes. */
 #ifdef WITH_MPI
-  {
-    double in[12] = {0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.};
-    double out[12];
-    out[0] = e_kin;
-    out[1] = e_int;
-    out[2] = e_pot;
-    out[3] = e_rad;
-    out[4] = mom[0];
-    out[5] = mom[1];
-    out[6] = mom[2];
-    out[7] = ang_mom[0];
-    out[8] = ang_mom[1];
-    out[9] = ang_mom[2];
-    out[10] = mass;
-    out[11] = entropy;
-    if (MPI_Reduce(out, in, 11, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD) !=
-        MPI_SUCCESS)
-      error("Failed to aggregate stats.");
-    e_kin = out[0];
-    e_int = out[1];
-    e_pot = out[2];
-    e_rad = out[3];
-    mom[0] = out[4];
-    mom[1] = out[5];
-    mom[2] = out[6];
-    ang_mom[0] = out[7];
-    ang_mom[1] = out[8];
-    ang_mom[2] = out[9];
-    mass = out[10];
-    entropy = out[11];
-  }
-#endif
+  struct statistics global_stats;
+  stats_init(&global_stats);
 
-  const double e_tot = e_kin + e_int + e_pot;
+  if (MPI_Reduce(&stats, &global_stats, 1, statistics_mpi_type,
+                 statistics_mpi_reduce_op, 0, MPI_COMM_WORLD) != MPI_SUCCESS)
+    error("Failed to aggregate stats.");
+#else
+  struct statistics global_stats = stats;
+#endif
 
   /* Print info */
-  if (e->nodeID == 0) {
-    fprintf(e->file_stats,
-            " %14e %14e %14e %14e %14e %14e %14e %14e %14e %14e %14e %14e %14e "
-            "%14e\n",
-            e->time, mass, e_tot, e_kin, e_int, e_pot, e_rad, entropy, mom[0],
-            mom[1], mom[2], ang_mom[0], ang_mom[1], ang_mom[2]);
-    fflush(e->file_stats);
-  }
+  if (e->nodeID == 0)
+    stats_print_to_file(e->file_stats, &global_stats, e->time);
 
   if (e->verbose)
     message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
@@ -2654,7 +2637,10 @@ void engine_init_particles(struct engine *e, int flag_entropy_ICs) {
   /* Add the tasks corresponding to external gravity to the masks */
   if (e->policy & engine_policy_external_gravity) {
 
-    mask |= 1 << task_type_grav_external;
+    mask |= 1 << task_type_self;
+    mask |= 1 << task_type_sub_self;
+
+    submask |= 1 << task_subtype_external_grav;
   }
 
   /* Add MPI tasks if need be */
@@ -2753,13 +2739,7 @@ void engine_step(struct engine *e) {
     fflush(e->file_timesteps);
   }
 
-  /* Save some statistics */
-  if (e->time - e->timeLastStatistics >= e->deltaTimeStatistics) {
-    engine_print_stats(e);
-    e->timeLastStatistics += e->deltaTimeStatistics;
-  }
-
-  /* Drift only the necessary particles, that all means all particles
+  /* Drift only the necessary particles, that means all particles
    * if we are about to repartition. */
   int repart = (e->forcerepart != REPART_NONE);
   e->drift_all = repart || e->drift_all;
@@ -2823,7 +2803,11 @@ void engine_step(struct engine *e) {
 
   /* Add the tasks corresponding to external gravity to the masks */
   if (e->policy & engine_policy_external_gravity) {
-    mask |= 1 << task_type_grav_external;
+
+    mask |= 1 << task_type_self;
+    mask |= 1 << task_type_sub_self;
+
+    submask |= 1 << task_subtype_external_grav;
   }
 
   /* Add the tasks corresponding to cooling to the masks */
@@ -2844,13 +2828,19 @@ void engine_step(struct engine *e) {
     submask |= 1 << task_subtype_tend;
   }
 
-  if (e->verbose) engine_print_task_counts(e);
+  // if (e->verbose) engine_print_task_counts(e);
 
   /* Send off the runners. */
   TIMER_TIC;
   engine_launch(e, e->nr_threads, mask, submask);
   TIMER_TOC(timer_runners);
 
+  /* Save some statistics */
+  if (e->time - e->timeLastStatistics >= e->deltaTimeStatistics) {
+    engine_print_stats(e);
+    e->timeLastStatistics += e->deltaTimeStatistics;
+  }
+
   TIMER_TOC2(timer_step);
 
   clocks_gettime(&time2);
@@ -2876,9 +2866,10 @@ void engine_drift(struct engine *e) {
   const ticks tic = getticks();
   threadpool_map(&e->threadpool, runner_do_drift_mapper, e->s->cells_top,
                  e->s->nr_cells, sizeof(struct cell), 1, e);
+
   if (e->verbose)
-    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
-            clocks_getunit());
+    message("took %.3f %s (including task unskipping).",
+            clocks_from_ticks(getticks() - tic), clocks_getunit());
 }
 
 /**
@@ -3409,13 +3400,14 @@ void engine_init(struct engine *e, struct space *s,
             nr_nodes * nr_threads);
     e->file_timesteps = fopen(timestepsfileName, "w");
     fprintf(e->file_timesteps,
-            "# Branch: %s\n# Revision: %s\n# Compiler: %s, Version: %s \n# "
+            "# Host: %s\n# Branch: %s\n# Revision: %s\n# Compiler: %s, "
+            "Version: %s \n# "
             "Number of threads: %d\n# Number of MPI ranks: %d\n# Hydrodynamic "
             "scheme: %s\n# Hydrodynamic kernel: %s\n# No. of neighbours: %.2f "
             "+/- %.2f\n# Eta: %f\n",
-            git_branch(), git_revision(), compiler_name(), compiler_version(),
-            e->nr_threads, e->nr_nodes, SPH_IMPLEMENTATION, kernel_name,
-            e->hydro_properties->target_neighbours,
+            hostname(), git_branch(), git_revision(), compiler_name(),
+            compiler_version(), e->nr_threads, e->nr_nodes, SPH_IMPLEMENTATION,
+            kernel_name, e->hydro_properties->target_neighbours,
             e->hydro_properties->delta_neighbours,
             e->hydro_properties->eta_neighbours);
 
@@ -3505,6 +3497,7 @@ void engine_init(struct engine *e, struct space *s,
 /* Construct types for MPI communications */
 #ifdef WITH_MPI
   part_create_mpi_types();
+  stats_create_MPI_type();
 #endif
 
   /* Initialize the threadpool. */
diff --git a/src/hydro/Gadget2/hydro.h b/src/hydro/Gadget2/hydro.h
index 0bdf56fe844309998db1fad4cf9581edb6b88305..c999e20d401570ac6518291df8cf315569fe78bd 100644
--- a/src/hydro/Gadget2/hydro.h
+++ b/src/hydro/Gadget2/hydro.h
@@ -411,8 +411,8 @@ __attribute__((always_inline)) INLINE static void hydro_end_force(
 
   p->force.h_dt *= p->h * hydro_dimension_inv;
 
-  p->entropy_dt *=
-      0.5f * hydro_gamma_minus_one * pow_minus_gamma_minus_one(p->rho);
+  p->entropy_dt =
+      0.5f * gas_entropy_from_internal_energy(p->rho, p->entropy_dt);
 }
 
 /**
diff --git a/src/hydro/Gizmo/hydro.h b/src/hydro/Gizmo/hydro.h
index a383a2fdce9e64a5673d0136184368220dc08458..bd970795bdf070a7bd7915cc4f493218dbf319d1 100644
--- a/src/hydro/Gizmo/hydro.h
+++ b/src/hydro/Gizmo/hydro.h
@@ -524,6 +524,7 @@ __attribute__((always_inline)) INLINE static void hydro_set_internal_energy(
   /* conserved.energy is NOT the specific energy (u), but the total thermal
      energy (u*m) */
   p->conserved.energy = u * p->conserved.mass;
+  p->primitives.P = hydro_gamma_minus_one * p->primitives.rho * u;
 }
 
 /**
@@ -540,4 +541,5 @@ __attribute__((always_inline)) INLINE static void hydro_set_entropy(
 
   p->conserved.energy = gas_internal_energy_from_entropy(p->primitives.rho, S) *
                         p->conserved.mass;
+  p->primitives.P = gas_pressure_from_entropy(p->primitives.rho, S);
 }
diff --git a/src/hydro/PressureEntropy/hydro.h b/src/hydro/PressureEntropy/hydro.h
index ede16fe5181a546921db7acf8ffcbc130c5e91c5..0c69728a9ce6317a8d7ddab945e7aab6e94ee247 100644
--- a/src/hydro/PressureEntropy/hydro.h
+++ b/src/hydro/PressureEntropy/hydro.h
@@ -338,11 +338,10 @@ __attribute__((always_inline)) INLINE static void hydro_prepare_force(
 
   /* Compute "grad h" term (note we use rho here and not rho_bar !)*/
   const float rho_inv = 1.f / p->rho;
-  const float entropy_minus_one_over_gamma = 1.f / p->entropy_one_over_gamma;
   const float rho_dh =
       1.f / (1.f + hydro_dimension_inv * p->h * p->density.rho_dh * rho_inv);
-  const float pressure_dh = p->density.pressure_dh * rho_inv * p->h *
-                            hydro_dimension_inv * entropy_minus_one_over_gamma;
+  const float pressure_dh =
+      p->density.pressure_dh * rho_inv * p->h * hydro_dimension_inv;
 
   const float grad_h_term = rho_dh * pressure_dh;
 
@@ -442,8 +441,8 @@ __attribute__((always_inline)) INLINE static void hydro_end_force(
 
   p->force.h_dt *= p->h * hydro_dimension_inv;
 
-  p->entropy_dt *=
-      0.5f * hydro_gamma_minus_one * pow_minus_gamma_minus_one(p->rho_bar);
+  p->entropy_dt =
+      0.5f * gas_entropy_from_internal_energy(p->rho_bar, p->entropy_dt);
 }
 
 /**
diff --git a/src/hydro/PressureEntropy/hydro_iact.h b/src/hydro/PressureEntropy/hydro_iact.h
index 18e22233f6f53e488119a58a988ba4c9faf3db36..ce1c38ca69954252dc804af9181b9060a14afcb9 100644
--- a/src/hydro/PressureEntropy/hydro_iact.h
+++ b/src/hydro/PressureEntropy/hydro_iact.h
@@ -255,8 +255,9 @@ __attribute__((always_inline)) INLINE static void runner_iact_force(
 
   /* Now, convolve with the kernel */
   const float visc_term = 0.5f * visc * (wi_dr + wj_dr);
-  const float sph_term = (S_gamma_j / S_gamma_i - f_i) * P_over_rho2_i * wi_dr +
-                         (S_gamma_i / S_gamma_j - f_j) * P_over_rho2_j * wj_dr;
+  const float sph_term =
+      (S_gamma_j / S_gamma_i - f_i / S_gamma_i) * P_over_rho2_i * wi_dr +
+      (S_gamma_i / S_gamma_j - f_j / S_gamma_j) * P_over_rho2_j * wj_dr;
 
   /* Eventually got the acceleration */
   const float acc = (visc_term + sph_term) * r_inv;
@@ -365,8 +366,9 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force(
 
   /* Now, convolve with the kernel */
   const float visc_term = 0.5f * visc * (wi_dr + wj_dr);
-  const float sph_term = (S_gamma_j / S_gamma_i - f_i) * P_over_rho2_i * wi_dr +
-                         (S_gamma_i / S_gamma_j - f_j) * P_over_rho2_j * wj_dr;
+  const float sph_term =
+      (S_gamma_j / S_gamma_i - f_i / S_gamma_i) * P_over_rho2_i * wi_dr +
+      (S_gamma_i / S_gamma_j - f_j / S_gamma_j) * P_over_rho2_j * wj_dr;
 
   /* Eventually got the acceleration */
   const float acc = (visc_term + sph_term) * r_inv;
diff --git a/src/minmax.h b/src/minmax.h
index 9d92cd71d849dba615fdb05bc342014e0593d989..a53093663c79cf4280d136747663552e49c7f1b2 100644
--- a/src/minmax.h
+++ b/src/minmax.h
@@ -43,4 +43,18 @@
     _a > _b ? _a : _b;            \
   })
 
+/**
+ * @brief Limits the value of x to be between a and b
+ *
+ * Only wraps once. If x > 2b, the returned value will be larger than b.
+ * Similarly for x < -b.
+ */
+#define box_wrap(x, a, b)                               \
+  ({                                                    \
+    const __typeof__(x) _x = (x);                       \
+    const __typeof__(a) _a = (a);                       \
+    const __typeof__(b) _b = (b);                       \
+    _x < _a ? (_x + _b) : ((_x > _b) ? (_x - _b) : _x); \
+  })
+
 #endif /* SWIFT_MINMAX_H */
diff --git a/src/riemann.h b/src/riemann.h
index d0ae57a640e13c2098708735d6c34de70ebea5b0..69e1a038fc37cb07e1094c8e6736f79b6afa4f35 100644
--- a/src/riemann.h
+++ b/src/riemann.h
@@ -27,18 +27,17 @@
 #include "stdio.h"
 #include "stdlib.h"
 
-/* Check that we use an ideal equation of state, since other equations of state
-   are not compatible with these Riemann solvers. */
-#ifndef EOS_IDEAL_GAS
-#error Currently there are no Riemann solvers that can handle the requested \
-       equation of state. Select an ideal gas equation of state if you want to \
-       use this hydro scheme!
-#endif
-
 #if defined(RIEMANN_SOLVER_EXACT)
 
 #define RIEMANN_SOLVER_IMPLEMENTATION "Exact Riemann solver (Toro 2009)"
+#if defined(EOS_IDEAL_GAS)
 #include "riemann/riemann_exact.h"
+#elif defined(EOS_ISOTHERMAL_GAS)
+#include "riemann/riemann_exact_isothermal.h"
+#else
+#error \
+    "The Exact Riemann solver is incompatible with the selected equation of state!"
+#endif
 
 #elif defined(RIEMANN_SOLVER_TRRS)
 
diff --git a/src/riemann/riemann_exact_isothermal.h b/src/riemann/riemann_exact_isothermal.h
new file mode 100644
index 0000000000000000000000000000000000000000..8e8a6a4dcce456b55f85148bed7342ad4e651c1b
--- /dev/null
+++ b/src/riemann/riemann_exact_isothermal.h
@@ -0,0 +1,450 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Coypright (c) 2016 Bert Vandenbroucke (bert.vandenbroucke@ugent.be)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+#ifndef SWIFT_RIEMANN_EXACT_ISOTHERMAL_H
+#define SWIFT_RIEMANN_EXACT_ISOTHERMAL_H
+
+#include <float.h>
+#include "adiabatic_index.h"
+#include "minmax.h"
+#include "riemann_vacuum.h"
+
+#define const_isothermal_soundspeed \
+  sqrtf(hydro_gamma_minus_one* const_isothermal_internal_energy)
+
+/**
+ * @brief Relative difference between the middle state velocity and the left or
+ * right state velocity used in the middle state density iteration.
+ *
+ * @param rho Current estimate of the middle state density.
+ * @param W Left or right state vector.
+ * @return Density dependent part of the middle state velocity.
+ */
+__attribute__((always_inline)) INLINE static float riemann_fb(float rho,
+                                                              float* W) {
+  if (rho < W[0]) {
+    return const_isothermal_soundspeed * logf(rho / W[0]);
+  } else {
+    return const_isothermal_soundspeed *
+           (sqrtf(rho / W[0]) - sqrtf(W[0] / rho));
+  }
+}
+
+/**
+ * @brief Derivative w.r.t. rho of the function riemann_fb.
+ *
+ * @param rho Current estimate of the middle state density.
+ * @param W Left or right state vector.
+ * @return Derivative of riemann_fb.
+ */
+__attribute__((always_inline)) INLINE static float riemann_fprimeb(float rho,
+                                                                   float* W) {
+  if (rho < W[0]) {
+    return const_isothermal_soundspeed * W[0] / rho;
+  } else {
+    return 0.5 * const_isothermal_soundspeed *
+           (sqrtf(rho / W[0]) + sqrtf(W[0] / rho)) / rho;
+  }
+}
+
+/**
+ * @brief Difference between the left and right middle state velocity estimates.
+ *
+ * Since the middle state velocity takes on a constant value, we want to get
+ * this difference as close to zero as possible.
+ *
+ * @param rho Current estimate of the middle state density.
+ * @param WL Left state vector.
+ * @param WR Right state vector.
+ * @param vL Left state velocity along the interface normal.
+ * @param vR Right state velocity along the interface normal.
+ * @return Difference between the left and right middle state velocity
+ * estimates.
+ */
+__attribute__((always_inline)) INLINE static float riemann_f(
+    float rho, float* WL, float* WR, float vL, float vR) {
+  return riemann_fb(rho, WR) + riemann_fb(rho, WL) + vR - vL;
+}
+
+/**
+ * @brief Derivative of riemann_f w.r.t. rho.
+ *
+ * @param rho Current estimate of the middle state density.
+ * @param WL Left state vector.
+ * @param WR Right state vector.
+ * @return Derivative of riemann_f.
+ */
+__attribute__((always_inline)) INLINE static float riemann_fprime(float rho,
+                                                                  float* WL,
+                                                                  float* WR) {
+  return riemann_fprimeb(rho, WL) + riemann_fprimeb(rho, WR);
+}
+
+/**
+ * @brief Get a good first guess for the middle state density.
+ *
+ * @param WL The left state vector
+ * @param WR The right state vector
+ * @param vL The left velocity along the interface normal
+ * @param vR The right velocity along the interface normal
+ */
+__attribute__((always_inline)) INLINE static float riemann_guess_rho(float* WL,
+                                                                     float* WR,
+                                                                     float vL,
+                                                                     float vR) {
+
+  /* Currently three possibilities and not really an algorithm to decide which
+     one to choose: */
+  /* just the average */
+  return 0.5f * (WL[0] + WR[0]);
+
+  /* two rarefaction approximation */
+  return sqrtf(WL[0] * WR[0] * expf((vL - vR) / const_isothermal_soundspeed));
+
+  /* linearized primitive variable approximation */
+  return 0.25f * (WL[0] + WR[0]) * (vL - vR) / const_isothermal_soundspeed +
+         0.5f * (WL[0] + WR[0]);
+}
+
+/**
+ * @brief Find the zeropoint of riemann_f(rho) using Brent's method.
+ *
+ * @param lower_limit Lower limit for the method (riemann_f(lower_limit) < 0)
+ * @param upper_limit Upper limit for the method (riemann_f(upper_limit) > 0)
+ * @param lowf Value of riemann_f(lower_limit).
+ * @param upf  Value of riemann_f(upper_limit).
+ * @param error_tol Tolerance used to decide if the solution is converged
+ * @param WL Left state vector
+ * @param WR Right state vector
+ * @param vL The left velocity along the interface normal
+ * @param vR The right velocity along the interface normal
+ */
+__attribute__((always_inline)) INLINE static float riemann_solve_brent(
+    float lower_limit, float upper_limit, float lowf, float upf,
+    float error_tol, float* WL, float* WR, float vL, float vR) {
+
+  float a, b, c, d, s;
+  float fa, fb, fc, fs;
+  float tmp, tmp2;
+  int mflag;
+  int i;
+
+  a = lower_limit;
+  b = upper_limit;
+  c = 0.0f;
+  d = FLT_MAX;
+
+  fa = lowf;
+  fb = upf;
+
+  fc = 0.0f;
+  s = 0.0f;
+  fs = 0.0f;
+
+  /* if f(a) f(b) >= 0 then error-exit */
+  if (fa * fb >= 0.0f) {
+    error(
+        "Brent's method called with equal sign function values!\n"
+        "f(%g) = %g, f(%g) = %g\n",
+        a, fa, b, fb);
+    /* return NaN */
+    return 0.0f / 0.0f;
+  }
+
+  /* if |f(a)| < |f(b)| then swap (a,b) */
+  if (fabs(fa) < fabs(fb)) {
+    tmp = a;
+    a = b;
+    b = tmp;
+    tmp = fa;
+    fa = fb;
+    fb = tmp;
+  }
+
+  c = a;
+  fc = fa;
+  mflag = 1;
+  i = 0;
+
+  while (!(fb == 0.0f) && (fabs(a - b) > error_tol * 0.5f * (a + b))) {
+    if ((fa != fc) && (fb != fc)) /* Inverse quadratic interpolation */
+      s = a * fb * fc / (fa - fb) / (fa - fc) +
+          b * fa * fc / (fb - fa) / (fb - fc) +
+          c * fa * fb / (fc - fa) / (fc - fb);
+    else
+      /* Secant Rule */
+      s = b - fb * (b - a) / (fb - fa);
+
+    tmp2 = 0.25f * (3.0f * a + b);
+    if (!(((s > tmp2) && (s < b)) || ((s < tmp2) && (s > b))) ||
+        (mflag && (fabs(s - b) >= (0.5f * fabs(b - c)))) ||
+        (!mflag && (fabs(s - b) >= (0.5f * fabs(c - d)))) ||
+        (mflag && (fabs(b - c) < error_tol)) ||
+        (!mflag && (fabs(c - d) < error_tol))) {
+      s = 0.5f * (a + b);
+      mflag = 1;
+    } else {
+      mflag = 0;
+    }
+    fs = riemann_f(s, WL, WR, vL, vR);
+    d = c;
+    c = b;
+    fc = fb;
+    if (fa * fs < 0.) {
+      b = s;
+      fb = fs;
+    } else {
+      a = s;
+      fa = fs;
+    }
+
+    /* if |f(a)| < |f(b)| then swap (a,b) */
+    if (fabs(fa) < fabs(fb)) {
+      tmp = a;
+      a = b;
+      b = tmp;
+      tmp = fa;
+      fa = fb;
+      fb = tmp;
+    }
+    i++;
+  }
+  return b;
+}
+
+/**
+ * @brief Solve the Riemann problem between the given left and right state and
+ * along the given interface normal
+ *
+ * @param WL The left state vector
+ * @param WR The right state vector
+ * @param Whalf Empty state vector in which the result will be stored
+ * @param n_unit Normal vector of the interface
+ */
+__attribute__((always_inline)) INLINE static void riemann_solver_solve(
+    float* WL, float* WR, float* Whalf, float* n_unit) {
+
+  /* velocity of the left and right state in a frame aligned with n_unit */
+  float vL, vR, vhalf;
+  /* variables used for finding rhostar */
+  float rho, rhoguess, frho, frhoguess;
+  /* variables used for sampling the solution */
+  float u, S, SH, ST;
+
+  int errorFlag = 0;
+
+  /* sanity checks */
+  if (WL[0] != WL[0]) {
+    printf("NaN WL!\n");
+    errorFlag = 1;
+  }
+  if (WR[0] != WR[0]) {
+    printf("NaN WR!\n");
+    errorFlag = 1;
+  }
+  if (WL[0] < 0.0f) {
+    printf("Negative WL!\n");
+    errorFlag = 1;
+  }
+  if (WR[0] < 0.0f) {
+    printf("Negative WR!\n");
+    errorFlag = 1;
+  }
+  if (errorFlag) {
+    printf("WL: %g %g %g %g %g\n", WL[0], WL[1], WL[2], WL[3], WL[4]);
+    printf("WR: %g %g %g %g %g\n", WR[0], WR[1], WR[2], WR[3], WR[4]);
+    error("Riemman solver input error!\n");
+  }
+
+  /* calculate velocities in interface frame */
+  vL = WL[1] * n_unit[0] + WL[2] * n_unit[1] + WL[3] * n_unit[2];
+  vR = WR[1] * n_unit[0] + WR[2] * n_unit[1] + WR[3] * n_unit[2];
+
+  /* VACUUM... */
+
+  rho = 0.;
+  /* obtain a first guess for p */
+  rhoguess = riemann_guess_rho(WL, WR, vL, vR);
+  frho = riemann_f(rho, WL, WR, vL, vR);
+  frhoguess = riemann_f(rhoguess, WL, WR, vL, vR);
+  /* ok, rhostar is close to 0, better use Brent's method... */
+  /* we use Newton-Raphson until we find a suitable interval */
+  if (frho * frhoguess >= 0.0f) {
+    /* Newton-Raphson until convergence or until suitable interval is found
+       to use Brent's method */
+    unsigned int counter = 0;
+    while (fabs(rho - rhoguess) > 1.e-6f * 0.5f * (rho + rhoguess) &&
+           frhoguess < 0.0f) {
+      rho = rhoguess;
+      rhoguess = rhoguess - frhoguess / riemann_fprime(rhoguess, WL, WR);
+      frhoguess = riemann_f(rhoguess, WL, WR, vL, vR);
+      counter++;
+      if (counter > 1000) {
+        error("Stuck in Newton-Raphson!\n");
+      }
+    }
+  }
+  /* As soon as there is a suitable interval: use Brent's method */
+  if (1.e6 * fabs(rho - rhoguess) > 0.5f * (rho + rhoguess) &&
+      frhoguess > 0.0f) {
+    rho = 0.0f;
+    frho = riemann_f(rho, WL, WR, vL, vR);
+    /* use Brent's method to find the zeropoint */
+    rho = riemann_solve_brent(rho, rhoguess, frho, frhoguess, 1.e-6, WL, WR, vL,
+                              vR);
+  } else {
+    rho = rhoguess;
+  }
+
+  /* calculate the middle state velocity */
+  u = 0.5f * (vL - riemann_fb(rho, WL) + vR + riemann_fb(rho, WR));
+
+  /* sample the solution */
+  if (u > 0.0f) {
+    /* left state */
+    Whalf[1] = WL[1];
+    Whalf[2] = WL[2];
+    Whalf[3] = WL[3];
+    if (WL[0] < rho) {
+      /* left shock wave */
+      S = vL - const_isothermal_soundspeed * sqrtf(rho / WL[0]);
+      if (S >= 0.) {
+        /* to the left of the shock */
+        Whalf[0] = WL[0];
+        vhalf = 0.0f;
+      } else {
+        /* to the right of the shock */
+        Whalf[0] = rho;
+        vhalf = u - vL;
+      }
+    } else {
+      /* left rarefaction wave */
+      SH = vL - const_isothermal_soundspeed;
+      ST = u - const_isothermal_soundspeed;
+      if (SH > 0.) {
+        /* to the left of the rarefaction */
+        Whalf[0] = WL[0];
+        vhalf = 0.0f;
+      } else if (ST > 0.0f) {
+        /* inside the rarefaction */
+        Whalf[0] = WL[0] * expf(vL / const_isothermal_soundspeed - 1.0f);
+        vhalf = const_isothermal_soundspeed - vL;
+      } else {
+        /* to the right of the rarefaction */
+        Whalf[0] = rho;
+        vhalf = u - vL;
+      }
+    }
+  } else {
+    /* right state */
+    Whalf[1] = WR[1];
+    Whalf[2] = WR[2];
+    Whalf[3] = WR[3];
+    if (WR[0] < rho) {
+      /* right shock wave */
+      S = vR + const_isothermal_soundspeed * sqrtf(rho / WR[0]);
+      if (S > 0.0f) {
+        /* to the left of the shock wave: middle state */
+        Whalf[0] = rho;
+        vhalf = u - vR;
+      } else {
+        /* to the right of the shock wave: right state */
+        Whalf[0] = WR[0];
+        vhalf = 0.0f;
+      }
+    } else {
+      /* right rarefaction wave */
+      SH = vR + const_isothermal_soundspeed;
+      ST = u + const_isothermal_soundspeed;
+      if (ST > 0.0f) {
+        /* to the left of rarefaction: middle state */
+        Whalf[0] = rho;
+        vhalf = u - vR;
+      } else if (SH > 0.0f) {
+        /* inside rarefaction */
+        Whalf[0] = WR[0] * expf(-vR / const_isothermal_soundspeed - 1.0f);
+        vhalf = -const_isothermal_soundspeed - vR;
+      } else {
+        /* to the right of rarefaction: right state */
+        Whalf[0] = WR[0];
+        vhalf = 0.0f;
+      }
+    }
+  }
+
+  /* add the velocity solution along the interface normal to the velocities */
+  Whalf[1] += vhalf * n_unit[0];
+  Whalf[2] += vhalf * n_unit[1];
+  Whalf[3] += vhalf * n_unit[2];
+
+  /* the pressure is completely irrelevant in this case */
+  Whalf[4] =
+      Whalf[0] * const_isothermal_soundspeed * const_isothermal_soundspeed;
+}
+
+__attribute__((always_inline)) INLINE static void riemann_solve_for_flux(
+    float* Wi, float* Wj, float* n_unit, float* vij, float* totflux) {
+
+  float Whalf[5];
+  float flux[5][3];
+  float vtot[3];
+  float rhoe;
+
+  riemann_solver_solve(Wi, Wj, Whalf, n_unit);
+
+  flux[0][0] = Whalf[0] * Whalf[1];
+  flux[0][1] = Whalf[0] * Whalf[2];
+  flux[0][2] = Whalf[0] * Whalf[3];
+
+  vtot[0] = Whalf[1] + vij[0];
+  vtot[1] = Whalf[2] + vij[1];
+  vtot[2] = Whalf[3] + vij[2];
+  flux[1][0] = Whalf[0] * vtot[0] * Whalf[1] + Whalf[4];
+  flux[1][1] = Whalf[0] * vtot[0] * Whalf[2];
+  flux[1][2] = Whalf[0] * vtot[0] * Whalf[3];
+  flux[2][0] = Whalf[0] * vtot[1] * Whalf[1];
+  flux[2][1] = Whalf[0] * vtot[1] * Whalf[2] + Whalf[4];
+  flux[2][2] = Whalf[0] * vtot[1] * Whalf[3];
+  flux[3][0] = Whalf[0] * vtot[2] * Whalf[1];
+  flux[3][1] = Whalf[0] * vtot[2] * Whalf[2];
+  flux[3][2] = Whalf[0] * vtot[2] * Whalf[3] + Whalf[4];
+
+  /* eqn. (15) */
+  /* F_P = \rho e ( \vec{v} - \vec{v_{ij}} ) + P \vec{v} */
+  /* \rho e = P / (\gamma-1) + 1/2 \rho \vec{v}^2 */
+  rhoe = Whalf[4] / hydro_gamma_minus_one +
+         0.5f * Whalf[0] *
+             (vtot[0] * vtot[0] + vtot[1] * vtot[1] + vtot[2] * vtot[2]);
+  flux[4][0] = rhoe * Whalf[1] + Whalf[4] * vtot[0];
+  flux[4][1] = rhoe * Whalf[2] + Whalf[4] * vtot[1];
+  flux[4][2] = rhoe * Whalf[3] + Whalf[4] * vtot[2];
+
+  totflux[0] =
+      flux[0][0] * n_unit[0] + flux[0][1] * n_unit[1] + flux[0][2] * n_unit[2];
+  totflux[1] =
+      flux[1][0] * n_unit[0] + flux[1][1] * n_unit[1] + flux[1][2] * n_unit[2];
+  totflux[2] =
+      flux[2][0] * n_unit[0] + flux[2][1] * n_unit[1] + flux[2][2] * n_unit[2];
+  totflux[3] =
+      flux[3][0] * n_unit[0] + flux[3][1] * n_unit[1] + flux[3][2] * n_unit[2];
+  totflux[4] =
+      flux[4][0] * n_unit[0] + flux[4][1] * n_unit[1] + flux[4][2] * n_unit[2];
+}
+
+#endif /* SWIFT_RIEMANN_EXACT_ISOTHERMAL_H */
diff --git a/src/riemann/riemann_hllc.h b/src/riemann/riemann_hllc.h
index 1a4c3f5f8338f6504a8c5f1d9eab8451eb20246c..962fb8380ad9919d7b7ad96325a12f0bfc53d255 100644
--- a/src/riemann/riemann_hllc.h
+++ b/src/riemann/riemann_hllc.h
@@ -24,6 +24,11 @@
 #include "minmax.h"
 #include "riemann_vacuum.h"
 
+#ifndef EOS_IDEAL_GAS
+#error \
+    "The HLLC Riemann solver currently only supports and ideal gas equation of state. Either select this equation of state, or try using another Riemann solver!"
+#endif
+
 __attribute__((always_inline)) INLINE static void riemann_solve_for_flux(
     float *WL, float *WR, float *n, float *vij, float *totflux) {
 
diff --git a/src/riemann/riemann_trrs.h b/src/riemann/riemann_trrs.h
index b13a76b4c57af548497780e974e5c9ee3a721fac..be56b93583b693d7d492e0c8b1357cdbe57e88b5 100644
--- a/src/riemann/riemann_trrs.h
+++ b/src/riemann/riemann_trrs.h
@@ -23,6 +23,11 @@
 #include "adiabatic_index.h"
 #include "riemann_vacuum.h"
 
+#ifndef EOS_IDEAL_GAS
+#error \
+    "The TRRS Riemann solver currently only supports an ideal gas equation of state. Either select this equation of state, or try using another Riemann solver!"
+#endif
+
 /**
  * @brief Solve the Riemann problem using the Two Rarefaction Riemann Solver
  *
diff --git a/src/runner.c b/src/runner.c
index e036030c772847a5c0a0ecb81b6aea0b01258d75..f5efc99d492be837509e50bd2674ab6923404446 100644
--- a/src/runner.c
+++ b/src/runner.c
@@ -184,12 +184,12 @@ void runner_do_grav_external(struct runner *r, struct cell *c, int timer) {
   for (int i = 0; i < gcount; i++) {
 
     /* Get a direct pointer on the part. */
-    struct gpart *restrict g = &gparts[i];
+    struct gpart *restrict gp = &gparts[i];
 
     /* Is this part within the time step? */
-    if (g->ti_end <= ti_current) {
+    if (gp->ti_end <= ti_current) {
 
-      external_gravity_acceleration(time, potential, constants, g);
+      external_gravity_acceleration(time, potential, constants, gp);
     }
   }
 
@@ -747,111 +747,99 @@ void runner_do_ghost(struct runner *r, struct cell *c) {
 }
 
 /**
- * @brief Drift particles and g-particles in a cell forward in time
+ * @brief Drift particles and g-particles in a cell forward in time,
+ *              unskipping any tasks associated with active cells.
  *
  * @param c The cell.
  * @param e The engine.
+ * @param drift whether to actually drift the particles, will not be
+ *              necessary for non-local cells.
  */
-static void runner_do_drift(struct cell *c, struct engine *e) {
+static void runner_do_drift(struct cell *c, struct engine *e, int drift) {
+
+  const int ti_current = e->ti_current;
+
+  /* Unskip any active tasks. */
+  if (c->ti_end_min == e->ti_current) {
+    const int forcerebuild = cell_unskip_tasks(c, &e->sched);
+    if (forcerebuild) atomic_inc(&e->forcerebuild);
+  }
+
+  /* Do we really need to drift? */
+  if (drift) {
+    if (!e->drift_all && !cell_is_drift_needed(c, ti_current)) return;
+  } else {
+
+    /* Not drifting, but may still need to recurse for task skipping. */
+    if (c->split) {
+      for (int k = 0; k < 8; k++) {
+        if (c->progeny[k] != NULL) {
+          struct cell *cp = c->progeny[k];
+          runner_do_drift(cp, e, 0);
+        }
+      }
+    }
+    return;
+  }
 
   const double timeBase = e->timeBase;
   const int ti_old = c->ti_old;
-  const int ti_current = e->ti_current;
   struct part *const parts = c->parts;
   struct xpart *const xparts = c->xparts;
   struct gpart *const gparts = c->gparts;
 
-  /* Do we need to drift ? */
-  if (!e->drift_all && !cell_is_drift_needed(c, ti_current)) return;
-
-  /* Check that we are actually going to move forward. */
-  if (ti_current == ti_old) return;
-
   /* Drift from the last time the cell was drifted to the current time */
   const double dt = (ti_current - ti_old) * timeBase;
-
   float dx_max = 0.f, dx2_max = 0.f, h_max = 0.f;
-  double e_kin = 0.0, e_int = 0.0, e_pot = 0.0, e_rad = 0.0;
-  double entropy = 0.0, mass = 0.0;
-  double mom[3] = {0.0, 0.0, 0.0};
-  double ang_mom[3] = {0.0, 0.0, 0.0};
 
   /* No children? */
   if (!c->split) {
 
-    /* Loop over all the g-particles in the cell */
-    const size_t nr_gparts = c->gcount;
-    for (size_t k = 0; k < nr_gparts; k++) {
-
-      /* Get a handle on the gpart. */
-      struct gpart *const gp = &gparts[k];
-
-      /* Drift... */
-      drift_gpart(gp, dt, timeBase, ti_old, ti_current);
-
-      /* Compute (square of) motion since last cell construction */
-      const float dx2 = gp->x_diff[0] * gp->x_diff[0] +
-                        gp->x_diff[1] * gp->x_diff[1] +
-                        gp->x_diff[2] * gp->x_diff[2];
-      dx2_max = (dx2_max > dx2) ? dx2_max : dx2;
-    }
-
-    /* Loop over all the particles in the cell (more work for these !) */
-    const size_t nr_parts = c->count;
-    for (size_t k = 0; k < nr_parts; k++) {
-
-      /* Get a handle on the part. */
-      struct part *const p = &parts[k];
-      struct xpart *const xp = &xparts[k];
+    /* Check that we are actually going to move forward. */
+    if (ti_current >= ti_old) {
 
-      /* Drift... */
-      drift_part(p, xp, dt, timeBase, ti_old, ti_current);
+      /* Loop over all the g-particles in the cell */
+      const size_t nr_gparts = c->gcount;
+      for (size_t k = 0; k < nr_gparts; k++) {
 
-      /* Compute (square of) motion since last cell construction */
-      const float dx2 = xp->x_diff[0] * xp->x_diff[0] +
-                        xp->x_diff[1] * xp->x_diff[1] +
-                        xp->x_diff[2] * xp->x_diff[2];
-      dx2_max = (dx2_max > dx2) ? dx2_max : dx2;
+        /* Get a handle on the gpart. */
+        struct gpart *const gp = &gparts[k];
 
-      /* Maximal smoothing length */
-      h_max = (h_max > p->h) ? h_max : p->h;
+        /* Drift... */
+        drift_gpart(gp, dt, timeBase, ti_old, ti_current);
 
-      /* Now collect quantities for statistics */
-
-      const float half_dt =
-          (ti_current - (p->ti_begin + p->ti_end) / 2) * timeBase;
-      const double x[3] = {p->x[0], p->x[1], p->x[2]};
-      const float v[3] = {xp->v_full[0] + p->a_hydro[0] * half_dt,
-                          xp->v_full[1] + p->a_hydro[1] * half_dt,
-                          xp->v_full[2] + p->a_hydro[2] * half_dt};
+        /* Compute (square of) motion since last cell construction */
+        const float dx2 = gp->x_diff[0] * gp->x_diff[0] +
+                          gp->x_diff[1] * gp->x_diff[1] +
+                          gp->x_diff[2] * gp->x_diff[2];
+        dx2_max = (dx2_max > dx2) ? dx2_max : dx2;
+      }
 
-      const float m = hydro_get_mass(p);
+      /* Loop over all the particles in the cell */
+      const size_t nr_parts = c->count;
+      for (size_t k = 0; k < nr_parts; k++) {
 
-      /* Collect mass */
-      mass += m;
+        /* Get a handle on the part. */
+        struct part *const p = &parts[k];
+        struct xpart *const xp = &xparts[k];
 
-      /* Collect momentum */
-      mom[0] += m * v[0];
-      mom[1] += m * v[1];
-      mom[2] += m * v[2];
+        /* Drift... */
+        drift_part(p, xp, dt, timeBase, ti_old, ti_current);
 
-      /* Collect angular momentum */
-      ang_mom[0] += m * (x[1] * v[2] - x[2] * v[1]);
-      ang_mom[1] += m * (x[2] * v[0] - x[0] * v[2]);
-      ang_mom[2] += m * (x[0] * v[1] - x[1] * v[0]);
+        /* Compute (square of) motion since last cell construction */
+        const float dx2 = xp->x_diff[0] * xp->x_diff[0] +
+                          xp->x_diff[1] * xp->x_diff[1] +
+                          xp->x_diff[2] * xp->x_diff[2];
+        dx2_max = (dx2_max > dx2) ? dx2_max : dx2;
 
-      /* Collect energies. */
-      e_kin += 0.5 * m * (v[0] * v[0] + v[1] * v[1] + v[2] * v[2]);
-      e_pot += 0.;
-      e_int += m * hydro_get_internal_energy(p, half_dt);
-      e_rad += cooling_get_radiated_energy(xp);
+        /* Maximal smoothing length */
+        h_max = (h_max > p->h) ? h_max : p->h;
+      }
 
-      /* Collect entropy */
-      entropy += m * hydro_get_entropy(p, half_dt);
-    }
+      /* Now, get the maximal particle motion from its square */
+      dx_max = sqrtf(dx2_max);
 
-    /* Now, get the maximal particle motion from its square */
-    dx_max = sqrtf(dx2_max);
+    } /* Check that we are actually going to move forward. */
   }
 
   /* Otherwise, aggregate data from children. */
@@ -863,40 +851,15 @@ static void runner_do_drift(struct cell *c, struct engine *e) {
         struct cell *cp = c->progeny[k];
 
         /* Recurse. */
-        runner_do_drift(cp, e);
-
+        runner_do_drift(cp, e, drift);
         dx_max = max(dx_max, cp->dx_max);
         h_max = max(h_max, cp->h_max);
-        mass += cp->mass;
-        e_kin += cp->e_kin;
-        e_int += cp->e_int;
-        e_pot += cp->e_pot;
-        e_rad += cp->e_rad;
-        entropy += cp->entropy;
-        mom[0] += cp->mom[0];
-        mom[1] += cp->mom[1];
-        mom[2] += cp->mom[2];
-        ang_mom[0] += cp->ang_mom[0];
-        ang_mom[1] += cp->ang_mom[1];
-        ang_mom[2] += cp->ang_mom[2];
       }
   }
 
   /* Store the values */
   c->h_max = h_max;
   c->dx_max = dx_max;
-  c->mass = mass;
-  c->e_kin = e_kin;
-  c->e_int = e_int;
-  c->e_pot = e_pot;
-  c->e_rad = e_rad;
-  c->entropy = entropy;
-  c->mom[0] = mom[0];
-  c->mom[1] = mom[1];
-  c->mom[2] = mom[2];
-  c->ang_mom[0] = ang_mom[0];
-  c->ang_mom[1] = ang_mom[1];
-  c->ang_mom[2] = ang_mom[2];
 
   /* Update the time of the last drift */
   c->ti_old = ti_current;
@@ -918,9 +881,11 @@ void runner_do_drift_mapper(void *map_data, int num_elements,
 
   for (int ind = 0; ind < num_elements; ind++) {
     struct cell *c = &cells[ind];
-
-    /* Only drift local particles. */
-    if (c != NULL && c->nodeID == e->nodeID) runner_do_drift(c, e);
+#ifdef WITH_MPI
+    if (c != NULL) runner_do_drift(c, e, (c->nodeID == e->nodeID));
+#else
+    if (c != NULL) runner_do_drift(c, e, 1);
+#endif
   }
 }
 
@@ -1268,6 +1233,35 @@ void *runner_main(void *data) {
       struct cell *cj = t->cj;
       t->rid = r->cpuid;
 
+/* Check that we haven't scheduled an inactive task */
+#ifdef SWIFT_DEBUG_CHECKS
+      if (cj == NULL) { /* self */
+        if (ci->ti_end_min > e->ti_current && t->type != task_type_sort)
+          error(
+              "Task (type='%s/%s') should have been skipped ti_current=%d "
+              "c->ti_end_min=%d",
+              taskID_names[t->type], subtaskID_names[t->subtype], e->ti_current,
+              ci->ti_end_min);
+
+        /* Special case for sorts */
+        if (ci->ti_end_min > e->ti_current && t->type == task_type_sort &&
+            t->flags == 0)
+          error(
+              "Task (type='%s/%s') should have been skipped ti_current=%d "
+              "c->ti_end_min=%d t->flags=%d",
+              taskID_names[t->type], subtaskID_names[t->subtype], e->ti_current,
+              ci->ti_end_min, t->flags);
+
+      } else { /* pair */
+        if (ci->ti_end_min > e->ti_current && cj->ti_end_min > e->ti_current)
+          error(
+              "Task (type='%s/%s') should have been skipped ti_current=%d "
+              "ci->ti_end_min=%d cj->ti_end_min=%d",
+              taskID_names[t->type], subtaskID_names[t->subtype], e->ti_current,
+              ci->ti_end_min, cj->ti_end_min);
+      }
+#endif
+
       /* Different types of tasks... */
       switch (t->type) {
         case task_type_self:
@@ -1280,9 +1274,12 @@ void *runner_main(void *data) {
             runner_doself2_force(r, ci);
           else if (t->subtype == task_subtype_grav)
             runner_doself_grav(r, ci, 1);
+          else if (t->subtype == task_subtype_external_grav)
+            runner_do_grav_external(r, ci, 1);
           else
             error("Unknown task subtype.");
           break;
+
         case task_type_pair:
           if (t->subtype == task_subtype_density)
             runner_dopair1_density(r, ci, cj);
@@ -1297,9 +1294,7 @@ void *runner_main(void *data) {
           else
             error("Unknown task subtype.");
           break;
-        case task_type_sort:
-          runner_do_sort(r, ci, t->flags, 1);
-          break;
+
         case task_type_sub_self:
           if (t->subtype == task_subtype_density)
             runner_dosub_self1_density(r, ci, 1);
@@ -1311,9 +1306,12 @@ void *runner_main(void *data) {
             runner_dosub_self2_force(r, ci, 1);
           else if (t->subtype == task_subtype_grav)
             runner_dosub_grav(r, ci, cj, 1);
+          else if (t->subtype == task_subtype_external_grav)
+            runner_do_grav_external(r, ci, 1);
           else
             error("Unknown task subtype.");
           break;
+
         case task_type_sub_pair:
           if (t->subtype == task_subtype_density)
             runner_dosub_pair1_density(r, ci, cj, t->flags, 1);
@@ -1328,6 +1326,10 @@ void *runner_main(void *data) {
           else
             error("Unknown task subtype.");
           break;
+
+        case task_type_sort:
+          runner_do_sort(r, ci, t->flags, 1);
+          break;
         case task_type_init:
           runner_do_init(r, ci, 1);
           break;
@@ -1371,9 +1373,6 @@ void *runner_main(void *data) {
         case task_type_grav_fft:
           runner_do_grav_fft(r);
           break;
-        case task_type_grav_external:
-          runner_do_grav_external(r, t->ci, 1);
-          break;
         case task_type_cooling:
           runner_do_cooling(r, t->ci, 1);
           break;
diff --git a/src/scheduler.c b/src/scheduler.c
index f98f8824c00186956e8abe5e5c2aa3aae73abf4e..44790fcd2fa5f67e6f325ba5849da19e35ab285a 100644
--- a/src/scheduler.c
+++ b/src/scheduler.c
@@ -50,6 +50,12 @@
 #include "task.h"
 #include "timers.h"
 
+/**
+ * @brief Re-set the list of active tasks.
+ */
+
+void scheduler_clear_active(struct scheduler *s) { s->active_count = 0; }
+
 /**
  * @brief Add an unlock_task to the given task.
  *
@@ -697,7 +703,7 @@ struct task *scheduler_addtask(struct scheduler *s, enum task_types type,
   t->wait = wait;
   t->ci = ci;
   t->cj = cj;
-  t->skip = 0;
+  t->skip = 1; /* Mark tasks as skip by default. */
   t->tight = tight;
   t->implicit = 0;
   t->weight = 0;
@@ -863,6 +869,7 @@ void scheduler_reset(struct scheduler *s, int size) {
     /* Free existing task lists if necessary. */
     if (s->tasks != NULL) free(s->tasks);
     if (s->tasks_ind != NULL) free(s->tasks_ind);
+    if (s->tid_active != NULL) free(s->tid_active);
 
     /* Allocate the new lists. */
     if (posix_memalign((void *)&s->tasks, task_align,
@@ -871,6 +878,9 @@ void scheduler_reset(struct scheduler *s, int size) {
 
     if ((s->tasks_ind = (int *)malloc(sizeof(int) * size)) == NULL)
       error("Failed to allocate task lists.");
+
+    if ((s->tid_active = (int *)malloc(sizeof(int) * size)) == NULL)
+      error("Failed to allocate aactive task lists.");
   }
 
   /* Reset the counters. */
@@ -882,6 +892,7 @@ void scheduler_reset(struct scheduler *s, int size) {
   s->submask = 0;
   s->nr_unlocks = 0;
   s->completed_unlock_writes = 0;
+  s->active_count = 0;
 
   /* Set the task pointers in the queues. */
   for (int k = 0; k < s->nr_queues; k++) s->queues[k].tasks = s->tasks;
@@ -891,11 +902,11 @@ void scheduler_reset(struct scheduler *s, int size) {
  * @brief Compute the task weights
  *
  * @param s The #scheduler.
- * @param verbose Are we talkative ?
+ * @param verbose Are we talkative?
  */
+
 void scheduler_reweight(struct scheduler *s, int verbose) {
 
-  const ticks tic = getticks();
   const int nr_tasks = s->nr_tasks;
   int *tid = s->tasks_ind;
   struct task *tasks = s->tasks;
@@ -904,6 +915,7 @@ void scheduler_reweight(struct scheduler *s, int verbose) {
                                0.4025, 0.1897, 0.4025, 0.1897, 0.4025,
                                0.5788, 0.4025, 0.5788};
   const float wscale = 0.001;
+  const ticks tic = getticks();
 
   /* Run through the tasks backwards and set their weights. */
   for (int k = nr_tasks - 1; k >= 0; k--) {
@@ -1033,33 +1045,84 @@ void scheduler_enqueue_mapper(void *map_data, int num_elements,
 void scheduler_start(struct scheduler *s, unsigned int mask,
                      unsigned int submask) {
 
-  // ticks tic = getticks();
-
   /* Store the masks */
   s->mask = mask;
   s->submask = submask | (1 << task_subtype_none);
 
-  /* Clear all the waits and rids. */
+  /* Clear all the waits, rids, and times. */
   for (int k = 0; k < s->nr_tasks; k++) {
     s->tasks[k].wait = 1;
     s->tasks[k].rid = -1;
+    s->tasks[k].tic = 0;
+    s->tasks[k].toc = 0;
+    if (((1 << s->tasks[k].type) & mask) == 0 ||
+        ((1 << s->tasks[k].subtype) & s->submask) == 0)
+      s->tasks[k].skip = 1;
   }
 
   /* Re-wait the tasks. */
   threadpool_map(s->threadpool, scheduler_rewait_mapper, s->tasks, s->nr_tasks,
                  sizeof(struct task), 1000, s);
 
+/* Check we have not missed an active task */
+#ifdef SWIFT_DEBUG_CHECKS
+
+  const int ti_current = s->space->e->ti_current;
+  for (int k = 0; k < s->nr_tasks; k++) {
+
+    struct task *t = &s->tasks[k];
+    struct cell *ci = t->ci;
+    struct cell *cj = t->cj;
+
+    if (cj == NULL) { /* self */
+
+      if (ci->ti_end_min == ti_current && t->skip && t->type != task_type_sort)
+        error(
+            "Task (type='%s/%s') should not have been skipped ti_current=%d "
+            "c->ti_end_min=%d",
+            taskID_names[t->type], subtaskID_names[t->subtype], ti_current,
+            ci->ti_end_min);
+
+      /* Special treatment for sort tasks */
+      if (ci->ti_end_min == ti_current && t->skip &&
+          t->type == task_type_sort && t->flags == 0)
+        error(
+            "Task (type='%s/%s') should not have been skipped ti_current=%d "
+            "c->ti_end_min=%d t->flags=%d",
+            taskID_names[t->type], subtaskID_names[t->subtype], ti_current,
+            ci->ti_end_min, t->flags);
+
+    } else { /* pair */
+
+      if ((ci->ti_end_min == ti_current || cj->ti_end_min == ti_current) &&
+          t->skip)
+        error(
+            "Task (type='%s/%s') should not have been skipped ti_current=%d "
+            "ci->ti_end_min=%d cj->ti_end_min=%d",
+            taskID_names[t->type], subtaskID_names[t->subtype], ti_current,
+            ci->ti_end_min, cj->ti_end_min);
+    }
+  }
+
+#endif
+
   /* Loop over the tasks and enqueue whoever is ready. */
-  threadpool_map(s->threadpool, scheduler_enqueue_mapper, s->tasks_ind,
-                 s->nr_tasks, sizeof(int), 1000, s);
+  for (int k = 0; k < s->active_count; k++) {
+    struct task *t = &s->tasks[s->tid_active[k]];
+    if (atomic_dec(&t->wait) == 1 && !t->skip && ((1 << t->type) & s->mask) &&
+        ((1 << t->subtype) & s->submask)) {
+      scheduler_enqueue(s, t);
+      pthread_cond_signal(&s->sleep_cond);
+    }
+  }
+
+  /* Clear the list of active tasks. */
+  s->active_count = 0;
 
   /* To be safe, fire of one last sleep_cond in a safe way. */
   pthread_mutex_lock(&s->sleep_mutex);
   pthread_cond_broadcast(&s->sleep_cond);
   pthread_mutex_unlock(&s->sleep_mutex);
-
-  /* message("enqueueing tasks took %.3f %s." ,
-          clocks_from_ticks( getticks() - tic ), clocks_getunit()); */
 }
 
 /**
@@ -1110,20 +1173,10 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) {
         break;
       case task_type_pair:
       case task_type_sub_pair:
-        if (t->subtype == task_subtype_grav) {
-
-          qid = t->ci->gsuper->owner;
-          if (qid < 0 ||
-              s->queues[qid].count > s->queues[t->cj->gsuper->owner].count)
-            qid = t->cj->gsuper->owner;
-
-        } else {
-
-          qid = t->ci->super->owner;
-          if (qid < 0 ||
-              s->queues[qid].count > s->queues[t->cj->super->owner].count)
-            qid = t->cj->super->owner;
-        }
+        qid = t->ci->super->owner;
+        if (qid < 0 ||
+            s->queues[qid].count > s->queues[t->cj->super->owner].count)
+          qid = t->cj->super->owner;
         break;
       case task_type_recv:
 #ifdef WITH_MPI
@@ -1222,6 +1275,9 @@ struct task *scheduler_done(struct scheduler *s, struct task *t) {
     pthread_mutex_unlock(&s->sleep_mutex);
   }
 
+  /* Mark the task as skip. */
+  t->skip = 1;
+
   /* Return the next best task. Note that we currently do not
      implement anything that does this, as getting it to respect
      priorities is too tricky and currently unnecessary. */
@@ -1437,6 +1493,7 @@ void scheduler_clean(struct scheduler *s) {
   free(s->tasks_ind);
   free(s->unlocks);
   free(s->unlock_ind);
+  free(s->tid_active);
   for (int i = 0; i < s->nr_queues; ++i) queue_clean(&s->queues[i]);
   free(s->queues);
 }
diff --git a/src/scheduler.h b/src/scheduler.h
index c4eb5e99447d623e5fb8e442efc1c254c00bfadd..22c83ae3c551d4dc7e601e23f7a5301241bd1fa4 100644
--- a/src/scheduler.h
+++ b/src/scheduler.h
@@ -33,6 +33,7 @@
 
 /* Includes. */
 #include "cell.h"
+#include "inline.h"
 #include "lock.h"
 #include "queue.h"
 #include "task.h"
@@ -83,6 +84,10 @@ struct scheduler {
   /* The task indices. */
   int *tasks_ind;
 
+  /* List of initial tasks. */
+  int *tid_active;
+  int active_count;
+
   /* The task unlocks. */
   struct task **volatile unlocks;
   int *volatile unlock_ind;
@@ -105,7 +110,24 @@ struct scheduler {
   int nodeID;
 };
 
+/* Inlined functions (for speed). */
+/**
+ * @brief Add a task to the list of active tasks.
+ *
+ * @param s The #scheduler.
+ * @param t The task to be added.
+ */
+
+__attribute__((always_inline)) INLINE static void scheduler_activate(
+    struct scheduler *s, struct task *t) {
+  if (atomic_cas(&t->skip, 1, 0)) {
+    int ind = atomic_inc(&s->active_count);
+    s->tid_active[ind] = t - s->tasks;
+  }
+}
+
 /* Function prototypes. */
+void scheduler_clear_active(struct scheduler *s);
 void scheduler_init(struct scheduler *s, struct space *space, int nr_tasks,
                     int nr_queues, unsigned int flags, int nodeID,
                     struct threadpool *tp);
diff --git a/src/space.c b/src/space.c
index 1d2733ab2d4356c19cc0e65a0b9441edcf0acb80..86c8cc36eacd58c89181831b3c5472026281043c 100644
--- a/src/space.c
+++ b/src/space.c
@@ -112,6 +112,15 @@ struct parallel_sort {
   volatile unsigned int first, last, waiting;
 };
 
+/**
+ * @brief Information required to compute the particle cell indices.
+ */
+struct index_data {
+  struct space *s;
+  struct cell *cells;
+  int *ind;
+};
+
 /**
  * @brief Get the shift-id of the given pair of cells, swapping them
  *      if need be.
@@ -326,7 +335,6 @@ void space_regrid(struct space *s, double cell_max, int verbose) {
           c->count = 0;
           c->gcount = 0;
           c->super = c;
-          c->gsuper = c;
           c->ti_old = ti_current;
           lock_init(&c->lock);
         }
@@ -387,9 +395,11 @@ void space_regrid(struct space *s, double cell_max, int verbose) {
       s->cells_top[k].nr_density = 0;
       s->cells_top[k].nr_gradient = 0;
       s->cells_top[k].nr_force = 0;
+      s->cells_top[k].nr_grav = 0;
       s->cells_top[k].density = NULL;
       s->cells_top[k].gradient = NULL;
       s->cells_top[k].force = NULL;
+      s->cells_top[k].grav = NULL;
       s->cells_top[k].dx_max = 0.0f;
       s->cells_top[k].sorted = 0;
       s->cells_top[k].count = 0;
@@ -398,8 +408,20 @@ void space_regrid(struct space *s, double cell_max, int verbose) {
       s->cells_top[k].extra_ghost = NULL;
       s->cells_top[k].ghost = NULL;
       s->cells_top[k].kick = NULL;
+      s->cells_top[k].cooling = NULL;
+      s->cells_top[k].sourceterms = NULL;
       s->cells_top[k].super = &s->cells_top[k];
-      s->cells_top[k].gsuper = &s->cells_top[k];
+#if WITH_MPI
+      s->cells_top[k].recv_xv = NULL;
+      s->cells_top[k].recv_rho = NULL;
+      s->cells_top[k].recv_gradient = NULL;
+      s->cells_top[k].recv_ti = NULL;
+
+      s->cells_top[k].send_xv = NULL;
+      s->cells_top[k].send_rho = NULL;
+      s->cells_top[k].send_gradient = NULL;
+      s->cells_top[k].send_ti = NULL;
+#endif
     }
     s->maxdepth = 0;
   }
@@ -432,49 +454,21 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
   struct cell *restrict cells_top = s->cells_top;
   const int ti_current = (s->e != NULL) ? s->e->ti_current : 0;
 
-  const double ih[3] = {s->iwidth[0], s->iwidth[1], s->iwidth[2]};
-  const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]};
-  const int cdim[3] = {s->cdim[0], s->cdim[1], s->cdim[2]};
-
   /* Run through the particles and get their cell index. */
-  // tic = getticks();
   const size_t ind_size = s->size_parts;
   int *ind;
   if ((ind = (int *)malloc(sizeof(int) * ind_size)) == NULL)
     error("Failed to allocate temporary particle indices.");
-  for (size_t k = 0; k < nr_parts; k++) {
-    struct part *restrict p = &s->parts[k];
-    for (int j = 0; j < 3; j++)
-      if (p->x[j] < 0.0)
-        p->x[j] += dim[j];
-      else if (p->x[j] >= dim[j])
-        p->x[j] -= dim[j];
-    ind[k] =
-        cell_getid(cdim, p->x[0] * ih[0], p->x[1] * ih[1], p->x[2] * ih[2]);
-    cells_top[ind[k]].count++;
-  }
-  // message( "getting particle indices took %.3f %s." ,
-  // clocks_from_ticks(getticks() - tic), clocks_getunit()):
+  if (ind_size > 0) space_parts_get_cell_index(s, ind, cells_top, verbose);
+  for (size_t i = 0; i < s->nr_parts; ++i) cells_top[ind[i]].count++;
 
   /* Run through the gravity particles and get their cell index. */
-  // tic = getticks();
   const size_t gind_size = s->size_gparts;
   int *gind;
   if ((gind = (int *)malloc(sizeof(int) * gind_size)) == NULL)
     error("Failed to allocate temporary g-particle indices.");
-  for (size_t k = 0; k < nr_gparts; k++) {
-    struct gpart *restrict gp = &s->gparts[k];
-    for (int j = 0; j < 3; j++)
-      if (gp->x[j] < 0.0)
-        gp->x[j] += dim[j];
-      else if (gp->x[j] >= dim[j])
-        gp->x[j] -= dim[j];
-    gind[k] =
-        cell_getid(cdim, gp->x[0] * ih[0], gp->x[1] * ih[1], gp->x[2] * ih[2]);
-    cells_top[gind[k]].gcount++;
-  }
-// message( "getting g-particle indices took %.3f %s." ,
-// clocks_from_ticks(getticks() - tic), clocks_getunit());
+  if (gind_size > 0) space_gparts_get_cell_index(s, gind, cells_top, verbose);
+  for (size_t i = 0; i < s->nr_gparts; ++i) cells_top[gind[i]].gcount++;
 
 #ifdef WITH_MPI
 
@@ -578,6 +572,9 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
     ind = ind_new;
   }
 
+  const int cdim[3] = {s->cdim[0], s->cdim[1], s->cdim[2]};
+  const double ih[3] = {s->iwidth[0], s->iwidth[1], s->iwidth[2]};
+
   /* Assign each particle to its cell. */
   for (size_t k = nr_parts; k < s->nr_parts; k++) {
     const struct part *const p = &s->parts[k];
@@ -605,9 +602,9 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
   for (size_t k = 1; k < nr_parts; k++) {
     if (ind[k - 1] > ind[k]) {
       error("Sort failed!");
-    } else if (ind[k] != cell_getid(cdim, s->parts[k].x[0] * ih[0],
-                                    s->parts[k].x[1] * ih[1],
-                                    s->parts[k].x[2] * ih[2])) {
+    } else if (ind[k] != cell_getid(s->cdim, s->parts[k].x[0] * s->iwidth[0],
+                                    s->parts[k].x[1] * s->iwidth[1],
+                                    s->parts[k].x[2] * s->iwidth[2])) {
       error("Incorrect indices!");
     }
   }
@@ -752,6 +749,164 @@ void space_sanitize(struct space *s) {
   }
 }
 
+/**
+ * @brief #threadpool mapper function to compute the particle cell indices.
+ *
+ * @param map_data Pointer towards the particles.
+ * @param nr_parts The number of particles to treat.
+ * @param extra_data Pointers to the space and index list
+ */
+void space_parts_get_cell_index_mapper(void *map_data, int nr_parts,
+                                       void *extra_data) {
+
+  /* Unpack the data */
+  struct part *restrict parts = (struct part *)map_data;
+  struct index_data *data = (struct index_data *)extra_data;
+  struct space *s = data->s;
+  int *const ind = data->ind + (ptrdiff_t)(parts - s->parts);
+
+  /* Get some constants */
+  const double dim_x = s->dim[0];
+  const double dim_y = s->dim[1];
+  const double dim_z = s->dim[2];
+  const int cdim[3] = {s->cdim[0], s->cdim[1], s->cdim[2]};
+  const double ih_x = s->iwidth[0];
+  const double ih_y = s->iwidth[1];
+  const double ih_z = s->iwidth[2];
+
+  for (int k = 0; k < nr_parts; k++) {
+
+    /* Get the particle */
+    struct part *restrict p = &parts[k];
+
+    const double old_pos_x = p->x[0];
+    const double old_pos_y = p->x[1];
+    const double old_pos_z = p->x[2];
+
+    /* Put it back into the simulation volume */
+    const double pos_x = box_wrap(old_pos_x, 0.0, dim_x);
+    const double pos_y = box_wrap(old_pos_y, 0.0, dim_y);
+    const double pos_z = box_wrap(old_pos_z, 0.0, dim_z);
+
+    /* Get its cell index */
+    const int index =
+        cell_getid(cdim, pos_x * ih_x, pos_y * ih_y, pos_z * ih_z);
+    ind[k] = index;
+
+    /* Update the position */
+    p->x[0] = pos_x;
+    p->x[1] = pos_y;
+    p->x[2] = pos_z;
+  }
+}
+
+/**
+ * @brief #threadpool mapper function to compute the g-particle cell indices.
+ *
+ * @param map_data Pointer towards the g-particles.
+ * @param nr_gparts The number of g-particles to treat.
+ * @param extra_data Pointers to the space and index list
+ */
+void space_gparts_get_cell_index_mapper(void *map_data, int nr_gparts,
+                                        void *extra_data) {
+
+  /* Unpack the data */
+  struct gpart *restrict gparts = (struct gpart *)map_data;
+  struct index_data *data = (struct index_data *)extra_data;
+  struct space *s = data->s;
+  int *const ind = data->ind + (ptrdiff_t)(gparts - s->gparts);
+
+  /* Get some constants */
+  const double dim_x = s->dim[0];
+  const double dim_y = s->dim[1];
+  const double dim_z = s->dim[2];
+  const int cdim[3] = {s->cdim[0], s->cdim[1], s->cdim[2]};
+  const double ih_x = s->iwidth[0];
+  const double ih_y = s->iwidth[1];
+  const double ih_z = s->iwidth[2];
+
+  for (int k = 0; k < nr_gparts; k++) {
+
+    /* Get the particle */
+    struct gpart *restrict gp = &gparts[k];
+
+    const double old_pos_x = gp->x[0];
+    const double old_pos_y = gp->x[1];
+    const double old_pos_z = gp->x[2];
+
+    /* Put it back into the simulation volume */
+    const double pos_x = box_wrap(old_pos_x, 0.0, dim_x);
+    const double pos_y = box_wrap(old_pos_y, 0.0, dim_y);
+    const double pos_z = box_wrap(old_pos_z, 0.0, dim_z);
+
+    /* Get its cell index */
+    const int index =
+        cell_getid(cdim, pos_x * ih_x, pos_y * ih_y, pos_z * ih_z);
+    ind[k] = index;
+
+    /* Update the position */
+    gp->x[0] = pos_x;
+    gp->x[1] = pos_y;
+    gp->x[2] = pos_z;
+  }
+}
+
+/**
+ * @brief Computes the cell index of all the particles and update the cell
+ * count.
+ *
+ * @param s The #space.
+ * @param ind The array of indices to fill.
+ * @param cells The array of #cell to update.
+ * @param verbose Are we talkative ?
+ */
+void space_parts_get_cell_index(struct space *s, int *ind, struct cell *cells,
+                                int verbose) {
+
+  const ticks tic = getticks();
+
+  /* Pack the extra information */
+  struct index_data data;
+  data.s = s;
+  data.cells = cells;
+  data.ind = ind;
+
+  threadpool_map(&s->e->threadpool, space_parts_get_cell_index_mapper, s->parts,
+                 s->nr_parts, sizeof(struct part), 1000, &data);
+
+  if (verbose)
+    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
+            clocks_getunit());
+}
+
+/**
+ * @brief Computes the cell index of all the g-particles and update the cell
+ * gcount.
+ *
+ * @param s The #space.
+ * @param gind The array of indices to fill.
+ * @param cells The array of #cell to update.
+ * @param verbose Are we talkative ?
+ */
+void space_gparts_get_cell_index(struct space *s, int *gind, struct cell *cells,
+                                 int verbose) {
+
+  const ticks tic = getticks();
+
+  /* Pack the extra information */
+  struct index_data data;
+  data.s = s;
+  data.cells = cells;
+  data.ind = gind;
+
+  threadpool_map(&s->e->threadpool, space_gparts_get_cell_index_mapper,
+                 s->gparts, s->nr_gparts, sizeof(struct gpart), 1000, &data);
+
+  if (verbose)
+    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
+            clocks_getunit());
+}
+
 /**
  * @brief Sort the particles and condensed particles according to the given
  * indices.
@@ -1337,7 +1492,6 @@ void space_split_mapper(void *map_data, int num_cells, void *extra_data) {
         temp->nodeID = c->nodeID;
         temp->parent = c;
         temp->super = NULL;
-        temp->gsuper = NULL;
         c->progeny[k] = temp;
       }
 
diff --git a/src/space.h b/src/space.h
index 85f32bd7580945de6d9713316b557c92667987c0..011dfb71a6c3ac2b51093ce83bc6b65ceecc2821 100644
--- a/src/space.h
+++ b/src/space.h
@@ -170,6 +170,10 @@ void space_recycle(struct space *s, struct cell *c);
 void space_split(struct space *s, struct cell *cells, int nr_cells,
                  int verbose);
 void space_split_mapper(void *map_data, int num_elements, void *extra_data);
+void space_parts_get_cell_index(struct space *s, int *ind, struct cell *cells,
+                                int verbose);
+void space_gparts_get_cell_index(struct space *s, int *gind, struct cell *cells,
+                                 int verbose);
 void space_do_parts_sort();
 void space_do_gparts_sort();
 void space_init_parts(struct space *s);
diff --git a/src/statistics.c b/src/statistics.c
new file mode 100644
index 0000000000000000000000000000000000000000..0afca84f517ca201fc184de2abff40b9ce9d5711
--- /dev/null
+++ b/src/statistics.c
@@ -0,0 +1,316 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+/* Config parameters. */
+#include "../config.h"
+
+/* Some standard headers. */
+#include <string.h>
+
+/* MPI headers. */
+#ifdef WITH_MPI
+#include <mpi.h>
+#endif
+
+/* This object's header. */
+#include "statistics.h"
+
+/* Local headers. */
+#include "cooling.h"
+#include "engine.h"
+#include "error.h"
+#include "hydro.h"
+#include "threadpool.h"
+
+/**
+ * @brief Information required to compute the statistics in the mapper
+ */
+struct index_data {
+  /*! The space we play with */
+  const struct space *s;
+
+  /*! The #statistics aggregator to fill */
+  struct statistics *stats;
+};
+
+/**
+ * @brief Adds the content of one #statistics aggregator to another one.
+ *
+ * Performs a += b;
+ *
+ * @param a The #statistics structure to update.
+ * @param b The #statistics structure to add to a.
+ */
+void stats_add(struct statistics *a, const struct statistics *b) {
+
+  /* Add everything */
+  a->E_kin += b->E_kin;
+  a->E_int += b->E_int;
+  a->E_pot += b->E_pot;
+  a->E_rad += b->E_rad;
+  a->entropy += b->entropy;
+  a->mass += b->mass;
+  a->mom[0] += b->mom[0];
+  a->mom[1] += b->mom[1];
+  a->mom[2] += b->mom[2];
+  a->ang_mom[0] += b->ang_mom[0];
+  a->ang_mom[1] += b->ang_mom[1];
+  a->ang_mom[2] += b->ang_mom[2];
+}
+
+/**
+ * @brief Initialises a statistics aggregator to a valid state.
+ *
+ * @param s The #statistics aggregator to initialise
+ */
+void stats_init(struct statistics *s) {
+
+  /* Zero everything */
+  bzero(s, sizeof(struct statistics));
+
+  /* Set the lock */
+  lock_init(&s->lock);
+}
+
+/**
+ * @brief The #threadpool mapper function used to collect statistics for #part.
+ *
+ * @param map_data Pointer to the particles.
+ * @param nr_parts The number of particles in this chunk
+ * @param extra_data The #statistics aggregator.
+ */
+void stats_collect_part_mapper(void *map_data, int nr_parts, void *extra_data) {
+
+  /* Unpack the data */
+  const struct index_data *data = (struct index_data *)extra_data;
+  const struct space *s = data->s;
+  const struct part *restrict parts = (struct part *)map_data;
+  const struct xpart *restrict xparts =
+      s->xparts + (ptrdiff_t)(parts - s->parts);
+  const int ti_current = s->e->ti_current;
+  const double timeBase = s->e->timeBase;
+  struct statistics *const global_stats = data->stats;
+
+  /* Local accumulator */
+  struct statistics stats;
+  stats_init(&stats);
+
+  /* Loop over particles */
+  for (int k = 0; k < nr_parts; k++) {
+
+    /* Get the particle */
+    const struct part *p = &parts[k];
+    const struct xpart *xp = &xparts[k];
+
+    /* Get useful variables */
+    const float dt = (ti_current - (p->ti_begin + p->ti_end) / 2) * timeBase;
+    const double x[3] = {p->x[0], p->x[1], p->x[2]};
+    float a_tot[3] = {p->a_hydro[0], p->a_hydro[1], p->a_hydro[2]};
+    if (p->gpart != NULL) {
+      a_tot[0] += p->gpart->a_grav[0];
+      a_tot[1] += p->gpart->a_grav[1];
+      a_tot[2] += p->gpart->a_grav[2];
+    }
+    const float v[3] = {xp->v_full[0] + a_tot[0] * dt,
+                        xp->v_full[1] + a_tot[1] * dt,
+                        xp->v_full[2] + a_tot[2] * dt};
+
+    const float m = hydro_get_mass(p);
+
+    /* Collect mass */
+    stats.mass += m;
+
+    /* Collect momentum */
+    stats.mom[0] += m * v[0];
+    stats.mom[1] += m * v[1];
+    stats.mom[2] += m * v[2];
+
+    /* Collect angular momentum */
+    stats.ang_mom[0] += m * (x[1] * v[2] - x[2] * v[1]);
+    stats.ang_mom[1] += m * (x[2] * v[0] - x[0] * v[2]);
+    stats.ang_mom[2] += m * (x[0] * v[1] - x[1] * v[0]);
+
+    /* Collect energies. */
+    stats.E_kin += 0.5f * m * (v[0] * v[0] + v[1] * v[1] + v[2] * v[2]);
+    stats.E_pot += 0.;
+    stats.E_int += m * hydro_get_internal_energy(p, dt);
+    stats.E_rad += cooling_get_radiated_energy(xp);
+
+    /* Collect entropy */
+    stats.entropy += m * hydro_get_entropy(p, dt);
+  }
+
+  /* Now write back to memory */
+  if (lock_lock(&global_stats->lock) == 0) stats_add(global_stats, &stats);
+  if (lock_unlock(&global_stats->lock) != 0) error("Failed to unlock stats.");
+}
+
+/**
+ * @brief The #threadpool mapper function used to collect statistics for #gpart.
+ *
+ * @param map_data Pointer to the g-particles.
+ * @param nr_gparts The number of g-particles in this chunk
+ * @param extra_data The #statistics aggregator.
+ */
+void stats_collect_gpart_mapper(void *map_data, int nr_gparts,
+                                void *extra_data) {
+
+  /* Unpack the data */
+  const struct index_data *data = (struct index_data *)extra_data;
+  const struct space *s = data->s;
+  const struct gpart *restrict gparts = (struct gpart *)map_data;
+  const int ti_current = s->e->ti_current;
+  const double timeBase = s->e->timeBase;
+  struct statistics *const global_stats = data->stats;
+
+  /* Local accumulator */
+  struct statistics stats;
+  stats_init(&stats);
+
+  /* Loop over particles */
+  for (int k = 0; k < nr_gparts; k++) {
+
+    /* Get the particle */
+    const struct gpart *gp = &gparts[k];
+
+    /* If the g-particle has a counterpart, ignore it */
+    if (gp->id_or_neg_offset < 0) continue;
+
+    /* Get useful variables */
+    const float dt = (ti_current - (gp->ti_begin + gp->ti_end) / 2) * timeBase;
+    const double x[3] = {gp->x[0], gp->x[1], gp->x[2]};
+    const float v[3] = {gp->v_full[0] + gp->a_grav[0] * dt,
+                        gp->v_full[1] + gp->a_grav[1] * dt,
+                        gp->v_full[2] + gp->a_grav[2] * dt};
+
+    const float m = gp->mass;
+
+    /* Collect mass */
+    stats.mass += m;
+
+    /* Collect momentum */
+    stats.mom[0] += m * v[0];
+    stats.mom[1] += m * v[1];
+    stats.mom[2] += m * v[2];
+
+    /* Collect angular momentum */
+    stats.ang_mom[0] += m * (x[1] * v[2] - x[2] * v[1]);
+    stats.ang_mom[1] += m * (x[2] * v[0] - x[0] * v[2]);
+    stats.ang_mom[2] += m * (x[0] * v[1] - x[1] * v[0]);
+
+    /* Collect energies. */
+    stats.E_kin += 0.5f * m * (v[0] * v[0] + v[1] * v[1] + v[2] * v[2]);
+    stats.E_pot += 0.;
+  }
+
+  /* Now write back to memory */
+  if (lock_lock(&global_stats->lock) == 0) stats_add(global_stats, &stats);
+  if (lock_unlock(&global_stats->lock) != 0) error("Failed to unlock stats.");
+}
+
+/**
+ * @brief Collect physical statistics over all particles in a #space.
+ *
+ * @param s The #space to collect from.
+ * @param stats The #statistics aggregator to fill.
+ */
+void stats_collect(const struct space *s, struct statistics *stats) {
+
+  /* Prepare the data */
+  struct index_data extra_data;
+  extra_data.s = s;
+  extra_data.stats = stats;
+
+  /* Run parallel collection of statistics for parts */
+  if (s->nr_parts > 0)
+    threadpool_map(&s->e->threadpool, stats_collect_part_mapper, s->parts,
+                   s->nr_parts, sizeof(struct part), 10000, &extra_data);
+
+  /* Run parallel collection of statistics for gparts */
+  if (s->nr_gparts > 0)
+    threadpool_map(&s->e->threadpool, stats_collect_gpart_mapper, s->gparts,
+                   s->nr_gparts, sizeof(struct gpart), 10000, &extra_data);
+}
+
+/**
+ * @brief Prints the content of a #statistics aggregator to a file
+ *
+ * @param file File to write to.
+ * @param stats The #statistics object to write to the file
+ * @param time The current physical time.
+ */
+void stats_print_to_file(FILE *file, const struct statistics *stats,
+                         double time) {
+
+  const double E_tot = stats->E_kin + stats->E_int + stats->E_pot;
+
+  fprintf(file,
+          " %14e %14e %14e %14e %14e %14e %14e %14e %14e %14e %14e %14e %14e "
+          "%14e\n",
+          time, stats->mass, E_tot, stats->E_kin, stats->E_int, stats->E_pot,
+          stats->E_rad, stats->entropy, stats->mom[0], stats->mom[1],
+          stats->mom[2], stats->ang_mom[0], stats->ang_mom[1],
+          stats->ang_mom[2]);
+  fflush(file);
+}
+
+/* Extra stuff in MPI-land */
+#ifdef WITH_MPI
+
+/**
+ * @brief MPI datatype corresponding to the #statistics structure.
+ */
+MPI_Datatype statistics_mpi_type;
+
+/**
+ * @brief MPI operator used for the reduction of #statistics structure.
+ */
+MPI_Op statistics_mpi_reduce_op;
+
+/**
+ * @brief MPI reduce operator for #statistics structures.
+ */
+void stats_add_MPI(void *in, void *inout, int *len, MPI_Datatype *datatype) {
+
+  for (int i = 0; i < *len; ++i)
+    stats_add(&((struct statistics *)inout)[0],
+              &((const struct statistics *)in)[i]);
+}
+
+/**
+ * @brief Registers MPI #statistics type and reduction function.
+ */
+void stats_create_MPI_type() {
+
+  /* This is not the recommended way of doing this.
+     One should define the structure field by field
+     But as long as we don't do serialization via MPI-IO
+     we don't really care.
+     Also we would have to modify this function everytime something
+     is added to the statistics structure. */
+  if (MPI_Type_contiguous(sizeof(struct statistics) / sizeof(unsigned char),
+                          MPI_BYTE, &statistics_mpi_type) != MPI_SUCCESS ||
+      MPI_Type_commit(&statistics_mpi_type) != MPI_SUCCESS) {
+    error("Failed to create MPI type for statistics.");
+  }
+
+  /* Create the reduction operation */
+  MPI_Op_create(stats_add_MPI, 1, &statistics_mpi_reduce_op);
+}
+#endif
diff --git a/src/statistics.h b/src/statistics.h
new file mode 100644
index 0000000000000000000000000000000000000000..ece1f813bb8aff2b5402753bafc031696cfa562d
--- /dev/null
+++ b/src/statistics.h
@@ -0,0 +1,76 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+#ifndef SWIFT_STATISTICS_H
+#define SWIFT_STATISTICS_H
+
+/* Config parameters. */
+#include "../config.h"
+
+/* Local headers. */
+#include "lock.h"
+#include "space.h"
+
+/**
+ * @brief Quantities collected for physics statistics
+ */
+struct statistics {
+
+  /*! Kinetic energy */
+  double E_kin;
+
+  /*! Internal energy */
+  double E_int;
+
+  /*! Potential energy */
+  double E_pot;
+
+  /*! Radiative energy */
+  double E_rad;
+
+  /*! Entropy */
+  double entropy;
+
+  /*! Mass */
+  double mass;
+
+  /*! Momentum */
+  double mom[3];
+
+  /*! Angular momentum */
+  double ang_mom[3];
+
+  /*! Lock for threaded access */
+  swift_lock_type lock;
+};
+
+void stats_collect(const struct space* s, struct statistics* stats);
+void stats_add(struct statistics* a, const struct statistics* b);
+void stats_print_to_file(FILE* file, const struct statistics* stats,
+                         double time);
+void stats_init(struct statistics* s);
+
+#ifdef WITH_MPI
+extern MPI_Datatype statistics_mpi_type;
+extern MPI_Op statistics_mpi_reduce_op;
+
+void stats_add_MPI(void* in, void* out, int* len, MPI_Datatype* datatype);
+void stats_create_MPI_type();
+#endif
+
+#endif /* SWIFT_STATISTICS_H */
diff --git a/src/task.c b/src/task.c
index 00068f45769b4ca606cc729bd5e89c13ae729eef..54b9363b7ac3d5c372b591c00b0b03cc274f66b5 100644
--- a/src/task.c
+++ b/src/task.c
@@ -48,13 +48,13 @@
 
 /* Task type names. */
 const char *taskID_names[task_type_count] = {
-    "none",       "sort",    "self",          "pair",          "sub_self",
-    "sub_pair",   "init",    "ghost",         "extra_ghost",   "kick",
-    "kick_fixdt", "send",    "recv",          "grav_gather_m", "grav_fft",
-    "grav_mm",    "grav_up", "grav_external", "cooling",       "sourceterms"};
+    "none",       "sort",    "self",    "pair",          "sub_self",
+    "sub_pair",   "init",    "ghost",   "extra_ghost",   "kick",
+    "kick_fixdt", "send",    "recv",    "grav_gather_m", "grav_fft",
+    "grav_mm",    "grav_up", "cooling", "sourceterms"};
 
 const char *subtaskID_names[task_subtype_count] = {
-    "none", "density", "gradient", "force", "grav", "tend"};
+    "none", "density", "gradient", "force", "grav", "external_grav", "tend"};
 
 /**
  * @brief Computes the overlap between the parts array of two given cells.
@@ -135,6 +135,7 @@ __attribute__((always_inline)) INLINE static enum task_actions task_acts_on(
           break;
 
         case task_subtype_grav:
+        case task_subtype_external_grav:
           return task_action_gpart;
           break;
 
@@ -150,7 +151,14 @@ __attribute__((always_inline)) INLINE static enum task_actions task_acts_on(
     case task_type_kick_fixdt:
     case task_type_send:
     case task_type_recv:
-      return task_action_all;
+      if (t->ci->count > 0 && t->ci->gcount > 0)
+        return task_action_all;
+      else if (t->ci->count > 0)
+        return task_action_part;
+      else if (t->ci->gcount > 0)
+        return task_action_gpart;
+      else
+        error("Task without particles");
       break;
 
     case task_type_grav_gather_m:
@@ -160,10 +168,6 @@ __attribute__((always_inline)) INLINE static enum task_actions task_acts_on(
       return task_action_multipole;
       break;
 
-    case task_type_grav_external:
-      return task_action_gpart;
-      break;
-
     default:
       error("Unknown task_action for task");
       return task_action_none;
@@ -393,3 +397,15 @@ void task_print_submask(unsigned int submask) {
     printf(" %s=%s", subtaskID_names[k], (submask & (1 << k)) ? "yes" : "no");
   printf(" ]\n");
 }
+
+/**
+ * @brief Print basic information about a task.
+ *
+ * @param t The #task.
+ */
+void task_print(const struct task *t) {
+
+  message("Type:'%s' sub_type:'%s' wait=%d nr_unlocks=%d skip=%d",
+          taskID_names[t->type], subtaskID_names[t->subtype], t->wait,
+          t->nr_unlock_tasks, t->skip);
+}
diff --git a/src/task.h b/src/task.h
index bc4df3dc2a4cfee3c382e9f2059cba84f29299f7..f840c0b4b8e807dce28f6f13479dbdf4995ab66d 100644
--- a/src/task.h
+++ b/src/task.h
@@ -53,7 +53,6 @@ enum task_types {
   task_type_grav_fft,
   task_type_grav_mm,
   task_type_grav_up,
-  task_type_grav_external,
   task_type_cooling,
   task_type_sourceterms,
   task_type_count
@@ -68,6 +67,7 @@ enum task_subtypes {
   task_subtype_gradient,
   task_subtype_force,
   task_subtype_grav,
+  task_subtype_external_grav,
   task_subtype_tend,
   task_subtype_count
 } __attribute__((packed));
@@ -160,5 +160,6 @@ int task_lock(struct task *t);
 void task_print_mask(unsigned int mask);
 void task_print_submask(unsigned int submask);
 void task_do_rewait(struct task *t);
+void task_print(const struct task *t);
 
 #endif /* SWIFT_TASK_H */
diff --git a/src/version.c b/src/version.c
index 8bd94e5651dbc597fcd80bc585a47c6633ee3993..3d948e593f58a5f6df0ccbcfc536c086a6859aa3 100644
--- a/src/version.c
+++ b/src/version.c
@@ -21,6 +21,9 @@
 /* Config parameters. */
 #include "../config.h"
 
+/* Needed for gethostname() */
+#include <unistd.h>
+
 /* MPI headers. */
 #ifdef WITH_MPI
 #include <mpi.h>
@@ -47,6 +50,24 @@
 /* Local headers. */
 #include "version_string.h"
 
+/**
+ * @brief Return the hostname
+ *
+ * Will return the name of the host.
+ *
+ * @result the hostname.
+ */
+const char *hostname(void) {
+  static char buf[256];
+  static int initialised = 0;
+  if (!initialised) {
+    buf[255] = '\0';
+    if (gethostname(buf, 255)) sprintf(buf, "%s", "Unknown host");
+    initialised = 1;
+  }
+  return buf;
+}
+
 /**
  * @brief Return the source code git revision
  *
diff --git a/src/version.h b/src/version.h
index 0d568f312cf775cf932d580a49da7c19c9e14b21..07a98c5e3175efd82110efc0104336d657edd469 100644
--- a/src/version.h
+++ b/src/version.h
@@ -22,6 +22,7 @@
 
 const char* package_description(void);
 const char* package_version(void);
+const char* hostname(void);
 const char* git_revision(void);
 const char* git_branch(void);
 const char* compiler_name(void);
diff --git a/theory/SPH/EoS/eos.tex b/theory/SPH/EoS/eos.tex
new file mode 100644
index 0000000000000000000000000000000000000000..cd21d2f5d8c3eb276bae64d91970031dc1924e67
--- /dev/null
+++ b/theory/SPH/EoS/eos.tex
@@ -0,0 +1,64 @@
+
+In \swift, all transformations between thermodynamical quantities are
+computed using a pre-defined equation of state (EoS) for the gas. This
+allows user to switch EoS without having to modify the equations of
+SPH. The definition of the EoS takes the form of simple relations
+between thermodynamic quantities. All of them must be specified even
+when they default to constants.
+
+%#######################################################################################################
+\subsection{Ideal Gas}
+\label{sec:eos:ideal}
+
+This is the simplest and most common equation of state. It corresponds
+to a gas obeying a pressure law $P = (\gamma-1) \rho u$, where $u$ is
+the gas' internal energy, $\rho$ its density and $\gamma$ the
+(user defined) adiabatic index, often assumed to be $5/3$ or $7/5$. For such a gas,
+we have the following relations between pressure ($P$), internal energy
+($u$), density ($\rho$), sound speed ($c$) and entropic function ($A$):
+
+\begin{align}
+  P &= P_{\rm eos}(\rho, u) \equiv  (\gamma-1) \rho
+  u \label{eq:eos:ideal:P_from_u} \\
+  c &= c_{\rm eos}(\rho, u) \equiv \sqrt{\gamma (\gamma-1)
+    u} \label{eq:eos:ideal:c_from_u} \\
+  A &= A_{\rm eos}(\rho, u) \equiv(\gamma-1) u
+  \rho^{\gamma-1} \label{eq:eos:ideal:A_from_u} \\
+  ~ \nonumber\\
+  P &= P_{\rm eos}(\rho, A) \equiv A
+  \rho^{\gamma} \label{eq:eos:ideal:P_from_A} \\
+  c &= c_{\rm eos}(\rho, A) \equiv \sqrt{\gamma \rho^{\gamma-1}
+    A} \label{eq:eos:ideal:c_from_A} \\
+  u &= u_{\rm eos}(\rho, A) \equiv A \rho^{\gamma-1} /
+  (\gamma-1) \label{eq:eos:ideal:u_from_A}
+\end{align}
+Note that highly optimised functions to compute $x^\gamma$ and other
+useful powers of $\gamma$ have been implemented in \swift for the most
+commonly used values of $\gamma$.
+
+
+%#######################################################################################################
+\subsection{Isothermal Gas}
+\label{sec:eos:isothermal}
+
+An isothermal equation of state can be useful for certain test cases
+or to speed-up the generation of glass files. This EoS corresponds to
+a gas with contant (user-specified) thermal energy $u_{\rm cst}$. For such a gas,
+we have the following (trivial) relations between pressure ($P$), internal energy
+($u$), density ($\rho$), sound speed ($c$) and entropic function ($A$):
+
+\begin{align}
+  P &= P_{\rm eos}(\rho, u) \equiv  (\gamma-1) \rho
+  u_{\rm cst} \label{eq:eos:isothermal:P_from_u} \\
+  c &= c_{\rm eos}(\rho, u) \equiv \sqrt{\gamma (\gamma-1)
+    u_{\rm cst}} = c_{\rm cst} \label{eq:eos:isothermal:c_from_u} \\
+  A &= A_{\rm eos}(\rho, u) \equiv(\gamma-1) u_{\rm cst}
+  \rho^{\gamma-1} \label{eq:eos:isothermal:A_from_u} \\ 
+  ~ \nonumber\\
+  P &= P_{\rm eos}(\rho, A) \equiv (\gamma-1) \rho
+  u_{\rm cst} \label{eq:eos:isothermal:P_from_A} \\
+  c &= c_{\rm eos}(\rho, A) \equiv \sqrt{\gamma (\gamma-1)
+    u_{\rm cst}} = c_{\rm cst} \label{eq:eos:isothermal:c_from_A} \\
+  u &= u_{\rm eos}(\rho, A) \equiv u_{\rm
+    cst} \label{eq:eos:isothermal:u_from_A} 
+\end{align}
diff --git a/theory/SPH/EoS/eos_standalone.tex b/theory/SPH/EoS/eos_standalone.tex
new file mode 100644
index 0000000000000000000000000000000000000000..14447f7e7f7194edd5ef0996ddf27a75d7323427
--- /dev/null
+++ b/theory/SPH/EoS/eos_standalone.tex
@@ -0,0 +1,22 @@
+\documentclass[fleqn, usenatbib, useAMS,a4paper]{mnras}
+\usepackage{graphicx}
+\usepackage{amsmath,paralist,xcolor,xspace,amssymb}
+\usepackage{times}
+
+\newcommand{\swift}{{\sc Swift}\xspace}
+
+%opening
+\title{Equation of state in SWIFT}
+\author{Matthieu Schaller}
+
+\begin{document}
+
+\maketitle
+
+\input{eos}
+
+\bibliographystyle{mnras}
+\bibliography{./bibliography.bib}
+
+
+\end{document}
diff --git a/theory/SPH/EoS/run.sh b/theory/SPH/EoS/run.sh
new file mode 100755
index 0000000000000000000000000000000000000000..72ef573fb26885563e89561d172c15a5a0081564
--- /dev/null
+++ b/theory/SPH/EoS/run.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+python kernels.py
+pdflatex -jobname=eos eos_standalone.tex
+bibtex eos.aux
+pdflatex -jobname=eos eos_standalone.tex
+pdflatex -jobname=eos eos_standalone.tex
diff --git a/theory/SPH/Flavours/bibliography.bib b/theory/SPH/Flavours/bibliography.bib
new file mode 100644
index 0000000000000000000000000000000000000000..2bc11dacca90fe03d05c2e847503105d80eb1317
--- /dev/null
+++ b/theory/SPH/Flavours/bibliography.bib
@@ -0,0 +1,100 @@
+@ARTICLE{Price2012,
+   author = {{Price}, D.~J.},
+    title = "{Smoothed particle hydrodynamics and magnetohydrodynamics}",
+  journal = {Journal of Computational Physics},
+archivePrefix = "arXiv",
+   eprint = {1012.1885},
+ primaryClass = "astro-ph.IM",
+     year = 2012,
+    month = feb,
+   volume = 231,
+    pages = {759-794},
+      doi = {10.1016/j.jcp.2010.12.011},
+   adsurl = {http://adsabs.harvard.edu/abs/2012JCoPh.231..759P},
+  adsnote = {Provided by the SAO/NASA Astrophysics Data System}
+}
+
+
+@ARTICLE{Springel2005,
+   author = {{Springel}, V.},
+    title = "{The cosmological simulation code GADGET-2}",
+  journal = {\mnras},
+   eprint = {astro-ph/0505010},
+ keywords = {methods: numerical, galaxies: interactions, dark matter},
+     year = 2005,
+    month = dec,
+   volume = 364,
+    pages = {1105-1134},
+      doi = {10.1111/j.1365-2966.2005.09655.x},
+   adsurl = {http://adsabs.harvard.edu/abs/2005MNRAS.364.1105S},
+  adsnote = {Provided by the SAO/NASA Astrophysics Data System}
+}
+
+
+
+@ARTICLE{Hopkins2013,
+   author = {{Hopkins}, P.~F.},
+    title = "{A general class of Lagrangian smoothed particle hydrodynamics methods and implications for fluid mixing problems}",
+  journal = {\mnras},
+archivePrefix = "arXiv",
+   eprint = {1206.5006},
+ primaryClass = "astro-ph.IM",
+ keywords = {hydrodynamics, instabilities, turbulence, methods: numerical, cosmology: theory},
+     year = 2013,
+    month = feb,
+   volume = 428,
+    pages = {2840-2856},
+      doi = {10.1093/mnras/sts210},
+   adsurl = {http://adsabs.harvard.edu/abs/2013MNRAS.428.2840H},
+  adsnote = {Provided by the SAO/NASA Astrophysics Data System}
+}
+
+@ARTICLE{Springel2002,
+   author = {{Springel}, V. and {Hernquist}, L.},
+    title = "{Cosmological smoothed particle hydrodynamics simulations: the entropy equation}",
+  journal = {\mnras},
+   eprint = {astro-ph/0111016},
+ keywords = {methods: numerical, galaxies: evolution, galaxies: starburst, methods: numerical, galaxies: evolution, galaxies: starburst},
+     year = 2002,
+    month = jul,
+   volume = 333,
+    pages = {649-664},
+      doi = {10.1046/j.1365-8711.2002.05445.x},
+   adsurl = {http://adsabs.harvard.edu/abs/2002MNRAS.333..649S},
+  adsnote = {Provided by the SAO/NASA Astrophysics Data System}
+}
+
+@ARTICLE{Balsara1995,
+   author = {{Balsara}, D.~S.},
+    title = "{von Neumann stability analysis of smooth particle hydrodynamics--suggestions for optimal algorithms}",
+  journal = {Journal of Computational Physics},
+     year = 1995,
+   volume = 121,
+    pages = {357-372},
+      doi = {10.1016/S0021-9991(95)90221-X},
+   adsurl = {http://adsabs.harvard.edu/abs/1995JCoPh.121..357B},
+  adsnote = {Provided by the SAO/NASA Astrophysics Data System}
+}
+
+
+@ARTICLE{Schaller2015,
+   author = {{Schaller}, M. and {Dalla Vecchia}, C. and {Schaye}, J. and 
+	{Bower}, R.~G. and {Theuns}, T. and {Crain}, R.~A. and {Furlong}, M. and 
+	{McCarthy}, I.~G.},
+    title = "{The EAGLE simulations of galaxy formation: the importance of the hydrodynamics scheme}",
+  journal = {\mnras},
+archivePrefix = "arXiv",
+   eprint = {1509.05056},
+ keywords = {methods: numerical, galaxies: clusters: intracluster medium, galaxies: formation, cosmology: theory},
+     year = 2015,
+    month = dec,
+   volume = 454,
+    pages = {2277-2291},
+      doi = {10.1093/mnras/stv2169},
+   adsurl = {http://adsabs.harvard.edu/abs/2015MNRAS.454.2277S},
+  adsnote = {Provided by the SAO/NASA Astrophysics Data System}
+}
+
+
+
+
diff --git a/theory/SPH/Flavours/run.sh b/theory/SPH/Flavours/run.sh
new file mode 100755
index 0000000000000000000000000000000000000000..6d0791823d93f7feb8f469747f81b24032612523
--- /dev/null
+++ b/theory/SPH/Flavours/run.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+pdflatex -jobname=sph_flavours sph_flavours_standalone.tex
+bibtex sph_flavours.aux 
+pdflatex -jobname=sph_flavours sph_flavours_standalone.tex
+pdflatex -jobname=sph_flavours sph_flavours_standalone.tex
diff --git a/theory/SPH/Flavours/sph_flavours.tex b/theory/SPH/Flavours/sph_flavours.tex
new file mode 100644
index 0000000000000000000000000000000000000000..5fe1277373552d60607671299437a371e068169c
--- /dev/null
+++ b/theory/SPH/Flavours/sph_flavours.tex
@@ -0,0 +1,382 @@
+For vector quantities, we define $\vec{a}_{ij} \equiv (\vec{a}_i -
+\vec{a}_j)$. Barred quantities ($\bar a_{ij}$) correspond to the
+average over two particles of a quantity: $\bar a_{ij} \equiv
+\frac{1}{2}(a_i + a_j)$. To simplify notations, we also define the
+vector quantity $\Wij \equiv \frac{1}{2}\left(W(\vec{x}_{ij}, h_i) +
+\nabla_x W(\vec{x}_{ij},h_j)\right)$.
+
+
+%#######################################################################################################
+\subsection{\MinimalSPH}
+\label{sec:sph:minimal}
+
+This is the simplest fully-conservative version of SPH using the
+internal energy $u$ as a thermal variable that can be written
+down. Its implementation in \swift should be understood as a text-book
+example and template for more advanced implementations. A full
+derivation and motivation for the equations can be found in the review
+of \cite{Price2012}. Our implementation follows their equations (27),
+(43), (44), (45), (101), (103) and (104) with $\beta=3$ and
+$\alpha_u=0$. We summarize the equations here.
+
+\subsubsection{Density and other fluid properties (\nth{1} neighbour loop)}
+
+For a set of particles $i$ with positions $\vec{x}_i$ with velocities
+$\vec{v}_i$, masses $m_i$, sthermal energy per unit mass $u_i$ and
+smoothing length $h_i$, we compute the density for each particle:
+
+\begin{equation}
+  \rho_i \equiv \rho(\vec{x}_i) = \sum_j m_j W(\vec{x}_{ij}, h_i),
+  \label{eq:sph:minimal:rho}
+\end{equation}
+and the derivative of its density with respect to $h$:
+
+\begin{equation}
+    \label{eq:sph:minimal:rho_dh}
+  \rho_{\partial h_i} \equiv \dd{\rho}{h}(\vec{x}_i) = \sum_j m_j \dd{W}{h}(\vec{x}_{ij}
+  , h_i).
+\end{equation}
+The gradient terms (``h-terms'') can then be computed from the density
+and its derivative:
+
+\begin{equation}
+  f_i \equiv \left(1 + \frac{h_i}{3\rho_i}\rho_{\partial h_i}
+  \right)^{-1}.
+  \label{eq:sph:minimal:f_i}
+\end{equation}
+Using the pre-defined equation of state, the pressure $P_i$ and the sound
+speed $c_i$ at the location of particle $i$ can now be computed from
+$\rho_i$ and $u_i$:
+
+\begin{align}
+  P_i &= P_{\rm eos}(\rho_i, u_i),   \label{eq:sph:minimal:P}\\
+  c_i &= c_{\rm eos}(\rho_i, u_i).   \label{eq:sph:minimal:c}
+\end{align}
+
+\subsubsection{Hydrodynamical accelerations (\nth{2} neighbour loop)}
+
+We can then proceed with the second loop over
+neighbours. The signal velocity $v_{{\rm sig},ij}$ between two particles is given by
+
+\begin{align}
+  \mu_{ij} &=
+  \begin{cases}
+  \frac{\vec{v}_{ij} \cdot \vec{x}_{ij}}{|\vec{x}_{ij}|}  & \rm{if}~
+  \vec{v}_{ij} \cdot \vec{x}_{ij} < 0,\\
+    0 &\rm{otherwise}, \\
+  \end{cases}\nonumber\\
+  v_{{\rm sig},ij} &= c_i + c_j - 3\mu_{ij}.   \label{eq:sph:minimal:v_sig}
+\end{align}
+We also use these two quantities for the calculation of the viscosity term:
+
+\begin{equation}
+\nu_{ij} = -\frac{1}{2}\frac{\alpha \mu_{ij} v_{{\rm
+      sig},ij}}{\bar\rho_{ij}}
+  \label{eq:sph:minimal:nu_ij}
+\end{equation}
+The fluid accelerations are then given by
+
+\begin{align}
+  \frac{d\vec{v}_i}{dt} = -\sum_j m_j &\left[\frac{f_iP_i}{\rho_i^2}
+  \nabla_x W(\vec{x}_{ij}, h_i)   \nonumber
+  +\frac{f_jP_j}{\rho_j^2}\nabla_x W(\vec{x}_{ij},h_j)\right. \\
+  &+ \left. \bigg.\nu_{ij} \Wij \right], \label{eq:sph:minimal:dv_dt}
+\end{align}
+and the change in internal energy,
+
+\begin{align}
+  \frac{du_i}{dt} = \sum_j m_j &\left[\frac{f_iP_i}{\rho_i^2}  \vec{v}_{ij}
+    \cdot \nabla_x W(\vec{x}_{ij}, h_i) \right. \label{eq:sph:minimal:du_dt}\\
+    &+\left. \frac{1}{2}\nu_{ij}\vec{v}_{ij}\cdot\Big. \Wij\right], \nonumber
+\end{align}
+where in both cases the first line corresponds to the standard SPH
+term and the second line to the viscuous accelerations.
+
+We also compute an estimator of the change in smoothing length to be
+used in the prediction step. This is an estimate of the local
+divergence of the velocity field compatible with the accelerations
+computed above:
+
+\begin{equation}
+  \frac{dh_i}{dt} = -\frac{1}{3}h_i \sum_j \frac{m_j}{\rho_j}
+  \vec{v}_{ij}\cdot \nabla_x W(\vec{x}_{ij}, h_i).
+  \label{eq:sph:minimal:dh_dt}
+\end{equation}
+and update the signal velocity of the particles:
+
+\begin{equation}
+  v_{{\rm sig},i} = \max_j \left( v_{{\rm sig},ij} \right).
+  \label{eq:sph:minimal:v_sig_update}
+\end{equation}
+All the quantities required for time integration have now been obtained.
+
+\subsubsection{Time integration}
+
+For each particle, we compute a time-step given by the CFL condition:
+
+\begin{equation}
+  \Delta t = 2 C_{\rm CFL} \frac{H_i}{v_{{\rm sig},i}},
+    \label{eq:sph:minimal:dt}
+\end{equation}
+where $C_{\rm CFL}$ is a free dimensionless parameter and $H_i = \gamma h_i$ is the
+kernel support size. Particles can then be ``kicked'' forward in time:
+\begin{align}
+  \vec{v}_i &\rightarrow \vec{v}_i + \frac{d\vec{v}_i}{dt} \Delta t  \label{eq:sph:minimal:kick_v}\\
+  u_i &\rightarrow u_i + \frac{du_i}{dt} \Delta t \label{eq:sph:minimal:kick_u}\\
+  P_i &\rightarrow P_{\rm eos}\left(\rho_i, u_i\right) \label{eq:sph:minimal:kick_P}, \\
+  c_i &\rightarrow c_{\rm eos}\left(\rho_i, u_i\right) \label{eq:sph:minimal:kick_c},
+\end{align}
+where we used the pre-defined equation of state to compute the new
+value of the pressure and sound-speed.
+
+\subsubsection{Particle properties prediction}
+
+Inactive particles need to have their quantities predicted forward in
+time in the ``drift'' operation. We update them as follows:
+
+\begin{align}
+  \vec{x}_i &\rightarrow \vec{x}_i + \vec{v}_i \Delta t  \label{eq:sph:minimal:drift_x} \\
+  h_i &\rightarrow h_i \exp\left(\frac{1}{h_i} \frac{dh_i}{dt}
+  \Delta t\right), \label{eq:sph:minimal:drift_h}\\
+  \rho_i &\rightarrow \rho_i \exp\left(-\frac{3}{h_i} \frac{dh_i}{dt}
+  \Delta t\right), \label{eq:sph:minimal:drift_rho} \\
+  P_i &\rightarrow P_{\rm eos}\left(\rho_i, u_i + \frac{du_i}{dt} \Delta t\right), \label{eq:sph:minimal:drift_P}\\
+  c_i &\rightarrow c_{\rm eos}\left(\rho_i, u_i + \frac{du_i}{dt}
+  \Delta t\right) \label{eq:sph:minimal:drift_c},
+\end{align}
+where, as above, the last two updated quantities are obtained using
+the pre-defined equation of state. Note that the thermal energy $u_i$
+itself is \emph{not} updated.
+
+%#######################################################################################################
+
+\subsection{Gadget-2 SPH}
+\label{sec:sph:gadget2}
+
+This flavour of SPH is the one implemented in the \gadget-2 code
+\citep{Springel2005}. The basic equations were derived by
+\cite{Springel2002} and also includes a \cite{Balsara1995} switch for
+the suppression of viscosity. The implementation here follows closely the
+presentation of \cite{Springel2005}. Specifically, we use their equations (5), (7),
+(8), (9), (10), (13), (14) and (17). We summarize them here for completeness.
+
+\subsubsection{Density and other fluid properties (\nth{1} neighbour loop)}
+
+For a set of particles $i$ with positions $\vec{x}_i$ with velocities
+$\vec{v}_i$, masses $m_i$, entropic function per unit mass $A_i$ and
+smoothing length $h_i$, we compute the density, derivative of the density with respect
+to $h$ and the ``h-terms'' in a similar way to the minimal-SPH case
+(Eq. \ref{eq:sph:minimal:rho}, \ref{eq:sph:minimal:rho_dh} and
+\ref{eq:sph:minimal:f_i}). From these the pressure and sound-speed can
+be computed using the pre-defined equation of state:
+
+\begin{align}
+  P_i &= P_{\rm eos}(\rho_i, A_i),   \label{eq:sph:gadget2:P}\\
+  c_i &= c_{\rm eos}(\rho_i, A_i).   \label{eq:sph:gadget2:c}
+\end{align}
+We additionally compute the divergence and
+curl of the velocity field using standard SPH expressions:
+
+\begin{align}
+  \nabla\cdot\vec{v}_i &\equiv\nabla\cdot \vec{v}(\vec{x}_i) = \frac{1}{\rho_i} \sum_j m_j
+  \vec{v}_{ij}\cdot\nabla_x W(\vec{x}_{ij}, h_i) \label{eq:sph:gadget2:div_v},\\ 
+    \nabla\times\vec{v}_i &\equiv \nabla\times \vec{v}(\vec{x}_i) = \frac{1}{\rho_i} \sum_j m_j
+  \vec{v}_{ij}\times\nabla_x W(\vec{x}_{ij}, h_i) \label{eq:sph:gadget2:rot_v}.
+\end{align}
+These are used to construct the \cite{Balsara1995} switch $B_i$:
+
+\begin{equation}
+  B_i = \frac{|\nabla\cdot\vec{v}_i|}{|\nabla\cdot\vec{v}_i| +
+    |\nabla\times\vec{v}_i| + 10^{-4}c_i / h_i}, \label{eq:sph:gadget2:balsara}
+\end{equation}
+where the last term in the denominator is added to prevent numerical instabilities.
+
+\subsubsection{Hydrodynamical accelerations (\nth{2} neighbour loop)}
+
+The accelerations are computed in a similar fashion to the minimal-SPH
+case with the exception of the viscosity term which is now modified to
+include the switch. Instead of Eq. \ref{eq:sph:minimal:nu_ij}, we get:
+
+\begin{equation}
+\nu_{ij} = -\frac{1}{2}\frac{\alpha \bar B_{ij} \mu_{ij} v_{{\rm sig},ij}}{\bar\rho_{ij}},
+  \label{eq:sph:gadget2:nu_ij}  
+\end{equation}
+whilst equations \ref{eq:sph:minimal:v_sig},
+\ref{eq:sph:minimal:dv_dt}, \ref{eq:sph:minimal:dh_dt} and
+\ref{eq:sph:minimal:v_sig_update} remain unchanged. The only other
+change is the equation of motion for the thermodynamic variable which
+now has to be describing the evolution of the entropic function and
+not the evolution of the thermal energy. Instead of
+eq. \ref{eq:sph:minimal:du_dt}, we have
+
+\begin{equation}
+\frac{dA_i}{dt} = \frac{1}{2} A_{\rm eos}\left(\rho_i, \sum_j
+m_j \nu_{ij}\vec{v}_{ij}\cdot \Wij\right),
+\end{equation}
+where we made use of the pre-defined equation of state relating
+density and internal energy to entropy.
+
+\subsubsection{Time integration}
+
+The time-step condition is identical to the \MinimalSPH case
+(Eq. \ref{eq:sph:minimal:dt}). The same applies to the integration
+forward in time (Eq. \ref{eq:sph:minimal:kick_v} to
+\ref{eq:sph:minimal:kick_c}) with the exception of the change in
+internal energy (Eq. \ref{eq:sph:minimal:kick_u}) which gets replaced
+by an integration for the the entropy:
+
+
+\begin{align}
+  \vec{v}_i &\rightarrow \vec{v}_i + \frac{d\vec{v}_i}{dt} \Delta t  \label{eq:sph:gadget2:kick_v}\\
+  A_i &\rightarrow A_i + \frac{dA_i}{dt} \Delta t \label{eq:sph:gadget2:kick_A}\\
+  P_i &\rightarrow P_{\rm eos}\left(\rho_i, A_i\right) \label{eq:sph:gadget2:kick_P}, \\
+  c_i &\rightarrow c_{\rm eos}\left(\rho_i, A_i\right) \label{eq:sph:gadget2:kick_c},
+\end{align}
+where, once again, we made use of the equation of state relating
+thermodynamical quantities.
+
+\subsubsection{Particle properties prediction}
+
+The prediction step is also identical to the \MinimalSPH case with the
+entropic function replacing the thermal energy.
+
+\begin{align}
+  \vec{x}_i &\rightarrow \vec{x}_i + \vec{v}_i \Delta t  \label{eq:sph:gadget2:drift_x} \\
+  h_i &\rightarrow h_i \exp\left(\frac{1}{h_i} \frac{dh_i}{dt}
+  \Delta t\right), \label{eq:sph:gadget2:drift_h}\\
+  \rho_i &\rightarrow \rho_i \exp\left(-\frac{3}{h_i} \frac{dh_i}{dt}
+  \Delta t\right), \label{eq:sph:gadget2:drift_rho} \\
+  P_i &\rightarrow P_{\rm eos}\left(\rho_i, A_i + \frac{dA_i}{dt} \Delta t\right), \label{eq:sph:gadget2:drift_P}\\
+  c_i &\rightarrow c_{\rm eos}\left(\rho_i, A_i + \frac{dA_i}{dt}
+  \Delta t\right) \label{eq:sph:gadget2:drift_c},
+\end{align}
+where, as above, the last two updated quantities are obtained using
+the pre-defined equation of state. Note that the entropic function $A_i$
+itself is \emph{not} updated.
+
+
+%#######################################################################################################
+
+\subsection{Pressure-Entropy SPH}
+\label{sec:sph:pe}
+
+This flavour of SPH follows the implementation described in section
+2.2.3 of \cite{Hopkins2013}. We start with their equations (17), (19),
+(21) and (22) but modify them for efficiency and generality
+reasons. We also use the same \cite{Balsara1995} viscosity switch as
+in the \GadgetSPH scheme (Sec. \ref{sec:sph:gadget2}).
+
+\subsubsection{Density and other fluid properties (\nth{1} neighbour loop)}
+
+For a set of particles $i$ with positions $\vec{x}_i$ with velocities
+$\vec{v}_i$, masses $m_i$, entropic function per unit mass $A_i$ and
+smoothing length $h_i$, we compute the density, derivative of the
+density with respect to $h$, divergence and curl of velocity field in
+a similar fashion to the \GadgetSPH scheme. From the basic particle
+properties we construct an additional temporary quantity
+
+\begin{equation}
+  \tilde{A_i} \equiv A_i^{1/\gamma},
+    \label{eq:sph:pe:A_tilde}
+\end{equation}
+which enters many equations. This allows us to construct the
+entropy-weighted density $\bar\rho_i$:
+
+\begin{equation}
+  \bar\rho_i = \frac{1}{\tilde{A_i}} \sum_j m_j \tilde{A_j} W(\vec{x}_{ij}, h_i),
+  \label{eq:sph:pe:rho_bar}
+\end{equation}
+which can then be used to construct an entropy-weighted sound-speed
+and pressure based on our assumed equation of state:
+
+\begin{align}
+  \bar c_i &= c_{\rm eos}(\bar\rho_i, A_i), \label{eq:sph:pe:c_bar}\\
+  \bar P_i &= P_{\rm eos}(\bar\rho_i, A_i), \label{eq:sph:pe:P_bar}
+\end{align}
+and estimate the derivative of this later quantity with respect to the
+smoothing length using:
+
+\begin{equation}
+\bar P_{\partial h_i} \equiv \dd{\bar{P}}{h}(\vec{x}_i) = \sum_j m_j
+\tilde{A_j} \dd{W}{h}(\vec{x}_{ij}), \label{eq:sph:pe:P_dh}
+\end{equation}
+The gradient terms (``h-terms'') are then obtained by combining $\bar
+P_{\partial h_i}$ and $\rho_{\partial h_i}$
+(eq. \ref{eq:sph:minimal:rho_dh}):
+
+\begin{equation}
+  f_i \equiv \left(\frac{h_i}{3\rho_i}\bar P_{\partial
+    h_i}\right)\left(1 + \frac{h_i}{3\rho_i}\rho_{\partial
+    h_i}\right)^{-1}. 
+\end{equation}
+
+\subsubsection{Hydrodynamical accelerations (\nth{2} neighbour loop)}
+
+The accelerations are given by the following term:
+
+\begin{align}
+  \frac{d\vec{v}_i}{dt} = -\sum_j m_j &\left[\frac{\bar P_i}{\bar\rho_i^2} \left(\frac{\tilde A_j}{\tilde A_i} - \frac{f_i}{\tilde A_i}\right)\nabla_x W(\vec{x}_{ij}, h_i) \right.  \nonumber \\
+  &+\frac{P_j}{\rho_j^2} \left(\frac{\tilde A_i}{\tilde A_j} - \frac{f_j}{\tilde A_j}\right)\nabla_x W(\vec{x}_{ij},h_j) \\
+  &+ \left. \bigg.\nu_{ij} \Wij \right], \label{eq:sph:pe:dv_dt}
+\end{align}
+where the viscosity term $\nu_{ij}$ has been computed as in
+the \GadgetSPH case (Eq. \ref{eq:sph:gadget2:balsara}
+and \ref{eq:sph:gadget2:nu_ij}). For completeness, the equation of
+motion for the entropy is
+
+\begin{equation}
+\frac{dA_i}{dt} = \frac{1}{2} A_{\rm eos}\left(\rho_i, \sum_j
+m_j \nu_{ij}\vec{v}_{ij}\cdot \Wij\right).
+\end{equation}
+
+\subsubsection{Time integration}
+
+The time-step condition is identical to the \MinimalSPH case
+(Eq. \ref{eq:sph:minimal:dt}). The same applies to the integration
+forward in time (Eq. \ref{eq:sph:minimal:kick_v} to
+\ref{eq:sph:minimal:kick_c}) with the exception of the change in
+internal energy (Eq. \ref{eq:sph:minimal:kick_u}) which gets replaced
+by an integration for the the entropy:
+
+\begin{align}
+  \vec{v}_i &\rightarrow \vec{v}_i + \frac{d\vec{v}_i}{dt} \Delta t  \label{eq:sph:pe:kick_v}\\
+  A_i &\rightarrow A_i + \frac{dA_i}{dt} \Delta t \label{eq:sph:pe:kick_A}\\
+  P_i &\rightarrow P_{\rm eos}\left(\rho_i, A_i\right) \label{eq:sph:pe:kick_P}, \\
+  c_i &\rightarrow c_{\rm eos}\left(\rho_i,
+  A_i\right) \label{eq:sph:pe:kick_c}, \\
+  \tilde A_i &= A_i^{1/\gamma}
+\end{align}
+where, once again, we made use of the equation of state relating
+thermodynamical quantities.
+
+
+\subsubsection{Particle properties prediction}
+
+The prediction step is also identical to the \MinimalSPH case with the
+entropic function replacing the thermal energy.
+
+\begin{align}
+  \vec{x}_i &\rightarrow \vec{x}_i + \vec{v}_i \Delta t  \label{eq:sph:pe:drift_x} \\
+  h_i &\rightarrow h_i \exp\left(\frac{1}{h_i} \frac{dh_i}{dt}
+  \Delta t\right), \label{eq:sph:pe:drift_h}\\
+  \rho_i &\rightarrow \rho_i \exp\left(-\frac{3}{h_i} \frac{dh_i}{dt}
+  \Delta t\right), \label{eq:sph:pe:drift_rho} \\
+  \tilde A_i &\rightarrow \left(A_i + \frac{dA_i}{dt}
+  \Delta t\right)^{1/\gamma} \label{eq:sph:pe:drift_A_tilde}, \\
+  P_i &\rightarrow P_{\rm eos}\left(\rho_i, A_i + \frac{dA_i}{dt} \Delta t\right), \label{eq:sph:pe:drift_P}\\
+  c_i &\rightarrow c_{\rm eos}\left(\rho_i, A_i + \frac{dA_i}{dt}
+  \Delta t\right) \label{eq:sph:pe:drift_c}, 
+\end{align}
+where, as above, the last two updated quantities are obtained using
+the pre-defined equation of state. Note that the entropic function $A_i$
+itself is \emph{not} updated.
+
+\subsection{Pressure-Energy SPH}
+\label{sec:sph:pu}
+
+Section 2.2.2 of \cite{Hopkins2013}.\\ \tbd
+\subsection{Anarchy SPH}
+Dalla Vecchia (\textit{in prep.}), also described in section 2.2.2 of
+\cite{Schaller2015}.\\
+\label{sec:sph:anarchy}
+\tbd 
diff --git a/theory/SPH/Flavours/sph_flavours_standalone.tex b/theory/SPH/Flavours/sph_flavours_standalone.tex
new file mode 100644
index 0000000000000000000000000000000000000000..20c9f1451c2499d661bfbb1022bfd34c02fde4dd
--- /dev/null
+++ b/theory/SPH/Flavours/sph_flavours_standalone.tex
@@ -0,0 +1,31 @@
+\documentclass[fleqn, usenatbib, useAMS]{mnras}
+\usepackage{graphicx}
+\usepackage{amsmath,paralist,xcolor,xspace,amssymb}
+\usepackage{times}
+\usepackage[super]{nth}
+
+\newcommand{\swift}{{\sc swift}\xspace}
+\newcommand{\gadget}{{\sc gadget}\xspace}
+\newcommand{\dd}[2]{\frac{\partial #1}{\partial #2}}
+\renewcommand{\vec}[1]{{\mathbf{#1}}}
+\newcommand{\Wij}{\overline{\nabla_xW_{ij}}}
+\newcommand{\tbd}{\textcolor{red}{TO BE DONE}}
+
+\newcommand{\MinimalSPH}{\textsc{minimal-sph}\xspace}
+\newcommand{\GadgetSPH}{\textsc{gadget-sph}\xspace}
+\newcommand{\PESPH}{\textsc{pe-sph}\xspace}
+%\setlength{\mathindent}{0pt}
+
+%opening
+\title{SPH flavours in SWIFT}
+\author{Matthieu Schaller}
+
+\begin{document}
+
+\maketitle
+\input{sph_flavours}
+
+\bibliographystyle{mnras}
+\bibliography{./bibliography}
+
+\end{document}
diff --git a/theory/SPH/Kernels/bibliography.bib b/theory/SPH/Kernels/bibliography.bib
new file mode 100644
index 0000000000000000000000000000000000000000..93d484b07c625be71e338292376a3514243af725
--- /dev/null
+++ b/theory/SPH/Kernels/bibliography.bib
@@ -0,0 +1,18 @@
+@ARTICLE{Dehnen2012,
+   author = {{Dehnen}, W. and {Aly}, H.},
+    title = "{Improving convergence in smoothed particle hydrodynamics simulations without pairing instability}",
+  journal = {\mnras},
+archivePrefix = "arXiv",
+   eprint = {1204.2471},
+ primaryClass = "astro-ph.IM",
+ keywords = {hydrodynamics, methods: numerical },
+     year = 2012,
+    month = sep,
+   volume = 425,
+    pages = {1068-1082},
+      doi = {10.1111/j.1365-2966.2012.21439.x},
+   adsurl = {http://adsabs.harvard.edu/abs/2012MNRAS.425.1068D},
+  adsnote = {Provided by the SAO/NASA Astrophysics Data System}
+}
+
+
diff --git a/theory/SPH/kernel/kernel_definitions.tex b/theory/SPH/Kernels/kernel_definitions.tex
similarity index 83%
rename from theory/SPH/kernel/kernel_definitions.tex
rename to theory/SPH/Kernels/kernel_definitions.tex
index 65ef5c82ad928398a69b4528c7914829f146ad6e..b90b0623719f9acf311600d3c9d9157fabbc1865 100644
--- a/theory/SPH/kernel/kernel_definitions.tex
+++ b/theory/SPH/Kernels/kernel_definitions.tex
@@ -1,24 +1,8 @@
-\documentclass[a4paper]{mnras}
-\usepackage[utf8]{inputenc}
-\usepackage{amsmath}
-\usepackage{graphicx}
-\usepackage{xspace}
+We follow the definitions and notation of
+\cite{Dehnen2012}. Motivation for all the material given here can be
+found in their paper.
 
-\newcommand{\swift}{{\sc Swift}\xspace}
-
-
-
-%opening
-\title{SPH kernels in SWIFT}
-\author{Matthieu Schaller}
-
-\begin{document}
-
-\maketitle
-
-In here we follow the definitions of Dehnen \& Aly 2012.
-
-\section{General Definitions}
+\subsection{General Definitions}
 
 The desirable properties of an SPH kernels $W(\vec{x},h)$ are:
 \begin{enumerate}
@@ -28,14 +12,12 @@ The desirable properties of an SPH kernels $W(\vec{x},h)$ are:
 \item $W(\vec{x},h)$ should have a finite support and be cheap to
   compute.
 \end{enumerate}
-
 As a consequence, the smoothing kernels used in SPH can
 hence be written (in 3D) as
 
 \begin{equation}
  W(\vec{x},h) \equiv \frac{1}{H^3}f\left(\frac{|\vec{x}|}{H}\right),
 \end{equation}
-
 where $H=\gamma h$ is defined below and $f(u)$ is a dimensionless
 function, usually a low-order polynomial, such that $f(u \geq 1) = 0$
 and normalised such that
@@ -43,7 +25,6 @@ and normalised such that
 \begin{equation}
   \int f(|\vec{u}|){\rm d}^3u = 1.
 \end{equation}
-
 $H$ is the kernel's support radius and is used as the ``smoothing
 length'' in the Gadget code( {i.e.} $H=h$). This definition is,
 however, not very physical and makes comparison of kernels at a
@@ -56,13 +37,11 @@ standard deviation
   \sigma^2 \equiv \frac{1}{3}\int \vec{u}^2 W(\vec{u},h) {\rm d}^3u.
   \label{eq:sph:sigma}
 \end{equation}
-
 The smoothing length is then:
 \begin{equation}
   h\equiv2\sigma.
     \label{eq:sph:h}
 \end{equation}
-
 Each kernel, {\it i.e.} defintion of $f(u)$, will have a different
 ratio $\gamma = H/h$. So for a \emph{fixed resolution} $h$, one will
 have different kernel support sizes, $H$, and a different number of
@@ -73,18 +52,15 @@ separation:
 \begin{equation}
   h = \eta \langle x \rangle = \eta \left(\frac{m}{\rho}\right)^{1/3},
 \end{equation}
-
 where $\rho$ is the local density of the fluid and $m$ the SPH
 particle mass. 
-
 The (weighted) number of neighbours within the kernel support is a
-useful quantity to use in implementations of SPH. It is defined as (in
-3D)
+useful quantity to use in implementations of SPH. It is defined (in
+3D) as:
 
 \begin{equation}
   N_{\rm ngb} \equiv \frac{4}{3}\pi \left(\frac{H}{h}\eta\right)^3.
 \end{equation}
-
 Once the fixed ratio $\gamma= H/h$ is known (via equations
 \ref{eq:sph:sigma} and \ref{eq:sph:h}) for a given kernel, the number
 of neighbours only depends on the resolution parameter $\eta$.  For
@@ -92,28 +68,27 @@ the usual cubic spline kernel (see below), setting the simulation
 resolution to $\eta=1.2348$ yields the commonly used value $N_{\rm
   ngb} = 48$.
 
-\section{Kernels available in \swift}
+\subsection{Kernels available in \swift}
 
 The \swift kernels are split into two categories, the B-splines
 ($M_{4,5,6}$) and the Wendland kernels ($C2$, $C4$ and $C6$). In all
 cases we impose $f(u>1) = 0$.\\
-
 The spline kernels are defined as:
 
 \begin{align}
-  f(u) &= C M_n(u), \\
+  f(u) &= C M_n(u), \nonumber\\
   M_n(u) &\equiv \frac{1}{2\pi}
   \int_{-\infty}^{\infty}
   \left(\frac{\sin\left(k/n\right)}{k/n}\right)^n\cos\left(ku\right){\rm
-  d}k,
+  d}k \nonumber,
 \end{align}
-
-whilst the Wendland kernels read
+whilst the Wendland kernels are constructed from:
 
 \begin{align}
-  f(u) &= C \Psi_{i,j}(u), \\
-  \Psi_{i,j}(u) &\equiv \mathcal{I}^k\left[\max\left(1-u,0\right)\right],\\
-  \mathcal{I}[f](u) &\equiv \int_u^\infty f\left(k\right)k{\rm d}k.
+  f(u) &= C \Psi_{i,j}(u), \nonumber\\
+  \Psi_{i,j}(u) &\equiv
+  \mathcal{I}^k\left[\max\left(1-u,0\right)\right], \nonumber\\
+  \mathcal{I}[f](u) &\equiv \int_u^\infty f\left(k\right)k{\rm d}k. \nonumber
 \end{align}
 
 \subsubsection{Cubic spline ($M_4$) kernel}
@@ -208,11 +183,13 @@ All kernels available in \swift are shown on Fig.~\ref{fig:sph:kernels}.
 \includegraphics[width=\columnwidth]{kernels.pdf}
 \caption{The kernel functions available in \swift for a mean
   inter-particle separation $\langle x\rangle=1.5$ and a resolution
-  $\eta=1.2348$. The corresponding kernel support radii $H$ (shown by
-  arrows) and number of neighours $N_{\rm ngb}$ are indicated on the
-  figure. A Gaussian kernel with the same smoothing length is shown
-  for comparison. Note that all these kernels have the \emph{same
-    resolution} despite having vastly different number of neighbours.}
+  $\eta=1.2348$ shown in linear (top) and log (bottom) units to
+  highlight their differences. The corresponding kernel support radii
+  $H$ (shown by arrows) and number of neighours $N_{\rm ngb}$ are
+  indicated on the figure. A Gaussian kernel with the same smoothing
+  length is shown for comparison. Note that all these kernels have
+  the \emph{same resolution} despite having vastly different number of
+  neighbours.}
 \label{fig:sph:kernels}
 \end{figure}
 
@@ -226,16 +203,15 @@ All kernels available in \swift are shown on Fig.~\ref{fig:sph:kernels}.
 \end{figure}
 
 
-\section{Kernel Derivatives}
+\subsection{Kernel Derivatives}
 
 The derivatives of the kernel function have relatively simple
 expressions and are shown on Fig.~\ref{fig:sph:kernel_derivatives}:
 
-\begin{eqnarray*}
- \vec\nabla_x W(\vec{x},h) &=& \frac{1}{h^4}f'\left(\frac{|\vec{x}|}{h}\right) \frac{\vec{x}}{|\vec{x}|}, \\
- \frac{\partial W(\vec{x},h)}{\partial h} &=&- \frac{1}{h^4}\left[3f\left(\frac{|\vec{x}|}{h}\right) + 
+\begin{align}
+ \vec\nabla_x W(\vec{x},h) &= \frac{1}{h^4}f'\left(\frac{|\vec{x}|}{h}\right) \frac{\vec{x}}{|\vec{x}|}, \\
+ \frac{\partial W(\vec{x},h)}{\partial h} &=- \frac{1}{h^4}\left[3f\left(\frac{|\vec{x}|}{h}\right) + 
 \frac{|\vec{x}|}{h}f'\left(\frac{|\vec{x}|}{h}\right)\right].
-\end{eqnarray*}
+\end{align}
 Note that for all the kernels listed above, $f'(0) = 0$. 
 
-\end{document}
diff --git a/theory/SPH/Kernels/kernel_definitions_standalone.tex b/theory/SPH/Kernels/kernel_definitions_standalone.tex
new file mode 100644
index 0000000000000000000000000000000000000000..cc9f27c91747d85c0516365e12f42f27a247e8b3
--- /dev/null
+++ b/theory/SPH/Kernels/kernel_definitions_standalone.tex
@@ -0,0 +1,22 @@
+\documentclass[fleqn, usenatbib, useAMS,a4paper]{mnras}
+\usepackage{graphicx}
+\usepackage{amsmath,paralist,xcolor,xspace,amssymb}
+\usepackage{times}
+
+\newcommand{\swift}{{\sc Swift}\xspace}
+
+%opening
+\title{SPH kernels in SWIFT}
+\author{Matthieu Schaller}
+
+\begin{document}
+
+\maketitle
+
+\input{kernel_definitions}
+
+\bibliographystyle{mnras}
+\bibliography{./bibliography.bib}
+
+
+\end{document}
diff --git a/theory/SPH/kernel/kernels.py b/theory/SPH/Kernels/kernels.py
similarity index 52%
rename from theory/SPH/kernel/kernels.py
rename to theory/SPH/Kernels/kernels.py
index 9f0b853c6e9fce7180361457373f745cc2b8e804..069bfd1ea25c8b99b894eadb46f93b9656ba9c7e 100644
--- a/theory/SPH/kernel/kernels.py
+++ b/theory/SPH/Kernels/kernels.py
@@ -26,31 +26,53 @@ from scipy.optimize import fsolve
 from matplotlib.font_manager import FontProperties
 import numpy
 
-params = {
-    'axes.labelsize': 10,
-    'axes.titlesize': 8,
-    'font.size': 10,
-    'legend.fontsize': 9,
-    'xtick.labelsize': 10,
-    'ytick.labelsize': 10,
-    'xtick.major.pad': 2.5,
-    'ytick.major.pad': 2.5,
-    'text.usetex': True,
-'figure.figsize' : (4.15,4.15),
-'figure.subplot.left'    : 0.14,
-'figure.subplot.right'   : 0.99,
-'figure.subplot.bottom'  : 0.06,
-'figure.subplot.top'     : 0.99,
+params = {'axes.labelsize': 9,
+'axes.titlesize': 10,
+'font.size': 12,
+'legend.fontsize': 12,
+'xtick.labelsize': 9,
+'ytick.labelsize': 9,
+'text.usetex': True,
+'figure.figsize' : (3.15,3.15),
+'figure.subplot.left'    : 0.17,
+'figure.subplot.right'   : 0.99  ,
+'figure.subplot.bottom'  : 0.08  ,
+'figure.subplot.top'     : 0.99  ,
 'figure.subplot.wspace'  : 0.  ,
 'figure.subplot.hspace'  : 0.  ,
 'lines.markersize' : 6,
-'lines.linewidth' : 1.5,
+'lines.linewidth' : 3.,
 'text.latex.unicode': True
 }
 rcParams.update(params)
 rc('font',**{'family':'sans-serif','sans-serif':['Times']})
 
 
+# params = {
+#     'axes.labelsize': 10,
+#     'axes.titlesize': 8,
+#     'font.size': 10,
+#     'legend.fontsize': 9,
+#     'xtick.labelsize': 10,
+#     'ytick.labelsize': 10,
+#     'xtick.major.pad': 2.5,
+#     'ytick.major.pad': 2.5,
+#     'text.usetex': True,
+# 'figure.figsize' : (4.15,4.15),
+# 'figure.subplot.left'    : 0.14,
+# 'figure.subplot.right'   : 0.99,
+# 'figure.subplot.bottom'  : 0.06,
+# 'figure.subplot.top'     : 0.99,
+# 'figure.subplot.wspace'  : 0.  ,
+# 'figure.subplot.hspace'  : 0.  ,
+# 'lines.markersize' : 6,
+# 'lines.linewidth' : 1.5,
+# 'text.latex.unicode': True
+# }
+# rcParams.update(params)
+# rc('font',**{'family':'sans-serif','sans-serif':['Times']})
+
+
 #Parameters
 eta = 1.2348422195325 # Resolution (Gives 48 neighbours for a cubic spline kernel)
 dx  = 1.5#4 #2.7162  # Mean inter-particle separation
@@ -131,17 +153,17 @@ def W_WendlandC6(r):     return C_WendlandC6 * wendlandC6(r / H_WendlandC6)  / H
 # PLOT STUFF
 figure()
 subplot(211)
-xx = linspace(0., 5*h, 1000)
+xx = linspace(0., 5*h, 100)
 maxY = 1.2*Gaussian(0, h)
 
 # Plot the kernels
-plot(xx, Gaussian(xx, h), 'k-', linewidth=0.7, label="${\\rm %14s\\quad H=\\infty}$"%("Gaussian~~~~~~"))
-plot(xx, W_cubic_spline(xx), 'b-', label="${\\rm %14s\\quad H=%4.3f}$"%("Cubic~spline~~", H_cubic))
-plot(xx, W_quartic_spline(xx), 'c-', label="${\\rm %14s\\quad H=%4.3f}$"%("Quartic~spline", H_quartic))
-plot(xx, W_quintic_spline(xx), 'g-', label="${\\rm %14s\\quad H=%4.3f}$"%("Quintic~spline", H_quintic))
-plot(xx, W_WendlandC2(xx), 'r-', label="${\\rm %14s\\quad H=%4.3f}$"%("Wendland~C2~", H_WendlandC2))
-plot(xx, W_WendlandC4(xx), 'm-', label="${\\rm %14s\\quad H=%4.3f}$"%("Wendland~C4~", H_WendlandC4))
-plot(xx, W_WendlandC6(xx), 'y-', label="${\\rm %14s\\quad H=%4.3f}$"%("Wendland~C6~", H_WendlandC6))
+plot(xx, Gaussian(xx, h), 'k-', linewidth=0.7, label="${\\rm %14s~ H=\\infty}$"%("Gaussian~~~~~~"))
+plot(xx, W_cubic_spline(xx), 'b-', label="${\\rm %14s~ H=%4.3f}$"%("Cubic~spline~~", H_cubic), lw=1.5)
+plot(xx, W_quartic_spline(xx), 'c-', label="${\\rm %14s~ H=%4.3f}$"%("Quartic~spline", H_quartic), lw=1.5)
+plot(xx, W_quintic_spline(xx), 'g-', label="${\\rm %14s~ H=%4.3f}$"%("Quintic~spline", H_quintic), lw=1.5)
+plot(xx, W_WendlandC2(xx), 'r-', label="${\\rm %14s~ H=%4.3f}$"%("Wendland~C2~", H_WendlandC2), lw=1.5)
+plot(xx, W_WendlandC4(xx), 'm-', label="${\\rm %14s~ H=%4.3f}$"%("Wendland~C4~", H_WendlandC4), lw=1.5)
+plot(xx, W_WendlandC6(xx), 'y-', label="${\\rm %14s~ H=%4.3f}$"%("Wendland~C6~", H_WendlandC6), lw=1.5)
 
 # Indicate the position of H
 arrow(H_cubic, 0.12*maxY , 0., -0.12*maxY*0.9, fc='b', ec='b', length_includes_head=True, head_width=0.03, head_length=0.12*maxY*0.3)
@@ -152,33 +174,33 @@ arrow(H_WendlandC4, 0.12*maxY , 0., -0.12*maxY*0.9, fc='m', ec='m', length_inclu
 arrow(H_WendlandC6, 0.12*maxY , 0., -0.12*maxY*0.9, fc='y', ec='y', length_includes_head=True, head_width=0.03, head_length=0.12*maxY*0.3)
 
 # Show h
-plot([h, h], [0., maxY], 'k:', linewidth=0.5)
-text(h, maxY*0.35, "$h\\equiv\\eta\\langle x\\rangle = %.4f$"%h, rotation=90, backgroundcolor='w', ha='center', va='bottom')
+plot([h, h], [0., 0.63*maxY], 'k:', linewidth=0.5)
+text(h, maxY*0.65, "$h\\equiv\\eta\\langle x\\rangle$", rotation=90,  ha='center', va='bottom', fontsize=9)
 
 # Show sigma
-plot([h/2, h/2], [0., maxY], 'k:', linewidth=0.5)
-text(h/2, maxY*0.05, "$\\sigma\\equiv h/2$", rotation=90, backgroundcolor='w', ha='center', va='bottom')
+plot([h/2, h/2], [0., 0.63*maxY], 'k:', linewidth=0.5)
+text(h/2, maxY*0.65, "$\\sigma\\equiv h/2$", rotation=90,  ha='center', va='bottom', fontsize=9)
 
 # Show <x>
-plot([dx, dx], [0., maxY], 'k:', linewidth=0.5)
-text(dx, maxY*0.35, "$\\langle x\\rangle = %.1f$"%dx, rotation=90, backgroundcolor='w', ha='center', va='bottom')
+plot([dx, dx], [0., 0.63*maxY], 'k:', linewidth=0.5)
+text(dx, maxY*0.65, "$\\langle x\\rangle = %.1f$"%dx, rotation=90,  ha='center', va='bottom', fontsize=9)
 
 xlim(0., 2.5*h)
 ylim(0., maxY)
 gca().xaxis.set_ticklabels([])
 ylabel("$W(r,h)$", labelpad=1.5)
-legend(loc="upper right", handlelength=1.2, handletextpad=0.2)
+legend(loc="upper right", frameon=False, handletextpad=0.1, handlelength=1.2, fontsize=8)
 
 
 # Same but now in log space
 subplot(212, yscale="log")
-plot(xx, Gaussian(xx, h), 'k-', linewidth=0.7, label="${\\rm Gaussian}$")
-plot(xx, W_cubic_spline(xx), 'b-', label="${\\rm Cubic~spline}$")
-plot(xx, W_quartic_spline(xx), 'c-', label="${\\rm Quartic~spline}$")
-plot(xx, W_quintic_spline(xx), 'g-', label="${\\rm Quintic~spline}$")
-plot(xx, W_WendlandC2(xx), 'r-', label="${\\rm Wendland~C2}$")
-plot(xx, W_WendlandC4(xx), 'm-', label="${\\rm Wendland~C4}$")
-plot(xx, W_WendlandC6(xx), 'y-', label="${\\rm Wendland~C6}$")
+plot(xx, Gaussian(xx, h), 'k-', linewidth=0.7, label="${\\rm Gaussian}$", lw=1.5)
+plot(xx, W_cubic_spline(xx), 'b-', label="${\\rm Cubic~spline}$", lw=1.5)
+plot(xx, W_quartic_spline(xx), 'c-', label="${\\rm Quartic~spline}$", lw=1.5)
+plot(xx, W_quintic_spline(xx), 'g-', label="${\\rm Quintic~spline}$", lw=1.5)
+plot(xx, W_WendlandC2(xx), 'r-', label="${\\rm Wendland~C2}$", lw=1.5)
+plot(xx, W_WendlandC4(xx), 'm-', label="${\\rm Wendland~C4}$", lw=1.5)
+plot(xx, W_WendlandC6(xx), 'y-', label="${\\rm Wendland~C6}$", lw=1.5)
 
 # Show h
 plot([h, h], [0., 1.], 'k:', linewidth=0.5)
@@ -191,21 +213,22 @@ plot([dx, dx], [0., 1.], 'k:', linewidth=0.5)
 
 
 # Show plot properties
-text(h/5., 1e-3, "$\\langle x \\rangle = %3.1f$"%(dx), va="top", backgroundcolor='w')
-text(h/5.+0.06, 3e-4, "$\\eta = %5.4f$"%(eta), va="top", backgroundcolor='w')
+text(h/5., 2e-3, "$\\langle x \\rangle = %3.1f$"%(dx), va="top", backgroundcolor='w', fontsize=9)
+text(h/5.+0.06, 4e-4, "$\\eta = %5.4f$"%(eta), va="top", backgroundcolor='w', fontsize=9)
+text(h/5.+0.06, 8e-5, "$h = \\eta\\langle x \\rangle = %5.4f$"%(h), va="top", backgroundcolor='w', fontsize=9)
 
 # Show number of neighbours
-text(1.9*h, 2e-1/2.9**0, "$N_{\\rm ngb}=\\infty$", fontsize=10)
-text(1.9*h, 2e-1/2.9**1, "$N_{\\rm ngb}=%3.1f$"%(N_H_cubic), color='b', fontsize=9)
-text(1.9*h, 2e-1/2.9**2, "$N_{\\rm ngb}=%3.1f$"%(N_H_quartic), color='c', fontsize=9)
-text(1.9*h, 2e-1/2.9**3, "$N_{\\rm ngb}=%3.1f$"%(N_H_quintic), color='g', fontsize=9)
-text(1.9*h, 2e-1/2.9**4, "$N_{\\rm ngb}=%3.1f$"%(N_H_WendlandC2), color='r', fontsize=9)
-text(1.9*h, 2e-1/2.9**5, "$N_{\\rm ngb}=%3.1f$"%(N_H_WendlandC4), color='m', fontsize=9)
-text(1.9*h, 2e-1/2.9**6, "$N_{\\rm ngb}=%3.0f$"%(N_H_WendlandC6), color='y', fontsize=9)
+text(1.75*h, 2e-1/2.9**0, "$N_{\\rm ngb}=\\infty$", fontsize=10)
+text(1.75*h, 2e-1/2.9**1, "$N_{\\rm ngb}=%3.1f$"%(N_H_cubic), color='b', fontsize=9)
+text(1.75*h, 2e-1/2.9**2, "$N_{\\rm ngb}=%3.1f$"%(N_H_quartic), color='c', fontsize=9)
+text(1.75*h, 2e-1/2.9**3, "$N_{\\rm ngb}=%3.1f$"%(N_H_quintic), color='g', fontsize=9)
+text(1.75*h, 2e-1/2.9**4, "$N_{\\rm ngb}=%3.1f$"%(N_H_WendlandC2), color='r', fontsize=9)
+text(1.75*h, 2e-1/2.9**5, "$N_{\\rm ngb}=%3.1f$"%(N_H_WendlandC4), color='m', fontsize=9)
+text(1.75*h, 2e-1/2.9**6, "$N_{\\rm ngb}=%3.0f$"%(N_H_WendlandC6), color='y', fontsize=9)
 
 xlim(0., 2.5*h)
 ylim(1e-5, 0.7)
-xlabel("$r$", labelpad=0)
+xlabel("$r$", labelpad=-1)
 ylabel("$W(r,h)$", labelpad=0.5)
 
 savefig("kernels.pdf")
@@ -238,57 +261,67 @@ def d_wendlandC2(r):     return where(r > 1., 0., 20.*r**4 - 60.*r**3 + 60.*r**2
 def d_wendlandC4(r):     return where(r > 1., 0., 93.3333*r**7 - 448.*r**6 + 840.*r**5 - 746.667*r**4 + 280.*r**3 - 18.6667*r)
 def d_wendlandC6(r):     return where(r > 1., 0., 352.*r**10 - 2310.*r**9 + 6336.*r**8 - 9240.*r**7 + 7392.*r**6 - 2772.*r**5 + 264.*r**3 - 22.*r)
 def d_Gaussian(r,h): return (-8.*sqrt(2.)/(PI**(3./2.) * h**5)) * r * exp(- 2.*r**2 / (h**2))
+def dh_Gaussian(r,h): return -(3*Gaussian(r,h) + (r/h) * d_Gaussian(r,h))
 
 # Get the second derivative of the reduced kernel definitions for 3D kernels
-def d2_cubic_spline(r):   return where(r > 1., 0., where(r < 0.5,
-                                                         18.*r - 6.,
-                                                         -6.*r + 6.) )
-
-def d2_quartic_spline(r): return where(r > 1., 0., where(r < 0.2,
-                                                         72.*r**2 - 4.8,
-                                                         where(r < 0.6,
-                                                               -48.*r**2 + 48.*r  - (48./5.),
-                                                               12.*r**2 - 24.*r + 12.)))
-
-def d2_quintic_spline(r): return where(r > 1., 0., where(r < 1./3.,
-                                                         -200.*r**3 + 120.*r**2 - (40./9.),
-                                                         where(r < 2./3.,
-                                                               100.*r**3 - 180.*r**2 + 100.*r - (140./9.),
-                                                               -20.*r**3 + 60.*r**2 - 60.*r + 20)))
-def d2_wendlandC2(r):     return where(r > 1., 0., 80.*r**3 - 180.*r**2 + 120.*r - 20.)
-def d2_wendlandC4(r):     return where(r > 1., 0., 653.3333*r**6 - 2688.*r**5 + 4200.*r**4 - 2986.667*r**3 + 840.*r**2 - 18.6667)
-def d2_wendlandC6(r):     return where(r > 1., 0., 3520.*r**9 - 20790.*r**8 + 50688.*r**7 - 64680.*r**6 + 44352.*r**5 - 13860.*r**4 + 792.*r**2 - 22)
-def d2_Gaussian(r,h): return (32*sqrt(2)/(PI**(3./2.)*h**7)) * r**2 * exp(-2.*r**2 / (h**2)) - 8.*sqrt(2.)/(PI**(3./2.) * h**5) * exp(- 2.*r**2 / (h**2))
+# def d2_cubic_spline(r):   return where(r > 1., 0., where(r < 0.5,
+#                                                          18.*r - 6.,
+#                                                          -6.*r + 6.) )
+
+# def d2_quartic_spline(r): return where(r > 1., 0., where(r < 0.2,
+#                                                          72.*r**2 - 4.8,
+#                                                          where(r < 0.6,
+#                                                                -48.*r**2 + 48.*r  - (48./5.),
+#                                                                12.*r**2 - 24.*r + 12.)))
+
+# def d2_quintic_spline(r): return where(r > 1., 0., where(r < 1./3.,
+#                                                          -200.*r**3 + 120.*r**2 - (40./9.),
+#                                                          where(r < 2./3.,
+#                                                                100.*r**3 - 180.*r**2 + 100.*r - (140./9.),
+#                                                                -20.*r**3 + 60.*r**2 - 60.*r + 20)))
+# def d2_wendlandC2(r):     return where(r > 1., 0., 80.*r**3 - 180.*r**2 + 120.*r - 20.)
+# def d2_wendlandC4(r):     return where(r > 1., 0., 653.3333*r**6 - 2688.*r**5 + 4200.*r**4 - 2986.667*r**3 + 840.*r**2 - 18.6667)
+# def d2_wendlandC6(r):     return where(r > 1., 0., 3520.*r**9 - 20790.*r**8 + 50688.*r**7 - 64680.*r**6 + 44352.*r**5 - 13860.*r**4 + 792.*r**2 - 22)
+# def d2_Gaussian(r,h): return (32*sqrt(2)/(PI**(3./2.)*h**7)) * r**2 * exp(-2.*r**2 / (h**2)) - 8.*sqrt(2.)/(PI**(3./2.) * h**5) * exp(- 2.*r**2 / (h**2))
+
 
+# Derivative of kernel definitions (3D)
+def dWdx_cubic_spline(r):   return C_cubic      * d_cubic_spline(r / H_cubic)     / H_cubic**4
+def dWdx_quartic_spline(r): return C_quartic    * d_quartic_spline(r / H_quartic) / H_quartic**4
+def dWdx_quintic_spline(r): return C_quintic    * d_quintic_spline(r / H_quintic) / H_quintic**4
+def dWdx_WendlandC2(r):     return C_WendlandC2 * d_wendlandC2(r / H_WendlandC2)  / H_WendlandC2**4
+def dWdx_WendlandC4(r):     return C_WendlandC4 * d_wendlandC4(r / H_WendlandC4)  / H_WendlandC4**4
+def dWdx_WendlandC6(r):     return C_WendlandC6 * d_wendlandC6(r / H_WendlandC6)  / H_WendlandC6**4
 
 # Derivative of kernel definitions (3D)
-def dW_cubic_spline(r):   return C_cubic      * d_cubic_spline(r / H_cubic)     / H_cubic**4
-def dW_quartic_spline(r): return C_quartic    * d_quartic_spline(r / H_quartic) / H_quartic**4
-def dW_quintic_spline(r): return C_quintic    * d_quintic_spline(r / H_quintic) / H_quintic**4
-def dW_WendlandC2(r):     return C_WendlandC2 * d_wendlandC2(r / H_WendlandC2)  / H_WendlandC2**4
-def dW_WendlandC4(r):     return C_WendlandC4 * d_wendlandC4(r / H_WendlandC4)  / H_WendlandC4**4
-def dW_WendlandC6(r):     return C_WendlandC6 * d_wendlandC6(r / H_WendlandC6)  / H_WendlandC6**4
+def dWdh_cubic_spline(r):   return (3. * cubic_spline(r / H_cubic) + (r / H_cubic) * d_cubic_spline(r / H_cubic)) * (-C_cubic / H_cubic**4) 
+def dWdh_quartic_spline(r): return (3. * quartic_spline(r / H_quartic) + (r / H_quartic) * d_quartic_spline(r / H_quartic)) * (-C_quartic / H_quartic**4) 
+def dWdh_quintic_spline(r): return (3. * quintic_spline(r / H_quintic) + (r / H_quintic) * d_quintic_spline(r / H_quintic)) * (-C_quintic / H_quintic**4) 
+def dWdh_WendlandC2(r):     return (3. * wendlandC2(r / H_WendlandC2) + (r / H_WendlandC2) * d_wendlandC2(r / H_WendlandC2)) * (-C_WendlandC2 / H_WendlandC2**4) 
+def dWdh_WendlandC4(r):     return (3. * wendlandC4(r / H_WendlandC4) + (r / H_WendlandC4) * d_wendlandC4(r / H_WendlandC4)) * (-C_WendlandC4 / H_WendlandC4**4) 
+def dWdh_WendlandC6(r):     return (3. * wendlandC6(r / H_WendlandC6) + (r / H_WendlandC6) * d_wendlandC6(r / H_WendlandC6)) * (-C_WendlandC6 / H_WendlandC6**4) 
+
 
 # Second derivative of kernel definitions (3D)
-def d2W_cubic_spline(r):   return C_cubic      * d2_cubic_spline(r / H_cubic)     / H_cubic**5
-def d2W_quartic_spline(r): return C_quartic    * d2_quartic_spline(r / H_quartic) / H_quartic**5
-def d2W_quintic_spline(r): return C_quintic    * d2_quintic_spline(r / H_quintic) / H_quintic**5
-def d2W_WendlandC2(r):     return C_WendlandC2 * d2_wendlandC2(r / H_WendlandC2)  / H_WendlandC2**5
-def d2W_WendlandC4(r):     return C_WendlandC4 * d2_wendlandC4(r / H_WendlandC4)  / H_WendlandC4**5
-def d2W_WendlandC6(r):     return C_WendlandC6 * d2_wendlandC6(r / H_WendlandC6)  / H_WendlandC6**5
+#def d2W_cubic_spline(r):   return C_cubic      * d2_cubic_spline(r / H_cubic)     / H_cubic**5
+#def d2W_quartic_spline(r): return C_quartic    * d2_quartic_spline(r / H_quartic) / H_quartic**5
+#def d2W_quintic_spline(r): return C_quintic    * d2_quintic_spline(r / H_quintic) / H_quintic**5
+#def d2W_WendlandC2(r):     return C_WendlandC2 * d2_wendlandC2(r / H_WendlandC2)  / H_WendlandC2**5
+#def d2W_WendlandC4(r):     return C_WendlandC4 * d2_wendlandC4(r / H_WendlandC4)  / H_WendlandC4**5
+#def d2W_WendlandC6(r):     return C_WendlandC6 * d2_wendlandC6(r / H_WendlandC6)  / H_WendlandC6**5
 
 
 figure()
 subplot(211)
 
 plot([0, 2.5*h], [0., 0.], 'k--', linewidth=0.7)
-plot(xx, d_Gaussian(xx, h), 'k-', linewidth=0.7, label="${\\rm Gaussian}$")
-plot(xx, dW_cubic_spline(xx), 'b-', label="${\\rm Cubic~spline}$")
-plot(xx, dW_quartic_spline(xx), 'c-', label="${\\rm Quartic~spline}$")
-plot(xx, dW_quintic_spline(xx), 'g-', label="${\\rm Quintic~spline}$")
-plot(xx, dW_WendlandC2(xx), 'r-', label="${\\rm Wendland~C2}$")
-plot(xx, dW_WendlandC4(xx), 'm-', label="${\\rm Wendland~C4}$")
-plot(xx, dW_WendlandC6(xx), 'y-', label="${\\rm Wendland~C6}$")
+plot(xx, d_Gaussian(xx, h), 'k-', linewidth=0.7, label="${\\rm Gaussian}$", lw=1.5)
+plot(xx, dWdx_cubic_spline(xx), 'b-', label="${\\rm Cubic~spline}$", lw=1.5)
+plot(xx, dWdx_quartic_spline(xx), 'c-', label="${\\rm Quartic~spline}$", lw=1.5)
+plot(xx, dWdx_quintic_spline(xx), 'g-', label="${\\rm Quintic~spline}$", lw=1.5)
+plot(xx, dWdx_WendlandC2(xx), 'r-', label="${\\rm Wendland~C2}$", lw=1.5)
+plot(xx, dWdx_WendlandC4(xx), 'm-', label="${\\rm Wendland~C4}$", lw=1.5)
+plot(xx, dWdx_WendlandC6(xx), 'y-', label="${\\rm Wendland~C6}$", lw=1.5)
 
 maxY = d_Gaussian(h/2, h)
 
@@ -304,37 +337,35 @@ plot([dx, dx], [2*maxY, 0.1], 'k:', linewidth=0.5)
 
 xlim(0., 2.5*h)
 gca().xaxis.set_ticklabels([])
-ylim(1.1*maxY, -0.1*maxY)
+ylim(1.3*maxY, -0.1*maxY)
 xlabel("$r$", labelpad=0)
 ylabel("$\\partial W(r,h)/\\partial r$", labelpad=0.5)
-legend(loc="lower right")
+legend(loc="lower right", frameon=False, handletextpad=0.1, handlelength=1.2, fontsize=8)
 
 
 
 subplot(212)
-
-maxY = d2_Gaussian(h,h)
-plot([h/2, h/2], [-4*maxY, 1.4*maxY], 'k:', linewidth=0.5)
-plot([h, h], [-4*maxY, 1.4*maxY], 'k:', linewidth=0.5)
-plot([dx, dx], [-4*maxY, 1.4*maxY], 'k:', linewidth=0.5)
-text(h/2, -3.*maxY, "$\\sigma\\equiv h/2$", rotation=90, backgroundcolor='w', ha='center', va='bottom')
-text(h, -3.*maxY, "$h\\equiv\\eta\\langle x\\rangle = %.4f$"%h, rotation=90, backgroundcolor='w', ha='center', va='bottom')
-text(dx, -3.*maxY, "$\\langle x\\rangle = %.1f$"%dx, rotation=90, backgroundcolor='w', ha='center', va='bottom')
+plot([h/2, h/2], [0.77*maxY, -0.15*maxY], 'k:', linewidth=0.5)
+plot([h, h], [0.77*maxY, -0.15*maxY], 'k:', linewidth=0.5)
+plot([dx, dx], [0.77*maxY, -0.15*maxY], 'k:', linewidth=0.5)
+text(h/2, 1.25*maxY, "$\\sigma\\equiv h/2$", rotation=90, ha='center', va='bottom', fontsize=9)
+text(h, 1.25*maxY, "$h\\equiv\\eta\\langle x\\rangle$", rotation=90, ha='center', va='bottom', fontsize=9)
+text(dx, 1.25*maxY, "$\\langle x\\rangle = %.1f$"%dx, rotation=90, ha='center', va='bottom', fontsize=9)
 
 
 plot([0, 2.5*h], [0., 0.], 'k--', linewidth=0.7)
-plot(xx, d2_Gaussian(xx, h), 'k-', linewidth=0.7, label="${\\rm Gaussian}$")
-plot(xx, d2W_cubic_spline(xx), 'b-', label="${\\rm Cubic~spline}$")
-plot(xx, d2W_quartic_spline(xx), 'c-', label="${\\rm Quartic~spline}$")
-plot(xx, d2W_quintic_spline(xx), 'g-', label="${\\rm Quintic~spline}$")
-plot(xx, d2W_WendlandC2(xx), 'r-', label="${\\rm Wendland~C2}$")
-plot(xx, d2W_WendlandC4(xx), 'm-', label="${\\rm Wendland~C4}$")
-plot(xx, d2W_WendlandC6(xx), 'y-', label="${\\rm Wendland~C6}$")
+#plot(xx, dh_Gaussian(xx, h), 'k-', linewidth=0.7, label="${\\rm Gaussian}$")
+plot(xx, dWdh_cubic_spline(xx), 'b-', label="${\\rm Cubic~spline}$", lw=1.5)
+plot(xx, dWdh_quartic_spline(xx), 'c-', label="${\\rm Quartic~spline}$", lw=1.5)
+plot(xx, dWdh_quintic_spline(xx), 'g-', label="${\\rm Quintic~spline}$", lw=1.5)
+plot(xx, dWdh_WendlandC2(xx), 'r-', label="${\\rm Wendland~C2}$", lw=1.5)
+plot(xx, dWdh_WendlandC4(xx), 'm-', label="${\\rm Wendland~C4}$", lw=1.5)
+plot(xx, dWdh_WendlandC6(xx), 'y-', label="${\\rm Wendland~C6}$", lw=1.5)
 
 xlim(0., 2.5*h)
-ylim(-3.2*maxY, 1.3*maxY)
-xlabel("$r$", labelpad=0)
-ylabel("$\\partial^2 W(r,h)/\\partial r^2$", labelpad=0.5)
+ylim(1.3*maxY, -0.15*maxY)
+xlabel("$r$", labelpad=-1)
+ylabel("$\\partial W(r,h)/\\partial h$", labelpad=0.5)
 
 
 savefig("kernel_derivatives.pdf")
diff --git a/theory/SPH/Kernels/run.sh b/theory/SPH/Kernels/run.sh
new file mode 100755
index 0000000000000000000000000000000000000000..5f2c8569deebad81203ea8cc27c348a0d1607c0a
--- /dev/null
+++ b/theory/SPH/Kernels/run.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+python kernels.py
+pdflatex -jobname=kernel_definitions kernel_definitions_standalone.tex
+bibtex kernel_definitions.aux
+pdflatex -jobname=kernel_definitions kernel_definitions_standalone.tex
+pdflatex -jobname=kernel_definitions kernel_definitions_standalone.tex
diff --git a/theory/SPH/bibliography.bib b/theory/SPH/bibliography.bib
new file mode 100644
index 0000000000000000000000000000000000000000..4bcb0e1939a257d54c4c0aa99495d7568838b4f8
--- /dev/null
+++ b/theory/SPH/bibliography.bib
@@ -0,0 +1,116 @@
+@ARTICLE{Price2012,
+   author = {{Price}, D.~J.},
+    title = "{Smoothed particle hydrodynamics and magnetohydrodynamics}",
+  journal = {Journal of Computational Physics},
+archivePrefix = "arXiv",
+   eprint = {1012.1885},
+ primaryClass = "astro-ph.IM",
+     year = 2012,
+    month = feb,
+   volume = 231,
+    pages = {759-794},
+      doi = {10.1016/j.jcp.2010.12.011},
+   adsurl = {http://adsabs.harvard.edu/abs/2012JCoPh.231..759P},
+  adsnote = {Provided by the SAO/NASA Astrophysics Data System}
+}
+
+
+@ARTICLE{Springel2005,
+   author = {{Springel}, V.},
+    title = "{The cosmological simulation code GADGET-2}",
+  journal = {\mnras},
+   eprint = {astro-ph/0505010},
+ keywords = {methods: numerical, galaxies: interactions, dark matter},
+     year = 2005,
+    month = dec,
+   volume = 364,
+    pages = {1105-1134},
+      doi = {10.1111/j.1365-2966.2005.09655.x},
+   adsurl = {http://adsabs.harvard.edu/abs/2005MNRAS.364.1105S},
+  adsnote = {Provided by the SAO/NASA Astrophysics Data System}
+}
+
+
+
+@ARTICLE{Hopkins2013,
+   author = {{Hopkins}, P.~F.},
+    title = "{A general class of Lagrangian smoothed particle hydrodynamics methods and implications for fluid mixing problems}",
+  journal = {\mnras},
+archivePrefix = "arXiv",
+   eprint = {1206.5006},
+ primaryClass = "astro-ph.IM",
+ keywords = {hydrodynamics, instabilities, turbulence, methods: numerical, cosmology: theory},
+     year = 2013,
+    month = feb,
+   volume = 428,
+    pages = {2840-2856},
+      doi = {10.1093/mnras/sts210},
+   adsurl = {http://adsabs.harvard.edu/abs/2013MNRAS.428.2840H},
+  adsnote = {Provided by the SAO/NASA Astrophysics Data System}
+}
+
+@ARTICLE{Springel2002,
+   author = {{Springel}, V. and {Hernquist}, L.},
+    title = "{Cosmological smoothed particle hydrodynamics simulations: the entropy equation}",
+  journal = {\mnras},
+   eprint = {astro-ph/0111016},
+ keywords = {methods: numerical, galaxies: evolution, galaxies: starburst, methods: numerical, galaxies: evolution, galaxies: starburst},
+     year = 2002,
+    month = jul,
+   volume = 333,
+    pages = {649-664},
+      doi = {10.1046/j.1365-8711.2002.05445.x},
+   adsurl = {http://adsabs.harvard.edu/abs/2002MNRAS.333..649S},
+  adsnote = {Provided by the SAO/NASA Astrophysics Data System}
+}
+
+@ARTICLE{Balsara1995,
+   author = {{Balsara}, D.~S.},
+    title = "{von Neumann stability analysis of smooth particle hydrodynamics--suggestions for optimal algorithms}",
+  journal = {Journal of Computational Physics},
+     year = 1995,
+   volume = 121,
+    pages = {357-372},
+      doi = {10.1016/S0021-9991(95)90221-X},
+   adsurl = {http://adsabs.harvard.edu/abs/1995JCoPh.121..357B},
+  adsnote = {Provided by the SAO/NASA Astrophysics Data System}
+}
+
+
+@ARTICLE{Schaller2015,
+   author = {{Schaller}, M. and {Dalla Vecchia}, C. and {Schaye}, J. and 
+	{Bower}, R.~G. and {Theuns}, T. and {Crain}, R.~A. and {Furlong}, M. and 
+	{McCarthy}, I.~G.},
+    title = "{The EAGLE simulations of galaxy formation: the importance of the hydrodynamics scheme}",
+  journal = {\mnras},
+archivePrefix = "arXiv",
+   eprint = {1509.05056},
+ keywords = {methods: numerical, galaxies: clusters: intracluster medium, galaxies: formation, cosmology: theory},
+     year = 2015,
+    month = dec,
+   volume = 454,
+    pages = {2277-2291},
+      doi = {10.1093/mnras/stv2169},
+   adsurl = {http://adsabs.harvard.edu/abs/2015MNRAS.454.2277S},
+  adsnote = {Provided by the SAO/NASA Astrophysics Data System}
+}
+
+
+
+
+@ARTICLE{Dehnen2012,
+   author = {{Dehnen}, W. and {Aly}, H.},
+    title = "{Improving convergence in smoothed particle hydrodynamics simulations without pairing instability}",
+  journal = {\mnras},
+archivePrefix = "arXiv",
+   eprint = {1204.2471},
+ primaryClass = "astro-ph.IM",
+ keywords = {hydrodynamics, methods: numerical },
+     year = 2012,
+    month = sep,
+   volume = 425,
+    pages = {1068-1082},
+      doi = {10.1111/j.1365-2966.2012.21439.x},
+   adsurl = {http://adsabs.harvard.edu/abs/2012MNRAS.425.1068D},
+  adsnote = {Provided by the SAO/NASA Astrophysics Data System}
+}
diff --git a/theory/SPH/kernel/run.sh b/theory/SPH/kernel/run.sh
deleted file mode 100755
index 61b469025b705f298b367963700e99fb13f13bd4..0000000000000000000000000000000000000000
--- a/theory/SPH/kernel/run.sh
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/bin/bash
-python kernels.py
-pdflatex kernel_definitions.tex
-pdflatex kernel_definitions.tex
diff --git a/theory/SPH/run.sh b/theory/SPH/run.sh
new file mode 100755
index 0000000000000000000000000000000000000000..651aadad79c2471f58221e2b4fcad7a09ab12256
--- /dev/null
+++ b/theory/SPH/run.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+cd Kernels
+python kernels.py
+cp kernels.pdf ..
+cp kernel_derivatives.pdf ..
+cd ..
+pdflatex swift_sph.tex
+bibtex swift_sph.aux 
+pdflatex swift_sph.tex
+pdflatex swift_sph.tex
diff --git a/theory/SPH/swift_sph.tex b/theory/SPH/swift_sph.tex
new file mode 100644
index 0000000000000000000000000000000000000000..693df8cbc7cc9d03e98bdc80725572016ccf7bd0
--- /dev/null
+++ b/theory/SPH/swift_sph.tex
@@ -0,0 +1,38 @@
+\documentclass[fleqn, usenatbib, useAMS]{mnras}
+\usepackage{graphicx}
+\usepackage{amsmath,paralist,xcolor,xspace,amssymb}
+\usepackage{times}
+\usepackage[super]{nth}
+
+\newcommand{\swift}{{\sc swift}\xspace}
+\newcommand{\gadget}{{\sc gadget}\xspace}
+\newcommand{\dd}[2]{\frac{\partial #1}{\partial #2}}
+\renewcommand{\vec}[1]{{\mathbf{#1}}}
+\newcommand{\Wij}{\overline{\nabla_xW_{ij}}}
+\newcommand{\tbd}{\textcolor{red}{TO BE DONE}}
+
+\newcommand{\MinimalSPH}{\textsc{minimal-sph}\xspace}
+\newcommand{\GadgetSPH}{\textsc{gadget-sph}\xspace}
+\newcommand{\PESPH}{\textsc{pe-sph}\xspace}
+%\setlength{\mathindent}{0pt}
+
+%opening
+\title{SPH implementation in \swift}
+\author{Matthieu Schaller}
+
+\begin{document}
+
+\maketitle
+\section{Kernel functions}
+\input{Kernels/kernel_definitions}
+
+\section{Equation of state}
+\input{EoS/eos}
+
+\section{SPH flavours}
+\input{Flavours/sph_flavours}
+
+\bibliographystyle{mnras}
+\bibliography{./bibliography}
+
+\end{document}