diff --git a/examples/EAGLE_12/eagle_12.yml b/examples/EAGLE_12/eagle_12.yml
index cc5d87ca025fbb950b8b5c46c62831c5ec5bb003..53eb99765d859a26dcdeb8c9148fb74c058c1544 100644
--- a/examples/EAGLE_12/eagle_12.yml
+++ b/examples/EAGLE_12/eagle_12.yml
@@ -27,7 +27,7 @@ Statistics:
 Gravity:
   eta:                   0.025    # Constant dimensionless multiplier for time integration.
   epsilon:               0.001    # Softening length (in internal units).
-  theta:                 0.7      # Opening angle (Multipole acceptance criterion)
+  theta:                 0.85     # Opening angle (Multipole acceptance criterion)
   
 # Parameters for the hydrodynamics scheme
 SPH:
diff --git a/examples/EAGLE_25/eagle_25.yml b/examples/EAGLE_25/eagle_25.yml
index 5cced70ed7b94ab572492eb57eb929925bb3157a..c01abfe94b98e8366ac05f1e3bce50a3931738c5 100644
--- a/examples/EAGLE_25/eagle_25.yml
+++ b/examples/EAGLE_25/eagle_25.yml
@@ -13,6 +13,9 @@ TimeIntegration:
   dt_min:     1e-10 # The minimal time-step size of the simulation (in internal units).
   dt_max:     1e-4  # The maximal time-step size of the simulation (in internal units).
 
+Scheduler:
+  max_top_level_cells:    20
+
 # Parameters governing the snapshots
 Snapshots:
   basename:            eagle # Common part of the name of output files
@@ -26,8 +29,8 @@ Statistics:
 # Parameters for the self-gravity scheme
 Gravity:
   eta:                   0.025    # Constant dimensionless multiplier for time integration. 
-  epsilon:               0.0001   # Softening length (in internal units).
-  theta:                 0.7      # Opening angle (Multipole acceptance criterion)
+  epsilon:               0.001    # Softening length (in internal units).
+  theta:                 0.85     # Opening angle (Multipole acceptance criterion)
   
 # Parameters for the hydrodynamics scheme
 SPH:
diff --git a/examples/EAGLE_50/eagle_50.yml b/examples/EAGLE_50/eagle_50.yml
index 05199ae8e7e36164e592ad94ac608540381b485f..2a6b1ef156f220ebc63bded8bbd80b21fcf6cde2 100644
--- a/examples/EAGLE_50/eagle_50.yml
+++ b/examples/EAGLE_50/eagle_50.yml
@@ -26,8 +26,8 @@ Statistics:
 # Parameters for the self-gravity scheme
 Gravity:
   eta:                   0.025    # Constant dimensionless multiplier for time integration.
-  epsilon:               0.0001   # Softening length (in internal units).
-  theta:                 0.7      # Opening angle (Multipole acceptance criterion)
+  epsilon:               0.001    # Softening length (in internal units).
+  theta:                 0.85     # Opening angle (Multipole acceptance criterion)
 
 # Parameters for the hydrodynamics scheme
 SPH:
diff --git a/examples/EAGLE_6/eagle_6.yml b/examples/EAGLE_6/eagle_6.yml
index cf00a17694e9848825d96e1e5267961908dbbbb9..6586d0d5bb5467ae55d18c43f83f01b4c715ec86 100644
--- a/examples/EAGLE_6/eagle_6.yml
+++ b/examples/EAGLE_6/eagle_6.yml
@@ -6,6 +6,10 @@ InternalUnitSystem:
   UnitCurrent_in_cgs:  1             # Amperes
   UnitTemp_in_cgs:     1             # Kelvin
 
+Scheduler:
+  max_top_level_cells: 8
+  tasks_per_cell: 50
+  
 # Parameters governing the time integration
 TimeIntegration:
   time_begin: 0.    # The starting time of the simulation (in internal units).
@@ -26,8 +30,8 @@ Statistics:
 # Parameters for the self-gravity scheme
 Gravity:
   eta:                   0.025    # Constant dimensionless multiplier for time integration.
-  theta:                 0.85     # Opening angle (Multipole acceptance criterion)
   epsilon:               0.001    # Softening length (in internal units).
+  theta:                 0.85     # Opening angle (Multipole acceptance criterion)
   
 # Parameters for the hydrodynamics scheme
 SPH:
diff --git a/examples/UniformDMBox/uniformBox.yml b/examples/UniformDMBox/uniformBox.yml
index e59d677b308ca70f212f74c7e4d8b79f015c77a9..1abb256671f1cc8c87daa711bd63f7ea6abdbbab 100644
--- a/examples/UniformDMBox/uniformBox.yml
+++ b/examples/UniformDMBox/uniformBox.yml
@@ -9,9 +9,9 @@ InternalUnitSystem:
 # Parameters governing the time integration
 TimeIntegration:
   time_begin: 0.    # The starting time of the simulation (in internal units).
-  time_end:   1.    # The end time of the simulation (in internal units).
+  time_end:   100.    # The end time of the simulation (in internal units).
   dt_min:     1e-6  # The minimal time-step size of the simulation (in internal units).
-  dt_max:     1e-3  # The maximal time-step size of the simulation (in internal units).
+  dt_max:     1.  # The maximal time-step size of the simulation (in internal units).
 
 Scheduler:
   max_top_level_cells: 8
@@ -21,18 +21,18 @@ Scheduler:
 Snapshots:
   basename:            uniformDMBox # Common part of the name of output files
   time_first:          0.           # Time of the first output (in internal units)
-  delta_time:          0.01         # Time difference between consecutive outputs (in internal units)
+  delta_time:          10.         # Time difference between consecutive outputs (in internal units)
 
 # Parameters for the self-gravity scheme
 Gravity:
   eta:                   0.025    # Constant dimensionless multiplier for time integration. 
-  theta:                 0.7      # Opening angle (Multipole acceptance criterion)
-  epsilon:               0.00001  # Softening length (in internal units).
+  theta:                 0.8      # Opening angle (Multipole acceptance criterion)
+  epsilon:               0.01     # Softening length (in internal units).
  
 # Parameters governing the conserved quantities statistics
 Statistics:
-  delta_time:          1e-2 # Time between statistics output
+  delta_time:          5. # Time between statistics output
 
 # Parameters related to the initial conditions
 InitialConditions:
-  file_name:  ./uniformDMBox_50.hdf5     # The file to read
+  file_name:  ./uniformDMBox_16.hdf5     # The file to read
diff --git a/examples/analyse_tasks.py b/examples/analyse_tasks.py
index a689f007d4a6001b73ccfac9d55d47d2863cbf5c..bd20d0f796598d8a9f0b3e906bbb6048ac03b866 100755
--- a/examples/analyse_tasks.py
+++ b/examples/analyse_tasks.py
@@ -52,11 +52,10 @@ infile = args.input
 
 #  Tasks and subtypes. Indexed as in tasks.h.
 TASKTYPES = ["none", "sort", "self", "pair", "sub_self", "sub_pair",
-             "init_grav", "ghost", "extra_ghost", "drift_part",
-             "drift_gpart", "kick1", "kick2", "timestep", "send", "recv",
-             "grav_top_level", "grav_long_range", "grav_mm", "grav_down",
-             "cooling", "sourceterms", "count"]
-
+             "init_grav", "ghost_in", "ghost", "ghost_out", "extra_ghost", "drift_part", "drift_gpart",
+             "kick1", "kick2", "timestep", "send", "recv", "grav_top_level",
+             "grav_long_range", "grav_ghost_in", "grav_ghost_out", "grav_mm", "grav_down", "cooling",
+             "sourceterms", "count"]
 SUBTYPES = ["none", "density", "gradient", "force", "grav", "external_grav",
             "tend", "xv", "rho", "gpart", "multipole", "spart", "count"]
 
diff --git a/examples/check_ngbs.py b/examples/check_ngbs.py
index 6b403c6a4555c442e3004921748c57890cdb2e0f..fca975e76457445480eb7b19e60e8f9174dee941 100644
--- a/examples/check_ngbs.py
+++ b/examples/check_ngbs.py
@@ -18,8 +18,6 @@ inputFile2 = ""
 # Check list of density neighbours and check that they are correct.
 def check_density_neighbours(pids, ngb_ids_naive, ngb_ids_sort, mask, pos, h, num_invalid, acc):
 
-    error_val = False
-
     for k in range(0,num_invalid):
 
         # Filter neighbour lists for valid particle ids
@@ -28,7 +26,21 @@ def check_density_neighbours(pids, ngb_ids_naive, ngb_ids_sort, mask, pos, h, nu
 
         # Check neighbour lists for differences
         id_list = set(filter_neigh_naive).symmetric_difference(set(filter_neigh_sort))
+       
+        # Check for duplicate IDs
+        duplicate_check_naive = len(filter_neigh_naive) != len(set(filter_neigh_naive))
+        duplicate_check_sort = len(filter_neigh_sort) != len(set(filter_neigh_sort))
+
+        if duplicate_check_naive:
+            print "Duplicate neighbour ID found in: ", inputFile1
+            print filter_neigh_naive
+            return True
         
+        if duplicate_check_sort:
+            print "Duplicate neighbour ID found in: ", inputFile2
+            print filter_neigh_sort
+            return True
+
         pid = pids[mask][k]
 
         # Loop over discrepancies and check if they are actually neighbours
@@ -53,9 +65,9 @@ def check_density_neighbours(pids, ngb_ids_naive, ngb_ids_sort, mask, pos, h, nu
             if diff < acc * hig2:
                 print "Missing interaction due to precision issue will be ignored."
             else:
-                error_val = True
+                return True
 
-    return error_val
+    return False
 
 # Check list of force neighbours and check that they are correct.
 def check_force_neighbours(pids, ngb_ids_naive, ngb_ids_sort, mask, pos, h, num_invalid, acc):
diff --git a/examples/plot_tasks.py b/examples/plot_tasks.py
index 6f9dca798b5a72a7adea3c20ae74c1071fe30d40..60c70cf9f8167ed44829765bd5cf476c2a800f84 100755
--- a/examples/plot_tasks.py
+++ b/examples/plot_tasks.py
@@ -110,9 +110,9 @@ pl.rcParams.update(PLOT_PARAMS)
 
 #  Tasks and subtypes. Indexed as in tasks.h.
 TASKTYPES = ["none", "sort", "self", "pair", "sub_self", "sub_pair",
-             "init_grav", "ghost", "extra_ghost", "drift_part", "drift_gpart",
+             "init_grav", "ghost_in", "ghost", "ghost_out", "extra_ghost", "drift_part", "drift_gpart",
              "kick1", "kick2", "timestep", "send", "recv", "grav_top_level",
-             "grav_long_range", "grav_mm", "grav_down", "cooling",
+             "grav_long_range", "grav_ghost_in", "grav_ghost_out", "grav_mm", "grav_down", "cooling",
              "sourceterms", "count"]
 
 SUBTYPES = ["none", "density", "gradient", "force", "grav", "external_grav",
@@ -123,7 +123,7 @@ FULLTYPES = ["self/force", "self/density", "self/grav", "sub_self/force",
              "sub_self/density", "pair/force", "pair/density", "pair/grav",
              "sub_pair/force",
              "sub_pair/density", "recv/xv", "send/xv", "recv/rho", "send/rho",
-             "recv/tend", "send/tend"]
+             "recv/tend", "send/tend", "recv/gpart", "send/gpart"]
 
 #  A number of colours for the various types. Recycled when there are
 #  more task types than colours...
diff --git a/src/active.h b/src/active.h
index f9ed4a0d40127f1544c744a61b05515a19857b05..9eb4123392f4b1b6bca2f68d59991ff90b92614d 100644
--- a/src/active.h
+++ b/src/active.h
@@ -82,19 +82,19 @@ __attribute__((always_inline)) INLINE static int cell_are_gpart_drifted(
  * @param e The #engine containing information about the current time.
  * @return 1 if the #cell contains at least an active particle, 0 otherwise.
  */
-__attribute__((always_inline)) INLINE static int cell_is_active(
+__attribute__((always_inline)) INLINE static int cell_is_active_hydro(
     const struct cell *c, const struct engine *e) {
 
 #ifdef SWIFT_DEBUG_CHECKS
-  if (c->ti_end_min < e->ti_current)
+  if (c->ti_hydro_end_min < e->ti_current)
     error(
         "cell in an impossible time-zone! c->ti_end_min=%lld (t=%e) and "
         "e->ti_current=%lld (t=%e)",
-        c->ti_end_min, c->ti_end_min * e->timeBase, e->ti_current,
+        c->ti_hydro_end_min, c->ti_hydro_end_min * e->timeBase, e->ti_current,
         e->ti_current * e->timeBase);
 #endif
 
-  return (c->ti_end_min == e->ti_current);
+  return (c->ti_hydro_end_min == e->ti_current);
 }
 
 /**
@@ -104,18 +104,61 @@ __attribute__((always_inline)) INLINE static int cell_is_active(
  * @param e The #engine containing information about the current time.
  * @return 1 if all particles in a #cell are active, 0 otherwise.
  */
-__attribute__((always_inline)) INLINE static int cell_is_all_active(
+__attribute__((always_inline)) INLINE static int cell_is_all_active_hydro(
     const struct cell *c, const struct engine *e) {
 
 #ifdef SWIFT_DEBUG_CHECKS
-  if (c->ti_end_max < e->ti_current)
+  if (c->ti_hydro_end_max < e->ti_current)
     error(
         "cell in an impossible time-zone! c->ti_end_max=%lld "
         "e->ti_current=%lld",
-        c->ti_end_max, e->ti_current);
+        c->ti_hydro_end_max, e->ti_current);
 #endif
 
-  return (c->ti_end_max == e->ti_current);
+  return (c->ti_hydro_end_max == e->ti_current);
+}
+
+/**
+ * @brief Does a cell contain any g-particle finishing their time-step now ?
+ *
+ * @param c The #cell.
+ * @param e The #engine containing information about the current time.
+ * @return 1 if the #cell contains at least an active particle, 0 otherwise.
+ */
+__attribute__((always_inline)) INLINE static int cell_is_active_gravity(
+    const struct cell *c, const struct engine *e) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (c->ti_gravity_end_min < e->ti_current)
+    error(
+        "cell in an impossible time-zone! c->ti_end_min=%lld (t=%e) and "
+        "e->ti_current=%lld (t=%e)",
+        c->ti_gravity_end_min, c->ti_gravity_end_min * e->timeBase,
+        e->ti_current, e->ti_current * e->timeBase);
+#endif
+
+  return (c->ti_gravity_end_min == e->ti_current);
+}
+
+/**
+ * @brief Are *all* g-particles in a cell finishing their time-step now ?
+ *
+ * @param c The #cell.
+ * @param e The #engine containing information about the current time.
+ * @return 1 if all particles in a #cell are active, 0 otherwise.
+ */
+__attribute__((always_inline)) INLINE static int cell_is_all_active_gravity(
+    const struct cell *c, const struct engine *e) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (c->ti_gravity_end_max < e->ti_current)
+    error(
+        "cell in an impossible time-zone! c->ti_end_max=%lld "
+        "e->ti_current=%lld",
+        c->ti_gravity_end_max, e->ti_current);
+#endif
+
+  return (c->ti_gravity_end_max == e->ti_current);
 }
 
 /**
@@ -215,19 +258,41 @@ __attribute__((always_inline)) INLINE static int spart_is_active(
  * @param e The #engine containing information about the current time.
  * @return 1 if the #cell contains at least an active particle, 0 otherwise.
  */
-__attribute__((always_inline)) INLINE static int cell_is_starting(
+__attribute__((always_inline)) INLINE static int cell_is_starting_hydro(
     const struct cell *c, const struct engine *e) {
 
 #ifdef SWIFT_DEBUG_CHECKS
-  if (c->ti_beg_max > e->ti_current)
+  if (c->ti_hydro_beg_max > e->ti_current)
     error(
         "cell in an impossible time-zone! c->ti_beg_max=%lld (t=%e) and "
         "e->ti_current=%lld (t=%e)",
-        c->ti_beg_max, c->ti_beg_max * e->timeBase, e->ti_current,
+        c->ti_hydro_beg_max, c->ti_hydro_beg_max * e->timeBase, e->ti_current,
         e->ti_current * e->timeBase);
 #endif
 
-  return (c->ti_beg_max == e->ti_current);
+  return (c->ti_hydro_beg_max == e->ti_current);
+}
+
+/**
+ * @brief Does a cell contain any g-particle starting their time-step now ?
+ *
+ * @param c The #cell.
+ * @param e The #engine containing information about the current time.
+ * @return 1 if the #cell contains at least an active particle, 0 otherwise.
+ */
+__attribute__((always_inline)) INLINE static int cell_is_starting_gravity(
+    const struct cell *c, const struct engine *e) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (c->ti_gravity_beg_max > e->ti_current)
+    error(
+        "cell in an impossible time-zone! c->ti_beg_max=%lld (t=%e) and "
+        "e->ti_current=%lld (t=%e)",
+        c->ti_gravity_beg_max, c->ti_gravity_beg_max * e->timeBase,
+        e->ti_current, e->ti_current * e->timeBase);
+#endif
+
+  return (c->ti_gravity_beg_max == e->ti_current);
 }
 
 /**
diff --git a/src/cell.c b/src/cell.c
index 9c5d1bd3aae8a0982db79c8fd370354c5c840dac..9aa2e80636d98a829371fed06a1170f28cb85aa8 100644
--- a/src/cell.c
+++ b/src/cell.c
@@ -173,14 +173,20 @@ int cell_pack(struct cell *restrict c, struct pcell *restrict pc) {
 
   /* Start by packing the data of the current cell. */
   pc->h_max = c->h_max;
-  pc->ti_end_min = c->ti_end_min;
-  pc->ti_end_max = c->ti_end_max;
+  pc->ti_hydro_end_min = c->ti_hydro_end_min;
+  pc->ti_hydro_end_max = c->ti_hydro_end_max;
+  pc->ti_gravity_end_min = c->ti_gravity_end_min;
+  pc->ti_gravity_end_max = c->ti_gravity_end_max;
   pc->ti_old_part = c->ti_old_part;
   pc->ti_old_gpart = c->ti_old_gpart;
+  pc->ti_old_multipole = c->ti_old_multipole;
   pc->count = c->count;
   pc->gcount = c->gcount;
   pc->scount = c->scount;
   c->tag = pc->tag = atomic_inc(&cell_next_tag) % cell_max_tag;
+#ifdef SWIFT_DEBUG_CHECKS
+  pc->cellID = c->cellID;
+#endif
 
   /* Fill in the progeny, depth-first recursion. */
   int count = 1;
@@ -217,14 +223,20 @@ int cell_unpack(struct pcell *restrict pc, struct cell *restrict c,
 
   /* Unpack the current pcell. */
   c->h_max = pc->h_max;
-  c->ti_end_min = pc->ti_end_min;
-  c->ti_end_max = pc->ti_end_max;
+  c->ti_hydro_end_min = pc->ti_hydro_end_min;
+  c->ti_hydro_end_max = pc->ti_hydro_end_max;
+  c->ti_gravity_end_min = pc->ti_gravity_end_min;
+  c->ti_gravity_end_max = pc->ti_gravity_end_max;
   c->ti_old_part = pc->ti_old_part;
   c->ti_old_gpart = pc->ti_old_gpart;
+  c->ti_old_multipole = pc->ti_old_multipole;
   c->count = pc->count;
   c->gcount = pc->gcount;
   c->scount = pc->scount;
   c->tag = pc->tag;
+#ifdef SWIFT_DEBUG_CHECKS
+  c->cellID = pc->cellID;
+#endif
 
   /* Number of new cells created. */
   int count = 1;
@@ -283,7 +295,10 @@ int cell_pack_end_step(struct cell *restrict c,
 #ifdef WITH_MPI
 
   /* Pack this cell's data. */
-  pcells[0].ti_end_min = c->ti_end_min;
+  pcells[0].ti_hydro_end_min = c->ti_hydro_end_min;
+  pcells[0].ti_hydro_end_max = c->ti_hydro_end_max;
+  pcells[0].ti_gravity_end_min = c->ti_gravity_end_min;
+  pcells[0].ti_gravity_end_max = c->ti_gravity_end_max;
   pcells[0].dx_max_part = c->dx_max_part;
   pcells[0].dx_max_gpart = c->dx_max_gpart;
 
@@ -317,7 +332,10 @@ int cell_unpack_end_step(struct cell *restrict c,
 #ifdef WITH_MPI
 
   /* Unpack this cell's data. */
-  c->ti_end_min = pcells[0].ti_end_min;
+  c->ti_hydro_end_min = pcells[0].ti_hydro_end_min;
+  c->ti_hydro_end_max = pcells[0].ti_hydro_end_max;
+  c->ti_gravity_end_min = pcells[0].ti_gravity_end_min;
+  c->ti_gravity_end_max = pcells[0].ti_gravity_end_max;
   c->dx_max_part = pcells[0].dx_max_part;
   c->dx_max_gpart = pcells[0].dx_max_gpart;
 
@@ -337,6 +355,71 @@ int cell_unpack_end_step(struct cell *restrict c,
 #endif
 }
 
+/**
+ * @brief Pack the multipole information of the given cell and all it's
+ * sub-cells.
+ *
+ * @param c The #cell.
+ * @param pcells (output) The multipole information we pack into
+ *
+ * @return The number of packed cells.
+ */
+int cell_pack_multipoles(struct cell *restrict c,
+                         struct gravity_tensors *restrict pcells) {
+
+#ifdef WITH_MPI
+
+  /* Pack this cell's data. */
+  pcells[0] = *c->multipole;
+
+  /* Fill in the progeny, depth-first recursion. */
+  int count = 1;
+  for (int k = 0; k < 8; k++)
+    if (c->progeny[k] != NULL) {
+      count += cell_pack_multipoles(c->progeny[k], &pcells[count]);
+    }
+
+  /* Return the number of packed values. */
+  return count;
+
+#else
+  error("SWIFT was not compiled with MPI support.");
+  return 0;
+#endif
+}
+
+/**
+ * @brief Unpack the multipole information of a given cell and its sub-cells.
+ *
+ * @param c The #cell
+ * @param pcells The multipole information to unpack
+ *
+ * @return The number of cells created.
+ */
+int cell_unpack_multipoles(struct cell *restrict c,
+                           struct gravity_tensors *restrict pcells) {
+
+#ifdef WITH_MPI
+
+  /* Unpack this cell's data. */
+  *c->multipole = pcells[0];
+
+  /* Fill in the progeny, depth-first recursion. */
+  int count = 1;
+  for (int k = 0; k < 8; k++)
+    if (c->progeny[k] != NULL) {
+      count += cell_unpack_multipoles(c->progeny[k], &pcells[count]);
+    }
+
+  /* Return the number of packed values. */
+  return count;
+
+#else
+  error("SWIFT was not compiled with MPI support.");
+  return 0;
+#endif
+}
+
 /**
  * @brief Lock a cell for access to its array of #part and hold its parents.
  *
@@ -1108,9 +1191,6 @@ void cell_check_multipole_drift_point(struct cell *c, void *data) {
 
   const integertime_t ti_drift = *(integertime_t *)data;
 
-  /* Only check local cells */
-  if (c->nodeID != engine_rank) return;
-
   if (c->ti_old_multipole != ti_drift)
     error(
         "Cell multipole in an incorrect time-zone! c->ti_old_multipole=%lld "
@@ -1321,13 +1401,13 @@ void cell_activate_drift_part(struct cell *c, struct scheduler *s) {
 
   /* Set the do_sub_drifts all the way up and activate the super drift
      if this has not yet been done. */
-  if (c == c->super) {
+  if (c == c->super_hydro) {
     scheduler_activate(s, c->drift_part);
   } else {
     for (struct cell *parent = c->parent;
          parent != NULL && !parent->do_sub_drift; parent = parent->parent) {
       parent->do_sub_drift = 1;
-      if (parent == c->super) {
+      if (parent == c->super_hydro) {
         scheduler_activate(s, parent->drift_part);
         break;
       }
@@ -1348,14 +1428,14 @@ void cell_activate_drift_gpart(struct cell *c, struct scheduler *s) {
 
   /* Set the do_grav_sub_drifts all the way up and activate the super drift
      if this has not yet been done. */
-  if (c == c->super) {
+  if (c == c->super_gravity) {
     scheduler_activate(s, c->drift_gpart);
   } else {
     for (struct cell *parent = c->parent;
          parent != NULL && !parent->do_grav_sub_drift;
          parent = parent->parent) {
       parent->do_grav_sub_drift = 1;
-      if (parent == c->super) {
+      if (parent == c->super_gravity) {
         scheduler_activate(s, parent->drift_gpart);
         break;
       }
@@ -1367,14 +1447,14 @@ void cell_activate_drift_gpart(struct cell *c, struct scheduler *s) {
  * @brief Activate the sorts up a cell hierarchy.
  */
 void cell_activate_sorts_up(struct cell *c, struct scheduler *s) {
-  if (c == c->super) {
+  if (c == c->super_hydro) {
     scheduler_activate(s, c->sorts);
     if (c->nodeID == engine_rank) cell_activate_drift_part(c, s);
   } else {
     for (struct cell *parent = c->parent;
          parent != NULL && !parent->do_sub_sort; parent = parent->parent) {
       parent->do_sub_sort = 1;
-      if (parent == c->super) {
+      if (parent == c->super_hydro) {
         scheduler_activate(s, parent->sorts);
         if (parent->nodeID == engine_rank) cell_activate_drift_part(parent, s);
         break;
@@ -1417,8 +1497,8 @@ void cell_activate_sorts(struct cell *c, int sid, struct scheduler *s) {
  * @param cj The second #cell we recurse in.
  * @param s The task #scheduler.
  */
-void cell_activate_subcell_tasks(struct cell *ci, struct cell *cj,
-                                 struct scheduler *s) {
+void cell_activate_subcell_hydro_tasks(struct cell *ci, struct cell *cj,
+                                       struct scheduler *s) {
   const struct engine *e = s->space->e;
 
   /* Store the current dx_max and h_max values. */
@@ -1432,7 +1512,7 @@ void cell_activate_subcell_tasks(struct cell *ci, struct cell *cj,
   /* Self interaction? */
   if (cj == NULL) {
     /* Do anything? */
-    if (!cell_is_active(ci, e)) return;
+    if (!cell_is_active_hydro(ci, e)) return;
 
     /* Recurse? */
     if (cell_can_recurse_in_self_task(ci)) {
@@ -1440,10 +1520,11 @@ void cell_activate_subcell_tasks(struct cell *ci, struct cell *cj,
       /* Loop over all progenies and pairs of progenies */
       for (int j = 0; j < 8; j++) {
         if (ci->progeny[j] != NULL) {
-          cell_activate_subcell_tasks(ci->progeny[j], NULL, s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[j], NULL, s);
           for (int k = j + 1; k < 8; k++)
             if (ci->progeny[k] != NULL)
-              cell_activate_subcell_tasks(ci->progeny[j], ci->progeny[k], s);
+              cell_activate_subcell_hydro_tasks(ci->progeny[j], ci->progeny[k],
+                                                s);
         }
       }
     } else {
@@ -1467,200 +1548,200 @@ void cell_activate_subcell_tasks(struct cell *ci, struct cell *cj,
       /* Regular sub-cell interactions of a single cell. */
       case 0: /* (  1 ,  1 ,  1 ) */
         if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[7], cj->progeny[0], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[7], cj->progeny[0], s);
         break;
 
       case 1: /* (  1 ,  1 ,  0 ) */
         if (ci->progeny[6] != NULL && cj->progeny[0] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[6], cj->progeny[0], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[6], cj->progeny[0], s);
         if (ci->progeny[6] != NULL && cj->progeny[1] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[6], cj->progeny[1], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[6], cj->progeny[1], s);
         if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[7], cj->progeny[0], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[7], cj->progeny[0], s);
         if (ci->progeny[7] != NULL && cj->progeny[1] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[7], cj->progeny[1], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[7], cj->progeny[1], s);
         break;
 
       case 2: /* (  1 ,  1 , -1 ) */
         if (ci->progeny[6] != NULL && cj->progeny[1] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[6], cj->progeny[1], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[6], cj->progeny[1], s);
         break;
 
       case 3: /* (  1 ,  0 ,  1 ) */
         if (ci->progeny[5] != NULL && cj->progeny[0] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[5], cj->progeny[0], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[5], cj->progeny[0], s);
         if (ci->progeny[5] != NULL && cj->progeny[2] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[5], cj->progeny[2], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[5], cj->progeny[2], s);
         if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[7], cj->progeny[0], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[7], cj->progeny[0], s);
         if (ci->progeny[7] != NULL && cj->progeny[2] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[7], cj->progeny[2], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[7], cj->progeny[2], s);
         break;
 
       case 4: /* (  1 ,  0 ,  0 ) */
         if (ci->progeny[4] != NULL && cj->progeny[0] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[4], cj->progeny[0], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[4], cj->progeny[0], s);
         if (ci->progeny[4] != NULL && cj->progeny[1] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[4], cj->progeny[1], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[4], cj->progeny[1], s);
         if (ci->progeny[4] != NULL && cj->progeny[2] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[4], cj->progeny[2], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[4], cj->progeny[2], s);
         if (ci->progeny[4] != NULL && cj->progeny[3] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[4], cj->progeny[3], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[4], cj->progeny[3], s);
         if (ci->progeny[5] != NULL && cj->progeny[0] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[5], cj->progeny[0], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[5], cj->progeny[0], s);
         if (ci->progeny[5] != NULL && cj->progeny[1] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[5], cj->progeny[1], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[5], cj->progeny[1], s);
         if (ci->progeny[5] != NULL && cj->progeny[2] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[5], cj->progeny[2], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[5], cj->progeny[2], s);
         if (ci->progeny[5] != NULL && cj->progeny[3] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[5], cj->progeny[3], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[5], cj->progeny[3], s);
         if (ci->progeny[6] != NULL && cj->progeny[0] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[6], cj->progeny[0], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[6], cj->progeny[0], s);
         if (ci->progeny[6] != NULL && cj->progeny[1] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[6], cj->progeny[1], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[6], cj->progeny[1], s);
         if (ci->progeny[6] != NULL && cj->progeny[2] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[6], cj->progeny[2], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[6], cj->progeny[2], s);
         if (ci->progeny[6] != NULL && cj->progeny[3] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[6], cj->progeny[3], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[6], cj->progeny[3], s);
         if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[7], cj->progeny[0], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[7], cj->progeny[0], s);
         if (ci->progeny[7] != NULL && cj->progeny[1] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[7], cj->progeny[1], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[7], cj->progeny[1], s);
         if (ci->progeny[7] != NULL && cj->progeny[2] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[7], cj->progeny[2], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[7], cj->progeny[2], s);
         if (ci->progeny[7] != NULL && cj->progeny[3] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[7], cj->progeny[3], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[7], cj->progeny[3], s);
         break;
 
       case 5: /* (  1 ,  0 , -1 ) */
         if (ci->progeny[4] != NULL && cj->progeny[1] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[4], cj->progeny[1], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[4], cj->progeny[1], s);
         if (ci->progeny[4] != NULL && cj->progeny[3] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[4], cj->progeny[3], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[4], cj->progeny[3], s);
         if (ci->progeny[6] != NULL && cj->progeny[1] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[6], cj->progeny[1], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[6], cj->progeny[1], s);
         if (ci->progeny[6] != NULL && cj->progeny[3] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[6], cj->progeny[3], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[6], cj->progeny[3], s);
         break;
 
       case 6: /* (  1 , -1 ,  1 ) */
         if (ci->progeny[5] != NULL && cj->progeny[2] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[5], cj->progeny[2], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[5], cj->progeny[2], s);
         break;
 
       case 7: /* (  1 , -1 ,  0 ) */
         if (ci->progeny[4] != NULL && cj->progeny[2] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[4], cj->progeny[2], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[4], cj->progeny[2], s);
         if (ci->progeny[4] != NULL && cj->progeny[3] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[4], cj->progeny[3], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[4], cj->progeny[3], s);
         if (ci->progeny[5] != NULL && cj->progeny[2] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[5], cj->progeny[2], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[5], cj->progeny[2], s);
         if (ci->progeny[5] != NULL && cj->progeny[3] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[5], cj->progeny[3], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[5], cj->progeny[3], s);
         break;
 
       case 8: /* (  1 , -1 , -1 ) */
         if (ci->progeny[4] != NULL && cj->progeny[3] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[4], cj->progeny[3], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[4], cj->progeny[3], s);
         break;
 
       case 9: /* (  0 ,  1 ,  1 ) */
         if (ci->progeny[3] != NULL && cj->progeny[0] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[3], cj->progeny[0], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[3], cj->progeny[0], s);
         if (ci->progeny[3] != NULL && cj->progeny[4] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[3], cj->progeny[4], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[3], cj->progeny[4], s);
         if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[7], cj->progeny[0], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[7], cj->progeny[0], s);
         if (ci->progeny[7] != NULL && cj->progeny[4] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[7], cj->progeny[4], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[7], cj->progeny[4], s);
         break;
 
       case 10: /* (  0 ,  1 ,  0 ) */
         if (ci->progeny[2] != NULL && cj->progeny[0] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[2], cj->progeny[0], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[2], cj->progeny[0], s);
         if (ci->progeny[2] != NULL && cj->progeny[1] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[2], cj->progeny[1], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[2], cj->progeny[1], s);
         if (ci->progeny[2] != NULL && cj->progeny[4] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[2], cj->progeny[4], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[2], cj->progeny[4], s);
         if (ci->progeny[2] != NULL && cj->progeny[5] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[2], cj->progeny[5], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[2], cj->progeny[5], s);
         if (ci->progeny[3] != NULL && cj->progeny[0] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[3], cj->progeny[0], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[3], cj->progeny[0], s);
         if (ci->progeny[3] != NULL && cj->progeny[1] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[3], cj->progeny[1], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[3], cj->progeny[1], s);
         if (ci->progeny[3] != NULL && cj->progeny[4] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[3], cj->progeny[4], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[3], cj->progeny[4], s);
         if (ci->progeny[3] != NULL && cj->progeny[5] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[3], cj->progeny[5], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[3], cj->progeny[5], s);
         if (ci->progeny[6] != NULL && cj->progeny[0] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[6], cj->progeny[0], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[6], cj->progeny[0], s);
         if (ci->progeny[6] != NULL && cj->progeny[1] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[6], cj->progeny[1], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[6], cj->progeny[1], s);
         if (ci->progeny[6] != NULL && cj->progeny[4] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[6], cj->progeny[4], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[6], cj->progeny[4], s);
         if (ci->progeny[6] != NULL && cj->progeny[5] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[6], cj->progeny[5], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[6], cj->progeny[5], s);
         if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[7], cj->progeny[0], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[7], cj->progeny[0], s);
         if (ci->progeny[7] != NULL && cj->progeny[1] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[7], cj->progeny[1], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[7], cj->progeny[1], s);
         if (ci->progeny[7] != NULL && cj->progeny[4] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[7], cj->progeny[4], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[7], cj->progeny[4], s);
         if (ci->progeny[7] != NULL && cj->progeny[5] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[7], cj->progeny[5], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[7], cj->progeny[5], s);
         break;
 
       case 11: /* (  0 ,  1 , -1 ) */
         if (ci->progeny[2] != NULL && cj->progeny[1] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[2], cj->progeny[1], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[2], cj->progeny[1], s);
         if (ci->progeny[2] != NULL && cj->progeny[5] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[2], cj->progeny[5], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[2], cj->progeny[5], s);
         if (ci->progeny[6] != NULL && cj->progeny[1] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[6], cj->progeny[1], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[6], cj->progeny[1], s);
         if (ci->progeny[6] != NULL && cj->progeny[5] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[6], cj->progeny[5], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[6], cj->progeny[5], s);
         break;
 
       case 12: /* (  0 ,  0 ,  1 ) */
         if (ci->progeny[1] != NULL && cj->progeny[0] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[1], cj->progeny[0], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[1], cj->progeny[0], s);
         if (ci->progeny[1] != NULL && cj->progeny[2] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[1], cj->progeny[2], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[1], cj->progeny[2], s);
         if (ci->progeny[1] != NULL && cj->progeny[4] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[1], cj->progeny[4], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[1], cj->progeny[4], s);
         if (ci->progeny[1] != NULL && cj->progeny[6] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[1], cj->progeny[6], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[1], cj->progeny[6], s);
         if (ci->progeny[3] != NULL && cj->progeny[0] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[3], cj->progeny[0], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[3], cj->progeny[0], s);
         if (ci->progeny[3] != NULL && cj->progeny[2] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[3], cj->progeny[2], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[3], cj->progeny[2], s);
         if (ci->progeny[3] != NULL && cj->progeny[4] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[3], cj->progeny[4], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[3], cj->progeny[4], s);
         if (ci->progeny[3] != NULL && cj->progeny[6] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[3], cj->progeny[6], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[3], cj->progeny[6], s);
         if (ci->progeny[5] != NULL && cj->progeny[0] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[5], cj->progeny[0], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[5], cj->progeny[0], s);
         if (ci->progeny[5] != NULL && cj->progeny[2] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[5], cj->progeny[2], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[5], cj->progeny[2], s);
         if (ci->progeny[5] != NULL && cj->progeny[4] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[5], cj->progeny[4], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[5], cj->progeny[4], s);
         if (ci->progeny[5] != NULL && cj->progeny[6] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[5], cj->progeny[6], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[5], cj->progeny[6], s);
         if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[7], cj->progeny[0], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[7], cj->progeny[0], s);
         if (ci->progeny[7] != NULL && cj->progeny[2] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[7], cj->progeny[2], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[7], cj->progeny[2], s);
         if (ci->progeny[7] != NULL && cj->progeny[4] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[7], cj->progeny[4], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[7], cj->progeny[4], s);
         if (ci->progeny[7] != NULL && cj->progeny[6] != NULL)
-          cell_activate_subcell_tasks(ci->progeny[7], cj->progeny[6], s);
+          cell_activate_subcell_hydro_tasks(ci->progeny[7], cj->progeny[6], s);
         break;
     }
 
   }
 
   /* Otherwise, activate the sorts and drifts. */
-  else if (cell_is_active(ci, e) || cell_is_active(cj, e)) {
+  else if (cell_is_active_hydro(ci, e) || cell_is_active_hydro(cj, e)) {
 
     /* Get the type of pair if not specified explicitly. */
     double shift[3];
@@ -1704,7 +1785,7 @@ void cell_activate_subcell_grav_tasks(struct cell *ci, struct cell *cj,
   if (cj == NULL) {
 
     /* Do anything? */
-    if (!cell_is_active(ci, e)) return;
+    if (!cell_is_active_gravity(ci, e)) return;
 
     /* Recurse? */
     if (ci->split) {
@@ -1730,7 +1811,8 @@ void cell_activate_subcell_grav_tasks(struct cell *ci, struct cell *cj,
   else {
 
     /* Anything to do here? */
-    if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return;
+    if (!cell_is_active_gravity(ci, e) && !cell_is_active_gravity(cj, e))
+      return;
 
     if (ci->ti_old_multipole < e->ti_current) cell_drift_multipole(ci, e);
     if (cj->ti_old_multipole < e->ti_current) cell_drift_multipole(cj, e);
@@ -1764,7 +1846,7 @@ void cell_activate_subcell_grav_tasks(struct cell *ci, struct cell *cj,
     else if (!ci->split && !cj->split) {
 
       /* Activate the drifts if the cells are local. */
-      if (cell_is_active(ci, e) || cell_is_active(cj, e)) {
+      if (cell_is_active_gravity(ci, e) || cell_is_active_gravity(cj, e)) {
         if (ci->nodeID == engine_rank) cell_activate_drift_gpart(ci, s);
         if (cj->nodeID == engine_rank) cell_activate_drift_gpart(cj, s);
       }
@@ -1833,7 +1915,7 @@ void cell_activate_subcell_external_grav_tasks(struct cell *ci,
   const struct engine *e = sp->e;
 
   /* Do anything? */
-  if (!cell_is_active(ci, e)) return;
+  if (!cell_is_active_gravity(ci, e)) return;
 
   /* Recurse? */
   if (ci->split) {
@@ -1852,7 +1934,7 @@ void cell_activate_subcell_external_grav_tasks(struct cell *ci,
 }
 
 /**
- * @brief Un-skips all the tasks associated with a given cell and checks
+ * @brief Un-skips all the hydro tasks associated with a given cell and checks
  * if the space needs to be rebuilt.
  *
  * @param c the #cell.
@@ -1860,7 +1942,7 @@ void cell_activate_subcell_external_grav_tasks(struct cell *ci,
  *
  * @return 1 If the space needs rebuilding. 0 otherwise.
  */
-int cell_unskip_tasks(struct cell *c, struct scheduler *s) {
+int cell_unskip_hydro_tasks(struct cell *c, struct scheduler *s) {
 
   struct engine *e = s->space->e;
   const int nodeID = e->nodeID;
@@ -1871,8 +1953,8 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) {
     struct task *t = l->t;
     struct cell *ci = t->ci;
     struct cell *cj = t->cj;
-    const int ci_active = cell_is_active(ci, e);
-    const int cj_active = (cj != NULL) ? cell_is_active(cj, e) : 0;
+    const int ci_active = cell_is_active_hydro(ci, e);
+    const int cj_active = (cj != NULL) ? cell_is_active_hydro(cj, e) : 0;
 
     /* Only activate tasks that involve a local active cell. */
     if ((ci_active && ci->nodeID == nodeID) ||
@@ -1902,7 +1984,7 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) {
       }
       /* Store current values of dx_max and h_max. */
       else if (t->type == task_type_sub_pair || t->type == task_type_sub_self) {
-        cell_activate_subcell_tasks(t->ci, t->cj, s);
+        cell_activate_subcell_hydro_tasks(t->ci, t->cj, s);
       }
     }
 
@@ -1922,6 +2004,7 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) {
           scheduler_activate(s, ci->recv_xv);
           if (ci_active) {
             scheduler_activate(s, ci->recv_rho);
+
 #ifdef EXTRA_HYDRO_LOOP
             scheduler_activate(s, ci->recv_gradient);
 #endif
@@ -1960,6 +2043,7 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) {
           scheduler_activate(s, cj->recv_xv);
           if (cj_active) {
             scheduler_activate(s, cj->recv_rho);
+
 #ifdef EXTRA_HYDRO_LOOP
             scheduler_activate(s, cj->recv_gradient);
 #endif
@@ -1996,15 +2080,54 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) {
     }
   }
 
+  /* Unskip all the other task types. */
+  if (c->nodeID == nodeID && cell_is_active_hydro(c, e)) {
+
+    for (struct link *l = c->gradient; l != NULL; l = l->next)
+      scheduler_activate(s, l->t);
+    for (struct link *l = c->force; l != NULL; l = l->next)
+      scheduler_activate(s, l->t);
+
+    if (c->extra_ghost != NULL) scheduler_activate(s, c->extra_ghost);
+    if (c->ghost_in != NULL) scheduler_activate(s, c->ghost_in);
+    if (c->ghost_out != NULL) scheduler_activate(s, c->ghost_out);
+    if (c->ghost != NULL) scheduler_activate(s, c->ghost);
+    if (c->kick1 != NULL) scheduler_activate(s, c->kick1);
+    if (c->kick2 != NULL) scheduler_activate(s, c->kick2);
+    if (c->timestep != NULL) scheduler_activate(s, c->timestep);
+    if (c->cooling != NULL) scheduler_activate(s, c->cooling);
+    if (c->sourceterms != NULL) scheduler_activate(s, c->sourceterms);
+  }
+
+  return rebuild;
+}
+
+/**
+ * @brief Un-skips all the gravity tasks associated with a given cell and checks
+ * if the space needs to be rebuilt.
+ *
+ * @param c the #cell.
+ * @param s the #scheduler.
+ *
+ * @return 1 If the space needs rebuilding. 0 otherwise.
+ */
+int cell_unskip_gravity_tasks(struct cell *c, struct scheduler *s) {
+
+  struct engine *e = s->space->e;
+  const int nodeID = e->nodeID;
+  int rebuild = 0;
+
   /* Un-skip the gravity tasks involved with this cell. */
   for (struct link *l = c->grav; l != NULL; l = l->next) {
     struct task *t = l->t;
     struct cell *ci = t->ci;
     struct cell *cj = t->cj;
+    const int ci_active = cell_is_active_gravity(ci, e);
+    const int cj_active = (cj != NULL) ? cell_is_active_gravity(cj, e) : 0;
 
     /* Only activate tasks that involve a local active cell. */
-    if ((cell_is_active(ci, e) && ci->nodeID == nodeID) ||
-        (cj != NULL && cell_is_active(cj, e) && cj->nodeID == nodeID)) {
+    if ((ci_active && ci->nodeID == nodeID) ||
+        (cj_active && cj->nodeID == nodeID)) {
       scheduler_activate(s, t);
 
       /* Set the drifting flags */
@@ -2017,30 +2140,74 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) {
         cell_activate_subcell_grav_tasks(t->ci, t->cj, s);
       }
     }
+
+    if (t->type == task_type_pair) {
+
+#ifdef WITH_MPI
+      /* Activate the send/recv tasks. */
+      if (ci->nodeID != nodeID) {
+
+        /* If the local cell is active, receive data from the foreign cell. */
+        if (cj_active) {
+          scheduler_activate(s, ci->recv_grav);
+        }
+
+        /* If the foreign cell is active, we want its ti_end values. */
+        if (ci_active) scheduler_activate(s, ci->recv_ti);
+
+        /* Is the foreign cell active and will need stuff from us? */
+        if (ci_active) {
+
+          scheduler_activate_send(s, cj->send_grav, ci->nodeID);
+
+          /* Drift the cell which will be sent at the level at which it is
+             sent, i.e. drift the cell specified in the send task (l->t)
+             itself. */
+          cell_activate_drift_gpart(cj, s);
+        }
+
+        /* If the local cell is active, send its ti_end values. */
+        if (cj_active) scheduler_activate_send(s, cj->send_ti, ci->nodeID);
+
+      } else if (cj->nodeID != nodeID) {
+
+        /* If the local cell is active, receive data from the foreign cell. */
+        if (ci_active) {
+          scheduler_activate(s, cj->recv_grav);
+        }
+
+        /* If the foreign cell is active, we want its ti_end values. */
+        if (cj_active) scheduler_activate(s, cj->recv_ti);
+
+        /* Is the foreign cell active and will need stuff from us? */
+        if (cj_active) {
+
+          scheduler_activate_send(s, ci->send_grav, cj->nodeID);
+
+          /* Drift the cell which will be sent at the level at which it is
+             sent, i.e. drift the cell specified in the send task (l->t)
+             itself. */
+          cell_activate_drift_gpart(ci, s);
+        }
+
+        /* If the local cell is active, send its ti_end values. */
+        if (ci_active) scheduler_activate_send(s, ci->send_ti, cj->nodeID);
+      }
+#endif
+    }
   }
 
   /* Unskip all the other task types. */
-  if (c->nodeID == nodeID && cell_is_active(c, e)) {
-
-    for (struct link *l = c->gradient; l != NULL; l = l->next)
-      scheduler_activate(s, l->t);
-    for (struct link *l = c->force; l != NULL; l = l->next)
-      scheduler_activate(s, l->t);
+  if (c->nodeID == nodeID && cell_is_active_gravity(c, e)) {
 
-    if (c->extra_ghost != NULL) scheduler_activate(s, c->extra_ghost);
-    if (c->ghost_in != NULL) scheduler_activate(s, c->ghost_in);
-    if (c->ghost_out != NULL) scheduler_activate(s, c->ghost_out);
-    if (c->ghost != NULL) scheduler_activate(s, c->ghost);
     if (c->init_grav != NULL) scheduler_activate(s, c->init_grav);
+    if (c->grav_ghost_in != NULL) scheduler_activate(s, c->grav_ghost_in);
+    if (c->grav_ghost_out != NULL) scheduler_activate(s, c->grav_ghost_out);
     if (c->kick1 != NULL) scheduler_activate(s, c->kick1);
     if (c->kick2 != NULL) scheduler_activate(s, c->kick2);
     if (c->timestep != NULL) scheduler_activate(s, c->timestep);
-    if (c->grav_ghost[0] != NULL) scheduler_activate(s, c->grav_ghost[0]);
-    if (c->grav_ghost[1] != NULL) scheduler_activate(s, c->grav_ghost[1]);
     if (c->grav_down != NULL) scheduler_activate(s, c->grav_down);
     if (c->grav_long_range != NULL) scheduler_activate(s, c->grav_long_range);
-    if (c->cooling != NULL) scheduler_activate(s, c->cooling);
-    if (c->sourceterms != NULL) scheduler_activate(s, c->sourceterms);
   }
 
   return rebuild;
@@ -2066,6 +2233,50 @@ void cell_set_super(struct cell *c, struct cell *super) {
       if (c->progeny[k] != NULL) cell_set_super(c->progeny[k], super);
 }
 
+/**
+ * @brief Set the super-cell pointers for all cells in a hierarchy.
+ *
+ * @param c The top-level #cell to play with.
+ * @param super_hydro Pointer to the deepest cell with tasks in this part of the
+ * tree.
+ */
+void cell_set_super_hydro(struct cell *c, struct cell *super_hydro) {
+
+  /* Are we in a cell with some kind of self/pair task ? */
+  if (super_hydro == NULL && c->density != NULL) super_hydro = c;
+
+  /* Set the super-cell */
+  c->super_hydro = super_hydro;
+
+  /* Recurse */
+  if (c->split)
+    for (int k = 0; k < 8; k++)
+      if (c->progeny[k] != NULL)
+        cell_set_super_hydro(c->progeny[k], super_hydro);
+}
+
+/**
+ * @brief Set the super-cell pointers for all cells in a hierarchy.
+ *
+ * @param c The top-level #cell to play with.
+ * @param super_gravity Pointer to the deepest cell with tasks in this part of
+ * the tree.
+ */
+void cell_set_super_gravity(struct cell *c, struct cell *super_gravity) {
+
+  /* Are we in a cell with some kind of self/pair task ? */
+  if (super_gravity == NULL && c->grav != NULL) super_gravity = c;
+
+  /* Set the super-cell */
+  c->super_gravity = super_gravity;
+
+  /* Recurse */
+  if (c->split)
+    for (int k = 0; k < 8; k++)
+      if (c->progeny[k] != NULL)
+        cell_set_super_gravity(c->progeny[k], super_gravity);
+}
+
 /**
  * @brief Mapper function to set the super pointer of the cells.
  *
@@ -2074,8 +2285,15 @@ void cell_set_super(struct cell *c, struct cell *super) {
  * @param extra_data Unused parameter.
  */
 void cell_set_super_mapper(void *map_data, int num_elements, void *extra_data) {
+
+  const struct engine *e = (const struct engine *)extra_data;
+
   for (int ind = 0; ind < num_elements; ind++) {
     struct cell *c = &((struct cell *)map_data)[ind];
+    if (e->policy & engine_policy_hydro) cell_set_super_hydro(c, NULL);
+    if (e->policy &
+        (engine_policy_self_gravity | engine_policy_external_gravity))
+      cell_set_super_gravity(c, NULL);
     cell_set_super(c, NULL);
   }
 }
@@ -2388,7 +2606,7 @@ void cell_drift_multipole(struct cell *c, const struct engine *e) {
 void cell_check_timesteps(struct cell *c) {
 #ifdef SWIFT_DEBUG_CHECKS
 
-  if (c->ti_end_min == 0 && c->nr_tasks > 0)
+  if (c->ti_hydro_end_min == 0 && c->ti_gravity_end_min == 0 && c->nr_tasks > 0)
     error("Cell without assigned time-step");
 
   if (c->split) {
diff --git a/src/cell.h b/src/cell.h
index 8b62e4362011617f52c7d8efcde409200098adcd..a9ba34b0935dcd13f9bc06b17a7c6d01e130e0f6 100644
--- a/src/cell.h
+++ b/src/cell.h
@@ -80,14 +80,23 @@ struct pcell {
   /*! Maximal smoothing length. */
   double h_max;
 
-  /*! Minimal integer end-of-timestep in this cell */
-  integertime_t ti_end_min;
+  /*! Minimal integer end-of-timestep in this cell for hydro tasks */
+  integertime_t ti_hydro_end_min;
 
-  /*! Maximal integer end-of-timestep in this cell */
-  integertime_t ti_end_max;
+  /*! Maximal integer end-of-timestep in this cell for hydro tasks */
+  integertime_t ti_hydro_end_max;
 
-  /*! Maximal integer beginning-of-timestep in this cell */
-  integertime_t ti_beg_max;
+  /*! Maximal integer beginning-of-timestep in this cell for hydro tasks */
+  integertime_t ti_hydro_beg_max;
+
+  /*! Minimal integer end-of-timestep in this cell for gravity tasks */
+  integertime_t ti_gravity_end_min;
+
+  /*! Maximal integer end-of-timestep in this cell for gravity tasks */
+  integertime_t ti_gravity_end_max;
+
+  /*! Maximal integer beginning-of-timestep in this cell for gravity tasks */
+  integertime_t ti_gravity_beg_max;
 
   /*! Integer time of the last drift of the #part in this cell */
   integertime_t ti_old_part;
@@ -95,6 +104,9 @@ struct pcell {
   /*! Integer time of the last drift of the #gpart in this cell */
   integertime_t ti_old_gpart;
 
+  /*! Integer time of the last drift of the #multipole in this cell */
+  integertime_t ti_old_multipole;
+
   /*! Number of #part in this cell. */
   int count;
 
@@ -110,6 +122,11 @@ struct pcell {
   /*! Relative indices of the cell's progeny. */
   int progeny[8];
 
+#ifdef SWIFT_DEBUG_CHECKS
+  /* Cell ID (for debugging) */
+  int cellID;
+#endif
+
 } SWIFT_STRUCT_ALIGN;
 
 /**
@@ -117,8 +134,17 @@ struct pcell {
  */
 struct pcell_step {
 
-  /*! Minimal integer end-of-timestep in this cell */
-  integertime_t ti_end_min;
+  /*! Minimal integer end-of-timestep in this cell (hydro) */
+  integertime_t ti_hydro_end_min;
+
+  /*! Minimal integer end-of-timestep in this cell (hydro) */
+  integertime_t ti_hydro_end_max;
+
+  /*! Minimal integer end-of-timestep in this cell (gravity) */
+  integertime_t ti_gravity_end_min;
+
+  /*! Minimal integer end-of-timestep in this cell (gravity) */
+  integertime_t ti_gravity_end_max;
 
   /*! Maximal distance any #part has travelled since last rebuild */
   float dx_max_part;
@@ -170,11 +196,16 @@ struct cell {
   /*! Parent cell. */
   struct cell *parent;
 
-  /*! Super cell, i.e. the highest-level parent cell that has pair/self tasks */
+  /*! Super cell, i.e. the highest-level parent cell with *any* task */
   struct cell *super;
 
-  /*! The task computing this cell's sorts. */
-  struct task *sorts;
+  /*! Super cell, i.e. the highest-level parent cell that has a hydro pair/self
+   * tasks */
+  struct cell *super_hydro;
+
+  /*! Super cell, i.e. the highest-level parent cell that has a grav pair/self
+   * tasks */
+  struct cell *super_gravity;
 
   /*! Linked list of the tasks computing this cell's hydro density. */
   struct link *density;
@@ -188,6 +219,9 @@ struct cell {
   /*! Linked list of the tasks computing this cell's gravity forces. */
   struct link *grav;
 
+  /*! The task computing this cell's sorts. */
+  struct task *sorts;
+
   /*! The multipole initialistation task */
   struct task *init_grav;
 
@@ -219,7 +253,7 @@ struct cell {
   struct task *timestep;
 
   /*! Task linking the FFT mesh to the rest of gravity tasks */
-  struct task *grav_ghost[2];
+  struct task *grav_ghost_in, *grav_ghost_out;
 
   /*! Task computing long range non-periodic gravity interactions */
   struct task *grav_long_range;
@@ -235,27 +269,33 @@ struct cell {
 
 #ifdef WITH_MPI
 
-  /* Task receiving data (positions). */
+  /* Task receiving hydro data (positions). */
   struct task *recv_xv;
 
-  /* Task receiving data (density). */
+  /* Task receiving hydro data (density). */
   struct task *recv_rho;
 
-  /* Task receiving data (gradient). */
+  /* Task receiving hydro data (gradient). */
   struct task *recv_gradient;
 
+  /* Task receiving gpart data. */
+  struct task *recv_grav;
+
   /* Task receiving data (time-step). */
   struct task *recv_ti;
 
-  /* Linked list for sending data (positions). */
+  /* Linked list for sending hydro data (positions). */
   struct link *send_xv;
 
-  /* Linked list for sending data (density). */
+  /* Linked list for sending hydro data (density). */
   struct link *send_rho;
 
-  /* Linked list for sending data (gradient). */
+  /* Linked list for sending hydro data (gradient). */
   struct link *send_gradient;
 
+  /* Linked list for sending gpart data. */
+  struct link *send_grav;
+
   /* Linked list for sending data (time-step). */
   struct link *send_ti;
 
@@ -273,14 +313,24 @@ struct cell {
 
 #endif
 
-  /*! Minimum end of (integer) time step in this cell. */
-  integertime_t ti_end_min;
+  /*! Minimum end of (integer) time step in this cell for hydro tasks. */
+  integertime_t ti_hydro_end_min;
+
+  /*! Maximum end of (integer) time step in this cell for hydro tasks. */
+  integertime_t ti_hydro_end_max;
+
+  /*! Maximum beginning of (integer) time step in this cell for hydro tasks. */
+  integertime_t ti_hydro_beg_max;
 
-  /*! Maximum end of (integer) time step in this cell. */
-  integertime_t ti_end_max;
+  /*! Minimum end of (integer) time step in this cell for gravity tasks. */
+  integertime_t ti_gravity_end_min;
 
-  /*! Maximum beginning of (integer) time step in this cell. */
-  integertime_t ti_beg_max;
+  /*! Maximum end of (integer) time step in this cell for gravity tasks. */
+  integertime_t ti_gravity_end_max;
+
+  /*! Maximum beginning of (integer) time step in this cell for gravity tasks.
+   */
+  integertime_t ti_gravity_beg_max;
 
   /*! Last (integer) time the cell's part were drifted forward in time. */
   integertime_t ti_old_part;
@@ -397,6 +447,9 @@ struct cell {
   char do_sub_sort;
 
 #ifdef SWIFT_DEBUG_CHECKS
+  /* Cell ID (for debugging) */
+  int cellID;
+
   /*! Last (integer) time the cell's sort arrays were updated. */
   integertime_t ti_sort;
 
@@ -430,6 +483,8 @@ int cell_pack(struct cell *c, struct pcell *pc);
 int cell_unpack(struct pcell *pc, struct cell *c, struct space *s);
 int cell_pack_end_step(struct cell *c, struct pcell_step *pcell);
 int cell_unpack_end_step(struct cell *c, struct pcell_step *pcell);
+int cell_pack_multipoles(struct cell *c, struct gravity_tensors *m);
+int cell_unpack_multipoles(struct cell *c, struct gravity_tensors *m);
 int cell_getsize(struct cell *c);
 int cell_link_parts(struct cell *c, struct part *parts);
 int cell_link_gparts(struct cell *c, struct gpart *gparts);
@@ -443,7 +498,8 @@ void cell_check_part_drift_point(struct cell *c, void *data);
 void cell_check_gpart_drift_point(struct cell *c, void *data);
 void cell_check_multipole_drift_point(struct cell *c, void *data);
 void cell_reset_task_counters(struct cell *c);
-int cell_unskip_tasks(struct cell *c, struct scheduler *s);
+int cell_unskip_hydro_tasks(struct cell *c, struct scheduler *s);
+int cell_unskip_gravity_tasks(struct cell *c, struct scheduler *s);
 void cell_set_super(struct cell *c, struct cell *super);
 void cell_drift_part(struct cell *c, const struct engine *e, int force);
 void cell_drift_gpart(struct cell *c, const struct engine *e, int force);
@@ -451,8 +507,8 @@ void cell_drift_multipole(struct cell *c, const struct engine *e);
 void cell_drift_all_multipoles(struct cell *c, const struct engine *e);
 void cell_check_timesteps(struct cell *c);
 void cell_store_pre_drift_values(struct cell *c);
-void cell_activate_subcell_tasks(struct cell *ci, struct cell *cj,
-                                 struct scheduler *s);
+void cell_activate_subcell_hydro_tasks(struct cell *ci, struct cell *cj,
+                                       struct scheduler *s);
 void cell_activate_subcell_grav_tasks(struct cell *ci, struct cell *cj,
                                       struct scheduler *s);
 void cell_activate_subcell_external_grav_tasks(struct cell *ci,
@@ -492,12 +548,8 @@ __attribute__((always_inline)) INLINE static int cell_can_recurse_in_pair_task(
 __attribute__((always_inline)) INLINE static int cell_can_recurse_in_self_task(
     const struct cell *c) {
 
-  /* Is the cell split ? */
-  /* Note: No need for more checks here as all the sub-pairs and sub-self */
-  /* operations will be executed. So no need for the particle to be at exactly
-   */
-  /* the right place. */
-  return c->split;
+  /* Is the cell split and not smaller than the smoothing length? */
+  return c->split && (kernel_gamma * c->h_max_old < 0.5f * c->dmin);
 }
 
 /**
diff --git a/src/collectgroup.c b/src/collectgroup.c
index b7e5486b59a2ec5e47b7b864071a2bb1e5ce1850..b704a0a5ea33cc1c5332fc0575061ad8e38f4d21 100644
--- a/src/collectgroup.c
+++ b/src/collectgroup.c
@@ -37,7 +37,8 @@
 /* Local collections for MPI reduces. */
 struct mpicollectgroup1 {
   size_t updates, g_updates, s_updates;
-  integertime_t ti_end_min;
+  integertime_t ti_hydro_end_min;
+  integertime_t ti_gravity_end_min;
   int forcerebuild;
 };
 
@@ -75,9 +76,15 @@ void collectgroup_init() {
  * @param e The #engine
  */
 void collectgroup1_apply(struct collectgroup1 *grp1, struct engine *e) {
-  e->ti_end_min = grp1->ti_end_min;
-  e->ti_end_max = grp1->ti_end_max;
-  e->ti_beg_max = grp1->ti_beg_max;
+  e->ti_hydro_end_min = grp1->ti_hydro_end_min;
+  e->ti_hydro_end_max = grp1->ti_hydro_end_max;
+  e->ti_hydro_beg_max = grp1->ti_hydro_beg_max;
+  e->ti_gravity_end_min = grp1->ti_gravity_end_min;
+  e->ti_gravity_end_max = grp1->ti_gravity_end_max;
+  e->ti_gravity_beg_max = grp1->ti_gravity_beg_max;
+  e->ti_end_min = min(e->ti_hydro_end_min, e->ti_gravity_end_min);
+  e->ti_end_max = max(e->ti_hydro_end_max, e->ti_gravity_end_max);
+  e->ti_beg_max = max(e->ti_hydro_beg_max, e->ti_gravity_beg_max);
   e->updates = grp1->updates;
   e->g_updates = grp1->g_updates;
   e->s_updates = grp1->s_updates;
@@ -92,21 +99,37 @@ void collectgroup1_apply(struct collectgroup1 *grp1, struct engine *e) {
  * @param g_updates the number of updated gravity particles on this node this
  * step.
  * @param s_updates the number of updated star particles on this node this step.
- * @param ti_end_min the minimum end time for next time step after this step.
- * @param ti_end_max the maximum end time for next time step after this step.
- * @param ti_beg_max the maximum begin time for next time step after this step.
+ * @param ti_hydro_end_min the minimum end time for next hydro time step after
+ * this step.
+ * @param ti_hydro_end_max the maximum end time for next hydro time step after
+ * this step.
+ * @param ti_hydro_beg_max the maximum begin time for next hydro time step after
+ * this step.
+ * @param ti_gravity_end_min the minimum end time for next gravity time step
+ * after this step.
+ * @param ti_gravity_end_max the maximum end time for next gravity time step
+ * after this step.
+ * @param ti_gravity_beg_max the maximum begin time for next gravity time step
+ * after this step.
  * @param forcerebuild whether a rebuild is required after this step.
  */
 void collectgroup1_init(struct collectgroup1 *grp1, size_t updates,
                         size_t g_updates, size_t s_updates,
-                        integertime_t ti_end_min, integertime_t ti_end_max,
-                        integertime_t ti_beg_max, int forcerebuild) {
+                        integertime_t ti_hydro_end_min,
+                        integertime_t ti_hydro_end_max,
+                        integertime_t ti_hydro_beg_max,
+                        integertime_t ti_gravity_end_min,
+                        integertime_t ti_gravity_end_max,
+                        integertime_t ti_gravity_beg_max, int forcerebuild) {
   grp1->updates = updates;
   grp1->g_updates = g_updates;
   grp1->s_updates = s_updates;
-  grp1->ti_end_min = ti_end_min;
-  grp1->ti_end_max = ti_end_max;
-  grp1->ti_beg_max = ti_beg_max;
+  grp1->ti_hydro_end_min = ti_hydro_end_min;
+  grp1->ti_hydro_end_max = ti_hydro_end_max;
+  grp1->ti_hydro_beg_max = ti_hydro_beg_max;
+  grp1->ti_gravity_end_min = ti_gravity_end_min;
+  grp1->ti_gravity_end_max = ti_gravity_end_max;
+  grp1->ti_gravity_beg_max = ti_gravity_beg_max;
   grp1->forcerebuild = forcerebuild;
 }
 
@@ -127,7 +150,8 @@ void collectgroup1_reduce(struct collectgroup1 *grp1) {
   mpigrp11.updates = grp1->updates;
   mpigrp11.g_updates = grp1->g_updates;
   mpigrp11.s_updates = grp1->s_updates;
-  mpigrp11.ti_end_min = grp1->ti_end_min;
+  mpigrp11.ti_hydro_end_min = grp1->ti_hydro_end_min;
+  mpigrp11.ti_gravity_end_min = grp1->ti_gravity_end_min;
   mpigrp11.forcerebuild = grp1->forcerebuild;
 
   struct mpicollectgroup1 mpigrp12;
@@ -139,7 +163,8 @@ void collectgroup1_reduce(struct collectgroup1 *grp1) {
   grp1->updates = mpigrp12.updates;
   grp1->g_updates = mpigrp12.g_updates;
   grp1->s_updates = mpigrp12.s_updates;
-  grp1->ti_end_min = mpigrp12.ti_end_min;
+  grp1->ti_hydro_end_min = mpigrp12.ti_hydro_end_min;
+  grp1->ti_gravity_end_min = mpigrp12.ti_gravity_end_min;
   grp1->forcerebuild = mpigrp12.forcerebuild;
 
 #endif
@@ -162,7 +187,10 @@ static void doreduce1(struct mpicollectgroup1 *mpigrp11,
   mpigrp11->s_updates += mpigrp12->s_updates;
 
   /* Minimum end time. */
-  mpigrp11->ti_end_min = min(mpigrp11->ti_end_min, mpigrp12->ti_end_min);
+  mpigrp11->ti_hydro_end_min =
+      min(mpigrp11->ti_hydro_end_min, mpigrp12->ti_hydro_end_min);
+  mpigrp11->ti_gravity_end_min =
+      min(mpigrp11->ti_gravity_end_min, mpigrp12->ti_gravity_end_min);
 
   /* Everyone must agree to not rebuild. */
   if (mpigrp11->forcerebuild || mpigrp12->forcerebuild)
diff --git a/src/collectgroup.h b/src/collectgroup.h
index b0cbf0763d94dec65ea1aae2c521c1c4b9f7bf83..f2014ed254fdde7ea293224751061d824782b4a7 100644
--- a/src/collectgroup.h
+++ b/src/collectgroup.h
@@ -38,7 +38,8 @@ struct collectgroup1 {
   size_t updates, g_updates, s_updates;
 
   /* Times for the time-step */
-  integertime_t ti_end_min, ti_end_max, ti_beg_max;
+  integertime_t ti_hydro_end_min, ti_hydro_end_max, ti_hydro_beg_max;
+  integertime_t ti_gravity_end_min, ti_gravity_end_max, ti_gravity_beg_max;
 
   /* Force the engine to rebuild? */
   int forcerebuild;
@@ -48,8 +49,12 @@ void collectgroup_init();
 void collectgroup1_apply(struct collectgroup1 *grp1, struct engine *e);
 void collectgroup1_init(struct collectgroup1 *grp1, size_t updates,
                         size_t g_updates, size_t s_updates,
-                        integertime_t ti_end_min, integertime_t ti_end_max,
-                        integertime_t ti_beg_max, int forcerebuild);
+                        integertime_t ti_hydro_end_min,
+                        integertime_t ti_hydro_end_max,
+                        integertime_t ti_hydro_beg_max,
+                        integertime_t ti_gravity_end_min,
+                        integertime_t ti_gravity_end_max,
+                        integertime_t ti_gravity_beg_max, int forcerebuild);
 void collectgroup1_reduce(struct collectgroup1 *grp1);
 
 #endif /* SWIFT_COLLECTGROUP_H */
diff --git a/src/debug.c b/src/debug.c
index 402234a27ef50bbc38c06f61dea96e48ff02c59a..8675355ca3a97eaa29d79a7bc0e064bb577168d3 100644
--- a/src/debug.c
+++ b/src/debug.c
@@ -317,7 +317,7 @@ static void dumpCells_map(struct cell *c, void *data) {
 
     /* Active cells, otherwise all. */
     if (active)
-      active = cell_is_active(c, e);
+      active = cell_is_active_hydro(c, e);
     else
       active = 1;
 
@@ -344,9 +344,9 @@ static void dumpCells_map(struct cell *c, void *data) {
               "%6.1f %20lld %6d %6d %6d %6d %6d\n",
               c->loc[0], c->loc[1], c->loc[2], c->width[0], c->width[1],
               c->width[2], e->step, c->count, c->gcount, c->scount, pactcount,
-              c->depth, ntasks, c->ti_end_min, get_time_bin(c->ti_end_min),
-              (c->super == c), cell_is_active(c, e), c->nodeID,
-              c->nodeID == e->nodeID);
+              c->depth, ntasks, c->ti_hydro_end_min,
+              get_time_bin(c->ti_hydro_end_min), (c->super == c),
+              cell_is_active_hydro(c, e), c->nodeID, c->nodeID == e->nodeID);
     }
   }
 }
diff --git a/src/engine.c b/src/engine.c
index cd30b7ab1b738d983ac2dfe11aebf89dc5218a18..f21c7cfc8af721b9f8d895cffecb0c3605d34861 100644
--- a/src/engine.c
+++ b/src/engine.c
@@ -104,7 +104,8 @@ int engine_rank;
 struct end_of_step_data {
 
   int updates, g_updates, s_updates;
-  integertime_t ti_end_min, ti_end_max, ti_beg_max;
+  integertime_t ti_hydro_end_min, ti_hydro_end_max, ti_hydro_beg_max;
+  integertime_t ti_gravity_end_min, ti_gravity_end_max, ti_gravity_beg_max;
   struct engine *e;
 };
 
@@ -136,13 +137,18 @@ void engine_addlink(struct engine *e, struct link **l, struct task *t) {
  */
 void engine_add_ghosts(struct engine *e, struct cell *c, struct task *ghost_in,
                        struct task *ghost_out) {
+
+  /* If we have reached the leaf OR have to few particles to play with*/
   if (!c->split || c->count < engine_max_parts_per_ghost) {
+
+    /* Add the ghost task and its dependencies */
     struct scheduler *s = &e->sched;
     c->ghost =
         scheduler_addtask(s, task_type_ghost, task_subtype_none, 0, 0, c, NULL);
     scheduler_addunlock(s, ghost_in, c->ghost);
     scheduler_addunlock(s, c->ghost, ghost_out);
   } else {
+    /* Keep recursing */
     for (int k = 0; k < 8; k++)
       if (c->progeny[k] != NULL)
         engine_add_ghosts(e, c->progeny[k], ghost_in, ghost_out);
@@ -151,47 +157,26 @@ void engine_add_ghosts(struct engine *e, struct cell *c, struct task *ghost_in,
 
 /**
  * @brief Generate the hydro hierarchical tasks for a hierarchy of cells -
- * i.e. all the O(Npart) tasks.
+ * i.e. all the O(Npart) tasks -- timestep version
  *
  * Tasks are only created here. The dependencies will be added later on.
  *
- * Note that there is no need to recurse below the super-cell.
+ * Note that there is no need to recurse below the super-cell. Note also
+ * that we only add tasks if the relevant particles are present in the cell.
  *
  * @param e The #engine.
  * @param c The #cell.
  */
-void engine_make_hierarchical_tasks(struct engine *e, struct cell *c) {
+void engine_make_hierarchical_tasks_common(struct engine *e, struct cell *c) {
 
   struct scheduler *s = &e->sched;
-  const int periodic = e->s->periodic;
-  const int is_with_hydro = (e->policy & engine_policy_hydro);
-  const int is_self_gravity = (e->policy & engine_policy_self_gravity);
-  const int is_external_gravity = (e->policy & engine_policy_external_gravity);
-  const int is_with_cooling = (e->policy & engine_policy_cooling);
-  const int is_with_sourceterms = (e->policy & engine_policy_sourceterms);
 
   /* Are we in a super-cell ? */
   if (c->super == c) {
 
-    /* Add the sort task. */
-    if (is_with_hydro) {
-      c->sorts = scheduler_addtask(s, task_type_sort, task_subtype_none, 0, 0,
-                                   c, NULL);
-    }
-
     /* Local tasks only... */
     if (c->nodeID == e->nodeID) {
 
-      /* Add the drift tasks corresponding to the policy. */
-      if (is_with_hydro) {
-        c->drift_part = scheduler_addtask(s, task_type_drift_part,
-                                          task_subtype_none, 0, 0, c, NULL);
-      }
-      if (is_self_gravity || is_external_gravity) {
-        c->drift_gpart = scheduler_addtask(s, task_type_drift_gpart,
-                                           task_subtype_none, 0, 0, c, NULL);
-      }
-
       /* Add the two half kicks */
       c->kick1 = scheduler_addtask(s, task_type_kick1, task_subtype_none, 0, 0,
                                    c, NULL);
@@ -205,53 +190,71 @@ void engine_make_hierarchical_tasks(struct engine *e, struct cell *c) {
 
       scheduler_addunlock(s, c->kick2, c->timestep);
       scheduler_addunlock(s, c->timestep, c->kick1);
+    }
 
-      /* Add the self-gravity tasks */
-      if (is_self_gravity) {
+  } else { /* We are above the super-cell so need to go deeper */
 
-        /* Initialisation of the multipoles */
-        c->init_grav = scheduler_addtask(s, task_type_init_grav,
-                                         task_subtype_none, 0, 0, c, NULL);
+    /* Recurse. */
+    if (c->split)
+      for (int k = 0; k < 8; k++)
+        if (c->progeny[k] != NULL)
+          engine_make_hierarchical_tasks_common(e, c->progeny[k]);
+  }
+}
 
-        /* Gravity non-neighbouring pm calculations */
-        c->grav_long_range = scheduler_addtask(
-            s, task_type_grav_long_range, task_subtype_none, 0, 0, c, NULL);
+/**
+ * @brief Generate the hydro hierarchical tasks for a hierarchy of cells -
+ * i.e. all the O(Npart) tasks -- hydro version
+ *
+ * Tasks are only created here. The dependencies will be added later on.
+ *
+ * Note that there is no need to recurse below the super-cell. Note also
+ * that we only add tasks if the relevant particles are present in the cell.
+ *
+ * @param e The #engine.
+ * @param c The #cell.
+ */
+void engine_make_hierarchical_tasks_hydro(struct engine *e, struct cell *c) {
 
-        /* Gravity recursive down-pass */
-        c->grav_down = scheduler_addtask(s, task_type_grav_down,
-                                         task_subtype_none, 0, 0, c, NULL);
+  struct scheduler *s = &e->sched;
+  const int is_with_cooling = (e->policy & engine_policy_cooling);
+  const int is_with_sourceterms = (e->policy & engine_policy_sourceterms);
 
-        if (periodic) scheduler_addunlock(s, c->init_grav, c->grav_ghost[0]);
-        scheduler_addunlock(s, c->init_grav, c->grav_long_range);
-        scheduler_addunlock(s, c->grav_long_range, c->grav_down);
-        scheduler_addunlock(s, c->grav_down, c->kick2);
-      }
+  /* Are we in a super-cell ? */
+  if (c->super_hydro == c) {
 
-      /* Add the hydrodynamics tasks */
-      if (is_with_hydro) {
+    /* Add the sort task. */
+    c->sorts =
+        scheduler_addtask(s, task_type_sort, task_subtype_none, 0, 0, c, NULL);
 
-        /* Generate the ghost tasks. */
-        c->ghost_in =
-            scheduler_addtask(s, task_type_ghost, task_subtype_none, 0,
-                              /* implicit = */ 1, c, NULL);
-        c->ghost_out =
-            scheduler_addtask(s, task_type_ghost, task_subtype_none, 0,
-                              /* implicit = */ 1, c, NULL);
-        engine_add_ghosts(e, c, c->ghost_in, c->ghost_out);
+    /* Local tasks only... */
+    if (c->nodeID == e->nodeID) {
+
+      /* Add the drift task. */
+      c->drift_part = scheduler_addtask(s, task_type_drift_part,
+                                        task_subtype_none, 0, 0, c, NULL);
+
+      /* Generate the ghost tasks. */
+      c->ghost_in =
+          scheduler_addtask(s, task_type_ghost_in, task_subtype_none, 0,
+                            /* implicit = */ 1, c, NULL);
+      c->ghost_out =
+          scheduler_addtask(s, task_type_ghost_out, task_subtype_none, 0,
+                            /* implicit = */ 1, c, NULL);
+      engine_add_ghosts(e, c, c->ghost_in, c->ghost_out);
 
 #ifdef EXTRA_HYDRO_LOOP
-        /* Generate the extra ghost task. */
-        c->extra_ghost = scheduler_addtask(s, task_type_extra_ghost,
-                                           task_subtype_none, 0, 0, c, NULL);
+      /* Generate the extra ghost task. */
+      c->extra_ghost = scheduler_addtask(s, task_type_extra_ghost,
+                                         task_subtype_none, 0, 0, c, NULL);
 #endif
-      }
 
       /* Cooling task */
       if (is_with_cooling) {
         c->cooling = scheduler_addtask(s, task_type_cooling, task_subtype_none,
                                        0, 0, c, NULL);
 
-        scheduler_addunlock(s, c->cooling, c->kick2);
+        scheduler_addunlock(s, c->cooling, c->super->kick2);
       }
 
       /* add source terms */
@@ -263,25 +266,90 @@ void engine_make_hierarchical_tasks(struct engine *e, struct cell *c) {
 
   } else { /* We are above the super-cell so need to go deeper */
 
-#ifdef SWIFT_DEBUG_CHECKS
-    if (c->super != NULL) error("Incorrectly set super pointer");
-#endif
+    /* Recurse. */
+    if (c->split)
+      for (int k = 0; k < 8; k++)
+        if (c->progeny[k] != NULL)
+          engine_make_hierarchical_tasks_hydro(e, c->progeny[k]);
+  }
+}
+
+/**
+ * @brief Generate the hydro hierarchical tasks for a hierarchy of cells -
+ * i.e. all the O(Npart) tasks -- gravity version
+ *
+ * Tasks are only created here. The dependencies will be added later on.
+ *
+ * Note that there is no need to recurse below the super-cell. Note also
+ * that we only add tasks if the relevant particles are present in the cell.
+ *
+ * @param e The #engine.
+ * @param c The #cell.
+ */
+void engine_make_hierarchical_tasks_gravity(struct engine *e, struct cell *c) {
+
+  struct scheduler *s = &e->sched;
+  const int periodic = e->s->periodic;
+  const int is_self_gravity = (e->policy & engine_policy_self_gravity);
+
+  /* Are we in a super-cell ? */
+  if (c->super_gravity == c) {
+
+    /* Local tasks only... */
+    if (c->nodeID == e->nodeID) {
+
+      c->drift_gpart = scheduler_addtask(s, task_type_drift_gpart,
+                                         task_subtype_none, 0, 0, c, NULL);
+
+      if (is_self_gravity) {
+
+        /* Initialisation of the multipoles */
+        c->init_grav = scheduler_addtask(s, task_type_init_grav,
+                                         task_subtype_none, 0, 0, c, NULL);
+
+        /* Gravity non-neighbouring pm calculations */
+        c->grav_long_range = scheduler_addtask(
+            s, task_type_grav_long_range, task_subtype_none, 0, 0, c, NULL);
+
+        /* Gravity recursive down-pass */
+        c->grav_down = scheduler_addtask(s, task_type_grav_down,
+                                         task_subtype_none, 0, 0, c, NULL);
+
+        if (periodic) scheduler_addunlock(s, c->init_grav, c->grav_ghost_in);
+        if (periodic) scheduler_addunlock(s, c->grav_ghost_out, c->grav_down);
+        scheduler_addunlock(s, c->init_grav, c->grav_long_range);
+        scheduler_addunlock(s, c->grav_long_range, c->grav_down);
+        scheduler_addunlock(s, c->grav_down, c->super->kick2);
+      }
+    }
+
+  } else { /* We are above the super-cell so need to go deeper */
 
     /* Recurse. */
     if (c->split)
       for (int k = 0; k < 8; k++)
         if (c->progeny[k] != NULL)
-          engine_make_hierarchical_tasks(e, c->progeny[k]);
+          engine_make_hierarchical_tasks_gravity(e, c->progeny[k]);
   }
 }
 
 void engine_make_hierarchical_tasks_mapper(void *map_data, int num_elements,
                                            void *extra_data) {
   struct engine *e = (struct engine *)extra_data;
+  const int is_with_hydro = (e->policy & engine_policy_hydro);
+  const int is_with_self_gravity = (e->policy & engine_policy_self_gravity);
+  const int is_with_external_gravity =
+      (e->policy & engine_policy_external_gravity);
 
   for (int ind = 0; ind < num_elements; ind++) {
     struct cell *c = &((struct cell *)map_data)[ind];
-    engine_make_hierarchical_tasks(e, c);
+    /* Make the common tasks (time integration) */
+    engine_make_hierarchical_tasks_common(e, c);
+    /* Add the hydro stuff */
+    if (is_with_hydro) engine_make_hierarchical_tasks_hydro(e, c);
+    /* And the gravity stuff */
+    if (is_with_self_gravity || is_with_external_gravity)
+      engine_make_hierarchical_tasks_gravity(e, c);
   }
 }
 
@@ -1044,29 +1112,7 @@ void engine_repartition_trigger(struct engine *e) {
 }
 
 /**
- * @brief Add up/down gravity tasks to a cell hierarchy.
- *
- * @param e The #engine.
- * @param c The #cell
- * @param up The upward gravity #task.
- * @param down The downward gravity #task.
- */
-void engine_addtasks_grav(struct engine *e, struct cell *c, struct task *up,
-                          struct task *down) {
-
-  /* Link the tasks to this cell. */
-  // c->grav_up = up;
-  c->grav_down = down;
-
-  /* Recurse? */
-  if (c->split)
-    for (int k = 0; k < 8; k++)
-      if (c->progeny[k] != NULL)
-        engine_addtasks_grav(e, c->progeny[k], up, down);
-}
-
-/**
- * @brief Add send tasks to a hierarchy of cells.
+ * @brief Add send tasks for the hydro pairs to a hierarchy of cells.
  *
  * @param e The #engine.
  * @param ci The sending #cell.
@@ -1074,11 +1120,10 @@ void engine_addtasks_grav(struct engine *e, struct cell *c, struct task *up,
  * @param t_xv The send_xv #task, if it has already been created.
  * @param t_rho The send_rho #task, if it has already been created.
  * @param t_gradient The send_gradient #task, if already created.
- * @param t_ti The send_ti #task, if required and has already been created.
  */
-void engine_addtasks_send(struct engine *e, struct cell *ci, struct cell *cj,
-                          struct task *t_xv, struct task *t_rho,
-                          struct task *t_gradient, struct task *t_ti) {
+void engine_addtasks_send_hydro(struct engine *e, struct cell *ci,
+                                struct cell *cj, struct task *t_xv,
+                                struct task *t_rho, struct task *t_gradient) {
 
 #ifdef WITH_MPI
   struct link *l = NULL;
@@ -1097,49 +1142,45 @@ void engine_addtasks_send(struct engine *e, struct cell *ci, struct cell *cj,
     /* Create the tasks and their dependencies? */
     if (t_xv == NULL) {
 
-      t_xv = scheduler_addtask(s, task_type_send, task_subtype_xv, 4 * ci->tag,
-                               0, ci, cj);
+      t_xv = scheduler_addtask(s, task_type_send, task_subtype_xv,
+                               6 * ci->tag + 0, 0, ci, cj);
       t_rho = scheduler_addtask(s, task_type_send, task_subtype_rho,
-                                4 * ci->tag + 1, 0, ci, cj);
-      t_ti = scheduler_addtask(s, task_type_send, task_subtype_tend,
-                               4 * ci->tag + 2, 0, ci, cj);
+                                6 * ci->tag + 1, 0, ci, cj);
 #ifdef EXTRA_HYDRO_LOOP
       t_gradient = scheduler_addtask(s, task_type_send, task_subtype_gradient,
-                                     4 * ci->tag + 3, 0, ci, cj);
+                                     6 * ci->tag + 3, 0, ci, cj);
 #endif
 
 #ifdef EXTRA_HYDRO_LOOP
 
       scheduler_addunlock(s, t_gradient, ci->super->kick2);
 
-      scheduler_addunlock(s, ci->super->extra_ghost, t_gradient);
+      scheduler_addunlock(s, ci->super_hydro->extra_ghost, t_gradient);
 
-      /* The send_rho task should unlock the super-cell's extra_ghost task. */
-      scheduler_addunlock(s, t_rho, ci->super->extra_ghost);
+      /* The send_rho task should unlock the super_hydro-cell's extra_ghost
+       * task. */
+      scheduler_addunlock(s, t_rho, ci->super_hydro->extra_ghost);
 
       /* The send_rho task depends on the cell's ghost task. */
-      scheduler_addunlock(s, ci->super->ghost_out, t_rho);
+      scheduler_addunlock(s, ci->super_hydro->ghost_out, t_rho);
 
-      /* The send_xv task should unlock the super-cell's ghost task. */
-      scheduler_addunlock(s, t_xv, ci->super->ghost_in);
+      /* The send_xv task should unlock the super_hydro-cell's ghost task. */
+      scheduler_addunlock(s, t_xv, ci->super_hydro->ghost_in);
 
 #else
-      /* The send_rho task should unlock the super-cell's kick task. */
+      /* The send_rho task should unlock the super_hydro-cell's kick task. */
       scheduler_addunlock(s, t_rho, ci->super->kick2);
 
       /* The send_rho task depends on the cell's ghost task. */
-      scheduler_addunlock(s, ci->super->ghost_out, t_rho);
+      scheduler_addunlock(s, ci->super_hydro->ghost_out, t_rho);
 
-      /* The send_xv task should unlock the super-cell's ghost task. */
-      scheduler_addunlock(s, t_xv, ci->super->ghost_in);
+      /* The send_xv task should unlock the super_hydro-cell's ghost task. */
+      scheduler_addunlock(s, t_xv, ci->super_hydro->ghost_in);
 
 #endif
 
       /* Drift before you send */
-      scheduler_addunlock(s, ci->super->drift_part, t_xv);
-
-      /* The super-cell's timestep task should unlock the send_ti task. */
-      scheduler_addunlock(s, ci->super->timestep, t_ti);
+      scheduler_addunlock(s, ci->super_hydro->drift_part, t_xv);
     }
 
     /* Add them to the local cell. */
@@ -1148,6 +1189,116 @@ void engine_addtasks_send(struct engine *e, struct cell *ci, struct cell *cj,
 #ifdef EXTRA_HYDRO_LOOP
     engine_addlink(e, &ci->send_gradient, t_gradient);
 #endif
+  }
+
+  /* Recurse? */
+  if (ci->split)
+    for (int k = 0; k < 8; k++)
+      if (ci->progeny[k] != NULL)
+        engine_addtasks_send_hydro(e, ci->progeny[k], cj, t_xv, t_rho,
+                                   t_gradient);
+
+#else
+  error("SWIFT was not compiled with MPI support.");
+#endif
+}
+
+/**
+ * @brief Add send tasks for the gravity pairs to a hierarchy of cells.
+ *
+ * @param e The #engine.
+ * @param ci The sending #cell.
+ * @param cj Dummy cell containing the nodeID of the receiving node.
+ * @param t_grav The send_grav #task, if it has already been created.
+ */
+void engine_addtasks_send_gravity(struct engine *e, struct cell *ci,
+                                  struct cell *cj, struct task *t_grav) {
+
+#ifdef WITH_MPI
+  struct link *l = NULL;
+  struct scheduler *s = &e->sched;
+  const int nodeID = cj->nodeID;
+
+  /* Check if any of the gravity tasks are for the target node. */
+  for (l = ci->grav; l != NULL; l = l->next)
+    if (l->t->ci->nodeID == nodeID ||
+        (l->t->cj != NULL && l->t->cj->nodeID == nodeID))
+      break;
+
+  /* If so, attach send tasks. */
+  if (l != NULL) {
+
+    /* Create the tasks and their dependencies? */
+    if (t_grav == NULL) {
+
+      t_grav = scheduler_addtask(s, task_type_send, task_subtype_gpart,
+                                 6 * ci->tag + 4, 0, ci, cj);
+
+      /* The sends should unlock the down pass. */
+      scheduler_addunlock(s, t_grav, ci->super_gravity->grav_down);
+
+      /* Drift before you send */
+      scheduler_addunlock(s, ci->super_gravity->drift_gpart, t_grav);
+    }
+
+    /* Add them to the local cell. */
+    engine_addlink(e, &ci->send_grav, t_grav);
+  }
+
+  /* Recurse? */
+  if (ci->split)
+    for (int k = 0; k < 8; k++)
+      if (ci->progeny[k] != NULL)
+        engine_addtasks_send_gravity(e, ci->progeny[k], cj, t_grav);
+
+#else
+  error("SWIFT was not compiled with MPI support.");
+#endif
+}
+
+/**
+ * @brief Add send tasks for the time-step to a hierarchy of cells.
+ *
+ * @param e The #engine.
+ * @param ci The sending #cell.
+ * @param cj Dummy cell containing the nodeID of the receiving node.
+ * @param t_ti The send_ti #task, if it has already been created.
+ */
+void engine_addtasks_send_timestep(struct engine *e, struct cell *ci,
+                                   struct cell *cj, struct task *t_ti) {
+
+#ifdef WITH_MPI
+  struct link *l = NULL;
+  struct scheduler *s = &e->sched;
+  const int nodeID = cj->nodeID;
+
+  /* Check if any of the gravity tasks are for the target node. */
+  for (l = ci->grav; l != NULL; l = l->next)
+    if (l->t->ci->nodeID == nodeID ||
+        (l->t->cj != NULL && l->t->cj->nodeID == nodeID))
+      break;
+
+  /* Check whether instead any of the hydro tasks are for the target node. */
+  if (l == NULL)
+    for (l = ci->density; l != NULL; l = l->next)
+      if (l->t->ci->nodeID == nodeID ||
+          (l->t->cj != NULL && l->t->cj->nodeID == nodeID))
+        break;
+
+  /* If found anything, attach send tasks. */
+  if (l != NULL) {
+
+    /* Create the tasks and their dependencies? */
+    if (t_ti == NULL) {
+
+      t_ti = scheduler_addtask(s, task_type_send, task_subtype_tend,
+                               6 * ci->tag + 2, 0, ci, cj);
+
+      /* The super-cell's timestep task should unlock the send_ti task. */
+      scheduler_addunlock(s, ci->super->timestep, t_ti);
+    }
+
+    /* Add them to the local cell. */
     engine_addlink(e, &ci->send_ti, t_ti);
   }
 
@@ -1155,8 +1306,7 @@ void engine_addtasks_send(struct engine *e, struct cell *ci, struct cell *cj,
   if (ci->split)
     for (int k = 0; k < 8; k++)
       if (ci->progeny[k] != NULL)
-        engine_addtasks_send(e, ci->progeny[k], cj, t_xv, t_rho, t_gradient,
-                             t_ti);
+        engine_addtasks_send_timestep(e, ci->progeny[k], cj, t_ti);
 
 #else
   error("SWIFT was not compiled with MPI support.");
@@ -1164,76 +1314,139 @@ void engine_addtasks_send(struct engine *e, struct cell *ci, struct cell *cj,
 }
 
 /**
- * @brief Add recv tasks to a hierarchy of cells.
+ * @brief Add recv tasks for hydro pairs to a hierarchy of cells.
  *
  * @param e The #engine.
  * @param c The foreign #cell.
  * @param t_xv The recv_xv #task, if it has already been created.
  * @param t_rho The recv_rho #task, if it has already been created.
  * @param t_gradient The recv_gradient #task, if it has already been created.
- * @param t_ti The recv_ti #task, if required and has already been created.
  */
-void engine_addtasks_recv(struct engine *e, struct cell *c, struct task *t_xv,
-                          struct task *t_rho, struct task *t_gradient,
-                          struct task *t_ti) {
+void engine_addtasks_recv_hydro(struct engine *e, struct cell *c,
+                                struct task *t_xv, struct task *t_rho,
+                                struct task *t_gradient) {
 
 #ifdef WITH_MPI
   struct scheduler *s = &e->sched;
 
-  /* Do we need to construct a recv task?
-     Note that since c is a foreign cell, all its density tasks will involve
-     only the current rank, and thus we don't have to check them.*/
+  /* Have we reached a level where there are any hydro tasks ? */
   if (t_xv == NULL && c->density != NULL) {
 
     /* Create the tasks. */
-    t_xv = scheduler_addtask(s, task_type_recv, task_subtype_xv, 4 * c->tag, 0,
-                             c, NULL);
+    t_xv = scheduler_addtask(s, task_type_recv, task_subtype_xv, 6 * c->tag + 0,
+                             0, c, NULL);
     t_rho = scheduler_addtask(s, task_type_recv, task_subtype_rho,
-                              4 * c->tag + 1, 0, c, NULL);
-    t_ti = scheduler_addtask(s, task_type_recv, task_subtype_tend,
-                             4 * c->tag + 2, 0, c, NULL);
+                              6 * c->tag + 1, 0, c, NULL);
 #ifdef EXTRA_HYDRO_LOOP
     t_gradient = scheduler_addtask(s, task_type_recv, task_subtype_gradient,
-                                   4 * c->tag + 3, 0, c, NULL);
+                                   6 * c->tag + 3, 0, c, NULL);
 #endif
   }
+
   c->recv_xv = t_xv;
   c->recv_rho = t_rho;
   c->recv_gradient = t_gradient;
-  c->recv_ti = t_ti;
 
-/* Add dependencies. */
-#ifdef EXTRA_HYDRO_LOOP
+  /* Add dependencies. */
+  if (c->sorts != NULL) scheduler_addunlock(s, t_xv, c->sorts);
+
   for (struct link *l = c->density; l != NULL; l = l->next) {
     scheduler_addunlock(s, t_xv, l->t);
     scheduler_addunlock(s, l->t, t_rho);
   }
+#ifdef EXTRA_HYDRO_LOOP
   for (struct link *l = c->gradient; l != NULL; l = l->next) {
     scheduler_addunlock(s, t_rho, l->t);
     scheduler_addunlock(s, l->t, t_gradient);
   }
-  for (struct link *l = c->force; l != NULL; l = l->next) {
+  for (struct link *l = c->force; l != NULL; l = l->next)
     scheduler_addunlock(s, t_gradient, l->t);
-    scheduler_addunlock(s, l->t, t_ti);
+#else
+  for (struct link *l = c->force; l != NULL; l = l->next)
+    scheduler_addunlock(s, t_rho, l->t);
+#endif
+
+  /* Recurse? */
+  if (c->split)
+    for (int k = 0; k < 8; k++)
+      if (c->progeny[k] != NULL)
+        engine_addtasks_recv_hydro(e, c->progeny[k], t_xv, t_rho, t_gradient);
+
+#else
+  error("SWIFT was not compiled with MPI support.");
+#endif
+}
+
+/**
+ * @brief Add recv tasks for gravity pairs to a hierarchy of cells.
+ *
+ * @param e The #engine.
+ * @param c The foreign #cell.
+ * @param t_grav The recv_gpart #task, if it has already been created.
+ */
+void engine_addtasks_recv_gravity(struct engine *e, struct cell *c,
+                                  struct task *t_grav) {
+
+#ifdef WITH_MPI
+  struct scheduler *s = &e->sched;
+
+  /* Have we reached a level where there are any gravity tasks ? */
+  if (t_grav == NULL && c->grav != NULL) {
+
+    /* Create the tasks. */
+    t_grav = scheduler_addtask(s, task_type_recv, task_subtype_gpart,
+                               6 * c->tag + 4, 0, c, NULL);
   }
-  if (c->sorts != NULL) scheduler_addunlock(s, t_xv, c->sorts);
+
+  c->recv_grav = t_grav;
+
+  for (struct link *l = c->grav; l != NULL; l = l->next)
+    scheduler_addunlock(s, t_grav, l->t);
+
+  /* Recurse? */
+  if (c->split)
+    for (int k = 0; k < 8; k++)
+      if (c->progeny[k] != NULL)
+        engine_addtasks_recv_gravity(e, c->progeny[k], t_grav);
+
 #else
-  for (struct link *l = c->density; l != NULL; l = l->next) {
-    scheduler_addunlock(s, t_xv, l->t);
-    scheduler_addunlock(s, l->t, t_rho);
+  error("SWIFT was not compiled with MPI support.");
+#endif
+}
+
+/**
+ * @brief Add recv tasks for gravity pairs to a hierarchy of cells.
+ *
+ * @param e The #engine.
+ * @param c The foreign #cell.
+ * @param t_ti The recv_ti #task, if already been created.
+ */
+void engine_addtasks_recv_timestep(struct engine *e, struct cell *c,
+                                   struct task *t_ti) {
+
+#ifdef WITH_MPI
+  struct scheduler *s = &e->sched;
+
+  /* Have we reached a level where there are any self/pair tasks ? */
+  if (t_ti == NULL && (c->grav != NULL || c->density != NULL)) {
+
+    t_ti = scheduler_addtask(s, task_type_recv, task_subtype_tend,
+                             6 * c->tag + 2, 0, c, NULL);
   }
-  for (struct link *l = c->force; l != NULL; l = l->next) {
-    scheduler_addunlock(s, t_rho, l->t);
+
+  c->recv_ti = t_ti;
+
+  for (struct link *l = c->grav; l != NULL; l = l->next)
+    scheduler_addunlock(s, l->t, t_ti);
+
+  for (struct link *l = c->force; l != NULL; l = l->next)
     scheduler_addunlock(s, l->t, t_ti);
-  }
-  if (c->sorts != NULL) scheduler_addunlock(s, t_xv, c->sorts);
-#endif
 
   /* Recurse? */
   if (c->split)
     for (int k = 0; k < 8; k++)
       if (c->progeny[k] != NULL)
-        engine_addtasks_recv(e, c->progeny[k], t_xv, t_rho, t_gradient, t_ti);
+        engine_addtasks_recv_timestep(e, c->progeny[k], t_ti);
 
 #else
   error("SWIFT was not compiled with MPI support.");
@@ -1257,7 +1470,7 @@ void engine_exchange_cells(struct engine *e) {
   MPI_Request reqs_in[engine_maxproxies];
   MPI_Request reqs_out[engine_maxproxies];
   MPI_Status status;
-  ticks tic = getticks();
+  const ticks tic = getticks();
 
   /* Run through the cells and get the size of the ones that will be sent off.
    */
@@ -1330,8 +1543,10 @@ void engine_exchange_cells(struct engine *e) {
   size_t count_parts_in = 0, count_gparts_in = 0, count_sparts_in = 0;
   for (int k = 0; k < nr_proxies; k++)
     for (int j = 0; j < e->proxies[k].nr_cells_in; j++) {
-      count_parts_in += e->proxies[k].cells_in[j]->count;
-      count_gparts_in += e->proxies[k].cells_in[j]->gcount;
+      if (e->proxies[k].cells_in_type[j] & proxy_cell_type_hydro)
+        count_parts_in += e->proxies[k].cells_in[j]->count;
+      if (e->proxies[k].cells_in_type[j] & proxy_cell_type_gravity)
+        count_gparts_in += e->proxies[k].cells_in[j]->gcount;
       count_sparts_in += e->proxies[k].cells_in[j]->scount;
     }
   if (count_parts_in > s->size_parts_foreign) {
@@ -1362,11 +1577,18 @@ void engine_exchange_cells(struct engine *e) {
   struct spart *sparts = s->sparts_foreign;
   for (int k = 0; k < nr_proxies; k++) {
     for (int j = 0; j < e->proxies[k].nr_cells_in; j++) {
-      cell_link_parts(e->proxies[k].cells_in[j], parts);
-      cell_link_gparts(e->proxies[k].cells_in[j], gparts);
+
+      if (e->proxies[k].cells_in_type[j] & proxy_cell_type_hydro) {
+        cell_link_parts(e->proxies[k].cells_in[j], parts);
+        parts = &parts[e->proxies[k].cells_in[j]->count];
+      }
+
+      if (e->proxies[k].cells_in_type[j] & proxy_cell_type_gravity) {
+        cell_link_gparts(e->proxies[k].cells_in[j], gparts);
+        gparts = &gparts[e->proxies[k].cells_in[j]->gcount];
+      }
+
       cell_link_sparts(e->proxies[k].cells_in[j], sparts);
-      parts = &parts[e->proxies[k].cells_in[j]->count];
-      gparts = &gparts[e->proxies[k].cells_in[j]->gcount];
       sparts = &sparts[e->proxies[k].cells_in[j]->scount];
     }
   }
@@ -1720,12 +1942,235 @@ void engine_exchange_strays(struct engine *e, size_t offset_parts,
 #endif
 }
 
+/**
+ * @brief Exchanges the top-level multipoles between all the nodes
+ * such that every node has a multipole for each top-level cell.
+ *
+ * @param e The #engine.
+ */
+void engine_exchange_top_multipoles(struct engine *e) {
+
+#ifdef WITH_MPI
+
+#ifdef SWIFT_DEBUG_CHECKS
+  for (int i = 0; i < e->s->nr_cells; ++i) {
+    const struct gravity_tensors *m = &e->s->multipoles_top[i];
+    if (e->s->cells_top[i].nodeID == engine_rank) {
+      if (m->m_pole.M_000 > 0.) {
+        if (m->CoM[0] < 0. || m->CoM[0] > e->s->dim[0])
+          error("Invalid multipole position in X");
+        if (m->CoM[1] < 0. || m->CoM[1] > e->s->dim[1])
+          error("Invalid multipole position in Y");
+        if (m->CoM[2] < 0. || m->CoM[2] > e->s->dim[2])
+          error("Invalid multipole position in Z");
+      }
+    } else {
+      if (m->m_pole.M_000 != 0.) error("Non-zero mass for foreign m-pole");
+      if (m->CoM[0] != 0.) error("Non-zero position in X for foreign m-pole");
+      if (m->CoM[1] != 0.) error("Non-zero position in Y for foreign m-pole");
+      if (m->CoM[2] != 0.) error("Non-zero position in Z for foreign m-pole");
+      if (m->m_pole.num_gpart != 0)
+        error("Non-zero gpart count in foreign m-pole");
+    }
+  }
+#endif
+
+  /* Each node (space) has constructed its own top-level multipoles.
+   * We now need to make sure every other node has a copy of everything.
+   *
+   * WARNING: Adult stuff ahead: don't do this at home!
+   *
+   * Since all nodes have their top-level multi-poles computed
+   * and all foreign ones set to 0 (all bytes), we can gather all the m-poles
+   * by doing a bit-wise OR reduction across all the nodes directly in
+   * place inside the multi-poles_top array.
+   * This only works if the foreign m-poles on every nodes are zeroed and no
+   * multi-pole is present on more than one node (two things guaranteed by the
+   * domain decomposition).
+   */
+  MPI_Allreduce(MPI_IN_PLACE, e->s->multipoles_top,
+                e->s->nr_cells * sizeof(struct gravity_tensors), MPI_BYTE,
+                MPI_BOR, MPI_COMM_WORLD);
+
+#ifdef SWIFT_DEBUG_CHECKS
+  long long counter = 0;
+
+  /* Let's check that what we received makes sense */
+  for (int i = 0; i < e->s->nr_cells; ++i) {
+    const struct gravity_tensors *m = &e->s->multipoles_top[i];
+    counter += m->m_pole.num_gpart;
+    if (m->m_pole.M_000 > 0.) {
+      if (m->CoM[0] < 0. || m->CoM[0] > e->s->dim[0])
+        error("Invalid multipole position in X");
+      if (m->CoM[1] < 0. || m->CoM[1] > e->s->dim[1])
+        error("Invalid multipole position in Y");
+      if (m->CoM[2] < 0. || m->CoM[2] > e->s->dim[2])
+        error("Invalid multipole position in Z");
+    }
+  }
+  if (counter != e->total_nr_gparts)
+    error("Total particles in multipoles inconsistent with engine");
+#endif
+
+#else
+  error("SWIFT was not compiled with MPI support.");
+#endif
+}
+
+void engine_exchange_proxy_multipoles(struct engine *e) {
+
+#ifdef WITH_MPI
+
+  const ticks tic = getticks();
+
+  /* Start by counting the number of cells to send and receive */
+  int count_send = 0;
+  int count_recv = 0;
+  int count_send_requests = 0;
+  int count_recv_requests = 0;
+
+  /* Loop over the proxies. */
+  for (int pid = 0; pid < e->nr_proxies; pid++) {
+
+    /* Get a handle on the proxy. */
+    const struct proxy *p = &e->proxies[pid];
+
+    /* Now collect the number of requests associated */
+    count_recv_requests += p->nr_cells_in;
+    count_send_requests += p->nr_cells_out;
+
+    /* And the actual number of things we are going to ship */
+    for (int k = 0; k < p->nr_cells_in; k++)
+      count_recv += p->cells_in[k]->pcell_size;
+
+    for (int k = 0; k < p->nr_cells_out; k++)
+      count_send += p->cells_out[k]->pcell_size;
+  }
+
+  /* Allocate the buffers for the packed data */
+  struct gravity_tensors *buffer_send =
+      malloc(sizeof(struct gravity_tensors) * count_send);
+  struct gravity_tensors *buffer_recv =
+      malloc(sizeof(struct gravity_tensors) * count_recv);
+  if (buffer_send == NULL || buffer_recv == NULL)
+    error("Unable to allocate memory for multipole transactions");
+
+  /* Also allocate the MPI requests */
+  const int count_requests = count_send_requests + count_recv_requests;
+  MPI_Request *requests = malloc(sizeof(MPI_Request) * count_requests);
+  if (requests == NULL) error("Unable to allocate memory for MPI requests");
+
+  int this_request = 0;
+  int this_recv = 0;
+  int this_send = 0;
+
+  /* Loop over the proxies to issue the receives. */
+  for (int pid = 0; pid < e->nr_proxies; pid++) {
+
+    /* Get a handle on the proxy. */
+    const struct proxy *p = &e->proxies[pid];
+
+    for (int k = 0; k < p->nr_cells_in; k++) {
+
+      const int num_elements = p->cells_in[k]->pcell_size;
+
+      /* Receive everything */
+      MPI_Irecv(&buffer_recv[this_recv],
+                num_elements * sizeof(struct gravity_tensors), MPI_BYTE,
+                p->cells_in[k]->nodeID, p->cells_in[k]->tag, MPI_COMM_WORLD,
+                &requests[this_request]);
+
+      /* Move to the next slot in the buffers */
+      this_recv += num_elements;
+      this_request++;
+    }
+
+    /* Loop over the proxies to issue the sends. */
+    for (int k = 0; k < p->nr_cells_out; k++) {
+
+      /* Number of multipoles in this cell hierarchy */
+      const int num_elements = p->cells_out[k]->pcell_size;
+
+      /* Let's pack everything recursively */
+      cell_pack_multipoles(p->cells_out[k], &buffer_send[this_send]);
+
+      /* Send everything (note the use of cells_in[0] to get the correct node
+       * ID. */
+      MPI_Isend(&buffer_send[this_send],
+                num_elements * sizeof(struct gravity_tensors), MPI_BYTE,
+                p->cells_in[0]->nodeID, p->cells_out[k]->tag, MPI_COMM_WORLD,
+                &requests[this_request]);
+
+      /* Move to the next slot in the buffers */
+      this_send += num_elements;
+      this_request++;
+    }
+  }
+
+  /* Wait for all the requests to arrive home */
+  MPI_Status *stats = malloc(count_requests * sizeof(MPI_Status));
+  int res;
+  if ((res = MPI_Waitall(count_requests, requests, stats)) != MPI_SUCCESS) {
+    for (int k = 0; k < count_requests; ++k) {
+      char buff[MPI_MAX_ERROR_STRING];
+      MPI_Error_string(stats[k].MPI_ERROR, buff, &res);
+      message("request from source %i, tag %i has error '%s'.",
+              stats[k].MPI_SOURCE, stats[k].MPI_TAG, buff);
+    }
+    error("Failed during waitall for multipole data.");
+  }
+
+  /* Let's now unpack the multipoles at the right place */
+  this_recv = 0;
+  for (int pid = 0; pid < e->nr_proxies; pid++) {
+
+    /* Get a handle on the proxy. */
+    const struct proxy *p = &e->proxies[pid];
+
+    for (int k = 0; k < p->nr_cells_in; k++) {
+
+      const int num_elements = p->cells_in[k]->pcell_size;
+
+#ifdef SWIFT_DEBUG_CHECKS
+
+      /* Check that the first element (top-level cell's multipole) matches what
+       * we received */
+      if (p->cells_in[k]->multipole->m_pole.num_gpart !=
+          buffer_recv[this_recv].m_pole.num_gpart)
+        error("Current: M_000=%e num_gpart=%lld\n New: M_000=%e num_gpart=%lld",
+              p->cells_in[k]->multipole->m_pole.M_000,
+              p->cells_in[k]->multipole->m_pole.num_gpart,
+              buffer_recv[this_recv].m_pole.M_000,
+              buffer_recv[this_recv].m_pole.num_gpart);
+#endif
+
+      /* Unpack recursively */
+      cell_unpack_multipoles(p->cells_in[k], &buffer_recv[this_recv]);
+
+      /* Move to the next slot in the buffers */
+      this_recv += num_elements;
+    }
+  }
+
+  /* Free everything */
+  free(stats);
+  free(buffer_send);
+  free(buffer_recv);
+  free(requests);
+
+  /* How much time did this take? */
+  if (e->verbose)
+    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
+            clocks_getunit());
+#else
+  error("SWIFT was not compiled with MPI support.");
+#endif
+}
+
 /**
  * @brief Constructs the top-level tasks for the short-range gravity
  * and long-range gravity interactions.
  *
- * - One FTT task per MPI rank.
- * - Multiple gravity ghosts for dependencies.
  * - All top-cells get a self task.
  * - All pairs within range according to the multipole acceptance
  *   criterion get a pair task.
@@ -1773,8 +2218,8 @@ void engine_make_self_gravity_tasks_mapper(void *map_data, int num_elements,
     const int ghost_id = cell_getid(cdim_ghost, i / 4, j / 4, k / 4);
     if (ghost_id > n_ghosts) error("Invalid ghost_id");
     if (periodic) {
-      ci->grav_ghost[0] = ghosts[2 * ghost_id + 0];
-      ci->grav_ghost[1] = ghosts[2 * ghost_id + 1];
+      ci->grav_ghost_in = ghosts[2 * ghost_id + 0];
+      ci->grav_ghost_out = ghosts[2 * ghost_id + 1];
     }
 
     /* Recover the multipole information */
@@ -1790,15 +2235,12 @@ void engine_make_self_gravity_tasks_mapper(void *map_data, int num_elements,
           const int cjd = cell_getid(cdim, ii, jj, kk);
           struct cell *cj = &cells[cjd];
 
-          /* Avoid duplicates */
-          if (cid <= cjd) continue;
+          /* Avoid duplicates of local pairs*/
+          if (cid <= cjd && cj->nodeID == nodeID) continue;
 
           /* Skip cells without gravity particles */
           if (cj->gcount == 0) continue;
 
-          /* Is that neighbour local ? */
-          if (cj->nodeID != nodeID) continue;  // MATTHIEU
-
           /* Recover the multipole information */
           const struct gravity_tensors *const multi_j = cj->multipole;
 
@@ -1831,11 +2273,10 @@ void engine_make_self_gravity_tasks_mapper(void *map_data, int num_elements,
 
 /**
  * @brief Constructs the top-level tasks for the short-range gravity
- * interactions.
+ * interactions (master function).
  *
- * - All top-cells get a self task.
- * - All pairs within range according to the multipole acceptance
- *   criterion get a pair task.
+ * - Create the FFT task and the array of gravity ghosts.
+ * - Call the mapper function to create the other tasks.
  *
  * @param e The #engine.
  */
@@ -1863,9 +2304,9 @@ void engine_make_self_gravity_tasks(struct engine *e) {
     /* Make the ghosts implicit and add the dependencies */
     for (int n = 0; n < n_ghosts / 2; ++n) {
       ghosts[2 * n + 0] = scheduler_addtask(
-          sched, task_type_grav_ghost, task_subtype_none, 0, 1, NULL, NULL);
+          sched, task_type_grav_ghost_in, task_subtype_none, 0, 1, NULL, NULL);
       ghosts[2 * n + 1] = scheduler_addtask(
-          sched, task_type_grav_ghost, task_subtype_none, 0, 1, NULL, NULL);
+          sched, task_type_grav_ghost_out, task_subtype_none, 0, 1, NULL, NULL);
       scheduler_addunlock(sched, ghosts[2 * n + 0], s->grav_top_level);
       scheduler_addunlock(sched, s->grav_top_level, ghosts[2 * n + 1]);
     }
@@ -1876,6 +2317,19 @@ void engine_make_self_gravity_tasks(struct engine *e) {
   threadpool_map(&e->threadpool, engine_make_self_gravity_tasks_mapper, NULL,
                  s->nr_cells, 1, 0, extra_data);
 
+#ifdef SWIFT_DEBUG_CHECKS
+  if (periodic)
+    for (int i = 0; i < s->nr_cells; ++i) {
+      const struct cell *c = &s->cells_top[i];
+      if (c->nodeID == engine_rank &&
+          (c->grav_ghost_in == NULL || c->grav_ghost_out == NULL))
+        error("Invalid gravity_ghost for local cell");
+      if (c->nodeID != engine_rank &&
+          (c->grav_ghost_in != NULL || c->grav_ghost_out != NULL))
+        error("Invalid gravity_ghost for foreign cell");
+    }
+#endif
+
   /* Clean up. */
   if (periodic) free(ghosts);
 }
@@ -2085,9 +2539,9 @@ static inline void engine_make_self_gravity_dependencies(
     struct scheduler *sched, struct task *gravity, struct cell *c) {
 
   /* init --> gravity --> grav_down --> kick */
-  scheduler_addunlock(sched, c->super->drift_gpart, gravity);
-  scheduler_addunlock(sched, c->super->init_grav, gravity);
-  scheduler_addunlock(sched, gravity, c->super->grav_down);
+  scheduler_addunlock(sched, c->super_gravity->drift_gpart, gravity);
+  scheduler_addunlock(sched, c->super_gravity->init_grav, gravity);
+  scheduler_addunlock(sched, gravity, c->super_gravity->grav_down);
 }
 
 /**
@@ -2102,7 +2556,7 @@ static inline void engine_make_external_gravity_dependencies(
     struct scheduler *sched, struct task *gravity, struct cell *c) {
 
   /* init --> external gravity --> kick */
-  scheduler_addunlock(sched, c->super->drift_gpart, gravity);
+  scheduler_addunlock(sched, c->super_gravity->drift_gpart, gravity);
   scheduler_addunlock(sched, gravity, c->super->kick2);
 }
 
@@ -2116,7 +2570,6 @@ void engine_link_gravity_tasks(struct engine *e) {
   struct scheduler *sched = &e->sched;
   const int nodeID = e->nodeID;
   const int nr_tasks = sched->nr_tasks;
-  const int periodic = e->s->periodic;
 
   for (int k = 0; k < nr_tasks; k++) {
 
@@ -2127,7 +2580,6 @@ void engine_link_gravity_tasks(struct engine *e) {
     if (t->type == task_type_self && t->subtype == task_subtype_grav) {
 
       engine_make_self_gravity_dependencies(sched, t, t->ci);
-      if (periodic) scheduler_addunlock(sched, t->ci->super->grav_ghost[1], t);
     }
 
     /* Self-interaction for external gravity ? */
@@ -2143,15 +2595,12 @@ void engine_link_gravity_tasks(struct engine *e) {
       if (t->ci->nodeID == nodeID) {
 
         engine_make_self_gravity_dependencies(sched, t, t->ci);
-        if (periodic && t->ci->super < t->cj->super)
-          scheduler_addunlock(sched, t->ci->super->grav_ghost[1], t);
       }
 
-      if (t->cj->nodeID == nodeID && t->ci->super != t->cj->super) {
+      if (t->cj->nodeID == nodeID &&
+          t->ci->super_gravity != t->cj->super_gravity) {
 
         engine_make_self_gravity_dependencies(sched, t, t->cj);
-        if (periodic && t->ci->super < t->cj->super)
-          scheduler_addunlock(sched, t->cj->super->grav_ghost[1], t);
       }
 
     }
@@ -2180,7 +2629,8 @@ void engine_link_gravity_tasks(struct engine *e) {
 
         engine_make_self_gravity_dependencies(sched, t, t->ci);
       }
-      if (t->cj->nodeID == nodeID && t->ci->super != t->cj->super) {
+      if (t->cj->nodeID == nodeID &&
+          t->ci->super_gravity != t->cj->super_gravity) {
 
         engine_make_self_gravity_dependencies(sched, t, t->cj);
       }
@@ -2206,18 +2656,11 @@ static inline void engine_make_hydro_loops_dependencies(
 
   /* density loop --> ghost --> gradient loop --> extra_ghost */
   /* extra_ghost --> force loop  */
-  scheduler_addunlock(sched, density, c->super->ghost_in);
-  scheduler_addunlock(sched, c->super->ghost_out, gradient);
-  scheduler_addunlock(sched, gradient, c->super->extra_ghost);
-  scheduler_addunlock(sched, c->super->extra_ghost, force);
-
-  if (with_cooling) {
-    /* force loop --> cooling (--> kick2)  */
-    scheduler_addunlock(sched, force, c->super->cooling);
-  } else {
-    /* force loop --> kick2 */
-    scheduler_addunlock(sched, force, c->super->kick2);
-  }
+  scheduler_addunlock(sched, c->super_hydro->sorts, density);
+  scheduler_addunlock(sched, density, c->super_hydro->ghost_in);
+  scheduler_addunlock(sched, c->super_hydro->ghost_out, gradient);
+  scheduler_addunlock(sched, gradient, c->super_hydro->extra_ghost);
+  scheduler_addunlock(sched, c->super_hydro->extra_ghost, force);
 }
 
 #else
@@ -2237,16 +2680,8 @@ static inline void engine_make_hydro_loops_dependencies(struct scheduler *sched,
                                                         struct cell *c,
                                                         int with_cooling) {
   /* density loop --> ghost --> force loop */
-  scheduler_addunlock(sched, density, c->super->ghost_in);
-  scheduler_addunlock(sched, c->super->ghost_out, force);
-
-  if (with_cooling) {
-    /* force loop --> cooling (--> kick2)  */
-    scheduler_addunlock(sched, force, c->super->cooling);
-  } else {
-    /* force loop --> kick2 */
-    scheduler_addunlock(sched, force, c->super->kick2);
-  }
+  scheduler_addunlock(sched, density, c->super_hydro->ghost_in);
+  scheduler_addunlock(sched, c->super_hydro->ghost_out, force);
 }
 
 #endif
@@ -2273,14 +2708,14 @@ void engine_make_extra_hydroloop_tasks_mapper(void *map_data, int num_elements,
 
     /* Sort tasks depend on the drift of the cell. */
     if (t->type == task_type_sort && t->ci->nodeID == engine_rank) {
-      scheduler_addunlock(sched, t->ci->super->drift_part, t);
+      scheduler_addunlock(sched, t->ci->super_hydro->drift_part, t);
     }
 
     /* Self-interaction? */
     else if (t->type == task_type_self && t->subtype == task_subtype_density) {
 
       /* Make the self-density tasks depend on the drift only. */
-      scheduler_addunlock(sched, t->ci->super->drift_part, t);
+      scheduler_addunlock(sched, t->ci->super_hydro->drift_part, t);
 
 #ifdef EXTRA_HYDRO_LOOP
       /* Start by constructing the task for the second  and third hydro loop. */
@@ -2308,6 +2743,7 @@ void engine_make_extra_hydroloop_tasks_mapper(void *map_data, int num_elements,
 
       /* Now, build all the dependencies for the hydro */
       engine_make_hydro_loops_dependencies(sched, t, t2, t->ci, with_cooling);
+      scheduler_addunlock(sched, t2, t->ci->super->kick2);
 #endif
     }
 
@@ -2316,12 +2752,12 @@ void engine_make_extra_hydroloop_tasks_mapper(void *map_data, int num_elements,
 
       /* Make all density tasks depend on the drift and the sorts. */
       if (t->ci->nodeID == engine_rank)
-        scheduler_addunlock(sched, t->ci->super->drift_part, t);
-      scheduler_addunlock(sched, t->ci->super->sorts, t);
-      if (t->ci->super != t->cj->super) {
+        scheduler_addunlock(sched, t->ci->super_hydro->drift_part, t);
+      scheduler_addunlock(sched, t->ci->super_hydro->sorts, t);
+      if (t->ci->super_hydro != t->cj->super_hydro) {
         if (t->cj->nodeID == engine_rank)
-          scheduler_addunlock(sched, t->cj->super->drift_part, t);
-        scheduler_addunlock(sched, t->cj->super->sorts, t);
+          scheduler_addunlock(sched, t->cj->super_hydro->drift_part, t);
+        scheduler_addunlock(sched, t->cj->super_hydro->sorts, t);
       }
 
 #ifdef EXTRA_HYDRO_LOOP
@@ -2338,12 +2774,12 @@ void engine_make_extra_hydroloop_tasks_mapper(void *map_data, int num_elements,
       engine_addlink(e, &t->cj->force, t3);
 
       /* Now, build all the dependencies for the hydro for the cells */
-      /* that are local and are not descendant of the same super-cells */
+      /* that are local and are not descendant of the same super_hydro-cells */
       if (t->ci->nodeID == nodeID) {
         engine_make_hydro_loops_dependencies(sched, t, t2, t3, t->ci,
                                              with_cooling);
       }
-      if (t->cj->nodeID == nodeID && t->ci->super != t->cj->super) {
+      if (t->cj->nodeID == nodeID && t->ci->super_hydro != t->cj->super_hydro) {
         engine_make_hydro_loops_dependencies(sched, t, t2, t3, t->cj,
                                              with_cooling);
       }
@@ -2359,12 +2795,17 @@ void engine_make_extra_hydroloop_tasks_mapper(void *map_data, int num_elements,
       engine_addlink(e, &t->cj->force, t2);
 
       /* Now, build all the dependencies for the hydro for the cells */
-      /* that are local and are not descendant of the same super-cells */
+      /* that are local and are not descendant of the same super_hydro-cells */
       if (t->ci->nodeID == nodeID) {
         engine_make_hydro_loops_dependencies(sched, t, t2, t->ci, with_cooling);
+        scheduler_addunlock(sched, t2, t->ci->super->kick2);
       }
-      if (t->cj->nodeID == nodeID && t->ci->super != t->cj->super) {
-        engine_make_hydro_loops_dependencies(sched, t, t2, t->cj, with_cooling);
+      if (t->cj->nodeID == nodeID) {
+        if (t->ci->super_hydro != t->cj->super_hydro)
+          engine_make_hydro_loops_dependencies(sched, t, t2, t->cj,
+                                               with_cooling);
+        if (t->ci->super != t->cj->super)
+          scheduler_addunlock(sched, t2, t->cj->super->kick2);
       }
 
 #endif
@@ -2376,8 +2817,8 @@ void engine_make_extra_hydroloop_tasks_mapper(void *map_data, int num_elements,
              t->subtype == task_subtype_density) {
 
       /* Make all density tasks depend on the drift and sorts. */
-      scheduler_addunlock(sched, t->ci->super->drift_part, t);
-      scheduler_addunlock(sched, t->ci->super->sorts, t);
+      scheduler_addunlock(sched, t->ci->super_hydro->drift_part, t);
+      scheduler_addunlock(sched, t->ci->super_hydro->sorts, t);
 
 #ifdef EXTRA_HYDRO_LOOP
 
@@ -2394,7 +2835,7 @@ void engine_make_extra_hydroloop_tasks_mapper(void *map_data, int num_elements,
       engine_addlink(e, &t->ci->force, t3);
 
       /* Now, build all the dependencies for the hydro for the cells */
-      /* that are local and are not descendant of the same super-cells */
+      /* that are local and are not descendant of the same super_hydro-cells */
       if (t->ci->nodeID == nodeID) {
         engine_make_hydro_loops_dependencies(sched, t, t2, t3, t->ci,
                                              with_cooling);
@@ -2410,10 +2851,12 @@ void engine_make_extra_hydroloop_tasks_mapper(void *map_data, int num_elements,
       engine_addlink(e, &t->ci->force, t2);
 
       /* Now, build all the dependencies for the hydro for the cells */
-      /* that are local and are not descendant of the same super-cells */
+      /* that are local and are not descendant of the same super_hydro-cells */
       if (t->ci->nodeID == nodeID) {
         engine_make_hydro_loops_dependencies(sched, t, t2, t->ci, with_cooling);
-      }
+        scheduler_addunlock(sched, t2, t->ci->super->kick2);
+      } else
+        error("oo");
 #endif
     }
 
@@ -2423,12 +2866,12 @@ void engine_make_extra_hydroloop_tasks_mapper(void *map_data, int num_elements,
 
       /* Make all density tasks depend on the drift. */
       if (t->ci->nodeID == engine_rank)
-        scheduler_addunlock(sched, t->ci->super->drift_part, t);
-      scheduler_addunlock(sched, t->ci->super->sorts, t);
-      if (t->ci->super != t->cj->super) {
+        scheduler_addunlock(sched, t->ci->super_hydro->drift_part, t);
+      scheduler_addunlock(sched, t->ci->super_hydro->sorts, t);
+      if (t->ci->super_hydro != t->cj->super_hydro) {
         if (t->cj->nodeID == engine_rank)
-          scheduler_addunlock(sched, t->cj->super->drift_part, t);
-        scheduler_addunlock(sched, t->cj->super->sorts, t);
+          scheduler_addunlock(sched, t->cj->super_hydro->drift_part, t);
+        scheduler_addunlock(sched, t->cj->super_hydro->sorts, t);
       }
 
 #ifdef EXTRA_HYDRO_LOOP
@@ -2448,12 +2891,12 @@ void engine_make_extra_hydroloop_tasks_mapper(void *map_data, int num_elements,
       engine_addlink(e, &t->cj->force, t3);
 
       /* Now, build all the dependencies for the hydro for the cells */
-      /* that are local and are not descendant of the same super-cells */
+      /* that are local and are not descendant of the same super_hydro-cells */
       if (t->ci->nodeID == nodeID) {
         engine_make_hydro_loops_dependencies(sched, t, t2, t3, t->ci,
                                              with_cooling);
       }
-      if (t->cj->nodeID == nodeID && t->ci->super != t->cj->super) {
+      if (t->cj->nodeID == nodeID && t->ci->super_hydro != t->cj->super_hydro) {
         engine_make_hydro_loops_dependencies(sched, t, t2, t3, t->cj,
                                              with_cooling);
       }
@@ -2469,52 +2912,23 @@ void engine_make_extra_hydroloop_tasks_mapper(void *map_data, int num_elements,
       engine_addlink(e, &t->cj->force, t2);
 
       /* Now, build all the dependencies for the hydro for the cells */
-      /* that are local and are not descendant of the same super-cells */
+      /* that are local and are not descendant of the same super_hydro-cells */
       if (t->ci->nodeID == nodeID) {
         engine_make_hydro_loops_dependencies(sched, t, t2, t->ci, with_cooling);
+        scheduler_addunlock(sched, t2, t->ci->super->kick2);
       }
-      if (t->cj->nodeID == nodeID && t->ci->super != t->cj->super) {
-        engine_make_hydro_loops_dependencies(sched, t, t2, t->cj, with_cooling);
+      if (t->cj->nodeID == nodeID) {
+        if (t->ci->super_hydro != t->cj->super_hydro)
+          engine_make_hydro_loops_dependencies(sched, t, t2, t->cj,
+                                               with_cooling);
+        if (t->ci->super != t->cj->super)
+          scheduler_addunlock(sched, t2, t->cj->super->kick2);
       }
 #endif
     }
   }
 }
 
-/**
- * @brief Constructs the gravity tasks building the multipoles and propagating
- *them to the children
- *
- * Correct implementation is still lacking here.
- *
- * @param e The #engine.
- */
-void engine_make_gravityrecursive_tasks(struct engine *e) {
-
-  /* struct space *s = e->s; */
-  /* struct scheduler *sched = &e->sched; */
-  /* const int nodeID = e->nodeID; */
-  /* const int nr_cells = s->nr_cells; */
-  /* struct cell *cells = s->cells_top; */
-
-  /* for (int k = 0; k < nr_cells; k++) { */
-
-  /*   /\* Only do this for local cells containing gravity particles *\/ */
-  /*   if (cells[k].nodeID == nodeID && cells[k].gcount > 0) { */
-
-  /*     /\* Create tasks at top level. *\/ */
-  /*     struct task *up = NULL; */
-  /*     struct task *down = NULL; */
-  /*         /\* scheduler_addtask(sched, task_type_grav_down,
-   * task_subtype_none, 0, 0, *\/ */
-  /*         /\*                   &cells[k], NULL); *\/ */
-
-  /*     /\* Push tasks down the cell hierarchy. *\/ */
-  /*     engine_addtasks_grav(e, &cells[k], up, down); */
-  /*   } */
-  /* } */
-}
-
 /**
  * @brief Fill the #space's task list.
  *
@@ -2579,10 +2993,6 @@ void engine_maketasks(struct engine *e) {
     error("Failed to allocate cell-task links.");
   e->nr_links = 0;
 
-  /* Add the gravity up/down tasks at the top-level cells and push them down. */
-  if (e->policy & engine_policy_self_gravity)
-    engine_make_gravityrecursive_tasks(e);
-
   /* Count the number of tasks associated with each cell and
      store the density tasks in each cell, and make each sort
      depend on the sorts of its progeny. */
@@ -2592,7 +3002,7 @@ void engine_maketasks(struct engine *e) {
   /* Now that the self/pair tasks are at the right level, set the super
    * pointers. */
   threadpool_map(&e->threadpool, cell_set_super_mapper, cells, nr_cells,
-                 sizeof(struct cell), 0, NULL);
+                 sizeof(struct cell), 0, e);
 
   /* Append hierarchical tasks to each cell. */
   threadpool_map(&e->threadpool, engine_make_hierarchical_tasks_mapper, cells,
@@ -2619,16 +3029,43 @@ void engine_maketasks(struct engine *e) {
       /* Get a handle on the proxy. */
       struct proxy *p = &e->proxies[pid];
 
-      /* Loop through the proxy's incoming cells and add the
-         recv tasks. */
       for (int k = 0; k < p->nr_cells_in; k++)
-        engine_addtasks_recv(e, p->cells_in[k], NULL, NULL, NULL, NULL);
+        engine_addtasks_recv_timestep(e, p->cells_in[k], NULL);
 
-      /* Loop through the proxy's outgoing cells and add the
-         send tasks. */
       for (int k = 0; k < p->nr_cells_out; k++)
-        engine_addtasks_send(e, p->cells_out[k], p->cells_in[0], NULL, NULL,
-                             NULL, NULL);
+        engine_addtasks_send_timestep(e, p->cells_out[k], p->cells_in[0], NULL);
+
+      /* Loop through the proxy's incoming cells and add the
+         recv tasks for the cells in the proxy that have a hydro connection. */
+      if (e->policy & engine_policy_hydro)
+        for (int k = 0; k < p->nr_cells_in; k++)
+          if (p->cells_in_type[k] & proxy_cell_type_hydro)
+            engine_addtasks_recv_hydro(e, p->cells_in[k], NULL, NULL, NULL);
+
+      /* Loop through the proxy's incoming cells and add the
+         recv tasks for the cells in the proxy that have a gravity connection.
+         */
+      if (e->policy & engine_policy_self_gravity)
+        for (int k = 0; k < p->nr_cells_in; k++)
+          if (p->cells_in_type[k] & proxy_cell_type_gravity)
+            engine_addtasks_recv_gravity(e, p->cells_in[k], NULL);
+
+      /* Loop through the proxy's outgoing cells and add the
+         send tasks for the cells in the proxy that have a hydro connection. */
+      if (e->policy & engine_policy_hydro)
+        for (int k = 0; k < p->nr_cells_out; k++)
+          if (p->cells_out_type[k] & proxy_cell_type_hydro)
+            engine_addtasks_send_hydro(e, p->cells_out[k], p->cells_in[0], NULL,
+                                       NULL, NULL);
+
+      /* Loop through the proxy's outgoing cells and add the
+         send tasks for the cells in the proxy that have a gravity connection.
+         */
+      if (e->policy & engine_policy_self_gravity)
+        for (int k = 0; k < p->nr_cells_out; k++)
+          if (p->cells_out_type[k] & proxy_cell_type_gravity)
+            engine_addtasks_send_gravity(e, p->cells_out[k], p->cells_in[0],
+                                         NULL);
     }
   }
 #endif
@@ -2677,24 +3114,42 @@ void engine_marktasks_mapper(void *map_data, int num_elements,
 
       if (ci->nodeID != engine_rank) error("Non-local self task found");
 
-      /* Set this task's skip. */
-      if (cell_is_active(ci, e)) scheduler_activate(s, t);
-
       /* Activate the hydro drift */
       if (t->type == task_type_self && t->subtype == task_subtype_density) {
-        cell_activate_drift_part(ci, s);
-      }
-      /* Activate the gravity drift */
-      else if (t->type == task_type_self && t->subtype == task_subtype_grav) {
-        cell_activate_subcell_grav_tasks(t->ci, NULL, s);
+        if (cell_is_active_hydro(ci, e)) {
+          scheduler_activate(s, t);
+          cell_activate_drift_part(ci, s);
+        }
       }
+
       /* Store current values of dx_max and h_max. */
       else if (t->type == task_type_sub_self &&
                t->subtype == task_subtype_density) {
-        cell_activate_subcell_tasks(ci, NULL, s);
+        if (cell_is_active_hydro(ci, e)) {
+          scheduler_activate(s, t);
+          cell_activate_subcell_hydro_tasks(ci, NULL, s);
+        }
+      }
 
-      } else if (t->type == task_type_sub_self &&
-                 t->subtype == task_subtype_grav) {
+      else if (t->type == task_type_self && t->subtype == task_subtype_force) {
+        if (cell_is_active_hydro(ci, e)) scheduler_activate(s, t);
+      }
+
+      else if (t->type == task_type_sub_self &&
+               t->subtype == task_subtype_force) {
+        if (cell_is_active_hydro(ci, e)) scheduler_activate(s, t);
+      }
+
+      /* Activate the gravity drift */
+      else if (t->type == task_type_self && t->subtype == task_subtype_grav) {
+        if (cell_is_active_gravity(ci, e)) {
+          scheduler_activate(s, t);
+          cell_activate_subcell_grav_tasks(t->ci, NULL, s);
+        }
+      }
+
+      else if (t->type == task_type_sub_self &&
+               t->subtype == task_subtype_grav) {
         error("Invalid task sub-type encountered");
       }
     }
@@ -2705,12 +3160,17 @@ void engine_marktasks_mapper(void *map_data, int num_elements,
       /* Local pointers. */
       struct cell *ci = t->ci;
       struct cell *cj = t->cj;
-      const int ci_active = cell_is_active(ci, e);
-      const int cj_active = cell_is_active(cj, e);
+      const int ci_active_hydro = cell_is_active_hydro(ci, e);
+      const int cj_active_hydro = cell_is_active_hydro(cj, e);
+      const int ci_active_gravity = cell_is_active_gravity(ci, e);
+      const int cj_active_gravity = cell_is_active_gravity(cj, e);
 
       /* Only activate tasks that involve a local active cell. */
-      if ((ci_active && ci->nodeID == engine_rank) ||
-          (cj_active && cj->nodeID == engine_rank)) {
+      if ((t->subtype == task_subtype_density ||
+           t->subtype == task_subtype_force) &&
+          ((ci_active_hydro && ci->nodeID == engine_rank) ||
+           (cj_active_hydro && cj->nodeID == engine_rank))) {
+
         scheduler_activate(s, t);
 
         /* Set the correct sorting flags */
@@ -2730,19 +3190,28 @@ void engine_marktasks_mapper(void *map_data, int num_elements,
           cell_activate_sorts(ci, t->flags, s);
           cell_activate_sorts(cj, t->flags, s);
 
-        } else if (t->type == task_type_pair &&
-                   t->subtype == task_subtype_grav) {
-          /* Activate the gravity drift */
-          cell_activate_subcell_grav_tasks(t->ci, t->cj, s);
         }
 
         /* Store current values of dx_max and h_max. */
         else if (t->type == task_type_sub_pair &&
                  t->subtype == task_subtype_density) {
-          cell_activate_subcell_tasks(t->ci, t->cj, s);
+          cell_activate_subcell_hydro_tasks(t->ci, t->cj, s);
+        }
+      }
+
+      if ((t->subtype == task_subtype_grav) &&
+          ((ci_active_gravity && ci->nodeID == engine_rank) ||
+           (cj_active_gravity && cj->nodeID == engine_rank))) {
+
+        scheduler_activate(s, t);
+
+        if (t->type == task_type_pair && t->subtype == task_subtype_grav) {
+          /* Activate the gravity drift */
+          cell_activate_subcell_grav_tasks(t->ci, t->cj, s);
+        }
 
-        } else if (t->type == task_type_sub_pair &&
-                   t->subtype == task_subtype_grav) {
+        else if (t->type == task_type_sub_pair &&
+                 t->subtype == task_subtype_grav) {
           error("Invalid task sub-type encountered");
         }
       }
@@ -2758,9 +3227,9 @@ void engine_marktasks_mapper(void *map_data, int num_elements,
         if (ci->nodeID != engine_rank) {
 
           /* If the local cell is active, receive data from the foreign cell. */
-          if (cj_active) {
+          if (cj_active_hydro) {
             scheduler_activate(s, ci->recv_xv);
-            if (ci_active) {
+            if (ci_active_hydro) {
               scheduler_activate(s, ci->recv_rho);
 #ifdef EXTRA_HYDRO_LOOP
               scheduler_activate(s, ci->recv_gradient);
@@ -2769,10 +3238,10 @@ void engine_marktasks_mapper(void *map_data, int num_elements,
           }
 
           /* If the foreign cell is active, we want its ti_end values. */
-          if (ci_active) scheduler_activate(s, ci->recv_ti);
+          if (ci_active_hydro) scheduler_activate(s, ci->recv_ti);
 
           /* Is the foreign cell active and will need stuff from us? */
-          if (ci_active) {
+          if (ci_active_hydro) {
 
             struct link *l =
                 scheduler_activate_send(s, cj->send_xv, ci->nodeID);
@@ -2783,7 +3252,7 @@ void engine_marktasks_mapper(void *map_data, int num_elements,
             cell_activate_drift_part(l->t->ci, s);
 
             /* If the local cell is also active, more stuff will be needed. */
-            if (cj_active) {
+            if (cj_active_hydro) {
               scheduler_activate_send(s, cj->send_rho, ci->nodeID);
 
 #ifdef EXTRA_HYDRO_LOOP
@@ -2793,14 +3262,15 @@ void engine_marktasks_mapper(void *map_data, int num_elements,
           }
 
           /* If the local cell is active, send its ti_end values. */
-          if (cj_active) scheduler_activate_send(s, cj->send_ti, ci->nodeID);
+          if (cj_active_hydro)
+            scheduler_activate_send(s, cj->send_ti, ci->nodeID);
 
         } else if (cj->nodeID != engine_rank) {
 
           /* If the local cell is active, receive data from the foreign cell. */
-          if (ci_active) {
+          if (ci_active_hydro) {
             scheduler_activate(s, cj->recv_xv);
-            if (cj_active) {
+            if (cj_active_hydro) {
               scheduler_activate(s, cj->recv_rho);
 #ifdef EXTRA_HYDRO_LOOP
               scheduler_activate(s, cj->recv_gradient);
@@ -2809,10 +3279,10 @@ void engine_marktasks_mapper(void *map_data, int num_elements,
           }
 
           /* If the foreign cell is active, we want its ti_end values. */
-          if (cj_active) scheduler_activate(s, cj->recv_ti);
+          if (cj_active_hydro) scheduler_activate(s, cj->recv_ti);
 
           /* Is the foreign cell active and will need stuff from us? */
-          if (cj_active) {
+          if (cj_active_hydro) {
 
             struct link *l =
                 scheduler_activate_send(s, ci->send_xv, cj->nodeID);
@@ -2823,7 +3293,7 @@ void engine_marktasks_mapper(void *map_data, int num_elements,
             cell_activate_drift_part(l->t->ci, s);
 
             /* If the local cell is also active, more stuff will be needed. */
-            if (ci_active) {
+            if (ci_active_hydro) {
 
               scheduler_activate_send(s, ci->send_rho, cj->nodeID);
 
@@ -2834,32 +3304,97 @@ void engine_marktasks_mapper(void *map_data, int num_elements,
           }
 
           /* If the local cell is active, send its ti_end values. */
-          if (ci_active) scheduler_activate_send(s, ci->send_ti, cj->nodeID);
+          if (ci_active_hydro)
+            scheduler_activate_send(s, ci->send_ti, cj->nodeID);
+        }
+#endif
+      }
+
+      /* Only interested in gravity tasks as of here. */
+      if (t->subtype == task_subtype_grav) {
+
+#ifdef WITH_MPI
+        /* Activate the send/recv tasks. */
+        if (ci->nodeID != engine_rank) {
+
+          /* If the local cell is active, receive data from the foreign cell. */
+          if (cj_active_gravity) {
+            scheduler_activate(s, ci->recv_grav);
+          }
+
+          /* If the foreign cell is active, we want its ti_end values. */
+          if (ci_active_gravity) scheduler_activate(s, ci->recv_ti);
+
+          /* Is the foreign cell active and will need stuff from us? */
+          if (ci_active_gravity) {
+
+            struct link *l =
+                scheduler_activate_send(s, cj->send_grav, ci->nodeID);
+
+            /* Drift the cell which will be sent at the level at which it is
+               sent, i.e. drift the cell specified in the send task (l->t)
+               itself. */
+            cell_activate_drift_gpart(l->t->ci, s);
+          }
+
+          /* If the local cell is active, send its ti_end values. */
+          if (cj_active_gravity)
+            scheduler_activate_send(s, cj->send_ti, ci->nodeID);
+
+        } else if (cj->nodeID != engine_rank) {
+
+          /* If the local cell is active, receive data from the foreign cell. */
+          if (ci_active_gravity) {
+            scheduler_activate(s, cj->recv_grav);
+          }
+
+          /* If the foreign cell is active, we want its ti_end values. */
+          if (cj_active_gravity) scheduler_activate(s, cj->recv_ti);
+
+          /* Is the foreign cell active and will need stuff from us? */
+          if (cj_active_gravity) {
+
+            struct link *l =
+                scheduler_activate_send(s, ci->send_grav, cj->nodeID);
+
+            /* Drift the cell which will be sent at the level at which it is
+               sent, i.e. drift the cell specified in the send task (l->t)
+               itself. */
+            cell_activate_drift_gpart(l->t->ci, s);
+          }
+
+          /* If the local cell is active, send its ti_end values. */
+          if (ci_active_gravity)
+            scheduler_activate_send(s, ci->send_ti, cj->nodeID);
         }
 #endif
       }
     }
 
     /* Kick/init ? */
-    else if (t->type == task_type_kick1 || t->type == task_type_kick2 ||
-             t->type == task_type_init_grav) {
-      if (cell_is_active(t->ci, e)) scheduler_activate(s, t);
+    else if (t->type == task_type_kick1 || t->type == task_type_kick2) {
+
+      if (cell_is_active_hydro(t->ci, e) || cell_is_active_gravity(t->ci, e))
+        scheduler_activate(s, t);
     }
 
     /* Hydro ghost tasks ? */
-    else if (t->type == task_type_ghost || t->type == task_type_extra_ghost) {
-      if (cell_is_active(t->ci, e)) scheduler_activate(s, t);
+    else if (t->type == task_type_ghost || t->type == task_type_extra_ghost ||
+             t->type == task_type_ghost_in || t->type == task_type_ghost_out) {
+      if (cell_is_active_hydro(t->ci, e)) scheduler_activate(s, t);
     }
 
     /* Gravity stuff ? */
     else if (t->type == task_type_grav_down ||
-             t->type == task_type_grav_long_range) {
-      if (cell_is_active(t->ci, e)) scheduler_activate(s, t);
+             t->type == task_type_grav_long_range ||
+             t->type == task_type_init_grav) {
+      if (cell_is_active_gravity(t->ci, e)) scheduler_activate(s, t);
     }
 
     /* Periodic gravity stuff (Note this is not linked to a cell) ? */
     else if (t->type == task_type_grav_top_level ||
-             t->type == task_type_grav_ghost) {
+             t->type == task_type_grav_ghost_in ||
+             t->type == task_type_grav_ghost_out) {
       scheduler_activate(s, t);
     }
 
@@ -2868,12 +3403,13 @@ void engine_marktasks_mapper(void *map_data, int num_elements,
       t->ci->updated = 0;
       t->ci->g_updated = 0;
       t->ci->s_updated = 0;
-      if (cell_is_active(t->ci, e)) scheduler_activate(s, t);
+      if (cell_is_active_hydro(t->ci, e) || cell_is_active_gravity(t->ci, e))
+        scheduler_activate(s, t);
     }
 
     /* Subgrid tasks */
     else if (t->type == task_type_cooling || t->type == task_type_sourceterms) {
-      if (cell_is_active(t->ci, e)) scheduler_activate(s, t);
+      if (cell_is_active_hydro(t->ci, e)) scheduler_activate(s, t);
     }
   }
 }
@@ -3077,9 +3613,15 @@ void engine_rebuild(struct engine *e, int clean_h_values) {
   /* Initial cleaning up session ? */
   if (clean_h_values) space_sanitize(e->s);
 
-/* If in parallel, exchange the cell structure. */
+/* If in parallel, exchange the cell structure, top-level and neighbouring
+ * multipoles. */
 #ifdef WITH_MPI
   engine_exchange_cells(e);
+
+  if (e->policy & engine_policy_self_gravity) engine_exchange_top_multipoles(e);
+
+  if (e->policy & engine_policy_self_gravity)
+    engine_exchange_proxy_multipoles(e);
 #endif
 
   /* Re-build the tasks. */
@@ -3101,7 +3643,7 @@ void engine_rebuild(struct engine *e, int clean_h_values) {
     error("engine_marktasks failed after space_rebuild.");
 
   /* Print the status of the system */
-  // if (e->verbose) engine_print_task_counts(e);
+  if (e->verbose) engine_print_task_counts(e);
 
   /* Flag that a rebuild has taken place */
   e->step_props |= engine_step_prop_rebuild;
@@ -3183,7 +3725,10 @@ void engine_collect_end_of_step_recurse(struct cell *c) {
 
   /* Counters for the different quantities. */
   int updated = 0, g_updated = 0, s_updated = 0;
-  integertime_t ti_end_min = max_nr_timesteps, ti_end_max = 0, ti_beg_max = 0;
+  integertime_t ti_hydro_end_min = max_nr_timesteps, ti_hydro_end_max = 0,
+                ti_hydro_beg_max = 0;
+  integertime_t ti_gravity_end_min = max_nr_timesteps, ti_gravity_end_max = 0,
+                ti_gravity_beg_max = 0;
 
   /* Collect the values from the progeny. */
   for (int k = 0; k < 8; k++) {
@@ -3194,9 +3739,12 @@ void engine_collect_end_of_step_recurse(struct cell *c) {
       engine_collect_end_of_step_recurse(cp);
 
       /* And update */
-      ti_end_min = min(ti_end_min, cp->ti_end_min);
-      ti_end_max = max(ti_end_max, cp->ti_end_max);
-      ti_beg_max = max(ti_beg_max, cp->ti_beg_max);
+      ti_hydro_end_min = min(ti_hydro_end_min, cp->ti_hydro_end_min);
+      ti_hydro_end_max = max(ti_hydro_end_max, cp->ti_hydro_end_max);
+      ti_hydro_beg_max = max(ti_hydro_beg_max, cp->ti_hydro_beg_max);
+      ti_gravity_end_min = min(ti_gravity_end_min, cp->ti_gravity_end_min);
+      ti_gravity_end_max = max(ti_gravity_end_max, cp->ti_gravity_end_max);
+      ti_gravity_beg_max = max(ti_gravity_beg_max, cp->ti_gravity_beg_max);
       updated += cp->updated;
       g_updated += cp->g_updated;
       s_updated += cp->s_updated;
@@ -3209,9 +3757,12 @@ void engine_collect_end_of_step_recurse(struct cell *c) {
   }
 
   /* Store the collected values in the cell. */
-  c->ti_end_min = ti_end_min;
-  c->ti_end_max = ti_end_max;
-  c->ti_beg_max = ti_beg_max;
+  c->ti_hydro_end_min = ti_hydro_end_min;
+  c->ti_hydro_end_max = ti_hydro_end_max;
+  c->ti_hydro_beg_max = ti_hydro_beg_max;
+  c->ti_gravity_end_min = ti_gravity_end_min;
+  c->ti_gravity_end_max = ti_gravity_end_max;
+  c->ti_gravity_beg_max = ti_gravity_beg_max;
   c->updated = updated;
   c->g_updated = g_updated;
   c->s_updated = s_updated;
@@ -3227,7 +3778,10 @@ void engine_collect_end_of_step_mapper(void *map_data, int num_elements,
 
   /* Local collectible */
   int updates = 0, g_updates = 0, s_updates = 0;
-  integertime_t ti_end_min = max_nr_timesteps, ti_end_max = 0, ti_beg_max = 0;
+  integertime_t ti_hydro_end_min = max_nr_timesteps, ti_hydro_end_max = 0,
+                ti_hydro_beg_max = 0;
+  integertime_t ti_gravity_end_min = max_nr_timesteps, ti_gravity_end_max = 0,
+                ti_gravity_beg_max = 0;
 
   for (int ind = 0; ind < num_elements; ind++) {
     struct cell *c = &s->cells_top[local_cells[ind]];
@@ -3238,9 +3792,12 @@ void engine_collect_end_of_step_mapper(void *map_data, int num_elements,
       engine_collect_end_of_step_recurse(c);
 
       /* And aggregate */
-      ti_end_min = min(ti_end_min, c->ti_end_min);
-      ti_end_max = max(ti_end_max, c->ti_end_max);
-      ti_beg_max = max(ti_beg_max, c->ti_beg_max);
+      ti_hydro_end_min = min(ti_hydro_end_min, c->ti_hydro_end_min);
+      ti_hydro_end_max = max(ti_hydro_end_max, c->ti_hydro_end_max);
+      ti_hydro_beg_max = max(ti_hydro_beg_max, c->ti_hydro_beg_max);
+      ti_gravity_end_min = min(ti_gravity_end_min, c->ti_gravity_end_min);
+      ti_gravity_end_max = max(ti_gravity_end_max, c->ti_gravity_end_max);
+      ti_gravity_beg_max = max(ti_gravity_beg_max, c->ti_gravity_beg_max);
       updates += c->updated;
       g_updates += c->g_updated;
       s_updates += c->s_updated;
@@ -3258,9 +3815,15 @@ void engine_collect_end_of_step_mapper(void *map_data, int num_elements,
     data->updates += updates;
     data->g_updates += g_updates;
     data->s_updates += s_updates;
-    data->ti_end_min = min(ti_end_min, data->ti_end_min);
-    data->ti_end_max = max(ti_end_max, data->ti_end_max);
-    data->ti_beg_max = max(ti_beg_max, data->ti_beg_max);
+    data->ti_hydro_end_min = min(ti_hydro_end_min, data->ti_hydro_end_min);
+    data->ti_hydro_end_max = max(ti_hydro_end_max, data->ti_hydro_end_max);
+    data->ti_hydro_beg_max = max(ti_hydro_beg_max, data->ti_hydro_beg_max);
+    data->ti_gravity_end_min =
+        min(ti_gravity_end_min, data->ti_gravity_end_min);
+    data->ti_gravity_end_max =
+        max(ti_gravity_end_max, data->ti_gravity_end_max);
+    data->ti_gravity_beg_max =
+        max(ti_gravity_beg_max, data->ti_gravity_beg_max);
   }
   if (lock_unlock(&s->lock) != 0) error("Failed to unlock the space");
 }
@@ -3288,7 +3851,10 @@ void engine_collect_end_of_step(struct engine *e, int apply) {
   const struct space *s = e->s;
   struct end_of_step_data data;
   data.updates = 0, data.g_updates = 0, data.s_updates = 0;
-  data.ti_end_min = max_nr_timesteps, data.ti_end_max = 0, data.ti_beg_max = 0;
+  data.ti_hydro_end_min = max_nr_timesteps, data.ti_hydro_end_max = 0,
+  data.ti_hydro_beg_max = 0;
+  data.ti_gravity_end_min = max_nr_timesteps, data.ti_gravity_end_max = 0,
+  data.ti_gravity_beg_max = 0;
   data.e = e;
 
   /* Collect information from the local top-level cells */
@@ -3297,8 +3863,10 @@ void engine_collect_end_of_step(struct engine *e, int apply) {
 
   /* Store these in the temporary collection group. */
   collectgroup1_init(&e->collect_group1, data.updates, data.g_updates,
-                     data.s_updates, data.ti_end_min, data.ti_end_max,
-                     data.ti_beg_max, e->forcerebuild);
+                     data.s_updates, data.ti_hydro_end_min,
+                     data.ti_hydro_end_max, data.ti_hydro_beg_max,
+                     data.ti_gravity_end_min, data.ti_gravity_end_max,
+                     data.ti_gravity_beg_max, e->forcerebuild);
 
 /* Aggregate collective data from the different nodes for this step. */
 #ifdef WITH_MPI
@@ -3306,43 +3874,47 @@ void engine_collect_end_of_step(struct engine *e, int apply) {
 
 #ifdef SWIFT_DEBUG_CHECKS
   {
-    /* Check the above using the original MPI calls. */
-    integertime_t in_i[1], out_i[1];
-    in_i[0] = 0;
-    out_i[0] = data.ti_end_min;
-    if (MPI_Allreduce(out_i, in_i, 1, MPI_LONG_LONG_INT, MPI_MIN,
-                      MPI_COMM_WORLD) != MPI_SUCCESS)
-      error("Failed to aggregate ti_end_min.");
-    if (in_i[0] != (long long)e->collect_group1.ti_end_min)
-      error("Failed to get same ti_end_min, is %lld, should be %lld", in_i[0],
-            e->collect_group1.ti_end_min);
-
-    long long in_ll[3], out_ll[3];
-    out_ll[0] = data.updates;
-    out_ll[1] = data.g_updates;
-    out_ll[2] = data.s_updates;
-    if (MPI_Allreduce(out_ll, in_ll, 3, MPI_LONG_LONG_INT, MPI_SUM,
-                      MPI_COMM_WORLD) != MPI_SUCCESS)
-      error("Failed to aggregate particle counts.");
-    if (in_ll[0] != (long long)e->collect_group1.updates)
-      error("Failed to get same updates, is %lld, should be %ld", in_ll[0],
-            e->collect_group1.updates);
-    if (in_ll[1] != (long long)e->collect_group1.g_updates)
-      error("Failed to get same g_updates, is %lld, should be %ld", in_ll[1],
-            e->collect_group1.g_updates);
-    if (in_ll[2] != (long long)e->collect_group1.s_updates)
-      error("Failed to get same s_updates, is %lld, should be %ld", in_ll[2],
-            e->collect_group1.s_updates);
-
-    int buff = 0;
-    if (MPI_Allreduce(&e->forcerebuild, &buff, 1, MPI_INT, MPI_MAX,
-                      MPI_COMM_WORLD) != MPI_SUCCESS)
-      error("Failed to aggregate the rebuild flag across nodes.");
-    if (!!buff != !!e->collect_group1.forcerebuild)
-      error(
-          "Failed to get same rebuild flag from all nodes, is %d,"
-          "should be %d",
-          buff, e->collect_group1.forcerebuild);
+    /* /\* Check the above using the original MPI calls. *\/ */
+    /* integertime_t in_i[1], out_i[1]; */
+    /* in_i[0] = 0; */
+    /* out_i[0] = data.ti_end_min; */
+    /* if (MPI_Allreduce(out_i, in_i, 1, MPI_LONG_LONG_INT, MPI_MIN, */
+    /*                   MPI_COMM_WORLD) != MPI_SUCCESS) */
+    /*   error("Failed to aggregate ti_end_min."); */
+    /* if (in_i[0] != (long long)e->collect_group1.ti_end_min) */
+    /*   error("Failed to get same ti_end_min, is %lld, should be %lld",
+     * in_i[0], */
+    /*         e->collect_group1.ti_end_min); */
+
+    /* long long in_ll[3], out_ll[3]; */
+    /* out_ll[0] = data.updates; */
+    /* out_ll[1] = data.g_updates; */
+    /* out_ll[2] = data.s_updates; */
+    /* if (MPI_Allreduce(out_ll, in_ll, 3, MPI_LONG_LONG_INT, MPI_SUM, */
+    /*                   MPI_COMM_WORLD) != MPI_SUCCESS) */
+    /*   error("Failed to aggregate particle counts."); */
+    /* if (in_ll[0] != (long long)e->collect_group1.updates) */
+    /*   error("Failed to get same updates, is %lld, should be %ld", in_ll[0],
+     */
+    /*         e->collect_group1.updates); */
+    /* if (in_ll[1] != (long long)e->collect_group1.g_updates) */
+    /*   error("Failed to get same g_updates, is %lld, should be %ld", in_ll[1],
+     */
+    /*         e->collect_group1.g_updates); */
+    /* if (in_ll[2] != (long long)e->collect_group1.s_updates) */
+    /*   error("Failed to get same s_updates, is %lld, should be %ld", in_ll[2],
+     */
+    /*         e->collect_group1.s_updates); */
+
+    /* int buff = 0; */
+    /* if (MPI_Allreduce(&e->forcerebuild, &buff, 1, MPI_INT, MPI_MAX, */
+    /*                   MPI_COMM_WORLD) != MPI_SUCCESS) */
+    /*   error("Failed to aggregate the rebuild flag across nodes."); */
+    /* if (!!buff != !!e->collect_group1.forcerebuild) */
+    /*   error( */
+    /*       "Failed to get same rebuild flag from all nodes, is %d," */
+    /*       "should be %d", */
+    /*       buff, e->collect_group1.forcerebuild); */
   }
 #endif
 #endif
@@ -3429,7 +4001,8 @@ void engine_skip_force_and_kick(struct engine *e) {
         t->type == task_type_timestep || t->subtype == task_subtype_force ||
         t->subtype == task_subtype_grav ||
         t->type == task_type_grav_long_range ||
-        t->type == task_type_grav_ghost ||
+        t->type == task_type_grav_ghost_in ||
+        t->type == task_type_grav_ghost_out ||
         t->type == task_type_grav_top_level || t->type == task_type_grav_down ||
         t->type == task_type_cooling || t->type == task_type_sourceterms)
       t->skip = 1;
@@ -3561,15 +4134,16 @@ void engine_init_particles(struct engine *e, int flag_entropy_ICs,
 
 #ifdef SWIFT_DEBUG_CHECKS
   /* Check that we have the correct total mass in the top-level multipoles */
-  size_t num_gpart_mpole = 0;
+  long long num_gpart_mpole = 0;
   if (e->policy & engine_policy_self_gravity) {
     for (int i = 0; i < e->s->nr_cells; ++i)
       num_gpart_mpole += e->s->cells_top[i].multipole->m_pole.num_gpart;
-    if (num_gpart_mpole != e->s->nr_gparts)
+    if (num_gpart_mpole != e->total_nr_gparts)
       error(
-          "Multipoles don't contain the total number of gpart s->nr_gpart=%zd, "
-          "m_poles=%zd",
-          e->s->nr_gparts, num_gpart_mpole);
+          "Top-level multipoles don't contain the total number of gpart "
+          "s->nr_gpart=%lld, "
+          "m_poles=%lld",
+          e->total_nr_gparts, num_gpart_mpole);
   }
 #endif
 
@@ -3756,15 +4330,15 @@ void engine_step(struct engine *e) {
 
 #ifdef SWIFT_DEBUG_CHECKS
   /* Check that we have the correct total mass in the top-level multipoles */
-  size_t num_gpart_mpole = 0;
+  long long num_gpart_mpole = 0;
   if (e->policy & engine_policy_self_gravity) {
     for (int i = 0; i < e->s->nr_cells; ++i)
       num_gpart_mpole += e->s->cells_top[i].multipole->m_pole.num_gpart;
-    if (num_gpart_mpole != e->s->nr_gparts)
+    if (num_gpart_mpole != e->total_nr_gparts)
       error(
-          "Multipoles don't contain the total number of gpart mpoles=%zd "
-          "ngparts=%zd",
-          num_gpart_mpole, e->s->nr_gparts);
+          "Multipoles don't contain the total number of gpart mpoles=%lld "
+          "ngparts=%lld",
+          num_gpart_mpole, e->total_nr_gparts);
   }
 #endif
 
@@ -3871,9 +4445,13 @@ void engine_unskip(struct engine *e) {
   threadpool_map(&e->threadpool, runner_do_unskip_mapper, e->s->local_cells_top,
                  e->s->nr_local_cells, sizeof(int), 1, e);
 
-  /* And the top level gravity FFT one */
-  if (e->s->periodic && (e->policy & engine_policy_self_gravity))
-    scheduler_activate(&e->sched, e->s->grav_top_level);
+  /* And the top level gravity FFT one when periodicity is on.*/
+  if (e->s->periodic && (e->policy & engine_policy_self_gravity)) {
+
+    /* Only if there are other tasks (i.e. something happens on this node) */
+    if (e->sched.active_count > 0)
+      scheduler_activate(&e->sched, e->s->grav_top_level);
+  }
 
   if (e->verbose)
     message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
@@ -3902,10 +4480,11 @@ void engine_do_drift_all_mapper(void *map_data, int num_elements,
 
       /* Drift all the g-particles */
       cell_drift_gpart(c, e, 1);
+    }
 
-      /* Drift the multipoles */
-      if (e->policy & engine_policy_self_gravity)
-        cell_drift_all_multipoles(c, e);
+    /* Drift the multipoles */
+    if (e->policy & engine_policy_self_gravity) {
+      cell_drift_all_multipoles(c, e);
     }
   }
 }
@@ -3959,7 +4538,7 @@ void engine_do_drift_top_multipoles_mapper(void *map_data, int num_elements,
 
   for (int ind = 0; ind < num_elements; ind++) {
     struct cell *c = &cells[ind];
-    if (c != NULL && c->nodeID == e->nodeID) {
+    if (c != NULL) {
 
       /* Drift the multipole at this level only */
       if (c->ti_old_multipole != e->ti_current) cell_drift_multipole(c, e);
@@ -4030,11 +4609,24 @@ void engine_reconstruct_multipoles(struct engine *e) {
 void engine_makeproxies(struct engine *e) {
 
 #ifdef WITH_MPI
-  const int *cdim = e->s->cdim;
+  const int nodeID = e->nodeID;
   const struct space *s = e->s;
+  const int *cdim = s->cdim;
+  const int periodic = s->periodic;
+
+  /* Get some info about the physics */
+  const double *dim = s->dim;
+  const struct gravity_props *props = e->gravity_properties;
+  const double theta_crit2 = props->theta_crit2;
+  const int with_hydro = (e->policy & engine_policy_hydro);
+  const int with_gravity = (e->policy & engine_policy_self_gravity);
+
+  /* Handle on the cells and proxies */
   struct cell *cells = s->cells_top;
   struct proxy *proxies = e->proxies;
-  ticks tic = getticks();
+
+  /* Let's time this */
+  const ticks tic = getticks();
 
   /* Prepare the proxies and the proxy index. */
   if (e->proxy_ind == NULL)
@@ -4043,33 +4635,53 @@ void engine_makeproxies(struct engine *e) {
   for (int k = 0; k < e->nr_nodes; k++) e->proxy_ind[k] = -1;
   e->nr_proxies = 0;
 
-  /* The following loop is super-clunky, but it's necessary
-     to ensure that the order of the send and recv cells in
-     the proxies is identical for all nodes! */
+  /* Compute how many cells away we need to walk */
+  int delta = 1; /*hydro case */
+  if (with_gravity) {
+    const double distance = 2.5 * cells[0].width[0] / props->theta_crit;
+    delta = (int)(distance / cells[0].width[0]) + 1;
+  }
+
+  /* Let's be verbose about this choice */
+  if (e->verbose)
+    message("Looking for proxies up to %d top-level cells away", delta);
 
   /* Loop over each cell in the space. */
   int ind[3];
-  for (ind[0] = 0; ind[0] < cdim[0]; ind[0]++)
-    for (ind[1] = 0; ind[1] < cdim[1]; ind[1]++)
+  for (ind[0] = 0; ind[0] < cdim[0]; ind[0]++) {
+    for (ind[1] = 0; ind[1] < cdim[1]; ind[1]++) {
       for (ind[2] = 0; ind[2] < cdim[2]; ind[2]++) {
 
         /* Get the cell ID. */
         const int cid = cell_getid(cdim, ind[0], ind[1], ind[2]);
 
+        double CoM_i[3] = {0., 0., 0.};
+        double r_max_i = 0.;
+
+        if (with_gravity) {
+
+          /* Get ci's multipole */
+          const struct gravity_tensors *multi_i = cells[cid].multipole;
+          CoM_i[0] = multi_i->CoM[0];
+          CoM_i[1] = multi_i->CoM[1];
+          CoM_i[2] = multi_i->CoM[2];
+          r_max_i = multi_i->r_max;
+        }
+
         /* Loop over all its neighbours (periodic). */
-        for (int i = -1; i <= 1; i++) {
+        for (int i = -delta; i <= delta; i++) {
           int ii = ind[0] + i;
           if (ii >= cdim[0])
             ii -= cdim[0];
           else if (ii < 0)
             ii += cdim[0];
-          for (int j = -1; j <= 1; j++) {
+          for (int j = -delta; j <= delta; j++) {
             int jj = ind[1] + j;
             if (jj >= cdim[1])
               jj -= cdim[1];
             else if (jj < 0)
               jj += cdim[1];
-            for (int k = -1; k <= 1; k++) {
+            for (int k = -delta; k <= delta; k++) {
               int kk = ind[2] + k;
               if (kk >= cdim[2])
                 kk -= cdim[2];
@@ -4079,45 +4691,127 @@ void engine_makeproxies(struct engine *e) {
               /* Get the cell ID. */
               const int cjd = cell_getid(cdim, ii, jj, kk);
 
+              /* Early abort (same cell) */
+              if (cid == cjd) continue;
+
+              /* Early abort (both same node) */
+              if (cells[cid].nodeID == nodeID && cells[cjd].nodeID == nodeID)
+                continue;
+
+              /* Early abort (both foreign node) */
+              if (cells[cid].nodeID != nodeID && cells[cjd].nodeID != nodeID)
+                continue;
+
+              int proxy_type = 0;
+
+              /* In the hydro case, only care about neighbours */
+              if (with_hydro) {
+
+                /* This is super-ugly but checks for direct neighbours */
+                /* with periodic BC */
+                if (((abs(ind[0] - ii) <= 1 ||
+                      abs(ind[0] - ii - cdim[0]) <= 1 ||
+                      abs(ind[0] - ii + cdim[0]) <= 1) &&
+                     (abs(ind[1] - jj) <= 1 ||
+                      abs(ind[1] - jj - cdim[1]) <= 1 ||
+                      abs(ind[1] - jj + cdim[1]) <= 1) &&
+                     (abs(ind[2] - kk) <= 1 ||
+                      abs(ind[2] - kk - cdim[2]) <= 1 ||
+                      abs(ind[2] - kk + cdim[2]) <= 1)))
+                  proxy_type |= (int)proxy_cell_type_hydro;
+              }
+
+              /* In the gravity case, check distances using the MAC. */
+              if (with_gravity) {
+
+                /* Get cj's multipole */
+                const struct gravity_tensors *multi_j = cells[cjd].multipole;
+                const double CoM_j[3] = {multi_j->CoM[0], multi_j->CoM[1],
+                                         multi_j->CoM[2]};
+                const double r_max_j = multi_j->r_max;
+
+                /* Let's compute the current distance between the cell pair*/
+                double dx = CoM_i[0] - CoM_j[0];
+                double dy = CoM_i[1] - CoM_j[1];
+                double dz = CoM_i[2] - CoM_j[2];
+
+                /* Apply BC */
+                if (periodic) {
+                  dx = nearest(dx, dim[0]);
+                  dy = nearest(dy, dim[1]);
+                  dz = nearest(dz, dim[2]);
+                }
+                const double r2 = dx * dx + dy * dy + dz * dz;
+
+                /* Are we too close for M2L? */
+                if (!gravity_M2L_accept(r_max_i, r_max_j, theta_crit2, r2))
+                  proxy_type |= (int)proxy_cell_type_gravity;
+              }
+
+              /* Abort if not in range at all */
+              if (proxy_type == proxy_cell_type_none) continue;
+
               /* Add to proxies? */
-              if (cells[cid].nodeID == e->nodeID &&
-                  cells[cjd].nodeID != e->nodeID) {
+              if (cells[cid].nodeID == nodeID && cells[cjd].nodeID != nodeID) {
+
+                /* Do we already have a relationship with this node? */
                 int pid = e->proxy_ind[cells[cjd].nodeID];
                 if (pid < 0) {
                   if (e->nr_proxies == engine_maxproxies)
                     error("Maximum number of proxies exceeded.");
+
+                  /* Ok, start a new proxy for this pair of nodes */
                   proxy_init(&proxies[e->nr_proxies], e->nodeID,
                              cells[cjd].nodeID);
+
+                  /* Store the information */
                   e->proxy_ind[cells[cjd].nodeID] = e->nr_proxies;
                   pid = e->nr_proxies;
                   e->nr_proxies += 1;
                 }
-                proxy_addcell_in(&proxies[pid], &cells[cjd]);
-                proxy_addcell_out(&proxies[pid], &cells[cid]);
+
+                /* Add the cell to the proxy */
+                proxy_addcell_in(&proxies[pid], &cells[cjd], proxy_type);
+                proxy_addcell_out(&proxies[pid], &cells[cid], proxy_type);
+
+                /* Store info about where to send the cell */
                 cells[cid].sendto |= (1ULL << pid);
               }
 
-              if (cells[cjd].nodeID == e->nodeID &&
-                  cells[cid].nodeID != e->nodeID) {
+              /* Same for the symmetric case? */
+              if (cells[cjd].nodeID == nodeID && cells[cid].nodeID != nodeID) {
+
+                /* Do we already have a relationship with this node? */
                 int pid = e->proxy_ind[cells[cid].nodeID];
                 if (pid < 0) {
                   if (e->nr_proxies == engine_maxproxies)
                     error("Maximum number of proxies exceeded.");
+
+                  /* Ok, start a new proxy for this pair of nodes */
                   proxy_init(&proxies[e->nr_proxies], e->nodeID,
                              cells[cid].nodeID);
+
+                  /* Store the information */
                   e->proxy_ind[cells[cid].nodeID] = e->nr_proxies;
                   pid = e->nr_proxies;
                   e->nr_proxies += 1;
                 }
-                proxy_addcell_in(&proxies[pid], &cells[cid]);
-                proxy_addcell_out(&proxies[pid], &cells[cjd]);
+
+                /* Add the cell to the proxy */
+                proxy_addcell_in(&proxies[pid], &cells[cid], proxy_type);
+                proxy_addcell_out(&proxies[pid], &cells[cjd], proxy_type);
+
+                /* Store info about where to send the cell */
                 cells[cjd].sendto |= (1ULL << pid);
               }
             }
           }
         }
       }
+    }
+  }
 
+  /* Be clear about the time */
   if (e->verbose)
     message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
             clocks_getunit());
@@ -4344,8 +5038,8 @@ void engine_unpin() {
  */
 void engine_init(struct engine *e, struct space *s,
                  const struct swift_params *params, int nr_nodes, int nodeID,
-                 int nr_threads, int Ngas, int Ndm, int with_aff, int policy,
-                 int verbose, struct repartition *reparttype,
+                 int nr_threads, long long Ngas, long long Ndm, int with_aff,
+                 int policy, int verbose, struct repartition *reparttype,
                  const struct unit_system *internal_units,
                  const struct phys_const *physical_constants,
                  const struct hydro_props *hydro,
diff --git a/src/engine.h b/src/engine.h
index f80589ad3fad508f48a6fc3eda676a929b824c6e..a571f2b24d57b2720c3c77ebd7600a3830e4d2a3 100644
--- a/src/engine.h
+++ b/src/engine.h
@@ -147,13 +147,31 @@ struct engine {
   double timeBase;
   double timeBase_inv;
 
-  /* Minimal ti_end for the next time-step */
+  /* Minimal hydro ti_end for the next time-step */
+  integertime_t ti_hydro_end_min;
+
+  /* Maximal hydro ti_end for the next time-step */
+  integertime_t ti_hydro_end_max;
+
+  /* Maximal hydro ti_beg for the next time-step */
+  integertime_t ti_hydro_beg_max;
+
+  /* Minimal gravity ti_end for the next time-step */
+  integertime_t ti_gravity_end_min;
+
+  /* Maximal gravity ti_end for the next time-step */
+  integertime_t ti_gravity_end_max;
+
+  /* Maximal gravity ti_beg for the next time-step */
+  integertime_t ti_gravity_beg_max;
+
+  /* Minimal overall ti_end for the next time-step */
   integertime_t ti_end_min;
 
-  /* Maximal ti_end for the next time-step */
+  /* Maximal overall ti_end for the next time-step */
   integertime_t ti_end_max;
 
-  /* Maximal ti_beg for the next time-step */
+  /* Maximal overall ti_beg for the next time-step */
   integertime_t ti_beg_max;
 
   /* Number of particles updated in the previous step */
@@ -163,7 +181,7 @@ struct engine {
   int step_props;
 
   /* Total numbers of particles in the system. */
-  size_t total_nr_parts, total_nr_gparts;
+  long long total_nr_parts, total_nr_gparts;
 
   /* The internal system of units */
   const struct unit_system *internal_units;
@@ -281,8 +299,8 @@ void engine_print_stats(struct engine *e);
 void engine_dump_snapshot(struct engine *e);
 void engine_init(struct engine *e, struct space *s,
                  const struct swift_params *params, int nr_nodes, int nodeID,
-                 int nr_threads, int Ngas, int Ndm, int with_aff, int policy,
-                 int verbose, struct repartition *reparttype,
+                 int nr_threads, long long Ngas, long long Ndm, int with_aff,
+                 int policy, int verbose, struct repartition *reparttype,
                  const struct unit_system *internal_units,
                  const struct phys_const *physical_constants,
                  const struct hydro_props *hydro,
diff --git a/src/partition.c b/src/partition.c
index 0b7e5ca76bed0993b56b3825bb17914b06fe7e9c..8b67cf4866eff332b5c5deea1451c48f520bce9a 100644
--- a/src/partition.c
+++ b/src/partition.c
@@ -577,9 +577,12 @@ static void repart_edge_metis(int partweights, int bothweights, int timebins,
     int cid = ci - cells;
 
     /* Different weights for different tasks. */
-    if (t->type == task_type_ghost || t->type == task_type_kick1 ||
-        t->type == task_type_kick2 || t->type == task_type_timestep ||
-        t->type == task_type_drift_part || t->type == task_type_drift_gpart) {
+    if (t->type == task_type_drift_part || t->type == task_type_drift_gpart ||
+        t->type == task_type_ghost || t->type == task_type_extra_ghost ||
+        t->type == task_type_kick1 || t->type == task_type_kick2 ||
+        t->type == task_type_timestep || t->type == task_type_init_grav ||
+        t->type == task_type_grav_down ||
+        t->type == task_type_grav_long_range) {
 
       /* Particle updates add only to vertex weight. */
       if (taskvweights) weights_v[cid] += w;
@@ -614,41 +617,46 @@ static void repart_edge_metis(int partweights, int bothweights, int timebins,
           if (cj->nodeID == nodeID) weights_v[cjd] += 0.5 * w;
         }
 
-        if (timebins) {
-          /* Add weights to edge for all cells based on the expected
-           * interaction time (calculated as the time to the last expected
-           * time) as we want to avoid having active cells on the edges, so we
-           * cut for that. Note that weight is added to the local and remote
-           * cells, as we want to keep both away from any cuts, this can
-           * overflow int, so take care. */
-          int dti = num_time_bins - get_time_bin(ci->ti_end_min);
-          int dtj = num_time_bins - get_time_bin(cj->ti_end_min);
-          double dt = (double)(1 << dti) + (double)(1 << dtj);
-
-          /* ci */
-          int kk;
-          for (kk = 26 * cid; inds[kk] != cjd; kk++)
-            ;
-          weights_e[kk] += dt;
-
-          /* cj */
-          for (kk = 26 * cjd; inds[kk] != cid; kk++)
-            ;
-          weights_e[kk] += dt;
-        } else {
-
-          /* Add weights from task costs to the edge. */
-
-          /* ci */
-          int kk;
-          for (kk = 26 * cid; inds[kk] != cjd; kk++)
-            ;
-          weights_e[kk] += w;
-
-          /* cj */
-          for (kk = 26 * cjd; inds[kk] != cid; kk++)
-            ;
-          weights_e[kk] += w;
+        /* Find indices of ci/cj neighbours. Note with gravity these cells may
+         * not be neighbours, in that case we ignore any edge weight for that
+         * pair. */
+        int ik = -1;
+        for (int k = 26 * cid; k < 26 * nr_cells; k++) {
+          if (inds[k] == cjd) {
+            ik = k;
+            break;
+          }
+        }
+
+        /* cj */
+        int jk = -1;
+        for (int k = 26 * cjd; k < 26 * nr_cells; k++) {
+          if (inds[k] == cid) {
+            jk = k;
+            break;
+          }
+        }
+        if (ik != -1 && jk != -1) {
+
+          if (timebins) {
+            /* Add weights to edge for all cells based on the expected
+             * interaction time (calculated as the time to the last expected
+             * time) as we want to avoid having active cells on the edges, so
+             * we cut for that. Note that weight is added to the local and
+             * remote cells, as we want to keep both away from any cuts, this
+             * can overflow int, so take care. */
+            int dti = num_time_bins - get_time_bin(ci->ti_hydro_end_min);
+            int dtj = num_time_bins - get_time_bin(cj->ti_hydro_end_min);
+            double dt = (double)(1 << dti) + (double)(1 << dtj);
+            weights_e[ik] += dt;
+            weights_e[jk] += dt;
+
+          } else {
+
+            /* Add weights from task costs to the edge. */
+            weights_e[ik] += w;
+            weights_e[jk] += w;
+          }
         }
       }
     }
diff --git a/src/proxy.c b/src/proxy.c
index dd6faa3055cb17a0a3050d9e62d107d7489a4326..4c110d1a23cbe337978147567b2e6ad8a6d6ba65 100644
--- a/src/proxy.c
+++ b/src/proxy.c
@@ -124,26 +124,44 @@ void proxy_cells_exch2(struct proxy *p) {
  *
  * @param p The #proxy.
  * @param c The #cell.
+ * @param type Why is this cell in the proxy (hdro, gravity, ...) ?
  */
-void proxy_addcell_in(struct proxy *p, struct cell *c) {
+void proxy_addcell_in(struct proxy *p, struct cell *c, int type) {
+
+  if (type == proxy_cell_type_none) error("Invalid type for proxy");
 
   /* Check if the cell is already registered with the proxy. */
   for (int k = 0; k < p->nr_cells_in; k++)
-    if (p->cells_in[k] == c) return;
+    if (p->cells_in[k] == c) {
+
+      /* Update the type */
+      p->cells_in_type[k] |= type;
+      return;
+    }
 
   /* Do we need to grow the number of in cells? */
   if (p->nr_cells_in == p->size_cells_in) {
+
     p->size_cells_in *= proxy_buffgrow;
-    struct cell **temp;
-    if ((temp = malloc(sizeof(struct cell *) * p->size_cells_in)) == NULL)
+
+    struct cell **temp_cell;
+    if ((temp_cell = malloc(sizeof(struct cell *) * p->size_cells_in)) == NULL)
       error("Failed to allocate incoming cell list.");
-    memcpy(temp, p->cells_in, sizeof(struct cell *) * p->nr_cells_in);
+    memcpy(temp_cell, p->cells_in, sizeof(struct cell *) * p->nr_cells_in);
     free(p->cells_in);
-    p->cells_in = temp;
+    p->cells_in = temp_cell;
+
+    int *temp_type;
+    if ((temp_type = malloc(sizeof(int) * p->size_cells_in)) == NULL)
+      error("Failed to allocate incoming cell type list.");
+    memcpy(temp_type, p->cells_in_type, sizeof(int) * p->nr_cells_in);
+    free(p->cells_in_type);
+    p->cells_in_type = temp_type;
   }
 
   /* Add the cell. */
   p->cells_in[p->nr_cells_in] = c;
+  p->cells_in_type[p->nr_cells_in] = type;
   p->nr_cells_in += 1;
 }
 
@@ -152,26 +170,43 @@ void proxy_addcell_in(struct proxy *p, struct cell *c) {
  *
  * @param p The #proxy.
  * @param c The #cell.
+ * @param type Why is this cell in the proxy (hdro, gravity, ...) ?
  */
-void proxy_addcell_out(struct proxy *p, struct cell *c) {
+void proxy_addcell_out(struct proxy *p, struct cell *c, int type) {
+
+  if (type == proxy_cell_type_none) error("Invalid type for proxy");
 
   /* Check if the cell is already registered with the proxy. */
   for (int k = 0; k < p->nr_cells_out; k++)
-    if (p->cells_out[k] == c) return;
+    if (p->cells_out[k] == c) {
+
+      /* Update the type */
+      p->cells_out_type[k] |= type;
+      return;
+    }
 
   /* Do we need to grow the number of out cells? */
   if (p->nr_cells_out == p->size_cells_out) {
     p->size_cells_out *= proxy_buffgrow;
-    struct cell **temp;
-    if ((temp = malloc(sizeof(struct cell *) * p->size_cells_out)) == NULL)
+
+    struct cell **temp_cell;
+    if ((temp_cell = malloc(sizeof(struct cell *) * p->size_cells_out)) == NULL)
       error("Failed to allocate outgoing cell list.");
-    memcpy(temp, p->cells_out, sizeof(struct cell *) * p->nr_cells_out);
+    memcpy(temp_cell, p->cells_out, sizeof(struct cell *) * p->nr_cells_out);
     free(p->cells_out);
-    p->cells_out = temp;
+    p->cells_out = temp_cell;
+
+    int *temp_type;
+    if ((temp_type = malloc(sizeof(int) * p->size_cells_out)) == NULL)
+      error("Failed to allocate outgoing cell type list.");
+    memcpy(temp_type, p->cells_out_type, sizeof(int) * p->nr_cells_out);
+    free(p->cells_out_type);
+    p->cells_out_type = temp_type;
   }
 
   /* Add the cell. */
   p->cells_out[p->nr_cells_out] = c;
+  p->cells_out_type[p->nr_cells_out] = type;
   p->nr_cells_out += 1;
 }
 
@@ -433,6 +468,9 @@ void proxy_init(struct proxy *p, int mynodeID, int nodeID) {
     if ((p->cells_in =
              (struct cell **)malloc(sizeof(void *) * p->size_cells_in)) == NULL)
       error("Failed to allocate cells_in buffer.");
+    if ((p->cells_in_type = (int *)malloc(sizeof(int) * p->size_cells_in)) ==
+        NULL)
+      error("Failed to allocate cells_in_type buffer.");
   }
   p->nr_cells_in = 0;
   if (p->cells_out == NULL) {
@@ -440,6 +478,9 @@ void proxy_init(struct proxy *p, int mynodeID, int nodeID) {
     if ((p->cells_out = (struct cell **)malloc(sizeof(void *) *
                                                p->size_cells_out)) == NULL)
       error("Failed to allocate cells_out buffer.");
+    if ((p->cells_out_type = (int *)malloc(sizeof(int) * p->size_cells_out)) ==
+        NULL)
+      error("Failed to allocate cells_out_type buffer.");
   }
   p->nr_cells_out = 0;
 
diff --git a/src/proxy.h b/src/proxy.h
index a245077193878bb669b474944965badceffcee80..b45f6fcca86b0320a49b5c2b879539cbf8c73116 100644
--- a/src/proxy.h
+++ b/src/proxy.h
@@ -36,6 +36,15 @@
 #define proxy_tag_sparts 4
 #define proxy_tag_cells 5
 
+/**
+ * @brief The different reasons a cell can be in a proxy
+ */
+enum proxy_cell_type {
+  proxy_cell_type_none = 0,
+  proxy_cell_type_hydro = (1 << 0),
+  proxy_cell_type_gravity = (1 << 1),
+};
+
 /* Data structure for the proxy. */
 struct proxy {
 
@@ -44,11 +53,13 @@ struct proxy {
 
   /* Incoming cells. */
   struct cell **cells_in;
+  int *cells_in_type;
   struct pcell *pcells_in;
   int nr_cells_in, size_cells_in, size_pcells_in;
 
   /* Outgoing cells. */
   struct cell **cells_out;
+  int *cells_out_type;
   struct pcell *pcells_out;
   int nr_cells_out, size_cells_out, size_pcells_out;
 
@@ -87,8 +98,8 @@ void proxy_gparts_load(struct proxy *p, const struct gpart *gparts, int N);
 void proxy_sparts_load(struct proxy *p, const struct spart *sparts, int N);
 void proxy_parts_exch1(struct proxy *p);
 void proxy_parts_exch2(struct proxy *p);
-void proxy_addcell_in(struct proxy *p, struct cell *c);
-void proxy_addcell_out(struct proxy *p, struct cell *c);
+void proxy_addcell_in(struct proxy *p, struct cell *c, int type);
+void proxy_addcell_out(struct proxy *p, struct cell *c, int type);
 void proxy_cells_exch1(struct proxy *p);
 void proxy_cells_exch2(struct proxy *p);
 
diff --git a/src/runner.c b/src/runner.c
index 392bb261bf156441a6baafa8e795c59405eb0082..a8ea47e8ce449e716d5f454b4af4bd618dc4fcc9 100644
--- a/src/runner.c
+++ b/src/runner.c
@@ -139,7 +139,7 @@ void runner_do_grav_external(struct runner *r, struct cell *c, int timer) {
   TIMER_TIC;
 
   /* Anything to do here? */
-  if (!cell_is_active(c, e)) return;
+  if (!cell_is_active_gravity(c, e)) return;
 
   /* Recurse? */
   if (c->split) {
@@ -185,7 +185,7 @@ void runner_do_cooling(struct runner *r, struct cell *c, int timer) {
   TIMER_TIC;
 
   /* Anything to do here? */
-  if (!cell_is_active(c, e)) return;
+  if (!cell_is_active_hydro(c, e)) return;
 
   /* Recurse? */
   if (c->split) {
@@ -304,7 +304,8 @@ void runner_check_sorts(struct cell *c, int flags) {
   if (flags & ~c->sorted) error("Inconsistent sort flags (downward)!");
   if (c->split)
     for (int k = 0; k < 8; k++)
-      if (c->progeny[k] != NULL) runner_check_sorts(c->progeny[k], c->sorted);
+      if (c->progeny[k] != NULL && c->progeny[k]->count > 0)
+        runner_check_sorts(c->progeny[k], c->sorted);
 #else
   error("Calling debugging code without debugging flag activated.");
 #endif
@@ -343,7 +344,7 @@ void runner_do_sort(struct runner *r, struct cell *c, int flags, int cleanup,
 
   /* Check that the particles have been moved to the current time */
   if (flags && !cell_are_part_drifted(c, r->e))
-    error("Sorting un-drifted cell");
+    error("Sorting un-drifted cell c->nodeID=%d", c->nodeID);
 
 #ifdef SWIFT_DEBUG_CHECKS
   /* Make sure the sort flags are consistent (downward). */
@@ -376,7 +377,7 @@ void runner_do_sort(struct runner *r, struct cell *c, int flags, int cleanup,
     float dx_max_sort = 0.0f;
     float dx_max_sort_old = 0.0f;
     for (int k = 0; k < 8; k++) {
-      if (c->progeny[k] != NULL) {
+      if (c->progeny[k] != NULL && c->progeny[k]->count > 0) {
         /* Only propagate cleanup if the progeny is stale. */
         runner_do_sort(r, c->progeny[k], flags,
                        cleanup && (c->progeny[k]->dx_max_sort >
@@ -550,10 +551,7 @@ void runner_do_init_grav(struct runner *r, struct cell *c, int timer) {
 #endif
 
   /* Anything to do here? */
-  if (!cell_is_active(c, e)) return;
-
-  /* Drift the multipole */
-  cell_drift_multipole(c, e);
+  if (!cell_is_active_gravity(c, e)) return;
 
   /* Reset the gravity acceleration tensors */
   gravity_field_tensors_init(&c->multipole->pot, e->ti_current);
@@ -587,7 +585,7 @@ void runner_do_extra_ghost(struct runner *r, struct cell *c, int timer) {
   TIMER_TIC;
 
   /* Anything to do here? */
-  if (!cell_is_active(c, e)) return;
+  if (!cell_is_active_hydro(c, e)) return;
 
   /* Recurse? */
   if (c->split) {
@@ -640,7 +638,7 @@ void runner_do_ghost(struct runner *r, struct cell *c, int timer) {
   TIMER_TIC;
 
   /* Anything to do here? */
-  if (!cell_is_active(c, e)) return;
+  if (!cell_is_active_hydro(c, e)) return;
 
   /* Recurse? */
   if (c->split) {
@@ -833,34 +831,62 @@ void runner_do_ghost(struct runner *r, struct cell *c, int timer) {
 }
 
 /**
- * @brief Unskip any tasks associated with active cells.
+ * @brief Unskip any hydro tasks associated with active cells.
  *
  * @param c The cell.
  * @param e The engine.
  */
-static void runner_do_unskip(struct cell *c, struct engine *e) {
+static void runner_do_unskip_hydro(struct cell *c, struct engine *e) {
 
   /* Ignore empty cells. */
-  if (c->count == 0 && c->gcount == 0) return;
+  if (c->count == 0) return;
 
   /* Skip inactive cells. */
-  if (!cell_is_active(c, e)) return;
+  if (!cell_is_active_hydro(c, e)) return;
 
   /* Recurse */
   if (c->split) {
     for (int k = 0; k < 8; k++) {
       if (c->progeny[k] != NULL) {
         struct cell *cp = c->progeny[k];
-        runner_do_unskip(cp, e);
+        runner_do_unskip_hydro(cp, e);
       }
     }
   }
 
   /* Unskip any active tasks. */
-  const int forcerebuild = cell_unskip_tasks(c, &e->sched);
+  const int forcerebuild = cell_unskip_hydro_tasks(c, &e->sched);
   if (forcerebuild) atomic_inc(&e->forcerebuild);
 }
 
+/**
+ * @brief Unskip any gravity tasks associated with active cells.
+ *
+ * @param c The cell.
+ * @param e The engine.
+ */
+static void runner_do_unskip_gravity(struct cell *c, struct engine *e) {
+
+  /* Ignore empty cells. */
+  if (c->gcount == 0) return;
+
+  /* Skip inactive cells. */
+  if (!cell_is_active_gravity(c, e)) return;
+
+  /* Recurse */
+  if (c->split) {
+    for (int k = 0; k < 8; k++) {
+      if (c->progeny[k] != NULL) {
+        struct cell *cp = c->progeny[k];
+        runner_do_unskip_gravity(cp, e);
+      }
+    }
+  }
+
+  /* Unskip any active tasks. */
+  cell_unskip_gravity_tasks(c, &e->sched);
+}
+
 /**
  * @brief Mapper function to unskip active tasks.
  *
@@ -877,7 +903,12 @@ void runner_do_unskip_mapper(void *map_data, int num_elements,
 
   for (int ind = 0; ind < num_elements; ind++) {
     struct cell *c = &s->cells_top[local_cells[ind]];
-    if (c != NULL) runner_do_unskip(c, e);
+    if (c != NULL) {
+      if (e->policy & engine_policy_hydro) runner_do_unskip_hydro(c, e);
+      if (e->policy &
+          (engine_policy_self_gravity | engine_policy_external_gravity))
+        runner_do_unskip_gravity(c, e);
+    }
   }
 }
 /**
@@ -935,7 +966,7 @@ void runner_do_kick1(struct runner *r, struct cell *c, int timer) {
   TIMER_TIC;
 
   /* Anything to do here? */
-  if (!cell_is_starting(c, e)) return;
+  if (!cell_is_starting_hydro(c, e) && !cell_is_starting_gravity(c, e)) return;
 
   /* Recurse? */
   if (c->split) {
@@ -1060,7 +1091,7 @@ void runner_do_kick2(struct runner *r, struct cell *c, int timer) {
   TIMER_TIC;
 
   /* Anything to do here? */
-  if (!cell_is_active(c, e)) return;
+  if (!cell_is_active_hydro(c, e) && !cell_is_active_gravity(c, e)) return;
 
   /* Recurse? */
   if (c->split) {
@@ -1194,7 +1225,7 @@ void runner_do_timestep(struct runner *r, struct cell *c, int timer) {
   TIMER_TIC;
 
   /* Anything to do here? */
-  if (!cell_is_active(c, e)) {
+  if (!cell_is_active_hydro(c, e) && !cell_is_active_gravity(c, e)) {
     c->updated = 0;
     c->g_updated = 0;
     c->s_updated = 0;
@@ -1202,7 +1233,10 @@ void runner_do_timestep(struct runner *r, struct cell *c, int timer) {
   }
 
   int updated = 0, g_updated = 0, s_updated = 0;
-  integertime_t ti_end_min = max_nr_timesteps, ti_end_max = 0, ti_beg_max = 0;
+  integertime_t ti_hydro_end_min = max_nr_timesteps, ti_hydro_end_max = 0,
+                ti_hydro_beg_max = 0;
+  integertime_t ti_gravity_end_min = max_nr_timesteps, ti_gravity_end_max = 0,
+                ti_gravity_beg_max = 0;
 
   /* No children? */
   if (!c->split) {
@@ -1231,7 +1265,7 @@ void runner_do_timestep(struct runner *r, struct cell *c, int timer) {
 
         /* Update particle */
         p->time_bin = get_time_bin(ti_new_step);
-        if (p->gpart != NULL) p->gpart->time_bin = get_time_bin(ti_new_step);
+        if (p->gpart != NULL) p->gpart->time_bin = p->time_bin;
 
         /* Tell the particle what the new physical time step is */
         float dt = get_timestep(p->time_bin, timeBase);
@@ -1242,11 +1276,23 @@ void runner_do_timestep(struct runner *r, struct cell *c, int timer) {
         if (p->gpart != NULL) g_updated++;
 
         /* What is the next sync-point ? */
-        ti_end_min = min(ti_current + ti_new_step, ti_end_min);
-        ti_end_max = max(ti_current + ti_new_step, ti_end_max);
+        ti_hydro_end_min = min(ti_current + ti_new_step, ti_hydro_end_min);
+        ti_hydro_end_max = max(ti_current + ti_new_step, ti_hydro_end_max);
 
         /* What is the next starting point for this cell ? */
-        ti_beg_max = max(ti_current, ti_beg_max);
+        ti_hydro_beg_max = max(ti_current, ti_hydro_beg_max);
+
+        if (p->gpart != NULL) {
+
+          /* What is the next sync-point ? */
+          ti_gravity_end_min =
+              min(ti_current + ti_new_step, ti_gravity_end_min);
+          ti_gravity_end_max =
+              max(ti_current + ti_new_step, ti_gravity_end_max);
+
+          /* What is the next starting point for this cell ? */
+          ti_gravity_beg_max = max(ti_current, ti_gravity_beg_max);
+        }
       }
 
       else { /* part is inactive */
@@ -1254,15 +1300,25 @@ void runner_do_timestep(struct runner *r, struct cell *c, int timer) {
         const integertime_t ti_end =
             get_integer_time_end(ti_current, p->time_bin);
 
-        /* What is the next sync-point ? */
-        ti_end_min = min(ti_end, ti_end_min);
-        ti_end_max = max(ti_end, ti_end_max);
-
         const integertime_t ti_beg =
             get_integer_time_begin(ti_current + 1, p->time_bin);
 
+        /* What is the next sync-point ? */
+        ti_hydro_end_min = min(ti_end, ti_hydro_end_min);
+        ti_hydro_end_max = max(ti_end, ti_hydro_end_max);
+
         /* What is the next starting point for this cell ? */
-        ti_beg_max = max(ti_beg, ti_beg_max);
+        ti_hydro_beg_max = max(ti_beg, ti_hydro_beg_max);
+
+        if (p->gpart != NULL) {
+
+          /* What is the next sync-point ? */
+          ti_gravity_end_min = min(ti_end, ti_gravity_end_min);
+          ti_gravity_end_max = max(ti_end, ti_gravity_end_max);
+
+          /* What is the next starting point for this cell ? */
+          ti_gravity_beg_max = max(ti_beg, ti_gravity_beg_max);
+        }
       }
     }
 
@@ -1297,11 +1353,13 @@ void runner_do_timestep(struct runner *r, struct cell *c, int timer) {
           g_updated++;
 
           /* What is the next sync-point ? */
-          ti_end_min = min(ti_current + ti_new_step, ti_end_min);
-          ti_end_max = max(ti_current + ti_new_step, ti_end_max);
+          ti_gravity_end_min =
+              min(ti_current + ti_new_step, ti_gravity_end_min);
+          ti_gravity_end_max =
+              max(ti_current + ti_new_step, ti_gravity_end_max);
 
           /* What is the next starting point for this cell ? */
-          ti_beg_max = max(ti_current, ti_beg_max);
+          ti_gravity_beg_max = max(ti_current, ti_gravity_beg_max);
 
         } else { /* gpart is inactive */
 
@@ -1309,14 +1367,14 @@ void runner_do_timestep(struct runner *r, struct cell *c, int timer) {
               get_integer_time_end(ti_current, gp->time_bin);
 
           /* What is the next sync-point ? */
-          ti_end_min = min(ti_end, ti_end_min);
-          ti_end_max = max(ti_end, ti_end_max);
+          ti_gravity_end_min = min(ti_end, ti_gravity_end_min);
+          ti_gravity_end_max = max(ti_end, ti_gravity_end_max);
 
           const integertime_t ti_beg =
               get_integer_time_begin(ti_current + 1, gp->time_bin);
 
           /* What is the next starting point for this cell ? */
-          ti_beg_max = max(ti_beg, ti_beg_max);
+          ti_gravity_beg_max = max(ti_beg, ti_gravity_beg_max);
         }
       }
     }
@@ -1350,11 +1408,11 @@ void runner_do_timestep(struct runner *r, struct cell *c, int timer) {
         g_updated++;
 
         /* What is the next sync-point ? */
-        ti_end_min = min(ti_current + ti_new_step, ti_end_min);
-        ti_end_max = max(ti_current + ti_new_step, ti_end_max);
+        ti_gravity_end_min = min(ti_current + ti_new_step, ti_gravity_end_min);
+        ti_gravity_end_max = max(ti_current + ti_new_step, ti_gravity_end_max);
 
         /* What is the next starting point for this cell ? */
-        ti_beg_max = max(ti_current, ti_beg_max);
+        ti_gravity_beg_max = max(ti_current, ti_gravity_beg_max);
 
       } else { /* star particle is inactive */
 
@@ -1362,14 +1420,14 @@ void runner_do_timestep(struct runner *r, struct cell *c, int timer) {
             get_integer_time_end(ti_current, sp->time_bin);
 
         /* What is the next sync-point ? */
-        ti_end_min = min(ti_end, ti_end_min);
-        ti_end_max = max(ti_end, ti_end_max);
+        ti_gravity_end_min = min(ti_end, ti_gravity_end_min);
+        ti_gravity_end_max = max(ti_end, ti_gravity_end_max);
 
         const integertime_t ti_beg =
             get_integer_time_begin(ti_current + 1, sp->time_bin);
 
         /* What is the next starting point for this cell ? */
-        ti_beg_max = max(ti_beg, ti_beg_max);
+        ti_gravity_beg_max = max(ti_beg, ti_gravity_beg_max);
       }
     }
   } else {
@@ -1386,9 +1444,12 @@ void runner_do_timestep(struct runner *r, struct cell *c, int timer) {
         updated += cp->updated;
         g_updated += cp->g_updated;
         s_updated += cp->s_updated;
-        ti_end_min = min(cp->ti_end_min, ti_end_min);
-        ti_end_max = max(cp->ti_end_max, ti_end_max);
-        ti_beg_max = max(cp->ti_beg_max, ti_beg_max);
+        ti_hydro_end_min = min(cp->ti_hydro_end_min, ti_hydro_end_min);
+        ti_hydro_end_max = max(cp->ti_hydro_end_max, ti_hydro_end_max);
+        ti_hydro_beg_max = max(cp->ti_hydro_beg_max, ti_hydro_beg_max);
+        ti_gravity_end_min = min(cp->ti_gravity_end_min, ti_gravity_end_min);
+        ti_gravity_end_max = max(cp->ti_gravity_end_max, ti_gravity_end_max);
+        ti_gravity_beg_max = max(cp->ti_gravity_beg_max, ti_gravity_beg_max);
       }
   }
 
@@ -1396,9 +1457,12 @@ void runner_do_timestep(struct runner *r, struct cell *c, int timer) {
   c->updated = updated;
   c->g_updated = g_updated;
   c->s_updated = s_updated;
-  c->ti_end_min = ti_end_min;
-  c->ti_end_max = ti_end_max;
-  c->ti_beg_max = ti_beg_max;
+  c->ti_hydro_end_min = ti_hydro_end_min;
+  c->ti_hydro_end_max = ti_hydro_end_max;
+  c->ti_hydro_beg_max = ti_hydro_beg_max;
+  c->ti_gravity_end_min = ti_gravity_end_min;
+  c->ti_gravity_end_max = ti_gravity_end_max;
+  c->ti_gravity_beg_max = ti_gravity_beg_max;
 
   if (timer) TIMER_TOC(timer_timestep);
 }
@@ -1425,7 +1489,7 @@ void runner_do_end_force(struct runner *r, struct cell *c, int timer) {
   TIMER_TIC;
 
   /* Anything to do here? */
-  if (!cell_is_active(c, e)) return;
+  if (!cell_is_active_hydro(c, e) && !cell_is_active_gravity(c, e)) return;
 
   /* Recurse? */
   if (c->split) {
@@ -1481,14 +1545,14 @@ void runner_do_end_force(struct runner *r, struct cell *c, int timer) {
 
           /* Check that this gpart has interacted with all the other
            * particles (via direct or multipoles) in the box */
-          if (gp->num_interacted != (long long)e->s->nr_gparts)
+          if (gp->num_interacted != e->total_nr_gparts)
             error(
                 "g-particle (id=%lld, type=%s) did not interact "
                 "gravitationally "
                 "with all other gparts gp->num_interacted=%lld, "
-                "total_gparts=%zd",
+                "total_gparts=%lld (local num_gparts=%zd)",
                 gp->id_or_neg_offset, part_type_names[gp->type],
-                gp->num_interacted, e->s->nr_gparts);
+                gp->num_interacted, e->total_nr_gparts, e->s->nr_gparts);
         }
 #endif
       }
@@ -1529,8 +1593,8 @@ void runner_do_recv_part(struct runner *r, struct cell *c, int clear_sorts,
 
   TIMER_TIC;
 
-  integertime_t ti_end_min = max_nr_timesteps;
-  integertime_t ti_end_max = 0;
+  integertime_t ti_hydro_end_min = max_nr_timesteps;
+  integertime_t ti_hydro_end_max = 0;
   timebin_t time_bin_min = num_time_bins;
   timebin_t time_bin_max = 0;
   float h_max = 0.f;
@@ -1554,33 +1618,35 @@ void runner_do_recv_part(struct runner *r, struct cell *c, int clear_sorts,
     }
 
     /* Convert into a time */
-    ti_end_min = get_integer_time_end(ti_current, time_bin_min);
-    ti_end_max = get_integer_time_end(ti_current, time_bin_max);
+    ti_hydro_end_min = get_integer_time_end(ti_current, time_bin_min);
+    ti_hydro_end_max = get_integer_time_end(ti_current, time_bin_max);
   }
 
   /* Otherwise, recurse and collect. */
   else {
     for (int k = 0; k < 8; k++) {
-      if (c->progeny[k] != NULL) {
+      if (c->progeny[k] != NULL && c->progeny[k]->count > 0) {
         runner_do_recv_part(r, c->progeny[k], clear_sorts, 0);
-        ti_end_min = min(ti_end_min, c->progeny[k]->ti_end_min);
-        ti_end_max = max(ti_end_max, c->progeny[k]->ti_end_max);
+        ti_hydro_end_min =
+            min(ti_hydro_end_min, c->progeny[k]->ti_hydro_end_min);
+        ti_hydro_end_max =
+            max(ti_hydro_end_max, c->progeny[k]->ti_hydro_end_max);
         h_max = max(h_max, c->progeny[k]->h_max);
       }
     }
   }
 
 #ifdef SWIFT_DEBUG_CHECKS
-  if (ti_end_min < ti_current)
+  if (ti_hydro_end_min < ti_current)
     error(
         "Received a cell at an incorrect time c->ti_end_min=%lld, "
         "e->ti_current=%lld.",
-        ti_end_min, ti_current);
+        ti_hydro_end_min, ti_current);
 #endif
 
   /* ... and store. */
-  c->ti_end_min = ti_end_min;
-  c->ti_end_max = ti_end_max;
+  // c->ti_hydro_end_min = ti_hydro_end_min;
+  // c->ti_hydro_end_max = ti_hydro_end_max;
   c->ti_old_part = ti_current;
   c->h_max = h_max;
 
@@ -1608,8 +1674,8 @@ void runner_do_recv_gpart(struct runner *r, struct cell *c, int timer) {
 
   TIMER_TIC;
 
-  integertime_t ti_end_min = max_nr_timesteps;
-  integertime_t ti_end_max = 0;
+  integertime_t ti_gravity_end_min = max_nr_timesteps;
+  integertime_t ti_gravity_end_max = 0;
   timebin_t time_bin_min = num_time_bins;
   timebin_t time_bin_max = 0;
 
@@ -1633,32 +1699,34 @@ void runner_do_recv_gpart(struct runner *r, struct cell *c, int timer) {
     }
 
     /* Convert into a time */
-    ti_end_min = get_integer_time_end(ti_current, time_bin_min);
-    ti_end_max = get_integer_time_end(ti_current, time_bin_max);
+    ti_gravity_end_min = get_integer_time_end(ti_current, time_bin_min);
+    ti_gravity_end_max = get_integer_time_end(ti_current, time_bin_max);
   }
 
   /* Otherwise, recurse and collect. */
   else {
     for (int k = 0; k < 8; k++) {
-      if (c->progeny[k] != NULL) {
+      if (c->progeny[k] != NULL && c->progeny[k]->gcount > 0) {
         runner_do_recv_gpart(r, c->progeny[k], 0);
-        ti_end_min = min(ti_end_min, c->progeny[k]->ti_end_min);
-        ti_end_max = max(ti_end_max, c->progeny[k]->ti_end_max);
+        ti_gravity_end_min =
+            min(ti_gravity_end_min, c->progeny[k]->ti_gravity_end_min);
+        ti_gravity_end_max =
+            max(ti_gravity_end_max, c->progeny[k]->ti_gravity_end_max);
       }
     }
   }
 
 #ifdef SWIFT_DEBUG_CHECKS
-  if (ti_end_min < ti_current)
+  if (ti_gravity_end_min < ti_current)
     error(
         "Received a cell at an incorrect time c->ti_end_min=%lld, "
         "e->ti_current=%lld.",
-        ti_end_min, ti_current);
+        ti_gravity_end_min, ti_current);
 #endif
 
   /* ... and store. */
-  c->ti_end_min = ti_end_min;
-  c->ti_end_max = ti_end_max;
+  // c->ti_gravity_end_min = ti_gravity_end_min;
+  // c->ti_gravity_end_max = ti_gravity_end_max;
   c->ti_old_gpart = ti_current;
 
   if (timer) TIMER_TOC(timer_dorecv_gpart);
@@ -1685,8 +1753,8 @@ void runner_do_recv_spart(struct runner *r, struct cell *c, int timer) {
 
   TIMER_TIC;
 
-  integertime_t ti_end_min = max_nr_timesteps;
-  integertime_t ti_end_max = 0;
+  integertime_t ti_gravity_end_min = max_nr_timesteps;
+  integertime_t ti_gravity_end_max = 0;
   timebin_t time_bin_min = num_time_bins;
   timebin_t time_bin_max = 0;
 
@@ -1710,32 +1778,34 @@ void runner_do_recv_spart(struct runner *r, struct cell *c, int timer) {
     }
 
     /* Convert into a time */
-    ti_end_min = get_integer_time_end(ti_current, time_bin_min);
-    ti_end_max = get_integer_time_end(ti_current, time_bin_max);
+    ti_gravity_end_min = get_integer_time_end(ti_current, time_bin_min);
+    ti_gravity_end_max = get_integer_time_end(ti_current, time_bin_max);
   }
 
   /* Otherwise, recurse and collect. */
   else {
     for (int k = 0; k < 8; k++) {
-      if (c->progeny[k] != NULL) {
+      if (c->progeny[k] != NULL && c->progeny[k]->scount > 0) {
         runner_do_recv_spart(r, c->progeny[k], 0);
-        ti_end_min = min(ti_end_min, c->progeny[k]->ti_end_min);
-        ti_end_max = max(ti_end_max, c->progeny[k]->ti_end_max);
+        ti_gravity_end_min =
+            min(ti_gravity_end_min, c->progeny[k]->ti_gravity_end_min);
+        ti_gravity_end_max =
+            max(ti_gravity_end_max, c->progeny[k]->ti_gravity_end_max);
       }
     }
   }
 
 #ifdef SWIFT_DEBUG_CHECKS
-  if (ti_end_min < ti_current)
+  if (ti_gravity_end_min < ti_current)
     error(
         "Received a cell at an incorrect time c->ti_end_min=%lld, "
         "e->ti_current=%lld.",
-        ti_end_min, ti_current);
+        ti_gravity_end_min, ti_current);
 #endif
 
   /* ... and store. */
-  c->ti_end_min = ti_end_min;
-  c->ti_end_max = ti_end_max;
+  c->ti_gravity_end_min = ti_gravity_end_min;
+  c->ti_gravity_end_max = ti_gravity_end_max;
   c->ti_old_gpart = ti_current;
 
   if (timer) TIMER_TOC(timer_dorecv_spart);
@@ -1804,54 +1874,62 @@ void *runner_main(void *data) {
 /* Check that we haven't scheduled an inactive task */
 #ifdef SWIFT_DEBUG_CHECKS
       t->ti_run = e->ti_current;
-#ifndef WITH_MPI
-      if (t->type == task_type_grav_top_level) {
-        if (ci != NULL || cj != NULL)
-          error("Top-level gravity task associated with a cell");
-      } else if (ci == NULL && cj == NULL) {
-
-        error("Task not associated with cells!");
-      } else if (cj == NULL) { /* self */
-
-        if (!cell_is_active(ci, e) && t->type != task_type_sort &&
-            t->type != task_type_send && t->type != task_type_recv &&
-            t->type != task_type_kick1 && t->type != task_type_drift_part &&
-            t->type != task_type_drift_gpart)
-          error(
-              "Task (type='%s/%s') should have been skipped ti_current=%lld "
-              "c->ti_end_min=%lld",
-              taskID_names[t->type], subtaskID_names[t->subtype], e->ti_current,
-              ci->ti_end_min);
-
-        /* Special case for sorts */
-        if (!cell_is_active(ci, e) && t->type == task_type_sort &&
-            !(ci->do_sort || ci->do_sub_sort))
-          error(
-              "Task (type='%s/%s') should have been skipped ti_current=%lld "
-              "c->ti_end_min=%lld t->flags=%d",
-              taskID_names[t->type], subtaskID_names[t->subtype], e->ti_current,
-              ci->ti_end_min, t->flags);
-
-        /* Special case for kick1 */
-        if (!cell_is_starting(ci, e) && t->type == task_type_kick1 &&
-            t->flags == 0)
-          error(
-              "Task (type='%s/%s') should have been skipped ti_current=%lld "
-              "c->ti_end_min=%lld t->flags=%d",
-              taskID_names[t->type], subtaskID_names[t->subtype], e->ti_current,
-              ci->ti_end_min, t->flags);
-
-      } else { /* pair */
-        if (!cell_is_active(ci, e) && !cell_is_active(cj, e))
-
-          if (t->type != task_type_send && t->type != task_type_recv)
-            error(
-                "Task (type='%s/%s') should have been skipped ti_current=%lld "
-                "ci->ti_end_min=%lld cj->ti_end_min=%lld",
-                taskID_names[t->type], subtaskID_names[t->subtype],
-                e->ti_current, ci->ti_end_min, cj->ti_end_min);
-      }
-#endif
+/* #ifndef WITH_MPI */
+/*       if (t->type == task_type_grav_top_level) { */
+/*         if (ci != NULL || cj != NULL) */
+/*           error("Top-level gravity task associated with a cell"); */
+/*       } else if (ci == NULL && cj == NULL) { */
+
+/*         error("Task not associated with cells!"); */
+/*       } else if (cj == NULL) { /\* self *\/ */
+
+/*         if (!cell_is_active_hydro(ci, e) && t->type != task_type_sort && */
+/*             t->type != task_type_send && t->type != task_type_recv && */
+/*             t->type != task_type_kick1 && t->type != task_type_drift_part &&
+ */
+/*             t->type != task_type_drift_gpart) */
+/*           error( */
+/*               "Task (type='%s/%s') should have been skipped ti_current=%lld "
+ */
+/*               "c->ti_end_min=%lld", */
+/*               taskID_names[t->type], subtaskID_names[t->subtype],
+ * e->ti_current, */
+/*               ci->ti_end_min); */
+
+/*         /\* Special case for sorts *\/ */
+/*         if (!cell_is_active_hydro(ci, e) && t->type == task_type_sort && */
+/*             !(ci->do_sort || ci->do_sub_sort)) */
+/*           error( */
+/*               "Task (type='%s/%s') should have been skipped ti_current=%lld "
+ */
+/*               "c->ti_end_min=%lld t->flags=%d", */
+/*               taskID_names[t->type], subtaskID_names[t->subtype],
+ * e->ti_current, */
+/*               ci->ti_end_min, t->flags); */
+
+/*         /\* Special case for kick1 *\/ */
+/*         if (!cell_is_starting(ci, e) && t->type == task_type_kick1 && */
+/*             t->flags == 0) */
+/*           error( */
+/*               "Task (type='%s/%s') should have been skipped ti_current=%lld "
+ */
+/*               "c->ti_end_min=%lld t->flags=%d", */
+/*               taskID_names[t->type], subtaskID_names[t->subtype],
+ * e->ti_current, */
+/*               ci->ti_end_min, t->flags); */
+
+/*       } else { /\* pair *\/ */
+/*         if (!cell_is_active_hydro(ci, e) && !cell_is_active_hydro(cj, e)) */
+
+/*           if (t->type != task_type_send && t->type != task_type_recv) */
+/*             error( */
+/*                 "Task (type='%s/%s') should have been skipped ti_current=%lld
+ * " */
+/*                 "ci->ti_end_min=%lld cj->ti_end_min=%lld", */
+/*                 taskID_names[t->type], subtaskID_names[t->subtype], */
+/*                 e->ti_current, ci->ti_end_min, cj->ti_end_min); */
+/*       } */
+/* #endif */
 #endif
 
       /* Different types of tasks... */
@@ -1970,7 +2048,8 @@ void *runner_main(void *data) {
           } else if (t->subtype == task_subtype_spart) {
             runner_do_recv_spart(r, ci, 1);
           } else if (t->subtype == task_subtype_multipole) {
-            ci->ti_old_multipole = e->ti_current;
+            cell_unpack_multipoles(ci, t->buff);
+            free(t->buff);
           } else {
             error("Unknown/invalid task subtype (%d).", t->subtype);
           }
diff --git a/src/runner_doiact.h b/src/runner_doiact.h
index 6181706103d1994fd3d59f3326a8193cca6f6a10..09b6a83ec29dd4f57b8a850ee77a2e5692e1c4ec 100644
--- a/src/runner_doiact.h
+++ b/src/runner_doiact.h
@@ -136,7 +136,7 @@ void DOPAIR1_NAIVE(struct runner *r, struct cell *restrict ci,
   TIMER_TIC;
 
   /* Anything to do here? */
-  if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return;
+  if (!cell_is_active_hydro(ci, e) && !cell_is_active_hydro(cj, e)) return;
 
   const int count_i = ci->count;
   const int count_j = cj->count;
@@ -224,7 +224,7 @@ void DOPAIR2_NAIVE(struct runner *r, struct cell *restrict ci,
   TIMER_TIC;
 
   /* Anything to do here? */
-  if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return;
+  if (!cell_is_active_hydro(ci, e) && !cell_is_active_hydro(cj, e)) return;
 
   const int count_i = ci->count;
   const int count_j = cj->count;
@@ -314,7 +314,7 @@ void DOSELF1_NAIVE(struct runner *r, struct cell *restrict c) {
   TIMER_TIC;
 
   /* Anything to do here? */
-  if (!cell_is_active(c, e)) return;
+  if (!cell_is_active_hydro(c, e)) return;
 
   const int count = c->count;
   struct part *restrict parts = c->parts;
@@ -392,7 +392,7 @@ void DOSELF2_NAIVE(struct runner *r, struct cell *restrict c) {
   TIMER_TIC;
 
   /* Anything to do here? */
-  if (!cell_is_active(c, e)) return;
+  if (!cell_is_active_hydro(c, e)) return;
 
   const int count = c->count;
   struct part *restrict parts = c->parts;
@@ -802,7 +802,7 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid,
   const double dj_min = sort_j[0].d;
   const float dx_max = (ci->dx_max_sort + cj->dx_max_sort);
 
-  if (cell_is_active(ci, e)) {
+  if (cell_is_active_hydro(ci, e)) {
 
     /* Loop over the parts in ci. */
     for (int pid = count_i - 1;
@@ -882,7 +882,7 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid,
     }   /* loop over the parts in ci. */
   }     /* Cell ci is active */
 
-  if (cell_is_active(cj, e)) {
+  if (cell_is_active_hydro(cj, e)) {
 
     /* Loop over the parts in cj. */
     for (int pjd = 0; pjd < count_j && sort_j[pjd].d - hj_max - dx_max < di_max;
@@ -979,7 +979,7 @@ void DOPAIR1_BRANCH(struct runner *r, struct cell *ci, struct cell *cj) {
   const struct engine *restrict e = r->e;
 
   /* Anything to do here? */
-  if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return;
+  if (!cell_is_active_hydro(ci, e) && !cell_is_active_hydro(cj, e)) return;
 
   /* Check that cells are drifted. */
   if (!cell_are_part_drifted(ci, e) || !cell_are_part_drifted(cj, e))
@@ -1103,11 +1103,11 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid,
   int count_active_i = 0, count_active_j = 0;
   struct entry *restrict sort_active_i = NULL, *restrict sort_active_j = NULL;
 
-  if (cell_is_all_active(ci, e)) {
+  if (cell_is_all_active_hydro(ci, e)) {
     /* If everybody is active don't bother copying */
     sort_active_i = sort_i;
     count_active_i = count_i;
-  } else if (cell_is_active(ci, e)) {
+  } else if (cell_is_active_hydro(ci, e)) {
     if (posix_memalign((void *)&sort_active_i, SWIFT_CACHE_ALIGNMENT,
                        sizeof(struct entry) * count_i) != 0)
       error("Failed to allocate active sortlists.");
@@ -1121,11 +1121,11 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid,
     }
   }
 
-  if (cell_is_all_active(cj, e)) {
+  if (cell_is_all_active_hydro(cj, e)) {
     /* If everybody is active don't bother copying */
     sort_active_j = sort_j;
     count_active_j = count_j;
-  } else if (cell_is_active(cj, e)) {
+  } else if (cell_is_active_hydro(cj, e)) {
     if (posix_memalign((void *)&sort_active_j, SWIFT_CACHE_ALIGNMENT,
                        sizeof(struct entry) * count_j) != 0)
       error("Failed to allocate active sortlists.");
@@ -1442,8 +1442,10 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid,
   }     /* Loop over all cj */
 
   /* Clean-up if necessary */
-  if (cell_is_active(ci, e) && !cell_is_all_active(ci, e)) free(sort_active_i);
-  if (cell_is_active(cj, e) && !cell_is_all_active(cj, e)) free(sort_active_j);
+  if (cell_is_active_hydro(ci, e) && !cell_is_all_active_hydro(ci, e))
+    free(sort_active_i);
+  if (cell_is_active_hydro(cj, e) && !cell_is_all_active_hydro(cj, e))
+    free(sort_active_j);
 
   TIMER_TOC(TIMER_DOPAIR);
 }
@@ -1462,7 +1464,7 @@ void DOPAIR2_BRANCH(struct runner *r, struct cell *ci, struct cell *cj) {
   const struct engine *restrict e = r->e;
 
   /* Anything to do here? */
-  if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return;
+  if (!cell_is_active_hydro(ci, e) && !cell_is_active_hydro(cj, e)) return;
 
   /* Check that cells are drifted. */
   if (!cell_are_part_drifted(ci, e) || !cell_are_part_drifted(cj, e))
@@ -1670,7 +1672,11 @@ void DOSELF1_BRANCH(struct runner *r, struct cell *c) {
   const struct engine *restrict e = r->e;
 
   /* Anything to do here? */
-  if (!cell_is_active(c, e)) return;
+  if (!cell_is_active_hydro(c, e)) return;
+
+  /* Did we mess up the recursion? */
+  if (c->h_max_old * kernel_gamma > c->dmin)
+    error("Cell smaller than smoothing length");
 
   /* Check that cells are drifted. */
   if (!cell_are_part_drifted(c, e)) error("Interacting undrifted cell.");
@@ -1817,7 +1823,11 @@ void DOSELF2_BRANCH(struct runner *r, struct cell *c) {
   const struct engine *restrict e = r->e;
 
   /* Anything to do here? */
-  if (!cell_is_active(c, e)) return;
+  if (!cell_is_active_hydro(c, e)) return;
+
+  /* Did we mess up the recursion? */
+  if (c->h_max_old * kernel_gamma > c->dmin)
+    error("Cell smaller than smoothing length");
 
   /* Check that cells are drifted. */
   if (!cell_are_part_drifted(c, e)) error("Interacting undrifted cell.");
@@ -1852,7 +1862,7 @@ void DOSUB_PAIR1(struct runner *r, struct cell *ci, struct cell *cj, int sid,
   TIMER_TIC;
 
   /* Should we even bother? */
-  if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return;
+  if (!cell_is_active_hydro(ci, e) && !cell_is_active_hydro(cj, e)) return;
   if (ci->count == 0 || cj->count == 0) return;
 
   /* Get the type of pair if not specified explicitly. */
@@ -2061,7 +2071,7 @@ void DOSUB_PAIR1(struct runner *r, struct cell *ci, struct cell *cj, int sid,
   }
 
   /* Otherwise, compute the pair directly. */
-  else if (cell_is_active(ci, e) || cell_is_active(cj, e)) {
+  else if (cell_is_active_hydro(ci, e) || cell_is_active_hydro(cj, e)) {
 
     /* Make sure both cells are drifted to the current timestep. */
     if (!cell_are_part_drifted(ci, e) || !cell_are_part_drifted(cj, e))
@@ -2094,7 +2104,7 @@ void DOSUB_SELF1(struct runner *r, struct cell *ci, int gettimer) {
   TIMER_TIC;
 
   /* Should we even bother? */
-  if (ci->count == 0 || !cell_is_active(ci, r->e)) return;
+  if (ci->count == 0 || !cell_is_active_hydro(ci, r->e)) return;
 
   /* Recurse? */
   if (cell_can_recurse_in_self_task(ci)) {
@@ -2142,7 +2152,7 @@ void DOSUB_PAIR2(struct runner *r, struct cell *ci, struct cell *cj, int sid,
   TIMER_TIC;
 
   /* Should we even bother? */
-  if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return;
+  if (!cell_is_active_hydro(ci, e) && !cell_is_active_hydro(cj, e)) return;
   if (ci->count == 0 || cj->count == 0) return;
 
   /* Get the type of pair if not specified explicitly. */
@@ -2351,7 +2361,7 @@ void DOSUB_PAIR2(struct runner *r, struct cell *ci, struct cell *cj, int sid,
   }
 
   /* Otherwise, compute the pair directly. */
-  else if (cell_is_active(ci, e) || cell_is_active(cj, e)) {
+  else if (cell_is_active_hydro(ci, e) || cell_is_active_hydro(cj, e)) {
 
     /* Make sure both cells are drifted to the current timestep. */
     if (!cell_are_part_drifted(ci, e) || !cell_are_part_drifted(cj, e))
@@ -2384,7 +2394,7 @@ void DOSUB_SELF2(struct runner *r, struct cell *ci, int gettimer) {
   TIMER_TIC;
 
   /* Should we even bother? */
-  if (ci->count == 0 || !cell_is_active(ci, r->e)) return;
+  if (ci->count == 0 || !cell_is_active_hydro(ci, r->e)) return;
 
   /* Recurse? */
   if (cell_can_recurse_in_self_task(ci)) {
@@ -2416,7 +2426,9 @@ void DOSUB_SUBSET(struct runner *r, struct cell *ci, struct part *parts,
   TIMER_TIC;
 
   /* Should we even bother? */
-  if (!cell_is_active(ci, e) && (cj == NULL || !cell_is_active(cj, e))) return;
+  if (!cell_is_active_hydro(ci, e) &&
+      (cj == NULL || !cell_is_active_hydro(cj, e)))
+    return;
   if (ci->count == 0 || (cj != NULL && cj->count == 0)) return;
 
   /* Find out in which sub-cell of ci the parts are. */
@@ -2970,7 +2982,7 @@ void DOSUB_SUBSET(struct runner *r, struct cell *ci, struct part *parts,
     }
 
     /* Otherwise, compute the pair directly. */
-    else if (cell_is_active(ci, e) || cell_is_active(cj, e)) {
+    else if (cell_is_active_hydro(ci, e) || cell_is_active_hydro(cj, e)) {
 
       /* Do any of the cells need to be drifted first? */
       if (!cell_are_part_drifted(cj, e)) error("Cell should be drifted!");
diff --git a/src/runner_doiact_fft.c b/src/runner_doiact_fft.c
index a8a882e4e765c2679b02347c96c2c7f1f49339f7..945bbb9d892ebe0768cc5cf7b466dcd0192aefed 100644
--- a/src/runner_doiact_fft.c
+++ b/src/runner_doiact_fft.c
@@ -57,23 +57,30 @@ __attribute__((always_inline)) INLINE static int row_major_id(int i, int j,
  * @param rho The density mesh.
  * @param N the size of the mesh along one axis.
  * @param fac The width of a mesh cell.
+ * @param dim The dimensions of the simulation box.
  */
 __attribute__((always_inline)) INLINE static void multipole_to_mesh_CIC(
-    const struct gravity_tensors* m, double* rho, int N, double fac) {
+    const struct gravity_tensors* m, double* rho, int N, double fac,
+    const double dim[3]) {
 
-  int i = (int)(fac * m->CoM[0]);
+  /* Box wrap the multipole's position */
+  const double CoM_x = box_wrap(m->CoM[0], 0., dim[0]);
+  const double CoM_y = box_wrap(m->CoM[1], 0., dim[1]);
+  const double CoM_z = box_wrap(m->CoM[2], 0., dim[2]);
+
+  int i = (int)(fac * CoM_x);
   if (i >= N) i = N - 1;
-  const double dx = fac * m->CoM[0] - i;
+  const double dx = fac * CoM_x - i;
   const double tx = 1. - dx;
 
-  int j = (int)(fac * m->CoM[1]);
+  int j = (int)(fac * CoM_y);
   if (j >= N) j = N - 1;
-  const double dy = fac * m->CoM[1] - j;
+  const double dy = fac * CoM_y - j;
   const double ty = 1. - dy;
 
-  int k = (int)(fac * m->CoM[2]);
+  int k = (int)(fac * CoM_z);
   if (k >= N) k = N - 1;
-  const double dz = fac * m->CoM[2] - k;
+  const double dz = fac * CoM_z - k;
   const double tz = 1. - dz;
 
 #ifdef SWIFT_DEBUG_CHECKS
@@ -101,23 +108,30 @@ __attribute__((always_inline)) INLINE static void multipole_to_mesh_CIC(
  * @param pot The potential mesh.
  * @param N the size of the mesh along one axis.
  * @param fac width of a mesh cell.
+ * @param dim The dimensions of the simulation box.
  */
 __attribute__((always_inline)) INLINE static void mesh_to_multipole_CIC(
-    struct gravity_tensors* m, double* pot, int N, double fac) {
+    struct gravity_tensors* m, const double* pot, int N, double fac,
+    const double dim[3]) {
+
+  /* Box wrap the multipole's position */
+  const double CoM_x = box_wrap(m->CoM[0], 0., dim[0]);
+  const double CoM_y = box_wrap(m->CoM[1], 0., dim[1]);
+  const double CoM_z = box_wrap(m->CoM[2], 0., dim[2]);
 
-  int i = (int)(fac * m->CoM[0]);
+  int i = (int)(fac * CoM_x);
   if (i >= N) i = N - 1;
-  const double dx = fac * m->CoM[0] - i;
+  const double dx = fac * CoM_x - i;
   const double tx = 1. - dx;
 
-  int j = (int)(fac * m->CoM[1]);
+  int j = (int)(fac * CoM_y);
   if (j >= N) j = N - 1;
-  const double dy = fac * m->CoM[1] - j;
+  const double dy = fac * CoM_y - j;
   const double ty = 1. - dy;
 
-  int k = (int)(fac * m->CoM[2]);
+  int k = (int)(fac * CoM_z);
   if (k >= N) k = N - 1;
-  const double dz = fac * m->CoM[2] - k;
+  const double dz = fac * CoM_z - k;
   const double tz = 1. - dz;
 
 #ifdef SWIFT_DEBUG_CHECKS
@@ -155,6 +169,7 @@ void runner_do_grav_fft(struct runner* r, int timer) {
   const double a_smooth = e->gravity_properties->a_smooth;
   const double box_size = s->dim[0];
   const int cdim[3] = {s->cdim[0], s->cdim[1], s->cdim[2]};
+  const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]};
 
   TIMER_TIC;
 
@@ -195,7 +210,7 @@ void runner_do_grav_fft(struct runner* r, int timer) {
   /* Do a CIC mesh assignment of the multipoles */
   bzero(rho, N * N * N * sizeof(double));
   for (int i = 0; i < nr_cells; ++i)
-    multipole_to_mesh_CIC(&multipoles[i], rho, N, cell_fac);
+    multipole_to_mesh_CIC(&multipoles[i], rho, N, cell_fac, dim);
 
   /* Fourier transform to go to magic-land */
   fftw_execute(forward_plan);
@@ -272,7 +287,7 @@ void runner_do_grav_fft(struct runner* r, int timer) {
 
   /* Get the potential from the mesh using CIC */
   for (int i = 0; i < nr_cells; ++i)
-    mesh_to_multipole_CIC(&multipoles[i], rho, N, cell_fac);
+    mesh_to_multipole_CIC(&multipoles[i], rho, N, cell_fac, dim);
 
   /* Clean-up the mess */
   fftw_destroy_plan(forward_plan);
diff --git a/src/runner_doiact_grav.h b/src/runner_doiact_grav.h
index 5d4de609eb05511b579c44b117231f541e566a2c..384e7fa8429f6675846e10646fb873e8377b070a 100644
--- a/src/runner_doiact_grav.h
+++ b/src/runner_doiact_grav.h
@@ -58,7 +58,7 @@ void runner_do_grav_down(struct runner *r, struct cell *c, int timer) {
       struct cell *cp = c->progeny[k];
 
       /* Do we have a progenitor with any active g-particles ? */
-      if (cp != NULL && cell_is_active(cp, e)) {
+      if (cp != NULL && cell_is_active_gravity(cp, e)) {
 
 #ifdef SWIFT_DEBUG_CHECKS
         if (cp->ti_old_multipole != e->ti_current)
@@ -140,7 +140,7 @@ void runner_dopair_grav_mm(const struct runner *r, struct cell *restrict ci,
   TIMER_TIC;
 
   /* Anything to do here? */
-  if (!cell_is_active(ci, e)) return;
+  if (!cell_is_active_gravity(ci, e) || ci->nodeID != engine_rank) return;
 
   /* Short-cut to the multipole */
   const struct multipole *multi_j = &cj->multipole->m_pole;
@@ -148,7 +148,8 @@ void runner_dopair_grav_mm(const struct runner *r, struct cell *restrict ci,
 #ifdef SWIFT_DEBUG_CHECKS
   if (ci == cj) error("Interacting a cell with itself using M2L");
 
-  if (multi_j->M_000 == 0.f) error("Multipole does not seem to have been set.");
+  if (multi_j->num_gpart == 0)
+    error("Multipole does not seem to have been set.");
 
   if (ci->multipole->pot.ti_init != e->ti_current)
     error("ci->grav tensor not initialised.");
@@ -435,7 +436,7 @@ void runner_dopair_grav_pp(struct runner *r, struct cell *ci, struct cell *cj) {
   TIMER_TIC;
 
   /* Anything to do here? */
-  if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return;
+  if (!cell_is_active_gravity(ci, e) && !cell_is_active_gravity(cj, e)) return;
 
   /* Check that we are not doing something stupid */
   if (ci->split || cj->split) error("Running P-P on splitable cells");
@@ -464,8 +465,8 @@ void runner_dopair_grav_pp(struct runner *r, struct cell *ci, struct cell *cj) {
   space_getsid(s, &ci, &cj, cell_shift);
 
   /* Record activity status */
-  const int ci_active = cell_is_active(ci, e);
-  const int cj_active = cell_is_active(cj, e);
+  const int ci_active = cell_is_active_gravity(ci, e);
+  const int cj_active = cell_is_active_gravity(cj, e);
 
   /* Do we need to drift the multipoles ? */
   if (cj_active && ci->ti_old_multipole != e->ti_current)
@@ -638,7 +639,7 @@ void runner_doself_grav_pp_full(struct runner *r, struct cell *c) {
   /* Cell properties */
   const int gcount = c->gcount;
   struct gpart *restrict gparts = c->gparts;
-  const int c_active = cell_is_active(c, e);
+  const int c_active = cell_is_active_gravity(c, e);
   const double loc[3] = {c->loc[0] + 0.5 * c->width[0],
                          c->loc[1] + 0.5 * c->width[1],
                          c->loc[2] + 0.5 * c->width[2]};
@@ -764,7 +765,7 @@ void runner_doself_grav_pp_truncated(struct runner *r, struct cell *c) {
   /* Cell properties */
   const int gcount = c->gcount;
   struct gpart *restrict gparts = c->gparts;
-  const int c_active = cell_is_active(c, e);
+  const int c_active = cell_is_active_gravity(c, e);
   const double loc[3] = {c->loc[0] + 0.5 * c->width[0],
                          c->loc[1] + 0.5 * c->width[1],
                          c->loc[2] + 0.5 * c->width[2]};
@@ -891,7 +892,7 @@ void runner_doself_grav_pp(struct runner *r, struct cell *c) {
 #endif
 
   /* Anything to do here? */
-  if (!cell_is_active(c, e)) return;
+  if (!cell_is_active_gravity(c, e)) return;
 
   /* Check that we are not doing something stupid */
   if (c->split) error("Running P-P on a splitable cell");
@@ -934,6 +935,7 @@ void runner_dopair_grav(struct runner *r, struct cell *ci, struct cell *cj,
   /* Some constants */
   const struct engine *e = r->e;
   const struct space *s = e->s;
+  const int nodeID = e->nodeID;
   const int periodic = s->periodic;
   const double cell_width = s->width[0];
   const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]};
@@ -942,6 +944,11 @@ void runner_dopair_grav(struct runner *r, struct cell *ci, struct cell *cj,
   const double max_distance = props->a_smooth * props->r_cut_max * cell_width;
   const double max_distance2 = max_distance * max_distance;
 
+  /* Anything to do here? */
+  if (!((cell_is_active_gravity(ci, e) && ci->nodeID == nodeID) ||
+        (cell_is_active_gravity(cj, e) && cj->nodeID == nodeID)))
+    return;
+
 #ifdef SWIFT_DEBUG_CHECKS
 
   const int gcount_i = ci->gcount;
@@ -954,17 +961,14 @@ void runner_dopair_grav(struct runner *r, struct cell *ci, struct cell *cj,
   /* Sanity check */
   if (ci == cj) error("Pair interaction between a cell and itself.");
 
-  if (cell_is_active(ci, e) && ci->ti_old_multipole != e->ti_current)
+  if (cell_is_active_gravity(ci, e) && ci->ti_old_multipole != e->ti_current)
     error("ci->multipole not drifted.");
-  if (cell_is_active(cj, e) && cj->ti_old_multipole != e->ti_current)
+  if (cell_is_active_gravity(cj, e) && cj->ti_old_multipole != e->ti_current)
     error("cj->multipole not drifted.");
 #endif
 
   TIMER_TIC;
 
-  /* Anything to do here? */
-  if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return;
-
   /* Recover the multipole information */
   struct gravity_tensors *const multi_i = ci->multipole;
   struct gravity_tensors *const multi_j = cj->multipole;
@@ -987,9 +991,9 @@ void runner_dopair_grav(struct runner *r, struct cell *ci, struct cell *cj,
 
 #ifdef SWIFT_DEBUG_CHECKS
     /* Need to account for the interactions we missed */
-    if (cell_is_active(ci, e))
+    if (cell_is_active_gravity(ci, e))
       multi_i->pot.num_interacted += multi_j->m_pole.num_gpart;
-    if (cell_is_active(cj, e))
+    if (cell_is_active_gravity(cj, e))
       multi_j->pot.num_interacted += multi_i->m_pole.num_gpart;
 #endif
     return;
@@ -1090,7 +1094,7 @@ void runner_doself_grav(struct runner *r, struct cell *c, int gettimer) {
   TIMER_TIC;
 
   /* Anything to do here? */
-  if (!cell_is_active(c, e)) return;
+  if (!cell_is_active_gravity(c, e)) return;
 
   /* If the cell is split, interact each progeny with itself, and with
      each of its siblings. */
@@ -1144,11 +1148,14 @@ void runner_do_grav_long_range(struct runner *r, struct cell *ci, int timer) {
   TIMER_TIC;
 
   /* Recover the list of top-level cells */
-  struct cell *cells = e->s->cells_top;
-  const int nr_cells = e->s->nr_cells;
+  struct cell *cells = s->cells_top;
+  const int nr_cells = s->nr_cells;
 
   /* Anything to do here? */
-  if (!cell_is_active(ci, e)) return;
+  if (!cell_is_active_gravity(ci, e)) return;
+
+  if (ci->nodeID != engine_rank)
+    error("Non-local cell in long-range gravity task!");
 
   /* Check multipole has been drifted */
   if (ci->ti_old_multipole != e->ti_current)
@@ -1163,14 +1170,14 @@ void runner_do_grav_long_range(struct runner *r, struct cell *ci, int timer) {
 
   /* Loop over all the top-level cells and go for a M-M interaction if
    * well-separated */
-  for (int i = 0; i < nr_cells; ++i) {
+  for (int n = 0; n < nr_cells; ++n) {
 
     /* Handle on the top-level cell and it's gravity business*/
-    struct cell *cj = &cells[i];
+    struct cell *cj = &cells[n];
     const struct gravity_tensors *const multi_j = cj->multipole;
 
-    /* Avoid stupid cases */
-    if (ci == cj || cj->gcount == 0) continue;
+    /* Avoid self contributions */
+    if (ci == cj) continue;
 
     /* Get the distance between the CoMs at the last rebuild*/
     double dx_r = CoM_rebuild_i[0] - multi_j->CoM_rebuild[0];
diff --git a/src/runner_doiact_vec.c b/src/runner_doiact_vec.c
index faf782894898b5784bdeb05669170c08106dc64d..05ad1ba7c1d4f171e993b870f11acbbe8707a3ef 100644
--- a/src/runner_doiact_vec.c
+++ b/src/runner_doiact_vec.c
@@ -535,7 +535,7 @@ __attribute__((always_inline)) INLINE void runner_doself1_density_vec(
   TIMER_TIC;
 
   /* Anything to do here? */
-  if (!cell_is_active(c, e)) return;
+  if (!cell_is_active_hydro(c, e)) return;
 
   if (!cell_are_part_drifted(c, e)) error("Interacting undrifted cell.");
 
@@ -992,7 +992,7 @@ __attribute__((always_inline)) INLINE void runner_doself2_force_vec(
 
   TIMER_TIC;
 
-  if (!cell_is_active(c, e)) return;
+  if (!cell_is_active_hydro(c, e)) return;
 
   if (!cell_are_part_drifted(c, e)) error("Interacting undrifted cell.");
 
@@ -1200,8 +1200,8 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
   const double di_max = sort_i[count_i - 1].d - rshift;
   const double dj_min = sort_j[0].d;
   const float dx_max = (ci->dx_max_sort + cj->dx_max_sort);
-  const int active_ci = cell_is_active(ci, e);
-  const int active_cj = cell_is_active(cj, e);
+  const int active_ci = cell_is_active_hydro(ci, e);
+  const int active_cj = cell_is_active_hydro(cj, e);
 
 #ifdef SWIFT_DEBUG_CHECKS
   /* Check that particles have been drifted to the current time */
@@ -1574,8 +1574,8 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
   const double di_max = sort_i[count_i - 1].d - rshift;
   const double dj_min = sort_j[0].d;
   const float dx_max = (ci->dx_max_sort + cj->dx_max_sort);
-  const int active_ci = cell_is_active(ci, e);
-  const int active_cj = cell_is_active(cj, e);
+  const int active_ci = cell_is_active_hydro(ci, e);
+  const int active_cj = cell_is_active_hydro(cj, e);
 
 #ifdef SWIFT_DEBUG_CHECKS
   /* Check that particles have been drifted to the current time */
diff --git a/src/scheduler.c b/src/scheduler.c
index 5af9e9c9e19248838bb98c3ee2305d7dc54324f0..5beab065ffe0c135b2cbb247c531728c5238b865 100644
--- a/src/scheduler.c
+++ b/src/scheduler.c
@@ -150,7 +150,7 @@ void scheduler_write_dependencies(struct scheduler *s, int verbose) {
 
   /* Write header */
   fprintf(f, "digraph task_dep {\n");
-  fprintf(f, "label=\"Task dependencies for SWIFT %s\"", git_revision());
+  fprintf(f, "label=\"Task dependencies for SWIFT %s\";\n", git_revision());
   fprintf(f, "\t compound=true;\n");
   fprintf(f, "\t ratio=0.66;\n");
   fprintf(f, "\t node[nodesep=0.15];\n");
@@ -217,10 +217,82 @@ void scheduler_write_dependencies(struct scheduler *s, int verbose) {
 
         /* Write to the ffile */
         fprintf(f, "\t %s->%s;\n", ta_name, tb_name);
+
+        /* Change colour of implicit tasks */
+        if (ta->implicit)
+          fprintf(f, "\t %s [style = filled];\n\t %s [color = lightgrey];\n",
+                  ta_name, ta_name);
+        if (tb->implicit)
+          fprintf(f, "\t %s [style = filled];\n\t %s [color = lightgrey];\n",
+                  tb_name, tb_name);
       }
     }
   }
 
+  int density_cluster[4] = {0};
+  int force_cluster[4] = {0};
+  int gravity_cluster[4] = {0};
+
+  /* Modify the style of some tasks on the plot */
+  for (int type = 0; type < task_type_count; ++type) {
+
+    for (int subtype = 0; subtype < task_subtype_count; ++subtype) {
+
+      const int ind = 2 * (type * task_subtype_count + subtype) * max_nber_dep;
+
+      /* Does this task/sub-task exist? */
+      if (table[ind] != -1) {
+
+        /* Make MPI tasks a different shape */
+        if (type == task_type_send || type == task_type_recv)
+          fprintf(f, "\t \"%s %s\" [shape = diamond];\n", taskID_names[type],
+                  subtaskID_names[subtype]);
+
+        for (int k = 0; k < 4; ++k) {
+          if (type == task_type_self + k && subtype == task_subtype_density)
+            density_cluster[k] = 1;
+          if (type == task_type_self + k && subtype == task_subtype_force)
+            force_cluster[k] = 1;
+          if (type == task_type_self + k && subtype == task_subtype_grav)
+            gravity_cluster[k] = 1;
+        }
+        if (type == task_type_grav_top_level) gravity_cluster[2] = 1;
+        if (type == task_type_grav_long_range) gravity_cluster[3] = 1;
+      }
+    }
+  }
+
+  /* Make a cluster for the density tasks */
+  fprintf(f, "\t subgraph cluster0{\n");
+  fprintf(f, "\t\t label=\"\";\n");
+  for (int k = 0; k < 4; ++k)
+    if (density_cluster[k])
+      fprintf(f, "\t\t \"%s %s\";\n", taskID_names[task_type_self + k],
+              subtaskID_names[task_subtype_density]);
+  fprintf(f, "\t};\n");
+
+  /* Make a cluster for the force tasks */
+  fprintf(f, "\t subgraph cluster1{\n");
+  fprintf(f, "\t\t label=\"\";\n");
+  for (int k = 0; k < 4; ++k)
+    if (force_cluster[k])
+      fprintf(f, "\t\t \"%s %s\";\n", taskID_names[task_type_self + k],
+              subtaskID_names[task_subtype_force]);
+  fprintf(f, "\t};\n");
+
+  /* Make a cluster for the gravity tasks */
+  fprintf(f, "\t subgraph cluster2{\n");
+  fprintf(f, "\t\t label=\"\";\n");
+  for (int k = 0; k < 2; ++k)
+    if (gravity_cluster[k])
+      fprintf(f, "\t\t \"%s %s\";\n", taskID_names[task_type_self + k],
+              subtaskID_names[task_subtype_grav]);
+  if (gravity_cluster[2])
+    fprintf(f, "\t\t %s;\n", taskID_names[task_type_grav_top_level]);
+  if (gravity_cluster[3])
+    fprintf(f, "\t\t %s;\n", taskID_names[task_type_grav_long_range]);
+  fprintf(f, "\t};\n");
+
   /* Be clean */
   fprintf(f, "}");
   fclose(f);
@@ -814,7 +886,8 @@ void scheduler_splittasks_mapper(void *map_data, int num_elements,
     } else if (t->subtype == task_subtype_grav) {
       scheduler_splittask_gravity(t, s);
     } else if (t->type == task_type_grav_top_level ||
-               t->type == task_type_grav_ghost) {
+               t->type == task_type_grav_ghost_in ||
+               t->type == task_type_grav_ghost_out) {
       /* For future use */
     } else {
       error("Unexpected task sub-type");
@@ -1113,15 +1186,32 @@ void scheduler_reweight(struct scheduler *s, int verbose) {
         cost = wscale * intrinsics_popcount(t->flags) * t->ci->count *
                (sizeof(int) * 8 - intrinsics_clz(t->ci->count));
         break;
+
       case task_type_self:
-        cost = 1 * wscale * t->ci->count * t->ci->count;
+        if (t->subtype == task_subtype_grav)
+          cost = 1 * wscale * t->ci->gcount * t->ci->gcount;
+        else if (t->subtype == task_subtype_external_grav)
+          cost = 1 * wscale * t->ci->gcount;
+        else
+          cost = 1 * wscale * t->ci->count * t->ci->count;
         break;
+
       case task_type_pair:
-        if (t->ci->nodeID != nodeID || t->cj->nodeID != nodeID)
-          cost = 3 * wscale * t->ci->count * t->cj->count * sid_scale[t->flags];
-        else
-          cost = 2 * wscale * t->ci->count * t->cj->count * sid_scale[t->flags];
+        if (t->subtype == task_subtype_grav) {
+          if (t->ci->nodeID != nodeID || t->cj->nodeID != nodeID)
+            cost = 3 * wscale * t->ci->gcount * t->cj->gcount;
+          else
+            cost = 2 * wscale * t->ci->gcount * t->cj->gcount;
+        } else {
+          if (t->ci->nodeID != nodeID || t->cj->nodeID != nodeID)
+            cost =
+                3 * wscale * t->ci->count * t->cj->count * sid_scale[t->flags];
+          else
+            cost =
+                2 * wscale * t->ci->count * t->cj->count * sid_scale[t->flags];
+        }
         break;
+
       case task_type_sub_pair:
         if (t->ci->nodeID != nodeID || t->cj->nodeID != nodeID) {
           if (t->flags < 0)
@@ -1137,11 +1227,15 @@ void scheduler_reweight(struct scheduler *s, int verbose) {
                 2 * wscale * t->ci->count * t->cj->count * sid_scale[t->flags];
         }
         break;
+
       case task_type_sub_self:
         cost = 1 * wscale * t->ci->count * t->ci->count;
         break;
       case task_type_ghost:
-        if (t->ci == t->ci->super) cost = wscale * t->ci->count;
+        if (t->ci == t->ci->super_hydro) cost = wscale * t->ci->count;
+        break;
+      case task_type_extra_ghost:
+        if (t->ci == t->ci->super_hydro) cost = wscale * t->ci->count;
         break;
       case task_type_drift_part:
         cost = wscale * t->ci->count;
@@ -1149,14 +1243,23 @@ void scheduler_reweight(struct scheduler *s, int verbose) {
       case task_type_drift_gpart:
         cost = wscale * t->ci->gcount;
         break;
+      case task_type_init_grav:
+        cost = wscale * t->ci->gcount;
+        break;
+      case task_type_grav_down:
+        cost = wscale * t->ci->gcount;
+        break;
+      case task_type_grav_long_range:
+        cost = wscale * t->ci->gcount;
+        break;
       case task_type_kick1:
-        cost = wscale * t->ci->count;
+        cost = wscale * t->ci->count + wscale * t->ci->gcount;
         break;
       case task_type_kick2:
-        cost = wscale * t->ci->count;
+        cost = wscale * t->ci->count + wscale * t->ci->gcount;
         break;
       case task_type_timestep:
-        cost = wscale * t->ci->count;
+        cost = wscale * t->ci->count + wscale * t->ci->gcount;
         break;
       case task_type_send:
         cost = 10 * wscale * t->ci->count * t->ci->count;
@@ -1259,73 +1362,82 @@ void scheduler_start(struct scheduler *s) {
     scheduler_rewait_mapper(s->tid_active, s->active_count, s);
   }
 
-/* Check we have not missed an active task */
-#ifdef SWIFT_DEBUG_CHECKS
-
-  const integertime_t ti_current = s->space->e->ti_current;
-
-  if (ti_current > 0) {
-
-    for (int k = 0; k < s->nr_tasks; k++) {
-
-      struct task *t = &s->tasks[k];
-      struct cell *ci = t->ci;
-      struct cell *cj = t->cj;
-
-      if (t->type == task_type_none) continue;
-
-      /* Don't check MPI stuff */
-      if (t->type == task_type_send || t->type == task_type_recv) continue;
-
-      /* Don't check the FFT task */
-      if (t->type == task_type_grav_top_level ||
-          t->type == task_type_grav_ghost)
-        continue;
-
-      if (ci == NULL && cj == NULL) {
-
-        error("Task not associated with cells!");
-
-      } else if (cj == NULL) { /* self */
-
-        if (ci->ti_end_min == ti_current && t->skip &&
-            t->type != task_type_sort && t->type != task_type_drift_part &&
-            t->type != task_type_drift_gpart)
-          error(
-              "Task (type='%s/%s') should not have been skipped "
-              "ti_current=%lld "
-              "c->ti_end_min=%lld",
-              taskID_names[t->type], subtaskID_names[t->subtype], ti_current,
-              ci->ti_end_min);
-
-        /* Special treatment for sort tasks */
-        /* if (ci->ti_end_min == ti_current && t->skip &&
-            t->type == task_type_sort && t->flags == 0)
-          error(
-              "Task (type='%s/%s') should not have been skipped "
-              "ti_current=%lld "
-              "c->ti_end_min=%lld t->flags=%d",
-              taskID_names[t->type], subtaskID_names[t->subtype], ti_current,
-              ci->ti_end_min, t->flags); */
-
-      } else { /* pair */
-
-        if (t->skip) {
-
-          /* Check that the pair is active if the local cell is active */
-          if ((ci->ti_end_min == ti_current && ci->nodeID == engine_rank) ||
-              (cj->ti_end_min == ti_current && cj->nodeID == engine_rank))
-            error(
-                "Task (type='%s/%s') should not have been skipped "
-                "ti_current=%lld "
-                "ci->ti_end_min=%lld cj->ti_end_min=%lld",
-                taskID_names[t->type], subtaskID_names[t->subtype], ti_current,
-                ci->ti_end_min, cj->ti_end_min);
-        }
-      }
-    }
-  }
-#endif
+  /* Check we have not missed an active task */
+  /* #ifdef SWIFT_DEBUG_CHECKS */
+
+  /*   const integertime_t ti_current = s->space->e->ti_current; */
+
+  /*   if (ti_current > 0) { */
+
+  /*     for (int k = 0; k < s->nr_tasks; k++) { */
+
+  /*       struct task *t = &s->tasks[k]; */
+  /*       struct cell *ci = t->ci; */
+  /*       struct cell *cj = t->cj; */
+
+  /*       if (t->type == task_type_none) continue; */
+
+  /*       /\* Don't check MPI stuff *\/ */
+  /*       if (t->type == task_type_send || t->type == task_type_recv) continue;
+   */
+
+  /*       /\* Don't check the FFT task *\/ */
+  /*       if (t->type == task_type_grav_top_level || */
+  /*           t->type == task_type_grav_ghost_in || */
+  /*           t->type == task_type_grav_ghost_out) */
+  /*         continue; */
+
+  /*       if (ci == NULL && cj == NULL) { */
+
+  /*         error("Task not associated with cells!"); */
+
+  /*       } else if (cj == NULL) { /\* self *\/ */
+
+  /*         if (ci->ti_hydro_end_min == ti_current && t->skip && */
+  /*             t->type != task_type_sort && t->type != task_type_drift_part &&
+   */
+  /*             t->type != task_type_drift_gpart) */
+  /*           error( */
+  /*               "Task (type='%s/%s') should not have been skipped " */
+  /*               "ti_current=%lld " */
+  /*               "c->ti_hydro_end_min=%lld", */
+  /*               taskID_names[t->type], subtaskID_names[t->subtype],
+   * ti_current, */
+  /*               ci->ti_hydro_end_min); */
+
+  /*         /\* Special treatment for sort tasks *\/ */
+  /*         /\* if (ci->ti_end_min == ti_current && t->skip && */
+  /*             t->type == task_type_sort && t->flags == 0) */
+  /*           error( */
+  /*               "Task (type='%s/%s') should not have been skipped " */
+  /*               "ti_current=%lld " */
+  /*               "c->ti_end_min=%lld t->flags=%d", */
+  /*               taskID_names[t->type], subtaskID_names[t->subtype],
+   * ti_current, */
+  /*               ci->ti_end_min, t->flags); *\/ */
+
+  /*       } else { /\* pair *\/ */
+
+  /*         if (t->skip) { */
+
+  /*           /\* Check that the pair is active if the local cell is active *\/
+   */
+  /*           if ((ci->ti_hydro_end_min == ti_current && ci->nodeID ==
+   * engine_rank) || */
+  /*               (cj->ti_hydro_end_min == ti_current && cj->nodeID ==
+   * engine_rank)) */
+  /*             error( */
+  /*                 "Task (type='%s/%s') should not have been skipped " */
+  /*                 "ti_current=%lld " */
+  /*                 "ci->ti_hydro_end_min=%lld cj->ti_hydro_end_min=%lld", */
+  /*                 taskID_names[t->type], subtaskID_names[t->subtype],
+   * ti_current, */
+  /*                 ci->ti_hydro_end_min, cj->ti_hydro_end_min); */
+  /*         } */
+  /*       } */
+  /*     } */
+  /*   } */
+  /* #endif */
 
   /* Loop over the tasks and enqueue whoever is ready. */
   if (s->active_count > 1000) {
@@ -1381,12 +1493,21 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) {
     switch (t->type) {
       case task_type_self:
       case task_type_sub_self:
+        if (t->subtype == task_subtype_grav)
+          qid = t->ci->super_gravity->owner;
+        else
+          qid = t->ci->super_hydro->owner;
+        break;
       case task_type_sort:
       case task_type_ghost:
-      case task_type_kick1:
-      case task_type_kick2:
       case task_type_drift_part:
+        qid = t->ci->super_hydro->owner;
+        break;
       case task_type_drift_gpart:
+        qid = t->ci->super_gravity->owner;
+        break;
+      case task_type_kick1:
+      case task_type_kick2:
       case task_type_timestep:
         qid = t->ci->super->owner;
         break;
@@ -1419,8 +1540,10 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) {
           err = MPI_Irecv(t->ci->sparts, t->ci->scount, spart_mpi_type,
                           t->ci->nodeID, t->flags, MPI_COMM_WORLD, &t->req);
         } else if (t->subtype == task_subtype_multipole) {
-          err = MPI_Irecv(t->ci->multipole, 1, multipole_mpi_type,
-                          t->ci->nodeID, t->flags, MPI_COMM_WORLD, &t->req);
+          t->buff = malloc(sizeof(struct gravity_tensors) * t->ci->pcell_size);
+          err = MPI_Irecv(
+              t->buff, sizeof(struct gravity_tensors) * t->ci->pcell_size,
+              MPI_BYTE, t->ci->nodeID, t->flags, MPI_COMM_WORLD, &t->req);
         } else {
           error("Unknown communication sub-type");
         }
@@ -1473,13 +1596,11 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) {
             err = MPI_Issend(t->ci->sparts, t->ci->scount, spart_mpi_type,
                              t->cj->nodeID, t->flags, MPI_COMM_WORLD, &t->req);
         } else if (t->subtype == task_subtype_multipole) {
-          if ((t->ci->scount * sizeof(struct gravity_tensors)) >
-              s->mpi_message_limit)
-            err = MPI_Isend(t->ci->multipole, 1, multipole_mpi_type,
-                            t->cj->nodeID, t->flags, MPI_COMM_WORLD, &t->req);
-          else
-            err = MPI_Issend(t->ci->multipole, 1, multipole_mpi_type,
-                             t->cj->nodeID, t->flags, MPI_COMM_WORLD, &t->req);
+          t->buff = malloc(sizeof(struct gravity_tensors) * t->ci->pcell_size);
+          cell_pack_multipoles(t->ci, t->buff);
+          err = MPI_Isend(
+              t->buff, t->ci->pcell_size * sizeof(struct gravity_tensors),
+              MPI_BYTE, t->cj->nodeID, t->flags, MPI_COMM_WORLD, &t->req);
         } else {
           error("Unknown communication sub-type");
         }
diff --git a/src/space.c b/src/space.c
index b9fe639b195226da3d3168581ebd26f3273d28eb..2c7b64badd317190ccc13fda4ac0e9064b78408d 100644
--- a/src/space.c
+++ b/src/space.c
@@ -64,6 +64,9 @@ int space_subsize_pair = space_subsize_pair_default;
 int space_subsize_self = space_subsize_self_default;
 int space_subsize_self_grav = space_subsize_self_grav_default;
 int space_maxsize = space_maxsize_default;
+#ifdef SWIFT_DEBUG_CHECKS
+int last_cell_id;
+#endif
 
 /**
  * @brief Interval stack necessary for parallel particle sorting.
@@ -225,15 +228,18 @@ void space_rebuild_recycle_mapper(void *map_data, int num_elements,
     c->drift_gpart = NULL;
     c->cooling = NULL;
     c->sourceterms = NULL;
-    c->grav_ghost[0] = NULL;
-    c->grav_ghost[1] = NULL;
+    c->grav_ghost_in = NULL;
+    c->grav_ghost_out = NULL;
     c->grav_long_range = NULL;
     c->grav_down = NULL;
     c->super = c;
+    c->super_hydro = c;
+    c->super_gravity = c;
     c->parts = NULL;
     c->xparts = NULL;
     c->gparts = NULL;
     c->sparts = NULL;
+    if (s->gravity) bzero(c->multipole, sizeof(struct gravity_tensors));
     for (int i = 0; i < 13; i++)
       if (c->sort[i] != NULL) {
         free(c->sort[i]);
@@ -243,11 +249,13 @@ void space_rebuild_recycle_mapper(void *map_data, int num_elements,
     c->recv_xv = NULL;
     c->recv_rho = NULL;
     c->recv_gradient = NULL;
+    c->recv_grav = NULL;
     c->recv_ti = NULL;
 
     c->send_xv = NULL;
     c->send_rho = NULL;
     c->send_gradient = NULL;
+    c->send_grav = NULL;
     c->send_ti = NULL;
 #endif
   }
@@ -457,6 +465,8 @@ void space_regrid(struct space *s, int verbose) {
           c->gcount = 0;
           c->scount = 0;
           c->super = c;
+          c->super_hydro = c;
+          c->super_gravity = c;
           c->ti_old_part = ti_old;
           c->ti_old_gpart = ti_old;
           c->ti_old_multipole = ti_old;
@@ -1990,7 +2000,10 @@ void space_split_recursive(struct space *s, struct cell *c,
   const int depth = c->depth;
   int maxdepth = 0;
   float h_max = 0.0f;
-  integertime_t ti_end_min = max_nr_timesteps, ti_end_max = 0, ti_beg_max = 0;
+  integertime_t ti_hydro_end_min = max_nr_timesteps, ti_hydro_end_max = 0,
+                ti_hydro_beg_max = 0;
+  integertime_t ti_gravity_end_min = max_nr_timesteps, ti_gravity_end_max = 0,
+                ti_gravity_beg_max = 0;
   struct part *parts = c->parts;
   struct gpart *gparts = c->gparts;
   struct spart *sparts = c->sparts;
@@ -2079,6 +2092,11 @@ void space_split_recursive(struct space *s, struct cell *c,
       cp->nodeID = c->nodeID;
       cp->parent = c;
       cp->super = NULL;
+      cp->super_hydro = NULL;
+      cp->super_gravity = NULL;
+#ifdef SWIFT_DEBUG_CHECKS
+      cp->cellID = last_cell_id++;
+#endif
     }
 
     /* Split the cell data. */
@@ -2100,9 +2118,18 @@ void space_split_recursive(struct space *s, struct cell *c,
         progeny_gbuff += c->progeny[k]->gcount;
         progeny_sbuff += c->progeny[k]->scount;
         h_max = max(h_max, c->progeny[k]->h_max);
-        ti_end_min = min(ti_end_min, c->progeny[k]->ti_end_min);
-        ti_end_max = max(ti_end_max, c->progeny[k]->ti_end_max);
-        ti_beg_max = max(ti_beg_max, c->progeny[k]->ti_beg_max);
+        ti_hydro_end_min =
+            min(ti_hydro_end_min, c->progeny[k]->ti_hydro_end_min);
+        ti_hydro_end_max =
+            max(ti_hydro_end_max, c->progeny[k]->ti_hydro_end_max);
+        ti_hydro_beg_max =
+            max(ti_hydro_beg_max, c->progeny[k]->ti_hydro_beg_max);
+        ti_gravity_end_min =
+            min(ti_gravity_end_min, c->progeny[k]->ti_gravity_end_min);
+        ti_gravity_end_max =
+            max(ti_gravity_end_max, c->progeny[k]->ti_gravity_end_max);
+        ti_gravity_beg_max =
+            max(ti_gravity_beg_max, c->progeny[k]->ti_gravity_beg_max);
         if (c->progeny[k]->maxdepth > maxdepth)
           maxdepth = c->progeny[k]->maxdepth;
       }
@@ -2221,9 +2248,13 @@ void space_split_recursive(struct space *s, struct cell *c,
     }
 
     /* Convert into integer times */
-    ti_end_min = get_integer_time_end(e->ti_current, time_bin_min);
-    ti_end_max = get_integer_time_end(e->ti_current, time_bin_max);
-    ti_beg_max = get_integer_time_begin(e->ti_current + 1, time_bin_max);
+    ti_hydro_end_min = get_integer_time_end(e->ti_current, time_bin_min);
+    ti_hydro_end_max = get_integer_time_end(e->ti_current, time_bin_max);
+    ti_hydro_beg_max = get_integer_time_begin(e->ti_current + 1, time_bin_max);
+    ti_gravity_end_min = get_integer_time_end(e->ti_current, time_bin_min);
+    ti_gravity_end_max = get_integer_time_end(e->ti_current, time_bin_max);
+    ti_gravity_beg_max =
+        get_integer_time_begin(e->ti_current + 1, time_bin_max);
 
     /* Construct the multipole and the centre of mass*/
     if (s->gravity) {
@@ -2241,10 +2272,12 @@ void space_split_recursive(struct space *s, struct cell *c,
         c->multipole->r_max = sqrt(dx * dx + dy * dy + dz * dz);
       } else {
         gravity_multipole_init(&c->multipole->m_pole);
-        c->multipole->CoM[0] = c->loc[0] + c->width[0] / 2.;
-        c->multipole->CoM[1] = c->loc[1] + c->width[1] / 2.;
-        c->multipole->CoM[2] = c->loc[2] + c->width[2] / 2.;
-        c->multipole->r_max = 0.;
+        if (c->nodeID == engine_rank) {
+          c->multipole->CoM[0] = c->loc[0] + c->width[0] / 2.;
+          c->multipole->CoM[1] = c->loc[1] + c->width[1] / 2.;
+          c->multipole->CoM[2] = c->loc[2] + c->width[2] / 2.;
+          c->multipole->r_max = 0.;
+        }
       }
       c->multipole->r_max_rebuild = c->multipole->r_max;
       c->multipole->CoM_rebuild[0] = c->multipole->CoM[0];
@@ -2255,9 +2288,12 @@ void space_split_recursive(struct space *s, struct cell *c,
 
   /* Set the values for this cell. */
   c->h_max = h_max;
-  c->ti_end_min = ti_end_min;
-  c->ti_end_max = ti_end_max;
-  c->ti_beg_max = ti_beg_max;
+  c->ti_hydro_end_min = ti_hydro_end_min;
+  c->ti_hydro_end_max = ti_hydro_end_max;
+  c->ti_hydro_beg_max = ti_hydro_beg_max;
+  c->ti_gravity_end_min = ti_gravity_end_min;
+  c->ti_gravity_end_max = ti_gravity_end_max;
+  c->ti_gravity_beg_max = ti_gravity_beg_max;
   c->maxdepth = maxdepth;
 
   /* Set ownership according to the start of the parts array. */
diff --git a/src/task.c b/src/task.c
index 43da1d35680783d977ea743dd4f43c52f0f291bc..72beceea303589fd1370e26a65dbecdde4edcb35 100644
--- a/src/task.c
+++ b/src/task.c
@@ -48,14 +48,13 @@
 
 /* Task type names. */
 const char *taskID_names[task_type_count] = {
-    "none",       "sort",           "self",
-    "pair",       "sub_self",       "sub_pair",
-    "init_grav",  "ghost",          "extra_ghost",
-    "drift_part", "drift_gpart",    "kick1",
-    "kick2",      "timestep",       "send",
-    "recv",       "grav_top_level", "grav_long_range",
-    "grav_ghost", "grav_mm",        "grav_down",
-    "cooling",    "sourceterms"};
+    "none",          "sort",           "self",           "pair",
+    "sub_self",      "sub_pair",       "init_grav",      "ghost_in",
+    "ghost",         "ghost_out",      "extra_ghost",    "drift_part",
+    "drift_gpart",   "kick1",          "kick2",          "timestep",
+    "send",          "recv",           "grav_top_level", "grav_long_range",
+    "grav_ghost_in", "grav_ghost_out", "grav_mm",        "grav_down",
+    "cooling",       "sourceterms"};
 
 /* Sub-task type names. */
 const char *subtaskID_names[task_subtype_count] = {
diff --git a/src/task.h b/src/task.h
index dee888c9f16dd69785a31371da15078af4e0af0c..1b42058f52085053f82a3d875b694a396156635e 100644
--- a/src/task.h
+++ b/src/task.h
@@ -46,7 +46,9 @@ enum task_types {
   task_type_sub_self,
   task_type_sub_pair,
   task_type_init_grav,
+  task_type_ghost_in,
   task_type_ghost,
+  task_type_ghost_out,
   task_type_extra_ghost,
   task_type_drift_part,
   task_type_drift_gpart,
@@ -57,7 +59,8 @@ enum task_types {
   task_type_recv,
   task_type_grav_top_level,
   task_type_grav_long_range,
-  task_type_grav_ghost,
+  task_type_grav_ghost_in,
+  task_type_grav_ghost_out,
   task_type_grav_mm,
   task_type_grav_down,
   task_type_cooling,
diff --git a/src/timeline.h b/src/timeline.h
index 0f38ff3e9421c122b61caf74290c170b62b2dd92..352056cf340e7bbbce336e03e67a060f172155b1 100644
--- a/src/timeline.h
+++ b/src/timeline.h
@@ -32,7 +32,7 @@ typedef long long integertime_t;
 typedef char timebin_t;
 
 /*! The number of time bins */
-#define num_time_bins 56
+#define num_time_bins 26
 
 /*! The maximal number of timesteps in a simulation */
 #define max_nr_timesteps (1LL << (num_time_bins + 1))
diff --git a/tests/test125cells.c b/tests/test125cells.c
index bf1c219fab9927b81bb087684f6ad0d747e958bc..e6bf35cd29270b97dcd771dbedb5f8c26bd8a325 100644
--- a/tests/test125cells.c
+++ b/tests/test125cells.c
@@ -354,8 +354,8 @@ struct cell *make_cell(size_t n, const double offset[3], double size, double h,
   cell->loc[2] = offset[2];
 
   cell->ti_old_part = 8;
-  cell->ti_end_min = 8;
-  cell->ti_end_max = 8;
+  cell->ti_hydro_end_min = 8;
+  cell->ti_hydro_end_max = 8;
 
   // shuffle_particles(cell->parts, cell->count);
 
diff --git a/tests/test27cells.c b/tests/test27cells.c
index f9e3ba1dd78e67a11414c88efe3c604b02b1aa16..1952103634104d89b955ed1c47b0836d894ef533 100644
--- a/tests/test27cells.c
+++ b/tests/test27cells.c
@@ -182,8 +182,8 @@ struct cell *make_cell(size_t n, double *offset, double size, double h,
   cell->loc[2] = offset[2];
 
   cell->ti_old_part = 8;
-  cell->ti_end_min = 8;
-  cell->ti_end_max = 8;
+  cell->ti_hydro_end_min = 8;
+  cell->ti_hydro_end_max = 8;
 
   shuffle_particles(cell->parts, cell->count);
 
diff --git a/tests/testActivePair.c b/tests/testActivePair.c
index fd2244db971f6a5f7cfeb62b9661c33b7ee88145..e9367db302a32130df84e4381d1ab255681504de 100644
--- a/tests/testActivePair.c
+++ b/tests/testActivePair.c
@@ -141,8 +141,8 @@ struct cell *make_cell(size_t n, double *offset, double size, double h,
   cell->loc[2] = offset[2];
 
   cell->ti_old_part = 8;
-  cell->ti_end_min = 8;
-  cell->ti_end_max = 10;
+  cell->ti_hydro_end_min = 8;
+  cell->ti_hydro_end_max = 10;
 
   shuffle_particles(cell->parts, cell->count);
 
diff --git a/tests/testDump.c b/tests/testDump.c
index b1406ba5f5e6652c6e2727d6b161215f4e905d62..7d193ac32f4069dced2ddb2d89db91328f3ff223 100644
--- a/tests/testDump.c
+++ b/tests/testDump.c
@@ -40,7 +40,8 @@ void dump_mapper(void *map_data, int num_elements, void *extra_data) {
   size_t offset;
   char *out_string = dump_get(d, 7, &offset);
   char out_buff[8];
-  snprintf(out_buff, 8, "%06zi\n", offset / 7);
+  /* modulo due to bug in gcc, should be removed */
+  snprintf(out_buff, 8, "%06zi\n", (offset / 7) % 1000000);
   memcpy(out_string, out_buff, 7);
 }
 
diff --git a/tests/testInteractions.c b/tests/testInteractions.c
index 864cdd6aaf8c6cdaed6c4d6462cdaed3f6ab1a5b..71d5728f6721b5817e17b74b23415561ba39c21a 100644
--- a/tests/testInteractions.c
+++ b/tests/testInteractions.c
@@ -673,6 +673,7 @@ int main(int argc, char *argv[]) {
         break;
       case 's':
         sscanf(optarg, "%lf", &spacing);
+	break;
       case 'n':
         sscanf(optarg, "%zu", &count);
         break;
diff --git a/tests/testPeriodicBC.c b/tests/testPeriodicBC.c
index ded767c5d657c0a05dc227d1ee8facbfa5f4de63..f3687eac6be87cd60c12404c19916c8101c42dc1 100644
--- a/tests/testPeriodicBC.c
+++ b/tests/testPeriodicBC.c
@@ -171,8 +171,8 @@ struct cell *make_cell(size_t n, double *offset, double size, double h,
   cell->loc[2] = offset[2];
 
   cell->ti_old_part = 8;
-  cell->ti_end_min = 8;
-  cell->ti_end_max = 8;
+  cell->ti_hydro_end_min = 8;
+  cell->ti_hydro_end_max = 8;
 
   shuffle_particles(cell->parts, cell->count);
 
diff --git a/tests/testSPHStep.c b/tests/testSPHStep.c
index e890c7c1a834ec7ca13ed2e8a509b7ea42db28fd..08d6abaa7521de2a7d12fd9672db0d24a5a20a97 100644
--- a/tests/testSPHStep.c
+++ b/tests/testSPHStep.c
@@ -77,8 +77,8 @@ struct cell *make_cell(size_t N, float cellSize, int offset[3], int id_offset) {
   cell->width[1] = cellSize;
   cell->width[2] = cellSize;
 
-  cell->ti_end_min = 1;
-  cell->ti_end_max = 1;
+  cell->ti_hydro_end_min = 1;
+  cell->ti_hydro_end_max = 1;
 
   cell->sorted = 0;
   for (int k = 0; k < 13; k++) cell->sort[k] = NULL;