diff --git a/examples/EAGLE_12/eagle_12.yml b/examples/EAGLE_12/eagle_12.yml
index 8ebe29fb0216e16aeaafcdc086085d8c9879fc5f..77d3f358b9ba66ab9d40fe664d85ee2d511ab5de 100644
--- a/examples/EAGLE_12/eagle_12.yml
+++ b/examples/EAGLE_12/eagle_12.yml
@@ -22,9 +22,6 @@ TimeIntegration:
   dt_min:     1e-10 # The minimal time-step size of the simulation (in internal units).
   dt_max:     1e-3  # The maximal time-step size of the simulation (in internal units).
   
-Scheduler:
-  max_top_level_cells:    8
-
 # Parameters governing the snapshots
 Snapshots:
   basename:            eagle # Common part of the name of output files
diff --git a/examples/main.c b/examples/main.c
index cf90adc8c8733445d3ee36f8a764b7bcdfe53e5a..38847543bd25d6ce877b713b7e6e9ff614eb16bb 100644
--- a/examples/main.c
+++ b/examples/main.c
@@ -486,6 +486,17 @@ int main(int argc, char *argv[]) {
   MPI_Bcast(params, sizeof(struct swift_params), MPI_BYTE, 0, MPI_COMM_WORLD);
 #endif
 
+  /* Temporary early aborts for modes not supported over MPI. */
+#ifdef WITH_MPI
+  if (with_mpole_reconstruction && nr_nodes > 1)
+    error("Cannot reconstruct m-poles every step over MPI (yet).");
+#endif
+
+#if defined(WITH_MPI) && defined(HAVE_VELOCIRAPTOR)
+  if (with_structure_finding && nr_nodes > 1)
+    error("VEOCIraptor not yet enabled over MPI.");
+#endif
+
   /* Check that we can write the snapshots by testing if the output
    * directory exists and is searchable and writable. */
   char basename[PARSER_MAX_LINE_SIZE];
@@ -729,15 +740,6 @@ int main(int argc, char *argv[]) {
       fflush(stdout);
     }
 
-#ifdef WITH_MPI
-    if (periodic && with_self_gravity)
-      error("Periodic self-gravity over MPI temporarily disabled.");
-#endif
-
-#if defined(WITH_MPI) && defined(HAVE_VELOCIRAPTOR)
-    if (with_structure_finding) error("VEOCIraptor not yet enabled over MPI.");
-#endif
-
 #ifdef SWIFT_DEBUG_CHECKS
     /* Check once and for all that we don't have unwanted links */
     if (!with_stars && !dry_run) {
@@ -769,8 +771,7 @@ int main(int argc, char *argv[]) {
     if (myrank == 0)
       message(
           "Read %lld gas particles, %lld star particles and %lld gparts from "
-          "the "
-          "ICs.",
+          "the ICs.",
           N_total[0], N_total[2], N_total[1]);
 
     /* Verify that the fields to dump actually exist */
@@ -905,11 +906,10 @@ int main(int argc, char *argv[]) {
           "particles (%lld gravity particles)",
           N_total[0], N_total[2], N_total[1] > 0 ? N_DM : 0, N_total[1]);
       message(
-          "from t=%.3e until t=%.3e with %d threads and %d queues "
-          "(dt_min=%.3e, "
-          "dt_max=%.3e)...",
-          e.time_begin, e.time_end, e.nr_threads, e.sched.nr_queues, e.dt_min,
-          e.dt_max);
+          "from t=%.3e until t=%.3e with %d ranks, %d threads / rank and %d "
+          "task queues / rank (dt_min=%.3e, dt_max=%.3e)...",
+          e.time_begin, e.time_end, nr_nodes, e.nr_threads, e.sched.nr_queues,
+          e.dt_min, e.dt_max);
       fflush(stdout);
     }
   }
diff --git a/src/active.h b/src/active.h
index 3fe52a86b373ff0b33b88eca0dac9b7c6b58a216..949babf1b1f8d27c89d0a5d9c3a93f1825512fd3 100644
--- a/src/active.h
+++ b/src/active.h
@@ -140,6 +140,19 @@ __attribute__((always_inline)) INLINE static int cell_is_active_gravity(
   return (c->ti_gravity_end_min == e->ti_current);
 }
 
+/**
+ * @brief Does a cell contain any multipole requiring calculation ?
+ *
+ * @param c The #cell.
+ * @param e The #engine containing information about the current time.
+ * @return 1 if the #cell's multipole must be updated now, 0 otherwise.
+ */
+__attribute__((always_inline)) INLINE static int cell_is_active_gravity_mm(
+    const struct cell *c, const struct engine *e) {
+
+  return (c->ti_gravity_end_min == e->ti_current);
+}
+
 /**
  * @brief Are *all* g-particles in a cell finishing their time-step now ?
  *
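
The new `cell_is_active_gravity_mm()` predicate mirrors the particle-based `cell_is_active_gravity()`: a cell's multipole needs an M-M calculation exactly when the earliest gravity end-time in the cell coincides with the engine's current integer time. A minimal standalone sketch of that integer-timeline test, with toy stand-ins for SWIFT's types:

```c
/* Toy stand-ins for struct cell / struct engine; not SWIFT's types. */
#include <stdio.h>

typedef long long integertime_t;

struct toy_cell   { integertime_t ti_gravity_end_min; };
struct toy_engine { integertime_t ti_current; };

/* The multipole needs an M-M calculation when the earliest gravity
 * end-time among the cell's particles equals the current time. */
static int toy_cell_is_active_gravity_mm(const struct toy_cell *c,
                                         const struct toy_engine *e) {
  return (c->ti_gravity_end_min == e->ti_current);
}

int main(void) {
  const struct toy_engine e = {.ti_current = 4096};
  const struct toy_cell active = {.ti_gravity_end_min = 4096};
  const struct toy_cell idle = {.ti_gravity_end_min = 8192};
  printf("active: %d, idle: %d\n",
         toy_cell_is_active_gravity_mm(&active, &e),
         toy_cell_is_active_gravity_mm(&idle, &e));
  return 0;
}
```
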
diff --git a/src/cell.c b/src/cell.c
index f7a374e293e13e108b14c733ec066028f8a33c7b..6ad7e4d97e754e98a4ab0eeb76a188ed6bea7c73 100644
--- a/src/cell.c
+++ b/src/cell.c
@@ -167,10 +167,13 @@ int cell_link_sparts(struct cell *c, struct spart *sparts) {
  * @param c The #cell.
  * @param pc Pointer to an array of packed cells in which the
  *      cells will be packed.
+ * @param with_gravity Are we running with gravity and hence need
+ *      to exchange multipoles?
  *
  * @return The number of packed cells.
  */
-int cell_pack(struct cell *restrict c, struct pcell *restrict pc) {
+int cell_pack(struct cell *restrict c, struct pcell *restrict pc,
+              const int with_gravity) {
 
 #ifdef WITH_MPI
 
@@ -187,6 +190,21 @@ int cell_pack(struct cell *restrict c, struct pcell *restrict pc) {
   pc->gcount = c->gcount;
   pc->scount = c->scount;
 
+  /* Copy the Multipole related information */
+  if (with_gravity) {
+    const struct gravity_tensors *mp = c->multipole;
+
+    pc->m_pole = mp->m_pole;
+    pc->CoM[0] = mp->CoM[0];
+    pc->CoM[1] = mp->CoM[1];
+    pc->CoM[2] = mp->CoM[2];
+    pc->CoM_rebuild[0] = mp->CoM_rebuild[0];
+    pc->CoM_rebuild[1] = mp->CoM_rebuild[1];
+    pc->CoM_rebuild[2] = mp->CoM_rebuild[2];
+    pc->r_max = mp->r_max;
+    pc->r_max_rebuild = mp->r_max_rebuild;
+  }
+
 #ifdef SWIFT_DEBUG_CHECKS
   pc->cellID = c->cellID;
 #endif
@@ -196,7 +214,7 @@ int cell_pack(struct cell *restrict c, struct pcell *restrict pc) {
   for (int k = 0; k < 8; k++)
     if (c->progeny[k] != NULL) {
       pc->progeny[k] = count;
-      count += cell_pack(c->progeny[k], &pc[count]);
+      count += cell_pack(c->progeny[k], &pc[count], with_gravity);
     } else {
       pc->progeny[k] = -1;
     }
@@ -251,11 +269,13 @@ int cell_pack_tags(const struct cell *c, int *tags) {
  * @param pc An array of packed #pcell.
  * @param c The #cell in which to unpack the #pcell.
  * @param s The #space in which the cells are created.
+ * @param with_gravity Are we running with gravity and hence need
+ *      to exchange multipoles?
  *
  * @return The number of cells created.
  */
 int cell_unpack(struct pcell *restrict pc, struct cell *restrict c,
-                struct space *restrict s) {
+                struct space *restrict s, const int with_gravity) {
 
 #ifdef WITH_MPI
 
@@ -275,6 +295,22 @@ int cell_unpack(struct pcell *restrict pc, struct cell *restrict c,
   c->cellID = pc->cellID;
 #endif
 
+  /* Copy the Multipole related information */
+  if (with_gravity) {
+
+    struct gravity_tensors *mp = c->multipole;
+
+    mp->m_pole = pc->m_pole;
+    mp->CoM[0] = pc->CoM[0];
+    mp->CoM[1] = pc->CoM[1];
+    mp->CoM[2] = pc->CoM[2];
+    mp->CoM_rebuild[0] = pc->CoM_rebuild[0];
+    mp->CoM_rebuild[1] = pc->CoM_rebuild[1];
+    mp->CoM_rebuild[2] = pc->CoM_rebuild[2];
+    mp->r_max = pc->r_max;
+    mp->r_max_rebuild = pc->r_max_rebuild;
+  }
+
   /* Number of new cells created. */
   int count = 1;
 
@@ -304,7 +340,7 @@ int cell_unpack(struct pcell *restrict pc, struct cell *restrict c,
       temp->parent = c;
       c->progeny[k] = temp;
       c->split = 1;
-      count += cell_unpack(&pc[pc->progeny[k]], temp, s);
+      count += cell_unpack(&pc[pc->progeny[k]], temp, s, with_gravity);
     }
 
   /* Return the total number of unpacked cells. */
@@ -1167,6 +1203,7 @@ void cell_clean_links(struct cell *c, void *data) {
   c->gradient = NULL;
   c->force = NULL;
   c->grav = NULL;
+  c->grav_mm = NULL;
 }
 
 /**
@@ -1948,7 +1985,7 @@ void cell_activate_grav_mm_task(struct cell *ci, struct cell *cj,
   const struct engine *e = s->space->e;
 
   /* Anything to do here? */
-  if (!cell_is_active_gravity(ci, e) && !cell_is_active_gravity(cj, e))
+  if (!cell_is_active_gravity_mm(ci, e) && !cell_is_active_gravity_mm(cj, e))
     error("Inactive MM task being activated");
 
   /* Atomically drift the multipole in ci */
@@ -2144,15 +2181,22 @@ int cell_unskip_hydro_tasks(struct cell *c, struct scheduler *s) {
     struct cell *cj = t->cj;
     const int ci_active = cell_is_active_hydro(ci, e);
     const int cj_active = (cj != NULL) ? cell_is_active_hydro(cj, e) : 0;
+#ifdef WITH_MPI
+    const int ci_nodeID = ci->nodeID;
+    const int cj_nodeID = (cj != NULL) ? cj->nodeID : -1;
+#else
+    const int ci_nodeID = nodeID;
+    const int cj_nodeID = nodeID;
+#endif
 
     /* Only activate tasks that involve a local active cell. */
-    if ((ci_active && ci->nodeID == nodeID) ||
-        (cj_active && cj->nodeID == nodeID)) {
+    if ((ci_active && ci_nodeID == nodeID) ||
+        (cj_active && cj_nodeID == nodeID)) {
       scheduler_activate(s, t);
 
       /* Activate hydro drift */
       if (t->type == task_type_self) {
-        if (ci->nodeID == nodeID) cell_activate_drift_part(ci, s);
+        if (ci_nodeID == nodeID) cell_activate_drift_part(ci, s);
       }
 
       /* Set the correct sorting flags and activate hydro drifts */
@@ -2164,8 +2208,8 @@ int cell_unskip_hydro_tasks(struct cell *c, struct scheduler *s) {
         cj->dx_max_sort_old = cj->dx_max_sort;
 
         /* Activate the drift tasks. */
-        if (ci->nodeID == nodeID) cell_activate_drift_part(ci, s);
-        if (cj->nodeID == nodeID) cell_activate_drift_part(cj, s);
+        if (ci_nodeID == nodeID) cell_activate_drift_part(ci, s);
+        if (cj_nodeID == nodeID) cell_activate_drift_part(cj, s);
 
         /* Check the sorts and activate them if needed. */
         cell_activate_sorts(ci, t->flags, s);
@@ -2186,7 +2230,7 @@ int cell_unskip_hydro_tasks(struct cell *c, struct scheduler *s) {
 
 #ifdef WITH_MPI
       /* Activate the send/recv tasks. */
-      if (ci->nodeID != nodeID) {
+      if (ci_nodeID != nodeID) {
 
         /* If the local cell is active, receive data from the foreign cell. */
         if (cj_active) {
@@ -2206,7 +2250,7 @@ int cell_unskip_hydro_tasks(struct cell *c, struct scheduler *s) {
         /* Is the foreign cell active and will need stuff from us? */
         if (ci_active) {
 
-          scheduler_activate_send(s, cj->send_xv, ci->nodeID);
+          scheduler_activate_send(s, cj->send_xv, ci_nodeID);
 
           /* Drift the cell which will be sent; note that not all sent
              particles will be drifted, only those that are needed. */
@@ -2214,18 +2258,18 @@ int cell_unskip_hydro_tasks(struct cell *c, struct scheduler *s) {
 
           /* If the local cell is also active, more stuff will be needed. */
           if (cj_active) {
-            scheduler_activate_send(s, cj->send_rho, ci->nodeID);
+            scheduler_activate_send(s, cj->send_rho, ci_nodeID);
 
 #ifdef EXTRA_HYDRO_LOOP
-            scheduler_activate_send(s, cj->send_gradient, ci->nodeID);
+            scheduler_activate_send(s, cj->send_gradient, ci_nodeID);
 #endif
           }
         }
 
         /* If the local cell is active, send its ti_end values. */
-        if (cj_active) scheduler_activate_send(s, cj->send_ti, ci->nodeID);
+        if (cj_active) scheduler_activate_send(s, cj->send_ti, ci_nodeID);
 
-      } else if (cj->nodeID != nodeID) {
+      } else if (cj_nodeID != nodeID) {
 
         /* If the local cell is active, receive data from the foreign cell. */
         if (ci_active) {
@@ -2245,7 +2289,7 @@ int cell_unskip_hydro_tasks(struct cell *c, struct scheduler *s) {
         /* Is the foreign cell active and will need stuff from us? */
         if (cj_active) {
 
-          scheduler_activate_send(s, ci->send_xv, cj->nodeID);
+          scheduler_activate_send(s, ci->send_xv, cj_nodeID);
 
           /* Drift the cell which will be sent; note that not all sent
              particles will be drifted, only those that are needed. */
@@ -2254,16 +2298,16 @@ int cell_unskip_hydro_tasks(struct cell *c, struct scheduler *s) {
           /* If the local cell is also active, more stuff will be needed. */
           if (ci_active) {
 
-            scheduler_activate_send(s, ci->send_rho, cj->nodeID);
+            scheduler_activate_send(s, ci->send_rho, cj_nodeID);
 
 #ifdef EXTRA_HYDRO_LOOP
-            scheduler_activate_send(s, ci->send_gradient, cj->nodeID);
+            scheduler_activate_send(s, ci->send_gradient, cj_nodeID);
 #endif
           }
         }
 
         /* If the local cell is active, send its ti_end values. */
-        if (ci_active) scheduler_activate_send(s, ci->send_ti, cj->nodeID);
+        if (ci_active) scheduler_activate_send(s, ci->send_ti, cj_nodeID);
       }
 #endif
     }
@@ -2312,10 +2356,15 @@ int cell_unskip_gravity_tasks(struct cell *c, struct scheduler *s) {
     struct task *t = l->t;
     struct cell *ci = t->ci;
     struct cell *cj = t->cj;
-    const int ci_nodeID = ci->nodeID;
-    const int cj_nodeID = (cj != NULL) ? cj->nodeID : -1;
     const int ci_active = cell_is_active_gravity(ci, e);
     const int cj_active = (cj != NULL) ? cell_is_active_gravity(cj, e) : 0;
+#ifdef WITH_MPI
+    const int ci_nodeID = ci->nodeID;
+    const int cj_nodeID = (cj != NULL) ? cj->nodeID : -1;
+#else
+    const int ci_nodeID = nodeID;
+    const int cj_nodeID = nodeID;
+#endif
 
     /* Only activate tasks that involve a local active cell. */
     if ((ci_active && ci_nodeID == nodeID) ||
@@ -2331,7 +2380,9 @@ int cell_unskip_gravity_tasks(struct cell *c, struct scheduler *s) {
       } else if (t->type == task_type_pair) {
         cell_activate_subcell_grav_tasks(ci, cj, s);
       } else if (t->type == task_type_grav_mm) {
-        cell_activate_grav_mm_task(ci, cj, s);
+#ifdef SWIFT_DEBUG_CHECKS
+        error("Incorrectly linked M-M task!");
+#endif
       }
     }
 
@@ -2342,9 +2393,7 @@ int cell_unskip_gravity_tasks(struct cell *c, struct scheduler *s) {
       if (ci_nodeID != nodeID) {
 
         /* If the local cell is active, receive data from the foreign cell. */
-        if (cj_active) {
-          scheduler_activate(s, ci->recv_grav);
-        }
+        if (cj_active) scheduler_activate(s, ci->recv_grav);
 
         /* If the foreign cell is active, we want its ti_end values. */
         if (ci_active) scheduler_activate(s, ci->recv_ti);
@@ -2366,9 +2415,7 @@ int cell_unskip_gravity_tasks(struct cell *c, struct scheduler *s) {
       } else if (cj_nodeID != nodeID) {
 
         /* If the local cell is active, receive data from the foreign cell. */
-        if (ci_active) {
-          scheduler_activate(s, cj->recv_grav);
-        }
+        if (ci_active) scheduler_activate(s, cj->recv_grav);
 
         /* If the foreign cell is active, we want its ti_end values. */
         if (cj_active) scheduler_activate(s, cj->recv_ti);
@@ -2391,8 +2438,36 @@ int cell_unskip_gravity_tasks(struct cell *c, struct scheduler *s) {
     }
   }
 
+  for (struct link *l = c->grav_mm; l != NULL; l = l->next) {
+
+    struct task *t = l->t;
+    struct cell *ci = t->ci;
+    struct cell *cj = t->cj;
+    const int ci_active = cell_is_active_gravity_mm(ci, e);
+    const int cj_active = (cj != NULL) ? cell_is_active_gravity_mm(cj, e) : 0;
+#ifdef WITH_MPI
+    const int ci_nodeID = ci->nodeID;
+    const int cj_nodeID = (cj != NULL) ? cj->nodeID : -1;
+#else
+    const int ci_nodeID = nodeID;
+    const int cj_nodeID = nodeID;
+#endif
+
+#ifdef SWIFT_DEBUG_CHECKS
+    if (t->type != task_type_grav_mm) error("Incorrectly linked gravity task!");
+#endif
+
+    /* Only activate tasks that involve a local active cell. */
+    if ((ci_active && ci_nodeID == nodeID) ||
+        (cj_active && cj_nodeID == nodeID)) {
+
+      scheduler_activate(s, t);
+      cell_activate_grav_mm_task(ci, cj, s);
+    }
+  }
+
   /* Unskip all the other task types. */
-  if (c->nodeID == nodeID && cell_is_active_gravity(c, e)) {
+  if (c->nodeID == nodeID && cell_is_active_gravity_mm(c, e)) {
 
     if (c->init_grav != NULL) scheduler_activate(s, c->init_grav);
     if (c->init_grav_out != NULL) scheduler_activate(s, c->init_grav_out);
@@ -2418,7 +2493,7 @@ int cell_unskip_gravity_tasks(struct cell *c, struct scheduler *s) {
 void cell_set_super(struct cell *c, struct cell *super) {
 
   /* Are we in a cell with some kind of self/pair task ? */
-  if (super == NULL && c->nr_tasks > 0) super = c;
+  if (super == NULL && (c->nr_tasks > 0 || c->nr_mm_tasks > 0)) super = c;
 
   /* Set the super-cell */
   c->super = super;
@@ -2461,7 +2536,8 @@ void cell_set_super_hydro(struct cell *c, struct cell *super_hydro) {
 void cell_set_super_gravity(struct cell *c, struct cell *super_gravity) {
 
   /* Are we in a cell with some kind of self/pair task ? */
-  if (super_gravity == NULL && c->grav != NULL) super_gravity = c;
+  if (super_gravity == NULL && (c->grav != NULL || c->grav_mm != NULL))
+    super_gravity = c;
 
   /* Set the super-cell */
   c->super_gravity = super_gravity;
@@ -2938,3 +3014,63 @@ int cell_can_use_pair_mm(const struct cell *ci, const struct cell *cj,
 
   return gravity_M2L_accept(multi_i->r_max, multi_j->r_max, theta_crit2, r2);
 }
+
+/**
+ * @brief Can we use the MM interactions for a given pair of cells?
+ *
+ * @param ci The first #cell.
+ * @param cj The second #cell.
+ * @param e The #engine.
+ * @param s The #space.
+ */
+int cell_can_use_pair_mm_rebuild(const struct cell *ci, const struct cell *cj,
+                                 const struct engine *e,
+                                 const struct space *s) {
+
+  const double theta_crit2 = e->gravity_properties->theta_crit2;
+  const int periodic = s->periodic;
+  const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]};
+
+  /* Recover the multipole information */
+  const struct gravity_tensors *const multi_i = ci->multipole;
+  const struct gravity_tensors *const multi_j = cj->multipole;
+
+#ifdef SWIFT_DEBUG_CHECKS
+
+  if (multi_i->CoM[0] < ci->loc[0] ||
+      multi_i->CoM[0] > ci->loc[0] + ci->width[0])
+    error("Invalid multipole position ci");
+  if (multi_i->CoM[1] < ci->loc[1] ||
+      multi_i->CoM[1] > ci->loc[1] + ci->width[1])
+    error("Invalid multipole position ci");
+  if (multi_i->CoM[2] < ci->loc[2] ||
+      multi_i->CoM[2] > ci->loc[2] + ci->width[2])
+    error("Invalid multipole position ci");
+
+  if (multi_j->CoM[0] < cj->loc[0] ||
+      multi_j->CoM[0] > cj->loc[0] + cj->width[0])
+    error("Invalid multipole position cj");
+  if (multi_j->CoM[1] < cj->loc[1] ||
+      multi_j->CoM[1] > cj->loc[1] + cj->width[1])
+    error("Invalid multipole position cj");
+  if (multi_j->CoM[2] < cj->loc[2] ||
+      multi_j->CoM[2] > cj->loc[2] + cj->width[2])
+    error("Invalid multipole position cj");
+
+#endif
+
+  /* Get the distance between the CoMs */
+  double dx = multi_i->CoM[0] - multi_j->CoM[0];
+  double dy = multi_i->CoM[1] - multi_j->CoM[1];
+  double dz = multi_i->CoM[2] - multi_j->CoM[2];
+
+  /* Apply BC */
+  if (periodic) {
+    dx = nearest(dx, dim[0]);
+    dy = nearest(dy, dim[1]);
+    dz = nearest(dz, dim[2]);
+  }
+  const double r2 = dx * dx + dy * dy + dz * dz;
+
+  return gravity_M2L_accept(multi_i->r_max, multi_j->r_max, theta_crit2, r2);
+}
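
The `with_gravity` plumbing above rides on SWIFT's existing serialisation pattern: the cell tree is packed depth-first into a flat `pcell` array, each node recording its children as indices relative to its own slot, and unpacking replays the same recursion on the receiving rank. A self-contained sketch of that pattern (a toy binary tree instead of SWIFT's octree, and a single `int` payload in place of the counts and multipole fields):

```c
/* Sketch of the cell_pack()/cell_unpack() pattern; toy types only. */
#include <stdio.h>
#include <stdlib.h>

#define NCHILD 2 /* SWIFT uses 8 (an octree); 2 keeps the sketch short */

struct toy_cell {
  int payload;
  struct toy_cell *progeny[NCHILD];
};

struct toy_pcell {
  int payload;
  int progeny[NCHILD]; /* relative index into the packed array, or -1 */
};

/* Depth-first pack; returns the number of packed nodes. */
static int toy_pack(const struct toy_cell *c, struct toy_pcell *pc) {
  pc->payload = c->payload;
  int count = 1;
  for (int k = 0; k < NCHILD; k++)
    if (c->progeny[k] != NULL) {
      pc->progeny[k] = count;
      count += toy_pack(c->progeny[k], &pc[count]);
    } else {
      pc->progeny[k] = -1;
    }
  return count;
}

/* Mirror unpack; returns the number of cells re-created. */
static int toy_unpack(const struct toy_pcell *pc, struct toy_cell *c) {
  c->payload = pc->payload;
  int count = 1;
  for (int k = 0; k < NCHILD; k++)
    if (pc->progeny[k] >= 0) {
      struct toy_cell *child = calloc(1, sizeof(*child));
      c->progeny[k] = child;
      count += toy_unpack(&pc[pc->progeny[k]], child);
    } else {
      c->progeny[k] = NULL;
    }
  return count;
}

int main(void) {
  struct toy_cell leaf = {.payload = 2};
  struct toy_cell root = {.payload = 1, .progeny = {&leaf, NULL}};
  struct toy_pcell buf[8];
  const int packed = toy_pack(&root, buf);
  struct toy_cell out = {0};
  const int unpacked = toy_unpack(buf, &out);
  printf("packed %d, unpacked %d, child payload %d\n", packed, unpacked,
         out.progeny[0]->payload);
  free(out.progeny[0]);
  return 0;
}
```
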
diff --git a/src/cell.h b/src/cell.h
index 9438804e12d599f7e36bf937b9bdc2958793feac..747b79f05a799b9401c67c23b72b722d385c5ced 100644
--- a/src/cell.h
+++ b/src/cell.h
@@ -77,6 +77,24 @@ struct link {
  */
 struct pcell {
 
+  /*! This cell's gravity-related tensors */
+  struct multipole m_pole;
+
+  /*! Centre of mass. */
+  double CoM[3];
+
+  /*! Centre of mass at rebuild time. */
+  double CoM_rebuild[3];
+
+  /*! Upper limit of the CoM<->gpart distance. */
+  double r_max;
+
+  /*! Upper limit of the CoM<->gpart distance at last rebuild. */
+  double r_max_rebuild;
+
+  /*! Relative indices of the cell's progeny. */
+  int progeny[8];
+
   /*! Maximal smoothing length. */
   double h_max;
 
@@ -116,9 +134,6 @@ struct pcell {
   /*! Number of #spart in this cell. */
   int scount;
 
-  /*! Relative indices of the cell's progeny. */
-  int progeny[8];
-
 #ifdef SWIFT_DEBUG_CHECKS
   /* Cell ID (for debugging) */
   int cellID;
@@ -213,6 +228,9 @@ struct cell {
   /*! Linked list of the tasks computing this cell's gravity forces. */
   struct link *grav;
 
+  /*! Linked list of the tasks computing this cell's gravity M-M forces. */
+  struct link *grav_mm;
+
   /*! The task computing this cell's sorts. */
   struct task *sorts;
 
@@ -422,6 +440,9 @@ struct cell {
   /*! Number of tasks that are associated with this cell. */
   short int nr_tasks;
 
+  /*! Number of M-M tasks that are associated with this cell. */
+  short int nr_mm_tasks;
+
   /*! The depth of this cell in the tree. */
   char depth;
 
@@ -479,8 +500,9 @@ int cell_mlocktree(struct cell *c);
 void cell_munlocktree(struct cell *c);
 int cell_slocktree(struct cell *c);
 void cell_sunlocktree(struct cell *c);
-int cell_pack(struct cell *c, struct pcell *pc);
-int cell_unpack(struct pcell *pc, struct cell *c, struct space *s);
+int cell_pack(struct cell *c, struct pcell *pc, const int with_gravity);
+int cell_unpack(struct pcell *pc, struct cell *c, struct space *s,
+                const int with_gravity);
 int cell_pack_tags(const struct cell *c, int *tags);
 int cell_unpack_tags(const int *tags, struct cell *c);
 int cell_pack_end_step(struct cell *c, struct pcell_step *pcell);
@@ -522,6 +544,8 @@ void cell_set_super_mapper(void *map_data, int num_elements, void *extra_data);
 int cell_has_tasks(struct cell *c);
 int cell_can_use_pair_mm(const struct cell *ci, const struct cell *cj,
                          const struct engine *e, const struct space *s);
+int cell_can_use_pair_mm_rebuild(const struct cell *ci, const struct cell *cj,
+                                 const struct engine *e, const struct space *s);
 
 /* Inlined functions (for speed). */
 
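
`cell_can_use_pair_mm_rebuild()` combines two geometric ingredients: the minimum-image wrap of the CoM separation and the multipole acceptance test. A standalone sketch, assuming `gravity_M2L_accept()` implements the standard opening-angle criterion theta_crit^2 * r^2 > (r_max_i + r_max_j)^2:

```c
/* Sketch of the geometric test behind cell_can_use_pair_mm_rebuild();
 * toy code, with the acceptance criterion assumed as stated above. */
#include <math.h>
#include <stdio.h>

/* Minimum-image convention for a periodic box of size `box`. */
static double toy_nearest(double dx, double box) {
  if (dx > 0.5 * box) return dx - box;
  if (dx < -0.5 * box) return dx + box;
  return dx;
}

static int toy_M2L_accept(double r_max_i, double r_max_j, double theta_crit2,
                          double r2) {
  const double size = r_max_i + r_max_j;
  return (r2 * theta_crit2 > size * size);
}

int main(void) {
  const double box = 100., theta_crit2 = 0.5 * 0.5; /* theta_crit = 0.5 */
  const double CoM_i[3] = {2., 2., 2.}, CoM_j[3] = {98., 2., 2.};
  const double dx = toy_nearest(CoM_i[0] - CoM_j[0], box); /* -96 -> +4 */
  const double dy = toy_nearest(CoM_i[1] - CoM_j[1], box);
  const double dz = toy_nearest(CoM_i[2] - CoM_j[2], box);
  const double r2 = dx * dx + dy * dy + dz * dz;
  /* Prints 0: this pair is too close for M2L, so a direct pair task
   * would be created instead. */
  printf("use M2L? %d\n", toy_M2L_accept(1.7, 1.7, theta_crit2, r2));
  return 0;
}
```
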
diff --git a/src/cosmology.c b/src/cosmology.c
index ff1116bd65837f6718903f7924c6076b5ff40c26..f209997dd3faae3ba884632ffa41cca5d6ae0710 100644
--- a/src/cosmology.c
+++ b/src/cosmology.c
@@ -535,7 +535,7 @@ void cosmology_init_no_cosmo(struct cosmology *c) {
   c->w_0 = 0.;
   c->w_a = 0.;
   c->h = 1.;
-  c->w = 0.;
+  c->w = -1.;
 
   c->a_begin = 1.;
   c->a_end = 1.;
@@ -544,6 +544,7 @@ void cosmology_init_no_cosmo(struct cosmology *c) {
 
   c->H = 0.;
   c->a = 1.;
+  c->z = 0.;
   c->a_inv = 1.;
   c->a2_inv = 1.;
   c->a3_inv = 1.;
diff --git a/src/engine.c b/src/engine.c
index 73d74bed758834dadab2ef898f2b9733bc3f9c17..f039d619c769f6132acadc8b456fd839f3bb5ea7 100644
--- a/src/engine.c
+++ b/src/engine.c
@@ -1670,10 +1670,11 @@ void engine_exchange_cells(struct engine *e) {
 
   struct space *s = e->s;
   const int nr_proxies = e->nr_proxies;
+  const int with_gravity = e->policy & engine_policy_self_gravity;
   const ticks tic = getticks();
 
   /* Exchange the cell structure with neighbouring ranks. */
-  proxy_cells_exchange(e->proxies, e->nr_proxies, e->s);
+  proxy_cells_exchange(e->proxies, e->nr_proxies, e->s, with_gravity);
 
   /* Count the number of particles we need to import and re-allocate
      the buffer if needed. */
@@ -2175,21 +2176,21 @@ void engine_exchange_proxy_multipoles(struct engine *e) {
 
     /* And the actual number of things we are going to ship */
     for (int k = 0; k < p->nr_cells_in; k++)
-      count_recv += p->cells_in[k]->pcell_size;
+      count_recv_cells += p->cells_in[k]->pcell_size;
 
     for (int k = 0; k < p->nr_cells_out; k++)
-      count_send += p->cells_out[k]->pcell_size;
+      count_send_cells += p->cells_out[k]->pcell_size;
   }
 
   /* Allocate the buffers for the packed data */
   struct gravity_tensors *buffer_send = NULL;
   if (posix_memalign((void **)&buffer_send, SWIFT_CACHE_ALIGNMENT,
-                     count_send * sizeof(struct gravity_tensors)) != 0)
+                     count_send_cells * sizeof(struct gravity_tensors)) != 0)
     error("Unable to allocate memory for multipole transactions");
 
   struct gravity_tensors *buffer_recv = NULL;
   if (posix_memalign((void **)&buffer_recv, SWIFT_CACHE_ALIGNMENT,
-                     count_recv * sizeof(struct gravity_tensors)) != 0)
+                     count_recv_cells * sizeof(struct gravity_tensors)) != 0)
     error("Unable to allocate memory for multipole transactions");
 
   /* Also allocate the MPI requests */
@@ -2448,7 +2449,7 @@ void engine_make_self_gravity_tasks_mapper(void *map_data, int num_elements,
           if (periodic && min_radius > max_distance) continue;
 
           /* Are the cells too close for a MM interaction ? */
-          if (!cell_can_use_pair_mm(ci, cj, e, s)) {
+          if (!cell_can_use_pair_mm_rebuild(ci, cj, e, s)) {
 
             /* Ok, we need to add a direct pair calculation */
             scheduler_addtask(sched, task_type_pair, task_subtype_grav, 0, 0,
@@ -3513,10 +3514,12 @@ void engine_marktasks_mapper(void *map_data, int num_elements,
           cell_activate_subcell_grav_tasks(t->ci, t->cj, s);
         }
 
+#ifdef SWIFT_DEBUG_CHECKS
         else if (t->type == task_type_sub_pair &&
                  t->subtype == task_subtype_grav) {
           error("Invalid task sub-type encountered");
         }
+#endif
       }
 
       /* Only interested in density tasks as of here. */
@@ -3621,9 +3624,7 @@ void engine_marktasks_mapper(void *map_data, int num_elements,
         if (ci->nodeID != engine_rank) {
 
           /* If the local cell is active, receive data from the foreign cell. */
-          if (cj_active_gravity) {
-            scheduler_activate(s, ci->recv_grav);
-          }
+          if (cj_active_gravity) scheduler_activate(s, ci->recv_grav);
 
           /* If the foreign cell is active, we want its ti_end values. */
           if (ci_active_gravity) scheduler_activate(s, ci->recv_ti);
@@ -3647,9 +3648,7 @@ void engine_marktasks_mapper(void *map_data, int num_elements,
         } else if (cj->nodeID != engine_rank) {
 
           /* If the local cell is active, receive data from the foreign cell. */
-          if (ci_active_gravity) {
-            scheduler_activate(s, cj->recv_grav);
-          }
+          if (ci_active_gravity) scheduler_activate(s, cj->recv_grav);
 
           /* If the foreign cell is active, we want its ti_end values. */
           if (cj_active_gravity) scheduler_activate(s, cj->recv_ti);
@@ -3710,8 +3709,8 @@ void engine_marktasks_mapper(void *map_data, int num_elements,
       const struct cell *cj = t->cj;
       const int ci_nodeID = ci->nodeID;
       const int cj_nodeID = cj->nodeID;
-      const int ci_active_gravity = cell_is_active_gravity(ci, e);
-      const int cj_active_gravity = cell_is_active_gravity(cj, e);
+      const int ci_active_gravity = cell_is_active_gravity_mm(ci, e);
+      const int cj_active_gravity = cell_is_active_gravity_mm(cj, e);
 
       if ((ci_active_gravity && ci_nodeID == engine_rank) ||
           (cj_active_gravity && cj_nodeID == engine_rank))
@@ -3950,19 +3949,14 @@ void engine_rebuild(struct engine *e, int clean_smoothing_length_values) {
 /* If in parallel, exchange the cell structure, top-level and neighbouring
  * multipoles. */
 #ifdef WITH_MPI
-  engine_exchange_cells(e);
-
   if (e->policy & engine_policy_self_gravity) engine_exchange_top_multipoles(e);
+
+  engine_exchange_cells(e);
 #endif
 
   /* Re-build the tasks. */
   engine_maketasks(e);
 
-#ifdef WITH_MPI
-  if (e->policy & engine_policy_self_gravity)
-    engine_exchange_proxy_multipoles(e);
-#endif
-
   /* Make the list of top-level cells that have tasks */
   space_list_cells_with_tasks(e->s);
 
@@ -5289,19 +5283,30 @@ void engine_makeproxies(struct engine *e) {
 #ifdef WITH_MPI
   const int nodeID = e->nodeID;
   const struct space *s = e->s;
-  const int *cdim = s->cdim;
+
+  /* Handle on the cells and proxies */
+  struct cell *cells = s->cells_top;
+  struct proxy *proxies = e->proxies;
+
+  /* Some info about the domain */
+  const int cdim[3] = {s->cdim[0], s->cdim[1], s->cdim[2]};
+  const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]};
   const int periodic = s->periodic;
+  const double cell_width[3] = {cells[0].width[0], cells[0].width[1],
+                                cells[0].width[2]};
 
   /* Get some info about the physics */
-  const double *dim = s->dim;
-  const struct gravity_props *props = e->gravity_properties;
-  const double theta_crit2 = props->theta_crit2;
   const int with_hydro = (e->policy & engine_policy_hydro);
   const int with_gravity = (e->policy & engine_policy_self_gravity);
+  const double theta_crit_inv = e->gravity_properties->theta_crit_inv;
+  const double theta_crit2 = e->gravity_properties->theta_crit2;
+  const double max_distance = e->mesh->r_cut_max;
 
-  /* Handle on the cells and proxies */
-  struct cell *cells = s->cells_top;
-  struct proxy *proxies = e->proxies;
+  /* Maximal distance between the CoM and any particle in the cell */
+  const double r_max2 = cell_width[0] * cell_width[0] +
+                        cell_width[1] * cell_width[1] +
+                        cell_width[2] * cell_width[2];
+  const double r_max = sqrt(r_max2);
 
   /* Let's time this */
   const ticks tic = getticks();
@@ -5315,14 +5320,34 @@ void engine_makeproxies(struct engine *e) {
 
   /* Compute how many cells away we need to walk */
   int delta = 1; /*hydro case */
+
+  /* Gravity needs to take the opening angle into account */
   if (with_gravity) {
-    const double distance = 2.5 * cells[0].width[0] / props->theta_crit;
-    delta = (int)(distance / cells[0].width[0]) + 1;
+    const double distance = 2. * r_max * theta_crit_inv;
+    delta = (int)(distance / cells[0].dmin) + 1;
+  }
+
+  /* Turn this into upper and lower bounds for loops */
+  int delta_m = delta;
+  int delta_p = delta;
+
+  /* Special case where every cell is in range of every other one */
+  if (delta >= cdim[0] / 2) {
+    if (cdim[0] % 2 == 0) {
+      delta_m = cdim[0] / 2;
+      delta_p = cdim[0] / 2 - 1;
+    } else {
+      delta_m = cdim[0] / 2;
+      delta_p = cdim[0] / 2;
+    }
   }
 
   /* Let's be verbose about this choice */
   if (e->verbose)
-    message("Looking for proxies up to %d top-level cells away", delta);
+    message(
+        "Looking for proxies up to %d top-level cells away (delta_m=%d "
+        "delta_p=%d)",
+        delta, delta_m, delta_p);
 
   /* Loop over each cell in the space. */
   int ind[3];
@@ -5333,33 +5358,24 @@ void engine_makeproxies(struct engine *e) {
         /* Get the cell ID. */
         const int cid = cell_getid(cdim, ind[0], ind[1], ind[2]);
 
-        double CoM_i[3] = {0., 0., 0.};
-        double r_max_i = 0.;
-
-        if (with_gravity) {
-
-          /* Get ci's multipole */
-          const struct gravity_tensors *multi_i = cells[cid].multipole;
-          CoM_i[0] = multi_i->CoM[0];
-          CoM_i[1] = multi_i->CoM[1];
-          CoM_i[2] = multi_i->CoM[2];
-          r_max_i = multi_i->r_max;
-        }
+        /* and its location */
+        const double loc_i[3] = {cells[cid].loc[0], cells[cid].loc[1],
+                                 cells[cid].loc[2]};
 
         /* Loop over all its neighbours (periodic). */
-        for (int i = -delta; i <= delta; i++) {
+        for (int i = -delta_m; i <= delta_p; i++) {
           int ii = ind[0] + i;
           if (ii >= cdim[0])
             ii -= cdim[0];
           else if (ii < 0)
             ii += cdim[0];
-          for (int j = -delta; j <= delta; j++) {
+          for (int j = -delta_m; j <= delta_p; j++) {
             int jj = ind[1] + j;
             if (jj >= cdim[1])
               jj -= cdim[1];
             else if (jj < 0)
               jj += cdim[1];
-            for (int k = -delta; k <= delta; k++) {
+            for (int k = -delta_m; k <= delta_p; k++) {
               int kk = ind[2] + k;
               if (kk >= cdim[2])
                 kk -= cdim[2];
@@ -5382,9 +5398,12 @@ void engine_makeproxies(struct engine *e) {
 
               int proxy_type = 0;
 
-              /* In the hydro case, only care about neighbours */
+              /* In the hydro case, only care about direct neighbours */
               if (with_hydro) {
 
+                // MATTHIEU: to do: Write a better expression for the
+                // non-periodic case.
+
                 /* This is super-ugly but checks for direct neighbours */
                 /* with periodic BC */
                 if (((abs(ind[0] - ii) <= 1 ||
@@ -5402,16 +5421,20 @@ void engine_makeproxies(struct engine *e) {
               /* In the gravity case, check distances using the MAC. */
               if (with_gravity) {
 
-                /* Get cj's multipole */
-                const struct gravity_tensors *multi_j = cells[cjd].multipole;
-                const double CoM_j[3] = {multi_j->CoM[0], multi_j->CoM[1],
-                                         multi_j->CoM[2]};
-                const double r_max_j = multi_j->r_max;
+                /* We don't have multipoles yet (or their CoMs) so we will have
+                   to cook up something based on cell locations only. We hence
+                   need an upper limit on the distance that the CoMs in those
+                   cells could have. We then can decide whether we are too close
+                   for an M2L interaction and hence require a proxy as this pair
+                   of cells cannot rely on just an M2L calculation. */
+
+                const double loc_j[3] = {cells[cjd].loc[0], cells[cjd].loc[1],
+                                         cells[cjd].loc[2]};
 
-                /* Let's compute the current distance between the cell pair*/
-                double dx = CoM_i[0] - CoM_j[0];
-                double dy = CoM_i[1] - CoM_j[1];
-                double dz = CoM_i[2] - CoM_j[2];
+                /* Start with the distance between the cell centres. */
+                double dx = loc_i[0] - loc_j[0];
+                double dy = loc_i[1] - loc_j[1];
+                double dz = loc_i[2] - loc_j[2];
 
                 /* Apply BC */
                 if (periodic) {
@@ -5419,11 +5442,32 @@ void engine_makeproxies(struct engine *e) {
                   dy = nearest(dy, dim[1]);
                   dz = nearest(dz, dim[2]);
                 }
+
+                /* Add to it the worst case where the future CoMs sit in
+                 * opposite corners of their cells */
+                dx = fabs(dx) + cell_width[0];
+                dy = fabs(dy) + cell_width[1];
+                dz = fabs(dz) + cell_width[2];
+
+                /* This is a crazy upper bound but the best we can do */
                 const double r2 = dx * dx + dy * dy + dz * dz;
 
-                /* Are we too close for M2L? */
-                if (!gravity_M2L_accept(r_max_i, r_max_j, theta_crit2, r2))
-                  proxy_type |= (int)proxy_cell_type_gravity;
+                /* Estimated minimal distance between any pair of particles */
+                const double min_radius = sqrt(r2) - 2. * r_max;
+
+                /* Are we beyond the distance where the truncated forces are 0
+                 * but not too far such that M2L can be used? */
+                if (periodic) {
+
+                  if ((min_radius < max_distance) &&
+                      (!gravity_M2L_accept(r_max, r_max, theta_crit2, r2)))
+                    proxy_type |= (int)proxy_cell_type_gravity;
+
+                } else {
+
+                  if (!gravity_M2L_accept(r_max, r_max, theta_crit2, r2))
+                    proxy_type |= (int)proxy_cell_type_gravity;
+                }
               }
 
               /* Abort if not in range at all */
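
The reworked proxy search in `engine_makeproxies()` no longer reads the (not yet constructed) multipoles; it derives a search radius from the cell diagonal and the opening angle, then clamps the periodic loop bounds so no neighbour is visited twice when the radius spans the whole box. A toy sketch of that arithmetic, with made-up box parameters standing in for the engine fields:

```c
/* Sketch of the delta / delta_m / delta_p computation above. */
#include <math.h>
#include <stdio.h>

int main(void) {
  const double cell_width[3] = {6.25, 6.25, 6.25};
  const double dmin = 6.25;        /* smallest cell edge */
  const double theta_crit_inv = 1. / 0.7;
  const int cdim = 16;             /* top-level cells per dimension */

  /* Upper bound on the CoM<->particle distance: the cell diagonal. */
  const double r_max = sqrt(cell_width[0] * cell_width[0] +
                            cell_width[1] * cell_width[1] +
                            cell_width[2] * cell_width[2]);

  /* Pairs closer than this may fail the M2L test and need a proxy. */
  const double distance = 2. * r_max * theta_crit_inv;
  const int delta = (int)(distance / dmin) + 1;

  /* Clamp so the periodic loops [-delta_m, delta_p] cover each
   * neighbour exactly once, even when delta spans the whole box. */
  int delta_m = delta, delta_p = delta;
  if (delta >= cdim / 2) {
    delta_m = cdim / 2;
    delta_p = (cdim % 2 == 0) ? cdim / 2 - 1 : cdim / 2;
  }
  printf("delta=%d -> delta_m=%d delta_p=%d\n", delta, delta_m, delta_p);
  return 0;
}
```
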
diff --git a/src/mesh_gravity.c b/src/mesh_gravity.c
index 2359b8a9cdf785bce719a1d0379d177d00328b9e..d49b850f98c3d011b4bc1f4a7462be31954e1e7f 100644
--- a/src/mesh_gravity.c
+++ b/src/mesh_gravity.c
@@ -314,7 +314,7 @@ void pm_mesh_compute_potential(struct pm_mesh* mesh, const struct space* s,
   fftw_plan inverse_plan = fftw_plan_dft_c2r_3d(
       N, N, N, frho, rho, FFTW_ESTIMATE | FFTW_DESTROY_INPUT);
 
-  const ticks tic = getticks();
+  ticks tic = getticks();
 
   /* Zero everything */
   bzero(rho, N * N * N * sizeof(double));
@@ -324,9 +324,23 @@ void pm_mesh_compute_potential(struct pm_mesh* mesh, const struct space* s,
     gpart_to_mesh_CIC(&s->gparts[i], rho, N, cell_fac, dim);
 
   if (verbose)
-    message("gpart assignment took %.3f %s.",
+    message("Gpart assignment took %.3f %s.",
             clocks_from_ticks(getticks() - tic), clocks_getunit());
 
+#ifdef WITH_MPI
+
+  MPI_Barrier(MPI_COMM_WORLD);
+  tic = getticks();
+
+  /* Merge everybody's share of the density mesh */
+  MPI_Allreduce(MPI_IN_PLACE, rho, N * N * N, MPI_DOUBLE, MPI_SUM,
+                MPI_COMM_WORLD);
+
+  if (verbose)
+    message("Mesh comunication took %.3f %s.",
+            clocks_from_ticks(getticks() - tic), clocks_getunit());
+#endif
+
   /* message("\n\n\n DENSITY"); */
   /* print_array(rho, N); */
 
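
The new reduction merges the per-rank density grids: each rank deposits only its local particles onto a private copy of the full N^3 mesh, and an in-place `MPI_Allreduce` sums the copies so every rank ends up holding the global density. A minimal sketch of the same pattern, with a toy grid size:

```c
/* Sketch of the in-place density-mesh reduction; compile with mpicc. */
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(int argc, char *argv[]) {
  MPI_Init(&argc, &argv);
  int rank = 0;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);

  const int N = 8; /* toy mesh; SWIFT's N is the PM mesh side length */
  double *rho = malloc(N * N * N * sizeof(double));
  memset(rho, 0, N * N * N * sizeof(double));

  /* Stand-in for the CIC assignment: each rank deposits one unit. */
  rho[rank % (N * N * N)] += 1.0;

  /* Merge everybody's share of the density mesh. */
  MPI_Allreduce(MPI_IN_PLACE, rho, N * N * N, MPI_DOUBLE, MPI_SUM,
                MPI_COMM_WORLD);

  if (rank == 0) printf("rho[0] after reduction: %f\n", rho[0]);
  free(rho);
  MPI_Finalize();
  return 0;
}
```
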
diff --git a/src/multipole.h b/src/multipole.h
index c05aa36890313ea22f725ee272746bdf63f597ea..2ada835e32565dc4075dd1352ba3959b2ba4766b 100644
--- a/src/multipole.h
+++ b/src/multipole.h
@@ -186,12 +186,12 @@ struct gravity_tensors {
     /*! The actual content */
     struct {
 
-      /*! Multipole mass */
-      struct multipole m_pole;
-
       /*! Field tensor for the potential */
       struct grav_tensor pot;
 
+      /*! Multipole mass */
+      struct multipole m_pole;
+
-      /*! Centre of mass of the matter dsitribution */
+      /*! Centre of mass of the matter distribution */
       double CoM[3];
 
diff --git a/src/proxy.c b/src/proxy.c
index 8d34e18f250f3b70e74c0e4a30be2eebcb631666..55d1f7d082fe883ce6dba83791017391c48cf032 100644
--- a/src/proxy.c
+++ b/src/proxy.c
@@ -159,103 +159,6 @@ void proxy_tags_exchange(struct proxy *proxies, int num_proxies,
 #endif
 }
 
-/**
- * @brief Exchange the cell structures with all proxies.
- *
- * @param proxies The list of #proxy that will send/recv cells.
- * @param num_proxies The number of proxies.
- * @param s The space into which the particles will be unpacked.
- */
-void proxy_cells_exchange(struct proxy *proxies, int num_proxies,
-                          struct space *s) {
-
-#ifdef WITH_MPI
-
-  MPI_Request *reqs;
-  if ((reqs = (MPI_Request *)malloc(sizeof(MPI_Request) * 2 * num_proxies)) ==
-      NULL)
-    error("Failed to allocate request buffers.");
-  MPI_Request *reqs_in = reqs;
-  MPI_Request *reqs_out = &reqs[num_proxies];
-
-  /* Run through the cells and get the size of the ones that will be sent off.
-   */
-  int count_out = 0;
-  int offset[s->nr_cells];
-  for (int k = 0; k < s->nr_cells; k++) {
-    offset[k] = count_out;
-    if (s->cells_top[k].sendto)
-      count_out +=
-          (s->cells_top[k].pcell_size = cell_getsize(&s->cells_top[k]));
-  }
-
-  /* Allocate the pcells. */
-  struct pcell *pcells = NULL;
-  if (posix_memalign((void **)&pcells, SWIFT_CACHE_ALIGNMENT,
-                     sizeof(struct pcell) * count_out) != 0)
-    error("Failed to allocate pcell buffer.");
-
-  /* Pack the cells. */
-  for (int k = 0; k < s->nr_cells; k++)
-    if (s->cells_top[k].sendto) {
-      cell_pack(&s->cells_top[k], &pcells[offset[k]]);
-      s->cells_top[k].pcell = &pcells[offset[k]];
-    }
-
-  /* Launch the first part of the exchange. */
-  for (int k = 0; k < num_proxies; k++) {
-    proxy_cells_exchange_first(&proxies[k]);
-    reqs_in[k] = proxies[k].req_cells_count_in;
-    reqs_out[k] = proxies[k].req_cells_count_out;
-  }
-
-  /* Wait for each count to come in and start the recv. */
-  for (int k = 0; k < num_proxies; k++) {
-    int pid = MPI_UNDEFINED;
-    MPI_Status status;
-    if (MPI_Waitany(num_proxies, reqs_in, &pid, &status) != MPI_SUCCESS ||
-        pid == MPI_UNDEFINED)
-      error("MPI_Waitany failed.");
-    // message( "request from proxy %i has arrived." , pid );
-    proxy_cells_exchange_second(&proxies[pid]);
-  }
-
-  /* Wait for all the sends to have finished too. */
-  if (MPI_Waitall(num_proxies, reqs_out, MPI_STATUSES_IGNORE) != MPI_SUCCESS)
-    error("MPI_Waitall on sends failed.");
-
-  /* Set the requests for the cells. */
-  for (int k = 0; k < num_proxies; k++) {
-    reqs_in[k] = proxies[k].req_cells_in;
-    reqs_out[k] = proxies[k].req_cells_out;
-  }
-
-  /* Wait for each pcell array to come in from the proxies. */
-  for (int k = 0; k < num_proxies; k++) {
-    int pid = MPI_UNDEFINED;
-    MPI_Status status;
-    if (MPI_Waitany(num_proxies, reqs_in, &pid, &status) != MPI_SUCCESS ||
-        pid == MPI_UNDEFINED)
-      error("MPI_Waitany failed.");
-    // message( "cell data from proxy %i has arrived." , pid );
-    for (int count = 0, j = 0; j < proxies[pid].nr_cells_in; j++)
-      count += cell_unpack(&proxies[pid].pcells_in[count],
-                           proxies[pid].cells_in[j], s);
-  }
-
-  /* Wait for all the sends to have finished too. */
-  if (MPI_Waitall(num_proxies, reqs_out, MPI_STATUSES_IGNORE) != MPI_SUCCESS)
-    error("MPI_Waitall on sends failed.");
-
-  /* Clean up. */
-  free(reqs);
-  free(pcells);
-
-#else
-  error("SWIFT was not compiled with MPI support.");
-#endif
-}
-
 /**
  * @brief Exchange cells with a remote node, first part.
  *
@@ -350,6 +253,105 @@ void proxy_cells_exchange_second(struct proxy *p) {
 #endif
 }
 
+/**
+ * @brief Exchange the cell structures with all proxies.
+ *
+ * @param proxies The list of #proxy that will send/recv cells.
+ * @param num_proxies The number of proxies.
+ * @param s The space into which the particles will be unpacked.
+ * @param with_gravity Are we running with gravity and hence need
+ *      to exchange multipoles?
+ */
+void proxy_cells_exchange(struct proxy *proxies, int num_proxies,
+                          struct space *s, const int with_gravity) {
+
+#ifdef WITH_MPI
+
+  MPI_Request *reqs;
+  if ((reqs = (MPI_Request *)malloc(sizeof(MPI_Request) * 2 * num_proxies)) ==
+      NULL)
+    error("Failed to allocate request buffers.");
+  MPI_Request *reqs_in = reqs;
+  MPI_Request *reqs_out = &reqs[num_proxies];
+
+  /* Run through the cells and get the size of the ones that will be sent off.
+   */
+  int count_out = 0;
+  int offset[s->nr_cells];
+  for (int k = 0; k < s->nr_cells; k++) {
+    offset[k] = count_out;
+    if (s->cells_top[k].sendto)
+      count_out +=
+          (s->cells_top[k].pcell_size = cell_getsize(&s->cells_top[k]));
+  }
+
+  /* Allocate the pcells. */
+  struct pcell *pcells = NULL;
+  if (posix_memalign((void **)&pcells, SWIFT_CACHE_ALIGNMENT,
+                     sizeof(struct pcell) * count_out) != 0)
+    error("Failed to allocate pcell buffer.");
+
+  /* Pack the cells. */
+  for (int k = 0; k < s->nr_cells; k++)
+    if (s->cells_top[k].sendto) {
+      cell_pack(&s->cells_top[k], &pcells[offset[k]], with_gravity);
+      s->cells_top[k].pcell = &pcells[offset[k]];
+    }
+
+  /* Launch the first part of the exchange. */
+  for (int k = 0; k < num_proxies; k++) {
+    proxy_cells_exchange_first(&proxies[k]);
+    reqs_in[k] = proxies[k].req_cells_count_in;
+    reqs_out[k] = proxies[k].req_cells_count_out;
+  }
+
+  /* Wait for each count to come in and start the recv. */
+  for (int k = 0; k < num_proxies; k++) {
+    int pid = MPI_UNDEFINED;
+    MPI_Status status;
+    if (MPI_Waitany(num_proxies, reqs_in, &pid, &status) != MPI_SUCCESS ||
+        pid == MPI_UNDEFINED)
+      error("MPI_Waitany failed.");
+    // message( "request from proxy %i has arrived." , pid );
+    proxy_cells_exchange_second(&proxies[pid]);
+  }
+
+  /* Wait for all the sends to have finished too. */
+  if (MPI_Waitall(num_proxies, reqs_out, MPI_STATUSES_IGNORE) != MPI_SUCCESS)
+    error("MPI_Waitall on sends failed.");
+
+  /* Set the requests for the cells. */
+  for (int k = 0; k < num_proxies; k++) {
+    reqs_in[k] = proxies[k].req_cells_in;
+    reqs_out[k] = proxies[k].req_cells_out;
+  }
+
+  /* Wait for each pcell array to come in from the proxies. */
+  for (int k = 0; k < num_proxies; k++) {
+    int pid = MPI_UNDEFINED;
+    MPI_Status status;
+    if (MPI_Waitany(num_proxies, reqs_in, &pid, &status) != MPI_SUCCESS ||
+        pid == MPI_UNDEFINED)
+      error("MPI_Waitany failed.");
+    // message( "cell data from proxy %i has arrived." , pid );
+    for (int count = 0, j = 0; j < proxies[pid].nr_cells_in; j++)
+      count += cell_unpack(&proxies[pid].pcells_in[count],
+                           proxies[pid].cells_in[j], s, with_gravity);
+  }
+
+  /* Wait for all the sends to have finished too. */
+  if (MPI_Waitall(num_proxies, reqs_out, MPI_STATUSES_IGNORE) != MPI_SUCCESS)
+    error("MPI_Waitall on sends failed.");
+
+  /* Clean up. */
+  free(reqs);
+  free(pcells);
+
+#else
+  error("SWIFT was not compiled with MPI support.");
+#endif
+}
+
 /**
  * @brief Add a cell to the given proxy's input list.
  *
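
The relocated `proxy_cells_exchange()` keeps its two-phase handshake: packed-cell counts are exchanged first, and only once a count has arrived (hence the `MPI_Waitany` loop) can the receive for the variable-sized `pcell` payload be posted. A reduced sketch of that handshake with a single symmetric partner, so plain `MPI_Waitall` suffices (toy payload, not pcells):

```c
/* Two-phase size-then-payload exchange; run with exactly 2 ranks. */
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char *argv[]) {
  MPI_Init(&argc, &argv);
  int rank, size;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &size);
  if (size != 2) {
    if (rank == 0) fprintf(stderr, "Run with exactly 2 ranks.\n");
    MPI_Abort(MPI_COMM_WORLD, 1);
  }
  const int other = 1 - rank;

  /* Phase 1: exchange the payload sizes. */
  int count_out = 3 + rank, count_in = 0;
  MPI_Request reqs[2];
  MPI_Irecv(&count_in, 1, MPI_INT, other, 0, MPI_COMM_WORLD, &reqs[0]);
  MPI_Isend(&count_out, 1, MPI_INT, other, 0, MPI_COMM_WORLD, &reqs[1]);
  MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);

  /* Phase 2: the size is now known, so exchange the payload itself. */
  double *in = malloc(count_in * sizeof(double));
  double *out = malloc(count_out * sizeof(double));
  for (int k = 0; k < count_out; k++) out[k] = rank;
  MPI_Irecv(in, count_in, MPI_DOUBLE, other, 1, MPI_COMM_WORLD, &reqs[0]);
  MPI_Isend(out, count_out, MPI_DOUBLE, other, 1, MPI_COMM_WORLD, &reqs[1]);
  MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);

  printf("rank %d received %d values from rank %d\n", rank, count_in, other);
  free(in);
  free(out);
  MPI_Finalize();
  return 0;
}
```
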
diff --git a/src/proxy.h b/src/proxy.h
index 63f51846d0bfb646e1f2f9209413437b4c966983..fbc3f1a163333146b26c937fae55a7fc4c1f12d7 100644
--- a/src/proxy.h
+++ b/src/proxy.h
@@ -102,9 +102,7 @@ void proxy_parts_exchange_second(struct proxy *p);
 void proxy_addcell_in(struct proxy *p, struct cell *c, int type);
 void proxy_addcell_out(struct proxy *p, struct cell *c, int type);
 void proxy_cells_exchange(struct proxy *proxies, int num_proxies,
-                          struct space *s);
-void proxy_cells_exchange_first(struct proxy *p);
-void proxy_cells_exchange_second(struct proxy *p);
+                          struct space *s, int with_gravity);
 void proxy_tags_exchange(struct proxy *proxies, int num_proxies,
                          struct space *s);
 
diff --git a/src/runner.c b/src/runner.c
index 7e67c6e943f7493bf2e472da622291653cfe9001..94d36f0f9c0c450c20a961327808cf203f13bafb 100644
--- a/src/runner.c
+++ b/src/runner.c
@@ -1955,11 +1955,6 @@ void runner_do_recv_gpart(struct runner *r, struct cell *c, int timer) {
       if (gparts[k].time_bin == time_bin_inhibited) continue;
       time_bin_min = min(time_bin_min, gparts[k].time_bin);
       time_bin_max = max(time_bin_max, gparts[k].time_bin);
-
-#ifdef SWIFT_DEBUG_CHECKS
-      if (gparts[k].ti_drift != ti_current)
-        error("Received un-drifted g-particle !");
-#endif
     }
 
     /* Convert into a time */
@@ -2034,11 +2029,6 @@ void runner_do_recv_spart(struct runner *r, struct cell *c, int timer) {
       if (sparts[k].time_bin == time_bin_inhibited) continue;
       time_bin_min = min(time_bin_min, sparts[k].time_bin);
       time_bin_max = max(time_bin_max, sparts[k].time_bin);
-
-#ifdef SWIFT_DEBUG_CHECKS
-      if (sparts[k].ti_drift != ti_current)
-        error("Received un-drifted s-particle !");
-#endif
     }
 
     /* Convert into a time */
@@ -2068,8 +2058,8 @@ void runner_do_recv_spart(struct runner *r, struct cell *c, int timer) {
 #endif
 
   /* ... and store. */
-  c->ti_gravity_end_min = ti_gravity_end_min;
-  c->ti_gravity_end_max = ti_gravity_end_max;
+  // c->ti_gravity_end_min = ti_gravity_end_min;
+  // c->ti_gravity_end_max = ti_gravity_end_max;
   c->ti_old_gpart = ti_current;
 
   if (timer) TIMER_TOC(timer_dorecv_spart);
diff --git a/src/runner_doiact_grav.h b/src/runner_doiact_grav.h
index 240f812984362fcd4130f2465ff1b176bb6fb067..34d3e690dd27f1d55511847b8a921b9c9ba84aac 100644
--- a/src/runner_doiact_grav.h
+++ b/src/runner_doiact_grav.h
@@ -1144,7 +1144,7 @@ static INLINE void runner_dopair_grav_mm(struct runner *r,
   TIMER_TIC;
 
   /* Anything to do here? */
-  if (!cell_is_active_gravity(ci, e) || ci->nodeID != engine_rank) return;
+  if (!cell_is_active_gravity_mm(ci, e) || ci->nodeID != engine_rank) return;
 
   /* Short-cut to the multipole */
   const struct multipole *multi_j = &cj->multipole->m_pole;
@@ -1485,8 +1485,8 @@ static INLINE void runner_dopair_grav_mm_symmetric(struct runner *r,
     error("Running M-M task with two inactive cells.");
 #endif
 
-  if (cell_is_active_gravity(ci, e)) runner_dopair_grav_mm(r, ci, cj);
-  if (cell_is_active_gravity(cj, e)) runner_dopair_grav_mm(r, cj, ci);
+  if (cell_is_active_gravity_mm(ci, e)) runner_dopair_grav_mm(r, ci, cj);
+  if (cell_is_active_gravity_mm(cj, e)) runner_dopair_grav_mm(r, cj, ci);
 }
 
 /**
diff --git a/src/scheduler.c b/src/scheduler.c
index c751a75fb6ed55491202e55b823f8f0fca61da17..a90a3f1b0fa2401a6bc128069486c679df535435 100644
--- a/src/scheduler.c
+++ b/src/scheduler.c
@@ -804,11 +804,8 @@ static void scheduler_splittask_hydro(struct task *t, struct scheduler *s) {
  */
 static void scheduler_splittask_gravity(struct task *t, struct scheduler *s) {
 
-/* Temporarily prevent MPI here */
-#ifndef WITH_MPI
   const struct space *sp = s->space;
   struct engine *e = sp->e;
-#endif
 
   /* Iterate on this task until we're done with it. */
   int redo = 1;
@@ -838,9 +835,6 @@ static void scheduler_splittask_gravity(struct task *t, struct scheduler *s) {
         break;
       }
 
-/* Temporarily prevent MPI here */
-#ifndef WITH_MPI
-
       /* Should we split this task? */
       if (cell_can_split_self_gravity_task(ci)) {
 
@@ -879,7 +873,6 @@ static void scheduler_splittask_gravity(struct task *t, struct scheduler *s) {
           } /* Self-gravity only */
         }   /* Make tasks explicitly */
       }     /* Cell is split */
-#endif      /* WITH_MPI */
     }       /* Self interaction */
 
     /* Pair interaction? */
@@ -895,20 +888,17 @@ static void scheduler_splittask_gravity(struct task *t, struct scheduler *s) {
         break;
       }
 
-/* Temporarily prevent MPI here */
-#ifndef WITH_MPI
-
       /* Should we replace it with an M-M task? */
-      if (cell_can_use_pair_mm(ci, cj, e, sp)) {
+      if (cell_can_use_pair_mm_rebuild(ci, cj, e, sp)) {
 
         t->type = task_type_grav_mm;
         t->subtype = task_subtype_none;
 
         /* Since this task will not be split, we can already link it */
-        atomic_inc(&ci->nr_tasks);
-        atomic_inc(&cj->nr_tasks);
-        engine_addlink(e, &ci->grav, t);
-        engine_addlink(e, &cj->grav, t);
+        atomic_inc(&ci->nr_mm_tasks);
+        atomic_inc(&cj->nr_mm_tasks);
+        engine_addlink(e, &ci->grav_mm, t);
+        engine_addlink(e, &cj->grav_mm, t);
         break;
       }
 
@@ -916,9 +906,12 @@ static void scheduler_splittask_gravity(struct task *t, struct scheduler *s) {
       if (cell_can_split_pair_gravity_task(ci) &&
           cell_can_split_pair_gravity_task(cj)) {
 
+        const long long gcount_i = ci->gcount;
+        const long long gcount_j = cj->gcount;
+
         /* Replace by a single sub-task? */
-        if (scheduler_dosub && /* Use division to avoid integer overflow. */
-            ci->gcount < space_subsize_pair_grav / cj->gcount) {
+        if (scheduler_dosub && /* Use 64-bit counts to avoid overflow. */
+            gcount_i * gcount_j < ((long long)space_subsize_pair_grav)) {
 
           /* Otherwise, split it. */
         } else {
@@ -954,9 +947,8 @@ static void scheduler_splittask_gravity(struct task *t, struct scheduler *s) {
           }
         } /* Split the pair */
       }
-#endif /* WITH_MPI */
-    }  /* pair interaction? */
-  }    /* iterate over the current task. */
+    } /* pair interaction? */
+  }   /* iterate over the current task. */
 }
 
 /**
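
On the rewritten sub-task size test: with 32-bit `gcount` fields the product `ci->gcount * cj->gcount` can overflow, which is why the old code divided instead; promoting the counts to `long long` makes the direct product safe (and side-steps a division by zero if a count were ever zero). A toy demonstration with made-up numbers:

```c
/* Why the comparison was promoted to 64-bit arithmetic. */
#include <stdio.h>

int main(void) {
  const int gcount_i = 100000, gcount_j = 100000;
  const long long space_subsize_pair_grav = 256000000LL;

  /* 100000 * 100000 = 10^10 overflows a 32-bit int... */
  const long long product = (long long)gcount_i * (long long)gcount_j;

  /* ...but compares safely in 64 bits. */
  printf("product=%lld, below threshold? %d\n", product,
         product < space_subsize_pair_grav);
  return 0;
}
```
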
diff --git a/src/space.c b/src/space.c
index 797757f71e36e808f652c14ece5d54de9ed3d12d..9a9dd05c237a69d034bf618490c834ade5ee33f7 100644
--- a/src/space.c
+++ b/src/space.c
@@ -164,10 +164,12 @@ void space_rebuild_recycle_mapper(void *map_data, int num_elements,
                          multipole_rec_end);
     c->sorts = NULL;
     c->nr_tasks = 0;
+    c->nr_mm_tasks = 0;
     c->density = NULL;
     c->gradient = NULL;
     c->force = NULL;
     c->grav = NULL;
+    c->grav_mm = NULL;
     c->dx_max_part = 0.0f;
     c->dx_max_sort = 0.0f;
     c->sorted = 0;
@@ -202,6 +204,13 @@ void space_rebuild_recycle_mapper(void *map_data, int num_elements,
     c->do_sub_sort = 0;
     c->do_grav_sub_drift = 0;
     c->do_sub_drift = 0;
+    c->ti_hydro_end_min = -1;
+    c->ti_hydro_end_max = -1;
+    c->ti_gravity_end_min = -1;
+    c->ti_gravity_end_max = -1;
+#ifdef SWIFT_DEBUG_CHECKS
+    c->cellID = 0;
+#endif
     if (s->gravity) bzero(c->multipole, sizeof(struct gravity_tensors));
     for (int i = 0; i < 13; i++)
       if (c->sort[i] != NULL) {
@@ -334,7 +343,7 @@ void space_regrid(struct space *s, int verbose) {
 
   /* Are we about to allocate new top level cells without a regrid?
    * Can happen when restarting the application. */
-  int no_regrid = (s->cells_top == NULL && oldnodeIDs == NULL);
+  const int no_regrid = (s->cells_top == NULL && oldnodeIDs == NULL);
 #endif
 
   /* Do we need to re-build the upper-level cells? */
@@ -428,6 +437,10 @@ void space_regrid(struct space *s, int verbose) {
           c->tag = -1;
 #endif  // WITH_MPI
           if (s->gravity) c->multipole = &s->multipoles_top[cid];
+#ifdef SWIFT_DEBUG_CHECKS
+          c->cellID = -last_cell_id;
+          last_cell_id++;
+#endif
         }
 
     /* Be verbose about the change. */
@@ -919,6 +932,12 @@ void space_rebuild(struct space *s, int verbose) {
     c->ti_old_part = ti_current;
     c->ti_old_gpart = ti_current;
     c->ti_old_multipole = ti_current;
+
+#ifdef SWIFT_DEBUG_CHECKS
+    c->cellID = -last_cell_id;
+    last_cell_id++;
+#endif
+
     if (c->nodeID == engine_rank) {
       c->parts = finger;
       c->xparts = xfinger;
@@ -2903,6 +2922,10 @@ void space_init(struct space *s, struct swift_params *params,
   /* Init the space lock. */
   if (lock_init(&s->lock) != 0) error("Failed to create space spin-lock.");
 
+#ifdef SWIFT_DEBUG_CHECKS
+  last_cell_id = 1;
+#endif
+
   /* Build the cells recursively. */
   if (!dry_run) space_regrid(s, verbose);
 }