diff --git a/examples/ZeldovichPancake_3D/plotSolution.py b/examples/ZeldovichPancake_3D/plotSolution.py
index a040683a4e80ca2298d4b16f269bdabae0b00508..dd1c6243e4637b25da187e93c89490a655303153 100644
--- a/examples/ZeldovichPancake_3D/plotSolution.py
+++ b/examples/ZeldovichPancake_3D/plotSolution.py
@@ -192,4 +192,4 @@ ylim(0, 1)
 xticks([])
 yticks([])
 
-savefig("ZeldovichPancake_%.3d.png"%snap, dpi=200)
+savefig("ZeldovichPancake_%.4d.png"%snap, dpi=200)
diff --git a/examples/ZeldovichPancake_3D/zeldovichPancake.yml b/examples/ZeldovichPancake_3D/zeldovichPancake.yml
index d0bbe55222447ab5967c6c46b956fc1d6251ede4..39e02e3aab685bc4435d80e1d756b0b0c843c879 100644
--- a/examples/ZeldovichPancake_3D/zeldovichPancake.yml
+++ b/examples/ZeldovichPancake_3D/zeldovichPancake.yml
@@ -46,7 +46,6 @@ Cosmology:
 Gravity:
   mesh_side_length:   16
   eta: 0.025
-  theta: 0.85
-  r_cut_max: 5.
+  theta: 0.8
   comoving_softening: 0.0001
   max_physical_softening: 0.0001
diff --git a/src/engine.c b/src/engine.c
index c0eabed649881165d534ad16d584a7c56b310861..056720265fc5db3e21e18dc5f5d9e8a0efe200a4 100644
--- a/src/engine.c
+++ b/src/engine.c
@@ -2379,8 +2379,16 @@ void engine_make_self_gravity_tasks_mapper(void *map_data, int num_elements,
     }
 
     /* Recover the multipole information */
-    struct gravity_tensors *const multi_i = ci->multipole;
+    const struct gravity_tensors *const multi_i = ci->multipole;
     const double CoM_i[3] = {multi_i->CoM[0], multi_i->CoM[1], multi_i->CoM[2]};
+    const double r_max_i = multi_i->r_max;
+
+#ifdef SWIFT_DEBUG_CHECKS
+    if (multi_i->r_max != multi_i->r_max_rebuild)
+      error(
+          "Multipole size not equal ot it's size after rebuild. But we just "
+          "rebuilt...");
+#endif
 
     /* Loop over every other cell */
     for (int ii = 0; ii < cdim[0]; ii++) {
@@ -2414,8 +2422,7 @@ void engine_make_self_gravity_tasks_mapper(void *map_data, int num_elements,
           const double r2 = dx * dx + dy * dy + dz * dz;
 
           /* Are the cells too close for a MM interaction ? */
-          if (!gravity_M2L_accept(multi_i->r_max_rebuild,
-                                  multi_j->r_max_rebuild, theta_crit2, r2)) {
+          if (!gravity_M2L_accept(r_max_i, multi_j->r_max, theta_crit2, r2)) {
 
             /* Ok, we need to add a direct pair calculation */
             scheduler_addtask(sched, task_type_pair, task_subtype_grav, 0, 0,
@@ -3155,7 +3162,7 @@ void engine_maketasks(struct engine *e) {
 #else
   const int hydro_tasks_per_cell = 27 * 2;
 #endif
-  const int self_grav_tasks_per_cell = 27 * 2;
+  const int self_grav_tasks_per_cell = 125;
   const int ext_grav_tasks_per_cell = 1;
 
   if (e->policy & engine_policy_hydro)
@@ -3730,7 +3737,7 @@ int engine_estimate_nr_tasks(struct engine *e) {
 #endif
   }
   if (e->policy & engine_policy_self_gravity) {
-    n1 += 32;
+    n1 += 125;
     n2 += 1;
 #ifdef WITH_MPI
     n2 += 2;
diff --git a/src/gravity_cache.h b/src/gravity_cache.h
index e5e78900afa1d1ed5cb66c58fbecb626ed9cbd8b..5d2338127acabc076e27ccaee9c1bc8748dbd661 100644
--- a/src/gravity_cache.h
+++ b/src/gravity_cache.h
@@ -286,6 +286,72 @@ gravity_cache_populate_no_mpole(const timebin_t max_active_bin,
   }
 }
 
+/**
+ * @brief Fills a #gravity_cache structure with some #gpart and make them use
+ * the multi-pole.
+ *
+ * @param max_active_bin The largest active bin in the current time-step.
+ * @param c The #gravity_cache to fill.
+ * @param gparts The #gpart array to read from.
+ * @param gcount The number of particles to read.
+ * @param gcount_padded The number of particle to read padded to the next
+ * multiple of the vector length.
+ * @param cell The cell we play with (to get reasonable padding positions).
+ * @param grav_props The global gravity properties.
+ */
+__attribute__((always_inline)) INLINE static void
+gravity_cache_populate_all_mpole(const timebin_t max_active_bin,
+                                 struct gravity_cache *c,
+                                 const struct gpart *restrict gparts,
+                                 const int gcount, const int gcount_padded,
+                                 const struct cell *cell,
+                                 const struct gravity_props *grav_props) {
+
+  /* Make the compiler understand we are in happy vectorization land */
+  swift_declare_aligned_ptr(float, x, c->x, SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(float, y, c->y, SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(float, z, c->z, SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(float, epsilon, c->epsilon, SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(float, m, c->m, SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(int, active, c->active, SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(int, use_mpole, c->use_mpole,
+                            SWIFT_CACHE_ALIGNMENT);
+  swift_assume_size(gcount_padded, VEC_SIZE);
+
+  /* Fill the input caches */
+  for (int i = 0; i < gcount; ++i) {
+    x[i] = (float)(gparts[i].x[0]);
+    y[i] = (float)(gparts[i].x[1]);
+    z[i] = (float)(gparts[i].x[2]);
+    epsilon[i] = gravity_get_softening(&gparts[i], grav_props);
+    m[i] = gparts[i].mass;
+    active[i] = (int)(gparts[i].time_bin <= max_active_bin);
+    use_mpole[i] = 1;
+  }
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (gcount_padded < gcount) error("Padded counter smaller than counter");
+#endif
+
+  /* Particles used for padding should get impossible positions
+   * that have a reasonable magnitude. We use the cell width for this */
+  const float pos_padded[3] = {-2.f * (float)cell->width[0],
+                               -2.f * (float)cell->width[1],
+                               -2.f * (float)cell->width[2]};
+  const float eps_padded = epsilon[0];
+
+  /* Pad the caches */
+  for (int i = gcount; i < gcount_padded; ++i) {
+    x[i] = pos_padded[0];
+    y[i] = pos_padded[1];
+    z[i] = pos_padded[2];
+    epsilon[i] = eps_padded;
+    m[i] = 0.f;
+    active[i] = 0;
+    use_mpole[i] = 0;
+  }
+}
+
 /**
  * @brief Write the output cache values back to the active #gpart.
  *
diff --git a/src/kernel_long_gravity.h b/src/kernel_long_gravity.h
index 66771439a42553c05e6d766a953fe349e2abbe7c..f11f760022d326cafd2ae5da2bc535273b8cc947 100644
--- a/src/kernel_long_gravity.h
+++ b/src/kernel_long_gravity.h
@@ -34,7 +34,7 @@
 #define GADGET2_LONG_RANGE_CORRECTION
 
 #ifdef GADGET2_LONG_RANGE_CORRECTION
-#define kernel_long_gravity_truncation_name "Gadget-2 (error function)"
+#define kernel_long_gravity_truncation_name "Gadget-like (using erfc())"
 #else
 #define kernel_long_gravity_truncation_name "Exp-based Sigmoid"
 #endif
diff --git a/src/multipole.h b/src/multipole.h
index 231a4f5f1646a7baf41766a431991782256e55d0..d75704db2514ba6f1f175a47678a42a85a40eeaa 100644
--- a/src/multipole.h
+++ b/src/multipole.h
@@ -2397,7 +2397,8 @@ INLINE static void gravity_L2P(const struct grav_tensor *lb,
  * multipoles.
  */
 __attribute__((always_inline)) INLINE static int gravity_M2L_accept(
-    double r_crit_a, double r_crit_b, double theta_crit2, double r2) {
+    const double r_crit_a, const double r_crit_b, const double theta_crit2,
+    const double r2) {
 
   const double size = r_crit_a + r_crit_b;
   const double size2 = size * size;
@@ -2410,8 +2411,7 @@ __attribute__((always_inline)) INLINE static int gravity_M2L_accept(
 
 /**
  * @brief Checks whether a particle-cell interaction can be appromixated by a
- * M2P
- * interaction using the distance and cell radius.
+ * M2P interaction using the distance and cell radius.
  *
  * We use the multipole acceptance criterion of Dehnen, 2002, JCoPh, Volume 179,
  * Issue 1, pp.27-42, equation 10.
@@ -2419,10 +2419,10 @@ __attribute__((always_inline)) INLINE static int gravity_M2L_accept(
  * @param r_max2 The square of the size of the multipole.
  * @param theta_crit2 The square of the critical opening angle.
  * @param r2 Square of the distance (periodically wrapped) between the
- * multipoles.
+ * particle and the multipole.
  */
 __attribute__((always_inline)) INLINE static int gravity_M2P_accept(
-    float r_max2, float theta_crit2, float r2) {
+    const float r_max2, const float theta_crit2, const float r2) {
 
   // MATTHIEU: Make this mass-dependent ?
 
diff --git a/src/runner_doiact_grav.h b/src/runner_doiact_grav.h
index c8cf98070895253852acfd760af8b2077d77dbf4..5109cc77a670481dcca37f080efe21c0a0c9bbf8 100644
--- a/src/runner_doiact_grav.h
+++ b/src/runner_doiact_grav.h
@@ -154,7 +154,7 @@ static INLINE void runner_dopair_grav_pp_full(
     struct gravity_cache *restrict cj_cache, const int gcount_i,
     const int gcount_j, const int gcount_padded_j, const int periodic,
     const float dim[3], const struct engine *restrict e,
-    struct gpart *restrict gparts_i, struct gpart *restrict gparts_j) {
+    struct gpart *restrict gparts_i, const struct gpart *restrict gparts_j) {
 
   /* Loop over all particles in ci... */
   for (int pid = 0; pid < gcount_i; pid++) {
@@ -277,7 +277,7 @@ static INLINE void runner_dopair_grav_pp_truncated(
     struct gravity_cache *restrict cj_cache, const int gcount_i,
     const int gcount_j, const int gcount_padded_j, const float dim[3],
     const float r_s_inv, const struct engine *restrict e,
-    struct gpart *restrict gparts_i, struct gpart *restrict gparts_j) {
+    struct gpart *restrict gparts_i, const struct gpart *restrict gparts_j) {
 
 #ifdef SWIFT_DEBUG_CHECKS
   if (!e->s->periodic)
@@ -455,7 +455,7 @@ static INLINE void runner_dopair_grav_pm_full(
 
     const float r2 = dx * dx + dy * dy + dz * dz;
 
-#ifdef SWIFT_DEBUG_CHECKS
+#ifdef SWIFT_DEBUG_CHECKSa
     const float r_max_j = cj->multipole->r_max;
     const float r_max2 = r_max_j * r_max_j;
     const float theta_crit2 = e->gravity_properties->theta_crit2;
@@ -572,7 +572,7 @@ static INLINE void runner_dopair_grav_pm_truncated(
 
     const float r2 = dx * dx + dy * dy + dz * dz;
 
-#ifdef SWIFT_DEBUG_CHECKS
+#ifdef SWIFT_DEBUG_CHECKSa
     const float r_max_j = cj->multipole->r_max;
     const float r_max2 = r_max_j * r_max_j;
     const float theta_crit2 = e->gravity_properties->theta_crit2;
@@ -1152,15 +1152,92 @@ static INLINE void runner_dopair_grav_mm(struct runner *r,
         cj->ti_old_multipole, cj->nodeID, ci->nodeID, e->ti_current);
 
   /* Let's interact at this level */
-  if (0)
-    gravity_M2L(&ci->multipole->pot, multi_j, ci->multipole->CoM,
-                cj->multipole->CoM, props, periodic, dim, r_s_inv);
-
-  runner_dopair_grav_pp(r, ci, cj, 0);
+  gravity_M2L(&ci->multipole->pot, multi_j, ci->multipole->CoM,
+              cj->multipole->CoM, props, periodic, dim, r_s_inv);
 
   TIMER_TOC(timer_dopair_grav_mm);
 }
 
+static INLINE void runner_dopair_recursive_grav_pm(struct runner *r,
+                                                   struct cell *ci,
+                                                   const struct cell *cj) {
+  /* Some constants */
+  const struct engine *e = r->e;
+  const int periodic = e->mesh->periodic;
+  const float dim[3] = {e->mesh->dim[0], e->mesh->dim[1], e->mesh->dim[2]};
+  const float r_s_inv = e->mesh->r_s_inv;
+
+  /* Anything to do here? */
+  if (!(cell_is_active_gravity(ci, e) && ci->nodeID == e->nodeID)) return;
+
+#ifdef SWIFT_DEBUG_CHECKS
+  /* Early abort? */
+  if (ci->gcount == 0 || cj->gcount == 0)
+    error("Doing pair gravity on an empty cell !");
+
+  /* Sanity check */
+  if (ci == cj) error("Pair interaction between a cell and itself.");
+
+  if (cj->ti_old_multipole != e->ti_current)
+    error("cj->multipole not drifted.");
+#endif
+
+  /* Can we recurse further? */
+  if (ci->split) {
+
+    /* Loop over ci's children */
+    for (int k = 0; k < 8; k++) {
+      if (ci->progeny[k] != NULL)
+        runner_dopair_recursive_grav_pm(r, ci->progeny[k], cj);
+    }
+
+    /* Ok, let's do the interaction here */
+  } else {
+
+    /* Start by constructing particle caches */
+
+    /* Cache to play with */
+    struct gravity_cache *const ci_cache = &r->ci_gravity_cache;
+
+    /* Computed the padded counts */
+    const int gcount_i = ci->gcount;
+    const int gcount_padded_i = gcount_i - (gcount_i % VEC_SIZE) + VEC_SIZE;
+
+#ifdef SWIFT_DEBUG_CHECKS
+    /* Check that we fit in cache */
+    if (gcount_i > ci_cache->count)
+      error("Not enough space in the cache! gcount_i=%d", gcount_i);
+#endif
+
+    /* Fill the cache */
+    gravity_cache_populate_all_mpole(e->max_active_bin, ci_cache, ci->gparts,
+                                     gcount_i, gcount_padded_i, ci,
+                                     e->gravity_properties);
+
+    /* Recover the multipole info and the CoM locations */
+    const struct multipole *multi_j = &cj->multipole->m_pole;
+    const float CoM_j[3] = {(float)(cj->multipole->CoM[0]),
+                            (float)(cj->multipole->CoM[1]),
+                            (float)(cj->multipole->CoM[2])};
+
+    /* Can we use the Newtonian version or do we need the truncated one ? */
+    if (!periodic) {
+
+      runner_dopair_grav_pm_full(ci_cache, gcount_padded_i, CoM_j, multi_j,
+                                 periodic, dim, e, ci->gparts, gcount_i, cj);
+
+    } else {
+
+      runner_dopair_grav_pm_truncated(ci_cache, gcount_padded_i, CoM_j, multi_j,
+                                      dim, r_s_inv, e, ci->gparts, gcount_i,
+                                      cj);
+    }
+
+    /* Write back to the particles */
+    gravity_cache_write_back(ci_cache, ci->gparts, gcount_i);
+  }
+}
+
 /**
  * @brief Computes the interaction of all the particles in a cell with all the
  * particles of another cell.
@@ -1392,8 +1469,6 @@ static INLINE void runner_do_grav_long_range(struct runner *r, struct cell *ci,
   const double theta_crit2 = e->gravity_properties->theta_crit2;
   const double max_distance = e->mesh->r_cut_max;
 
-  const int cdim[3] = {e->s->cdim[0], e->s->cdim[1], e->s->cdim[2]};
-
   TIMER_TIC;
 
   /* Recover the list of top-level cells */
@@ -1412,12 +1487,15 @@ static INLINE void runner_do_grav_long_range(struct runner *r, struct cell *ci,
 
   /* Recover the local multipole */
   struct gravity_tensors *const multi_i = ci->multipole;
-  // const double CoM_i[3] = {multi_i->CoM[0], multi_i->CoM[1],
-  // multi_i->CoM[2]};
   const double CoM_rebuild_i[3] = {multi_i->CoM_rebuild[0],
                                    multi_i->CoM_rebuild[1],
                                    multi_i->CoM_rebuild[2]};
+
+  /* Flag that contributions will be recieved */
+  multi_i->pot.interacted = 1;
+
   int cc = 0;
+
   /* Loop over all the top-level cells and go for a M-M interaction if
    * well-separated */
   for (int n = 0; n < nr_cells; ++n) {
@@ -1455,9 +1533,6 @@ static INLINE void runner_do_grav_long_range(struct runner *r, struct cell *ci,
       /* Are we beyond the distance where the truncated forces are 0 ?*/
       if (periodic && max_radius > max_distance) {
 
-/* message("hello"); */
-/* message("max_r = %e max_distance = %e", max_radius, max_distance); */
-
 #ifdef SWIFT_DEBUG_CHECKS
         /* Need to account for the interactions we missed */
         multi_i->pot.num_interacted += multi_j->m_pole.num_gpart;
@@ -1465,48 +1540,15 @@ static INLINE void runner_do_grav_long_range(struct runner *r, struct cell *ci,
         continue;
       }
 
-      const int cid = n;
-      const int i = cid / (cdim[1] * cdim[2]);
-      const int j = (cid / cdim[2]) % cdim[1];
-      const int k = cid % cdim[2];
-
+      /* Call the PM interaction fucntion on the active sub-cells of ci */
       ++cc;
-      if (0) message("Interacting with [%d %d %d] (%d)", i, j, k, cc);
-
-      // MATTHIEU
-      runner_dopair_grav_mm(r, ci, cj);
-      continue;
-
-      /*       /\* Let's compute the current distance between the cell pair*\/
-       */
-      /*       double dx = CoM_i[0] - multi_j->CoM[0]; */
-      /*       double dy = CoM_i[1] - multi_j->CoM[1]; */
-      /*       double dz = CoM_i[2] - multi_j->CoM[2]; */
-
-      /*       /\* Apply BC *\/ */
-      /*       if (periodic) { */
-      /*         dx = nearest(dx, dim[0]); */
-      /*         dy = nearest(dy, dim[1]); */
-      /*         dz = nearest(dz, dim[2]); */
-      /*       } */
-      /*       const double r2 = dx * dx + dy * dy + dz * dz; */
-
-      /*       /\* Check the multipole acceptance criterion *\/ */
-      /*       if (gravity_M2L_accept(multi_i->r_max, multi_j->r_max,
-       * theta_crit2, r2)) { */
-
-      /*         /\* Go for a (non-symmetric) M-M calculation *\/ */
-      /*         runner_dopair_grav_mm(r, ci, cj); */
-      /*       } else { */
-      /*         /\* Alright, we have to take charge of that pair in a different
-       * way. *\/ */
-      /*         // MATTHIEU: We should actually open the tree-node here and
-       * recurse. */
-      /*         runner_dopair_grav_mm(r, ci, cj); */
-      /* } */
+      runner_dopair_recursive_grav_pm(r, ci, cj);
+
     } /* We are in charge of this pair */
   }   /* Loop over top-level cells */
 
+  message("cc=%d", cc);
+
   if (timer) TIMER_TOC(timer_dograv_long_range);
 }