diff --git a/src/cell.c b/src/cell.c
index 94f8c4a10520936cc862807e390bd6a08cea7dba..a2e55e3c0194baf2ae6509317b243f5304bfdb70 100644
--- a/src/cell.c
+++ b/src/cell.c
@@ -2384,10 +2384,16 @@ void cell_set_super_mapper(void *map_data, int num_elements, void *extra_data) {
 
   for (int ind = 0; ind < num_elements; ind++) {
     struct cell *c = &((struct cell *)map_data)[ind];
+
+    /* Super-pointer for hydro */
     if (e->policy & engine_policy_hydro) cell_set_super_hydro(c, NULL);
-    if (e->policy &
-        (engine_policy_self_gravity | engine_policy_external_gravity))
+
+    /* Super-pointer for gravity */
+    if ((e->policy & engine_policy_self_gravity) ||
+        (e->policy & engine_policy_external_gravity))
       cell_set_super_gravity(c, NULL);
+
+    /* Super-pointer for common operations */
     cell_set_super(c, NULL);
   }
 }
@@ -2762,3 +2768,38 @@ void cell_check_timesteps(struct cell *c) {
   error("Calling debugging code without debugging flag activated.");
 #endif
 }
+
+/**
+ * @brief Can we use the MM interactions fo a given pair of cells?
+ *
+ * @param ci The first #cell.
+ * @param cj The second #cell.
+ * @param e The #engine.
+ * @param s The #space.
+ */
+int cell_can_use_pair_mm(const struct cell *ci, const struct cell *cj,
+                         const struct engine *e, const struct space *s) {
+
+  const double theta_crit2 = e->gravity_properties->theta_crit2;
+  const int periodic = s->periodic;
+  const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]};
+
+  /* Recover the multipole information */
+  const struct gravity_tensors *const multi_i = ci->multipole;
+  const struct gravity_tensors *const multi_j = cj->multipole;
+
+  /* Get the distance between the CoMs */
+  double dx = multi_i->CoM[0] - multi_j->CoM[0];
+  double dy = multi_i->CoM[1] - multi_j->CoM[1];
+  double dz = multi_i->CoM[2] - multi_j->CoM[2];
+
+  /* Apply BC */
+  if (periodic) {
+    dx = nearest(dx, dim[0]);
+    dy = nearest(dy, dim[1]);
+    dz = nearest(dz, dim[2]);
+  }
+  const double r2 = dx * dx + dy * dy + dz * dz;
+
+  return gravity_M2L_accept(multi_i->r_max, multi_j->r_max, theta_crit2, r2);
+}
diff --git a/src/cell.h b/src/cell.h
index e53bf7e305c78df4af1a093a2fff4c9689d94ef2..62ac0d0535b49d3e0b3a3affdc645522c0b53659 100644
--- a/src/cell.h
+++ b/src/cell.h
@@ -521,6 +521,8 @@ void cell_activate_sorts(struct cell *c, int sid, struct scheduler *s);
 void cell_clear_drift_flags(struct cell *c, void *data);
 void cell_set_super_mapper(void *map_data, int num_elements, void *extra_data);
 int cell_has_tasks(struct cell *c);
+int cell_can_use_pair_mm(const struct cell *ci, const struct cell *cj,
+                         const struct engine *e, const struct space *s);
 
 /* Inlined functions (for speed). */
 
diff --git a/src/engine.c b/src/engine.c
index ac43011499aa924c74bf004084b9387774bed2d4..dfe97b2d466f5ee539a644f6bcf3927117f0f512 100644
--- a/src/engine.c
+++ b/src/engine.c
@@ -2339,7 +2339,6 @@ void engine_make_self_gravity_tasks_mapper(void *map_data, int num_elements,
   const int periodic = s->periodic;
   const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]};
   const int cdim[3] = {s->cdim[0], s->cdim[1], s->cdim[2]};
-  const double theta_crit2 = e->gravity_properties->theta_crit2;
   struct cell *cells = s->cells_top;
   const double max_distance = e->mesh->r_cut_max;
 
@@ -2364,7 +2363,6 @@ void engine_make_self_gravity_tasks_mapper(void *map_data, int num_elements,
     /* Recover the multipole information */
     const struct gravity_tensors *const multi_i = ci->multipole;
     const double CoM_i[3] = {multi_i->CoM[0], multi_i->CoM[1], multi_i->CoM[2]};
-    const double r_max_i = multi_i->r_max;
 
 #ifdef SWIFT_DEBUG_CHECKS
     if (multi_i->r_max != multi_i->r_max_rebuild)
@@ -2412,7 +2410,7 @@ void engine_make_self_gravity_tasks_mapper(void *map_data, int num_elements,
           if (periodic && min_radius > max_distance) continue;
 
           /* Are the cells too close for a MM interaction ? */
-          if (!gravity_M2L_accept(r_max_i, multi_j->r_max, theta_crit2, r2)) {
+          if (!cell_can_use_pair_mm(ci, cj, e, s)) {
 
             /* Ok, we need to add a direct pair calculation */
             scheduler_addtask(sched, task_type_pair, task_subtype_grav, 0, 0,
@@ -3678,7 +3676,7 @@ int engine_estimate_nr_tasks(struct engine *e) {
   }
   if (e->policy & engine_policy_self_gravity) {
     n1 += 125;
-    n2 += 1;
+    n2 += 8;
 #ifdef WITH_MPI
     n2 += 2;
 #endif
@@ -4192,7 +4190,7 @@ void engine_skip_force_and_kick(struct engine *e) {
         t->type == task_type_kick1 || t->type == task_type_kick2 ||
         t->type == task_type_timestep || t->subtype == task_subtype_force ||
         t->subtype == task_subtype_grav || t->type == task_type_end_force ||
-        t->type == task_type_grav_long_range ||
+        t->type == task_type_grav_long_range || t->type == task_type_grav_mm ||
         t->type == task_type_grav_down || t->type == task_type_cooling ||
         t->type == task_type_sourceterms)
       t->skip = 1;
diff --git a/src/runner.c b/src/runner.c
index 1590e478e95e5c285f535c7d45fa9c43e4566617..ffe1b13745f8b713e19c47e9efa7e8c94010622e 100644
--- a/src/runner.c
+++ b/src/runner.c
@@ -2219,6 +2219,9 @@ void *runner_main(void *data) {
         case task_type_grav_long_range:
           runner_do_grav_long_range(r, t->ci, 1);
           break;
+        case task_type_grav_mm:
+          runner_dopair_grav_mm_symmetric(r, t->ci, t->cj);
+          break;
         case task_type_cooling:
           runner_do_cooling(r, t->ci, 1);
           break;
diff --git a/src/runner_doiact_grav.h b/src/runner_doiact_grav.h
index 05c523657b22b8e23ef44bde32ed6b42f6246db2..c6cd06c79efbdb6cd2c84880a601ebf402f484f9 100644
--- a/src/runner_doiact_grav.h
+++ b/src/runner_doiact_grav.h
@@ -1475,6 +1475,27 @@ static INLINE void runner_doself_recursive_grav(struct runner *r,
   if (gettimer) TIMER_TOC(timer_dosub_self_grav);
 }
 
+/**
+ * @brief Call the non-symmetric M-M calculation on two cells if active.
+ *
+ * @param r The #runner object.
+ * @param ci The first #cell.
+ * @param cj The second #cell.
+ */
+void runner_dopair_grav_mm_symmetric(struct runner *r, struct cell *ci,
+                                     struct cell *cj) {
+
+  const struct engine *e = r->e;
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (!cell_is_active_gravity(ci, e) && !cell_is_active_gravity(cj, e))
+    error("Running M-M task with two inactive cells.");
+#endif
+
+  if (cell_is_active_gravity(ci, e)) runner_dopair_grav_mm(r, ci, cj);
+  if (cell_is_active_gravity(cj, e)) runner_dopair_grav_mm(r, cj, ci);
+}
+
 /**
  * @brief Performs all M-M interactions between a given top-level cell and all
  * the other top-levels that are far enough.
diff --git a/src/scheduler.c b/src/scheduler.c
index da38255b79431cc3c181c4e435204a33111a7257..c7376ab56638e38f455ddb8ce56e4deefc7cbb8f 100644
--- a/src/scheduler.c
+++ b/src/scheduler.c
@@ -241,7 +241,7 @@ void scheduler_write_dependencies(struct scheduler *s, int verbose) {
   int density_cluster[4] = {0};
   int gradient_cluster[4] = {0};
   int force_cluster[4] = {0};
-  int gravity_cluster[4] = {0};
+  int gravity_cluster[5] = {0};
 
   /* Check whether we need to construct a group of tasks */
   for (int type = 0; type < task_type_count; ++type) {
@@ -265,6 +265,7 @@ void scheduler_write_dependencies(struct scheduler *s, int verbose) {
         }
         if (type == task_type_grav_mesh) gravity_cluster[2] = 1;
         if (type == task_type_grav_long_range) gravity_cluster[3] = 1;
+        if (type == task_type_grav_mm) gravity_cluster[4] = 1;
       }
     }
   }
@@ -307,6 +308,8 @@ void scheduler_write_dependencies(struct scheduler *s, int verbose) {
     fprintf(f, "\t\t %s;\n", taskID_names[task_type_grav_mesh]);
   if (gravity_cluster[3])
     fprintf(f, "\t\t %s;\n", taskID_names[task_type_grav_long_range]);
+  if (gravity_cluster[4])
+    fprintf(f, "\t\t %s;\n", taskID_names[task_type_grav_mm]);
   fprintf(f, "\t};\n");
 
   /* Be clean */
@@ -800,6 +803,9 @@ static void scheduler_splittask_hydro(struct task *t, struct scheduler *s) {
  */
 static void scheduler_splittask_gravity(struct task *t, struct scheduler *s) {
 
+  const struct space *sp = s->space;
+  const struct engine *e = sp->e;
+
   /* Iterate on this task until we're done with it. */
   int redo = 1;
   while (redo) {
@@ -820,7 +826,7 @@ static void scheduler_splittask_gravity(struct task *t, struct scheduler *s) {
     if (t->type == task_type_self) {
 
       /* Get a handle on the cell involved. */
-      struct cell *ci = t->ci;
+      const struct cell *ci = t->ci;
 
       /* Foreign task? */
       if (ci->nodeID != s->nodeID) {
@@ -829,50 +835,106 @@ static void scheduler_splittask_gravity(struct task *t, struct scheduler *s) {
       }
 
       /* Should we split this task? */
-      if (ci->split && ci->gcount > space_subsize_self_grav) {
-
-        /* Take a step back (we're going to recycle the current task)... */
-        redo = 1;
-
-        /* Add the self tasks. */
-        int first_child = 0;
-        while (ci->progeny[first_child] == NULL) first_child++;
-        t->ci = ci->progeny[first_child];
-        for (int k = first_child + 1; k < 8; k++)
-          if (ci->progeny[k] != NULL)
-            scheduler_splittask_gravity(
-                scheduler_addtask(s, task_type_self, t->subtype, 0, 0,
-                                  ci->progeny[k], NULL),
-                s);
-
-        /* Make a task for each pair of progeny */
-        if (t->subtype != task_subtype_external_grav) {
-          for (int j = 0; j < 8; j++)
-            if (ci->progeny[j] != NULL)
-              for (int k = j + 1; k < 8; k++)
-                if (ci->progeny[k] != NULL)
-                  scheduler_splittask_gravity(
-                      scheduler_addtask(s, task_type_pair, t->subtype,
-                                        sub_sid_flag[j][k], 0, ci->progeny[j],
-                                        ci->progeny[k]),
-                      s);
-        }
-      } /* Cell is split */
+      if (ci->split) {
 
-    } /* Self interaction */
+        if (scheduler_dosub && ci->gcount < space_subsize_self_grav) {
+
+          /* Otherwise, split it. */
+        } else {
+
+          /* Take a step back (we're going to recycle the current task)... */
+          redo = 1;
+
+          /* Add the self tasks. */
+          int first_child = 0;
+          while (ci->progeny[first_child] == NULL) first_child++;
+          t->ci = ci->progeny[first_child];
+          for (int k = first_child + 1; k < 8; k++)
+            if (ci->progeny[k] != NULL)
+              scheduler_splittask_gravity(
+                  scheduler_addtask(s, task_type_self, t->subtype, 0, 0,
+                                    ci->progeny[k], NULL),
+                  s);
+
+          /* Make a task for each pair of progeny */
+          if (t->subtype != task_subtype_external_grav) {
+            for (int j = 0; j < 8; j++)
+              if (ci->progeny[j] != NULL)
+                for (int k = j + 1; k < 8; k++)
+                  if (ci->progeny[k] != NULL)
+                    scheduler_splittask_gravity(
+                        scheduler_addtask(s, task_type_pair, t->subtype,
+                                          sub_sid_flag[j][k], 0, ci->progeny[j],
+                                          ci->progeny[k]),
+                        s);
+
+          } /* Self-gravity only */
+        }   /* Make tasks explicitly */
+      }     /* Cell is split */
+    }       /* Self interaction */
 
     /* Pair interaction? */
     else if (t->type == task_type_pair) {
 
       /* Get a handle on the cells involved. */
-      struct cell *ci = t->ci;
-      struct cell *cj = t->cj;
+      const struct cell *ci = t->ci;
+      const struct cell *cj = t->cj;
 
       /* Foreign task? */
       if (ci->nodeID != s->nodeID && cj->nodeID != s->nodeID) {
         t->skip = 1;
         break;
       }
+
+      /* Should we replace it with an M-M task? */
+      if (cell_can_use_pair_mm(ci, cj, e, sp)) {
+
+        t->type = task_type_grav_mm;
+        t->subtype = task_subtype_none;
+        break;
+      }
+
+      /* Should this task be split-up? */
+      if (ci->split && cj->split) {
+
+        /* Replace by a single sub-task? */
+        if (scheduler_dosub && /* Use division to avoid integer overflow. */
+            ci->gcount < space_subsize_pair_grav / cj->gcount) {
+
+          /* Otherwise, split it. */
+        } else {
+
+          /* Take a step back (we're going to recycle the current task)... */
+          redo = 1;
+
+          /* Find the first non-empty childrens of the cells */
+          int first_ci_child = 0, first_cj_child = 0;
+          while (ci->progeny[first_ci_child] == NULL) first_ci_child++;
+          while (cj->progeny[first_cj_child] == NULL) first_cj_child++;
+
+          /* Recycle the current pair */
+          t->ci = ci->progeny[first_ci_child];
+          t->cj = cj->progeny[first_cj_child];
+
+          /* Make a task for every other pair of progeny */
+          for (int i = first_ci_child; i < 8; i++) {
+            if (ci->progeny[i] != NULL) {
+              for (int j = first_cj_child; j < 8; j++) {
+                if (cj->progeny[j] != NULL) {
+
+                  /* Skip the recycled pair */
+                  if (i == first_ci_child && j == first_cj_child) continue;
+
+                  scheduler_splittask_gravity(
+                      scheduler_addtask(s, task_type_pair, t->subtype, 0, 0,
+                                        ci->progeny[i], cj->progeny[j]),
+                      s);
+                }
+              }
+            }
+          }
+        } /* Split the pair */
+      }
     } /* pair interaction? */
   }   /* iterate over the current task. */
 }
@@ -1333,7 +1395,8 @@ void scheduler_rewait_mapper(void *map_data, int num_elements,
 #ifdef SWIFT_DEBUG_CHECKS
     /* Check that we don't have more waits that what can be stored. */
     if (t->wait < 0)
-      error("Task unlocked by more than %d tasks!",
+      error("Task (type=%s/%s) unlocked by more than %d tasks!",
+            taskID_names[t->type], subtaskID_names[t->subtype],
             (1 << (8 * sizeof(t->wait) - 1)) - 1);
 #endif